# BackDoorBuster/detection/backdoor_detection.py

import os
import re
import sys
from typing import Dict, List, Tuple
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from reportlab.lib import colors

# File extensions that process_path will scan; other files are skipped.
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
# Report formats output_results can write; any other value falls back to "txt".
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]


def read_file_content(file_path: str) -> str:
    """Return the text of *file_path*, decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    raising ``UnicodeDecodeError``, so one oddly-encoded source file cannot
    abort a whole scan with a traceback.

    Prints an error message and exits the process with status 1 when the
    file does not exist or cannot be read.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            return file.read()
    except FileNotFoundError:
        print("Error: File not found.")
        sys.exit(1)
    except IOError:
        print("Error: Could not read file.")
        sys.exit(1)


def _strip_comments(code, line_marker, block_markers=None, quote_chars="\"'"):
    """Return *code* with comments removed, leaving string literals intact.

    *line_marker* starts a comment that runs to the end of the line.
    *block_markers* is an optional ``(open, close)`` pair for block comments;
    an unterminated block comment is left in place.
    """
    kept = []
    quote = None  # the quote character of the literal we are inside, if any
    position = 0
    length = len(code)
    while position < length:
        char = code[position]
        if quote is not None:
            if char == "\\" and position + 1 < length:
                # Keep the escaped character so an escaped quote does not
                # end the literal.
                kept.append(code[position:position + 2])
                position += 2
                continue
            if char == quote or char == "\n":
                # Quote state is tracked per line: an unterminated literal
                # must not swallow comment handling on the following lines.
                quote = None
            kept.append(char)
            position += 1
        elif char in quote_chars:
            quote = char
            kept.append(char)
            position += 1
        elif code.startswith(line_marker, position):
            newline = code.find("\n", position)
            if newline == -1:
                break
            position = newline  # keep the newline itself
        elif block_markers and code.startswith(block_markers[0], position):
            opener, closer = block_markers
            close_at = code.find(closer, position + len(opener))
            if close_at == -1:
                kept.append(code[position:])
                break
            position = close_at + len(closer)
        else:
            kept.append(char)
            position += 1
    return "".join(kept)


def remove_comments(code: str, extension: str) -> str:
    """Strip comments from *code* according to the file *extension*.

    ``.py`` input loses ``#`` comments; ``.js`` and ``.cpp`` input loses
    ``//`` line comments and ``/* ... */`` block comments. Any other
    extension is returned unchanged apart from whitespace stripping.

    Comment markers that appear inside a quoted string (``"#"``,
    ``"http://host"``) are not treated as comments, so code following such a
    literal on the same line is preserved for scanning. Quote tracking is a
    per-line heuristic: when a line has an unbalanced quote, the rest of that
    line is kept rather than risk dropping real code.

    Returns the remaining text with leading/trailing whitespace removed.
    """
    if extension == ".py":
        return _strip_comments(code, "#").strip()
    if extension in {".js", ".cpp"}:
        return _strip_comments(code, "//", ("/*", "*/"), "\"'`").strip()
    return code.strip()


# Regexes for risky calls, keyed by file extension; each maps to the risk
# level under which a matching line is reported.
_RISK_PATTERNS = {
    ".py": {
        r"\bsystem\(": "high",
        r"\bexec\(": "high",
        r"\bpopen\(": "medium",
        r"\beval\(": "high",
        r"\bsubprocess\.run\(": "medium",
        r"\b__getattribute__\(": "high",
        r"\bgetattr\(": "medium",
        r"\b__import__\(": "high",
    },
    ".js": {
        r"\beval\(": "high",
        r"\bexec\(": "high",
        r"\bchild_process\.exec\(": "high",
    },
    ".cpp": {
        r"\bsystem\(": "high",
    },
}


def find_dangerous_functions(
    file_content: str, file_extension: str
) -> Dict[str, List[Tuple[int, str]]]:
    """Scan *file_content* line by line for calls considered dangerous.

    Each line is passed through ``remove_comments`` and then tested against
    the patterns registered for *file_extension*; an extension with no
    registered patterns yields no findings.

    Returns a dict with the keys ``"high"``, ``"medium"``, ``"low"`` and
    ``"none"``, each holding ``(line_number, stripped_line)`` tuples with
    1-based line numbers. A line appears at most once per risk level even
    when several patterns of that level match it (for example
    ``child_process.exec(`` matches two of the ``.js`` patterns).
    """
    risk_patterns = _RISK_PATTERNS.get(file_extension, {})
    classified_results = {"high": [], "medium": [], "low": [], "none": []}
    for line_number, line in enumerate(file_content.split("\n"), start=1):
        clean_line = remove_comments(line, file_extension)
        if not clean_line:
            continue
        matched_levels = {
            risk_level
            for pattern, risk_level in risk_patterns.items()
            if re.search(pattern, clean_line)
        }
        # Iterate the result dict rather than the set so the append order is
        # deterministic.
        for risk_level, entries in classified_results.items():
            if risk_level in matched_levels:
                entries.append((line_number, clean_line))
    return classified_results


def generate_text_content(results):
    """Build the plain-text report that is printed to the terminal.

    *results* maps a risk level to a list of ``(line_num, line)`` pairs.
    The ``"none"`` level and any level without entries are left out.
    Returns the report as a single string.
    """
    report_parts = ["Security Analysis Report\n"]
    for level, findings in results.items():
        if level == "none" or not findings:
            continue
        report_parts.append(f"{level.capitalize()} Risk:\n")
        report_parts.extend(
            f"  Line {number}: {text}\n" for number, text in findings
        )
    return "".join(report_parts)


def output_results(results, output_format, output_file=None):
    """Write *results* to *output_file*, or print them when no file is given.

    *output_format* selects the writer (``"pdf"``, ``"html"``, ``"md"`` or
    ``"txt"``). A format not listed in ``OUTPUT_FORMATS`` falls back to
    ``"txt"`` and the output file's extension is replaced with ``.txt``.
    The parent directory of the output file is created when it is missing.
    """
    if not output_file:
        # If no output file is specified, default to text output to the terminal.
        print(generate_text_content(results))
        return

    if output_format not in OUTPUT_FORMATS:
        output_format = "txt"
        output_file = f"{os.path.splitext(output_file)[0]}.txt"

    # A bare file name such as "report.txt" has an empty dirname, and
    # os.makedirs("") raises FileNotFoundError, so only create a directory
    # when there is one to create.
    results_dir = os.path.dirname(output_file)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    if output_format == "pdf":
        output_pdf(results, output_file)
    elif output_format == "html":
        output_html(results, output_file)
    elif output_format == "md":
        output_markdown(results, output_file)
    else:  # Default to txt
        output_text(results, output_file)


def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
    """Write *results* as a PDF report to *file_name*.

    The report has a centred title followed by one heading per risk level
    (the ``"none"`` level is skipped) with one paragraph per finding.
    """
    # Paragraph interprets its text as XML-like markup, so the scanned source
    # lines must be escaped or "<", ">" and "&" in them would be parsed as
    # tags/entities instead of being shown literally.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(file_name, pagesize=letter)
    story = []
    styles = getSampleStyleSheet()

    # Add the title centered
    title_style = styles["Title"]
    title_style.alignment = 1  # Center alignment
    title = Paragraph("Security Analysis Report", title_style)
    story.append(title)
    story.append(Spacer(1, 20))  # Space after title

    # Add risk levels and entries
    normal_style = styles["BodyText"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        story.append(
            Paragraph(f"{risk_level.capitalize()} Risk:", styles["Heading2"])
        )
        for line_num, line in entries:
            text = f"Line {escape(str(line_num))}: {escape(str(line))}"
            story.append(Paragraph(text, normal_style))
        story.append(Spacer(1, 12))  # Space between sections

    doc.build(story)


def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render *results* as an HTML report.

    Each risk level except ``"none"`` becomes an ``<h2>`` section with one
    ``<li>`` per finding. Finding text comes from the scanned (untrusted)
    source files, so it is HTML-escaped before being embedded.

    When *file_name* is given the page is written there as UTF-8 and ``None``
    is returned; otherwise the HTML string is returned.
    """
    import html

    parts = [
        '<html><head><meta charset="utf-8">'
        "<title>Security Analysis Report</title></head><body>",
        "<h1>Security Analysis Report</h1>",
    ]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        parts.append(f"<h2>{risk_level.capitalize()} Risk</h2><ul>")
        parts.extend(
            f"<li>{html.escape(str(line_num))}: {html.escape(str(line))}</li>"
            for line_num, line in entries
        )
        parts.append("</ul>")
    parts.append("</body></html>")
    html_output = "".join(parts)

    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(html_output)
        return None
    return html_output


def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render *results* as a Markdown report.

    Each risk level except ``"none"`` becomes a ``##`` section with one
    bullet per finding.

    When *file_name* is given the report is written there and ``None`` is
    returned; otherwise the Markdown string is returned. The file is written
    as UTF-8 (the same encoding the scanned sources are read with) so
    non-ASCII characters in findings do not depend on the platform locale.
    """
    parts = ["# Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        parts.append(f"## {risk_level.capitalize()} Risk\n")
        parts.extend(f"- {line_num}: {line}\n" for line_num, line in entries)
    md_output = "".join(parts)

    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(md_output)
        return None
    return md_output


def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render *results* as a plain-text report.

    Each risk level except ``"none"`` gets a heading line followed by one
    indented line per finding.

    When *file_name* is given the report is written there and ``None`` is
    returned; otherwise the text is returned. The file is written as UTF-8
    (the same encoding the scanned sources are read with) so non-ASCII
    characters in findings do not depend on the platform locale.
    """
    parts = ["Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        parts.append(f"{risk_level.capitalize()} Risk:\n")
        parts.extend(f"  {line_num}: {line}\n" for line_num, line in entries)
    text_output = "".join(parts)

    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(text_output)
        return None
    return text_output


def _scan_file(file_path, file_extension, results):
    """Scan one file and merge its findings into *results* in place.

    Findings are stored as ``("<file_path>: Line <n>", line)`` so reports
    covering several files show where each hit came from.
    """
    file_results = find_dangerous_functions(
        read_file_content(file_path), file_extension
    )
    for risk_level, entries in file_results.items():
        if risk_level == "none":  # Exclude 'none' risk level
            continue
        results[risk_level].extend(
            (f"{file_path}: Line {line_num}", line) for line_num, line in entries
        )


def process_path(path: str, output_format: str, output_file=None):
    """Scan *path* (a file or a directory tree) and emit a report.

    Directories are walked recursively and every file whose extension is in
    ``SUPPORTED_EXTENSIONS`` is scanned. Extensions are compared
    case-insensitively, so ``evil.PY`` is scanned like ``evil.py``.

    A single file with an unsupported extension prints a message and returns
    without producing a report. A path that is neither a file nor a
    directory prints a message and exits the process with status 1.

    The collected findings are handed to ``output_results`` together with
    *output_format* and *output_file*.
    """
    results = {"high": [], "medium": [], "low": [], "none": []}
    if os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for file in files:
                file_extension = os.path.splitext(file)[1].lower()
                if file_extension in SUPPORTED_EXTENSIONS:
                    _scan_file(os.path.join(root, file), file_extension, results)
    elif os.path.isfile(path):
        file_extension = os.path.splitext(path)[1].lower()
        if file_extension not in SUPPORTED_EXTENSIONS:
            print("Unsupported file type.")
            return
        _scan_file(path, file_extension, results)
    else:
        print("Invalid path.")
        sys.exit(1)

    output_results(results, output_format, output_file)


def main():
    """Command-line entry point: parse arguments and run the scan.

    The report format is taken from the extension of ``--output``. When the
    extension is missing or not a supported format, a notice is printed and
    the report is saved as ``.txt`` under the same base name. Without
    ``--output`` the report is printed to the terminal.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
    args = parser.parse_args()
    output_format = "txt"  # Default output format
    output_file = None
    if args.output:
        base, ext = os.path.splitext(args.output)
        ext = ext.lower()
        if ext[1:] in OUTPUT_FORMATS:
            output_format = ext[1:]
            output_file = args.output
        else:
            print(
                "Your input file format was incorrect, the output has been saved as a TXT file."
            )
            # Use the splitext base, not rsplit("."): the latter would cut at
            # a dot inside a directory name ("./out/report" -> ".txt").
            output_file = base + ".txt"
    process_path(args.path, output_format, output_file)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()