# BackDoorBuster/detection/backdoor_detection.py

"""
Usage: python backdoor_detection.py your_file_path
"""

import re
from typing import List, Tuple, Dict
import sys


def read_file_content(file_path: str) -> str:
    """
    Reads and returns the text content of a specified file.

    The file is decoded as UTF-8. Bytes that are not valid UTF-8 are replaced
    with U+FFFD instead of raising UnicodeDecodeError, so a file that contains
    stray binary data can still be scanned rather than aborting the program
    with a traceback.

    If the file does not exist or cannot be read, an error message is printed
    and the program exits with status 1; no exception reaches the caller.

    :param file_path: The full path to the file.
    :return: The text content of the file.
    """
    try:
        # errors="replace": UnicodeDecodeError is a ValueError, not an IOError,
        # so it would otherwise escape both handlers below.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            return file.read()
    except FileNotFoundError:
        print("Error: File not found.")
        sys.exit(1)
    except IOError:
        # Covers the remaining OS-level failures (permissions, directories, ...).
        print("Error: Could not read file.")
        sys.exit(1)


def find_dangerous_functions(file_content: str) -> Dict[str, List[Tuple[int, str]]]:
    """
    Searches the given code text for potentially dangerous function calls and classifies results by risk level.

    Each line is examined after removing everything from the first ``#`` onward
    and stripping surrounding whitespace. A line is reported at most once, under
    the highest risk level of any pattern it matches, so a line such as
    ``popen(eval(x))`` is reported as "high" rather than "medium".

    Known limitation: a ``#`` inside a string literal is also treated as the
    start of a comment, so code following it on the same line is not examined.

    :param file_content: String content of the code file.
    :return: Dictionary with the keys "high", "medium" and "low", each mapping to a
        list of tuples (1-based line number, matched line content without comment).
    """
    # Define dangerous functions and their risk levels
    patterns: Dict[str, str] = {
        r"\bsystem\(": "high",
        r"\bexec\(": "high",
        r"\bpopen\(": "medium",
        r"\beval\(": "high",
        r"\bsubprocess\.run\(": "medium",
    }
    # Risk levels from most to least severe; also fixes the key order of the result.
    risk_order = ("high", "medium", "low")
    # Compile once, grouped by level, so each line is tested most-severe first.
    compiled_by_level = {
        level: [re.compile(pattern) for pattern, risk in patterns.items() if risk == level]
        for level in risk_order
    }
    # Store results classified by risk level
    classified_results: Dict[str, List[Tuple[int, str]]] = {level: [] for level in risk_order}
    for line_number, line in enumerate(file_content.split("\n"), start=1):
        # Remove comments from the line
        clean_line = line.split("#")[0].strip()
        if not clean_line:  # Skip empty or comment-only lines
            continue
        for level in risk_order:
            if any(regex.search(clean_line) for regex in compiled_by_level[level]):
                classified_results[level].append((line_number, clean_line))
                break  # Report the line once, at the highest matching level
    return classified_results


def main(file_path: str):
    """
    Scans one file and prints the dangerous function calls found in it, grouped by risk level.

    The file is loaded with read_file_content and analysed with
    find_dangerous_functions; a report is then printed for the "high" and
    "medium" levels, in that order.

    :param file_path: File path input from the command line.
    """
    findings = find_dangerous_functions(read_file_content(file_path))
    # Only the high and medium buckets are reported.
    for level in ("high", "medium"):
        hits = findings[level]
        if not hits:
            print(f"No dangerous functions found at risk level {level}.")
            continue
        print(f"Dangerous functions found at risk level {level}:")
        for number, text in hits:
            print(f"  Line {number}: {text}")


if __name__ == "__main__":
    # Script entry point: the first command-line argument is the file to scan.
    if len(sys.argv) < 2:
        # Name the script as the module docstring does, not a generic "script.py".
        print("Usage: python backdoor_detection.py <file_path>")
        sys.exit(1)
    main(sys.argv[1])