# String-literal alternatives are tried before the comment alternatives so a
# comment marker that appears inside quotes is kept as part of the literal.
_DOUBLE_QUOTED = r'"(?:\\.|[^"\\\n])*"'
_SINGLE_QUOTED = r"'(?:\\.|[^'\\\n])*'"
_BACKTICK_QUOTED = r"`(?:\\.|[^`\\])*`"

# Group 1 captures a string literal; a match with no group 1 is a comment.
_HASH_COMMENT_RE = re.compile(rf"({_DOUBLE_QUOTED}|{_SINGLE_QUOTED})|#.*")
_C_STYLE_COMMENT_RE = re.compile(
    rf"({_DOUBLE_QUOTED}|{_SINGLE_QUOTED}|{_BACKTICK_QUOTED})"
    r"|//[^\n]*|/\*.*?\*/",
    re.DOTALL,
)


def _keep_string_literal(match: re.Match) -> str:
    """Return the matched string literal unchanged, or "" for a comment."""
    return match.group(1) or ""


def remove_comments(code: str, extension: str) -> str:
    """Strip comments from a piece of source code and trim the result.

    Comment markers inside complete string literals are left alone, so a
    line such as ``s = "#"; eval(s)`` or ``u = "http://a"; eval(u)`` keeps
    the code that follows the literal instead of being cut at the marker.
    Comments are removed in a single left-to-right pass, so ``//`` inside
    a ``/* ... */`` block does not swallow the code after the block.

    A string literal that is not closed within the text is not recognised
    as a literal, and a ``/*`` with no closing ``*/`` in the text is left
    in place.

    :param code: Source text to clean (the scanner passes one line at a time).
    :param extension: File extension including the dot: ".py" removes
        ``#`` comments; ".js" and ".cpp" remove ``//`` and ``/* */``
        comments; any other value only trims whitespace.
    :return: The text without comments, with surrounding whitespace removed.
    """
    if extension == ".py":
        return _HASH_COMMENT_RE.sub(_keep_string_literal, code).strip()
    if extension in {".js", ".cpp"}:
        return _C_STYLE_COMMENT_RE.sub(_keep_string_literal, code).strip()
    return code.strip()
# Regex -> risk level, grouped by the file extension the patterns apply to.
_DANGEROUS_PATTERNS = {
    ".py": {
        r"\bsystem\(": "high",
        r"\bexec\(": "high",
        r"\bpopen\(": "medium",
        r"\beval\(": "high",
        r"\bsubprocess\.run\(": "medium",
    },
    ".js": {
        r"\beval\(": "high",
        r"\bexec\(": "high",
        r"\bchild_process\.exec\(": "high",
    },
    ".cpp": {
        r"\bsystem\(": "high",
    },
}

# Risk levels from most to least severe; used to pick a line's final level.
_RISK_ORDER = ("high", "medium", "low")


def find_dangerous_functions(
    file_content: str, file_extension: str
) -> Dict[str, List[Tuple[int, str]]]:
    """Classify every non-empty code line of a file by risk level.

    Each line is stripped of comments via ``remove_comments`` and then
    checked against the patterns registered for ``file_extension``. When a
    line matches several patterns it is reported once, under the most
    severe matching level, so a "medium" call on the same line cannot mask
    a "high" one.

    :param file_content: Full text of the file to scan.
    :param file_extension: Extension including the dot (".py", ".js",
        ".cpp"). An extension with no registered patterns puts every
        non-empty line under "none".
    :return: Dictionary with keys "high", "medium", "low" and "none", each
        mapping to a list of (1-based line number, cleaned line) tuples.
        Lines that are empty after comment removal are omitted.
    """
    risk_patterns = _DANGEROUS_PATTERNS.get(file_extension, {})
    classified_results = {"high": [], "medium": [], "low": [], "none": []}

    for line_number, line in enumerate(file_content.split("\n"), start=1):
        clean_line = remove_comments(line, file_extension)
        if not clean_line:
            continue

        matched_levels = {
            risk_level
            for pattern, risk_level in risk_patterns.items()
            if re.search(pattern, clean_line)
        }
        # Report the most severe level that matched, or "none" if nothing did.
        final_level = next(
            (level for level in _RISK_ORDER if level in matched_levels), "none"
        )
        classified_results[final_level].append((line_number, clean_line))

    return classified_results
def output_results(
    results: Dict[str, List[Tuple[int, str]]], output_format: str, file_path: str
):
    """Write the scan results to ``../results/<source name>.<format>``.

    The output directory is relative to the current working directory and
    is created if it does not exist. The report file is named after the
    scanned file's base name with its extension replaced by
    ``output_format``.

    :param results: Classified results keyed by risk level, as returned by
        ``find_dangerous_functions``.
    :param output_format: One of "html", "md" or "txt".
    :param file_path: Path of the file that was scanned.
    :raises ValueError: If ``output_format`` is not a supported format.
    """
    # Reject unknown formats up front instead of creating the directory and
    # then silently writing nothing.
    if output_format not in ("html", "md", "txt"):
        raise ValueError(f"Unsupported output format: {output_format!r}")

    # exist_ok avoids the race between an existence check and the creation.
    results_dir = "../results"
    os.makedirs(results_dir, exist_ok=True)

    base_name = os.path.basename(file_path)
    output_file = os.path.join(
        results_dir, f"{os.path.splitext(base_name)[0]}.{output_format}"
    )

    if output_format == "html":
        output_html(results, output_file)
    elif output_format == "md":
        output_markdown(results, output_file)
    else:
        output_text(results, output_file)