From c5cfcb00f7edfe2afffc01efe36af6547c7a7aca Mon Sep 17 00:00:00 2001
From: dqy <1016751306@qq.com>
Date: Mon, 22 Apr 2024 11:47:23 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=9E=E7=8E=B0=E5=AF=B9=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E5=A4=B9=E8=BF=9B=E8=A1=8C=E9=80=92=E5=BD=92=E6=A3=80?=
 =?UTF-8?q?=E6=B5=8B=EF=BC=9B=E6=94=AF=E6=8C=81html=EF=BC=8Ctxt=EF=BC=8Cmd?=
 =?UTF-8?q?=E7=AD=89=E5=A4=9A=E7=A7=8D=E8=BE=93=E5=87=BA=E6=96=B9=E5=BC=8F?=
 =?UTF-8?q?=EF=BC=9B=E4=BF=AE=E6=94=B9=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?=
 =?UTF-8?q?=EF=BC=9B=E6=94=AF=E6=8C=81=E6=A3=80=E6=B5=8B=E5=A4=9A=E7=A7=8D?=
 =?UTF-8?q?=E8=AF=AD=E8=A8=80=EF=BC=9B=E6=B7=BB=E5=8A=A0=E7=AD=89=E7=BA=A7?=
 =?UTF-8?q?-none=EF=BC=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line above and the diff is ignored by
git am and is not part of the commit message):

* Restoration. This copy of the patch had been collapsed onto a handful of
  lines. Line breaks and indentation were rebuilt so that both hunks match
  the line counts in their headers (21/13 and 61/150) and the diffstat
  (136 insertions, 55 deletions).
* NOTE(review): markup inside string literals was missing from the damaged
  copy. The HTML tags in output_html and the angle-bracket placeholders in
  the two usage messages are reconstructions, and three added lines of
  output_html (the per-entry loop, its list item, and the list close) were
  inferred from the added-line count of the second hunk. Confirm against
  blobs afca29b / f5139ae before applying.
* NOTE(review): runs of spaces could not be recovered. Removed lines assume
  two spaces before an inline comment; the " Line ..." literals are kept
  with the single leading space that survived. If the pre-image differs,
  apply with "git apply --ignore-whitespace".
* Code observations on the added lines, left unchanged here:
  - remove_comments() cuts a .py line at the first '#', including a '#'
    that sits inside a string literal.
  - remove_comments() is called once per line, so a /* ... */ comment that
    spans several lines is not stripped for .js / .cpp input.
  - output_results() names the report after the input file's basename only,
    so equally named files from different directories overwrite each other
    in ../results.
  - output_html/output_markdown/output_text open the report without an
    explicit encoding, while read_file_content reads as utf-8.
  - test() only prints "hello world" and is not called by anything in this
    patch.

 detection/backdoor_detection.py | 191 +++++++++++++++++++++++---------
 1 file changed, 136 insertions(+), 55 deletions(-)

diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py
index afca29b..f5139ae 100644
--- a/detection/backdoor_detection.py
+++ b/detection/backdoor_detection.py
@@ -1,21 +1,13 @@
-"""
-Usage: python backdoor_detection.py your_file_path
-"""
-
+import os
 import re
-from typing import List, Tuple, Dict
 import sys
+from typing import Dict, List, Tuple
+
+SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
+OUTPUT_FORMATS = ["html", "md", "txt"]
 
 
 def read_file_content(file_path: str) -> str:
-    """
-    Reads and returns the content of a specified file. Exits the program with an error if the file does not exist or cannot be read.
-
-    :param file_path: The full path to the file.
-    :return: The text content of the file.
-    :raises FileNotFoundError: If the file does not exist.
-    :raises IOError: If the file cannot be read.
-    """
     try:
         with open(file_path, "r", encoding="utf-8") as file:
             return file.read()
@@ -27,61 +19,150 @@ def read_file_content(file_path: str) -> str:
         sys.exit(1)
 
 
-def find_dangerous_functions(file_content: str) -> Dict[str, List[Tuple[int, str]]]:
-    """
-    Searches the given code text for potentially dangerous function calls and classifies results by risk level.
-    Ignores comments in the code.
+def remove_comments(code: str, extension: str) -> str:
+    if extension == ".py":
+        return code.split("#")[0].strip()
+    elif extension in {".js", ".cpp"}:
+        code = re.sub(r"//.*", "", code)
+        code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL)
+        return code.strip()
+    return code.strip()
 
-    :param file_content: String content of the code file.
-    :return: Dictionary with risk levels as keys and lists of tuples (line number, matched line content) as values.
-    """
-    # Define dangerous functions and their risk levels
-    patterns: Dict[str, str] = {
-        r"\bsystem\(": "high",
-        r"\bexec\(": "high",
-        r"\bpopen\(": "medium",
-        r"\beval\(": "high",
-        r"\bsubprocess\.run\(": "medium",
+
+def find_dangerous_functions(
+    file_content: str, file_extension: str
+) -> Dict[str, List[Tuple[int, str]]]:
+    patterns = {
+        ".py": {
+            r"\bsystem\(": "high",
+            r"\bexec\(": "high",
+            r"\bpopen\(": "medium",
+            r"\beval\(": "high",
+            r"\bsubprocess\.run\(": "medium",
+        },
+        ".js": {
+            r"\beval\(": "high",
+            r"\bexec\(": "high",
+            r"\bchild_process\.exec\(": "high",
+        },
+        ".cpp": {
+            r"\bsystem\(": "high",
+        },
     }
-    # Store results classified by risk level
-    classified_results = {"high": [], "medium": [], "low": []}
+    risk_patterns = patterns.get(file_extension, {})
+    classified_results = {"high": [], "medium": [], "low": [], "none": []}
     for line_number, line in enumerate(file_content.split("\n"), start=1):
-        # Remove comments from the line
-        clean_line = line.split("#")[0].strip()
-        if not clean_line:  # Skip empty or comment-only lines
+        clean_line = remove_comments(line, file_extension)
+        if not clean_line:
             continue
         found = False
-        for pattern, risk_level in patterns.items():
+        for pattern, risk_level in risk_patterns.items():
             if re.search(pattern, clean_line):
                 classified_results[risk_level].append((line_number, clean_line))
                 found = True
-                break  # Stop checking other patterns once a match is found
+                break
+        if not found:
+            classified_results["none"].append((line_number, clean_line))
     return classified_results
 
 
-def main(file_path: str):
-    """
-    Main function that reads file content, checks for dangerous functions, and outputs classified results by risk level.
+def output_results(
+    results: Dict[str, List[Tuple[int, str]]], output_format: str, file_path: str
+):
+    # Create the 'results' directory if it does not exist
+    results_dir = "../results"
+    if not os.path.exists(results_dir):
+        os.makedirs(results_dir)
 
-    :param file_path: File path input from the command line.
-    """
-    file_content = read_file_content(file_path)
-    classified_dangerous = find_dangerous_functions(file_content)
-    for risk_level in [
-        "high",
-        "medium",
-    ]:  # Only iterate over high and medium risk levels
-        occurrences = classified_dangerous[risk_level]
-        if occurrences:
-            print(f"Dangerous functions found at risk level {risk_level}:")
-            for line_num, func in occurrences:
-                print(f" Line {line_num}: {func}")
+    base_name = os.path.basename(file_path)
+    output_file = os.path.join(
+        results_dir, f"{os.path.splitext(base_name)[0]}.{output_format}"
+    )
+
+    if output_format == "html":
+        output_html(results, output_file)
+    elif output_format == "md":
+        output_markdown(results, output_file)
+    elif output_format == "txt":
+        output_text(results, output_file)
+
+
+def output_html(results: Dict[str, List[Tuple[int, str]]], file_name: str):
+    html_output = f"<html><head><title>Analysis of {file_name}</title></head><body>"
+    html_output += "<h1>Security Analysis Report</h1>"
+    for risk_level, entries in results.items():
+        html_output += f"<h2>{risk_level.capitalize()} Risk</h2><ul>"
+        for line_num, line in entries:
+            html_output += f"<li>Line {line_num}: {line}</li>"
+        html_output += "</ul>"
+    html_output += "</body></html>"
+    with open(file_name, "w") as file:
+        file.write(html_output)
+
+
+def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name: str):
+    md_output = f"# Security Analysis Report for {file_name}\n"
+    for risk_level, entries in results.items():
+        md_output += f"## {risk_level.capitalize()} Risk\n"
+        for line_num, line in entries:
+            md_output += f"- Line {line_num}: {line}\n"
+    with open(file_name, "w") as file:
+        file.write(md_output)
+
+
+def output_text(results: Dict[str, List[Tuple[int, str]]], file_name: str):
+    text_output = f"Security Analysis Report for {file_name}\n"
+    for risk_level, entries in results.items():
+        text_output += f"{risk_level.capitalize()} Risk:\n"
+        for line_num, line in entries:
+            text_output += f" Line {line_num}: {line}\n"
+    with open(file_name, "w") as file:
+        file.write(text_output)
+
+
+def process_path(path: str, output_format: str):
+    if os.path.isdir(path):
+        for root, dirs, files in os.walk(path):
+            for file in files:
+                file_extension = os.path.splitext(file)[1]
+                if file_extension in SUPPORTED_EXTENSIONS:
+                    file_path = os.path.join(root, file)
+                    print(f"Processing {file_path}...")
+                    file_results = find_dangerous_functions(
+                        read_file_content(file_path), file_extension
+                    )
+                    output_results(file_results, output_format, file_path)
+    elif os.path.isfile(path):
+        file_extension = os.path.splitext(path)[1]
+        if file_extension in SUPPORTED_EXTENSIONS:
+            file_results = find_dangerous_functions(
+                read_file_content(path), file_extension
+            )
+            output_results(file_results, output_format, path)
         else:
-            print(f"No dangerous functions found at risk level {risk_level}.")
+            print("Unsupported file type.")
+    else:
+        print("Invalid path.")
+        sys.exit(1)
+
+
+def test():
+    print("hello world")
+
+
+def main():
+    if len(sys.argv) < 3:
+        print("Usage: python backdoor_detection.py <path> <output_format>")
+        sys.exit(1)
+    path = sys.argv[1]
+    output_format = sys.argv[2]
+    if output_format not in OUTPUT_FORMATS:
+        print(
+            f"Unsupported output format. Supported formats are: {', '.join(OUTPUT_FORMATS)}"
+        )
+        sys.exit(1)
+    process_path(path, output_format)
 
 
 if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print("Usage: python script.py <file_path>")
-        sys.exit(1)
-    main(sys.argv[1])
+    main()