feature/GPT #12

Merged
sangge merged 18 commits from feature/GPT into main 2024-04-29 18:58:49 +08:00
3 changed files with 54 additions and 50 deletions
Showing only changes of commit 18454a0228

View File

@@ -4,10 +4,6 @@ from .utils import *
import openai
-def detect_gpt(filename: str):
-    content = read_file_content(filename)
-    return detectGPT(content)
def detectGPT(content: str):
    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    text = content

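For reviewers, a minimal calling sketch of the slimmed-down module. The package path and the input filename are illustrative assumptions; detectGPT, read_file_content, and the OPENAI_API_KEY lookup are taken from the diff above.

import os
# Hypothetical package layout -- adjust the import path to wherever GPTdetection lives.
from detection.GPTdetection import detectGPT
from detection.utils import read_file_content

# The detect_gpt(filename) wrapper is removed in this commit, so callers read the
# file themselves and pass raw text to detectGPT, which builds its OpenAI client
# from the OPENAI_API_KEY environment variable.
assert os.environ.get("OPENAI_API_KEY"), "export OPENAI_API_KEY before calling detectGPT"
content = read_file_content("suspect.py")  # hypothetical input file
result = detectGPT(content)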
View File

@@ -0,0 +1,37 @@
import re
from typing import Dict, List, Tuple
from .utils import remove_comments
def find_dangerous_functions(
    file_content: str, file_extension: str
) -> Dict[str, List[Tuple[int, str]]]:
    patterns = {
        ".py": {
            r"\bsystem\(": "high",
            r"\bexec\(": "high",
            r"\bpopen\(": "medium",
            r"\beval\(": "high",
            r"\bsubprocess\.run\(": "medium",
            r"\b__getattribute__\(": "high",
            r"\bgetattr\(": "medium",
            r"\b__import__\(": "high",
        },
        ".js": {
            r"\beval\(": "high",
            r"\bexec\(": "high",
            r"\bchild_process\.exec\(": "high",
        },
        ".cpp": {
            r"\bsystem\(": "high",
        },
    }
    risk_patterns = patterns.get(file_extension, {})
    classified_results = {"high": [], "medium": [], "low": [], "none": []}
    for line_number, line in enumerate(file_content.split("\n"), start=1):
        clean_line = remove_comments(line, file_extension)
        if not clean_line:
            continue
        for pattern, risk_level in risk_patterns.items():
            if re.search(pattern, clean_line):
                classified_results[risk_level].append((line_number, clean_line))
    return classified_results

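A quick, illustrative use of the extracted helper, assuming remove_comments returns comment-free lines unchanged (which is how the loop treats them) and that the module is importable under its file name.

from Regexdetection import find_dangerous_functions  # import path is an assumption

sample = 'import os\nos.system("ls")\nprint("hello")\n'
hits = find_dangerous_functions(sample, ".py")
print(hits["high"])    # expected: [(2, 'os.system("ls")')] -- line 2 matches r"\bsystem\("
print(hits["medium"])  # expected: [] -- no popen/getattr/subprocess.run in the sample
# Extensions without a pattern table (e.g. ".rb") fall back to {} and flag nothing.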
View File

@@ -5,49 +5,14 @@ from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from reportlab.lib import colors
+from .Regexdetection import find_dangerous_functions
+from .GPTdetection import detectGPT
from .utils import *
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
-def find_dangerous_functions(
-    file_content: str, file_extension: str
-) -> Dict[str, List[Tuple[int, str]]]:
-    patterns = {
-        ".py": {
-            r"\bsystem\(": "high",
-            r"\bexec\(": "high",
-            r"\bpopen\(": "medium",
-            r"\beval\(": "high",
-            r"\bsubprocess\.run\(": "medium",
-            r"\b__getattribute__\(": "high",
-            r"\bgetattr\(": "medium",
-            r"\b__import__\(": "high",
-        },
-        ".js": {
-            r"\beval\(": "high",
-            r"\bexec\(": "high",
-            r"\bchild_process\.exec\(": "high",
-        },
-        ".cpp": {
-            r"\bsystem\(": "high",
-        },
-    }
-    risk_patterns = patterns.get(file_extension, {})
-    classified_results = {"high": [], "medium": [], "low": [], "none": []}
-    for line_number, line in enumerate(file_content.split("\n"), start=1):
-        clean_line = remove_comments(line, file_extension)
-        if not clean_line:
-            continue
-        for pattern, risk_level in risk_patterns.items():
-            if re.search(pattern, clean_line):
-                classified_results[risk_level].append((line_number, clean_line))
-    return classified_results
def generate_text_content(results):
    text_output = "Security Analysis Report\n"
    for risk_level, entries in results.items():
@@ -153,7 +118,15 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    return text_output
-def process_path(path: str, output_format: str, output_file=None):
+def checkModeAndDetect(mode: str,filePath: str,fileExtension: str):
+    # TODO: add more detection modes here to improve code reuse and extensibility
+    if mode == "regex":
+        return find_dangerous_functions(read_file_content(filePath), fileExtension)
+    elif mode == "llm":
+        return detectGPT(read_file_content(filePath))
+def process_path(path: str, output_format: str, mode: str, output_file=None):
results = {"high": [], "medium": [], "low": [], "none": []} results = {"high": [], "medium": [], "low": [], "none": []}
if os.path.isdir(path): if os.path.isdir(path):
for root, dirs, files in os.walk(path): for root, dirs, files in os.walk(path):
@ -161,9 +134,8 @@ def process_path(path: str, output_format: str, output_file=None):
file_extension = os.path.splitext(file)[1] file_extension = os.path.splitext(file)[1]
if file_extension in SUPPORTED_EXTENSIONS: if file_extension in SUPPORTED_EXTENSIONS:
file_path = os.path.join(root, file) file_path = os.path.join(root, file)
file_results = find_dangerous_functions(
read_file_content(file_path), file_extension file_results = checkModeAndDetect(mode,file_path,file_extension)
)
for key in file_results: for key in file_results:
if key != "none": # Exclude 'none' risk level if key != "none": # Exclude 'none' risk level
results[key].extend( results[key].extend(
@@ -175,9 +147,7 @@ def process_path(path: str, output_format: str, output_file=None):
    elif os.path.isfile(path):
        file_extension = os.path.splitext(path)[1]
        if file_extension in SUPPORTED_EXTENSIONS:
-            file_results = find_dangerous_functions(
-                read_file_content(path), file_extension
-            )
+            file_results = checkModeAndDetect(mode,path,file_extension)
            for key in file_results:
                if key != "none":  # Exclude 'none' risk level
                    results[key].extend(
@@ -202,6 +172,7 @@ def main():
    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
+    parser.add_argument("-m", "--mode", help="Mode of operation:[regex,llm]", default="regex")
    args = parser.parse_args()
    output_format = "txt"  # Default output format
    output_file = None
@@ -216,7 +187,7 @@ def main():
            "Your input file format was incorrect, the output has been saved as a TXT file."
        )
        output_file = args.output.rsplit(".", 1)[0] + ".txt"
-    process_path(args.path, output_format, output_file)
+    process_path(args.path, output_format, args.mode, output_file)
if __name__ == "__main__":
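Taken together, the pipeline now threads a mode argument from the CLI (-m/--mode, default "regex") down to checkModeAndDetect: "regex" keeps the previous behaviour, "llm" routes each file's contents through detectGPT, and an unrecognised mode falls through and returns None. A hypothetical driver call, assuming the module above is importable as main and the paths exist:

from main import process_path  # module name is an assumption

# Default mode: regex scan of a project tree; results written as a plain-text report.
process_path("./project", "txt", "regex", "report.txt")

# Same tree, but every supported file is sent to the GPT detector instead.
process_path("./project", "txt", "llm", "report.txt")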