feat:更改代码分布，实现模块化添加功能

2024-04-28 21:53:43 +08:00 · 2024-04-28 21:53:43 +08:00 · 18454a0228
commit 18454a0228
parent 9e6b13d80e
3 changed files with 54 additions and 50 deletions
--- a/detection/GPTdetection.py
+++ b/detection/GPTdetection.py
@ -4,10 +4,6 @@ from .utils import *
 import openai


-def detect_gpt(filename: str):
-    content = read_file_content(filename)
-    return detectGPT(content)
-
 def detectGPT(content: str):
    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    text = content
--- a/detection/Regexdetection.py
+++ b/detection/Regexdetection.py
@ -0,0 +1,37 @@
+import re
+from typing import Dict, List, Tuple
+from .utils import remove_comments
+
+def find_dangerous_functions(
+    file_content: str, file_extension: str
+) -> Dict[str, List[Tuple[int, str]]]:
+    """Scan source text line by line for calls matching known risky patterns.
+
+    Args:
+        file_content: Full text of the file to scan.
+        file_extension: Extension including the leading dot (".py", ".js",
+            ".cpp"). Any other value selects no patterns, so every bucket in
+            the result stays empty.
+
+    Returns:
+        Mapping of risk level ("high", "medium", "low", "none") to a list of
+        (1-based line number, line as returned by ``remove_comments``) tuples.
+        A line appears at most once per risk level.
+    """
+    patterns = {
+        ".py": {
+            r"\bsystem\(": "high",
+            r"\bexec\(": "high",
+            r"\bpopen\(": "medium",
+            r"\beval\(": "high",
+            r"\bsubprocess\.run\(": "medium",
+            r"\b__getattribute__\(": "high",
+            r"\bgetattr\(": "medium",
+            r"\b__import__\(": "high",
+        },
+        ".js": {
+            r"\beval\(": "high",
+            r"\bexec\(": "high",
+            r"\bchild_process\.exec\(": "high",
+        },
+        ".cpp": {
+            r"\bsystem\(": "high",
+        },
+    }
+    risk_patterns = patterns.get(file_extension, {})
+    classified_results = {"high": [], "medium": [], "low": [], "none": []}
+    for line_number, line in enumerate(file_content.split("\n"), start=1):
+        clean_line = remove_comments(line, file_extension)
+        if not clean_line:
+            continue
+        # Several patterns can hit the same line (e.g. JS
+        # "child_process.exec(" matches both exec patterns); collect the
+        # distinct risk levels so the line is reported once per level rather
+        # than once per matching pattern.
+        matched_levels = {
+            risk_level
+            for pattern, risk_level in risk_patterns.items()
+            if re.search(pattern, clean_line)
+        }
+        for risk_level in matched_levels:
+            classified_results[risk_level].append((line_number, clean_line))
+    return classified_results
--- a/detection/backdoor_detection.py
+++ b/detection/backdoor_detection.py
@ -5,49 +5,14 @@ from reportlab.pdfgen import canvas
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
 from reportlab.lib import colors
+from .Regexdetection import find_dangerous_functions
+from .GPTdetection import detectGPT
 from .utils import *
+
 SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
 OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]


-
-
-
-def find_dangerous_functions(
-    file_content: str, file_extension: str
-) -> Dict[str, List[Tuple[int, str]]]:
-    patterns = {
-        ".py": {
-            r"\bsystem\(": "high",
-            r"\bexec\(": "high",
-            r"\bpopen\(": "medium",
-            r"\beval\(": "high",
-            r"\bsubprocess\.run\(": "medium",
-            r"\b__getattribute__\(": "high",
-            r"\bgetattr\(": "medium",
-            r"\b__import__\(": "high",
-        },
-        ".js": {
-            r"\beval\(": "high",
-            r"\bexec\(": "high",
-            r"\bchild_process\.exec\(": "high",
-        },
-        ".cpp": {
-            r"\bsystem\(": "high",
-        },
-    }
-    risk_patterns = patterns.get(file_extension, {})
-    classified_results = {"high": [], "medium": [], "low": [], "none": []}
-    for line_number, line in enumerate(file_content.split("\n"), start=1):
-        clean_line = remove_comments(line, file_extension)
-        if not clean_line:
-            continue
-        for pattern, risk_level in risk_patterns.items():
-            if re.search(pattern, clean_line):
-                classified_results[risk_level].append((line_number, clean_line))
-    return classified_results
-
-
 def generate_text_content(results):
    text_output = "Security Analysis Report\n"
    for risk_level, entries in results.items():
@ -153,7 +118,15 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
        return text_output


-def process_path(path: str, output_format: str, output_file=None):
+def checkModeAndDetect(mode: str, filePath: str, fileExtension: str):
+    """Run the detector selected by ``mode`` on a single file.
+
+    Args:
+        mode: "regex" for pattern matching via ``find_dangerous_functions``,
+            or "llm" for ``detectGPT``.
+        filePath: Path of the file whose content is read and analysed.
+        fileExtension: Extension including the dot; only used in regex mode.
+
+    Returns:
+        Whatever the selected detector returns. Callers iterate the result as
+        a risk-level mapping -- NOTE(review): confirm ``detectGPT`` returns
+        that same shape.
+
+    Raises:
+        ValueError: If ``mode`` is not a supported detection mode.
+    """
+    # TODO: add more detection modes here to improve reuse and extensibility.
+    if mode == "regex":
+        return find_dangerous_functions(read_file_content(filePath), fileExtension)
+    if mode == "llm":
+        return detectGPT(read_file_content(filePath))
+    # Fail loudly instead of returning None, which callers would then try to
+    # iterate and crash on with an unrelated-looking TypeError.
+    raise ValueError(f"Unsupported detection mode: {mode!r} (expected 'regex' or 'llm')")
+
+
+def process_path(path: str, output_format: str, mode: str, output_file=None):
    results = {"high": [], "medium": [], "low": [], "none": []}
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
@ -161,9 +134,8 @@ def process_path(path: str, output_format: str, output_file=None):
                file_extension = os.path.splitext(file)[1]
                if file_extension in SUPPORTED_EXTENSIONS:
                    file_path = os.path.join(root, file)
-                    file_results = find_dangerous_functions(
-                        read_file_content(file_path), file_extension
-                    )
+
+                    file_results = checkModeAndDetect(mode,file_path,file_extension)
                    for key in file_results:
                        if key != "none":  # Exclude 'none' risk level
                            results[key].extend(
@ -175,9 +147,7 @@ def process_path(path: str, output_format: str, output_file=None):
    elif os.path.isfile(path):
        file_extension = os.path.splitext(path)[1]
        if file_extension in SUPPORTED_EXTENSIONS:
-            file_results = find_dangerous_functions(
-                read_file_content(path), file_extension
-            )
+            file_results = checkModeAndDetect(mode,path,file_extension)
            for key in file_results:
                if key != "none":  # Exclude 'none' risk level
                    results[key].extend(
@ -202,6 +172,7 @@ def main():
    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
+    parser.add_argument("-m", "--mode", help="Mode of operation:[regex,llm]", default="regex")
    args = parser.parse_args()
    output_format = "txt"  # Default output format
    output_file = None
@ -216,7 +187,7 @@ def main():
                "Your input file format was incorrect, the output has been saved as a TXT file."
            )
            output_file = args.output.rsplit(".", 1)[0] + ".txt"
-    process_path(args.path, output_format, output_file)
+    # Run the analysis at function-body level so it executes on every
+    # invocation, not only inside the preceding conditional block.
+    process_path(args.path, output_format, args.mode, output_file)


 if __name__ == "__main__":