fix: 修改正则匹配逻辑

2024-06-06 16:05:25 +08:00
parent 167bbe0a14
commit 752e774714
2 changed files with 13 additions and 15 deletions
--- a/detection/Regexdetection.py
+++ b/detection/Regexdetection.py
@@ -34,6 +34,7 @@ def find_dangerous_functions(
            r"\bos\.kill\b": "high",
            r"\bos\.popen\b": "medium",
            r"\bos\.spawn\b": "medium",
+            r"\bsubprocess": "medium",
        },
    }
    risk_patterns = patterns.get(file_extension, {})
@@ -43,7 +44,9 @@ def find_dangerous_functions(
            clean_line = remove_comments(line, file_extension)
            if not clean_line:
                continue
+            # 消除换行符，避免影响正则匹配
+            clean_line = clean_line.replace("\\n", "")
            for pattern, risk_level in risk_patterns.items():
-                if re.search(pattern, clean_line):
+                if re.search(pattern, clean_line, re.MULTILINE | re.DOTALL):
                    classified_results[risk_level].append((line_number, clean_line))
    return classified_results
--- a/detection/main.py
+++ b/detection/main.py
@@ -7,7 +7,8 @@ from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
 from detection.pickle_detection import pickleDataDetection
 from .Regexdetection import find_dangerous_functions
-from .GPTdetection import detectGPT,GPTdetectFileList
+from .GPTdetection import detectGPT, GPTdetectFileList
 # from .cngptdetection import detectGPT,GPTdetectFileList
 from .pyc_detection import disassemble_pyc
 from .utils import *
@@ -30,6 +31,8 @@ ORDERS = [
    "__getattribute__",
    "getattr",
    "child_process",
+    "kill",
+    "fork",
 ]
 # Initialize colorama
@@ -146,8 +149,6 @@ def generate_text_content(results: Dict[str, List[Tuple[int, str]]]) -> str:
                text_output += line_text
            text_output += "\n"
    return text_output
@@ -372,7 +373,7 @@ def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr:
 def process_path(
    path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None
 ):
-    results = {"high": [], "medium": [], "low": [], "none": [],"pickles": []}
+    results = {"high": [], "medium": [], "low": [], "none": [], "pickles": []}
    if os.path.isdir(path):
        # 使用rglob获取所有文件
        all_files = [
@@ -383,15 +384,12 @@ def process_path(
        if mode == "llm":
            results = GPTdetectFileList(all_files)
        else:
-        # 扫描动画
+            # 扫描动画
            for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
                file_extension = file_path.suffix
-                if file_extension in [".pkl",".pickle"]:
+                if file_extension in [".pkl", ".pickle"]:
                    res = pickleDataDetection(str(file_path), output_file)
-                    results["pickles"].append({
+                    results["pickles"].append({"file": str(file_path), "result": res})
-                        "file": str(file_path),
-                        "result": res
-                    })
                    continue
                file_results = checkModeAndDetect(
                    mode, str(file_path), file_extension, pycdc_addr
@@ -409,10 +407,7 @@ def process_path(
        file_extension = os.path.splitext(path)[1]
        if file_extension in [".pkl", ".pickle"]:
            res = pickleDataDetection(str(path), output_file)
-            results["pickles"].append({
+            results["pickles"].append({"file": str(path), "result": res})
-                "file": str(path),
-                "result": res
-            })
        elif file_extension in SUPPORTED_EXTENSIONS:
            file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
            if file_results is not None: