fix: 修改正则匹配逻辑

This commit is contained in:
dqy 2024-06-06 16:05:25 +08:00
parent 167bbe0a14
commit 752e774714
2 changed files with 13 additions and 15 deletions

View File

@@ -34,6 +34,7 @@ def find_dangerous_functions(
r"\bos\.kill\b": "high", r"\bos\.kill\b": "high",
r"\bos\.popen\b": "medium", r"\bos\.popen\b": "medium",
r"\bos\.spawn\b": "medium", r"\bos\.spawn\b": "medium",
r"\bsubprocess": "medium",
}, },
} }
risk_patterns = patterns.get(file_extension, {}) risk_patterns = patterns.get(file_extension, {})
@@ -43,7 +44,9 @@ def find_dangerous_functions(
clean_line = remove_comments(line, file_extension) clean_line = remove_comments(line, file_extension)
if not clean_line: if not clean_line:
continue continue
# 消除换行符,避免影响正则匹配
clean_line = clean_line.replace("\\n", "")
for pattern, risk_level in risk_patterns.items(): for pattern, risk_level in risk_patterns.items():
if re.search(pattern, clean_line): if re.search(pattern, clean_line, re.MULTILINE | re.DOTALL):
classified_results[risk_level].append((line_number, clean_line)) classified_results[risk_level].append((line_number, clean_line))
return classified_results return classified_results

View File

@@ -7,7 +7,8 @@ from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from detection.pickle_detection import pickleDataDetection from detection.pickle_detection import pickleDataDetection
from .Regexdetection import find_dangerous_functions from .Regexdetection import find_dangerous_functions
from .GPTdetection import detectGPT,GPTdetectFileList from .GPTdetection import detectGPT, GPTdetectFileList
# from .cngptdetection import detectGPT,GPTdetectFileList # from .cngptdetection import detectGPT,GPTdetectFileList
from .pyc_detection import disassemble_pyc from .pyc_detection import disassemble_pyc
from .utils import * from .utils import *
@@ -30,6 +31,8 @@ ORDERS = [
"__getattribute__", "__getattribute__",
"getattr", "getattr",
"child_process", "child_process",
"kill",
"fork",
] ]
# Initialize colorama # Initialize colorama
@@ -146,8 +149,6 @@ def generate_text_content(results: Dict[str, List[Tuple[int, str]]]) -> str:
text_output += line_text text_output += line_text
text_output += "\n" text_output += "\n"
return text_output return text_output
@@ -372,7 +373,7 @@ def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr:
def process_path( def process_path(
path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None
): ):
results = {"high": [], "medium": [], "low": [], "none": [],"pickles": []} results = {"high": [], "medium": [], "low": [], "none": [], "pickles": []}
if os.path.isdir(path): if os.path.isdir(path):
# 使用rglob获取所有文件 # 使用rglob获取所有文件
all_files = [ all_files = [
@@ -383,15 +384,12 @@ def process_path(
if mode == "llm": if mode == "llm":
results = GPTdetectFileList(all_files) results = GPTdetectFileList(all_files)
else: else:
# 扫描动画 # 扫描动画
for file_path in tqdm(all_files, desc="Scanning files", unit="file"): for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
file_extension = file_path.suffix file_extension = file_path.suffix
if file_extension in [".pkl",".pickle"]: if file_extension in [".pkl", ".pickle"]:
res = pickleDataDetection(str(file_path), output_file) res = pickleDataDetection(str(file_path), output_file)
results["pickles"].append({ results["pickles"].append({"file": str(file_path), "result": res})
"file": str(file_path),
"result": res
})
continue continue
file_results = checkModeAndDetect( file_results = checkModeAndDetect(
mode, str(file_path), file_extension, pycdc_addr mode, str(file_path), file_extension, pycdc_addr
@@ -409,10 +407,7 @@ def process_path(
file_extension = os.path.splitext(path)[1] file_extension = os.path.splitext(path)[1]
if file_extension in [".pkl", ".pickle"]: if file_extension in [".pkl", ".pickle"]:
res = pickleDataDetection(str(path), output_file) res = pickleDataDetection(str(path), output_file)
results["pickles"].append({ results["pickles"].append({"file": str(path), "result": res})
"file": str(path),
"result": res
})
elif file_extension in SUPPORTED_EXTENSIONS: elif file_extension in SUPPORTED_EXTENSIONS:
file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr) file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
if file_results is not None: if file_results is not None: