From 40f5c07fa1359ab864d90d0acbbe9a4de615cbb0 Mon Sep 17 00:00:00 2001
From: dqy <1016751306@qq.com>
Date: Wed, 29 May 2024 20:08:40 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9pyc=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E7=9A=84=E5=8F=8D=E6=B1=87=E7=BC=96=E5=8A=9F=E8=83=BD?=
 =?UTF-8?q?=E6=A8=A1=E5=9D=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for scanning compiled Python (.pyc) files: the file is
decompiled to source text with uncompyle6 and then passed through the
existing regex / LLM detection paths.

---
Review notes (text in this section is ignored by git-am):
- os.exec / os.spawn patterns now carry a \w* suffix; with a bare trailing
  \b they could never match real calls such as os.execv or os.spawnlp.
- checkModeAndDetect selects the content loader once instead of repeating
  the mode dispatch for each file type; behaviour is unchanged.
- pyc_detection.py imports decompile_file explicitly and drops the unused
  typing imports.
- Context-line indentation was reconstructed from Python syntax and the
  "index" blob ids are carried over from the previous revision; re-check
  both against the target tree before applying.

 detection/Regexdetection.py     | 12 +++++++++++-
 detection/backdoor_detection.py | 22 +++++++++++++---------
 detection/pyc_detection.py      | 26 ++++++++++++++++++++++++++
 3 files changed, 50 insertions(+), 10 deletions(-)
 create mode 100644 detection/pyc_detection.py

diff --git a/detection/Regexdetection.py b/detection/Regexdetection.py
index b1b1549..39f6280 100644
--- a/detection/Regexdetection.py
+++ b/detection/Regexdetection.py
@@ -25,6 +25,17 @@ def find_dangerous_functions(
         ".cpp": {
             r"\bsystem\(": "high",
         },
+        ".pyc": {
+            # os.exec*/os.spawn* are families (execv, spawnlp, ...): allow any suffix.
+            r"\bexec\b": "high",
+            r"\beval\b": "high",
+            r"\bos\.system\b": "high",
+            r"\bos\.exec\w*\b": "high",
+            r"\bos\.fork\b": "high",
+            r"\bos\.kill\b": "high",
+            r"\bos\.popen\b": "medium",
+            r"\bos\.spawn\w*\b": "medium",
+        },
     }
     risk_patterns = patterns.get(file_extension, {})
     classified_results = {"high": [], "medium": [], "low": [], "none": []}
@@ -36,4 +47,3 @@ def find_dangerous_functions(
             if re.search(pattern, clean_line):
                 classified_results[risk_level].append((line_number, clean_line))
     return classified_results
-
diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py
index ef3e32b..0dbbe40 100644
--- a/detection/backdoor_detection.py
+++ b/detection/backdoor_detection.py
@@ -3,12 +3,13 @@ from typing import Dict, List, Tuple
 from reportlab.lib.pagesizes import letter
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
-from .Regexdetection import find_dangerous_functions
-from .GPTdetection import detectGPT
-from .utils import *
+from detection.Regexdetection import find_dangerous_functions
+from detection.GPTdetection import detectGPT
+from detection.utils import *
+from detection.pyc_detection import disassemble_pyc
 import sys
 
-SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
+SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp", ".pyc"}
 OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
 
 
@@ -119,12 +120,15 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
 
 def checkModeAndDetect(mode: str, filePath: str, fileExtension: str):
     # TODO:添加更多方式,这里提高代码的复用性和扩展性
-    if mode == "regex":
-        return find_dangerous_functions(read_file_content(filePath), fileExtension)
-    elif mode == "llm":
-        return detectGPT(read_file_content(filePath))
+    if fileExtension == ".pyc":
+        # .pyc files are compiled: decompile to source text before scanning
+        file_content = disassemble_pyc(filePath)
     else:
-        return find_dangerous_functions(read_file_content(filePath), fileExtension)
+        file_content = read_file_content(filePath)
+    if mode == "llm":
+        return detectGPT(file_content)
+    # "regex" and any unrecognised mode both use the regex scanner
+    return find_dangerous_functions(file_content, fileExtension)
 
 
 def process_path(path: str, output_format: str, mode: str, output_file=None):
diff --git a/detection/pyc_detection.py b/detection/pyc_detection.py
new file mode 100644
index 0000000..697da18
--- /dev/null
+++ b/detection/pyc_detection.py
@@ -0,0 +1,26 @@
+import io
+
+from uncompyle6.main import decompile_file
+
+
+def disassemble_pyc(file_path: str) -> str:
+    """
+    Decompile a .pyc file back to Python source text using uncompyle6.
+
+    The name says "disassemble" for compatibility with existing callers, but
+    the output is decompiled source, not a bytecode listing.
+
+    Args:
+        file_path (str): The path to the .pyc file.
+
+    Returns:
+        str: The decompiled source, or "" when decompilation fails.
+    """
+    output = io.StringIO()
+    try:
+        decompile_file(file_path, output)
+    except Exception as e:
+        # Best-effort by design: report the failure and return "" instead of raising.
+        print(f"Error occurred while disassembling: {e}")
+        return ""
+    return output.getvalue()