Merge branch 'main' into feature/pickle-data

2024-06-03 20:31:12 +08:00
parent b518fef6d2 4f4860342c
commit f113449fc4
17 changed files with 938 additions and 42 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
 *.webp filter=lfs diff=lfs merge=lfs -text
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
 include README.md
 include LICENSE
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # BackDoorBuster
 ![BackDoorBuster Banner](./banner.webp)
 ## 项目背景
 随着网络安全威胁的增加，恶意软件和后门的检测成为了保护个人和组织数据安全的重要任务。后门通常被隐藏在合法软件中，给黑客提供远程控制目标系统的能力。本项目旨在开发一个工具，能够有效识别和评估潜在的后门风险。
@@ -16,21 +18,66 @@
 - **报告生成**: 自动生成详细的检测报告，列出所有发现的敏感操作和对应的风险等级。
 - **持续更新与维护**: 随着新的后门技术和检测方法的出现，持续更新正则表达式库和评级标准。
 ## 打包
 ### pip
 #### 打包命令
 ```bash
 pip install wheel
 python setup.py sdist bdist_wheel
 ```
 执行上述命令后，会在 dist 目录下生成 .tar.gz 和 .whl 文件。
 #### 本地安装
 - 安装 .whl 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0-py3-none-any.whl
 ```
 - 安装 .tar.gz 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0.tar.gz
 ```
 #### 上传到 PyPI
 - 安装 twine：
 ``` bash
 pip install twine
 ```
 - 使用 twine 上传包到 PyPI：
 ``` bash
 twine upload dist/*
 ```
 需要提供 PyPI 的用户名和密码。如果没有 PyPI 账号，可以在 PyPI 注册。
 #### 使用 PyPI 安装
 包上传到 PyPI 后，可以通过以下命令安装：
 ``` bash
 pip install backdoor_buster
 ```
 ## 使用说明
-1. 安装依赖:
+1. 安装依赖:
   ```bash
   pip install -r requirements.txt
   ```
 2. 执行扫描:
    ```bash
-    python scan.py <project_directory>
+    python -m detection <project_directory> -o <path> -m <mode>
    ```
-3. 查看报告:
+3. 查看报告:
    报告将以文本形式输出在控制台，并可选择输出到指定文件。
--- a/banner.webp
+++ b/banner.webp
--- a/detection/Regexdetection.py
+++ b/detection/Regexdetection.py
@@ -25,15 +25,25 @@ def find_dangerous_functions(
        ".cpp": {
            r"\bsystem\(": "high",
        },
        ".pyc": {
            r"\bexec\b": "high",
            r"\beval\b": "high",
            r"\bos\.system\b": "high",
            r"\bos\.exec\b": "high",
            r"\bos\.fork\b": "high",
            r"\bos\.kill\b": "high",
            r"\bos\.popen\b": "medium",
            r"\bos\.spawn\b": "medium",
        },
    }
    risk_patterns = patterns.get(file_extension, {})
    classified_results = {"high": [], "medium": [], "low": [], "none": []}
-    for line_number, line in enumerate(file_content.split("\n"), start=1):
+    if file_content is not None:
-        clean_line = remove_comments(line, file_extension)
+        for line_number, line in enumerate(file_content.split("\n"), start=1):
-        if not clean_line:
+            clean_line = remove_comments(line, file_extension)
-            continue
+            if not clean_line:
-        for pattern, risk_level in risk_patterns.items():
+                continue
-            if re.search(pattern, clean_line):
+            for pattern, risk_level in risk_patterns.items():
-                classified_results[risk_level].append((line_number, clean_line))
+                if re.search(pattern, clean_line):
                    classified_results[risk_level].append((line_number, clean_line))
    return classified_results
--- a/detection/main.py
+++ b/detection/main.py
@@ -0,0 +1,452 @@
 import os
 from typing import Dict, List, Tuple, Optional
 from reportlab.lib.pagesizes import letter
 from reportlab.lib.styles import getSampleStyleSheet
 from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
 from .Regexdetection import find_dangerous_functions
 from .GPTdetection import detectGPT
 from .pyc_detection import disassemble_pyc
 from .utils import *
 import sys
 from colorama import init, Fore, Style
 from tqdm import tqdm
 from pathlib import Path
 # Set to False by checkModeAndDetect() when disassemble_pyc() reports "none"
 # (decompilation failed and no pycdc path was supplied); main() then prints a hint.
 PYCDC_FLAG = True
 # Set to False by checkModeAndDetect() when disassemble_pyc() reports "invalid"
 # (the supplied pycdc path is not an existing file); main() then prints an error.
 PYCDC_ADDR_FLAG = True
 # File suffixes that process_path() is willing to scan.
 SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp", ".pyc"}
 # Report formats known to this module (output_results() keeps its own local copy).
 OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
 # Substrings that highlight_orders() wraps in color codes for terminal output.
 ORDERS = [
    "__import__",
    "system",
    "exec",
    "popen",
    "eval",
    "subprocess",
    "__getattribute__",
    "getattr",
    "child_process",
 ]
 # Initialize colorama
 init(autoreset=True)
 # Raw ANSI escape sequence; presumably a 256-color orange (index 214) - not
 # referenced elsewhere in the visible code.
 ORANGE = "\033[38;5;214m"
 CYAN = Fore.CYAN
 def supports_color() -> bool:
    """
    Decide whether colored terminal output should be used.
    Windows is always reported as color-capable; on every other platform the
    answer depends on whether standard output is attached to a TTY.
    Returns:
        bool: True if color output should be used, False otherwise.
    """
    running_on_windows = sys.platform == "win32"
    return running_on_windows or bool(
        hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
    )
 def supports_emoji() -> bool:
    """
    Decide whether emoji should be printed to the terminal.
    Every non-Windows platform is assumed to render emoji. On Windows the
    answer is True only when the WT_SESSION environment variable is set
    (presumably by Windows Terminal - confirm).
    Returns:
        bool: True if emoji output should be used, False otherwise.
    """
    if sys.platform != "win32":
        return True
    return os.getenv("WT_SESSION") is not None
 def highlight_orders(line: str, risk_level: str, use_color: bool) -> str:
    """
    Highlights every keyword from ORDERS that occurs in the line.
    All keywords are matched in a single pass, longest first, so a keyword that
    is a substring of another one (e.g. "getattr" inside "__getattribute__")
    is never wrapped a second time inside an already highlighted match.
    Args:
        line (str): The line to highlight.
        risk_level (str): The risk level of the line ("high", "medium", "low").
            Any other value falls back to white.
        use_color (bool): Whether to use color for highlighting.
    Returns:
        str: The highlighted line, or the unchanged line when use_color is False.
    """
    # Local import: this module does not import `re` at the top level.
    import re

    if not use_color:
        # Without color there is nothing to wrap the keywords in.
        return line
    risk_colors = {
        "high": Fore.RED,
        "medium": Fore.YELLOW,
        "low": CYAN,
    }
    color = risk_colors.get(risk_level, Fore.WHITE)
    reset = Style.RESET_ALL
    # Longest keyword first so the alternation prefers the most specific match.
    keywords = sorted(ORDERS, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(keyword) for keyword in keywords))
    return pattern.sub(lambda match: f"{color}{match.group(0)}{reset}", line)
 def generate_text_content(results: Dict[str, List[Tuple[int, str]]]) -> str:
    """
    Builds the terminal version of the security analysis report.
    Color codes and emoji are included only when supports_color() and
    supports_emoji() report that the terminal can show them. Risk levels with
    no entries and the "none" level are left out.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
    Returns:
        str: The formatted text report as a string.
    """
    colored = supports_color()
    with_emoji = supports_emoji()
    reset = Style.RESET_ALL if colored else ""
    number_color = Fore.GREEN if colored else ""
    heading_colors = {
        "high": Fore.RED,
        "medium": Fore.YELLOW,
        "low": Fore.GREEN,
    }
    icons = {
        "High": "👹",
        "Medium": "👾",
        "Low": "👻",
    }
    chunks = ["Security Analysis Report\n", "=" * 30 + "\n\n"]
    for risk_level, entries in results.items():
        if not entries or risk_level == "none":
            continue
        heading_color = heading_colors.get(risk_level, Fore.WHITE) if colored else ""
        label = risk_level.capitalize()
        # The lookup happens even without emoji so an unknown level still raises KeyError.
        icon = icons[label]
        if not with_emoji:
            icon = ""
        chunks.append(f"{heading_color}{label} Risk{icon}:{reset}\n")
        chunks.append("-" * (len(risk_level) + 6) + "\n")
        for line_num, line in entries:
            shown = highlight_orders(line, risk_level, colored)
            chunks.append(f"{reset} {number_color}{line_num}{reset}: {shown}{reset}\n")
        chunks.append("\n")
    return "".join(chunks)
 def output_results(
    results: Dict[str, List[Tuple[int, str]]],
    output_format: str,
    output_file: Optional[str] = None,
 ) -> None:
    """
    Writes the analysis results to a report file, or prints them when no file is given.
    An unrecognised format falls back to "txt" and the output file name is
    rewritten to end in ".txt". The target directory is created when missing.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
        output_format (str): The format to output the results in. Supported formats: "pdf", "html", "md", "txt".
        output_file (Optional[str]): The name of the file to save the output. If None, prints to the terminal.
    """
    if not output_file:
        # No target file: show the text report on the terminal instead.
        print(generate_text_content(results))
        return
    known_formats = {"pdf", "html", "md", "txt"}
    if output_format not in known_formats:
        stem = os.path.splitext(output_file)[0]
        output_format = "txt"
        output_file = f"{stem}.txt"
    parent_dir = os.path.dirname(output_file)
    if parent_dir != "" and not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    writers = {
        "pdf": output_pdf,
        "html": output_html,
        "md": output_markdown,
    }
    # Anything that is not pdf/html/md is written as plain text.
    writer = writers.get(output_format, output_text)
    writer(results, output_file)
 def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
    """
    Writes the security analysis results to a PDF report.
    Finding text comes from scanned source code, so it is XML-escaped before it
    is handed to Paragraph; otherwise characters such as "<" or "&" would be
    interpreted as paragraph markup.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
        file_name: Path of the PDF file to create.
    """
    # Local import: this module does not import the escaping helper at top level.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(file_name, pagesize=letter)
    story = []
    styles = getSampleStyleSheet()
    # Add the title centered
    title_style = styles["Title"]
    title_style.alignment = 1  # Center alignment
    title = Paragraph("Security Analysis Report", title_style)
    story.append(title)
    story.append(Spacer(1, 20))  # Space after title
    # Add risk levels and entries
    normal_style = styles["BodyText"]
    for risk_level, entries in results.items():
        if risk_level != "none":
            story.append(
                Paragraph(f"{risk_level.capitalize()} Risk:", styles["Heading2"])
            )
            for line_num, line in entries:
                # Escape both parts: the location may contain a file path.
                safe_location = escape(str(line_num))
                safe_line = escape(str(line))
                entry = Paragraph(f"Line {safe_location}: {safe_line}", normal_style)
                story.append(entry)
            story.append(Spacer(1, 12))  # Space between sections
    doc.build(story)
 def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """
    Generates an HTML report for security analysis results.
    Every finding is HTML-escaped before it is inserted, because the text comes
    from the scanned (untrusted) source code and must not be able to inject
    markup or scripts into the report. Risk levels other than high/medium/low
    get a heading without an emoji instead of raising KeyError.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
        file_name (Optional[str]): The name of the file to save the HTML output. If None, returns the HTML string.
    Returns:
        Optional[str]: The HTML string if file_name is None, otherwise None.
    """
    # Local import: this module does not import `html` at the top level.
    import html

    html_output = """
    <html>
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <link rel="icon" href="https://s2.loli.net/2024/05/30/WDc6MekjbuCU9Qo.png">
        <title>Security Analysis Report</title>
        <style>
            body {
                background-image: url('https://s2.loli.net/2024/05/30/85Mv7leB2IRWNp6.jpg');
                background-size: 100%, auto;
                background-attachment: fixed;
                font-family: Arial, sans-serif;
            }
            h1, h2 {
                color: white;
            }
            ul {
                list-style-type: none;
                padding: 0;
            }
            li {
                background: rgba(255, 255, 255, 0.8);
                margin: 5px 0;
                padding: 10px;
                border-radius: 5px;
            }
        </style>
    </head>
    <body>
        <h1>Security Analysis Report</h1>
    """
    risk_icons = {"High": "👹", "Medium": "👾", "Low": "👻"}
    parts = [html_output]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        label = risk_level.capitalize()
        icon = risk_icons.get(label, "")
        parts.append(f"<h2>{html.escape(label)} Risk{icon}</h2><ul>")
        for line_num, line in entries:
            safe_location = html.escape(str(line_num))
            safe_line = html.escape(str(line))
            parts.append(f"<li>{safe_location}: {safe_line}</li>")
        parts.append("</ul>")
    parts.append("</body></html>")
    html_output = "".join(parts)
    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(html_output)
        return None
    return html_output
 def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """
    Generates a Markdown report for security analysis results.
    Pipe characters inside a finding are escaped so they cannot break the table
    row, and the file is written as UTF-8 (matching the HTML writer) so
    non-ASCII source lines do not depend on the platform's default encoding.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
        file_name (Optional[str]): The name of the file to save the Markdown output. If None, returns the Markdown string.
    Returns:
        Optional[str]: The Markdown string if file_name is None, otherwise None.
    """
    parts = ["# Security Analysis Report\n\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        parts.append(f"## {risk_level.capitalize()} Risk\n\n")
        parts.append("| Line Number | Description |\n")
        parts.append("|-------------|-------------|\n")
        for line_num, line in entries:
            # A bare "|" would be read as a column separator.
            location_cell = str(line_num).replace("|", "\\|")
            line_cell = str(line).replace("|", "\\|")
            parts.append(f"| {location_cell} | {line_cell} |\n")
        parts.append("\n")
    md_output = "".join(parts)
    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(md_output)
        return None
    return md_output
 def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """
    Generates a plain text report for security analysis results.
    The file is written as UTF-8 (matching the HTML writer) so non-ASCII source
    lines do not depend on the platform's default encoding.
    Args:
        results (Dict[str, List[Tuple[int, str]]]): The security analysis results categorized by risk levels.
        file_name (Optional[str]): The name of the file to save the text output. If None, returns the text string.
    Returns:
        Optional[str]: The text string if file_name is None, otherwise None.
    """
    title = "Security Analysis Report"
    parts = [f"{title}\n", "=" * len(title) + "\n\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        heading = f"{risk_level.capitalize()} Risk:"
        parts.append(f"{heading}\n")
        # Underline exactly as wide as the heading.
        parts.append("-" * len(heading) + "\n")
        for line_num, line in entries:
            parts.append(f"  Line {line_num}: {line}\n")
        parts.append("\n")
    text_output = "".join(parts)
    if file_name:
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(text_output)
        return None
    return text_output
 def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr: str):
    """
    Loads one file and runs the selected detector on its content.
    .pyc files are decompiled first through disassemble_pyc(); all other files
    are read with read_file_content(). When decompilation reports a sentinel
    ("none" or "invalid") the matching module flag is cleared and the file is
    skipped, so the sentinel text is never scanned or sent to the LLM.
    Args:
        mode (str): "llm" selects detectGPT; any other value selects the regex detector.
        filePath (str): Path of the file to analyse.
        fileExtension (str): Suffix of the file, including the leading dot.
        pycdc_addr (str): Path to the pycdc executable, or None.
    Returns:
        The detector's result, or "" when a .pyc file could not be decompiled.
    """
    # TODO: add more detection modes; keep this dispatch reusable and extensible
    global PYCDC_FLAG, PYCDC_ADDR_FLAG
    if fileExtension == ".pyc":
        # Decompile the .pyc file
        file_content = disassemble_pyc(filePath, pycdc_addr)
        if file_content == "none":
            PYCDC_FLAG = False
            return ""
        if file_content == "invalid":
            PYCDC_ADDR_FLAG = False
            return ""
    else:
        file_content = read_file_content(filePath)
    if mode == "llm":
        return detectGPT(file_content)
    # "regex" and every unrecognised mode use the regex detector.
    return find_dangerous_functions(file_content, fileExtension)
 def process_path(
    path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None
 ):
    """
    Scans a file or a directory tree and outputs the combined report.
    Directories are walked recursively; only regular files whose suffix is in
    SUPPORTED_EXTENSIONS are scanned (a directory named e.g. "chart.js" is
    ignored instead of being opened as a file).
    Args:
        path (str): File or directory to analyse.
        output_format (str): Report format passed on to output_results().
        mode (str): Detection mode passed on to checkModeAndDetect().
        pycdc_addr (str): Path to the pycdc executable, or None.
        output_file: Report file path, or None to print to the terminal.
    """
    results = {"high": [], "medium": [], "low": [], "none": []}
    if os.path.isdir(path):
        # Collect every supported regular file below the directory
        all_files = [
            file_path
            for file_path in Path(path).rglob("*")
            if file_path.suffix in SUPPORTED_EXTENSIONS and file_path.is_file()
        ]
        # Progress bar while scanning
        for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
            file_results = checkModeAndDetect(
                mode, str(file_path), file_path.suffix, pycdc_addr
            )
            _merge_file_results(results, file_results, str(file_path))
    elif os.path.isfile(path):
        file_extension = os.path.splitext(path)[1]
        if file_extension not in SUPPORTED_EXTENSIONS:
            print("Unsupported file type.")
            return
        file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
        _merge_file_results(results, file_results, path)
    else:
        print("Invalid path.")
        sys.exit(1)
    output_results(results, output_format, output_file)


 def _merge_file_results(results, file_results, source: str) -> None:
    """Appends one file's findings to results, prefixing each line number with its file."""
    if not file_results:
        # None or "" means the file produced nothing to merge.
        return
    for key in file_results:
        if key == "none":  # Exclude 'none' risk level
            continue
        results[key].extend(
            (f"{source}: Line {line_num}", line)
            for line_num, line in file_results[key]
        )
 def main():
    """
    Command-line entry point: parses arguments, runs the scan and reports pycdc problems.
    The report format is taken from the extension of the -o/--output path. An
    unsupported or missing extension falls back to a ".txt" file next to the
    requested path.
    """
    import argparse
    parser = argparse.ArgumentParser(
        description="Backdoor detection tool.", prog="detection"
    )
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
    parser.add_argument(
        "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex"
    )
    parser.add_argument(
        "-p", "--pycdc", help="Path to pycdc.exe to decompile", default=None
    )
    args = parser.parse_args()
    output_format = "txt"  # Default output format
    output_file = None
    if args.output:
        stem, ext = os.path.splitext(args.output)
        ext = ext.lower()
        if ext in [".html", ".md", ".txt", ".pdf"]:
            output_format = ext.replace(".", "")
            output_file = args.output
        else:
            print(
                "Your input file format was incorrect, the output has been saved as a TXT file."
            )
            # splitext only strips a real extension, so "./out/report" keeps its
            # directory part (rsplit(".") would have reduced it to ".txt").
            output_file = stem + ".txt"
    # Without an output file the report goes to stdout; otherwise it is written to the file
    process_path(args.path, output_format, args.mode, args.pycdc, output_file)
    if not PYCDC_FLAG:
        print(
            "ERROR: Detected Python 3.11 or above .pyc files. You need to install pycdc and compile it yourself to obtain pycdc."
        )
        print("Repo: https://github.com/zrax/pycdc.git")
    if not PYCDC_ADDR_FLAG:
        print("ERROR: The specified pycdc.exe path is not valid")
        print("Please check your pycdc path.")
 # Run the command-line interface only when this module is executed as a script.
 if __name__ == "__main__":
    main()
--- a/detection/cngptdetection.py
+++ b/detection/cngptdetection.py
@@ -0,0 +1,113 @@
 import os
 import requests
 import re
 import json
 from typing import List, Dict, Any
 class TimeoutException(Exception):
    """Custom exception used to signal a timeout."""
 def detectGPT(content: str) -> str:
    """
    Sends the code to Baidu's ERNIE chat endpoint and classifies the reported findings.
    Credentials are read only from the BAIDU_API_KEY and BAIDU_SECRET_KEY
    environment variables; they must never be written into this file.
    Args:
        content: The source code to analyse.
    Returns:
        A JSON string mapping "high", "medium", "low" and "none" to lists of
        [line number, stripped source line] pairs.
    Raises:
        ValueError: If the credentials are missing, the request fails, or the
            response carries no "result" field.
    """
    api_key = os.getenv("BAIDU_API_KEY")
    secret_key = os.getenv("BAIDU_SECRET_KEY")
    if not api_key or not secret_key:
        raise ValueError("BAIDU_API_KEY or BAIDU_SECRET_KEY is not set")
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-4.0-8k-0329?access_token=" + get_access_token(
        api_key, secret_key)
    payload = json.dumps({
        "messages": [
            {
                "role": "user",
                "content": (
                        "You are a Python code reviewer. Read the code below and identify any potential "
                        "security vulnerabilities. Classify them by risk level (high, medium, low, none). "
                        'Only report the line number and the risk level.\nYou should output the result as '
                        'json format in one line. For example: [{"Line": {the line number}, "Risk": "{choose from (high,medium,low)}","Reason":"{how it is vulnerable}"}] '
                        "Each of these three fields is required.\nYou are required to only output the json format. "
                        "Do not output any other information." + content
                )
            }
        ]
    })
    headers = {
        'Content-Type': 'application/json'
    }
    try:
        # A timeout keeps an unresponsive endpoint from hanging the scan forever.
        response = requests.post(url, headers=headers, data=payload, timeout=60)
        response.raise_for_status()
        res_json = response.json()
        message_content = res_json.get('result')
        if message_content is None:
            raise ValueError("API response content is None")
    except requests.RequestException as e:
        raise ValueError(f"Request failed: {str(e)}") from e
    extracted_data = extract_json_from_text(message_content)
    classified_results = {"high": [], "medium": [], "low": [], "none": []}
    source_lines = content.split("\n")
    for res in extracted_data:
        try:
            line_number = int(res["Line"])
            if not 1 <= line_number <= len(source_lines):
                # Zero or negative numbers would silently index from the end.
                continue
            classified_results[res["Risk"]].append(
                (line_number, source_lines[line_number - 1].strip())
            )
        except (ValueError, IndexError, KeyError, TypeError):
            # Skip malformed items (missing fields, unknown risk, non-dict entries).
            continue
    return json.dumps(classified_results, indent=2, ensure_ascii=False)
 def get_access_token(api_key: str, secret_key: str) -> str:
    """
    Requests an OAuth access token from Baidu using the API key and secret key.
    Args:
        api_key: Sent as the client_id parameter.
        secret_key: Sent as the client_secret parameter.
    Returns:
        The access_token string from the response.
    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response contains no access_token.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": api_key, "client_secret": secret_key}
    # A timeout keeps an unresponsive token endpoint from blocking forever.
    response = requests.post(url, params=params, timeout=30)
    response.raise_for_status()
    access_token = response.json().get("access_token")
    if not access_token:
        # Fail here with a clear message rather than later during URL concatenation.
        raise ValueError("Access token missing from token response")
    return access_token
 def extract_json_from_text(text: str) -> List[Dict[str, Any]]:
    """
    Pulls the first JSON array of objects out of free-form text.
    Args:
        text: Text that is expected to contain a JSON array such as "[{...}]".
    Returns:
        The decoded JSON data, or an empty list when no array is found or the
        matched text is not valid JSON (a message is printed in both cases).
    """
    found = re.search(r'\[\s*{.*?}\s*\]', text, re.DOTALL)
    if found is None:
        print("未找到 JSON 数据")
        return []
    try:
        return json.loads(found.group(0))
    except json.JSONDecodeError as decode_error:
        print(f"解码 JSON 时出错: {decode_error}")
        return []
--- a/detection/pyc_detection.py
+++ b/detection/pyc_detection.py
@@ -0,0 +1,49 @@
 from typing import List, Tuple
 import uncompyle6
 import io
 import os
 import subprocess
 from contextlib import redirect_stdout, redirect_stderr
 def run_pycdc(exe_path: str, pyc_file: str) -> str:
    """
    Runs the pycdc executable on a .pyc file and captures what it prints.
    The program is started with an argument list and without a shell, so
    characters in the scanned file's name (quotes, "$", ";", ...) are passed
    through literally and can neither break the command nor inject one.
    Args:
        exe_path (str): Path to the pycdc.exe executable.
        pyc_file (str): Path to the .pyc file to decompile.
    Returns:
        str: Standard output of pycdc, or "invalid" when exe_path is not an
        existing file.
    """
    if not os.path.isfile(exe_path):
        return "invalid"
    result = subprocess.run(
        [exe_path, pyc_file], capture_output=True, text=True, encoding="utf-8"
    )
    return result.stdout
 def disassemble_pyc(file_path: str, pycdc_addr=None) -> str:
    """
    Decompiles a .pyc file with uncompyle6, falling back to pycdc on failure.
    Args:
        file_path (str): The path to the .pyc file.
        pycdc_addr: Path to the pycdc executable used as a fallback, or None.
    Returns:
        str: The text uncompyle6 wrote on success. If uncompyle6 raises, the
        string "none" when no pycdc path was given, otherwise whatever
        run_pycdc() returns.
    """
    buffer = io.StringIO()
    try:
        uncompyle6.main.decompile_file(file_path, buffer)
        return buffer.getvalue()
    except Exception:
        # Any failure in uncompyle6 switches to the pycdc fallback.
        return "none" if pycdc_addr is None else run_pycdc(pycdc_addr, file_path)
--- a/detection/utils.py
+++ b/detection/utils.py
@@ -4,7 +4,7 @@ import sys
 def read_file_content(file_path: str) -> str:
    try:
-        with open(file_path, "r", encoding="utf-8") as file:
+        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            return file.read()
    except FileNotFoundError:
        print("Error: File not found.")
@@ -21,4 +21,4 @@ def remove_comments(code: str, extension: str) -> str:
        code = re.sub(r"//.*", "", code)
        code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL)
        return code.strip()
-    return code.strip()
+    return code.strip()
--- a/docs/design.md
+++ b/docs/design.md
@@ -1,9 +1,64 @@
 # 项目设计文档 - 后门检测系统
 ## 打包
 ### 简介
 本项目需要将 Python 代码打包成`pip`包和`deb`包，以便于分发和安装。以下是如何实现和使用该打包功能的详细步骤。
 ### pip
 #### 打包命令
 ```bash
 pip install wheel
 python setup.py sdist bdist_wheel
 ```
 执行上述命令后，会在 dist 目录下生成 .tar.gz 和 .whl 文件。
 #### 本地安装
 - 安装 .whl 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0-py3-none-any.whl
 ```
 - 安装 .tar.gz 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0.tar.gz
 ```
 #### 上传到 PyPI
 - 安装 twine：
 ``` bash
 pip install twine
 ```
 - 使用 twine 上传包到 PyPI：
 ``` bash
 twine upload dist/*
 ```
 需要提供 PyPI 的用户名和密码。如果没有 PyPI 账号，可以在 PyPI 注册。
 #### 使用 PyPI 安装
 包上传到 PyPI 后，可以通过以下命令安装：
 ``` bash
 pip install backdoor_buster
 ```
 ## 静态代码后门检测
 **功能描述**:
-这个脚本用于扫描指定路径下的代码文件，检测潜在的危险函数调用，支持 `.py`, `.js`, `.cpp` 文件。
+这个脚本用于扫描指定路径下的代码文件，检测潜在的危险函数调用，支持 `.py`, `.js`, `.cpp`, `.pyc` 文件。
 **主要组件**:
@@ -67,7 +122,7 @@ python backdoor_detection.py ./src -o ./output/report.pdf
 **使用示例**:
 ```bash
-python requirements_detection.py ./requirements.txt -o ./output/report.md
+python -m detection.requirements_detection ./requirements.txt -o ./output/report.md
 ```
 ---
--- a/docs/idea.md
+++ b/docs/idea.md
@@ -8,6 +8,10 @@
 参考项目： [https://github.com/SonarSource/sonarqube]
 检查源代码的语法和关键词。通过这种方式，可以发现是否存在与其他语言的交互，比如调用外部命令、使用其他语言的扩展模块、与其他语言的接口交互等。
 实现方法：可以使用Python代码解析库（如ast模块）来分析语法树，并检查特定的代码模式或结构；开发脚本来搜索Python代码中常用于与其他语言交互的关键词和函数，例如ctypes、subprocess、os.system等
 ## 控制流分析
 通过分析程序的控制流（即程序中各个操作的执行顺序），可以检测到异常的控制流路径，这些路径可能是后门的迹象。
@@ -22,6 +26,10 @@
 这个网站可以搜索依赖中是否存在漏洞： [https://security.snyk.io/package/pip/]
 分析代码库中的依赖关系，查找是否导入了与其他语言交互相关的模块或库
 实施策略：开发脚本进行依赖库对比匹配
 ## 异常行为检测
 通过定义“正常”代码行为的基线，可以标识出异常行为，这些异常行为可能指示着后门的存在。
@@ -33,3 +41,6 @@
 使用NLP技术来训练机器学习模型，以自动从大量代码中学习和识别异常或潜在的后门模式。
 开发方法：采用深度学习框架如TensorFlow或PyTorch，结合NLP处理工具，训练模型识别代码中的异常行为。
--- a/docs/tech_notes.md
+++ b/docs/tech_notes.md
@@ -46,7 +46,18 @@
 - **主要应用**：通过爬虫收集漏洞依赖信息并进行汇总，用于判断依赖是否存在漏洞版本。
-## 8. 代码和风险分析
+## 8. 打包
 本项目支持打包作为`pip`包进行发布
 - **主要应用**：
  - `pip`通过`wheel`并自行撰写`setup.py`以及`MANIFEST.in`，将项目打包发布
 ## 9. 反编译
 项目通过`uncompyle6`库提供的反编译功能，可以先将 Python 字节码（`.pyc`）还原为源代码，再扫描其中的危险代码
 ## 10. 代码和风险分析
 项目中实现了基本的静态代码分析功能，用于识别和报告潜在的安全风险函数调用，如 `system`、`exec` 等。
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -2,31 +2,68 @@
 本文档提供了后门检测系统的使用方法，包括依赖版本漏洞检测和静态代码后门检测两部分。这将帮助用户正确执行安全检测，并理解输出结果。
 ## 安装需求
 在开始使用本系统之前，请确保您的环境中安装了以下依赖：
 - Python 3.6 或更高版本
 - `packaging` 库：用于版本控制和比较
 - `reportlab` 库：用于生成 PDF 报告
 您可以通过以下命令安装必要的 Python 库：
 ```bash
 pip install packaging reportlab
 ```
 ## 下载和配置
 - 克隆或下载后门检测系统到您的本地环境。
 - 确保脚本文件 (`requirements_detection.py` 和 `backdoor_detection.py`) 在您的工作目录中。
 ## 打包
 ### pip
 #### 打包命令
 ```bash
 pip install wheel
 python setup.py sdist bdist_wheel
 ```
 执行上述命令后，会在 dist 目录下生成 .tar.gz 和 .whl 文件。
 #### 本地安装
 - 安装 .whl 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0-py3-none-any.whl
 ```
 - 安装 .tar.gz 文件：
 ``` bash
 pip install dist/backdoor_buster-0.1.0.tar.gz
 ```
 #### 上传到 PyPI
 - 安装 twine：
 ``` bash
 pip install twine
 ```
 - 使用 twine 上传包到 PyPI：
 ``` bash
 twine upload dist/*
 ```
 需要提供 PyPI 的用户名和密码。如果没有 PyPI 账号，可以在 PyPI 注册。
 #### 使用 PyPI 安装
 包上传到 PyPI 后，可以通过以下命令安装：
 ``` bash
 pip install backdoor_buster
 ```
 ## 运行依赖版本漏洞检测脚本
 **命令格式**：
 ```bash
-python requirements_detection.py <requirements_file> -o <output_file> 
+python -m detection.requirements_detection <requirements_file> -o <output_file> 
 ```
 **参数说明**：
@@ -37,7 +74,7 @@ python requirements_detection.py <requirements_file> -o <output_file>
 **示例**：
 ```bash
-python requirements_detection.py requirements.txt -o output/report.md
+python -m detection.requirements_detection requirements.txt -o output/report.md
 ```
 ## 运行静态代码后门检测脚本
@@ -45,7 +82,7 @@ python requirements_detection.py requirements.txt -o output/report.md
 **命令格式**：
 ```bash
-python backdoor_detection.py <code_path> -o <output_file> -m <mode>
+python -m detection <code_path> -o <output_file> -m <mode>
 ```
 **参数说明**：
@@ -57,7 +94,7 @@ python backdoor_detection.py <code_path> -o <output_file> -m <mode>
 **示例**：
 ```bash
-python backdoor_detection.py ./src -o output/report.pdf -m regex
+python -m detection ./src -o output/report.pdf -m regex
 ```
 ## 结果解读
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,7 @@ reportlab
 requests
 packaging
 openai
-bs4
+bs4
 uncompyle6
 colorama
 tqdm
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,45 @@
 # pip install wheel
 # python setup.py sdist bdist_wheel
 from setuptools import setup, find_packages
 def read_file(filename: str) -> str:
    """Return the full text of a UTF-8 encoded file.
    Args:
        filename (str): Path of the file to load.
    Returns:
        str: Everything the file contains.
    """
    with open(filename, encoding="utf-8") as source:
        text = source.read()
    return text
 # Package metadata for the backdoor_buster distribution.
 setup(
    name="backdoor_buster",
    version="0.1.0",
    author="ciscn",
    description="A tool for integrated backdoor detection",
    # README.md is used as the long description and declared as Markdown.
    long_description=read_file("README.md"),
    long_description_content_type="text/markdown",
    url="https://git.mamahaha.work/sangge/BackDoorBuster",
    # Every package found by setuptools is included.
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.6",
    # Runtime dependencies; same package names as listed in requirements.txt.
    install_requires=[
        "reportlab",
        "requests",
        "packaging",
        "openai",
        "bs4",
        "uncompyle6",
        "tqdm",
        "colorama",
    ],
 )
--- a/tests/test_CN_GPT_detection.py
+++ b/tests/test_CN_GPT_detection.py
@@ -0,0 +1,40 @@
 import unittest
 import warnings
 import os
 import json
 from detection.cngptdetection import detectGPT
 # Tests for detection.cngptdetection.detectGPT. Both tests are skipped unless
 # BAIDU_API_KEY and BAIDU_SECRET_KEY are set in the environment.
 class TestBackdoorDetection(unittest.TestCase):
    # Three risky calls are expected to come back as three "high" findings.
    def test_gpt_risk_detection(self):
        if os.getenv("BAIDU_API_KEY") is None or os.getenv("BAIDU_SECRET_KEY") is None:
            warnings.warn("BAIDU_API_KEY or BAIDU_SECRET_KEY is not set, test skipped.", UserWarning)
            self.skipTest("BAIDU_API_KEY or BAIDU_SECRET_KEY is not set")
        content = """import os
        os.system('rm -rf /')   # high risk
        exec('print("Hello")')  # high risk
        eval('2 + 2')   # high risk
        """
        results1 = detectGPT(content)
        # detectGPT returns a JSON string, so decode it before inspecting.
        classified_results = json.loads(results1)
        self.assertEqual(len(classified_results["high"]), 3)
    # Harmless code is expected to produce no high/medium/low findings.
    def test_gpt_no_risk_detection(self):
        if os.getenv("BAIDU_API_KEY") is None or os.getenv("BAIDU_SECRET_KEY") is None:
            warnings.warn("BAIDU_API_KEY or BAIDU_SECRET_KEY is not set, test skipped.", UserWarning)
            self.skipTest("BAIDU_API_KEY or BAIDU_SECRET_KEY is not set")
        content = """a = 10
        b = a + 5
        print('This should not be detected as risky.')
        """
        results2 = detectGPT(content)
        # detectGPT returns a JSON string, so decode it before inspecting.
        classified_results = json.loads(results2)
        self.assertEqual(len(classified_results["high"]), 0)
        self.assertEqual(len(classified_results["medium"]), 0)
        self.assertEqual(len(classified_results["low"]), 0)
 # Allow running this test module directly as a script.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_backdoor_detection.py
+++ b/tests/test_backdoor_detection.py
@@ -1,7 +1,7 @@
 import unittest
 import warnings
-from detection.backdoor_detection import find_dangerous_functions
+from detection.__main__ import find_dangerous_functions
 from detection.GPTdetection import detectGPT
 import os
@@ -90,6 +90,23 @@ class TestBackdoorDetection(unittest.TestCase):
        with self.assertRaises(ValueError):
            detectGPT(content)
    # The ".pyc" pattern set must flag os.system as a high-risk finding on line 2.
    def test_find_dangerous_functions_pyc(self):
        # Plain source text stands in for decompiled .pyc output here.
        file_content = """import os
        os.system('rm -rf /')
        """
        file_extension = ".pyc"
        expected_result = {
            "high": [(2, "os.system('rm -rf /')")],
            "medium": [],
            "low": [],
            "none": [],
        }
        result = find_dangerous_functions(file_content, file_extension)
        self.assertEqual(result, expected_result)
 # Allow running this test module directly as a script.
 if __name__ == "__main__":
    unittest.main()