From 102c631ed9b76cb44e8d8b6886a044613d905118 Mon Sep 17 00:00:00 2001
From: dqy <1016751306@qq.com>
Date: Thu, 25 Apr 2024 17:01:24 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=8C=E5=96=84=E5=AF=B9=E4=BA=8E?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81=E7=9A=84=E6=AD=A3=E5=88=99=E5=8C=B9=E9=85=8D?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/detection.yml |   7 ++
 detection/backdoor_detection.py | 204 ++++++++++++++++++++++----------
 2 files changed, 146 insertions(+), 65 deletions(-)

diff --git a/.github/workflows/detection.yml b/.github/workflows/detection.yml
index 0651804..0d0710c 100644
--- a/.github/workflows/detection.yml
+++ b/.github/workflows/detection.yml
@@ -33,6 +33,7 @@ jobs:
       #     code_path: "./tests"
       #     requirements_file: "./requirements.txt"
       #     output_format: "txt"
+      # A new repository needs to be created for testing
       # - name: Clone custom Git repository
       #   run: |
       #     git clone -b feature/match https://git.mamahaha.work/sangge/BackDoorBuster
@@ -43,3 +44,9 @@ jobs:
 
       - name: Run Requirements Detection
         run: python ${{ github.workspace }}/BackDoorBuster/detection/requirements_detection.py ${{ github.workspace }}/BackDoorBuster/crawler/trans_extracted_data.txt ${{ github.workspace }}/BackDoorBuster/requirements.txt txt
+
+      - name: Upload Result Artifacts
+        uses: actions/upload-artifact@v2
+        with:
+          name: detection-results
+          path: ./results/code/
diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py
index 457ca93..705def4 100644
--- a/detection/backdoor_detection.py
+++ b/detection/backdoor_detection.py
@@ -1,12 +1,15 @@
-# Usage: python backdoor_detection.py <code_path> <output_format>
-
 import os
 import re
 import sys
 from typing import Dict, List, Tuple
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+from reportlab.lib.styles import getSampleStyleSheet
+from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
+from reportlab.lib import colors
 
 SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
-OUTPUT_FORMATS = ["html", "md", "txt"]
+OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
 
 
 def read_file_content(file_path: str) -> str:
@@ -57,110 +60,181 @@ def find_dangerous_functions(
         clean_line = remove_comments(line, file_extension)
         if not clean_line:
             continue
-        found = False
         for pattern, risk_level in risk_patterns.items():
             if re.search(pattern, clean_line):
                 classified_results[risk_level].append((line_number, clean_line))
-                found = True
-                break
-        if not found:
-            classified_results["none"].append((line_number, clean_line))
     return classified_results
 
 
-def output_results(
-    results: Dict[str, List[Tuple[int, str]]], output_format: str, file_path: str
-):
-    # Create the 'results' directory if it does not exist
-    # 这里如果集成测试的话应该设置为./
-    results_dir = "./results/code"
-    if not os.path.exists(results_dir):
-        os.makedirs(results_dir)
-
-    base_name = os.path.basename(file_path)
-    output_file = os.path.join(
-        results_dir, f"{os.path.splitext(base_name)[0]}.{output_format}"
-    )
-
-    if output_format == "html":
-        output_html(results, output_file)
-    elif output_format == "md":
-        output_markdown(results, output_file)
-    elif output_format == "txt":
-        output_text(results, output_file)
+def generate_text_content(results):
+    """Build the plain-text report, skipping empty levels and the "none" level."""
+    parts = ["Security Analysis Report\n"]
+    for level, found in results.items():
+        if found and level != "none":
+            parts.append(f"{level.capitalize()} Risk:\n")
+            parts.extend(f"  Line {num}: {text}\n" for num, text in found)
+    return "".join(parts)
 
 
-def output_html(results: Dict[str, List[Tuple[int, str]]], file_name: str):
-    html_output = f"<html><head><title>Analysis of {file_name}</title></head><body>"
+def output_results(results, output_format, output_file=None):
+    """Write the report to output_file, or print it as text when no file is given.
+
+    An output_format outside OUTPUT_FORMATS falls back to "txt" with a .txt
+    file name. Missing parent directories of output_file are created.
+    """
+    if not output_file:
+        print(generate_text_content(results))
+        return
+    if output_format not in OUTPUT_FORMATS:
+        output_format = "txt"
+        output_file = f"{os.path.splitext(output_file)[0]}.txt"
+    results_dir = os.path.dirname(output_file)
+    if results_dir:  # "" for a bare file name, which os.makedirs rejects
+        os.makedirs(results_dir, exist_ok=True)
+    writers = {
+        "pdf": output_pdf,
+        "html": output_html,
+        "md": output_markdown,
+    }
+    writers.get(output_format, output_text)(results, output_file)  # txt by default
+
+
+def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
+    """Render the report as a PDF at file_name, one section per risk level.
+
+    Entries under the "none" level are left out of the report.
+    """
+    from xml.sax.saxutils import escape
+
+    doc = SimpleDocTemplate(file_name, pagesize=letter)
+    styles = getSampleStyleSheet()
+    # Add the title centered
+    title_style = styles["Title"]
+    title_style.alignment = 1  # Center alignment
+    story = [Paragraph("Security Analysis Report", title_style), Spacer(1, 20)]
+    # Add risk levels and entries
+    normal_style = styles["BodyText"]
+    for risk_level, entries in results.items():
+        if risk_level == "none":
+            continue
+        story.append(Paragraph(f"{risk_level.capitalize()} Risk:", styles["Heading2"]))
+        for line_num, line in entries:
+            # Paragraph parses its text as XML-like markup, so escape "<", ">" and "&".
+            text = escape(f"Line {line_num}: {line}")
+            story.append(Paragraph(text, normal_style))
+        story.append(Spacer(1, 12))  # Space between sections
+    doc.build(story)
+
+
+def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
+    """Build the HTML report; write it to file_name if given, else return it."""
+    from html import escape
+    html_output = "<html><head><title>Security Analysis Report</title></head><body>"
     html_output += "<h1>Security Analysis Report</h1>"
     for risk_level, entries in results.items():
-        html_output += f"<h2>{risk_level.capitalize()} Risk</h2><ul>"
-        for line_num, line in entries:
-            html_output += f"<li>Line {line_num}: {line}</li>"
-        html_output += "</ul>"
+        if risk_level != "none":
+            # Escape scanned text so matched code cannot inject markup into the report.
+            items = "".join(f"<li>{escape(str(n))}: {escape(str(s))}</li>" for n, s in entries)
+            html_output += f"<h2>{risk_level.capitalize()} Risk</h2><ul>{items}</ul>"
     html_output += "</body></html>"
-    with open(file_name, "w") as file:
-        file.write(html_output)
+    if not file_name:
+        return html_output
+    with open(file_name, "w", encoding="utf-8") as file:
+        file.write(html_output)
 
 
-def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name: str):
-    md_output = f"# Security Analysis Report for {file_name}\n"
+def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
+    """Build the Markdown report; write it to file_name if given, else return it."""
+    md_output = "# Security Analysis Report\n"
     for risk_level, entries in results.items():
-        md_output += f"## {risk_level.capitalize()} Risk\n"
-        for line_num, line in entries:
-            md_output += f"- Line {line_num}: {line}\n"
-    with open(file_name, "w") as file:
-        file.write(md_output)
+        if risk_level != "none":
+            md_output += f"## {risk_level.capitalize()} Risk\n"
+            for line_num, line in entries:
+                md_output += f"- {line_num}: {line}\n"
+    if not file_name:
+        return md_output
+    with open(file_name, "w", encoding="utf-8") as file:  # UTF-8 regardless of locale
+        file.write(md_output)
 
 
-def output_text(results: Dict[str, List[Tuple[int, str]]], file_name: str):
-    text_output = f"Security Analysis Report for {file_name}\n"
+def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
+    """Build the plain-text report; write it to file_name if given, else return it."""
+    text_output = "Security Analysis Report\n"
     for risk_level, entries in results.items():
-        text_output += f"{risk_level.capitalize()} Risk:\n"
-        for line_num, line in entries:
-            text_output += f"  Line {line_num}: {line}\n"
-    with open(file_name, "w") as file:
-        file.write(text_output)
+        if risk_level != "none":
+            text_output += f"{risk_level.capitalize()} Risk:\n"
+            for line_num, line in entries:
+                text_output += f"  {line_num}: {line}\n"
+    if not file_name:
+        return text_output
+    with open(file_name, "w", encoding="utf-8") as file:  # UTF-8 regardless of locale
+        file.write(text_output)
 
 
-def process_path(path: str, output_format: str):
+def _merge_file_results(results, file_path, file_results):
+    """Append file_results to results, tagging each line number with file_path."""
+    for key, entries in file_results.items():
+        if key != "none":  # Exclude 'none' risk level
+            results[key].extend(
+                (f"{file_path}: Line {line_num}", line) for line_num, line in entries
+            )
+
+
+def process_path(path: str, output_format: str, output_file=None):
+    """Scan a file or directory tree and report the merged findings.
+
+    Prints a message and returns for an unsupported file type; prints a
+    message and exits with status 1 when path is neither a file nor a directory.
+    """
+    results = {"high": [], "medium": [], "low": [], "none": []}
     if os.path.isdir(path):
         for root, dirs, files in os.walk(path):
             for file in files:
                 file_extension = os.path.splitext(file)[1]
                 if file_extension in SUPPORTED_EXTENSIONS:
                     file_path = os.path.join(root, file)
-                    print(f"Processing {file_path}...")
                     file_results = find_dangerous_functions(
                         read_file_content(file_path), file_extension
                     )
-                    output_results(file_results, output_format, file_path)
+                    _merge_file_results(results, file_path, file_results)
     elif os.path.isfile(path):
         file_extension = os.path.splitext(path)[1]
         if file_extension in SUPPORTED_EXTENSIONS:
             file_results = find_dangerous_functions(
                 read_file_content(path), file_extension
             )
-            output_results(file_results, output_format, path)
+            _merge_file_results(results, path, file_results)
         else:
             print("Unsupported file type.")
+            return
     else:
         print("Invalid path.")
         sys.exit(1)
 
+    output_results(results, output_format, output_file)
+
 
 def main():
+    """Parse the command line and run the scan, inferring the format from --output."""
-    if len(sys.argv) < 3:
-        print("Usage: python backdoor_detection.py <path> <output_format>")
-        sys.exit(1)
-    path = sys.argv[1]
-    output_format = sys.argv[2]
-    if output_format not in OUTPUT_FORMATS:
-        print(
-            f"Unsupported output format. Supported formats are: {', '.join(OUTPUT_FORMATS)}"
-        )
-        sys.exit(1)
-    process_path(path, output_format)
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
+    parser.add_argument("path", help="Path to the code to analyze")
+    parser.add_argument("-o", "--output", help="Output file path", default=None)
+    args = parser.parse_args()
+    output_format = "txt"  # Default output format
+    output_file = None
+    if args.output:
+        base, ext = os.path.splitext(args.output)
+        if ext.lower() in [".html", ".md", ".txt", ".pdf"]:
+            output_format = ext.lower().replace(".", "")
+            output_file = args.output
+        else:
+            print(
+                "Your input file format was incorrect, the output has been saved as a TXT file."
+            )
+            output_file = base + ".txt"  # splitext only strips a real extension
+    process_path(args.path, output_format, output_file)
 
 
 if __name__ == "__main__":