feat: 完善对于代码的正则匹配功能

This commit is contained in:
dqy
2024-04-25 17:01:24 +08:00
parent 323200fd85
commit 102c631ed9
2 changed files with 146 additions and 65 deletions

View File

@@ -33,6 +33,7 @@ jobs:
# code_path: "./tests"
# requirements_file: "./requirements.txt"
# output_format: "txt"
# A new repository needs to be created for testing
# - name: Clone custom Git repository
# run: |
# git clone -b feature/match https://git.mamahaha.work/sangge/BackDoorBuster
@@ -43,3 +44,9 @@ jobs:
- name: Run Requirements Detection
run: python ${{ github.workspace }}/BackDoorBuster/detection/requirements_detection.py ${{ github.workspace }}/BackDoorBuster/crawler/trans_extracted_data.txt ${{ github.workspace }}/BackDoorBuster/requirements.txt txt
- name: Upload Result Artifacts
uses: actions/upload-artifact@v2
with:
name: detection-results
path: ./results/code/

View File

@@ -1,12 +1,15 @@
# Usage: python backdoor_detection.py <code_path> [-o <output_file>]
import os
import re
import sys
from typing import Dict, List, Tuple
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from reportlab.lib import colors
# File extensions the scanner will analyse; anything else is skipped.
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
# Report formats the output functions can produce.
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
def read_file_content(file_path: str) -> str:
@@ -57,110 +60,181 @@ def find_dangerous_functions(
clean_line = remove_comments(line, file_extension)
if not clean_line:
continue
found = False
for pattern, risk_level in risk_patterns.items():
if re.search(pattern, clean_line):
classified_results[risk_level].append((line_number, clean_line))
found = True
break
if not found:
classified_results["none"].append((line_number, clean_line))
return classified_results
def output_results(
    results: Dict[str, List[Tuple[int, str]]], output_format: str, file_path: str
):
    """Write the findings for one scanned file into ``./results/code``.

    The report is named after the scanned file's base name with its
    extension replaced by ``output_format``.

    Args:
        results: Mapping of risk level to ``(line_number, line)`` entries.
        output_format: One of ``"html"``, ``"md"`` or ``"txt"``. Any other
            value creates the directory but writes no report.
        file_path: Path of the file the results were produced from.
    """
    # For integration testing this should be set to "./".
    results_dir = "./results/code"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(results_dir, exist_ok=True)

    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_file = os.path.join(results_dir, f"{stem}.{output_format}")

    if output_format == "html":
        output_html(results, output_file)
    elif output_format == "md":
        output_markdown(results, output_file)
    elif output_format == "txt":
        output_text(results, output_file)
def output_html(results: Dict[str, List[Tuple[int, str]]], file_name: str):
    """Write the findings as an HTML report to ``file_name``.

    Every risk level in ``results`` gets a heading and a bullet list of its
    entries. The scanned code is untrusted input, so all interpolated text is
    HTML-escaped before it is embedded in the page.

    Args:
        results: Mapping of risk level to ``(line_number, line)`` entries.
        file_name: Destination path; it is also shown in the page title.
    """
    from html import escape

    parts = [
        '<html><head><meta charset="utf-8">'
        f"<title>Analysis of {escape(str(file_name), quote=False)}</title></head><body>",
        "<h1>Security Analysis Report</h1>",
    ]
    for risk_level, entries in results.items():
        heading = escape(risk_level.capitalize(), quote=False)
        parts.append(f"<h2>{heading} Risk</h2><ul>")
        parts.extend(
            f"<li>Line {escape(str(line_num), quote=False)}: "
            f"{escape(str(line), quote=False)}</li>"
            for line_num, line in entries
        )
        parts.append("</ul>")
    parts.append("</body></html>")

    # Explicit UTF-8 so non-ASCII source lines do not depend on the locale.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write("".join(parts))
def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name: str):
    """Write the findings as a Markdown report to ``file_name``.

    Every risk level in ``results`` gets a second-level heading followed by
    one bullet per entry.

    Args:
        results: Mapping of risk level to ``(line_number, line)`` entries.
        file_name: Destination path; it is also shown in the report title.
    """
    chunks = [f"# Security Analysis Report for {file_name}\n"]
    for risk_level, entries in results.items():
        chunks.append(f"## {risk_level.capitalize()} Risk\n")
        chunks.extend(f"- Line {line_num}: {line}\n" for line_num, line in entries)

    # Explicit UTF-8 so non-ASCII source lines do not depend on the locale.
    with open(file_name, "w", encoding="utf-8") as file:
        file.write("".join(chunks))
def generate_text_content(results):
    """Return the plain-text report for ``results``.

    Risk levels with no entries, and the ``"none"`` level, are left out.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.

    Returns:
        The report as one string, starting with the title line.
    """
    chunks = ["Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if not entries or risk_level == "none":
            continue
        chunks.append(f"{risk_level.capitalize()} Risk:\n")
        chunks.extend(f" Line {line_num}: {line}\n" for line_num, line in entries)
    return "".join(chunks)
def output_results(results, output_format, output_file=None):
    """Emit the report either to ``output_file`` or to the terminal.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.
        output_format: Requested format. A value not in ``OUTPUT_FORMATS``
            falls back to ``"txt"`` and the file extension is changed to match.
        output_file: Destination path. When falsy, the plain-text report is
            printed instead of written.
    """
    if not output_file:
        # No destination given: default to text output on the terminal.
        print(generate_text_content(results))
        return

    if output_format not in OUTPUT_FORMATS:
        output_format = "txt"
        output_file = f"{os.path.splitext(output_file)[0]}.txt"

    # A bare file name has an empty dirname, and os.makedirs("") raises,
    # so only create the directory when the path actually has one.
    results_dir = os.path.dirname(output_file)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    if output_format == "pdf":
        output_pdf(results, output_file)
    elif output_format == "html":
        output_html(results, output_file)
    elif output_format == "md":
        output_markdown(results, output_file)
    else:  # Default to txt
        output_text(results, output_file)
def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
    """Render the findings as a PDF report at ``file_name``.

    The ``"none"`` risk level is skipped. Every other level gets a heading,
    one paragraph per entry and a trailing spacer.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.
        file_name: Destination path of the PDF.
    """
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(file_name, pagesize=letter)
    styles = getSampleStyleSheet()

    # Centered title followed by some breathing room.
    title_style = styles["Title"]
    title_style.alignment = 1  # Center alignment
    story = [Paragraph("Security Analysis Report", title_style), Spacer(1, 20)]

    normal_style = styles["BodyText"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        story.append(
            Paragraph(f"{risk_level.capitalize()} Risk:", styles["Heading2"])
        )
        for line_num, line in entries:
            # Paragraph parses its text as XML-like markup, so raw code
            # containing "<" or "&" must be escaped first.
            story.append(
                Paragraph(escape(f"Line {line_num}: {line}"), normal_style)
            )
        story.append(Spacer(1, 12))  # Space between sections
    doc.build(story)
def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Build the HTML report, then write it to ``file_name`` or return it.

    The ``"none"`` risk level is skipped. The scanned code is untrusted
    input, so all interpolated text is HTML-escaped before it is embedded.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.
        file_name: Destination path. When falsy, nothing is written.

    Returns:
        The HTML string when ``file_name`` is falsy, otherwise ``None``.
    """
    from html import escape

    parts = [
        '<html><head><meta charset="utf-8">'
        "<title>Security Analysis Report</title></head><body>",
        "<h1>Security Analysis Report</h1>",
    ]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        heading = escape(risk_level.capitalize(), quote=False)
        parts.append(f"<h2>{heading} Risk</h2><ul>")
        parts.extend(
            f"<li>{escape(str(line_num), quote=False)}: "
            f"{escape(str(line), quote=False)}</li>"
            for line_num, line in entries
        )
        parts.append("</ul>")
    parts.append("</body></html>")
    html_output = "".join(parts)

    if file_name:
        # Explicit UTF-8 so non-ASCII source lines do not depend on the locale.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(html_output)
        return None
    return html_output
def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Build the Markdown report, then write it to ``file_name`` or return it.

    The ``"none"`` risk level is skipped. Every other level gets a
    second-level heading followed by one bullet per entry.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.
        file_name: Destination path. When falsy, nothing is written.

    Returns:
        The Markdown string when ``file_name`` is falsy, otherwise ``None``.
    """
    chunks = ["# Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        chunks.append(f"## {risk_level.capitalize()} Risk\n")
        chunks.extend(f"- {line_num}: {line}\n" for line_num, line in entries)
    md_output = "".join(chunks)

    if file_name:
        # Explicit UTF-8 so non-ASCII source lines do not depend on the locale.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(md_output)
        return None
    return md_output
def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Build the plain-text report, then write it to ``file_name`` or return it.

    The ``"none"`` risk level is skipped. Every other level gets a heading
    line followed by one line per entry.

    Args:
        results: Mapping of risk level to ``(location, line)`` entries.
        file_name: Destination path. When falsy, nothing is written.

    Returns:
        The report string when ``file_name`` is falsy, otherwise ``None``.
    """
    chunks = ["Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        chunks.append(f"{risk_level.capitalize()} Risk:\n")
        chunks.extend(f" {line_num}: {line}\n" for line_num, line in entries)
    text_output = "".join(chunks)

    if file_name:
        # Explicit UTF-8 so non-ASCII source lines do not depend on the locale.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(text_output)
        return None
    return text_output
def _scan_into(results, file_path, file_extension):
    """Scan one file and append its non-"none" findings to ``results`` in place."""
    file_results = find_dangerous_functions(
        read_file_content(file_path), file_extension
    )
    for level, entries in file_results.items():
        if level == "none":  # Exclude 'none' risk level
            continue
        results[level].extend(
            (f"{file_path}: Line {line_num}", line) for line_num, line in entries
        )


def process_path(path: str, output_format: str, output_file=None):
    """Scan a file or directory tree and emit one combined report.

    Directories are walked recursively and only files whose extension is in
    ``SUPPORTED_EXTENSIONS`` are scanned. Each finding is labelled with the
    file it came from. The report is written once, after all files have been
    scanned; no per-file report is produced, so ``output_results`` is never
    called with a scanned source path as its destination.

    Args:
        path: File or directory to analyse.
        output_format: Report format passed through to ``output_results``.
        output_file: Report destination passed through to ``output_results``.

    An unsupported single file prints a message and returns without a report.
    A path that is neither a file nor a directory prints a message and exits
    with status 1.
    """
    results = {"high": [], "medium": [], "low": [], "none": []}

    if os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for file in files:
                file_extension = os.path.splitext(file)[1]
                if file_extension in SUPPORTED_EXTENSIONS:
                    file_path = os.path.join(root, file)
                    print(f"Processing {file_path}...")
                    _scan_into(results, file_path, file_extension)
    elif os.path.isfile(path):
        file_extension = os.path.splitext(path)[1]
        if file_extension not in SUPPORTED_EXTENSIONS:
            print("Unsupported file type.")
            return
        _scan_into(results, path, file_extension)
    else:
        print("Invalid path.")
        sys.exit(1)

    output_results(results, output_format, output_file)
def main():
    """Parse the command line and run the analysis.

    Usage: ``backdoor_detection.py <path> [-o OUTPUT]``. The report format is
    taken from the extension of ``OUTPUT``. An unsupported extension falls
    back to a ``.txt`` file next to the requested path. Without ``-o`` the
    report is produced with no output file.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
    args = parser.parse_args()

    output_format = "txt"  # Default output format
    output_file = None
    if args.output:
        stem, ext = os.path.splitext(args.output)
        requested = ext.lower()[1:]
        if requested in OUTPUT_FORMATS:
            output_format = requested
            output_file = args.output
        else:
            print(
                f"Unsupported output format. Supported formats are: {', '.join(OUTPUT_FORMATS)}. "
                "Your input file format was incorrect, the output has been saved as a TXT file."
            )
            # splitext only strips a real extension; rsplit(".", 1) would
            # cut "./out/report" at the leading dot and yield ".txt".
            output_file = f"{stem}.txt"
    process_path(args.path, output_format, output_file)
if __name__ == "__main__":