import os
from typing import Dict, List, Tuple
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from detection.pickle_detection import pickleDataDetection
from .Regexdetection import find_dangerous_functions
from .GPTdetection import detectGPT
from .utils import *
import sys
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
def generate_text_content(results):
text_output = "Security Analysis Report\n"
for risk_level, entries in results.items():
if entries and risk_level != "none":
text_output += f"{risk_level.capitalize()} Risk:\n"
for line_num, line in entries:
text_output += f" Line {line_num}: {line}\n"
return text_output
def output_results(results, output_format, output_file=None):
if output_file:
file_name = os.path.splitext(output_file)
if output_format not in OUTPUT_FORMATS:
output_format = "txt"
output_file = f"{file_name}.txt"
results_dir = os.path.dirname(output_file)
if not os.path.exists(results_dir):
os.makedirs(results_dir)
if output_format == "pdf":
output_pdf(results, output_file)
elif output_format == "html":
output_html(results, output_file)
elif output_format == "md":
output_markdown(results, output_file)
else: # Default to txt
output_text(results, output_file)
else:
# If no output file is specified, default to text output to the terminal.
txt_output = generate_text_content(results)
print(txt_output)
def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
doc = SimpleDocTemplate(file_name, pagesize=letter)
story = []
styles = getSampleStyleSheet()
# Add the title centered
title_style = styles["Title"]
title_style.alignment = 1 # Center alignment
title = Paragraph("Security Analysis Report", title_style)
story.append(title)
story.append(Spacer(1, 20)) # Space after title
# Add risk levels and entries
normal_style = styles["BodyText"]
for risk_level, entries in results.items():
if risk_level != "none":
story.append(
Paragraph(f"{risk_level.capitalize()} Risk:", styles["Heading2"])
)
for line_num, line in entries:
entry = Paragraph(f"Line {line_num}: {line}", normal_style)
story.append(entry)
story.append(Spacer(1, 12)) # Space between sections
doc.build(story)
def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
html_output = "
Security Analysis Report"
html_output += "Security Analysis Report
"
for risk_level, entries in results.items():
if risk_level != "none":
html_output += f"{risk_level.capitalize()} Risk
"
for line_num, line in entries:
html_output += f"- {line_num}: {line}
"
html_output += "
"
html_output += ""
if file_name:
with open(file_name, "w") as file:
file.write(html_output)
else:
return html_output
def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
md_output = "# Security Analysis Report\n"
for risk_level, entries in results.items():
if risk_level != "none":
md_output += f"## {risk_level.capitalize()} Risk\n"
for line_num, line in entries:
md_output += f"- {line_num}: {line}\n"
if file_name:
with open(file_name, "w") as file:
file.write(md_output)
else:
return md_output
def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
text_output = "Security Analysis Report\n"
for risk_level, entries in results.items():
if risk_level != "none":
text_output += f"{risk_level.capitalize()} Risk:\n"
for line_num, line in entries:
text_output += f" {line_num}: {line}\n"
if file_name:
with open(file_name, "w") as file:
file.write(text_output)
else:
return text_output
def checkModeAndDetect(mode: str, filePath: str, fileExtension: str):
# TODO:添加更多方式,这里提高代码的复用性和扩展性
if mode == "regex":
return find_dangerous_functions(read_file_content(filePath), fileExtension)
elif mode == "llm":
return detectGPT(read_file_content(filePath))
else:
return find_dangerous_functions(read_file_content(filePath), fileExtension)
def process_path(path: str, output_format: str, mode: str, output_file=None):
results = {"high": [], "medium": [], "low": [], "none": []}
if os.path.isdir(path):
for root, dirs, files in os.walk(path):
for file in files:
file_extension = os.path.splitext(file)[1]
if file_extension in SUPPORTED_EXTENSIONS:
file_path = os.path.join(root, file)
file_results = checkModeAndDetect(mode, file_path, file_extension)
for key in file_results:
if key != "none": # Exclude 'none' risk level
results[key].extend(
[
(f"{file_path}: Line {line_num}", line)
for line_num, line in file_results[key]
]
)
elif os.path.isfile(path):
file_extension = os.path.splitext(path)[1]
if file_extension in SUPPORTED_EXTENSIONS:
file_results = checkModeAndDetect(mode, path, file_extension)
for key in file_results:
if key != "none": # Exclude 'none' risk level
results[key].extend(
[
(f"{path}: Line {line_num}", line)
for line_num, line in file_results[key]
]
)
else:
print("Unsupported file type.")
return
else:
print("Invalid path.")
sys.exit(1)
output_results(results, output_format, output_file)
def main():
import argparse
parser = argparse.ArgumentParser(description="Backdoor detection tool.")
parser.add_argument("path", help="Path to the code to analyze")
parser.add_argument("-o", "--output", help="Output file path", default=None)
parser.add_argument(
"-m", "--mode", help="Mode of operation:[regex,llm]", default="regex"
)
parser.add_argument(
"-p","--pickle",help="analyze the pickle file",default=None
)
args = parser.parse_args()
output_format = "txt" # Default output format
output_file = None
if args.output:
_, ext = os.path.splitext(args.output)
ext = ext.lower()
if ext in [".html", ".md", ".txt", ".pdf"]:
output_format = ext.replace(".", "")
output_file = args.output
else:
print(
"Your input file format was incorrect, the output has been saved as a TXT file."
)
output_file = args.output.rsplit(".", 1)[0] + ".txt"
# 如果未指定输出文件,则输出到 stdout;否则写入文件
if args.pickle:
pickleDataDetection(args.pickle, output_file)
else:
process_path(args.path, output_format, args.mode, output_file)
if __name__ == "__main__":
main()