feature/match #5

Merged
Tritium merged 63 commits from feature/match into main 2024-04-25 21:30:29 +08:00
20 changed files with 4101 additions and 1 deletions

View File

@ -0,0 +1,32 @@
# Composite action: scans a code directory for dangerous calls and checks a
# requirements file against the bundled vulnerability list.
name: "Backdoor Detection"
description: "Perform backdoor and vulnerability detection on your code and dependencies."
inputs:
  code_path:
    description: "Path to the code directory to be analyzed."
    required: true
  requirements_file:
    description: "Path to the requirements.txt file."
    required: true
  output_format:
    description: "Output format for the detection results (html, md, txt)."
    required: true
    default: "txt"
runs:
  using: "composite"
  steps:
    - name: Checkout code
      uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: "3.x"
    - name: Install dependencies
      # Every `run` step of a composite action must declare its shell.
      shell: bash
      # reportlab is installed as well because the detection scripts import it
      # at module level; with only `packaging` they fail on import.
      run: |
        python -m pip install --upgrade pip
        pip install packaging reportlab
    # NOTE(review): the backdoor detection CLI appears to accept only a path
    # plus `-o/--output`; confirm it takes a positional output format.
    - name: Run Backdoor Detection
      shell: bash
      run: python ${{ github.workspace }}/detection/backdoor_detection.py ${{ inputs.code_path }} ${{ inputs.output_format }}
    - name: Run Requirements Detection
      shell: bash
      run: python ${{ github.workspace }}/detection/requirements_detection.py ${{ github.workspace }}/crawler/trans_extracted_data.txt ${{ inputs.requirements_file }} ${{ inputs.output_format }}

18
.github/workflows/python-tests.yml vendored Normal file
View File

@ -0,0 +1,18 @@
# CI workflow: installs the project requirements and runs the unittest suite
# for pushes to main and pull requests that target main.
name: Python application test
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
  build:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@v2
      - name: Install dependencies
        # Packages are resolved through the TUNA PyPI mirror.
        run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
      - name: Run tests
        run: python -m unittest discover -s tests

2
.gitignore vendored
View File

@ -158,5 +158,5 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/

0
__init__.py Normal file
View File

32
action.yml Normal file
View File

@ -0,0 +1,32 @@
# Composite action: scans a code directory for dangerous calls and checks a
# requirements file against the bundled vulnerability list.
name: "Backdoor Detection"
description: "Perform backdoor and vulnerability detection on your code and dependencies."
inputs:
  code_path:
    description: "Path to the code directory to be analyzed."
    required: true
  requirements_file:
    description: "Path to the requirements.txt file."
    required: true
  output_format:
    description: "Output format for the detection results (html, md, txt)."
    required: true
    default: "txt"
runs:
  using: "composite"
  steps:
    - name: Checkout code
      uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: "3.x"
    - name: Install dependencies
      # Every `run` step of a composite action must declare its shell.
      shell: bash
      # reportlab is installed as well because the detection scripts import it
      # at module level; with only `packaging` they fail on import.
      run: |
        python -m pip install --upgrade pip
        pip install packaging reportlab
    # NOTE(review): the backdoor detection CLI appears to accept only a path
    # plus `-o/--output`; confirm it takes a positional output format.
    - name: Run Backdoor Detection
      shell: bash
      run: python ${{ github.workspace }}/detection/backdoor_detection.py ${{ inputs.code_path }} ${{ inputs.output_format }}
    - name: Run Requirements Detection
      shell: bash
      run: python ${{ github.workspace }}/detection/requirements_detection.py ${{ github.workspace }}/crawler/trans_extracted_data.txt ${{ inputs.requirements_file }} ${{ inputs.output_format }}

62
crawler/crawler.py Normal file
View File

@ -0,0 +1,62 @@
import requests
from bs4 import BeautifulSoup
def fetch_html(url, timeout=10):
    """Fetch the HTML body of ``url`` with an HTTP GET request.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The response text when the server answers with status 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as a non-200 answer so the
        # caller's "failed to fetch" branch handles both.
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_html(html):
    """Extract link/span text from the second cell of each table row.

    Looks up the table with id ``sortable-table`` and, for every ``tr`` in its
    ``tbody`` that has at least two ``td`` cells, reads the ``a`` and ``span``
    tags inside the second cell.

    Args:
        html: HTML document text.

    Returns:
        A list of ``(link_text, span_texts)`` tuples, one per ``a`` tag;
        ``span_texts`` is the list of stripped span texts found in the same
        cell. The list is empty when the table or its ``tbody`` is missing.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", id="sortable-table")
    if table is None:
        return []
    body = table.find("tbody")
    if body is None:
        # A table without <tbody> previously raised AttributeError; treat it
        # as a page with no data instead.
        return []
    results = []
    for row in body.find_all("tr"):
        tds = row.find_all("td")
        if len(tds) < 2:
            continue
        cell = tds[1]
        spans = [span.text.strip() for span in cell.find_all("span")]
        # Every link in the cell is paired with all spans of that cell.
        results.extend((a_tag.text.strip(), spans) for a_tag in cell.find_all("a"))
    return results
def save_results_to_file(results, filename):
    """Append extracted records to a UTF-8 text file.

    Args:
        results: Iterable of ``(package_name, version_ranges)`` pairs, where
            ``version_ranges`` is an iterable of strings.
        filename: Path of the file to append to.
    """
    separator = "-" * 50 + "\n"  # visual divider between records
    # Append mode: existing file content is kept.
    with open(filename, "a", encoding="utf-8") as out:
        for package_name, version_ranges in results:
            out.write(f"Package Name: {package_name}\n")
            out.write("Version Ranges: " + ", ".join(version_ranges) + "\n")
            out.write(separator)
def main():
    """Crawl the numbered listing pages and append every result to a file.

    Pages are requested in ascending order starting at 1. The crawl stops at
    the first page that cannot be fetched or that yields no records.
    """
    base_url = "https://security.snyk.io/vuln/pip/"
    page = 1
    while True:
        url = f"{base_url}{page}"
        print(f"Fetching data from {url}")
        html_content = fetch_html(url)
        if html_content:
            records = parse_html(html_content)
            if records:
                save_results_to_file(records, "extracted_data.txt")
                page += 1
                continue
            print("No relevant data found on page.")
        else:
            print("No more data found or failed to fetch.")
        break
    print("Results have been saved to 'extracted_data.txt'.")


if __name__ == "__main__":
    main()

2700
crawler/extracted_data.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,507 @@
Package Name: apache-airflow
Version Ranges: <2.6.1,>=2.3.0
--------------------------------------------------
Package Name: mlflow
Version Ranges: <2.10.0
--------------------------------------------------
Package Name: torch
Version Ranges: <1.10.0
--------------------------------------------------
Package Name: aiohttp
Version Ranges: <3.9.2,>=1.0.5
--------------------------------------------------
Package Name: keras
Version Ranges: <2.13.1rc0
--------------------------------------------------
Package Name: llama-index
Version Ranges: <0.10.24
--------------------------------------------------
Package Name: zenml
Version Ranges: <0.42.2,<0.43.1,<0.44.4,<0.47.0,>=0.43.0,>=0.44.0,>=0.46.0
--------------------------------------------------
Package Name: gradio
Version Ranges: <4.9.0
--------------------------------------------------
Package Name: bentoml
Version Ranges: <1.2.5
--------------------------------------------------
Package Name: langchain
Version Ranges: <0.0.353
--------------------------------------------------
Package Name: scrapy
Version Ranges: <1.8.4,<2.11.1,>=2.0.0
--------------------------------------------------
Package Name: sqlparse
Version Ranges: <0.5.0
--------------------------------------------------
Package Name: gunicorn
Version Ranges: <22.0.0
--------------------------------------------------
Package Name: magnum
Version Ranges: <14.1.2,<15.0.2,<16.0.2,<17.0.2,>=15.0.0.0rc1,>=16.0.0.0rc1,>=17.0.0.0rc1
--------------------------------------------------
Package Name: nicegui
Version Ranges: <1.4.21,>=1.4.6
--------------------------------------------------
Package Name: idna
Version Ranges: <3.7
--------------------------------------------------
Package Name: llama-index-core
Version Ranges: <0.10.24
--------------------------------------------------
Package Name: litellm
Version Ranges: <1.34.42
--------------------------------------------------
Package Name: roundup
Version Ranges: <1.2.1,<1.4.6,>=1.2.0,>=1.4.0
--------------------------------------------------
Package Name: transformers
Version Ranges: <4.37.0
--------------------------------------------------
Package Name: dirac
Version Ranges: <8.0.37,<9.0.0a22,>=8.0.0,>=8.1.0a1
--------------------------------------------------
Package Name: yt-dlp
Version Ranges: <2024.4.9,>=2021.4.11
--------------------------------------------------
Package Name: rafcon
Version Ranges: <0.15.4
--------------------------------------------------
Package Name: radicale
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: pcaspy
Version Ranges: <0.7.1
--------------------------------------------------
Package Name: holidays
Version Ranges: <0.45
--------------------------------------------------
Package Name: evennia
Version Ranges: <4.0.0
--------------------------------------------------
Package Name: django-json-widget
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: avocado-framework
Version Ranges: <104.0
--------------------------------------------------
Package Name: arrendatools.plantillas
Version Ranges: <0.4.3
--------------------------------------------------
Package Name: amazon-product-details-scraper
Version Ranges: <1.0.4
--------------------------------------------------
Package Name: aiopioneer
Version Ranges: <0.1.5
--------------------------------------------------
Package Name: pgadmin4
Version Ranges: <8.4
--------------------------------------------------
Package Name: pymongo
Version Ranges: <4.6.3
--------------------------------------------------
Package Name: voila
Version Ranges: <0.2.17,<0.3.8,<0.4.4,<0.5.6,>=0.0.2,>=0.3.0a0,>=0.4.0a0,>=0.5.0a0
--------------------------------------------------
Package Name: piccolo-admin
Version Ranges: <1.3.2
--------------------------------------------------
Package Name: cryptoauthlib
Version Ranges: <20200912
--------------------------------------------------
Package Name: mosaicml
Version Ranges: <0.5.0
--------------------------------------------------
Package Name: mlrun
Version Ranges: <1.7.0rc5
--------------------------------------------------
Package Name: eventlet
Version Ranges: <0.34.3
--------------------------------------------------
Package Name: salt
Version Ranges: <3005.5
--------------------------------------------------
Package Name: django-two-factor-auth
Version Ranges: <1.13
--------------------------------------------------
Package Name: pillow
Version Ranges: <10.2.0
--------------------------------------------------
Package Name: ipywidgets
Version Ranges: <5.2.0,>=5.0.0
--------------------------------------------------
Package Name: pylint
Version Ranges: <2.6.1
--------------------------------------------------
Package Name: pytest-cov
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: jupyterhub
Version Ranges: <4.1.0
--------------------------------------------------
Package Name: geonode
Version Ranges: <4.1.0
--------------------------------------------------
Package Name: langchain-core
Version Ranges: <0.1.7
--------------------------------------------------
Package Name: lektor
Version Ranges: <3.3.11
--------------------------------------------------
Package Name: ansys-geometry-core
Version Ranges: <0.3.3,<0.4.12,>=0.3.0,>=0.4.0
--------------------------------------------------
Package Name: nautobot
Version Ranges: <1.6.10,<2.1.2,>=2.0.0
--------------------------------------------------
Package Name: mobsfscan
Version Ranges: <0.3.8
--------------------------------------------------
Package Name: esphome
Version Ranges: <2024.2.1
--------------------------------------------------
Package Name: qiskit-ibm-runtime
Version Ranges: <0.21.2,>=0.1.0
--------------------------------------------------
Package Name: jupyter-server-proxy
Version Ranges: <3.2.3,<4.1.1,>=4.0.0
--------------------------------------------------
Package Name: oauthenticator
Version Ranges: <16.3.0
--------------------------------------------------
Package Name: octoprint
Version Ranges: <1.10.0rc1
--------------------------------------------------
Package Name: wiki
Version Ranges: <0.10.1
--------------------------------------------------
Package Name: astropy
Version Ranges: <5.3.3
--------------------------------------------------
Package Name: yaql
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: black
Version Ranges: <24.3.0
--------------------------------------------------
Package Name: fgr
Version Ranges: <0.4.0
--------------------------------------------------
Package Name: vantage6
Version Ranges: <4.2.0
--------------------------------------------------
Package Name: paddlepaddle
Version Ranges: <2.6.0
--------------------------------------------------
Package Name: mssql-django
Version Ranges: <1.4.1
--------------------------------------------------
Package Name: aiosmtpd
Version Ranges: <1.4.5
--------------------------------------------------
Package Name: ckan
Version Ranges: <2.10.1,<2.9.9,>=2.10.0
--------------------------------------------------
Package Name: langchain-community
Version Ranges: <0.0.27
--------------------------------------------------
Package Name: libosdp
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: weasyprint
Version Ranges: <61.2,>=61.0
--------------------------------------------------
Package Name: apache-superset
Version Ranges: <3.0.3
--------------------------------------------------
Package Name: jwcrypto
Version Ranges: <1.5.6,>=0.5.0
--------------------------------------------------
Package Name: paho-mqtt
Version Ranges: <1.1
--------------------------------------------------
Package Name: rq
Version Ranges: <0.7.1
--------------------------------------------------
Package Name: eth-abi
Version Ranges: <5.0.1
--------------------------------------------------
Package Name: prefect
Version Ranges: <2.15.0
--------------------------------------------------
Package Name: django-treenode
Version Ranges: <0.20.0
--------------------------------------------------
Package Name: hypercorn
Version Ranges: <0.16.0
--------------------------------------------------
Package Name: streamlink
Version Ranges: <5.3.0
--------------------------------------------------
Package Name: kedro
Version Ranges: <0.19.3
--------------------------------------------------
Package Name: pyccel
Version Ranges: <1.9.0
--------------------------------------------------
Package Name: django
Version Ranges: <3.2.24,<4.2.10,<5.0.2,>=3.2,>=4.2,>=5.0
--------------------------------------------------
Package Name: videomass
Version Ranges: <5.0.4
--------------------------------------------------
Package Name: ultralytics
Version Ranges: <8.1.0
--------------------------------------------------
Package Name: intel-extension-for-transformers
Version Ranges: <1.2.2
--------------------------------------------------
Package Name: labgrid
Version Ranges: <23.0.2
--------------------------------------------------
Package Name: docassemble.webapp
Version Ranges: <1.4.97
--------------------------------------------------
Package Name: docassemble.base
Version Ranges: <1.4.97,>=1.4.53
--------------------------------------------------
Package Name: docassemble
Version Ranges: <1.4.97
--------------------------------------------------
Package Name: langchain-experimental
Version Ranges: <0.0.52
--------------------------------------------------
Package Name: label-studio
Version Ranges: <1.10.1
--------------------------------------------------
Package Name: rpyc
Version Ranges: <5.2.1
--------------------------------------------------
Package Name: peewee
Version Ranges: <3.17.1
--------------------------------------------------
Package Name: urllib3-future
Version Ranges: <2.4.902
--------------------------------------------------
Package Name: flask-appbuilder
Version Ranges: <4.3.11
--------------------------------------------------
Package Name: pretix
Version Ranges: <2024.1.1
--------------------------------------------------
Package Name: orjson
Version Ranges: <3.9.15
--------------------------------------------------
Package Name: pypqc
Version Ranges: <0.0.6.1
--------------------------------------------------
Package Name: mjml
Version Ranges: <0.11.0
--------------------------------------------------
Package Name: onnx
Version Ranges: <1.16.0
--------------------------------------------------
Package Name: fastecdsa
Version Ranges: <2.3.2
--------------------------------------------------
Package Name: pymatgen
Version Ranges: <2024.2.20
--------------------------------------------------
Package Name: cryptography
Version Ranges: <42.0.2,>=35.0.0
--------------------------------------------------
Package Name: apache-airflow-providers-mongo
Version Ranges: <4.0.0,>=1.0.0
--------------------------------------------------
Package Name: cbor2
Version Ranges: <5.6.0
--------------------------------------------------
Package Name: intel-extension-for-tensorflow
Version Ranges: <2.13.0.0
--------------------------------------------------
Package Name: tuf
Version Ranges: <3.1.1,>=2.0.0
--------------------------------------------------
Package Name: zpywallet
Version Ranges: <0.6.2
--------------------------------------------------
Package Name: dipdup
Version Ranges: <3.0.2
--------------------------------------------------
Package Name: clip-retrieval
Version Ranges: <2.23.1
--------------------------------------------------
Package Name: procrastinate
Version Ranges: <0.11.0
--------------------------------------------------
Package Name: embedchain
Version Ranges: <0.1.57
--------------------------------------------------
Package Name: miarec-ftpfs
Version Ranges: <2024.1.2
--------------------------------------------------
Package Name: miarec-sshfs
Version Ranges: <2024.1.5
--------------------------------------------------
Package Name: linkml
Version Ranges: <1.5.2
--------------------------------------------------
Package Name: toodledo
Version Ranges: <1.5.0
--------------------------------------------------
Package Name: renku
Version Ranges: <1.11.0
--------------------------------------------------
Package Name: vunnel
Version Ranges: <0.18.0
--------------------------------------------------
Package Name: panda3d
Version Ranges: <1.9.4
--------------------------------------------------
Package Name: ludwig
Version Ranges: <0.7
--------------------------------------------------
Package Name: ethyca-fides
Version Ranges: <2.1.0
--------------------------------------------------
Package Name: hiddifypanel
Version Ranges: <9.0.0.dev30
--------------------------------------------------
Package Name: dgl
Version Ranges: <0.9.0
--------------------------------------------------
Package Name: deephaven-core
Version Ranges: <0.30.0
--------------------------------------------------
Package Name: borgmatic
Version Ranges: <1.8.7
--------------------------------------------------
Package Name: cg
Version Ranges: <26.0.4
--------------------------------------------------
Package Name: ccryptofeed
Version Ranges: <2.2.3
--------------------------------------------------
Package Name: c2cgeoform
Version Ranges: <2.1.26
--------------------------------------------------
Package Name: appfl
Version Ranges: <0.4.0
--------------------------------------------------
Package Name: nonebot2
Version Ranges: <2.2.0,>=2.0.0a16
--------------------------------------------------
Package Name: acryl-datahub
Version Ranges: <0.8.45
--------------------------------------------------
Package Name: bullmq
Version Ranges: <1.15.0
--------------------------------------------------
Package Name: aiobotocore
Version Ranges: <2.9.1
--------------------------------------------------
Package Name: diffoscope
Version Ranges: <256
--------------------------------------------------
Package Name: kinto-attachment
Version Ranges: <6.4.0
--------------------------------------------------
Package Name: bandit
Version Ranges: <1.7.7
--------------------------------------------------
Package Name: dnspython
Version Ranges: <2.6.1
--------------------------------------------------
Package Name: products.sqlalchemyda
Version Ranges: <2.2
--------------------------------------------------
Package Name: clearml
Version Ranges: <1.14.2
--------------------------------------------------
Package Name: tensorflow
Version Ranges: <1.7.1
--------------------------------------------------
Package Name: pyload-ng
Version Ranges: <0.5.0b3.dev78
--------------------------------------------------
Package Name: fastapi
Version Ranges: <0.109.1
--------------------------------------------------
Package Name: python-multipart
Version Ranges: <0.0.7
--------------------------------------------------
Package Name: kinto
Version Ranges: <6.1.0
--------------------------------------------------
Package Name: cupy
Version Ranges: <13.0.0
--------------------------------------------------
Package Name: llama-hub
Version Ranges: <0.0.67
--------------------------------------------------
Package Name: borgbackup
Version Ranges: <1.0.7
--------------------------------------------------
Package Name: snakemake
Version Ranges: <7.9.0
--------------------------------------------------
Package Name: lief
Version Ranges: <0.12.3
--------------------------------------------------
Package Name: checkov
Version Ranges: <2.0.1029
--------------------------------------------------
Package Name: dash-html-components
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: dash
Version Ranges: <2.15.0
--------------------------------------------------
Package Name: dash-core-components
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: glance-store
Version Ranges: <4.3.3,<4.7.0,>=4.4.0
--------------------------------------------------
Package Name: dagster
Version Ranges: <1.1.10
--------------------------------------------------
Package Name: wagtail
Version Ranges: <5.2rc1
--------------------------------------------------
Package Name: pycryptodome
Version Ranges: <3.19.1
--------------------------------------------------
Package Name: celery
Version Ranges: <4.4.0rc5
--------------------------------------------------
Package Name: vantage6-server
Version Ranges: <4.2.0
--------------------------------------------------
Package Name: tuitse-tsusin
Version Ranges: <1.3.2
--------------------------------------------------
Package Name: apache-airflow-providers-cncf-kubernetes
Version Ranges: <7.0.0,>=5.2.0
--------------------------------------------------
Package Name: whoogle-search
Version Ranges: <0.8.4
--------------------------------------------------
Package Name: jupyterlab-lsp
Version Ranges: <5.0.2
--------------------------------------------------
Package Name: changedetection.io
Version Ranges: <0.45.13
--------------------------------------------------
Package Name: jupyterlab
Version Ranges: <4.0.11,>=4.0.0
--------------------------------------------------
Package Name: ansible-core
Version Ranges: <2.14.14,<2.15.9,<2.16.3,>=2.15.0,>=2.16.0
--------------------------------------------------
Package Name: readthedocs-sphinx-search
Version Ranges: <0.3.2
--------------------------------------------------
Package Name: zodb3
Version Ranges: <3.8.3,<3.9.0c2,>=3.8.0a1,>=3.9.0
--------------------------------------------------

48
crawler/transfer.py Normal file
View File

@ -0,0 +1,48 @@
"""转换原有的漏洞文件格式"""
import re
from packaging.specifiers import SpecifierSet
def load_vulnerable_packages(filename):
    """Load vulnerable-package records and convert their ranges to specifiers.

    The file is split on the 50-dash separator line. In each block the package
    name is read from the ``Package Name:`` line and every ``[start, end)``
    interval is turned into ``>=start`` and/or ``<end`` clauses.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        Dict mapping package name to a ``SpecifierSet`` built from all clauses
        of that block. Blocks without any interval are left out; a later block
        with the same package name replaces the earlier entry.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        raw_text = handle.read()

    interval_re = re.compile(r"\[(.*?),\s*(.*?)\)")
    packages = {}
    for block in raw_text.split("--------------------------------------------------"):
        name_hit = re.search(r"Package Name: (.+)", block)
        if not name_hit:
            continue
        package_name = name_hit.group(1).strip()

        clauses = []
        for lower, upper in interval_re.findall(block):
            bounds = []
            if lower:
                bounds.append(f">={lower}")
            if upper:
                bounds.append(f"<{upper}")
            if bounds:
                clauses.append(",".join(bounds))

        # NOTE(review): all clauses are joined into one SpecifierSet, which
        # requires every clause to hold at once; several disjoint intervals
        # may therefore describe an empty set - confirm this is intended.
        if clauses:
            packages[package_name] = SpecifierSet(",".join(clauses))
    return packages
def save_vulnerabilities_to_file(vuln_packages, filename):
    """Write the package-to-specifier mapping to a UTF-8 text file.

    Args:
        vuln_packages: Mapping of package name to its version specifiers.
        filename: Path of the file to create or overwrite.
    """
    divider = "-" * 50 + "\n"
    with open(filename, "w", encoding="utf-8") as out:
        for package in vuln_packages:
            specifiers = vuln_packages[package]
            out.write(f"Package Name: {package}\n")
            out.write(f"Version Ranges: {specifiers}\n")
            out.write(divider)
def main():
    """Convert ``extracted_data.txt`` into ``trans_extracted_data.txt``."""
    source_name = "extracted_data.txt"
    target_name = "trans_extracted_data.txt"
    save_vulnerabilities_to_file(load_vulnerable_packages(source_name), target_name)


if __name__ == "__main__":
    main()

0
detection/__init__.py Normal file
View File

View File

@ -0,0 +1,244 @@
import os
import re
import sys
from typing import Dict, List, Tuple
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from reportlab.lib import colors
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
def read_file_content(file_path: str) -> str:
    """Return the text of ``file_path`` decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced with U+FFFD rather than
    raising, so one oddly encoded source file cannot abort a scan with an
    uncaught ``UnicodeDecodeError``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        SystemExit: With exit code 1 (after printing an error message) when
            the file does not exist or cannot be read.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            return file.read()
    except FileNotFoundError:
        print("Error: File not found.")
        sys.exit(1)
    except IOError:
        print("Error: Could not read file.")
        sys.exit(1)
def _strip_comments(code, line_marker, block_open, block_close, quotes):
    """Remove comments from ``code`` while leaving string literals untouched.

    Args:
        code: Source text to scan.
        line_marker: Token that starts a comment running to the end of line.
        block_open: Token that opens a block comment, or ``None`` if the
            language has none.
        block_close: Token that closes a block comment.
        quotes: Characters that open and close a string literal.

    Returns:
        ``code`` without comments. A block comment that is never closed is
        kept as-is so no code is hidden by it.
    """
    kept = []
    active_quote = None
    i, n = 0, len(code)
    while i < n:
        ch = code[i]
        if active_quote is not None:
            if ch == "\\" and i + 1 < n:
                # Keep escaped characters so an escaped quote does not end
                # the literal.
                kept.append(code[i:i + 2])
                i += 2
                continue
            if ch == active_quote or ch == "\n":
                # A newline also ends the literal so an unbalanced quote
                # cannot affect following lines.
                active_quote = None
            kept.append(ch)
            i += 1
            continue
        if ch in quotes:
            active_quote = ch
        elif code.startswith(line_marker, i):
            newline = code.find("\n", i)
            if newline == -1:
                break
            i = newline
            continue
        elif block_open and code.startswith(block_open, i):
            close = code.find(block_close, i + len(block_open))
            if close != -1:
                i = close + len(block_close)
                continue
        kept.append(ch)
        i += 1
    return "".join(kept)


def remove_comments(code: str, extension: str) -> str:
    """Strip comments from ``code`` according to the file extension.

    Comment markers inside string literals are ignored, so text such as
    ``"#"`` or ``"http://host"`` no longer truncates the line and hides the
    code that follows it.

    Args:
        code: Source text (typically a single line).
        extension: File extension including the dot. ``.py`` removes ``#``
            comments; ``.js`` and ``.cpp`` remove ``//`` and ``/* ... */``
            comments; any other value only trims whitespace.

    Returns:
        The code without comments and without leading/trailing whitespace.
    """
    if extension == ".py":
        return _strip_comments(code, "#", None, None, "'\"").strip()
    if extension in {".js", ".cpp"}:
        # Backticks delimit template literals in JavaScript.
        quotes = "'\"`" if extension == ".js" else "'\""
        return _strip_comments(code, "//", "/*", "*/", quotes).strip()
    return code.strip()
def find_dangerous_functions(
    file_content: str, file_extension: str
) -> Dict[str, List[Tuple[int, str]]]:
    """Find lines that call functions considered dangerous.

    Each line is stripped of comments and matched against the regular
    expressions registered for ``file_extension``.

    Args:
        file_content: Full text of the file to scan.
        file_extension: Extension including the dot (``.py``, ``.js`` or
            ``.cpp``); other values match nothing.

    Returns:
        Dict with the keys ``high``, ``medium``, ``low`` and ``none``, each
        holding ``(line_number, cleaned_line)`` tuples with 1-based line
        numbers. A line appears at most once per risk level, even when
        several patterns of that level match it.
    """
    patterns = {
        ".py": {
            r"\bsystem\(": "high",
            r"\bexec\(": "high",
            r"\bpopen\(": "medium",
            r"\beval\(": "high",
            r"\bsubprocess\.run\(": "medium",
            r"\b__getattribute__\(": "high",
            r"\bgetattr\(": "medium",
            r"\b__import__\(": "high",
        },
        ".js": {
            r"\beval\(": "high",
            r"\bexec\(": "high",
            r"\bchild_process\.exec\(": "high",
        },
        ".cpp": {
            r"\bsystem\(": "high",
        },
    }
    risk_patterns = patterns.get(file_extension, {})
    classified_results = {"high": [], "medium": [], "low": [], "none": []}
    for line_number, line in enumerate(file_content.split("\n"), start=1):
        clean_line = remove_comments(line, file_extension)
        if not clean_line:
            continue
        # Levels already recorded for this line; prevents duplicates when
        # overlapping patterns (e.g. exec( and child_process.exec() match.
        reported_levels = set()
        for pattern, risk_level in risk_patterns.items():
            if risk_level in reported_levels:
                continue
            if re.search(pattern, clean_line):
                reported_levels.add(risk_level)
                classified_results[risk_level].append((line_number, clean_line))
    return classified_results
def generate_text_content(results):
    """Render the findings as a plain-text report.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs.

    Returns:
        The report text: a title line followed by one section per risk level
        that has entries. The ``none`` level is never printed.
    """
    chunks = ["Security Analysis Report\n"]
    for risk_level, entries in results.items():
        if risk_level == "none" or not entries:
            continue
        chunks.append(f"{risk_level.capitalize()} Risk:\n")
        chunks.extend(f" Line {line_num}: {line}\n" for line_num, line in entries)
    return "".join(chunks)
def output_results(results, output_format, output_file=None):
    """Write the findings to ``output_file`` or print them to the terminal.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs.
        output_format: One of ``OUTPUT_FORMATS``; any other value falls back
            to ``txt`` and the file extension is replaced with ``.txt``.
        output_file: Destination path. When falsy, the text report is printed
            instead of being written to a file.
    """
    if not output_file:
        # If no output file is specified, default to text output to the terminal.
        print(generate_text_content(results))
        return

    if output_format not in OUTPUT_FORMATS:
        output_format = "txt"
        output_file = f"{os.path.splitext(output_file)[0]}.txt"

    # A bare file name has no directory part; os.makedirs("") would raise.
    results_dir = os.path.dirname(output_file)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)

    if output_format == "pdf":
        output_pdf(results, output_file)
    elif output_format == "html":
        output_html(results, output_file)
    elif output_format == "md":
        output_markdown(results, output_file)
    else:  # Default to txt
        output_text(results, output_file)
def output_pdf(results: Dict[str, List[Tuple[int, str]]], file_name):
    """Write the findings to a PDF file.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs. The ``none`` level is skipped.
        file_name: Path of the PDF to create.
    """
    # Local import: Paragraph parses its text as XML-style markup, so raw
    # source code containing <, > or & must be escaped before rendering.
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(file_name, pagesize=letter)
    story = []
    styles = getSampleStyleSheet()
    # Add the title centered
    title_style = styles["Title"]
    title_style.alignment = 1  # Center alignment
    story.append(Paragraph("Security Analysis Report", title_style))
    story.append(Spacer(1, 20))  # Space after title
    # Add risk levels and entries
    normal_style = styles["BodyText"]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        story.append(
            Paragraph(f"{escape(risk_level.capitalize())} Risk:", styles["Heading2"])
        )
        for line_num, line in entries:
            text = f"Line {escape(str(line_num))}: {escape(str(line))}"
            story.append(Paragraph(text, normal_style))
        story.append(Spacer(1, 12))  # Space between sections
    doc.build(story)
def output_html(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render the findings as an HTML report.

    Scanned source text is HTML-escaped so markup inside the analysed code
    (for example ``<script>``) is displayed rather than interpreted.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs. The ``none`` level is skipped.
        file_name: Optional destination path, written as UTF-8.

    Returns:
        The HTML text when ``file_name`` is not given, otherwise ``None``.
    """
    from html import escape  # local import keeps the block self-contained

    parts = [
        "<html><head><title>Security Analysis Report</title></head><body>",
        "<h1>Security Analysis Report</h1>",
    ]
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        parts.append(f"<h2>{escape(risk_level.capitalize(), quote=False)} Risk</h2><ul>")
        for line_num, line in entries:
            safe_num = escape(str(line_num), quote=False)
            safe_line = escape(str(line), quote=False)
            parts.append(f"<li>{safe_num}: {safe_line}</li>")
        parts.append("</ul>")
    parts.append("</body></html>")
    html_output = "".join(parts)
    if file_name:
        # Explicit encoding: scanned code may contain non-ASCII characters.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(html_output)
    else:
        return html_output
def output_markdown(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render the findings as a Markdown report.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs. The ``none`` level is skipped.
        file_name: Optional destination path, written as UTF-8.

    Returns:
        The Markdown text when ``file_name`` is not given, otherwise ``None``.
    """
    md_output = "# Security Analysis Report\n"
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        md_output += f"## {risk_level.capitalize()} Risk\n"
        md_output += "".join(f"- {line_num}: {line}\n" for line_num, line in entries)
    if file_name:
        # Explicit encoding: the platform default may be unable to encode
        # non-ASCII characters taken from the scanned code.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(md_output)
    else:
        return md_output
def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
    """Render the findings as a plain-text report.

    Args:
        results: Mapping of risk level to a list of ``(line_num, line)``
            pairs. The ``none`` level is skipped; other levels get a heading
            even when they have no entries.
        file_name: Optional destination path, written as UTF-8.

    Returns:
        The report text when ``file_name`` is not given, otherwise ``None``.
    """
    text_output = "Security Analysis Report\n"
    for risk_level, entries in results.items():
        if risk_level == "none":
            continue
        text_output += f"{risk_level.capitalize()} Risk:\n"
        text_output += "".join(f" {line_num}: {line}\n" for line_num, line in entries)
    if file_name:
        # Explicit encoding: the platform default may be unable to encode
        # non-ASCII characters taken from the scanned code.
        with open(file_name, "w", encoding="utf-8") as file:
            file.write(text_output)
    else:
        return text_output
def _collect_file_findings(file_path, file_extension, results):
    """Scan one file and add its findings, tagged with the path, to ``results``."""
    file_results = find_dangerous_functions(
        read_file_content(file_path), file_extension
    )
    for key, entries in file_results.items():
        if key == "none":  # Exclude 'none' risk level
            continue
        results[key].extend(
            (f"{file_path}: Line {line_num}", line) for line_num, line in entries
        )


def process_path(path: str, output_format: str, output_file=None):
    """Scan a file or directory tree and output the combined findings.

    Args:
        path: File or directory to analyse. Directories are walked
            recursively; only files whose extension is in
            ``SUPPORTED_EXTENSIONS`` are scanned.
        output_format: Report format handed to ``output_results``.
        output_file: Optional report path handed to ``output_results``.

    A single file with an unsupported extension prints a message and returns
    without producing a report. A path that is neither a file nor a directory
    prints a message and exits with status 1.
    """
    results = {"high": [], "medium": [], "low": [], "none": []}
    if os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for file in files:
                extension = os.path.splitext(file)[1]
                if extension in SUPPORTED_EXTENSIONS:
                    _collect_file_findings(os.path.join(root, file), extension, results)
    elif os.path.isfile(path):
        extension = os.path.splitext(path)[1]
        if extension not in SUPPORTED_EXTENSIONS:
            print("Unsupported file type.")
            return
        _collect_file_findings(path, extension, results)
    else:
        print("Invalid path.")
        sys.exit(1)
    output_results(results, output_format, output_file)
def main():
    """Command-line entry point of the backdoor detection tool.

    Parses the path to analyse and an optional ``-o/--output`` file. The
    report format is taken from the output file's extension (``.html``,
    ``.md``, ``.txt`` or ``.pdf``); any other extension falls back to a
    ``.txt`` file next to the requested path.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Backdoor detection tool.")
    parser.add_argument("path", help="Path to the code to analyze")
    parser.add_argument("-o", "--output", help="Output file path", default=None)
    args = parser.parse_args()
    output_format = "txt"  # Default output format
    output_file = None
    if args.output:
        base, ext = os.path.splitext(args.output)
        ext = ext.lower()
        if ext in [".html", ".md", ".txt", ".pdf"]:
            output_format = ext.replace(".", "")
            output_file = args.output
        else:
            print(
                "Your input file format was incorrect, the output has been saved as a TXT file."
            )
            # splitext only removes a real extension; splitting on the last
            # "." would truncate paths such as "./out/report".
            output_file = base + ".txt"
    process_path(args.path, output_format, output_file)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,160 @@
import argparse
import os
import re
import sys
from packaging import version
from packaging.specifiers import SpecifierSet
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
def load_vulnerable_packages(filename):
    """Load the vulnerable-package list from a text file.

    The file is split on the 50-dash separator line; every block that has
    both a ``Package Name:`` and a ``Version Ranges:`` line yields one entry.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        Dict mapping package name to a ``SpecifierSet`` built from the
        comma-separated version ranges, with whitespace around each part
        removed. A later block with the same name replaces the earlier one.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        text = handle.read()

    packages = {}
    for block in text.split("--------------------------------------------------"):
        name_hit = re.search(r"Package Name: (.+)", block)
        range_hit = re.search(r"Version Ranges: (.+)", block)
        if not (name_hit and range_hit):
            continue
        parts = (piece.strip() for piece in range_hit.group(1).strip().split(","))
        # NOTE(review): a SpecifierSet requires all parts to hold at once;
        # confirm entries listing several disjoint ranges behave as intended.
        packages[name_hit.group(1).strip()] = SpecifierSet(",".join(parts))
    return packages
def load_requirements(filename):
    """Load the pinned dependencies from a requirements file.

    Only ``name==version`` pins are collected. Blank lines, comment lines,
    option lines (such as ``-r other.txt``) and other operators are skipped.
    Extras (``name[extra]``), spaces around ``==``, inline ``#`` comments and
    environment markers after ``;`` are tolerated and left out of the result.

    Args:
        filename: Path of the UTF-8 requirements file.

    Returns:
        Dict mapping package name (as written in the file) to the pinned
        version string.
    """
    # name, optional [extras], "==" (but not "==="), then the version up to
    # whitespace, a marker separator, a comment or another specifier clause.
    pin_re = re.compile(
        r"^\s*([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*==(?!=)\s*([^\s;#,]+)"
    )
    requirements = {}
    with open(filename, "r", encoding="utf-8") as file:
        for line in file:
            pin = pin_re.match(line)
            if pin:
                requirements[pin.group(1)] = pin.group(2)
    return requirements
def output_pdf(results, file_name):
    """Render the result lines as a PDF report with ReportLab.

    The document starts with a centered "Vulnerability Report" title,
    followed by one paragraph per result. Results containing ``WARNING:``
    are set in a bold font, all others in the regular body style.

    Args:
        results: Iterable of result strings.
        file_name: Path of the PDF file to create.
    """
    document = SimpleDocTemplate(file_name, pagesize=letter)
    sheet = getSampleStyleSheet()

    # Styles: centered title, bold warnings, regular body text.
    heading_style = sheet["Title"]
    heading_style.alignment = 1  # 1 selects center alignment
    bold_style = ParagraphStyle(
        "WarningStyle", parent=sheet["BodyText"], fontName="Helvetica-Bold"
    )
    body_style = sheet["BodyText"]

    # Title followed by a gap.
    flowables = [Paragraph("Vulnerability Report", heading_style), Spacer(1, 20)]

    # One paragraph plus a small gap per result line.
    for line in results:
        if "WARNING:" in line:
            paragraph = Paragraph(
                line.replace("WARNING:", "<b>WARNING:</b>"), bold_style
            )
        else:
            paragraph = Paragraph(line, body_style)
        flowables.extend([paragraph, Spacer(1, 12)])

    document.build(flowables)
def output_results(filename, results, format_type):
    """Write the results to ``filename`` in the requested format.

    Args:
        filename: Destination path. Missing parent directories are created;
            a bare file name (no directory part) is written to the current
            working directory.
        results: Iterable of result strings.
        format_type: ``"html"``, ``"md"`` or ``"pdf"``; any other value is
            written as plain text, one result per line.
    """
    import html  # local import: only needed for the HTML branch

    # dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create the directory when there is one.
    output_dir = os.path.dirname(filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The PDF writer opens the target itself; do not hold a second text-mode
    # handle on the same path while it writes.
    if format_type == "pdf":
        output_pdf(results, filename)
        return

    with open(filename, "w", encoding="utf-8") as file:
        if format_type == "html":
            file.write("<html><head><title>Vulnerability Report</title></head><body>\n")
            file.write("<h1>Vulnerability Report</h1>\n")
            for result in results:
                # Results echo text read from input files; escape it so it
                # cannot be interpreted as markup.
                file.write(f"<p>{html.escape(result, quote=False)}</p>\n")
            file.write("</body></html>")
        elif format_type == "md":
            file.write("# Vulnerability Report\n")
            for result in results:
                file.write(f"* {result}\n")
        else:  # plain text is the default
            for result in results:
                file.write(f"{result}\n")
def check_vulnerabilities(requirements, vulnerabilities, output_file):
    """Compare pinned dependencies with known-vulnerable version ranges.

    Warnings are listed before the unaffected entries. The report is written
    through ``output_results`` when ``output_file`` is given, otherwise it is
    printed to standard output.

    Args:
        requirements: dict mapping package name to pinned version string.
        vulnerabilities: dict mapping package name to a ``SpecifierSet`` of
            vulnerable versions.
        output_file: Report path whose extension selects the format
            (txt, md, html, pdf; case-insensitive), or a falsy value to
            print instead. An unsupported extension falls back to ``.txt``.
    """

    def _canonical(name):
        # Package names compare case-insensitively and treat runs of
        # '-', '_' and '.' as equivalent (PEP 503 normalization).
        return re.sub(r"[-_.]+", "-", name).lower()

    known = {_canonical(name): spec for name, spec in vulnerabilities.items()}

    results_warning = []  # vulnerable (or uncheckable) dependencies
    results_ok = []  # dependencies with no known vulnerability
    for req_name, req_version in requirements.items():
        spec = known.get(_canonical(req_name))
        if spec is None:
            results_ok.append(
                f"OK: {req_name} not found in the vulnerability database."
            )
            continue
        try:
            parsed = version.parse(req_version)
        except version.InvalidVersion:
            # One malformed pin must not abort the whole scan.
            results_warning.append(
                f"WARNING: {req_name}=={req_version} has an invalid version "
                "and could not be checked."
            )
            continue
        # `parsed in spec` silently excludes pre-releases, which would report
        # a pinned alpha/beta/rc inside a vulnerable range as safe.
        if spec.contains(parsed, prereleases=True):
            results_warning.append(
                f"WARNING: {req_name}=={req_version} is vulnerable!"
            )
        else:
            results_ok.append(f"OK: {req_name}=={req_version} is not affected.")

    # Warnings first, then everything that is fine.
    results = results_warning + results_ok

    if not output_file:
        print("\n".join(results))
        return

    filename, ext = os.path.splitext(output_file)
    output_format = ext[1:].lower() or "txt"
    if output_format not in ["txt", "md", "html", "pdf"]:
        print("Warning: Invalid file format specified. Defaulting to TXT format.")
        output_format = "txt"
        output_file = filename + ".txt"
    output_results(output_file, results, output_format)
def main():
    """Command-line entry point for the dependency vulnerability check.

    Reads the vulnerability data file and the requirements file named on the
    command line, then reports the result to the ``-o/--output`` path, or to
    standard output when no path is given.
    """
    cli = argparse.ArgumentParser(
        description="Check project dependencies for vulnerabilities."
    )
    positional_arguments = (
        ("vulnerabilities_file", "Path to the file containing vulnerability data"),
        ("requirements_file", "Path to the requirements file of the project"),
    )
    for argument_name, help_text in positional_arguments:
        cli.add_argument(argument_name, help=help_text)
    cli.add_argument(
        "-o",
        "--output",
        help="Output file path with extension, e.g., './output/report.txt'",
    )
    options = cli.parse_args()

    # Load the vulnerability data first, then the project's requirements.
    known_vulnerabilities = load_vulnerable_packages(options.vulnerabilities_file)
    pinned_requirements = load_requirements(options.requirements_file)
    check_vulnerabilities(pinned_requirements, known_vulnerabilities, options.output)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,81 @@
# 项目设计文档 - 后门检测系统
## 静态代码后门检测
**功能描述**:
这个脚本用于扫描指定路径下的代码文件,检测潜在的危险函数调用,支持 `.py`, `.js`, `.cpp` 文件。
**主要组件**:
- `read_file_content(file_path)`: 读取文件内容。
- `remove_comments(code, extension)`: 移除代码中的注释。
- `find_dangerous_functions(file_content, file_extension)`: 检测并标记危险函数的使用与威胁等级。
- `output_results(results, output_format, output_file)`: 输出检测结果到指定格式和路径。
**输入**:
- 代码路径(文件或目录)。
- 输出文件路径和格式(通过命令行参数指定)。
**输出**:
- 安全分析报告,可选格式为 HTML、Markdown、TXT 或 PDF。
**设计考虑**:
- 动态识别文件类型并适应不同的注释规则。
- 使用正则表达式检测潜在的危险函数调用。
- 使用 ReportLab 库生成 PDF,提供丰富的文档布局。
**使用示例**:
```bash
python backdoor_detection.py ./src -o ./output/report.pdf
```
---
## 依赖版本漏洞检测
**功能描述**:
这个脚本用于检测项目依赖中是否存在已知的安全漏洞。它通过读取一个包含漏洞信息的文件和项目的 `requirements.txt`,对比确定哪些依赖项是不安全的。
**主要组件**:
- `load_vulnerable_packages(filename)`: 从给定的文件中加载包含漏洞的包信息。
- `load_requirements(filename)`: 从项目的 `requirements.txt` 文件中加载依赖信息。
- `output_results(filename, results, format_type)`: 根据指定格式输出检测结果。
- `check_vulnerabilities(requirements, vulnerabilities, output_file)`: 核心功能,对比依赖与漏洞信息并生成报告。
- `output_pdf(results, file_name)`: 使用 ReportLab 将检测结果生成为 PDF 格式的报告。
**输入**:
- 依赖漏洞文件路径。
- 项目 `requirements.txt` 文件路径。
- 输出文件路径和格式(通过命令行参数指定)。
**输出**:
- 报告文件,格式可以是 HTML、Markdown、TXT 或 PDF。
**设计考虑**:
- 使用 `argparse` 处理命令行输入。
- 使用 `packaging` 库来处理和比较版本号。
- 使用异常处理来确保文件读写操作的安全性。
**使用示例**:
```bash
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o ./output/report.md
```
---
### 结论
这两个脚本为后门检测项目提供了两个不同的安全检查角度:一个是外部依赖的安全性,另一个是内部代码潜在的安全漏洞。通过将这两种功能结合,可以提供一个全面的安全审计工具,以保障项目的安全性。
---
以上就是针对后门检测系统的项目设计文档。通过这样的设计,项目团队可以更好地了解系统的运作方式和如何使用系统进行安全检测。

View File

@ -6,6 +6,8 @@
工具开发:使用正则表达式和模式匹配来搜索代码中的可疑结构或者片段。
参考项目: [https://github.com/SonarSource/sonarqube]
## 控制流分析
通过分析程序的控制流(即程序中各个操作的执行顺序),可以检测到异常的控制流路径,这些路径可能是后门的迹象。
@ -18,6 +20,8 @@
实施策略:开发脚本或工具来自动化检查外部库的可信度和更新记录。
这个网站可以搜索依赖中是否存在漏洞: [https://security.snyk.io/package/pip/]
## 异常行为检测
通过定义“正常”代码行为的基线,可以标识出异常行为,这些异常行为可能指示着后门的存在。

View File

@ -0,0 +1,51 @@
# 技术说明文档 - 后门检测系统
本文档详细说明了后门检测系统中使用的技术和库,以及这些技术的应用方式和原理。
## 1. Python 编程语言
本项目主要使用 Python 编程语言编写。Python 是一种解释型、高级和通用的编程语言。Python 的设计哲学强调代码的可读性和简洁的语法(尤其是使用空格缩进划分代码块,而非使用大括号或关键字)。详细信息可参考:[Python 官网](https://www.python.org/)
## 2. `packaging`
`packaging` 库提供了版本号解析和比较的功能,非常适合用于处理和比较软件包的版本号。在本项目中,它被用来解析 `requirements.txt` 文件中的依赖版本,并与已知的漏洞版本进行比较,以判断是否存在安全风险。
- **主要应用**:比较依赖包版本是否在漏洞版本范围内。
- **官方文档**:[packaging on PyPI](https://pypi.org/project/packaging/)
## 3. `reportlab`
`reportlab` 是 Python 中强大的 PDF 生成库,允许快速创建复杂的 PDF 文档。在此项目中,`reportlab` 用于生成具有格式化文本和布局的 PDF 报告。
- **主要应用**:生成 PDF 格式的报告,包括带有标题、段落和间距的文档结构。
- **官方文档**:[ReportLab User Guide](https://www.reportlab.com/docs/reportlab-user-guide.pdf)
## 4. `argparse`
`argparse` 库是用于解析命令行参数和选项的标准库。它让开发者能够轻松地编写用户友好的命令行接口,程序可以从 `sys.argv` 中提取出所需的命令行参数。本项目中使用 `argparse` 来接收用户指定的文件路径和输出格式。
- **主要应用**:解析命令行输入,获取用户指定的文件路径和输出选项。
- **官方文档**:[argparse — Command-line option and argument parsing](https://docs.python.org/3/library/argparse.html)
## 5. 正则表达式 (`re` 模块)
正则表达式在本项目中用于从配置文件中提取出软件包名称和版本范围。`re` 模块提供了对正则表达式的全面支持,允许进行复杂的字符串搜索、匹配及替换。
- **主要应用**:解析和处理文本数据,特别是在加载漏洞信息和分析代码文件时用于提取特定模式的字符串。
- **官方文档**:[re — Regular expression operations](https://docs.python.org/3/library/re.html)
## 6. 文件处理
文件的读取和写入是通过 Python 的内置功能进行的,确保了项目能够处理外部数据文件和输出结果到指定的文件中。
- **主要应用**:读取漏洞数据文件和依赖文件,输出结果报告到文本、Markdown、HTML 或 PDF 文件。
## 7. 代码和风险分析
项目中实现了基本的静态代码分析功能,用于识别和报告潜在的安全风险函数调用,如 `system`、`exec` 等。
- **技术说明**:通过正则表达式匹配高风险函数的调用,评估代码文件的安全性。
通过这些技术的综合应用,后门检测系统能够为用户提供全面的安全检测功能,
帮助识别和预防安全风险。这些技术的深入了解和正确应用是确保系统有效运行的关键。

View File

@ -0,0 +1,81 @@
# 使用说明文档 - 后门检测系统
本文档提供了后门检测系统的使用方法,包括依赖版本漏洞检测和静态代码后门检测两部分。这将帮助用户正确执行安全检测,并理解输出结果。
## 安装需求
在开始使用本系统之前,请确保您的环境中安装了以下依赖:
- Python 3.6 或更高版本
- `packaging` 库:用于版本号的解析和比较
- `reportlab` 库:用于生成 PDF 报告
您可以通过以下命令安装必要的 Python 库:
```bash
pip install packaging reportlab
```
## 下载和配置
- 克隆或下载后门检测系统到您的本地环境。
- 确保脚本文件 (`requirements_detection.py` 和 `backdoor_detection.py`) 在您的工作目录中。
## 运行依赖版本漏洞检测脚本
**命令格式**
```bash
python requirements_detection.py <vulnerabilities_file> <requirements_file> -o <output_file>
```
**参数说明**
- `<vulnerabilities_file>`: 包含漏洞信息的文件路径。
- `<requirements_file>`: 项目的 `requirements.txt` 文件路径。
- `<output_file>`: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`
**示例**
```bash
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o output/report.md
```
## 运行静态代码后门检测脚本
**命令格式**
```bash
python backdoor_detection.py <code_path> -o <output_file>
```
**参数说明**
- `<code_path>`: 代码文件或目录的路径。
- `<output_file>`: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`
**示例**
```bash
python backdoor_detection.py ./src -o output/report.pdf
```
## 结果解读
- 输出结果将根据指定的格式保存在您指定的文件中。
- 结果中会标注出每个文件中发现的高风险和中风险函数调用位置。
- 对于依赖检测,结果将标明每个依赖包的安全状态,包括存在安全风险的依赖及其版本。
## 常见问题处理
- 确保所有路径都正确无误,避免因路径错误导致文件读取失败。
- 如果输出格式指定错误,系统将默认输出为 `.txt` 格式。
- 确保安装了所有必要的依赖库,以避免运行时错误。
## 支持
如果您在使用过程中遇到任何问题或需要进一步的技术支持,请联系开发团队,或访问我们的 Git 仓库以获取帮助和最新信息。
---
以上是后门检测系统的使用说明文档。请按照这些步骤进行操作,以确保您能有效地使用本系统进行安全检测。

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
reportlab
packaging

0
tests/__init__.py Normal file
View File

18
tests/python-test.yml Normal file
View File

@ -0,0 +1,18 @@
# Runs the unit tests on every push to main and on pull requests
# that target main.
name: Python application test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      # Dependencies are fetched through the Tsinghua PyPI mirror.
      - name: Install dependencies
        run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
      # Discovers and runs every test under the tests/ directory.
      - name: Run tests
        run: python -m unittest discover -s tests

View File

@ -0,0 +1,60 @@
import unittest
from detection.backdoor_detection import find_dangerous_functions
class TestBackdoorDetection(unittest.TestCase):
    """Checks for ``find_dangerous_functions`` on Python source snippets.

    Each test feeds a small snippet to the detector and inspects the
    ``"high"``, ``"medium"`` and ``"low"`` entries of the returned mapping,
    which are expected to hold ``(line_number, code)`` tuples with trailing
    comments removed from the code.
    """

    def _scan_python(self, source):
        # Every snippet in this class is scanned as a ``.py`` file.
        return find_dangerous_functions(source, ".py")

    def test_high_risk_detection(self):
        source = """import os
os.system('rm -rf /') # high risk
exec('print("Hello")') # high risk
eval('2 + 2') # high risk
"""
        findings = self._scan_python(source)
        expected_hits = (
            (2, "os.system('rm -rf /')"),
            (3, "exec('print(\"Hello\")')"),
            (4, "eval('2 + 2')"),
        )
        for hit in expected_hits:
            self.assertIn(hit, findings["high"])

    def test_medium_risk_detection(self):
        source = """import subprocess
subprocess.run(['ls', '-l']) # medium risk
import os
os.popen('ls') # medium risk
"""
        findings = self._scan_python(source)
        expected_hits = (
            (2, "subprocess.run(['ls', '-l'])"),
            (4, "os.popen('ls')"),
        )
        for hit in expected_hits:
            self.assertIn(hit, findings["medium"])

    def test_no_risk_detection(self):
        source = """a = 10
b = a + 5
print('This should not be detected as risky.')
"""
        findings = self._scan_python(source)
        # Harmless code must leave every risk level empty.
        for level in ("high", "medium", "low"):
            self.assertEqual(len(findings[level]), 0)

    def test_inclusion_of_comments(self):
        source = """# Just a comment line
print('This is a safe line')
eval('2 + 2') # This should be high risk
subprocess.run(['echo', 'hello']) # This should be medium risk
"""
        findings = self._scan_python(source)
        # Line numbers still count the leading comment line.
        self.assertIn((3, "eval('2 + 2')"), findings["high"])
        self.assertIn((4, "subprocess.run(['echo', 'hello'])"), findings["medium"])
if __name__ == "__main__":
unittest.main()