Merge pull request 'feature/crawler' (#14) from feature/crawler into main
Some checks are pending
Python application test / build (push) Waiting to run

Reviewed-on: #14
Reviewed-by: sangge <sangge@noreply.localhost>
Reviewed-by: ccyj <ccyj@noreply.localhost>
This commit is contained in:
ccyj 2024-05-11 20:49:00 +08:00
commit ca68c1ee94
10 changed files with 287 additions and 3382 deletions

View File

@ -1,62 +0,0 @@
import requests
from bs4 import BeautifulSoup
def fetch_html(url, timeout=10):
    """Fetch the HTML body of ``url`` with an HTTP GET request.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot stall the crawl forever.

    Returns:
        The response text when the server answers with status 200;
        ``None`` for any other status or when the request itself fails
        (connection error, timeout, invalid URL, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a non-200 status so the
        # caller's existing "nothing fetched" branch handles both.
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_html(html):
    """Extract link texts and span texts from the ``sortable-table`` table.

    For every ``<tr>`` that has at least two ``<td>`` cells, each ``<a>`` in
    the second cell is paired with the list of stripped texts of all
    ``<span>`` elements found in that same cell.

    Args:
        html: HTML document as a string.

    Returns:
        A list of ``(link_text, [span_text, ...])`` tuples. The list is empty
        when the table is absent or no row qualifies.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", id="sortable-table")
    if table is None:
        return []

    # The markup may omit <tbody>; search the table itself in that case
    # instead of calling find_all on None.
    body = table.find("tbody")
    if body is None:
        body = table

    results = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        second_cell = cells[1]
        spans = [span.text.strip() for span in second_cell.find_all("span")]
        # Every link in the cell shares the same span list.
        results.extend(
            (link.text.strip(), spans) for link in second_cell.find_all("a")
        )
    return results
def save_results_to_file(results, filename):
    """Append extracted records to a UTF-8 text file.

    Args:
        results: Iterable of ``(package_name, version_ranges)`` pairs, where
            ``version_ranges`` is an iterable of strings.
        filename: Path of the file to append to; it is created if missing.
    """
    rule = "-" * 50  # separator line written after every record
    # Append mode: repeated calls accumulate records instead of overwriting.
    with open(filename, "a", encoding="utf-8") as out:
        for package_name, version_ranges in results:
            print(f"Package Name: {package_name}", file=out)
            print("Version Ranges: " + ", ".join(version_ranges), file=out)
            print(rule, file=out)
def main():
    """Crawl consecutive listing pages and append each page's records to a file.

    Pages are requested as ``<base_url><n>`` starting at ``n = 1``. Crawling
    stops at the first page that cannot be fetched or yields no records.
    """
    base_url = "https://security.snyk.io/vuln/pip/"
    page = 1
    while True:
        page_url = f"{base_url}{page}"
        print(f"Fetching data from {page_url}")
        html = fetch_html(page_url)
        if not html:
            print("No more data found or failed to fetch.")
            break
        records = parse_html(html)
        if records:
            save_results_to_file(records, "extracted_data.txt")
            page += 1
            continue
        print("No relevant data found on page.")
        break
    print("Results have been saved to 'extracted_data.txt'.")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@ -1,507 +0,0 @@
Package Name: apache-airflow
Version Ranges: <2.6.1,>=2.3.0
--------------------------------------------------
Package Name: mlflow
Version Ranges: <2.10.0
--------------------------------------------------
Package Name: torch
Version Ranges: <1.10.0
--------------------------------------------------
Package Name: aiohttp
Version Ranges: <3.9.2,>=1.0.5
--------------------------------------------------
Package Name: keras
Version Ranges: <2.13.1rc0
--------------------------------------------------
Package Name: llama-index
Version Ranges: <0.10.24
--------------------------------------------------
Package Name: zenml
Version Ranges: <0.42.2,<0.43.1,<0.44.4,<0.47.0,>=0.43.0,>=0.44.0,>=0.46.0
--------------------------------------------------
Package Name: gradio
Version Ranges: <4.9.0
--------------------------------------------------
Package Name: bentoml
Version Ranges: <1.2.5
--------------------------------------------------
Package Name: langchain
Version Ranges: <0.0.353
--------------------------------------------------
Package Name: scrapy
Version Ranges: <1.8.4,<2.11.1,>=2.0.0
--------------------------------------------------
Package Name: sqlparse
Version Ranges: <0.5.0
--------------------------------------------------
Package Name: gunicorn
Version Ranges: <22.0.0
--------------------------------------------------
Package Name: magnum
Version Ranges: <14.1.2,<15.0.2,<16.0.2,<17.0.2,>=15.0.0.0rc1,>=16.0.0.0rc1,>=17.0.0.0rc1
--------------------------------------------------
Package Name: nicegui
Version Ranges: <1.4.21,>=1.4.6
--------------------------------------------------
Package Name: idna
Version Ranges: <3.7
--------------------------------------------------
Package Name: llama-index-core
Version Ranges: <0.10.24
--------------------------------------------------
Package Name: litellm
Version Ranges: <1.34.42
--------------------------------------------------
Package Name: roundup
Version Ranges: <1.2.1,<1.4.6,>=1.2.0,>=1.4.0
--------------------------------------------------
Package Name: transformers
Version Ranges: <4.37.0
--------------------------------------------------
Package Name: dirac
Version Ranges: <8.0.37,<9.0.0a22,>=8.0.0,>=8.1.0a1
--------------------------------------------------
Package Name: yt-dlp
Version Ranges: <2024.4.9,>=2021.4.11
--------------------------------------------------
Package Name: rafcon
Version Ranges: <0.15.4
--------------------------------------------------
Package Name: radicale
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: pcaspy
Version Ranges: <0.7.1
--------------------------------------------------
Package Name: holidays
Version Ranges: <0.45
--------------------------------------------------
Package Name: evennia
Version Ranges: <4.0.0
--------------------------------------------------
Package Name: django-json-widget
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: avocado-framework
Version Ranges: <104.0
--------------------------------------------------
Package Name: arrendatools.plantillas
Version Ranges: <0.4.3
--------------------------------------------------
Package Name: amazon-product-details-scraper
Version Ranges: <1.0.4
--------------------------------------------------
Package Name: aiopioneer
Version Ranges: <0.1.5
--------------------------------------------------
Package Name: pgadmin4
Version Ranges: <8.4
--------------------------------------------------
Package Name: pymongo
Version Ranges: <4.6.3
--------------------------------------------------
Package Name: voila
Version Ranges: <0.2.17,<0.3.8,<0.4.4,<0.5.6,>=0.0.2,>=0.3.0a0,>=0.4.0a0,>=0.5.0a0
--------------------------------------------------
Package Name: piccolo-admin
Version Ranges: <1.3.2
--------------------------------------------------
Package Name: cryptoauthlib
Version Ranges: <20200912
--------------------------------------------------
Package Name: mosaicml
Version Ranges: <0.5.0
--------------------------------------------------
Package Name: mlrun
Version Ranges: <1.7.0rc5
--------------------------------------------------
Package Name: eventlet
Version Ranges: <0.34.3
--------------------------------------------------
Package Name: salt
Version Ranges: <3005.5
--------------------------------------------------
Package Name: django-two-factor-auth
Version Ranges: <1.13
--------------------------------------------------
Package Name: pillow
Version Ranges: <10.2.0
--------------------------------------------------
Package Name: ipywidgets
Version Ranges: <5.2.0,>=5.0.0
--------------------------------------------------
Package Name: pylint
Version Ranges: <2.6.1
--------------------------------------------------
Package Name: pytest-cov
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: jupyterhub
Version Ranges: <4.1.0
--------------------------------------------------
Package Name: geonode
Version Ranges: <4.1.0
--------------------------------------------------
Package Name: langchain-core
Version Ranges: <0.1.7
--------------------------------------------------
Package Name: lektor
Version Ranges: <3.3.11
--------------------------------------------------
Package Name: ansys-geometry-core
Version Ranges: <0.3.3,<0.4.12,>=0.3.0,>=0.4.0
--------------------------------------------------
Package Name: nautobot
Version Ranges: <1.6.10,<2.1.2,>=2.0.0
--------------------------------------------------
Package Name: mobsfscan
Version Ranges: <0.3.8
--------------------------------------------------
Package Name: esphome
Version Ranges: <2024.2.1
--------------------------------------------------
Package Name: qiskit-ibm-runtime
Version Ranges: <0.21.2,>=0.1.0
--------------------------------------------------
Package Name: jupyter-server-proxy
Version Ranges: <3.2.3,<4.1.1,>=4.0.0
--------------------------------------------------
Package Name: oauthenticator
Version Ranges: <16.3.0
--------------------------------------------------
Package Name: octoprint
Version Ranges: <1.10.0rc1
--------------------------------------------------
Package Name: wiki
Version Ranges: <0.10.1
--------------------------------------------------
Package Name: astropy
Version Ranges: <5.3.3
--------------------------------------------------
Package Name: yaql
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: black
Version Ranges: <24.3.0
--------------------------------------------------
Package Name: fgr
Version Ranges: <0.4.0
--------------------------------------------------
Package Name: vantage6
Version Ranges: <4.2.0
--------------------------------------------------
Package Name: paddlepaddle
Version Ranges: <2.6.0
--------------------------------------------------
Package Name: mssql-django
Version Ranges: <1.4.1
--------------------------------------------------
Package Name: aiosmtpd
Version Ranges: <1.4.5
--------------------------------------------------
Package Name: ckan
Version Ranges: <2.10.1,<2.9.9,>=2.10.0
--------------------------------------------------
Package Name: langchain-community
Version Ranges: <0.0.27
--------------------------------------------------
Package Name: libosdp
Version Ranges: <3.0.0
--------------------------------------------------
Package Name: weasyprint
Version Ranges: <61.2,>=61.0
--------------------------------------------------
Package Name: apache-superset
Version Ranges: <3.0.3
--------------------------------------------------
Package Name: jwcrypto
Version Ranges: <1.5.6,>=0.5.0
--------------------------------------------------
Package Name: paho-mqtt
Version Ranges: <1.1
--------------------------------------------------
Package Name: rq
Version Ranges: <0.7.1
--------------------------------------------------
Package Name: eth-abi
Version Ranges: <5.0.1
--------------------------------------------------
Package Name: prefect
Version Ranges: <2.15.0
--------------------------------------------------
Package Name: django-treenode
Version Ranges: <0.20.0
--------------------------------------------------
Package Name: hypercorn
Version Ranges: <0.16.0
--------------------------------------------------
Package Name: streamlink
Version Ranges: <5.3.0
--------------------------------------------------
Package Name: kedro
Version Ranges: <0.19.3
--------------------------------------------------
Package Name: pyccel
Version Ranges: <1.9.0
--------------------------------------------------
Package Name: django
Version Ranges: <3.2.24,<4.2.10,<5.0.2,>=3.2,>=4.2,>=5.0
--------------------------------------------------
Package Name: videomass
Version Ranges: <5.0.4
--------------------------------------------------
Package Name: ultralytics
Version Ranges: <8.1.0
--------------------------------------------------
Package Name: intel-extension-for-transformers
Version Ranges: <1.2.2
--------------------------------------------------
Package Name: labgrid
Version Ranges: <23.0.2
--------------------------------------------------
Package Name: docassemble.webapp
Version Ranges: <1.4.97
--------------------------------------------------
Package Name: docassemble.base
Version Ranges: <1.4.97,>=1.4.53
--------------------------------------------------
Package Name: docassemble
Version Ranges: <1.4.97
--------------------------------------------------
Package Name: langchain-experimental
Version Ranges: <0.0.52
--------------------------------------------------
Package Name: label-studio
Version Ranges: <1.10.1
--------------------------------------------------
Package Name: rpyc
Version Ranges: <5.2.1
--------------------------------------------------
Package Name: peewee
Version Ranges: <3.17.1
--------------------------------------------------
Package Name: urllib3-future
Version Ranges: <2.4.902
--------------------------------------------------
Package Name: flask-appbuilder
Version Ranges: <4.3.11
--------------------------------------------------
Package Name: pretix
Version Ranges: <2024.1.1
--------------------------------------------------
Package Name: orjson
Version Ranges: <3.9.15
--------------------------------------------------
Package Name: pypqc
Version Ranges: <0.0.6.1
--------------------------------------------------
Package Name: mjml
Version Ranges: <0.11.0
--------------------------------------------------
Package Name: onnx
Version Ranges: <1.16.0
--------------------------------------------------
Package Name: fastecdsa
Version Ranges: <2.3.2
--------------------------------------------------
Package Name: pymatgen
Version Ranges: <2024.2.20
--------------------------------------------------
Package Name: cryptography
Version Ranges: <42.0.2,>=35.0.0
--------------------------------------------------
Package Name: apache-airflow-providers-mongo
Version Ranges: <4.0.0,>=1.0.0
--------------------------------------------------
Package Name: cbor2
Version Ranges: <5.6.0
--------------------------------------------------
Package Name: intel-extension-for-tensorflow
Version Ranges: <2.13.0.0
--------------------------------------------------
Package Name: tuf
Version Ranges: <3.1.1,>=2.0.0
--------------------------------------------------
Package Name: zpywallet
Version Ranges: <0.6.2
--------------------------------------------------
Package Name: dipdup
Version Ranges: <3.0.2
--------------------------------------------------
Package Name: clip-retrieval
Version Ranges: <2.23.1
--------------------------------------------------
Package Name: procrastinate
Version Ranges: <0.11.0
--------------------------------------------------
Package Name: embedchain
Version Ranges: <0.1.57
--------------------------------------------------
Package Name: miarec-ftpfs
Version Ranges: <2024.1.2
--------------------------------------------------
Package Name: miarec-sshfs
Version Ranges: <2024.1.5
--------------------------------------------------
Package Name: linkml
Version Ranges: <1.5.2
--------------------------------------------------
Package Name: toodledo
Version Ranges: <1.5.0
--------------------------------------------------
Package Name: renku
Version Ranges: <1.11.0
--------------------------------------------------
Package Name: vunnel
Version Ranges: <0.18.0
--------------------------------------------------
Package Name: panda3d
Version Ranges: <1.9.4
--------------------------------------------------
Package Name: ludwig
Version Ranges: <0.7
--------------------------------------------------
Package Name: ethyca-fides
Version Ranges: <2.1.0
--------------------------------------------------
Package Name: hiddifypanel
Version Ranges: <9.0.0.dev30
--------------------------------------------------
Package Name: dgl
Version Ranges: <0.9.0
--------------------------------------------------
Package Name: deephaven-core
Version Ranges: <0.30.0
--------------------------------------------------
Package Name: borgmatic
Version Ranges: <1.8.7
--------------------------------------------------
Package Name: cg
Version Ranges: <26.0.4
--------------------------------------------------
Package Name: ccryptofeed
Version Ranges: <2.2.3
--------------------------------------------------
Package Name: c2cgeoform
Version Ranges: <2.1.26
--------------------------------------------------
Package Name: appfl
Version Ranges: <0.4.0
--------------------------------------------------
Package Name: nonebot2
Version Ranges: <2.2.0,>=2.0.0a16
--------------------------------------------------
Package Name: acryl-datahub
Version Ranges: <0.8.45
--------------------------------------------------
Package Name: bullmq
Version Ranges: <1.15.0
--------------------------------------------------
Package Name: aiobotocore
Version Ranges: <2.9.1
--------------------------------------------------
Package Name: diffoscope
Version Ranges: <256
--------------------------------------------------
Package Name: kinto-attachment
Version Ranges: <6.4.0
--------------------------------------------------
Package Name: bandit
Version Ranges: <1.7.7
--------------------------------------------------
Package Name: dnspython
Version Ranges: <2.6.1
--------------------------------------------------
Package Name: products.sqlalchemyda
Version Ranges: <2.2
--------------------------------------------------
Package Name: clearml
Version Ranges: <1.14.2
--------------------------------------------------
Package Name: tensorflow
Version Ranges: <1.7.1
--------------------------------------------------
Package Name: pyload-ng
Version Ranges: <0.5.0b3.dev78
--------------------------------------------------
Package Name: fastapi
Version Ranges: <0.109.1
--------------------------------------------------
Package Name: python-multipart
Version Ranges: <0.0.7
--------------------------------------------------
Package Name: kinto
Version Ranges: <6.1.0
--------------------------------------------------
Package Name: cupy
Version Ranges: <13.0.0
--------------------------------------------------
Package Name: llama-hub
Version Ranges: <0.0.67
--------------------------------------------------
Package Name: borgbackup
Version Ranges: <1.0.7
--------------------------------------------------
Package Name: snakemake
Version Ranges: <7.9.0
--------------------------------------------------
Package Name: lief
Version Ranges: <0.12.3
--------------------------------------------------
Package Name: checkov
Version Ranges: <2.0.1029
--------------------------------------------------
Package Name: dash-html-components
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: dash
Version Ranges: <2.15.0
--------------------------------------------------
Package Name: dash-core-components
Version Ranges: <2.0.0
--------------------------------------------------
Package Name: glance-store
Version Ranges: <4.3.3,<4.7.0,>=4.4.0
--------------------------------------------------
Package Name: dagster
Version Ranges: <1.1.10
--------------------------------------------------
Package Name: wagtail
Version Ranges: <5.2rc1
--------------------------------------------------
Package Name: pycryptodome
Version Ranges: <3.19.1
--------------------------------------------------
Package Name: celery
Version Ranges: <4.4.0rc5
--------------------------------------------------
Package Name: vantage6-server
Version Ranges: <4.2.0
--------------------------------------------------
Package Name: tuitse-tsusin
Version Ranges: <1.3.2
--------------------------------------------------
Package Name: apache-airflow-providers-cncf-kubernetes
Version Ranges: <7.0.0,>=5.2.0
--------------------------------------------------
Package Name: whoogle-search
Version Ranges: <0.8.4
--------------------------------------------------
Package Name: jupyterlab-lsp
Version Ranges: <5.0.2
--------------------------------------------------
Package Name: changedetection.io
Version Ranges: <0.45.13
--------------------------------------------------
Package Name: jupyterlab
Version Ranges: <4.0.11,>=4.0.0
--------------------------------------------------
Package Name: ansible-core
Version Ranges: <2.14.14,<2.15.9,<2.16.3,>=2.15.0,>=2.16.0
--------------------------------------------------
Package Name: readthedocs-sphinx-search
Version Ranges: <0.3.2
--------------------------------------------------
Package Name: zodb3
Version Ranges: <3.8.3,<3.9.0c2,>=3.8.0a1,>=3.9.0
--------------------------------------------------

View File

@ -1,48 +0,0 @@
"""转换原有的漏洞文件格式"""
import re
from packaging.specifiers import SpecifierSet
def load_vulnerable_packages(filename):
    """Load vulnerable-package records from a text file.

    The file is split on the 50-dash separator line. In each chunk the
    package name is taken from the ``Package Name:`` line, and every
    ``[start, end)`` interval is rewritten as ``>=start`` / ``<end`` clauses.

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        A dict mapping package name to a ``SpecifierSet``. Packages without
        any usable interval are left out.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        raw_text = handle.read()

    interval_re = re.compile(r"\[(.*?),\s*(.*?)\)")
    vulnerabilities = {}
    for chunk in raw_text.split("--------------------------------------------------"):
        header = re.search(r"Package Name: (.+)", chunk)
        if header is None:
            continue
        clauses = []
        for lower, upper in interval_re.findall(chunk):
            bounds = []
            if lower:
                bounds.append(f">={lower}")
            if upper:
                bounds.append(f"<{upper}")
            if bounds:
                clauses.append(",".join(bounds))
        if clauses:
            # NOTE(review): comma-joined specifiers are presumably combined
            # with AND by SpecifierSet, so several disjoint intervals may end
            # up matching no version at all. Confirm this is intended.
            vulnerabilities[header.group(1).strip()] = SpecifierSet(",".join(clauses))
    return vulnerabilities
def save_vulnerabilities_to_file(vuln_packages, filename):
    """Write the vulnerability mapping to a UTF-8 text file, replacing it.

    Args:
        vuln_packages: Mapping of package name to its version specifiers;
            each value is rendered with ``str()`` formatting.
        filename: Path of the file to (over)write.
    """
    rule = "-" * 50 + "\n"  # separator line written after every record
    with open(filename, "w", encoding="utf-8") as out:
        for package, specifiers in vuln_packages.items():
            record = "".join(
                (
                    f"Package Name: {package}\n",
                    f"Version Ranges: {specifiers}\n",
                    rule,
                )
            )
            out.write(record)
def main():
    """Convert ``extracted_data.txt`` into ``trans_extracted_data.txt``."""
    save_vulnerabilities_to_file(
        load_vulnerable_packages("extracted_data.txt"),
        "trans_extracted_data.txt",
    )


if __name__ == "__main__":
    main()

View File

@ -1,7 +1,9 @@
import argparse
import os
import re
import sys
import os
import requests
import argparse
from bs4 import BeautifulSoup
from typing import List, Tuple, Optional
from packaging import version
from packaging.specifiers import SpecifierSet
from reportlab.lib.pagesizes import letter
@ -9,10 +11,154 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
def load_vulnerable_packages(filename):
"""从文件加载有漏洞的包信息"""
def fetch_html(url: str, timeout: float = 10) -> Optional[str]:
    """Fetch HTML content from the specified URL.

    Args:
        url (str): URL to fetch HTML from.
        timeout (float): Seconds to wait for the server before giving up, so
            an unresponsive host cannot stall the crawl forever.

    Returns:
        Optional[str]: The response text when the status code is 200;
        None for any other status or when the request itself fails
        (connection error, timeout, invalid URL, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a non-200 status so the
        # caller's existing "nothing fetched" branch handles both.
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_html(html: str) -> List[Tuple[str, List[str]]]:
    """Parse the ``sortable-table`` table into (link text, span texts) pairs.

    For every 'tr' with at least two 'td' cells, each 'a' tag in the second
    cell is paired with the stripped texts of all 'span' tags in that cell.

    Args:
        html (str): HTML content as a string.

    Returns:
        List[Tuple[str, List[str]]]: One tuple per 'a' tag, holding its text
        and the list of 'span' texts. Empty when the table is absent or no
        row qualifies.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", id="sortable-table")
    if table is None:
        return []

    # The markup may omit <tbody>; search the table itself in that case
    # instead of calling find_all on None.
    body = table.find("tbody")
    if body is None:
        body = table

    results: List[Tuple[str, List[str]]] = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        second_cell = cells[1]
        spans = [span.text.strip() for span in second_cell.find_all("span")]
        # Every link in the cell shares the same span list.
        results.extend(
            (link.text.strip(), spans) for link in second_cell.find_all("a")
        )
    return results
def format_results(results: List[Tuple[str, List[str]]]) -> str:
    """Render extracted records as a text report.

    Args:
        results (List[Tuple[str, List[str]]]): Pairs of package name and the
            version-range strings collected for it.

    Returns:
        str: One three-line record per pair (name line, comma-joined ranges
        line, 50-dash separator line); an empty string for no records.
    """
    rule = "-" * 50 + "\n"
    pieces = []
    for package_name, version_ranges in results:
        pieces.append(f"Package Name: {package_name}\n")
        pieces.append("Version Ranges: " + ", ".join(version_ranges) + "\n")
        pieces.append(rule)
    return "".join(pieces)
def trans_vulnerable_packages(content):
    """Convert ``[start, end)`` intervals into ``>=`` / ``<`` specifiers.

    Args:
        content (str): Report text made of records separated by a 50-dash
            line, each containing a ``Package Name:`` line.

    Returns:
        dict: Package name mapped to a ``SpecifierSet`` built from all
        intervals found in that record. Records without a usable interval
        are left out.
    """
    interval_re = re.compile(r"\[(.*?),\s*(.*?)\)")
    vulnerabilities = {}
    for chunk in content.split("--------------------------------------------------"):
        header = re.search(r"Package Name: (.+)", chunk)
        if header is None:
            continue
        clauses = []
        for lower, upper in interval_re.findall(chunk):
            bounds = []
            if lower:
                bounds.append(f">={lower}")
            if upper:
                bounds.append(f"<{upper}")
            if bounds:
                clauses.append(",".join(bounds))
        if clauses:
            # NOTE(review): comma-joined specifiers are presumably combined
            # with AND by SpecifierSet, so several disjoint intervals may end
            # up matching no version at all. Confirm this is intended.
            vulnerabilities[header.group(1).strip()] = SpecifierSet(",".join(clauses))
    return vulnerabilities
def format_vulnerabilities(vuln_packages):
    """Render a package-to-specifiers mapping as a text report.

    Args:
        vuln_packages: Mapping of package name to its version specifiers;
            each value is rendered with ``str()`` formatting.

    Returns:
        str: One three-line record per entry (name line, ranges line,
        50-dash separator line); an empty string for an empty mapping.
    """
    rule = "-" * 50 + "\n"
    return "".join(
        f"Package Name: {package}\n" + f"Version Ranges: {specifiers}\n" + rule
        for package, specifiers in vuln_packages.items()
    )
def load_requirements(filename):
    """Load pinned dependencies from a requirements file.

    Only lines of the form ``name==version`` are kept. Blank lines, comment
    lines starting with ``#`` and lines without ``==`` are ignored.

    Args:
        filename: Path of the UTF-8 requirements file.

    Returns:
        dict: Package name mapped to its pinned version string.
    """
    requirements = {}
    with open(filename, "r", encoding="utf-8") as file:
        # Iterate the handle directly: reading the whole file first would
        # leave nothing for a later readlines() call.
        for raw_line in file:
            line = raw_line.strip()
            if not line or line.startswith("#") or "==" not in line:
                continue
            # Split once so a second "==" later in the line cannot break
            # the two-value unpacking.
            package_name, package_version = line.split("==", 1)
            requirements[package_name.strip()] = package_version.strip()
    return requirements
def check_vulnerabilities(requirements, vulnerabilities, output_file):
    """Compare pinned dependencies with known-vulnerable ranges and report.

    Args:
        requirements: Mapping of package name to pinned version string.
        vulnerabilities: Mapping of package name to a specifier container
            that supports ``parsed_version in container``.
        output_file: Destination path, or a falsy value to print the report.
            The extension selects the format (txt, md, html, pdf); an
            unsupported extension falls back to ``<stem>.txt``.
    """
    flagged = []  # dependencies inside a vulnerable range
    cleared = []  # dependencies that are unaffected or unknown
    for req_name, req_version in requirements.items():
        if req_name not in vulnerabilities:
            cleared.append(f"OK: {req_name} not found in the vulnerability database.")
            continue
        if version.parse(req_version) in vulnerabilities[req_name]:
            flagged.append(f"WARNING: {req_name}=={req_version} is vulnerable!")
        else:
            cleared.append(f"OK: {req_name}=={req_version} is not affected.")

    # Warnings first, then everything that is fine.
    results = flagged + cleared

    if not output_file:
        print("\n".join(results))
        return

    stem, ext = os.path.splitext(output_file)
    output_format = ext[1:] or "txt"
    if output_format not in ("txt", "md", "html", "pdf"):
        print("Warning: Invalid file format specified. Defaulting to TXT format.")
        output_format = "txt"
        output_file = stem + ".txt"
    output_results(output_file, results, output_format)
def trans_vulnerable_packages_to_dict(content):
"""将漏洞信息转换为字典格式
Args:
content str: 漏洞信息汇总.
"""
vulnerabilities = {}
blocks = content.split("--------------------------------------------------")
for block in blocks:
@ -28,18 +174,6 @@ def load_vulnerable_packages(filename):
return vulnerabilities
def load_requirements(filename):
    """Load pinned dependencies from a requirements file.

    Args:
        filename: Path of the UTF-8 requirements file.

    Returns:
        dict: Package name mapped to version for every line containing
        ``==``; other lines are ignored.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        pinned = [row.strip().split("==") for row in handle.readlines() if "==" in row]
    return {package_name: package_version for package_name, package_version in pinned}
def output_pdf(results, file_name):
doc = SimpleDocTemplate(file_name, pagesize=letter)
story = []
@ -99,48 +233,13 @@ def output_results(filename, results, format_type):
for result in results:
file.write(f"{result}\n")
def check_vulnerabilities(requirements, vulnerabilities, output_file):
    """Compare pinned dependencies with known-vulnerable ranges and report.

    Args:
        requirements: Mapping of package name to pinned version string.
        vulnerabilities: Mapping of package name to a specifier container
            that supports ``parsed_version in container``.
        output_file: Destination path, or a falsy value to print the report.
            The extension selects the format (txt, md, html, pdf); an
            unsupported extension falls back to ``<stem>.txt``.
    """
    vulnerable_lines = []
    safe_lines = []
    for req_name, req_version in requirements.items():
        if req_name in vulnerabilities:
            affected = version.parse(req_version) in vulnerabilities[req_name]
            if affected:
                vulnerable_lines.append(
                    f"WARNING: {req_name}=={req_version} is vulnerable!"
                )
            else:
                safe_lines.append(f"OK: {req_name}=={req_version} is not affected.")
        else:
            safe_lines.append(
                f"OK: {req_name} not found in the vulnerability database."
            )

    # Warnings are listed before the unaffected entries.
    results = vulnerable_lines + safe_lines

    if not output_file:
        print("\n".join(results))
        return

    stem, ext = os.path.splitext(output_file)
    output_format = ext[1:] or "txt"
    if output_format not in ("txt", "md", "html", "pdf"):
        print("Warning: Invalid file format specified. Defaulting to TXT format.")
        output_format = "txt"
        output_file = stem + ".txt"
    output_results(output_file, results, output_format)
print("Results have been saved as " + filename)
def main():
parser = argparse.ArgumentParser(
description="Check project dependencies for vulnerabilities."
)
parser.add_argument(
"vulnerabilities_file", help="Path to the file containing vulnerability data"
)
parser.add_argument(
"requirements_file", help="Path to the requirements file of the project"
)
@ -151,9 +250,29 @@ def main():
)
args = parser.parse_args()
vulnerabilities = load_vulnerable_packages(args.vulnerabilities_file)
base_url = "https://security.snyk.io/vuln/pip/"
page_number = 1
crawler_results = ""
while True:
url = f"{base_url}{page_number}"
print(f"Fetching data from {url}")
html_content = fetch_html(url)
if not html_content:
print("No more data found or failed to fetch.")
break
extracted_data = parse_html(html_content)
if not extracted_data:
print("No relevant data found on page.")
break
crawler_results += format_results(extracted_data)
page_number += 1
print("Results have been stored in memory.\n")
trans_res = trans_vulnerable_packages(crawler_results)
trans_res = format_vulnerabilities(trans_res)
trans_res = trans_vulnerable_packages_to_dict(trans_res)
requirements = load_requirements(args.requirements_file)
check_vulnerabilities(requirements, vulnerabilities, args.output)
check_vulnerabilities(requirements, trans_res, args.output)
if __name__ == "__main__":

View File

@ -42,7 +42,8 @@ python backdoor_detection.py ./src -o ./output/report.pdf
**主要组件**:
- `load_vulnerable_packages(filename)`: 从给定的文件中加载包含漏洞的包信息。
- `parse_html`: 爬取网站收集的漏洞依赖信息。
- `trans_vulnerable_packages(content)`: 将漏洞版本中的集合形式转换为大于小于的格式。
- `load_requirements(filename)`: 从项目的 `requirements.txt` 文件中加载依赖信息。
- `output_results(filename, results, format_type)`: 根据指定格式输出检测结果。
- `check_vulnerabilities(requirements, vulnerabilities, output_file)`: 核心功能,对比依赖与漏洞信息并生成报告。
@ -50,7 +51,6 @@ python backdoor_detection.py ./src -o ./output/report.pdf
**输入**:
- 依赖漏洞文件路径。
- 项目 `requirements.txt` 文件路径。
- 输出文件路径和格式(通过命令行参数指定)。
@ -67,7 +67,7 @@ python backdoor_detection.py ./src -o ./output/report.pdf
**使用示例**:
```bash
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o ./output/report.md
python requirements_detection.py ./requirements.txt -o ./output/report.md
```
---

View File

@ -40,7 +40,13 @@
- **主要应用**:读取漏洞数据文件和依赖文件,输出结果报告到文本、Markdown、HTML 或 PDF 文件。
## 7. 代码和风险分析
## 7. 爬虫
利用 `python` 和 `BeautifulSoup` 制作爬虫,快速收集并整理信息。
- **主要应用**:通过爬虫收集漏洞依赖信息并进行汇总,用于判断依赖是否存在漏洞版本。
## 8. 代码和风险分析
项目中实现了基本的静态代码分析功能,用于识别和报告潜在的安全风险函数调用,如 `system`、`exec` 等。

View File

@ -26,20 +26,18 @@ pip install packaging reportlab
**命令格式**
```bash
python requirements_detection.py <vulnerabilities_file> <requirements_file> -o <output_file>
python requirements_detection.py <requirements_file> -o <output_file>
```
**参数说明**
- `<vulnerabilities_file>`: 包含漏洞信息的文件路径。
- `<requirements_file>`: 项目的 `requirements.txt` 文件路径。
- `<output_file>`: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`
**示例**
```bash
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o output/report.md
python requirements_detection.py requirements.txt -o output/report.md
```
## 运行静态代码后门检测脚本

View File

@ -1,3 +1,5 @@
reportlab
requests
packaging
openai
bs4

View File

@ -0,0 +1,97 @@
import unittest
from unittest.mock import patch, Mock, MagicMock
from detection.requirements_detection import (
fetch_html,
parse_html,
format_results,
check_vulnerabilities,
)
from packaging.version import Version
from packaging.specifiers import SpecifierSet
# Assuming the functions from your provided code are imported here
# from your_module import fetch_html, parse_html, format_results, ...
# 测试网页抓取和结果报告的测试类
class TestWebScrapingAndReporting(unittest.TestCase):
    """Tests for the crawler helpers and the vulnerability report."""

    def setUp(self):
        """Provide sample dependency and vulnerability data."""
        self.requirements = {"package1": "1.0", "package2": "2.0"}
        self.vulnerabilities = {
            "package1": SpecifierSet(">=1.0,<2.0"),
            "package3": SpecifierSet(">=1.0,<1.5"),
        }

    def test_fetch_html_success(self):
        """fetch_html returns the response text when the request succeeds."""
        with patch("requests.get") as mocked_get:
            mocked_get.return_value.status_code = 200
            mocked_get.return_value.text = "success"
            url = "https://security.snyk.io/vuln/pip/"
            result = fetch_html(url)
            self.assertEqual(result, "success")

    def test_fetch_html_failure(self):
        """fetch_html returns None when the server answers with an error."""
        with patch("requests.get") as mocked_get:
            # Configure the object returned by requests.get(); setting the
            # status on any other attribute would leave the response's
            # status_code as an unconfigured mock.
            mocked_get.return_value.status_code = 404
            url = "https://security.snyk.io/vuln/pip/"
            result = fetch_html(url)
            self.assertIsNone(result)

    def test_parse_html(self):
        """parse_html extracts the expected (link, spans) pairs."""
        html_content = """
        <table id="sortable-table">
            <tbody>
                <tr><td></td><td><a href="#">Link1</a><span>Span1</span></td></tr>
                <tr><td></td><td><a href="#">Link2</a><span>Span2</span></td></tr>
            </tbody>
        </table>
        """
        expected = [("Link1", ["Span1"]), ("Link2", ["Span2"])]
        result = parse_html(html_content)
        self.assertEqual(result, expected)

    def test_format_results(self):
        """format_results renders parsed data in the report layout."""
        results = [("Package1", ["1.0", "2.0"]), ("Package2", ["1.5", "2.5"])]
        expected_output = (
            "Package Name: Package1\nVersion Ranges: 1.0, 2.0\n"
            + "--------------------------------------------------\n"
            + "Package Name: Package2\nVersion Ranges: 1.5, 2.5\n"
            + "--------------------------------------------------\n"
        )
        formatted_result = format_results(results)
        self.assertEqual(formatted_result, expected_output)

    @patch("builtins.print")  # capture console output
    def test_check_vulnerabilities_no_output_file(self, mock_print):
        """Without an output file the report is printed to the console."""
        check_vulnerabilities(self.requirements, self.vulnerabilities, None)
        expected_calls = [
            unittest.mock.call(
                "WARNING: package1==1.0 is vulnerable!\nOK: package2 not found in the vulnerability database."
            )
        ]
        mock_print.assert_has_calls(expected_calls, any_order=True)

    @patch("builtins.open", new_callable=unittest.mock.mock_open)
    @patch("os.path.splitext", return_value=("output", ".txt"))
    @patch("os.path.exists", return_value=False)
    @patch("os.makedirs")
    def test_check_vulnerabilities_with_output_file(
        self, mock_makedirs, mock_exists, mock_splitext, mock_open
    ):
        """With an output file the report is written to that file."""
        check_vulnerabilities(self.requirements, self.vulnerabilities, "output.txt")
        mock_open.assert_called_once_with("output.txt", "w", encoding="utf-8")
        handle = mock_open()
        handle.write.assert_called()
if __name__ == "__main__":
unittest.main()