Merge pull request 'feature/crawler' (#14) from feature/crawler into main
Some checks are pending
Python application test / build (push) Waiting to run
Some checks are pending
Python application test / build (push) Waiting to run
Reviewed-on: #14 Reviewed-by: sangge <sangge@noreply.localhost> Reviewed-by: ccyj <ccyj@noreply.localhost>
This commit is contained in:
commit
ca68c1ee94
@ -1,62 +0,0 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def fetch_html(url, timeout=10):
    """Fetch the HTML body served at ``url``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network-level failures the same way as a bad status code so
        # callers only ever have to handle ``None``.
        return None
    if response.status_code == 200:
        return response.text
    return None
|
||||
|
||||
|
||||
def parse_html(html):
    """Extract link and span texts from the second cell of each table row.

    The table with ``id="sortable-table"`` is located and, for every ``tr``
    that has at least two ``td`` cells, the stripped text of each ``a`` tag in
    the second cell is paired with the list of stripped ``span`` texts found
    in that same cell.

    Args:
        html: HTML document as a string.

    Returns:
        A list of ``(link_text, span_texts)`` tuples; empty when the table is
        not present.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", id="sortable-table")
    if table is None:
        return []

    # A table without an explicit <tbody> would otherwise make the chained
    # ``find_all`` call fail on ``None``; search the table itself instead.
    body = table.find("tbody")
    if body is None:
        body = table

    results = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        target = cells[1]
        spans = [span.text.strip() for span in target.find_all("span")]
        # Every link in the cell shares the same list of span texts.
        results.extend((a_tag.text.strip(), spans) for a_tag in target.find_all("a"))
    return results
|
||||
|
||||
|
||||
def save_results_to_file(results, filename):
    """Append extracted package records to a UTF-8 text file.

    Each ``(package_name, version_ranges)`` pair in ``results`` becomes a
    "Package Name" line, a "Version Ranges" line with the ranges joined by
    ", ", and a 50-dash separator line.

    Args:
        results: Iterable of ``(package_name, version_ranges)`` pairs.
        filename: Path of the file to append to.
    """
    divider = "-" * 50 + "\n"  # visual separator between records
    # Append mode: repeated calls accumulate records in the same file.
    with open(filename, "a", encoding="utf-8") as out:
        for name, ranges in results:
            out.write(f"Package Name: {name}\n")
            out.write("Version Ranges: " + ", ".join(ranges) + "\n")
            out.write(divider)
|
||||
|
||||
|
||||
def main():
    """Crawl the paginated listing and append every page's records to a file.

    Pages are requested in ascending order starting at 1. The loop stops at
    the first page that cannot be fetched or that yields no records.
    """
    listing_root = "https://security.snyk.io/vuln/pip/"
    page = 0
    while True:
        page += 1
        target = f"{listing_root}{page}"
        print(f"Fetching data from {target}")

        markup = fetch_html(target)
        if not markup:
            print("No more data found or failed to fetch.")
            break

        records = parse_html(markup)
        if not records:
            print("No relevant data found on page.")
            break

        save_results_to_file(records, "extracted_data.txt")

    print("Results have been saved to 'extracted_data.txt'.")


if __name__ == "__main__":
    main()
|
File diff suppressed because it is too large
Load Diff
@ -1,507 +0,0 @@
|
||||
Package Name: apache-airflow
|
||||
Version Ranges: <2.6.1,>=2.3.0
|
||||
--------------------------------------------------
|
||||
Package Name: mlflow
|
||||
Version Ranges: <2.10.0
|
||||
--------------------------------------------------
|
||||
Package Name: torch
|
||||
Version Ranges: <1.10.0
|
||||
--------------------------------------------------
|
||||
Package Name: aiohttp
|
||||
Version Ranges: <3.9.2,>=1.0.5
|
||||
--------------------------------------------------
|
||||
Package Name: keras
|
||||
Version Ranges: <2.13.1rc0
|
||||
--------------------------------------------------
|
||||
Package Name: llama-index
|
||||
Version Ranges: <0.10.24
|
||||
--------------------------------------------------
|
||||
Package Name: zenml
|
||||
Version Ranges: <0.42.2,<0.43.1,<0.44.4,<0.47.0,>=0.43.0,>=0.44.0,>=0.46.0
|
||||
--------------------------------------------------
|
||||
Package Name: gradio
|
||||
Version Ranges: <4.9.0
|
||||
--------------------------------------------------
|
||||
Package Name: bentoml
|
||||
Version Ranges: <1.2.5
|
||||
--------------------------------------------------
|
||||
Package Name: langchain
|
||||
Version Ranges: <0.0.353
|
||||
--------------------------------------------------
|
||||
Package Name: scrapy
|
||||
Version Ranges: <1.8.4,<2.11.1,>=2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: sqlparse
|
||||
Version Ranges: <0.5.0
|
||||
--------------------------------------------------
|
||||
Package Name: gunicorn
|
||||
Version Ranges: <22.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: magnum
|
||||
Version Ranges: <14.1.2,<15.0.2,<16.0.2,<17.0.2,>=15.0.0.0rc1,>=16.0.0.0rc1,>=17.0.0.0rc1
|
||||
--------------------------------------------------
|
||||
Package Name: nicegui
|
||||
Version Ranges: <1.4.21,>=1.4.6
|
||||
--------------------------------------------------
|
||||
Package Name: idna
|
||||
Version Ranges: <3.7
|
||||
--------------------------------------------------
|
||||
Package Name: llama-index-core
|
||||
Version Ranges: <0.10.24
|
||||
--------------------------------------------------
|
||||
Package Name: litellm
|
||||
Version Ranges: <1.34.42
|
||||
--------------------------------------------------
|
||||
Package Name: roundup
|
||||
Version Ranges: <1.2.1,<1.4.6,>=1.2.0,>=1.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: transformers
|
||||
Version Ranges: <4.37.0
|
||||
--------------------------------------------------
|
||||
Package Name: dirac
|
||||
Version Ranges: <8.0.37,<9.0.0a22,>=8.0.0,>=8.1.0a1
|
||||
--------------------------------------------------
|
||||
Package Name: yt-dlp
|
||||
Version Ranges: <2024.4.9,>=2021.4.11
|
||||
--------------------------------------------------
|
||||
Package Name: rafcon
|
||||
Version Ranges: <0.15.4
|
||||
--------------------------------------------------
|
||||
Package Name: radicale
|
||||
Version Ranges: <3.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: pcaspy
|
||||
Version Ranges: <0.7.1
|
||||
--------------------------------------------------
|
||||
Package Name: holidays
|
||||
Version Ranges: <0.45
|
||||
--------------------------------------------------
|
||||
Package Name: evennia
|
||||
Version Ranges: <4.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: django-json-widget
|
||||
Version Ranges: <2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: avocado-framework
|
||||
Version Ranges: <104.0
|
||||
--------------------------------------------------
|
||||
Package Name: arrendatools.plantillas
|
||||
Version Ranges: <0.4.3
|
||||
--------------------------------------------------
|
||||
Package Name: amazon-product-details-scraper
|
||||
Version Ranges: <1.0.4
|
||||
--------------------------------------------------
|
||||
Package Name: aiopioneer
|
||||
Version Ranges: <0.1.5
|
||||
--------------------------------------------------
|
||||
Package Name: pgadmin4
|
||||
Version Ranges: <8.4
|
||||
--------------------------------------------------
|
||||
Package Name: pymongo
|
||||
Version Ranges: <4.6.3
|
||||
--------------------------------------------------
|
||||
Package Name: voila
|
||||
Version Ranges: <0.2.17,<0.3.8,<0.4.4,<0.5.6,>=0.0.2,>=0.3.0a0,>=0.4.0a0,>=0.5.0a0
|
||||
--------------------------------------------------
|
||||
Package Name: piccolo-admin
|
||||
Version Ranges: <1.3.2
|
||||
--------------------------------------------------
|
||||
Package Name: cryptoauthlib
|
||||
Version Ranges: <20200912
|
||||
--------------------------------------------------
|
||||
Package Name: mosaicml
|
||||
Version Ranges: <0.5.0
|
||||
--------------------------------------------------
|
||||
Package Name: mlrun
|
||||
Version Ranges: <1.7.0rc5
|
||||
--------------------------------------------------
|
||||
Package Name: eventlet
|
||||
Version Ranges: <0.34.3
|
||||
--------------------------------------------------
|
||||
Package Name: salt
|
||||
Version Ranges: <3005.5
|
||||
--------------------------------------------------
|
||||
Package Name: django-two-factor-auth
|
||||
Version Ranges: <1.13
|
||||
--------------------------------------------------
|
||||
Package Name: pillow
|
||||
Version Ranges: <10.2.0
|
||||
--------------------------------------------------
|
||||
Package Name: ipywidgets
|
||||
Version Ranges: <5.2.0,>=5.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: pylint
|
||||
Version Ranges: <2.6.1
|
||||
--------------------------------------------------
|
||||
Package Name: pytest-cov
|
||||
Version Ranges: <2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: jupyterhub
|
||||
Version Ranges: <4.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: geonode
|
||||
Version Ranges: <4.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: langchain-core
|
||||
Version Ranges: <0.1.7
|
||||
--------------------------------------------------
|
||||
Package Name: lektor
|
||||
Version Ranges: <3.3.11
|
||||
--------------------------------------------------
|
||||
Package Name: ansys-geometry-core
|
||||
Version Ranges: <0.3.3,<0.4.12,>=0.3.0,>=0.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: nautobot
|
||||
Version Ranges: <1.6.10,<2.1.2,>=2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: mobsfscan
|
||||
Version Ranges: <0.3.8
|
||||
--------------------------------------------------
|
||||
Package Name: esphome
|
||||
Version Ranges: <2024.2.1
|
||||
--------------------------------------------------
|
||||
Package Name: qiskit-ibm-runtime
|
||||
Version Ranges: <0.21.2,>=0.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: jupyter-server-proxy
|
||||
Version Ranges: <3.2.3,<4.1.1,>=4.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: oauthenticator
|
||||
Version Ranges: <16.3.0
|
||||
--------------------------------------------------
|
||||
Package Name: octoprint
|
||||
Version Ranges: <1.10.0rc1
|
||||
--------------------------------------------------
|
||||
Package Name: wiki
|
||||
Version Ranges: <0.10.1
|
||||
--------------------------------------------------
|
||||
Package Name: astropy
|
||||
Version Ranges: <5.3.3
|
||||
--------------------------------------------------
|
||||
Package Name: yaql
|
||||
Version Ranges: <3.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: black
|
||||
Version Ranges: <24.3.0
|
||||
--------------------------------------------------
|
||||
Package Name: fgr
|
||||
Version Ranges: <0.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: vantage6
|
||||
Version Ranges: <4.2.0
|
||||
--------------------------------------------------
|
||||
Package Name: paddlepaddle
|
||||
Version Ranges: <2.6.0
|
||||
--------------------------------------------------
|
||||
Package Name: mssql-django
|
||||
Version Ranges: <1.4.1
|
||||
--------------------------------------------------
|
||||
Package Name: aiosmtpd
|
||||
Version Ranges: <1.4.5
|
||||
--------------------------------------------------
|
||||
Package Name: ckan
|
||||
Version Ranges: <2.10.1,<2.9.9,>=2.10.0
|
||||
--------------------------------------------------
|
||||
Package Name: langchain-community
|
||||
Version Ranges: <0.0.27
|
||||
--------------------------------------------------
|
||||
Package Name: libosdp
|
||||
Version Ranges: <3.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: weasyprint
|
||||
Version Ranges: <61.2,>=61.0
|
||||
--------------------------------------------------
|
||||
Package Name: apache-superset
|
||||
Version Ranges: <3.0.3
|
||||
--------------------------------------------------
|
||||
Package Name: jwcrypto
|
||||
Version Ranges: <1.5.6,>=0.5.0
|
||||
--------------------------------------------------
|
||||
Package Name: paho-mqtt
|
||||
Version Ranges: <1.1
|
||||
--------------------------------------------------
|
||||
Package Name: rq
|
||||
Version Ranges: <0.7.1
|
||||
--------------------------------------------------
|
||||
Package Name: eth-abi
|
||||
Version Ranges: <5.0.1
|
||||
--------------------------------------------------
|
||||
Package Name: prefect
|
||||
Version Ranges: <2.15.0
|
||||
--------------------------------------------------
|
||||
Package Name: django-treenode
|
||||
Version Ranges: <0.20.0
|
||||
--------------------------------------------------
|
||||
Package Name: hypercorn
|
||||
Version Ranges: <0.16.0
|
||||
--------------------------------------------------
|
||||
Package Name: streamlink
|
||||
Version Ranges: <5.3.0
|
||||
--------------------------------------------------
|
||||
Package Name: kedro
|
||||
Version Ranges: <0.19.3
|
||||
--------------------------------------------------
|
||||
Package Name: pyccel
|
||||
Version Ranges: <1.9.0
|
||||
--------------------------------------------------
|
||||
Package Name: django
|
||||
Version Ranges: <3.2.24,<4.2.10,<5.0.2,>=3.2,>=4.2,>=5.0
|
||||
--------------------------------------------------
|
||||
Package Name: videomass
|
||||
Version Ranges: <5.0.4
|
||||
--------------------------------------------------
|
||||
Package Name: ultralytics
|
||||
Version Ranges: <8.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: intel-extension-for-transformers
|
||||
Version Ranges: <1.2.2
|
||||
--------------------------------------------------
|
||||
Package Name: labgrid
|
||||
Version Ranges: <23.0.2
|
||||
--------------------------------------------------
|
||||
Package Name: docassemble.webapp
|
||||
Version Ranges: <1.4.97
|
||||
--------------------------------------------------
|
||||
Package Name: docassemble.base
|
||||
Version Ranges: <1.4.97,>=1.4.53
|
||||
--------------------------------------------------
|
||||
Package Name: docassemble
|
||||
Version Ranges: <1.4.97
|
||||
--------------------------------------------------
|
||||
Package Name: langchain-experimental
|
||||
Version Ranges: <0.0.52
|
||||
--------------------------------------------------
|
||||
Package Name: label-studio
|
||||
Version Ranges: <1.10.1
|
||||
--------------------------------------------------
|
||||
Package Name: rpyc
|
||||
Version Ranges: <5.2.1
|
||||
--------------------------------------------------
|
||||
Package Name: peewee
|
||||
Version Ranges: <3.17.1
|
||||
--------------------------------------------------
|
||||
Package Name: urllib3-future
|
||||
Version Ranges: <2.4.902
|
||||
--------------------------------------------------
|
||||
Package Name: flask-appbuilder
|
||||
Version Ranges: <4.3.11
|
||||
--------------------------------------------------
|
||||
Package Name: pretix
|
||||
Version Ranges: <2024.1.1
|
||||
--------------------------------------------------
|
||||
Package Name: orjson
|
||||
Version Ranges: <3.9.15
|
||||
--------------------------------------------------
|
||||
Package Name: pypqc
|
||||
Version Ranges: <0.0.6.1
|
||||
--------------------------------------------------
|
||||
Package Name: mjml
|
||||
Version Ranges: <0.11.0
|
||||
--------------------------------------------------
|
||||
Package Name: onnx
|
||||
Version Ranges: <1.16.0
|
||||
--------------------------------------------------
|
||||
Package Name: fastecdsa
|
||||
Version Ranges: <2.3.2
|
||||
--------------------------------------------------
|
||||
Package Name: pymatgen
|
||||
Version Ranges: <2024.2.20
|
||||
--------------------------------------------------
|
||||
Package Name: cryptography
|
||||
Version Ranges: <42.0.2,>=35.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: apache-airflow-providers-mongo
|
||||
Version Ranges: <4.0.0,>=1.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: cbor2
|
||||
Version Ranges: <5.6.0
|
||||
--------------------------------------------------
|
||||
Package Name: intel-extension-for-tensorflow
|
||||
Version Ranges: <2.13.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: tuf
|
||||
Version Ranges: <3.1.1,>=2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: zpywallet
|
||||
Version Ranges: <0.6.2
|
||||
--------------------------------------------------
|
||||
Package Name: dipdup
|
||||
Version Ranges: <3.0.2
|
||||
--------------------------------------------------
|
||||
Package Name: clip-retrieval
|
||||
Version Ranges: <2.23.1
|
||||
--------------------------------------------------
|
||||
Package Name: procrastinate
|
||||
Version Ranges: <0.11.0
|
||||
--------------------------------------------------
|
||||
Package Name: embedchain
|
||||
Version Ranges: <0.1.57
|
||||
--------------------------------------------------
|
||||
Package Name: miarec-ftpfs
|
||||
Version Ranges: <2024.1.2
|
||||
--------------------------------------------------
|
||||
Package Name: miarec-sshfs
|
||||
Version Ranges: <2024.1.5
|
||||
--------------------------------------------------
|
||||
Package Name: linkml
|
||||
Version Ranges: <1.5.2
|
||||
--------------------------------------------------
|
||||
Package Name: toodledo
|
||||
Version Ranges: <1.5.0
|
||||
--------------------------------------------------
|
||||
Package Name: renku
|
||||
Version Ranges: <1.11.0
|
||||
--------------------------------------------------
|
||||
Package Name: vunnel
|
||||
Version Ranges: <0.18.0
|
||||
--------------------------------------------------
|
||||
Package Name: panda3d
|
||||
Version Ranges: <1.9.4
|
||||
--------------------------------------------------
|
||||
Package Name: ludwig
|
||||
Version Ranges: <0.7
|
||||
--------------------------------------------------
|
||||
Package Name: ethyca-fides
|
||||
Version Ranges: <2.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: hiddifypanel
|
||||
Version Ranges: <9.0.0.dev30
|
||||
--------------------------------------------------
|
||||
Package Name: dgl
|
||||
Version Ranges: <0.9.0
|
||||
--------------------------------------------------
|
||||
Package Name: deephaven-core
|
||||
Version Ranges: <0.30.0
|
||||
--------------------------------------------------
|
||||
Package Name: borgmatic
|
||||
Version Ranges: <1.8.7
|
||||
--------------------------------------------------
|
||||
Package Name: cg
|
||||
Version Ranges: <26.0.4
|
||||
--------------------------------------------------
|
||||
Package Name: ccryptofeed
|
||||
Version Ranges: <2.2.3
|
||||
--------------------------------------------------
|
||||
Package Name: c2cgeoform
|
||||
Version Ranges: <2.1.26
|
||||
--------------------------------------------------
|
||||
Package Name: appfl
|
||||
Version Ranges: <0.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: nonebot2
|
||||
Version Ranges: <2.2.0,>=2.0.0a16
|
||||
--------------------------------------------------
|
||||
Package Name: acryl-datahub
|
||||
Version Ranges: <0.8.45
|
||||
--------------------------------------------------
|
||||
Package Name: bullmq
|
||||
Version Ranges: <1.15.0
|
||||
--------------------------------------------------
|
||||
Package Name: aiobotocore
|
||||
Version Ranges: <2.9.1
|
||||
--------------------------------------------------
|
||||
Package Name: diffoscope
|
||||
Version Ranges: <256
|
||||
--------------------------------------------------
|
||||
Package Name: kinto-attachment
|
||||
Version Ranges: <6.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: bandit
|
||||
Version Ranges: <1.7.7
|
||||
--------------------------------------------------
|
||||
Package Name: dnspython
|
||||
Version Ranges: <2.6.1
|
||||
--------------------------------------------------
|
||||
Package Name: products.sqlalchemyda
|
||||
Version Ranges: <2.2
|
||||
--------------------------------------------------
|
||||
Package Name: clearml
|
||||
Version Ranges: <1.14.2
|
||||
--------------------------------------------------
|
||||
Package Name: tensorflow
|
||||
Version Ranges: <1.7.1
|
||||
--------------------------------------------------
|
||||
Package Name: pyload-ng
|
||||
Version Ranges: <0.5.0b3.dev78
|
||||
--------------------------------------------------
|
||||
Package Name: fastapi
|
||||
Version Ranges: <0.109.1
|
||||
--------------------------------------------------
|
||||
Package Name: python-multipart
|
||||
Version Ranges: <0.0.7
|
||||
--------------------------------------------------
|
||||
Package Name: kinto
|
||||
Version Ranges: <6.1.0
|
||||
--------------------------------------------------
|
||||
Package Name: cupy
|
||||
Version Ranges: <13.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: llama-hub
|
||||
Version Ranges: <0.0.67
|
||||
--------------------------------------------------
|
||||
Package Name: borgbackup
|
||||
Version Ranges: <1.0.7
|
||||
--------------------------------------------------
|
||||
Package Name: snakemake
|
||||
Version Ranges: <7.9.0
|
||||
--------------------------------------------------
|
||||
Package Name: lief
|
||||
Version Ranges: <0.12.3
|
||||
--------------------------------------------------
|
||||
Package Name: checkov
|
||||
Version Ranges: <2.0.1029
|
||||
--------------------------------------------------
|
||||
Package Name: dash-html-components
|
||||
Version Ranges: <2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: dash
|
||||
Version Ranges: <2.15.0
|
||||
--------------------------------------------------
|
||||
Package Name: dash-core-components
|
||||
Version Ranges: <2.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: glance-store
|
||||
Version Ranges: <4.3.3,<4.7.0,>=4.4.0
|
||||
--------------------------------------------------
|
||||
Package Name: dagster
|
||||
Version Ranges: <1.1.10
|
||||
--------------------------------------------------
|
||||
Package Name: wagtail
|
||||
Version Ranges: <5.2rc1
|
||||
--------------------------------------------------
|
||||
Package Name: pycryptodome
|
||||
Version Ranges: <3.19.1
|
||||
--------------------------------------------------
|
||||
Package Name: celery
|
||||
Version Ranges: <4.4.0rc5
|
||||
--------------------------------------------------
|
||||
Package Name: vantage6-server
|
||||
Version Ranges: <4.2.0
|
||||
--------------------------------------------------
|
||||
Package Name: tuitse-tsusin
|
||||
Version Ranges: <1.3.2
|
||||
--------------------------------------------------
|
||||
Package Name: apache-airflow-providers-cncf-kubernetes
|
||||
Version Ranges: <7.0.0,>=5.2.0
|
||||
--------------------------------------------------
|
||||
Package Name: whoogle-search
|
||||
Version Ranges: <0.8.4
|
||||
--------------------------------------------------
|
||||
Package Name: jupyterlab-lsp
|
||||
Version Ranges: <5.0.2
|
||||
--------------------------------------------------
|
||||
Package Name: changedetection.io
|
||||
Version Ranges: <0.45.13
|
||||
--------------------------------------------------
|
||||
Package Name: jupyterlab
|
||||
Version Ranges: <4.0.11,>=4.0.0
|
||||
--------------------------------------------------
|
||||
Package Name: ansible-core
|
||||
Version Ranges: <2.14.14,<2.15.9,<2.16.3,>=2.15.0,>=2.16.0
|
||||
--------------------------------------------------
|
||||
Package Name: readthedocs-sphinx-search
|
||||
Version Ranges: <0.3.2
|
||||
--------------------------------------------------
|
||||
Package Name: zodb3
|
||||
Version Ranges: <3.8.3,<3.9.0c2,>=3.8.0a1,>=3.9.0
|
||||
--------------------------------------------------
|
@ -1,48 +0,0 @@
|
||||
"""转换原有的漏洞文件格式"""
|
||||
|
||||
import re
|
||||
from packaging.specifiers import SpecifierSet
|
||||
|
||||
|
||||
def load_vulnerable_packages(filename):
    """Load vulnerable-package data from a text file.

    The file content is split on 50-dash separator lines. Within each section
    the package name is read from the "Package Name: ..." line, and every
    half-open interval written as "[start, end)" is turned into
    ">=start,<end" (an empty bound is simply omitted).

    Args:
        filename: Path of the UTF-8 text file to read.

    Returns:
        A dict mapping package name to a ``SpecifierSet`` built from all
        converted intervals of that section. Sections without a package name
        or without any interval are skipped.

    NOTE(review): all intervals of a section are comma-joined into a single
    ``SpecifierSet``, which ``packaging`` evaluates as a logical AND, and a
    later section with the same package name replaces the earlier entry.
    Confirm both are intended for packages with several disjoint ranges.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        text = handle.read()

    interval_re = re.compile(r"\[(.*?),\s*(.*?)\)")
    found = {}
    for section in text.split("--------------------------------------------------"):
        header = re.search(r"Package Name: (.+)", section)
        if header is None:
            continue

        clauses = []
        for lower, upper in interval_re.findall(section):
            bounds = []
            if lower:
                bounds.append(f">={lower}")
            if upper:
                bounds.append(f"<{upper}")
            if bounds:
                clauses.append(",".join(bounds))

        if clauses:
            found[header.group(1).strip()] = SpecifierSet(",".join(clauses))
    return found
|
||||
|
||||
|
||||
def save_vulnerabilities_to_file(vuln_packages, filename):
    """Write the vulnerability mapping to ``filename``, replacing its content.

    Args:
        vuln_packages: Mapping of package name to its version specifiers; the
            specifiers are rendered with ``str()`` formatting.
        filename: Path of the UTF-8 text file to (over)write.
    """
    rule = "-" * 50 + "\n"  # separator line between package records
    with open(filename, "w", encoding="utf-8") as out:
        for name, spec in vuln_packages.items():
            out.writelines(
                (
                    f"Package Name: {name}\n",
                    f"Version Ranges: {spec}\n",
                    rule,
                )
            )
|
||||
|
||||
|
||||
def main():
    """Convert ``extracted_data.txt`` into ``trans_extracted_data.txt``."""
    source_path = "extracted_data.txt"
    target_path = "trans_extracted_data.txt"
    converted = load_vulnerable_packages(source_path)
    save_vulnerabilities_to_file(converted, target_path)


if __name__ == "__main__":
    main()
|
@ -1,7 +1,9 @@
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
import argparse
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import List, Tuple, Optional
|
||||
from packaging import version
|
||||
from packaging.specifiers import SpecifierSet
|
||||
from reportlab.lib.pagesizes import letter
|
||||
@ -9,10 +11,154 @@ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
|
||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
||||
|
||||
|
||||
def load_vulnerable_packages(filename):
|
||||
"""从文件加载有漏洞的包信息"""
|
||||
def fetch_html(url: str, timeout: float = 10.0) -> Optional[str]:
    """Fetch HTML content from the specified URL.

    Args:
        url (str): URL to fetch HTML from.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout ``requests.get`` may block indefinitely.

    Returns:
        Optional[str]: HTML content as a string, or None if the fetch fails
        (non-200 status, timeout, or any other request error).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network-level failures the same way as a bad status code so
        # callers only ever have to handle ``None``.
        return None
    if response.status_code == 200:
        return response.text
    return None
|
||||
|
||||
|
||||
def parse_html(html: str) -> List[Tuple[str, List[str]]]:
    """Parse HTML to get the 'a' and 'span' texts under the second 'td' of each 'tr'.

    Only rows of the table with ``id="sortable-table"`` that have at least
    two ``td`` cells are considered.

    Args:
        html (str): HTML content as a string.

    Returns:
        List[Tuple[str, List[str]]]: One tuple per 'a' tag, holding its
        stripped text and the stripped texts of all 'span' tags in the same
        cell. Empty when the table is not present.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", id="sortable-table")
    if table is None:
        return []

    # A table without an explicit <tbody> would otherwise make the chained
    # ``find_all`` call fail on ``None``; search the table itself instead.
    body = table.find("tbody")
    if body is None:
        body = table

    results: List[Tuple[str, List[str]]] = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        target = cells[1]
        spans = [span.text.strip() for span in target.find_all("span")]
        # Every link in the cell shares the same list of span texts.
        results.extend((a_tag.text.strip(), spans) for a_tag in target.find_all("a"))
    return results
|
||||
|
||||
|
||||
def format_results(results: List[Tuple[str, List[str]]]) -> str:
    """Format extracted data as a string.

    Args:
        results (List[Tuple[str, List[str]]]): ``(package_name,
            version_ranges)`` pairs to format.

    Returns:
        str: One record per pair, consisting of a "Package Name" line, a
        "Version Ranges" line with the ranges joined by ", ", and a 50-dash
        separator line. Empty string for empty input.
    """
    separator = "-" * 50
    # Build all records in one join instead of repeated string concatenation.
    return "".join(
        f"Package Name: {package_name}\n"
        f"Version Ranges: {', '.join(version_ranges)}\n"
        f"{separator}\n"
        for package_name, version_ranges in results
    )
|
||||
|
||||
|
||||
def trans_vulnerable_packages(content):
    """Convert interval-style version ranges into ``>=`` / ``<`` specifiers.

    ``content`` is split on 50-dash separator lines. Within each section the
    package name is read from the "Package Name: ..." line, and every
    half-open interval written as "[start, end)" becomes ">=start,<end" (an
    empty bound is simply omitted).

    Args:
        content (str): Aggregated vulnerability text.

    Returns:
        A dict mapping package name to a ``SpecifierSet`` built from all
        converted intervals of that section. Sections without a package name
        or without any interval are skipped.

    NOTE(review): all intervals of a section are comma-joined into a single
    ``SpecifierSet``, which ``packaging`` evaluates as a logical AND, and a
    later section with the same package name replaces the earlier entry.
    Confirm both are intended for packages with several disjoint ranges.
    """
    interval_re = re.compile(r"\[(.*?),\s*(.*?)\)")
    found = {}
    for section in content.split("--------------------------------------------------"):
        header = re.search(r"Package Name: (.+)", section)
        if header is None:
            continue

        clauses = []
        for lower, upper in interval_re.findall(section):
            bounds = []
            if lower:
                bounds.append(f">={lower}")
            if upper:
                bounds.append(f"<{upper}")
            if bounds:
                clauses.append(",".join(bounds))

        if clauses:
            found[header.group(1).strip()] = SpecifierSet(",".join(clauses))
    return found
|
||||
|
||||
|
||||
def format_vulnerabilities(vuln_packages):
    """Format a vulnerability mapping as text.

    Args:
        vuln_packages: Mapping of package name to its version specifiers; the
            specifiers are rendered with ``str()`` formatting.

    Returns:
        str: One record per package, consisting of a "Package Name" line, a
        "Version Ranges" line, and a 50-dash separator line. Empty string for
        an empty mapping.
    """
    separator = "-" * 50
    # Build all records in one join instead of repeated string concatenation.
    return "".join(
        f"Package Name: {package}\nVersion Ranges: {specifiers}\n{separator}\n"
        for package, specifiers in vuln_packages.items()
    )
|
||||
|
||||
|
||||
def load_requirements(filename):
    """Load the project's pinned dependencies from a requirements file.

    Only lines containing ``name==version`` are used. Inline ``#`` comments
    and ``;`` environment markers are removed before the line is split, and
    the split happens on the first ``==`` only.

    Args:
        filename: Path of the UTF-8 requirements file.

    Returns:
        A dict mapping package name to its pinned version string.
    """
    requirements = {}
    with open(filename, "r", encoding="utf-8") as file:
        # Iterate the handle directly: reading the whole file first would
        # exhaust the stream and leave no lines to parse.
        for raw_line in file:
            line = raw_line.split("#", 1)[0].split(";", 1)[0].strip()
            package_name, separator, package_version = line.partition("==")
            if not separator:
                continue
            package_name = package_name.strip()
            package_version = package_version.strip()
            if package_name and package_version:
                requirements[package_name] = package_version
    return requirements
|
||||
|
||||
|
||||
def check_vulnerabilities(requirements, vulnerabilities, output_file):
    """Check dependencies against known-vulnerable version ranges and report.

    Package names are compared in normalized form (case-insensitive, with
    runs of "-", "_" and "." treated as equivalent), so a requirement such as
    ``Django`` still matches a database entry named ``django``.

    Args:
        requirements: Mapping of package name to pinned version string.
        vulnerabilities: Mapping of package name to an object supporting
            ``parsed_version in spec``.
        output_file: Path of the report to write, or a falsy value to print
            the report to stdout. The file extension selects the format
            (txt, md, html, pdf); any other extension falls back to txt and
            the path is rewritten with a ``.txt`` suffix.
    """

    def canonical(name):
        # Normalized project-name comparison as described by PEP 503.
        return re.sub(r"[-_.]+", "-", name).lower()

    known = {canonical(name): spec for name, spec in vulnerabilities.items()}

    results_warning = []  # dependencies inside a vulnerable range
    results_ok = []  # dependencies that are unaffected or unknown

    for req_name, req_version in requirements.items():
        spec = known.get(canonical(req_name))
        if spec is None:
            results_ok.append(
                f"OK: {req_name} not found in the vulnerability database."
            )
        elif version.parse(req_version) in spec:
            results_warning.append(
                f"WARNING: {req_name}=={req_version} is vulnerable!"
            )
        else:
            results_ok.append(f"OK: {req_name}=={req_version} is not affected.")

    # Warnings first, then everything that is fine.
    results = results_warning + results_ok

    if not output_file:
        print("\n".join(results))
        return

    filename, ext = os.path.splitext(output_file)
    output_format = ext[1:] or "txt"
    if output_format not in ("txt", "md", "html", "pdf"):
        print("Warning: Invalid file format specified. Defaulting to TXT format.")
        output_format = "txt"
        output_file = filename + ".txt"
    output_results(output_file, results, output_format)
|
||||
|
||||
|
||||
def trans_vulnerable_packages_to_dict(content):
|
||||
"""将漏洞信息转换为字典格式
|
||||
Args:
|
||||
content str: 漏洞信息汇总.
|
||||
"""
|
||||
vulnerabilities = {}
|
||||
blocks = content.split("--------------------------------------------------")
|
||||
for block in blocks:
|
||||
@ -28,18 +174,6 @@ def load_vulnerable_packages(filename):
|
||||
return vulnerabilities
|
||||
|
||||
|
||||
def load_requirements(filename):
    """Load the project's pinned dependencies from a requirements file.

    Only lines containing ``name==version`` are used. Inline ``#`` comments
    and ``;`` environment markers are removed before the line is split, and
    the split happens on the first ``==`` only, so a second ``==`` on the
    line no longer breaks the two-value unpacking.

    Args:
        filename: Path of the UTF-8 requirements file.

    Returns:
        A dict mapping package name to its pinned version string.
    """
    requirements = {}
    with open(filename, "r", encoding="utf-8") as file:
        for raw_line in file:
            line = raw_line.split("#", 1)[0].split(";", 1)[0].strip()
            package_name, separator, package_version = line.partition("==")
            if not separator:
                continue
            package_name = package_name.strip()
            package_version = package_version.strip()
            if package_name and package_version:
                requirements[package_name] = package_version
    return requirements
|
||||
|
||||
|
||||
def output_pdf(results, file_name):
|
||||
doc = SimpleDocTemplate(file_name, pagesize=letter)
|
||||
story = []
|
||||
@ -99,48 +233,13 @@ def output_results(filename, results, format_type):
|
||||
for result in results:
|
||||
file.write(f"{result}\n")
|
||||
|
||||
|
||||
def check_vulnerabilities(requirements, vulnerabilities, output_file):
    """Compare pinned dependencies with known vulnerabilities and report.

    Args:
        requirements: Mapping of package name to pinned version string.
        vulnerabilities: Mapping of package name to an object supporting
            ``parsed_version in spec`` membership tests.
        output_file: Destination path for the report. When falsy, the report
            is printed to stdout instead. The extension selects the format
            (txt, md, html or pdf); a missing extension means txt, and an
            unsupported one falls back to a ``.txt`` file with a warning.
    """
    vulnerable = []  # findings for dependencies inside an affected range
    unaffected = []  # findings for dependencies that are fine or unknown

    for req_name, req_version in requirements.items():
        if req_name not in vulnerabilities:
            unaffected.append(
                f"OK: {req_name} not found in the vulnerability database."
            )
            continue

        affected_range = vulnerabilities[req_name]
        if version.parse(req_version) in affected_range:
            vulnerable.append(f"WARNING: {req_name}=={req_version} is vulnerable!")
        else:
            unaffected.append(f"OK: {req_name}=={req_version} is not affected.")

    # Warnings are listed first, followed by the unaffected dependencies.
    results = [*vulnerable, *unaffected]

    if not output_file:
        print("\n".join(results))
        return

    stem, ext = os.path.splitext(output_file)
    output_format = ext[1:] or "txt"
    if output_format not in ("txt", "md", "html", "pdf"):
        print("Warning: Invalid file format specified. Defaulting to TXT format.")
        output_format = "txt"  # fall back to the default format
        output_file = stem + ".txt"
    output_results(output_file, results, output_format)
|
||||
print("Results have been saved as " + filename)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Check project dependencies for vulnerabilities."
|
||||
)
|
||||
parser.add_argument(
|
||||
"vulnerabilities_file", help="Path to the file containing vulnerability data"
|
||||
)
|
||||
parser.add_argument(
|
||||
"requirements_file", help="Path to the requirements file of the project"
|
||||
)
|
||||
@ -151,9 +250,29 @@ def main():
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
vulnerabilities = load_vulnerable_packages(args.vulnerabilities_file)
|
||||
base_url = "https://security.snyk.io/vuln/pip/"
|
||||
page_number = 1
|
||||
crawler_results = ""
|
||||
while True:
|
||||
url = f"{base_url}{page_number}"
|
||||
print(f"Fetching data from {url}")
|
||||
html_content = fetch_html(url)
|
||||
if not html_content:
|
||||
print("No more data found or failed to fetch.")
|
||||
break
|
||||
extracted_data = parse_html(html_content)
|
||||
if not extracted_data:
|
||||
print("No relevant data found on page.")
|
||||
break
|
||||
crawler_results += format_results(extracted_data)
|
||||
page_number += 1
|
||||
print("Results have been stored in memory.\n")
|
||||
|
||||
trans_res = trans_vulnerable_packages(crawler_results)
|
||||
trans_res = format_vulnerabilities(trans_res)
|
||||
trans_res = trans_vulnerable_packages_to_dict(trans_res)
|
||||
requirements = load_requirements(args.requirements_file)
|
||||
check_vulnerabilities(requirements, vulnerabilities, args.output)
|
||||
check_vulnerabilities(requirements, trans_res, args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -42,7 +42,8 @@ python backdoor_detection.py ./src -o ./output/report.pdf
|
||||
|
||||
**主要组件**:
|
||||
|
||||
- `load_vulnerable_packages(filename)`: 从给定的文件中加载包含漏洞的包信息。
|
||||
- `parse_html(html)`: 解析爬取到的网页 HTML,提取存在漏洞的依赖包名称及其版本范围。
|
||||
- `trans_vulnerable_packages(content)`: 将漏洞版本中的集合形式转换为大于小于的格式。
|
||||
- `load_requirements(filename)`: 从项目的 `requirements.txt` 文件中加载依赖信息。
|
||||
- `output_results(filename, results, format_type)`: 根据指定格式输出检测结果。
|
||||
- `check_vulnerabilities(requirements, vulnerabilities, output_file)`: 核心功能,对比依赖与漏洞信息并生成报告。
|
||||
@ -50,7 +51,6 @@ python backdoor_detection.py ./src -o ./output/report.pdf
|
||||
|
||||
**输入**:
|
||||
|
||||
- 依赖漏洞文件路径。
|
||||
- 项目 `requirements.txt` 文件路径。
|
||||
- 输出文件路径和格式(通过命令行参数指定)。
|
||||
|
||||
@ -67,7 +67,7 @@ python backdoor_detection.py ./src -o ./output/report.pdf
|
||||
**使用示例**:
|
||||
|
||||
```bash
|
||||
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o ./output/report.md
|
||||
python requirements_detection.py ./requirements.txt -o ./output/report.md
|
||||
```
|
||||
|
||||
---
|
||||
|
@ -40,7 +40,13 @@
|
||||
|
||||
- **主要应用**:读取漏洞数据文件和依赖文件,输出结果报告到文本、Markdown、HTML 或 PDF 文件。
|
||||
|
||||
## 7. 代码和风险分析
|
||||
## 7. 爬虫
|
||||
|
||||
利用 Python 的 `BeautifulSoup` 库制作爬虫,快速收集并整理信息。
|
||||
|
||||
- **主要应用**:通过爬虫收集漏洞依赖信息并进行汇总,用于判断依赖是否存在漏洞版本。
|
||||
|
||||
## 8. 代码和风险分析
|
||||
|
||||
项目中实现了基本的静态代码分析功能,用于识别和报告潜在的安全风险函数调用,如 `system`、`exec` 等。
|
||||
|
||||
|
@ -26,20 +26,18 @@ pip install packaging reportlab
|
||||
**命令格式**:
|
||||
|
||||
```bash
|
||||
python requirements_detection.py <vulnerabilities_file> <requirements_file> -o <output_file>
|
||||
python requirements_detection.py <requirements_file> -o <output_file>
|
||||
```
|
||||
|
||||
**参数说明**:
|
||||
|
||||
- `<vulnerabilities_file>`: 包含漏洞信息的文件路径。
|
||||
- `<requirements_file>`: 项目的 `requirements.txt` 文件路径。
|
||||
- `<output_file>`: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`。
|
||||
|
||||
|
||||
**示例**:
|
||||
|
||||
```bash
|
||||
python requirements_detection.py vulnerabilities_data.txt requirements.txt -o output/report.md
|
||||
python requirements_detection.py requirements.txt -o output/report.md
|
||||
```
|
||||
|
||||
## 运行静态代码后门检测脚本
|
||||
|
@ -1,3 +1,5 @@
|
||||
reportlab
|
||||
requests
|
||||
packaging
|
||||
openai
|
||||
bs4
|
97
tests/test_requirements_detection.py
Normal file
97
tests/test_requirements_detection.py
Normal file
@ -0,0 +1,97 @@
|
||||
import unittest
|
||||
from unittest.mock import patch, Mock, MagicMock
|
||||
from detection.requirements_detection import (
|
||||
fetch_html,
|
||||
parse_html,
|
||||
format_results,
|
||||
check_vulnerabilities,
|
||||
)
|
||||
from packaging.version import Version
|
||||
from packaging.specifiers import SpecifierSet
|
||||
|
||||
# Assuming the functions from your provided code are imported here
|
||||
# from your_module import fetch_html, parse_html, format_results, ...
|
||||
|
||||
|
||||
# 测试网页抓取和结果报告的测试类
|
||||
class TestWebScrapingAndReporting(unittest.TestCase):
    """Tests for the crawler helpers and the vulnerability report output."""

    def setUp(self):
        """Provide sample requirements and vulnerability data for the report tests."""
        self.requirements = {"package1": "1.0", "package2": "2.0"}
        self.vulnerabilities = {
            "package1": SpecifierSet(">=1.0,<2.0"),
            "package3": SpecifierSet(">=1.0,<1.5"),
        }

    def test_fetch_html_success(self):
        """fetch_html returns the response body when the request succeeds."""
        with patch("requests.get") as mocked_get:
            mocked_get.return_value.status_code = 200
            mocked_get.return_value.text = "success"
            url = "https://security.snyk.io/vuln/pip/"
            result = fetch_html(url)
            self.assertEqual(result, "success")

    def test_fetch_html_failure(self):
        """fetch_html returns None when the request fails."""
        with patch("requests.get") as mocked_get:
            # The status code must be set on ``return_value`` (the mocked
            # response object). Setting it on any other attribute leaves the
            # response's status_code as an auto-created mock, so the 404 case
            # would never actually be exercised.
            mocked_get.return_value.status_code = 404
            url = "https://security.snyk.io/vuln/pip/"
            result = fetch_html(url)
            self.assertIsNone(result)

    def test_parse_html(self):
        """parse_html extracts the expected link/span data from the table."""
        html_content = """
        <table id="sortable-table">
            <tbody>
                <tr><td></td><td><a href="#">Link1</a><span>Span1</span></td></tr>
                <tr><td></td><td><a href="#">Link2</a><span>Span2</span></td></tr>
            </tbody>
        </table>
        """
        expected = [("Link1", ["Span1"]), ("Link2", ["Span2"])]
        result = parse_html(html_content)
        self.assertEqual(result, expected)

    def test_format_results(self):
        """format_results renders the parsed data in the expected text layout."""
        results = [("Package1", ["1.0", "2.0"]), ("Package2", ["1.5", "2.5"])]
        expected_output = (
            "Package Name: Package1\nVersion Ranges: 1.0, 2.0\n"
            + "--------------------------------------------------\n"
            + "Package Name: Package2\nVersion Ranges: 1.5, 2.5\n"
            + "--------------------------------------------------\n"
        )
        formatted_result = format_results(results)
        self.assertEqual(formatted_result, expected_output)

    @patch("builtins.print")  # capture console output
    def test_check_vulnerabilities_no_output_file(self, mock_print):
        """Without an output file the report is printed to the console."""
        check_vulnerabilities(self.requirements, self.vulnerabilities, None)
        expected_calls = [
            unittest.mock.call(
                "WARNING: package1==1.0 is vulnerable!\nOK: package2 not found in the vulnerability database."
            )
        ]
        mock_print.assert_has_calls(expected_calls, any_order=True)

    @patch("builtins.open", new_callable=unittest.mock.mock_open)
    @patch("os.path.splitext", return_value=("output", ".txt"))
    @patch("os.path.exists", return_value=False)
    @patch("os.makedirs")
    def test_check_vulnerabilities_with_output_file(
        self, mock_makedirs, mock_exists, mock_splitext, mock_open
    ):
        """With an output file the report is written to that file."""
        check_vulnerabilities(self.requirements, self.vulnerabilities, "output.txt")
        mock_open.assert_called_once_with("output.txt", "w", encoding="utf-8")
        handle = mock_open()
        handle.write.assert_called()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
Loading…
x
Reference in New Issue
Block a user