Merge branch 'tests/final-tests' of https://git.mamahaha.work/sangge/BackDoorBuster into tests/final-tests

This commit is contained in:
dqy 2024-06-04 20:58:35 +08:00
commit 7198c8b4da
4 changed files with 141 additions and 52 deletions

View File

@ -3,6 +3,8 @@ from typing import Dict, List, Tuple, Optional
from reportlab.lib.pagesizes import letter from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from detection.pickle_detection import pickleDataDetection
from .Regexdetection import find_dangerous_functions from .Regexdetection import find_dangerous_functions
from .GPTdetection import detectGPT from .GPTdetection import detectGPT
from .pyc_detection import disassemble_pyc from .pyc_detection import disassemble_pyc
@ -373,6 +375,13 @@ def process_path(
# 扫描动画 # 扫描动画
for file_path in tqdm(all_files, desc="Scanning files", unit="file"): for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
file_extension = file_path.suffix file_extension = file_path.suffix
if file_extension in [".pkl",".pickle"]:
res = pickleDataDetection(str(file_path), output_file)
results["pickles"].append({
"file": str(file_path),
"result": res
})
continue
file_results = checkModeAndDetect( file_results = checkModeAndDetect(
mode, str(file_path), file_extension, pycdc_addr mode, str(file_path), file_extension, pycdc_addr
) )
@ -387,7 +396,13 @@ def process_path(
) )
elif os.path.isfile(path): elif os.path.isfile(path):
file_extension = os.path.splitext(path)[1] file_extension = os.path.splitext(path)[1]
if file_extension in SUPPORTED_EXTENSIONS: if file_extension in [".pkl", ".pickle"]:
res = pickleDataDetection(str(path), output_file)
results["pickles"].append({
"file": str(path),
"result": res
})
elif file_extension in SUPPORTED_EXTENSIONS:
file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr) file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
if file_results is not None: if file_results is not None:
for key in file_results: for key in file_results:
@ -425,9 +440,18 @@ def main():
help="Path to pycdc.exe to decompile", help="Path to pycdc.exe to decompile",
default=os.getenv("PATH"), default=os.getenv("PATH"),
) )
parser.add_argument(
"-P",
"--Pickle",
help="Path to pickle file to analyze",
default=None,
)
args = parser.parse_args() args = parser.parse_args()
output_format = "txt" # Default output format output_format = "txt" # Default output format
output_file = None output_file = None
if args.Pickle:
pickleDataDetection(args.Pickle, args.output)
return
if args.output: if args.output:
_, ext = os.path.splitext(args.output) _, ext = os.path.splitext(args.output)
ext = ext.lower() ext = ext.lower()

View File

@ -142,11 +142,7 @@ def pickleDataDetection(filename: str, output_file=None):
pickscan = pickleScanner(file) pickscan = pickleScanner(file)
pickscan.load() pickscan.load()
res = pickscan.output() res = pickscan.output()
if output_file: return res
with open(output_file, "w") as file:
json.dump(res, file, indent=4)
else:
print(json.dumps(res))
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -3,9 +3,8 @@ from git import Repo # type: ignore
import random import random
from pathlib import Path from pathlib import Path
import pickle import pickle
import marshal
import importlib.util
import os import os
import py_compile
def clone_repo(repo_url: str, clone_dir: str) -> None: def clone_repo(repo_url: str, clone_dir: str) -> None:
@ -26,7 +25,7 @@ def clone_repo(repo_url: str, clone_dir: str) -> None:
def inject_random_backdoor( def inject_random_backdoor(
path: str, pickle: bool = False, pyc: bool = False, sample_rate: float = 0.1 path: str, sample_rate: float = 0.1
) -> Tuple[Tuple[str, int], ...]: ) -> Tuple[Tuple[str, int], ...]:
""" """
Insert random backdoor into the path. Insert random backdoor into the path.
@ -36,11 +35,6 @@ def inject_random_backdoor(
pickle (bool): Whether to insert a backdoor into a pickle file. pickle (bool): Whether to insert a backdoor into a pickle file.
pyc (bool): Whether to insert a backdoor into a compiled Python file. pyc (bool): Whether to insert a backdoor into a compiled Python file.
""" """
if pickle:
inject_pickle_backdoor(path)
if pyc:
inject_pyc_backdoor(path)
project_path = Path(path) project_path = Path(path)
all_python_files = list(project_path.rglob("*.py")) all_python_files = list(project_path.rglob("*.py"))
injected_python_files = [] injected_python_files = []
@ -175,24 +169,18 @@ def inject_pyc_backdoor(root_path: str) -> None:
for path in paths: for path in paths:
backdoor_id = random.randrange(0, len(backdoors)) backdoor_id = random.randrange(0, len(backdoors))
backdoor = backdoors[backdoor_id] backdoor = backdoors[backdoor_id]
filename = os.path.join(path, f"backdoor{backdoor_id}.pyc") py_filename = os.path.join(path, f"backdoor{backdoor_id}.py")
pyc_filename = os.path.join(path, f"backdoor{backdoor_id}.pyc")
with open(py_filename, "w") as f:
f.write(backdoor)
# Compile the string to a code object py_compile.compile(py_filename, cfile=pyc_filename)
code = compile(backdoor, filename, "exec") os.remove(py_filename)
# Create a code object header
header = importlib.util.MAGIC_NUMBER
if hasattr(importlib.util, "SOURCE_SUFFIXES"):
header += b"\x00" * 4
# Write the .pyc file
with open(filename, "wb") as file:
file.write(header)
marshal.dump(code, file)
if __name__ == "__main__": if __name__ == "__main__":
repo_url = "https://github.com/TheAlgorithms/Python.git" repo_url = "https://github.com/TheAlgorithms/Python.git"
clone_dir = "/tmp/repo" clone_dir = "/tmp/repo"
clone_repo(repo_url, clone_dir) clone_repo(repo_url, clone_dir)
inject_random_backdoor(clone_dir, pickle=True, pyc=True) inject_random_backdoor(clone_dir)
inject_pickle_backdoor(clone_dir)

View File

@ -1,10 +1,19 @@
import time import time
import unittest import unittest
import shutil import shutil
import os
import threading import threading
import re
from detection.utils import read_file_content from detection.utils import read_file_content
from .final_tests_util import clone_repo, Path, inject_random_backdoor from .final_tests_util import (
clone_repo,
Path,
inject_pickle_backdoor,
inject_random_backdoor,
inject_pyc_backdoor,
backdoors,
)
from detection.Regexdetection import find_dangerous_functions from detection.Regexdetection import find_dangerous_functions
from detection.GPTdetection import detectGPT from detection.GPTdetection import detectGPT
@ -22,29 +31,44 @@ def GPTdetectFileList(fileList):
thread.join() thread.join()
return results return results
def GPTThread(content, results): def GPTThread(content, results):
try: try:
results.append(detectGPT(content)) results.append(detectGPT(content))
except Exception as e: except Exception as e:
print(e) print(e)
class TestFinalTests(unittest.TestCase): class TestFinalTests(unittest.TestCase):
def setUp(self) -> None: def setUp(self) -> None:
self.path = "./tmp/repo" self.path = "./tmp/repo/"
shutil.rmtree(self.path, ignore_errors=True) shutil.rmtree(self.path, ignore_errors=True)
clone_repo("https://github.com/injetlee/Python.git", self.path) if not os.path.exists("/tmp/Python/"):
clone_repo("https://github.com/TheAlgorithms/Python.git", "/tmp/Python")
shutil.copytree("/tmp/Python", self.path)
sampleRate = 0.1 sampleRate = 0.1
self.inject_reslt = inject_random_backdoor(self.path, sample_rate=sampleRate) self.inject_result = inject_random_backdoor(self.path, sample_rate=sampleRate)
self.injectedNum = len(self.inject_reslt) self.pickle_true_num = inject_pickle_backdoor(self.path)
self.pyc_true_num = inject_pyc_backdoor(self.path)
self.injectedNum = len(self.inject_result)
print(self.injectedNum) print(self.injectedNum)
project_path = Path(self.path) project_path = Path(self.path)
self.all_python_files = list(project_path.rglob("*.py"))
self.py_filesNum = len(self.all_python_files)
self.trueRate = self.injectedNum / self.py_filesNum
print(self.trueRate)
# test backdoor code in python files self.all_python_files = list(project_path.rglob("*.py"))
self.py_files_num = len(self.all_python_files)
all_pickle_files = list(project_path.rglob("*.pickle"))
self.pickle_files_num = len(all_pickle_files)
all_pyc_files = list(project_path.rglob("*.pyc"))
self.pyc_files_num = len(all_pyc_files)
os.system(
"python -m detection " + self.path + " -o " + self.path + "output.txt"
)
def test_final_tests_pycode(self): def test_final_tests_pycode(self):
# test backdoor code in python files
detectedNum = 0 detectedNum = 0
possibly_dangerous_file = [] possibly_dangerous_file = []
for file in self.all_python_files: for file in self.all_python_files:
@ -57,26 +81,83 @@ class TestFinalTests(unittest.TestCase):
): ):
detectedNum += 1 detectedNum += 1
possibly_dangerous_file.append(file) possibly_dangerous_file.append(file)
print(detectedNum / self.py_filesNum) print(detectedNum / self.py_files_num)
self.assertAlmostEqual(detectedNum, self.py_filesNum, places=1)
GPTdetectedNum = 0 GPTdetectedNum = 0
for i in possibly_dangerous_file:
content = read_file_content(str(i))
results = {}
try:
results = detectGPT(content)
if (
len(results["high"]) > 0
or len(results["medium"]) > 0
or len(results["low"]) > 0
):
GPTdetectedNum += 1
print(GPTdetectedNum)
except Exception as e:
# print(e)
pass
# test injected code
with open(self.path + "output.txt", "r") as f:
lines = f.readlines()
injected_detected_num = 0
injected_correct_num = 0
pattern = r"\w+\.py: Line \d+: (.+)"
for line in lines:
if "py:" in line:
injected_detected_num += 1
match = re.search(pattern, line)
command = ""
if match:
command = match.group(1)
for backdoor in backdoors:
if command in backdoor:
injected_correct_num += 1
break
injected_accurency = injected_detected_num / self.py_files_num
print(f"injected files accurency: {injected_accurency}")
try:
GPTresult = GPTdetectFileList(possibly_dangerous_file) GPTresult = GPTdetectFileList(possibly_dangerous_file)
for result in GPTresult: for result in GPTresult:
if len(result) > 0: if len(result) > 0:
GPTdetectedNum += 1 GPTdetectedNum += 1
print(GPTdetectedNum) print(GPTdetectedNum)
self.assertGreaterEqual(GPTdetectedNum, detectedNum) self.assertGreaterEqual(GPTdetectedNum, detectedNum)
except Exception as e:
# print(e)
pass
# test pickle files # test pickle files
pickle_detectedNum = 0 with open(self.path + "output.txt", "r") as f:
pickle_tureNum = len(list(Path(self.path).glob("*.pickle"))) lines = f.readlines()
pickle_detected_num = 0
pickle_correct_num = 0
for line in lines:
if "pickle" in line:
pickle_detected_num += 1
if re.search(r"backdoor\d*\.pickle", line):
pickle_correct_num += 1
self.assertAlmostEqual(pickle_detectedNum, pickle_tureNum, places=1) pickle_accurency = pickle_detected_num / self.pickle_true_num
print(f"pickle files accurency: {pickle_accurency}")
# test pyc files # test pyc files
pyc_detectedNum = 0 with open(self.path + "output.txt", "r") as f:
pyc_tureNum = len(list(Path(self.path).glob("*.pyc"))) lines = f.readlines()
self.assertAlmostEqual(pyc_detectedNum, pyc_tureNum, places=1) pyc_detected_num = 0
pyc_correct_num = 0
for line in lines:
if "pyc" in line:
pyc_detected_num += 1
if re.search(r"backdoor\d*\.pyc", line):
pyc_correct_num += 1
pyc_accurency = pyc_detected_num / self.pyc_true_num
print(f"pyc files accurency: {pyc_accurency}")
if __name__ == "__main__": if __name__ == "__main__":