"""End-to-end checks for the backdoor detectors on an injected sample repository.

The test case copies a checkout of TheAlgorithms/Python, injects backdoors
with the helpers from ``final_tests_util``, runs ``python -m detection`` on
the copy and prints detection ratios for ``.py``, ``.pickle`` and ``.pyc``
files.
"""

import os
import re
import shutil
import threading
import time
import unittest

from detection.utils import read_file_content
from .final_tests_util import (
    clone_repo,
    Path,
    inject_pickle_backdoor,
    inject_random_backdoor,
    inject_pyc_backdoor,
    backdoors,
)
from detection.Regexdetection import find_dangerous_functions
from detection.GPTdetection import detectGPT

# Severity keys looked up in the mappings returned by the detectors.
_SEVERITY_LEVELS = ("high", "medium", "low")


def _has_findings(results) -> bool:
    """Return True when *results* has a non-empty entry for any severity level."""
    return any(len(results[level]) > 0 for level in _SEVERITY_LEVELS)


def _safe_ratio(numerator, denominator) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def GPTdetectFileList(fileList):
    """Run ``detectGPT`` on every file in *fileList*, one thread per file.

    Args:
        fileList: iterable of paths; each is read with ``read_file_content``.

    Returns:
        A list with the ``detectGPT`` result of every file whose detection
        did not raise. Order follows thread completion, not *fileList*.
    """
    results = []
    threads = []
    for file in fileList:
        content = read_file_content(str(file))
        # Pass the callable itself. The original wrote ``target=GPTThread()``,
        # which invoked the worker with no arguments and raised TypeError
        # before any thread was created.
        threads.append(threading.Thread(target=GPTThread, args=(content, results)))
    for thread in threads:
        thread.start()
        # Stagger the thread starts by half a second.
        time.sleep(0.5)
    for thread in threads:
        thread.join()
    return results


def GPTThread(content, results):
    """Thread worker: append ``detectGPT(content)`` to the shared *results* list.

    Any exception raised by the detection is printed and otherwise ignored,
    so a failing file simply contributes no entry.
    """
    try:
        results.append(detectGPT(content))
    except Exception as e:
        print(e)


class TestFinalTests(unittest.TestCase):
    """Detection run over a copy of a real repository with injected backdoors."""

    def setUp(self) -> None:
        """Create the injected working copy and run the detector over it.

        Clones the sample repository to ``/tmp/Python`` when it is missing,
        copies it to ``./tmp/repo/``, injects the backdoors, counts the
        ``.py`` / ``.pickle`` / ``.pyc`` files and writes the detector report
        to ``output.txt`` inside the working copy.
        """
        self.path = "./tmp/repo/"
        shutil.rmtree(self.path, ignore_errors=True)
        if not os.path.exists("/tmp/Python/"):
            clone_repo("https://github.com/TheAlgorithms/Python.git", "/tmp/Python")
        shutil.copytree("/tmp/Python", self.path)
        sampleRate = 0.1
        self.inject_result = inject_random_backdoor(self.path, sample_rate=sampleRate)
        self.pickle_true_num = inject_pickle_backdoor(self.path)
        self.pyc_true_num = inject_pyc_backdoor(self.path)
        self.injectedNum = len(self.inject_result)
        print(self.injectedNum)
        project_path = Path(self.path)

        self.all_python_files = list(project_path.rglob("*.py"))
        self.py_files_num = len(self.all_python_files)

        all_pickle_files = list(project_path.rglob("*.pickle"))
        self.pickle_files_num = len(all_pickle_files)

        all_pyc_files = list(project_path.rglob("*.pyc"))
        self.pyc_files_num = len(all_pyc_files)

        os.system(
            "python -m detection " + self.path + " -o " + self.path + "output.txt"
        )

    def test_final_tests_pycode(self):
        """Print detection ratios for python, pickle and pyc files.

        The GPT-based passes are best-effort: their failures are printed and
        do not fail the test.
        """
        # test backdoor code in python files
        detectedNum = 0
        possibly_dangerous_file = []
        for file in self.all_python_files:
            content = read_file_content(str(file))
            results = find_dangerous_functions(content, ".py")
            if _has_findings(results):
                detectedNum += 1
                possibly_dangerous_file.append(file)
        print(_safe_ratio(detectedNum, self.py_files_num))

        GPTdetectedNum = 0
        for i in possibly_dangerous_file:
            content = read_file_content(str(i))
            try:
                results = detectGPT(content)
                if _has_findings(results):
                    GPTdetectedNum += 1
                print(GPTdetectedNum)
            except Exception as e:
                # Best-effort: report the failure and move on to the next file.
                print(e)

        # The report is written once in setUp, so one read serves all three
        # sections below.
        with open(self.path + "output.txt", "r") as f:
            lines = f.readlines()

        # test injected code
        injected_detected_num = 0
        injected_correct_num = 0
        pattern = r"\w+\.py: Line \d+: (.+)"
        for line in lines:
            if "py:" in line:
                injected_detected_num += 1
                match = re.search(pattern, line)
                command = ""
                if match:
                    command = match.group(1)
                for backdoor in backdoors:
                    if command in backdoor:
                        injected_correct_num += 1
                        break

        injected_accurency = _safe_ratio(injected_detected_num, self.py_files_num)
        print(f"injected files accurency: {injected_accurency}")
        try:
            GPTresult = GPTdetectFileList(possibly_dangerous_file)
            for result in GPTresult:
                if len(result) > 0:
                    GPTdetectedNum += 1
            print(GPTdetectedNum)
            self.assertGreaterEqual(GPTdetectedNum, detectedNum)
        except Exception as e:
            # NOTE(review): AssertionError is an Exception too, so a failed
            # assertion above is reported here but does not fail the test.
            # Kept from the original best-effort handling; confirm whether
            # the assertion is meant to be enforced.
            print(e)

        # test pickle files
        pickle_detected_num = 0
        pickle_correct_num = 0
        for line in lines:
            if "pickle" in line:
                pickle_detected_num += 1
                if re.search(r"backdoor\d*\.pickle", line):
                    pickle_correct_num += 1

        pickle_accurency = _safe_ratio(pickle_detected_num, self.pickle_true_num)
        print(f"pickle files accurency: {pickle_accurency}")

        # test pyc files
        pyc_detected_num = 0
        pyc_correct_num = 0
        for line in lines:
            if "pyc" in line:
                pyc_detected_num += 1
                if re.search(r"backdoor\d*\.pyc", line):
                    pyc_correct_num += 1
        pyc_accurency = _safe_ratio(pyc_detected_num, self.pyc_true_num)
        print(f"pyc files accurency: {pyc_accurency}")


if __name__ == "__main__":
    unittest.main()