169 lines
5.4 KiB
Python
169 lines
5.4 KiB
Python
import time
|
|
import unittest
|
|
import shutil
|
|
import os
|
|
import threading
|
|
import re
|
|
|
|
from detection.utils import read_file_content
|
|
from .final_tests_util import (
|
|
clone_repo,
|
|
Path,
|
|
inject_pickle_backdoor,
|
|
inject_random_backdoor,
|
|
inject_pyc_backdoor,
|
|
backdoors,
|
|
)
|
|
from detection.Regexdetection import find_dangerous_functions
|
|
from detection.GPTdetection import detectGPT
|
|
|
|
|
|
def GPTdetectFileList(fileList):
    """Run GPT detection over several files concurrently and collect the results.

    One worker thread is created per file. Each worker runs ``GPTThread`` on
    the file's content, which appends the detection outcome to a shared list.

    Args:
        fileList: Iterable of file paths; each entry is converted with
            ``str()`` before being handed to ``read_file_content``.

    Returns:
        list: The values appended by the workers, in the order the workers
        finished (not necessarily the order of ``fileList``). A worker whose
        detection raised contributes nothing.
    """
    results = []
    threads = []
    for file in fileList:
        content = read_file_content(str(file))
        # Pass the function object itself. Writing ``GPTThread()`` here would
        # call it immediately with no arguments and raise TypeError before
        # any thread is created.
        threads.append(threading.Thread(target=GPTThread, args=(content, results)))
    for thread in threads:
        thread.start()
        # Stagger the workers by half a second each.
        # NOTE(review): presumably to avoid bursting requests at the GPT
        # backend -- confirm before removing.
        time.sleep(0.5)
    for thread in threads:
        thread.join()
    return results
|
|
|
|
|
|
def GPTThread(content, results):
    """Thread worker: run ``detectGPT`` on *content* and store the outcome.

    Args:
        content: Source text to analyse.
        results: Shared list; the detection outcome is appended on success.

    Any exception is printed rather than propagated, so a failing detection
    leaves ``results`` unchanged.
    """
    try:
        outcome = detectGPT(content)
        results.append(outcome)
    except Exception as err:
        print(err)
|
|
|
|
|
|
class TestFinalTests(unittest.TestCase):
    """End-to-end run of the detector against a repository seeded with backdoors.

    ``setUp`` copies a reference repository into a scratch directory, calls
    the ``inject_*_backdoor`` helpers on it and runs the ``detection`` command
    line once. The test then prints detection ratios for ``.py``, ``.pickle``
    and ``.pyc`` files and compares the GPT count with the regex count.
    """

    # Severity buckets expected in a detection result mapping.
    _SEVERITY_LEVELS = ("high", "medium", "low")

    @staticmethod
    def _has_findings(results) -> bool:
        """Return True when any severity bucket of *results* is non-empty."""
        return any(
            len(results[level]) > 0 for level in TestFinalTests._SEVERITY_LEVELS
        )

    @staticmethod
    def _ratio(count, total):
        """Return ``count / total``, or ``0.0`` when *total* is zero."""
        return count / total if total else 0.0

    @staticmethod
    def _count_hits(lines, marker, name_regex):
        """Count report lines mentioning a file kind.

        Args:
            lines: Lines of the detector report.
            marker: Substring identifying the file kind (e.g. ``"pyc"``).
            name_regex: Pattern matching the injected file names.

        Returns:
            tuple: ``(detected, correct)`` -- lines containing *marker*, and
            lines matching *name_regex*.
        """
        compiled = re.compile(name_regex)
        detected = sum(1 for line in lines if marker in line)
        correct = sum(1 for line in lines if compiled.search(line))
        return detected, correct

    def setUp(self) -> None:
        """Prepare a freshly injected copy of the reference repo and scan it."""
        self.path = "./tmp/repo/"
        # Start from a clean working copy on every run.
        shutil.rmtree(self.path, ignore_errors=True)
        # The pristine clone is cached under /tmp so it is downloaded once.
        if not os.path.exists("/tmp/Python/"):
            clone_repo("https://github.com/TheAlgorithms/Python.git", "/tmp/Python")
        shutil.copytree("/tmp/Python", self.path)
        sampleRate = 0.1

        # TODO: preprocessing

        self.inject_result = inject_random_backdoor(self.path, sample_rate=sampleRate)
        # Used below as the denominators of the pickle / pyc ratios.
        self.pickle_true_num = inject_pickle_backdoor(self.path)
        self.pyc_true_num = inject_pyc_backdoor(self.path)
        self.injectedNum = len(self.inject_result)
        print(self.injectedNum)
        project_path = Path(self.path)

        self.all_python_files = list(project_path.rglob("*.py"))
        self.py_files_num = len(self.all_python_files)

        all_pickle_files = list(project_path.rglob("*.pickle"))
        self.pickle_files_num = len(all_pickle_files)

        all_pyc_files = list(project_path.rglob("*.pyc"))
        self.pyc_files_num = len(all_pyc_files)

        # Run the detector CLI once; the test reads its report afterwards.
        os.system(
            "python -m detection " + self.path + " -o " + self.path + "output.txt"
        )

    def test_final_tests_pycode(self) -> None:
        """Print detection ratios and compare the GPT count with the regex count."""
        # Regex pass over every python file.
        detectedNum = 0
        possibly_dangerous_file = []
        for file in self.all_python_files:
            content = read_file_content(str(file))
            if self._has_findings(find_dangerous_functions(content, ".py")):
                detectedNum += 1
                possibly_dangerous_file.append(file)
        print(self._ratio(detectedNum, self.py_files_num))

        # Sequential GPT pass over the files the regex pass flagged.
        GPTdetectedNum = 0
        for candidate in possibly_dangerous_file:
            content = read_file_content(str(candidate))
            try:
                if self._has_findings(detectGPT(content)):
                    GPTdetectedNum += 1
                    print(GPTdetectedNum)
            except Exception as e:
                # Best effort: one failing GPT call must not abort the run,
                # but it should be visible instead of silently dropped.
                print(f"GPT detection failed for {candidate}: {e}")

        # The detector report is read once and shared by all checks below.
        with open(self.path + "output.txt", "r") as f:
            lines = f.readlines()

        # Injected code in python files.
        # NOTE: the *_correct_num tallies are computed but not yet reported
        # or asserted.
        pattern = re.compile(r"\w+\.py: Line \d+: (.+)")
        injected_detected_num = 0
        injected_correct_num = 0
        for line in lines:
            if "py:" not in line:
                continue
            injected_detected_num += 1
            match = pattern.search(line)
            if match is None:
                # Without a match there is no command to compare; an empty
                # string would be a substring of every backdoor.
                continue
            command = match.group(1)
            if any(command in backdoor for backdoor in backdoors):
                injected_correct_num += 1

        injected_accurency = self._ratio(injected_detected_num, self.py_files_num)
        print(f"injected files accurency: {injected_accurency}")

        # Threaded GPT pass. Its hits are added on top of the sequential
        # pass above, so a file can be counted twice.
        # NOTE(review): confirm whether both passes are intended.
        gpt_batch_ok = False
        try:
            GPTresult = GPTdetectFileList(possibly_dangerous_file)
            for result in GPTresult:
                # Same criterion as the sequential pass; the length of the
                # result mapping itself says nothing about findings.
                if self._has_findings(result):
                    GPTdetectedNum += 1
            print(GPTdetectedNum)
            gpt_batch_ok = True
        except Exception as e:
            print(f"GPT batch detection failed: {e}")

        # Pickle files.
        pickle_detected_num, pickle_correct_num = self._count_hits(
            lines, "pickle", r"backdoor\d*\.pickle"
        )
        pickle_accurency = self._ratio(pickle_detected_num, self.pickle_true_num)
        print(f"pickle files accurency: {pickle_accurency}")

        # Pyc files.
        pyc_detected_num, pyc_correct_num = self._count_hits(
            lines, "pyc", r"backdoor\d*\.pyc"
        )
        pyc_accurency = self._ratio(pyc_detected_num, self.pyc_true_num)
        print(f"pyc files accurency: {pyc_accurency}")

        # Asserted outside any ``except Exception`` so that a failure is
        # reported (AssertionError is itself an Exception), and last so the
        # metrics above are always printed. Skipped when the batch pass
        # could not run at all.
        if gpt_batch_ok:
            self.assertGreaterEqual(GPTdetectedNum, detectedNum)
|
|
|
|
|
|
# Script entry point: hand control to the unittest runner when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|