From 7f3591959ba897eefae65d7a78b756fcb07a4a35 Mon Sep 17 00:00:00 2001 From: Tritium0041 Date: Thu, 25 Apr 2024 21:52:44 +0800 Subject: [PATCH 01/17] =?UTF-8?q?feat:=E6=B7=BB=E5=8A=A0gpt=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E5=9F=BA=E7=A1=80=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 25 +++++++++++++++++++++++++ requirements.txt | 3 ++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 detection/GPTdetection.py diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py new file mode 100644 index 0000000..95d3977 --- /dev/null +++ b/detection/GPTdetection.py @@ -0,0 +1,25 @@ +import os + +import openai + + +def detect_gpt(text): + client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + response = client.chat.completions.create( + messages=[ + { + "role": "system", + "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.", + }, + { + "role": "user", + "content": text, + } + ], + model="gpt-3.5-turbo", + ) + return response.choices[0].message.content +#TODO: 解析GPT输出成标准格式 +#TODO: 强化提示词,更改模型输出文本格式 + +print(detect_gpt("import os\nos.system('rm -rf /')")) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 01b4d11..4b24848 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ reportlab -packaging \ No newline at end of file +packaging +openai \ No newline at end of file -- 2.47.2 From a6b67856efc3f46b6f3401375a702bdfbc5e8fe0 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 14:55:38 +0800 Subject: [PATCH 02/17] =?UTF-8?q?feat:=E5=AE=8C=E5=96=84=E4=BA=86gpt?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=E5=8A=9F=E8=83=BD=EF=BC=8C=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E6=A0=87=E5=87=86=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 47 +++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index 95d3977..68695d0 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -1,15 +1,44 @@ +import json import os +import re +import sys import openai +#utils +def read_file_content(file_path: str) -> str: + try: + with open(file_path, "r", encoding="utf-8") as file: + return file.read() + except FileNotFoundError: + print("Error: File not found.") + sys.exit(1) + except IOError: + print("Error: Could not read file.") + sys.exit(1) -def detect_gpt(text): + +def remove_comments(code: str, extension: str) -> str: + if extension == ".py": + return code.split("#")[0].strip() + elif extension in {".js", ".cpp"}: + code = re.sub(r"//.*", "", code) + code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL) + return code.strip() + return code.strip() + + +def detect_gpt(filename: str): client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + text = read_file_content(filename) + # client = openai.OpenAI(api_key="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") #测试用key response = client.chat.completions.create( messages=[ { "role": "system", - "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.", + "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.\nYou should output the result as json format in one line. For example: " + "[{\"Line\": {the line number}, \"Risk\": \"{choose from (high,medium,low)}\",\"Reason\":\"{how it is vulnable}\"}] Each of these three field is required.\n" + "You are required to only output the json format. Do not output any other information.\n" }, { "role": "user", @@ -18,8 +47,12 @@ def detect_gpt(text): ], model="gpt-3.5-turbo", ) - return response.choices[0].message.content -#TODO: 解析GPT输出成标准格式 -#TODO: 强化提示词,更改模型输出文本格式 - -print(detect_gpt("import os\nos.system('rm -rf /')")) \ No newline at end of file + try: + res_json = json.loads(response.choices[0].message.content) + except: + print("Error: Could not parse the response. Please try again.") + sys.exit(1) + classified_results = {"high": [], "medium": [], "low": [], "none": []} + for res in res_json: + classified_results[res["Risk"]].append((res["Line"], text.split("\n")[res["Line"] - 1].strip())) + return classified_results \ No newline at end of file -- 2.47.2 From cafc83e517561a200ae1d34656cee9bae3191d51 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 14:59:53 +0800 Subject: [PATCH 03/17] =?UTF-8?q?fix:=E5=AE=8C=E5=96=84=E4=BA=86=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 将一部分函数移至utils内 --- detection/GPTdetection.py | 24 +----------------------- detection/backdoor_detection.py | 23 +---------------------- detection/utils.py | 24 ++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 45 deletions(-) create mode 100644 detection/utils.py diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index 68695d0..cbba884 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -2,31 +2,9 @@ import json import os import re import sys - +from utils import * import openai -#utils -def read_file_content(file_path: str) -> str: - try: - with open(file_path, "r", encoding="utf-8") as file: - return file.read() - except FileNotFoundError: - print("Error: File not found.") - sys.exit(1) - except IOError: - print("Error: Could not read file.") - sys.exit(1) - - -def remove_comments(code: str, extension: str) -> str: - if extension == ".py": - return code.split("#")[0].strip() - elif extension in {".js", ".cpp"}: - code = re.sub(r"//.*", "", code) - code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL) - return code.strip() - return code.strip() - def detect_gpt(filename: str): client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index 6611263..52109d4 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -1,38 +1,17 @@ import os -import re -import sys from typing import Dict, List, Tuple from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from reportlab.lib import colors - +from utils import * SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] -def read_file_content(file_path: str) -> str: - try: - with open(file_path, "r", encoding="utf-8") as file: - return file.read() - except FileNotFoundError: - print("Error: File not found.") - sys.exit(1) - except IOError: - print("Error: Could not read file.") - sys.exit(1) -def remove_comments(code: str, extension: str) -> str: - if extension == ".py": - return code.split("#")[0].strip() - elif extension in {".js", ".cpp"}: - code = re.sub(r"//.*", "", code) - code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL) - return code.strip() - return code.strip() - def find_dangerous_functions( file_content: str, file_extension: str diff --git a/detection/utils.py b/detection/utils.py new file mode 100644 index 0000000..563e7f0 --- /dev/null +++ b/detection/utils.py @@ -0,0 +1,24 @@ +import re +import sys + + +def read_file_content(file_path: str) -> str: + try: + with open(file_path, "r", encoding="utf-8") as file: + return file.read() + except FileNotFoundError: + print("Error: File not found.") + sys.exit(1) + except IOError: + print("Error: Could not read file.") + sys.exit(1) + + +def remove_comments(code: str, extension: str) -> str: + if extension == ".py": + return code.split("#")[0].strip() + elif extension in {".js", ".cpp"}: + code = re.sub(r"//.*", "", code) + code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL) + return code.strip() + return code.strip() \ No newline at end of file -- 2.47.2 From be59c891e5d8ef2fa1327e8f6a33019d88a74cb8 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 15:08:06 +0800 Subject: [PATCH 04/17] =?UTF-8?q?fix:=E4=BF=AE=E5=A4=8D=E5=AF=BC=E5=85=A5u?= =?UTF-8?q?tils?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 2 +- detection/backdoor_detection.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index cbba884..cdd7cf9 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -2,7 +2,7 @@ import json import os import re import sys -from utils import * +from .utils import * import openai diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index 52109d4..8f41aff 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -5,7 +5,7 @@ from reportlab.pdfgen import canvas from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from reportlab.lib import colors -from utils import * +from .utils import * SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] -- 2.47.2 From 135a07219d524c93ed117af7d2af273f0c0f66bd Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 15:37:11 +0800 Subject: [PATCH 05/17] =?UTF-8?q?test:=E6=B7=BB=E5=8A=A0GPT=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 6 +++++- tests/python-test.yml | 3 +++ tests/test_backdoor_detection.py | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index cdd7cf9..ead9343 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -7,8 +7,12 @@ import openai def detect_gpt(filename: str): + content = read_file_content(filename) + return detectGPT(content) + +def detectGPT(content: str): client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - text = read_file_content(filename) + text = content # client = openai.OpenAI(api_key="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") #测试用key response = client.chat.completions.create( messages=[ diff --git a/tests/python-test.yml b/tests/python-test.yml index 1042ee4..18d550a 100644 --- a/tests/python-test.yml +++ b/tests/python-test.yml @@ -16,3 +16,6 @@ jobs: run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple - name: Run tests run: python -m unittest discover -s tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + http_proxy: http://192.168.1.3:10809 \ No newline at end of file diff --git a/tests/test_backdoor_detection.py b/tests/test_backdoor_detection.py index abafaa3..b3ca532 100644 --- a/tests/test_backdoor_detection.py +++ b/tests/test_backdoor_detection.py @@ -1,6 +1,7 @@ import unittest from detection.backdoor_detection import find_dangerous_functions +from detection.GPTdetection import * class TestBackdoorDetection(unittest.TestCase): @@ -55,6 +56,25 @@ class TestBackdoorDetection(unittest.TestCase): results["medium"], ) + def test_gpt_risk_detection(self): + content = """import os + os.system('rm -rf /') # high risk + exec('print("Hello")') # high risk + eval('2 + 2') # high risk + """ + results = detectGPT(content) + self.assertEqual(len(results["high"]), 3) + + def test_gpt_no_risk_detection(self): + content = """a = 10 + b = a + 5 + print('This should not be detected as risky.') + """ + results = detectGPT(content) + self.assertEqual(len(results["high"]), 0) + self.assertEqual(len(results["medium"]), 0) + self.assertEqual(len(results["low"]), 0) + if __name__ == "__main__": unittest.main() -- 2.47.2 From 4abd93f688beb857dd3b76e52a177cd6b272248b Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Sun, 28 Apr 2024 15:48:15 +0800 Subject: [PATCH 06/17] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E5=BA=93=EF=BC=8C=E6=B7=BB=E5=8A=A0=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E5=88=A4=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index cdd7cf9..ecb6674 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -1,7 +1,5 @@ import json import os -import re -import sys from .utils import * import openai @@ -15,22 +13,27 @@ def detect_gpt(filename: str): { "role": "system", "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.\nYou should output the result as json format in one line. For example: " - "[{\"Line\": {the line number}, \"Risk\": \"{choose from (high,medium,low)}\",\"Reason\":\"{how it is vulnable}\"}] Each of these three field is required.\n" - "You are required to only output the json format. Do not output any other information.\n" + '[{"Line": {the line number}, "Risk": "{choose from (high,medium,low)}","Reason":"{how it is vulnable}"}] Each of these three field is required.\n' + "You are required to only output the json format. Do not output any other information.\n", }, { "role": "user", "content": text, - } + }, ], model="gpt-3.5-turbo", ) try: - res_json = json.loads(response.choices[0].message.content) - except: - print("Error: Could not parse the response. Please try again.") - sys.exit(1) + message_content = response.choices[0].message.content + if message_content is None: + raise ValueError("API response content is None") + res_json = json.loads(message_content) + except json.JSONDecodeError: + raise ValueError("Error: Could not parse the response. Please try again.") + classified_results = {"high": [], "medium": [], "low": [], "none": []} for res in res_json: - classified_results[res["Risk"]].append((res["Line"], text.split("\n")[res["Line"] - 1].strip())) - return classified_results \ No newline at end of file + classified_results[res["Risk"]].append( + (res["Line"], text.split("\n")[res["Line"] - 1].strip()) + ) + return classified_results -- 2.47.2 From 9e6b13d80e6cc7cfa7ff96394d50fcc0dcb602ee Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 16:06:53 +0800 Subject: [PATCH 07/17] =?UTF-8?q?fix:=E4=BF=AE=E6=94=B9action=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0env?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/python-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 1042ee4..2546211 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -16,3 +16,6 @@ jobs: run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple - name: Run tests run: python -m unittest discover -s tests + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + http_proxy: http://192.168.1.3:10809 -- 2.47.2 From 18454a02283f7429a6f9cda3dbf697f5f0ebabf6 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 21:53:43 +0800 Subject: [PATCH 08/17] =?UTF-8?q?feat:=E6=9B=B4=E6=94=B9=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=88=86=E5=B8=83=EF=BC=8C=E5=AE=9E=E7=8E=B0=E6=A8=A1=E5=9D=97?= =?UTF-8?q?=E5=8C=96=E6=B7=BB=E5=8A=A0=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 4 --- detection/Regexdetection.py | 37 +++++++++++++++++++ detection/backdoor_detection.py | 63 +++++++++------------------------ 3 files changed, 54 insertions(+), 50 deletions(-) create mode 100644 detection/Regexdetection.py diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index b7e6648..e8fca6a 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -4,10 +4,6 @@ from .utils import * import openai -def detect_gpt(filename: str): - content = read_file_content(filename) - return detectGPT(content) - def detectGPT(content: str): client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) text = content diff --git a/detection/Regexdetection.py b/detection/Regexdetection.py new file mode 100644 index 0000000..63c01b9 --- /dev/null +++ b/detection/Regexdetection.py @@ -0,0 +1,37 @@ +import re +from typing import Dict, List, Tuple +from .utils import remove_comments + +def find_dangerous_functions( + file_content: str, file_extension: str +) -> Dict[str, List[Tuple[int, str]]]: + patterns = { + ".py": { + r"\bsystem\(": "high", + r"\bexec\(": "high", + r"\bpopen\(": "medium", + r"\beval\(": "high", + r"\bsubprocess\.run\(": "medium", + r"\b__getattribute__\(": "high", + r"\bgetattr\(": "medium", + r"\b__import__\(": "high", + }, + ".js": { + r"\beval\(": "high", + r"\bexec\(": "high", + r"\bchild_process\.exec\(": "high", + }, + ".cpp": { + r"\bsystem\(": "high", + }, + } + risk_patterns = patterns.get(file_extension, {}) + classified_results = {"high": [], "medium": [], "low": [], "none": []} + for line_number, line in enumerate(file_content.split("\n"), start=1): + clean_line = remove_comments(line, file_extension) + if not clean_line: + continue + for pattern, risk_level in risk_patterns.items(): + if re.search(pattern, clean_line): + classified_results[risk_level].append((line_number, clean_line)) + return classified_results \ No newline at end of file diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index 8f41aff..00937b7 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -5,49 +5,14 @@ from reportlab.pdfgen import canvas from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from reportlab.lib import colors +from .Regexdetection import find_dangerous_functions +from .GPTdetection import detectGPT from .utils import * + SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] - - - -def find_dangerous_functions( - file_content: str, file_extension: str -) -> Dict[str, List[Tuple[int, str]]]: - patterns = { - ".py": { - r"\bsystem\(": "high", - r"\bexec\(": "high", - r"\bpopen\(": "medium", - r"\beval\(": "high", - r"\bsubprocess\.run\(": "medium", - r"\b__getattribute__\(": "high", - r"\bgetattr\(": "medium", - r"\b__import__\(": "high", - }, - ".js": { - r"\beval\(": "high", - r"\bexec\(": "high", - r"\bchild_process\.exec\(": "high", - }, - ".cpp": { - r"\bsystem\(": "high", - }, - } - risk_patterns = patterns.get(file_extension, {}) - classified_results = {"high": [], "medium": [], "low": [], "none": []} - for line_number, line in enumerate(file_content.split("\n"), start=1): - clean_line = remove_comments(line, file_extension) - if not clean_line: - continue - for pattern, risk_level in risk_patterns.items(): - if re.search(pattern, clean_line): - classified_results[risk_level].append((line_number, clean_line)) - return classified_results - - def generate_text_content(results): text_output = "Security Analysis Report\n" for risk_level, entries in results.items(): @@ -153,7 +118,15 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None): return text_output -def process_path(path: str, output_format: str, output_file=None): +def checkModeAndDetect(mode: str,filePath: str,fileExtension: str): + #TODO:添加更多方式,这里提高代码的复用性和扩展性 + if mode == "regex": + return find_dangerous_functions(read_file_content(filePath), fileExtension) + elif mode == "llm": + return detectGPT(read_file_content(filePath)) + + +def process_path(path: str, output_format: str, mode: str, output_file=None): results = {"high": [], "medium": [], "low": [], "none": []} if os.path.isdir(path): for root, dirs, files in os.walk(path): @@ -161,9 +134,8 @@ def process_path(path: str, output_format: str, output_file=None): file_extension = os.path.splitext(file)[1] if file_extension in SUPPORTED_EXTENSIONS: file_path = os.path.join(root, file) - file_results = find_dangerous_functions( - read_file_content(file_path), file_extension - ) + + file_results = checkModeAndDetect(mode,file_path,file_extension) for key in file_results: if key != "none": # Exclude 'none' risk level results[key].extend( @@ -175,9 +147,7 @@ def process_path(path: str, output_format: str, output_file=None): elif os.path.isfile(path): file_extension = os.path.splitext(path)[1] if file_extension in SUPPORTED_EXTENSIONS: - file_results = find_dangerous_functions( - read_file_content(path), file_extension - ) + file_results = checkModeAndDetect(mode,path,file_extension) for key in file_results: if key != "none": # Exclude 'none' risk level results[key].extend( @@ -202,6 +172,7 @@ def main(): parser = argparse.ArgumentParser(description="Backdoor detection tool.") parser.add_argument("path", help="Path to the code to analyze") parser.add_argument("-o", "--output", help="Output file path", default=None) + parser.add_argument("-m", "--mode", help="Mode of operation:[regex,llm]", default="regex") args = parser.parse_args() output_format = "txt" # Default output format output_file = None @@ -216,7 +187,7 @@ def main(): "Your input file format was incorrect, the output has been saved as a TXT file." ) output_file = args.output.rsplit(".", 1)[0] + ".txt" - process_path(args.path, output_format, output_file) + process_path(args.path, output_format, args.mode, output_file) if __name__ == "__main__": -- 2.47.2 From b3f4a77a73e02573e2c39007d314d68511b7ea49 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Sun, 28 Apr 2024 22:06:11 +0800 Subject: [PATCH 09/17] =?UTF-8?q?docs:=E5=A2=9E=E5=8A=A0=E9=80=89=E6=8B=A9?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=E6=A8=A1=E5=BC=8F=E7=9A=84=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/usage.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index c905362..f712e82 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,7 +26,7 @@ pip install packaging reportlab **命令格式**: ```bash -python requirements_detection.py -o +python requirements_detection.py -o ``` **参数说明**: @@ -35,6 +35,7 @@ python requirements_detection.py -o < - ``: 项目的 `requirements.txt` 文件路径。 - ``: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`。 + **示例**: ```bash @@ -46,18 +47,19 @@ python requirements_detection.py vulnerabilities_data.txt requirements.txt -o ou **命令格式**: ```bash -python backdoor_detection.py -o +python backdoor_detection.py -o -m ``` **参数说明**: - ``: 代码文件或目录的路径。 - ``: 指定输出结果的文件路径和格式,支持的格式有 `.txt`, `.md`, `.html`, `.pdf`。 +- ``: 指定检测模式,目前支持的模式有 `regex` 和 `llm`。 **示例**: ```bash -python backdoor_detection.py ./src -o output/report.pdf +python backdoor_detection.py ./src -o output/report.pdf -m regex ``` ## 结果解读 -- 2.47.2 From 698cf1c75c55644c0c4e522282d343a1211b6c75 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 11:09:23 +0800 Subject: [PATCH 10/17] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E4=B8=80?= =?UTF-8?q?=E4=BA=9B=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 附带完成了一些格式化 --- detection/backdoor_detection.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index 00937b7..b3c7e46 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -1,10 +1,8 @@ import os from typing import Dict, List, Tuple from reportlab.lib.pagesizes import letter -from reportlab.pdfgen import canvas from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate -from reportlab.lib import colors from .Regexdetection import find_dangerous_functions from .GPTdetection import detectGPT from .utils import * @@ -25,7 +23,7 @@ def generate_text_content(results): def output_results(results, output_format, output_file=None): if output_file: - file_name, file_extension = os.path.splitext(output_file) + file_name = os.path.splitext(output_file) if output_format not in OUTPUT_FORMATS: output_format = "txt" output_file = f"{file_name}.txt" @@ -118,12 +116,14 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None): return text_output -def checkModeAndDetect(mode: str,filePath: str,fileExtension: str): - #TODO:添加更多方式,这里提高代码的复用性和扩展性 +def checkModeAndDetect(mode: str, filePath: str, fileExtension: str): + # TODO:添加更多方式,这里提高代码的复用性和扩展性 if mode == "regex": return find_dangerous_functions(read_file_content(filePath), fileExtension) elif mode == "llm": return detectGPT(read_file_content(filePath)) + else: + return find_dangerous_functions(read_file_content(filePath), fileExtension) def process_path(path: str, output_format: str, mode: str, output_file=None): @@ -135,7 +135,7 @@ def process_path(path: str, output_format: str, mode: str, output_file=None): if file_extension in SUPPORTED_EXTENSIONS: file_path = os.path.join(root, file) - file_results = checkModeAndDetect(mode,file_path,file_extension) + file_results = checkModeAndDetect(mode, file_path, file_extension) for key in file_results: if key != "none": # Exclude 'none' risk level results[key].extend( @@ -147,7 +147,7 @@ def process_path(path: str, output_format: str, mode: str, output_file=None): elif os.path.isfile(path): file_extension = os.path.splitext(path)[1] if file_extension in SUPPORTED_EXTENSIONS: - file_results = checkModeAndDetect(mode,path,file_extension) + file_results = checkModeAndDetect(mode, path, file_extension) for key in file_results: if key != "none": # Exclude 'none' risk level results[key].extend( @@ -172,7 +172,9 @@ def main(): parser = argparse.ArgumentParser(description="Backdoor detection tool.") parser.add_argument("path", help="Path to the code to analyze") parser.add_argument("-o", "--output", help="Output file path", default=None) - parser.add_argument("-m", "--mode", help="Mode of operation:[regex,llm]", default="regex") + parser.add_argument( + "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" + ) args = parser.parse_args() output_format = "txt" # Default output format output_file = None @@ -187,7 +189,8 @@ def main(): "Your input file format was incorrect, the output has been saved as a TXT file." ) output_file = args.output.rsplit(".", 1)[0] + ".txt" - process_path(args.path, output_format, args.mode, output_file) + # 如果未指定输出文件,则输出到 stdout;否则写入文件 + process_path(args.path, output_format, args.mode, output_file) if __name__ == "__main__": -- 2.47.2 From bc067743abbb3e1b2cc9b1f3035cc5d34f72c071 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 11:13:36 +0800 Subject: [PATCH 11/17] fix: add sys lib --- detection/backdoor_detection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index b3c7e46..ef3e32b 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -6,6 +6,7 @@ from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from .Regexdetection import find_dangerous_functions from .GPTdetection import detectGPT from .utils import * +import sys SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] -- 2.47.2 From f0a915c0fd8a08efe0d9c5784b07af233d6396ad Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 11:13:50 +0800 Subject: [PATCH 12/17] style: format style --- detection/Regexdetection.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/detection/Regexdetection.py b/detection/Regexdetection.py index 63c01b9..b1b1549 100644 --- a/detection/Regexdetection.py +++ b/detection/Regexdetection.py @@ -2,6 +2,7 @@ import re from typing import Dict, List, Tuple from .utils import remove_comments + def find_dangerous_functions( file_content: str, file_extension: str ) -> Dict[str, List[Tuple[int, str]]]: @@ -34,4 +35,5 @@ def find_dangerous_functions( for pattern, risk_level in risk_patterns.items(): if re.search(pattern, clean_line): classified_results[risk_level].append((line_number, clean_line)) - return classified_results \ No newline at end of file + return classified_results + -- 2.47.2 From 7a420b9bf892a9f963612dfa353e0159c85dca9a Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 11:52:27 +0800 Subject: [PATCH 13/17] =?UTF-8?q?remove:=20=E5=88=A0=E9=99=A4=E6=97=A0?= =?UTF-8?q?=E7=94=A8=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/python-test.yml | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 tests/python-test.yml diff --git a/tests/python-test.yml b/tests/python-test.yml deleted file mode 100644 index 18d550a..0000000 --- a/tests/python-test.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Python application test - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - build: - runs-on: "ubuntu-latest" - - steps: - - uses: actions/checkout@v2 - - name: Install dependencies - run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple - - name: Run tests - run: python -m unittest discover -s tests - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - http_proxy: http://192.168.1.3:10809 \ No newline at end of file -- 2.47.2 From d771976b359abab957721e8db4a46eaa36e31659 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 11:52:46 +0800 Subject: [PATCH 14/17] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E5=AE=B9?= =?UTF-8?q?=E9=94=99=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 5 ++++- tests/test_backdoor_detection.py | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index e8fca6a..4d8afee 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -5,7 +5,10 @@ import openai def detectGPT(content: str): - client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + api_key = os.getenv("OPENAI_API_KEY") + if api_key is None: + raise ValueError("env OPENAI_API_KEY no set") + client = openai.OpenAI(api_key=api_key) text = content # client = openai.OpenAI(api_key="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") #测试用key response = client.chat.completions.create( diff --git a/tests/test_backdoor_detection.py b/tests/test_backdoor_detection.py index b3ca532..6e2fe60 100644 --- a/tests/test_backdoor_detection.py +++ b/tests/test_backdoor_detection.py @@ -1,7 +1,9 @@ import unittest +import warnings from detection.backdoor_detection import find_dangerous_functions -from detection.GPTdetection import * +from detection.GPTdetection import detectGPT +import os class TestBackdoorDetection(unittest.TestCase): @@ -57,6 +59,9 @@ class TestBackdoorDetection(unittest.TestCase): ) def test_gpt_risk_detection(self): + if os.getenv("OPENAI_API_KEY") is None: + warnings.warn("OPENAI_API_KEY is not set, test skipped.", UserWarning) + self.skipTest("OPENAI_API_KEY is not set") content = """import os os.system('rm -rf /') # high risk exec('print("Hello")') # high risk @@ -66,6 +71,9 @@ class TestBackdoorDetection(unittest.TestCase): self.assertEqual(len(results["high"]), 3) def test_gpt_no_risk_detection(self): + if os.getenv("OPENAI_API_KEY") is None: + warnings.warn("OPENAI_API_KEY is not set, test skipped.", UserWarning) + self.skipTest("OPENAI_API_KEY is not set") content = """a = 10 b = a + 5 print('This should not be detected as risky.') @@ -75,6 +83,11 @@ class TestBackdoorDetection(unittest.TestCase): self.assertEqual(len(results["medium"]), 0) self.assertEqual(len(results["low"]), 0) + def test_gpt_env_no_set(self): + content = "print('test test')" + with self.assertRaises(ValueError): + detectGPT(content) + if __name__ == "__main__": unittest.main() -- 2.47.2 From 1f9ccc53c132b05b8538ee4aca59fb7f0db33157 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 18:22:07 +0800 Subject: [PATCH 15/17] =?UTF-8?q?update:=20=E4=BD=BF=E7=94=A8=E6=9C=AC?= =?UTF-8?q?=E5=9C=B0actions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/python-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 2546211..42d9e64 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: "ubuntu-latest" steps: - - uses: actions/checkout@v2 + - uses: https://git.mamahaha.work/actions/checkout@v2 - name: Install dependencies run: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple - name: Run tests -- 2.47.2 From 7523e0c06a272d75d6f6361b59159f76c1254e28 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 18:37:04 +0800 Subject: [PATCH 16/17] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E8=B6=85?= =?UTF-8?q?=E6=97=B6=E6=8A=A5=E9=94=99=EF=BC=8C=E4=BF=AE=E5=A4=8D=E4=B8=80?= =?UTF-8?q?=E4=B8=AA=E9=94=99=E8=AF=AF=E7=9A=84=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/GPTdetection.py | 24 ++++++++++++++++++++++++ tests/test_backdoor_detection.py | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/detection/GPTdetection.py b/detection/GPTdetection.py index 4d8afee..983e847 100644 --- a/detection/GPTdetection.py +++ b/detection/GPTdetection.py @@ -2,12 +2,29 @@ import json import os from .utils import * import openai +import signal + + +class TimeoutException(Exception): + """Custom exception to handle timeouts.""" + + pass + + +def timeout_handler(signum, frame): + """Handle the SIGALRM signal by raising a TimeoutException.""" + raise TimeoutException def detectGPT(content: str): api_key = os.getenv("OPENAI_API_KEY") if api_key is None: raise ValueError("env OPENAI_API_KEY no set") + + # Set alarm timer + signal.signal(signal.SIGTERM, timeout_handler) + signal.alarm(10) + client = openai.OpenAI(api_key=api_key) text = content # client = openai.OpenAI(api_key="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") #测试用key @@ -31,9 +48,16 @@ def detectGPT(content: str): if message_content is None: raise ValueError("API response content is None") res_json = json.loads(message_content) + except json.JSONDecodeError: raise ValueError("Error: Could not parse the response. Please try again.") + except TimeoutException: + raise TimeoutException("The api call timed out") + + finally: + signal.alarm(0) + classified_results = {"high": [], "medium": [], "low": [], "none": []} for res in res_json: classified_results[res["Risk"]].append( diff --git a/tests/test_backdoor_detection.py b/tests/test_backdoor_detection.py index 6e2fe60..7bbb0d4 100644 --- a/tests/test_backdoor_detection.py +++ b/tests/test_backdoor_detection.py @@ -1,6 +1,8 @@ import unittest import warnings +from pydantic import NoneStr + from detection.backdoor_detection import find_dangerous_functions from detection.GPTdetection import detectGPT import os @@ -84,6 +86,8 @@ class TestBackdoorDetection(unittest.TestCase): self.assertEqual(len(results["low"]), 0) def test_gpt_env_no_set(self): + if os.getenv("OPENAI_API_KEY") is not None: + self.skipTest("OPENAI_API_KEY is setted") content = "print('test test')" with self.assertRaises(ValueError): detectGPT(content) -- 2.47.2 From ebfc70eeae46c153b040e7c5153dca2e7263d788 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 29 Apr 2024 18:48:19 +0800 Subject: [PATCH 17/17] fix: remove pydantic --- tests/test_backdoor_detection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_backdoor_detection.py b/tests/test_backdoor_detection.py index 7bbb0d4..ebbcd58 100644 --- a/tests/test_backdoor_detection.py +++ b/tests/test_backdoor_detection.py @@ -1,8 +1,6 @@ import unittest import warnings -from pydantic import NoneStr - from detection.backdoor_detection import find_dangerous_functions from detection.GPTdetection import detectGPT import os -- 2.47.2