feat:完善了gpt检测功能，返回标准格式

2024-04-28 14:55:38 +08:00 · 2024-04-28 14:55:38 +08:00 · a6b67856ef
commit a6b67856ef
parent 7f3591959b
1 changed files with 40 additions and 7 deletions
--- a/detection/GPTdetection.py
+++ b/detection/GPTdetection.py
@@ -1,15 +1,44 @@
+import json
 import os
+import re
+import sys

 import openai

+#utils
+def read_file_content(file_path: str) -> str:
+    try:
+        with open(file_path, "r", encoding="utf-8") as file:
+            return file.read()
+    except FileNotFoundError:
+        print("Error: File not found.")
+        sys.exit(1)
+    except IOError:
+        print("Error: Could not read file.")
+        sys.exit(1)

-def detect_gpt(text):
+
+def remove_comments(code: str, extension: str) -> str:
+    if extension == ".py":
+        return code.split("#")[0].strip()
+    elif extension in {".js", ".cpp"}:
+        code = re.sub(r"//.*", "", code)
+        code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL)
+        return code.strip()
+    return code.strip()
+
+
+def detect_gpt(filename: str):
    client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+    text = read_file_content(filename)
+    # client = openai.OpenAI(api_key="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") #测试用key
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
-                "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.",
+                "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.\nYou should output the result as json format in one line. For example: "
+                           "[{\"Line\": {the line number}, \"Risk\": \"{choose from (high,medium,low)}\",\"Reason\":\"{how it is vulnable}\"}] Each of these three field is required.\n"
+                           "You are required to only output the json format. Do not output any other information.\n"
            },
            {
                "role": "user",
@@ -18,8 +47,12 @@ def detect_gpt(text):
        ],
        model="gpt-3.5-turbo",
    )
-    return response.choices[0].message.content
-#TODO: 解析GPT输出成标准格式
-#TODO: 强化提示词，更改模型输出文本格式
-
-print(detect_gpt("import os\nos.system('rm -rf /')"))
+    try:
+        res_json = json.loads(response.choices[0].message.content)
+    except:
+        print("Error: Could not parse the response. Please try again.")
+        sys.exit(1)
+    classified_results = {"high": [], "medium": [], "low": [], "none": []}
+    for res in res_json:
+        classified_results[res["Risk"]].append((res["Line"], text.split("\n")[res["Line"] - 1].strip()))
+    return classified_results