BackDoorBuster/detection/GPTdetection.py

106 lines
3.6 KiB
Python

import json
import os
import threading
import time
from .utils import *
import openai
# import signal
class TimeoutException(Exception):
    """Error type used by this module to signal that an operation timed out."""
def timeout_handler(signum, frame):
    """Signal-handler callback that turns a delivered signal into an exception.

    The signature matches what ``signal.signal`` expects of a handler; neither
    argument is inspected.

    Args:
        signum: Number of the signal that fired (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutException: Always.
    """
    raise TimeoutException()
def detectGPT(content: str):
    """Ask the chat-completion model to flag risky lines in *content*.

    The source text is sent as the user message together with a fixed system
    prompt that requests a one-line JSON array of findings, each shaped like
    ``{"Line": <int>, "Risk": "high|medium|low", "Reason": "..."}``.

    Args:
        content: Source code to review, as a single string.

    Returns:
        A dict with the keys ``"high"``, ``"medium"``, ``"low"`` and
        ``"none"``.  Each value is a list of ``(line_number, stripped_line)``
        tuples, where ``stripped_line`` is the text of that 1-based line of
        *content* with surrounding whitespace removed.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is not set, if the reply carries no
            content, or if the reply cannot be parsed as a JSON array/object.

    Exceptions raised by the ``openai`` client itself are not caught here.
    No explicit request timeout is configured by this function.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key is None:
        raise ValueError("env OPENAI_API_KEY no set")

    client = openai.OpenAI(base_url="https://api.kpi7.cn/v1", api_key=api_key)
    response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                # The Chinese sentence inside the prompt reads: "Please note:
                # only judge code snippets that cause concrete harm as risky."
                "content": "You are a Python code reviewer.Read the code below and identify any potential security vulnerabilities. Classify them by risk level (high, medium, low, none). Only report the line number and the risk level.\nYou should output the result as json format in one line. For example: "
                '[{"Line": {the line number}, "Risk": "{choose from (high,medium,low)}","Reason":"{how it is vulnable}"}] Each of these three field is required.\n'
                "You are required to only output the json format. Do not output any other information.请注意:只对有具体危害的代码片段判定为有风险。\n"
                "For examples:\nos.system('ls'),subprocess.call(['ls', '-l']),subprocess.call([\"/bin/sh\",\"-i\"]),eval(code),exec(code) and so on.\n"
                "Please IGNORE the risks that dont matter a lot.",
            },
            {
                "role": "user",
                "content": content,
            },
        ],
        model="gpt-4o",
    )

    message_content = response.choices[0].message.content
    if message_content is None:
        raise ValueError("API response content is None")

    try:
        findings = json.loads(_strip_code_fence(message_content))
    except json.JSONDecodeError as err:
        raise ValueError(
            "Error: Could not parse the response. Please try again."
        ) from err

    return _classify_findings(findings, content)


# Markdown code-fence marker (three backticks).
_FENCE = "`" * 3


def _strip_code_fence(raw):
    """Return *raw* without a surrounding Markdown code fence, if it has one."""
    cleaned = raw.strip()
    if (
        len(cleaned) > 2 * len(_FENCE)
        and cleaned.startswith(_FENCE)
        and cleaned.endswith(_FENCE)
    ):
        inner = cleaned[len(_FENCE):-len(_FENCE)]
        tag, newline, rest = inner.partition("\n")
        # The opening fence may carry a language tag such as "json"; drop it.
        if newline and (not tag.strip() or tag.strip().isalnum()):
            inner = rest
        return inner.strip()
    return cleaned


def _classify_findings(findings, source_text):
    """Group parsed model findings by risk level, skipping malformed entries."""
    # A lone finding object is treated as a one-element array.
    if isinstance(findings, dict):
        findings = [findings]
    if not isinstance(findings, list):
        raise ValueError("Error: Could not parse the response. Please try again.")

    source_lines = source_text.split("\n")
    classified = {"high": [], "medium": [], "low": [], "none": []}
    for entry in findings:
        try:
            line_no = int(entry["Line"])
            bucket = classified[str(entry["Risk"]).strip().lower()]
        except (KeyError, TypeError, ValueError, OverflowError):
            # Missing field, non-numeric line, unknown risk label or an entry
            # that is not an object: ignore it rather than lose the others.
            continue
        # Reject 0 and negatives explicitly; negative indexing would otherwise
        # silently report a line counted from the end of the file.
        if 1 <= line_no <= len(source_lines):
            bucket.append((line_no, source_lines[line_no - 1].strip()))
    return classified
def GPTdetectFileList(fileList):
    """Analyse every file in *fileList* on its own thread and merge the findings.

    Each file is read up front with ``read_file_content`` and handed to a
    ``GPTThread`` worker together with one shared result dict.  Workers are
    started 0.1 s apart (presumably to avoid bursting the remote API) and the
    call blocks until all of them have finished.

    Args:
        fileList: Iterable of file paths; each item is converted with ``str``.

    Returns:
        The shared dict with the keys ``"high"``, ``"medium"``, ``"low"`` and
        ``"none"``, each mapping to the list filled in by the workers.
    """
    merged = {"high": [], "medium": [], "low": [], "none": []}

    workers = [
        threading.Thread(
            target=GPTThread,
            args=(path, read_file_content(path), merged),
        )
        for path in map(str, fileList)
    ]

    for worker in workers:
        worker.start()
        # Stagger the thread starts instead of launching them all at once.
        time.sleep(0.1)

    for worker in workers:
        worker.join()

    return merged
def GPTThread(filename, content, results):
    """Thread worker: analyse one file and append its findings to *results*.

    Runs ``detectGPT`` on *content* and, for every risk level except
    ``"none"``, extends ``results[level]`` with
    ``("<filename>: Line <n>", line_text)`` tuples.  Any exception raised along
    the way is printed and swallowed, so this function never raises.

    Args:
        filename: Label used to prefix each reported line.
        content: Source text passed to ``detectGPT``.
        results: Dict of lists keyed by risk level; mutated in place.
    """
    try:
        findings = detectGPT(content)
        for level, hits in findings.items():
            if level == "none":
                # Lines judged harmless are not reported.
                continue
            bucket = results[level]
            tagged = [
                (f"{filename}: Line {line_num}", line) for line_num, line in hits
            ]
            bucket.extend(tagged)
    except Exception as e:
        print(e)