Compare commits
14 Commits
89b37ddfd6
...
49408eda9f
Author | SHA1 | Date | |
---|---|---|---|
49408eda9f | |||
d1ac4594e4 | |||
62b77812af | |||
7eb4de8e6c | |||
b99334ed12 | |||
17245a9bcf | |||
b673575fe4 | |||
df65fff2c7 | |||
aeb4a33d98 | |||
e80e83ad51 | |||
8a14ef4341 | |||
e418bbf380 | |||
d30ea0ca61 | |||
40f5c07fa1 |
@ -25,15 +25,25 @@ def find_dangerous_functions(
|
||||
".cpp": {
|
||||
r"\bsystem\(": "high",
|
||||
},
|
||||
".pyc": {
|
||||
r"\bexec\b": "high",
|
||||
r"\beval\b": "high",
|
||||
r"\bos\.system\b": "high",
|
||||
r"\bos\.exec\b": "high",
|
||||
r"\bos\.fork\b": "high",
|
||||
r"\bos\.kill\b": "high",
|
||||
r"\bos\.popen\b": "medium",
|
||||
r"\bos\.spawn\b": "medium",
|
||||
},
|
||||
}
|
||||
risk_patterns = patterns.get(file_extension, {})
|
||||
classified_results = {"high": [], "medium": [], "low": [], "none": []}
|
||||
for line_number, line in enumerate(file_content.split("\n"), start=1):
|
||||
clean_line = remove_comments(line, file_extension)
|
||||
if not clean_line:
|
||||
continue
|
||||
for pattern, risk_level in risk_patterns.items():
|
||||
if re.search(pattern, clean_line):
|
||||
classified_results[risk_level].append((line_number, clean_line))
|
||||
if file_content is not None:
|
||||
for line_number, line in enumerate(file_content.split("\n"), start=1):
|
||||
clean_line = remove_comments(line, file_extension)
|
||||
if not clean_line:
|
||||
continue
|
||||
for pattern, risk_level in risk_patterns.items():
|
||||
if re.search(pattern, clean_line):
|
||||
classified_results[risk_level].append((line_number, clean_line))
|
||||
return classified_results
|
||||
|
||||
|
@ -5,11 +5,16 @@ from reportlab.lib.styles import getSampleStyleSheet
|
||||
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
|
||||
from .Regexdetection import find_dangerous_functions
|
||||
from .GPTdetection import detectGPT
|
||||
from .pyc_detection import disassemble_pyc
|
||||
from .utils import *
|
||||
import sys
|
||||
from colorama import init, Fore, Style
|
||||
from tqdm import tqdm
|
||||
from pathlib import Path
|
||||
|
||||
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"}
|
||||
PYCDC_FLAG = True
|
||||
PYCDC_ADDR_FLAG = True
|
||||
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp", ".pyc"}
|
||||
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
|
||||
ORDERS = [
|
||||
"__import__",
|
||||
@ -325,46 +330,74 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
|
||||
return text_output
|
||||
|
||||
|
||||
def checkModeAndDetect(mode: str, filePath: str, fileExtension: str):
|
||||
def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr: str):
|
||||
# TODO:添加更多方式,这里提高代码的复用性和扩展性
|
||||
if mode == "regex":
|
||||
return find_dangerous_functions(read_file_content(filePath), fileExtension)
|
||||
elif mode == "llm":
|
||||
return detectGPT(read_file_content(filePath))
|
||||
if fileExtension == ".pyc":
|
||||
# 反汇编pyc文件
|
||||
file_content = disassemble_pyc(filePath, pycdc_addr)
|
||||
if file_content == "none":
|
||||
global PYCDC_FLAG
|
||||
PYCDC_FLAG = False
|
||||
return ""
|
||||
elif file_content == "invalid":
|
||||
global PYCDC_ADDR_FLAG
|
||||
PYCDC_ADDR_FLAG = False
|
||||
if mode == "regex":
|
||||
return find_dangerous_functions(file_content, fileExtension)
|
||||
elif mode == "llm":
|
||||
return detectGPT(file_content)
|
||||
else:
|
||||
return find_dangerous_functions(file_content, fileExtension)
|
||||
else:
|
||||
return find_dangerous_functions(read_file_content(filePath), fileExtension)
|
||||
file_content = read_file_content(filePath)
|
||||
if mode == "regex":
|
||||
return find_dangerous_functions(file_content, fileExtension)
|
||||
elif mode == "llm":
|
||||
return detectGPT(file_content)
|
||||
else:
|
||||
return find_dangerous_functions(file_content, fileExtension)
|
||||
|
||||
|
||||
def process_path(path: str, output_format: str, mode: str, output_file=None):
|
||||
def process_path(
|
||||
path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None
|
||||
):
|
||||
results = {"high": [], "medium": [], "low": [], "none": []}
|
||||
if os.path.isdir(path):
|
||||
for root, dirs, files in os.walk(path):
|
||||
for file in files:
|
||||
file_extension = os.path.splitext(file)[1]
|
||||
if file_extension in SUPPORTED_EXTENSIONS:
|
||||
file_path = os.path.join(root, file)
|
||||
# 使用rglob获取所有文件
|
||||
all_files = [
|
||||
file_path
|
||||
for file_path in Path(path).rglob("*")
|
||||
if file_path.suffix in SUPPORTED_EXTENSIONS
|
||||
]
|
||||
|
||||
file_results = checkModeAndDetect(mode, file_path, file_extension)
|
||||
for key in file_results:
|
||||
if key != "none": # Exclude 'none' risk level
|
||||
results[key].extend(
|
||||
[
|
||||
(f"{file_path}: Line {line_num}", line)
|
||||
for line_num, line in file_results[key]
|
||||
]
|
||||
)
|
||||
# 扫描动画
|
||||
for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
|
||||
file_extension = file_path.suffix
|
||||
file_results = checkModeAndDetect(
|
||||
mode, str(file_path), file_extension, pycdc_addr
|
||||
)
|
||||
if file_results is not None:
|
||||
for key in file_results:
|
||||
if key != "none": # Exclude 'none' risk level
|
||||
results[key].extend(
|
||||
[
|
||||
(f"{file_path}: Line {line_num}", line)
|
||||
for line_num, line in file_results[key]
|
||||
]
|
||||
)
|
||||
elif os.path.isfile(path):
|
||||
file_extension = os.path.splitext(path)[1]
|
||||
if file_extension in SUPPORTED_EXTENSIONS:
|
||||
file_results = checkModeAndDetect(mode, path, file_extension)
|
||||
for key in file_results:
|
||||
if key != "none": # Exclude 'none' risk level
|
||||
results[key].extend(
|
||||
[
|
||||
(f"{path}: Line {line_num}", line)
|
||||
for line_num, line in file_results[key]
|
||||
]
|
||||
)
|
||||
file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
|
||||
if file_results is not None:
|
||||
for key in file_results:
|
||||
if key != "none": # Exclude 'none' risk level
|
||||
results[key].extend(
|
||||
[
|
||||
(f"{path}: Line {line_num}", line)
|
||||
for line_num, line in file_results[key]
|
||||
]
|
||||
)
|
||||
else:
|
||||
print("Unsupported file type.")
|
||||
return
|
||||
@ -386,6 +419,9 @@ def main():
|
||||
parser.add_argument(
|
||||
"-m", "--mode", help="Mode of operation:[regex,llm]", default="regex"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p", "--pycdc", help="Path to pycdc.exe to decompile", default=None
|
||||
)
|
||||
args = parser.parse_args()
|
||||
output_format = "txt" # Default output format
|
||||
output_file = None
|
||||
@ -401,7 +437,15 @@ def main():
|
||||
)
|
||||
output_file = args.output.rsplit(".", 1)[0] + ".txt"
|
||||
# 如果未指定输出文件,则输出到 stdout;否则写入文件
|
||||
process_path(args.path, output_format, args.mode, output_file)
|
||||
process_path(args.path, output_format, args.mode, args.pycdc, output_file)
|
||||
if PYCDC_FLAG == False:
|
||||
print(
|
||||
"ERROR: Detected Python 3.11 or above .pyc files. You need to install pycdc and compile it yourself to obtain pycdc."
|
||||
)
|
||||
print("Repo: https://github.com/zrax/pycdc.git")
|
||||
if PYCDC_ADDR_FLAG == False:
|
||||
print("ERROR: The specified pycdc.exe path is not valid")
|
||||
print("Please check your pycdc path.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
49
detection/pyc_detection.py
Normal file
49
detection/pyc_detection.py
Normal file
@ -0,0 +1,49 @@
|
||||
from typing import List, Tuple
|
||||
import uncompyle6
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
from contextlib import redirect_stdout, redirect_stderr
|
||||
|
||||
|
||||
def run_pycdc(exe_path: str, pyc_file: str) -> str:
    """
    Run the pycdc decompiler on a .pyc file and capture what it prints.

    Args:
        exe_path (str): Path to the pycdc executable.
        pyc_file (str): Path to the .pyc file to decompile.

    Returns:
        str: Standard output produced by pycdc, or the sentinel string
        "invalid" when exe_path is not an existing file or cannot be run.
    """
    if not os.path.isfile(exe_path):
        return "invalid"

    try:
        # Pass an argument list and no shell: the .pyc path comes from the
        # tree being scanned, and a name containing quotes, `$(...)` or `;`
        # must reach pycdc verbatim instead of being interpreted by a shell.
        result = subprocess.run(
            [exe_path, pyc_file],
            capture_output=True,
            text=True,
            encoding="utf-8",
            # Decompiled output may contain bytes that are not valid UTF-8;
            # substitute them rather than aborting the scan.
            errors="replace",
        )
    except OSError:
        # The path exists but is not something the OS can execute.
        return "invalid"

    return result.stdout
|
||||
|
||||
|
||||
def disassemble_pyc(file_path: str, pycdc_addr=None) -> str:
    """
    Decompile a .pyc file with uncompyle6, falling back to pycdc on failure.

    Args:
        file_path (str): The path to the .pyc file.
        pycdc_addr (str, optional): Path to a pycdc executable. It is only
            used when uncompyle6 raises while decompiling file_path.

    Returns:
        str: The decompiled source when uncompyle6 succeeds. If uncompyle6
        fails, the sentinel string "none" when no pycdc path was supplied,
        otherwise whatever run_pycdc returns (pycdc's output, or "invalid"
        for an unusable pycdc path).
    """
    output = io.StringIO()
    try:
        uncompyle6.main.decompile_file(file_path, output)
    except Exception:
        # Deliberate best-effort: any uncompyle6 failure triggers the
        # fallback instead of aborting the whole scan.
        if pycdc_addr is None:
            return "none"
        return run_pycdc(pycdc_addr, file_path)
    return output.getvalue()
|
@ -4,7 +4,7 @@ import sys
|
||||
|
||||
def read_file_content(file_path: str) -> str:
|
||||
try:
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
||||
return file.read()
|
||||
except FileNotFoundError:
|
||||
print("Error: File not found.")
|
||||
@ -21,4 +21,4 @@ def remove_comments(code: str, extension: str) -> str:
|
||||
code = re.sub(r"//.*", "", code)
|
||||
code = re.sub(r"/\*.*?\*/", "", code, flags=re.DOTALL)
|
||||
return code.strip()
|
||||
return code.strip()
|
||||
return code.strip()
|
||||
|
@ -83,6 +83,30 @@ class TestBackdoorDetection(unittest.TestCase):
|
||||
self.assertEqual(len(results["medium"]), 0)
|
||||
self.assertEqual(len(results["low"]), 0)
|
||||
|
||||
def test_gpt_env_no_set(self):
    # This check is only meaningful when no API key is present in the
    # environment, so skip it whenever one is configured.
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key is not None:
        self.skipTest("OPENAI_API_KEY is setted")

    # Without a key, detectGPT is expected to raise ValueError.
    self.assertRaises(ValueError, detectGPT, "print('test test')")
|
||||
|
||||
def test_find_dangerous_functions_pyc(self):
    # Source text scanned under the ".pyc" extension: the os.system call
    # on line 2 must be reported as the only finding, at "high" risk.
    source = "import os\nos.system('rm -rf /')\n"

    findings = find_dangerous_functions(source, ".pyc")

    self.assertEqual(
        findings,
        {
            "high": [(2, "os.system('rm -rf /')")],
            "medium": [],
            "low": [],
            "none": [],
        },
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user