Compare commits

..

14 Commits

Author SHA1 Message Date
49408eda9f Merge pull request 'feature/rglob' (#29) from feature/rglob into main
Reviewed-on: #29
Reviewed-by: sangge <sangge@noreply.localhost>
Reviewed-by: ccyj <ccyj@noreply.localhost>
2024-06-03 20:24:42 +08:00
dqy
d1ac4594e4 feat: 使用rglob扫描 2024-06-03 16:29:35 +08:00
dqy
62b77812af fix: 去除扫描单个文件进度条 2024-06-03 11:41:19 +08:00
dqy
7eb4de8e6c style: 添加扫描动画 2024-06-02 20:24:03 +08:00
dqy
b99334ed12 fix: 解决unicode字符报错 2024-06-02 19:54:47 +08:00
dqy
17245a9bcf fix: 解决unicode编码错误 2024-05-31 21:13:01 +08:00
dqy
b673575fe4 fix: 删除无效模块 2024-05-31 20:36:42 +08:00
dqy
df65fff2c7 feat: 添加对python 3.11的反编译模块 2024-05-31 20:33:47 +08:00
dqy
aeb4a33d98 Merge branch 'main' of https://git.mamahaha.work/sangge/BackDoorBuster into feature/pyc-detection 2024-05-31 19:20:35 +08:00
dqy
e80e83ad51 Merge branch 'main' of https://git.mamahaha.work/sangge/BackDoorBuster into feature/pyc-detection
Some checks failed
Python application test / build (pull_request) Failing after 52s
2024-05-30 16:13:40 +08:00
dqy
8a14ef4341 fix: 修改相对模块引入 2024-05-29 20:36:09 +08:00
dqy
e418bbf380 test: 添加反汇编之后的正则匹配测试 2024-05-29 20:32:24 +08:00
dqy
d30ea0ca61 feat: 添加反汇编模块依赖 2024-05-29 20:31:42 +08:00
dqy
40f5c07fa1 feat: 添加对pyc文件的反汇编功能模块 2024-05-29 20:08:40 +08:00
5 changed files with 169 additions and 42 deletions

View File

@@ -25,9 +25,20 @@ def find_dangerous_functions(
".cpp": { ".cpp": {
r"\bsystem\(": "high", r"\bsystem\(": "high",
}, },
".pyc": {
r"\bexec\b": "high",
r"\beval\b": "high",
r"\bos\.system\b": "high",
r"\bos\.exec\b": "high",
r"\bos\.fork\b": "high",
r"\bos\.kill\b": "high",
r"\bos\.popen\b": "medium",
r"\bos\.spawn\b": "medium",
},
} }
risk_patterns = patterns.get(file_extension, {}) risk_patterns = patterns.get(file_extension, {})
classified_results = {"high": [], "medium": [], "low": [], "none": []} classified_results = {"high": [], "medium": [], "low": [], "none": []}
if file_content is not None:
for line_number, line in enumerate(file_content.split("\n"), start=1): for line_number, line in enumerate(file_content.split("\n"), start=1):
clean_line = remove_comments(line, file_extension) clean_line = remove_comments(line, file_extension)
if not clean_line: if not clean_line:
@@ -36,4 +47,3 @@ def find_dangerous_functions(
if re.search(pattern, clean_line): if re.search(pattern, clean_line):
classified_results[risk_level].append((line_number, clean_line)) classified_results[risk_level].append((line_number, clean_line))
return classified_results return classified_results

View File

@@ -5,11 +5,16 @@ from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate
from .Regexdetection import find_dangerous_functions from .Regexdetection import find_dangerous_functions
from .GPTdetection import detectGPT from .GPTdetection import detectGPT
from .pyc_detection import disassemble_pyc
from .utils import * from .utils import *
import sys import sys
from colorama import init, Fore, Style from colorama import init, Fore, Style
from tqdm import tqdm
from pathlib import Path
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} PYCDC_FLAG = True
PYCDC_ADDR_FLAG = True
SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp", ".pyc"}
OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] OUTPUT_FORMATS = ["html", "md", "txt", "pdf"]
ORDERS = [ ORDERS = [
"__import__", "__import__",
@@ -325,26 +330,53 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None):
return text_output return text_output
def checkModeAndDetect(mode: str, filePath: str, fileExtension: str): def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr: str):
# TODO:添加更多方式,这里提高代码的复用性和扩展性 # TODO:添加更多方式,这里提高代码的复用性和扩展性
if fileExtension == ".pyc":
# 反汇编pyc文件
file_content = disassemble_pyc(filePath, pycdc_addr)
if file_content == "none":
global PYCDC_FLAG
PYCDC_FLAG = False
return ""
elif file_content == "invalid":
global PYCDC_ADDR_FLAG
PYCDC_ADDR_FLAG = False
if mode == "regex": if mode == "regex":
return find_dangerous_functions(read_file_content(filePath), fileExtension) return find_dangerous_functions(file_content, fileExtension)
elif mode == "llm": elif mode == "llm":
return detectGPT(read_file_content(filePath)) return detectGPT(file_content)
else: else:
return find_dangerous_functions(read_file_content(filePath), fileExtension) return find_dangerous_functions(file_content, fileExtension)
else:
file_content = read_file_content(filePath)
if mode == "regex":
return find_dangerous_functions(file_content, fileExtension)
elif mode == "llm":
return detectGPT(file_content)
else:
return find_dangerous_functions(file_content, fileExtension)
def process_path(path: str, output_format: str, mode: str, output_file=None): def process_path(
path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None
):
results = {"high": [], "medium": [], "low": [], "none": []} results = {"high": [], "medium": [], "low": [], "none": []}
if os.path.isdir(path): if os.path.isdir(path):
for root, dirs, files in os.walk(path): # 使用rglob获取所有文件
for file in files: all_files = [
file_extension = os.path.splitext(file)[1] file_path
if file_extension in SUPPORTED_EXTENSIONS: for file_path in Path(path).rglob("*")
file_path = os.path.join(root, file) if file_path.suffix in SUPPORTED_EXTENSIONS
]
file_results = checkModeAndDetect(mode, file_path, file_extension) # 扫描动画
for file_path in tqdm(all_files, desc="Scanning files", unit="file"):
file_extension = file_path.suffix
file_results = checkModeAndDetect(
mode, str(file_path), file_extension, pycdc_addr
)
if file_results is not None:
for key in file_results: for key in file_results:
if key != "none": # Exclude 'none' risk level if key != "none": # Exclude 'none' risk level
results[key].extend( results[key].extend(
@@ -356,7 +388,8 @@ def process_path(path: str, output_format: str, mode: str, output_file=None):
elif os.path.isfile(path): elif os.path.isfile(path):
file_extension = os.path.splitext(path)[1] file_extension = os.path.splitext(path)[1]
if file_extension in SUPPORTED_EXTENSIONS: if file_extension in SUPPORTED_EXTENSIONS:
file_results = checkModeAndDetect(mode, path, file_extension) file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr)
if file_results is not None:
for key in file_results: for key in file_results:
if key != "none": # Exclude 'none' risk level if key != "none": # Exclude 'none' risk level
results[key].extend( results[key].extend(
@@ -386,6 +419,9 @@ def main():
parser.add_argument( parser.add_argument(
"-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex"
) )
parser.add_argument(
"-p", "--pycdc", help="Path to pycdc.exe to decompile", default=None
)
args = parser.parse_args() args = parser.parse_args()
output_format = "txt" # Default output format output_format = "txt" # Default output format
output_file = None output_file = None
@@ -401,7 +437,15 @@ def main():
) )
output_file = args.output.rsplit(".", 1)[0] + ".txt" output_file = args.output.rsplit(".", 1)[0] + ".txt"
# 如果未指定输出文件,则输出到 stdout否则写入文件 # 如果未指定输出文件,则输出到 stdout否则写入文件
process_path(args.path, output_format, args.mode, output_file) process_path(args.path, output_format, args.mode, args.pycdc, output_file)
if PYCDC_FLAG == False:
print(
"ERROR: Detected Python 3.11 or above .pyc files. You need to install pycdc and compile it yourself to obtain pycdc."
)
print("Repo: https://github.com/zrax/pycdc.git")
if PYCDC_ADDR_FLAG == False:
print("ERROR: The specified pycdc.exe path is not valid")
print("Please check your pycdc path.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -0,0 +1,49 @@
from typing import List, Tuple
import uncompyle6
import io
import os
import subprocess
from contextlib import redirect_stdout, redirect_stderr
def run_pycdc(exe_path: str, pyc_file: str) -> str:
    """
    Run the pycdc decompiler on a .pyc file and return what it prints.

    The executable is started directly from an argument list, without a
    shell, so spaces, quotes, ``$`` and similar characters in either path are
    passed through literally instead of being interpreted.

    Args:
        exe_path (str): Path to the pycdc executable.
        pyc_file (str): Path to the .pyc file to decompile.

    Returns:
        str: The decompiler's standard output, or the sentinel string
        "invalid" when ``exe_path`` is not an existing file or cannot be
        launched.
    """
    if not os.path.isfile(exe_path):
        return "invalid"

    try:
        result = subprocess.run(
            [exe_path, pyc_file],
            capture_output=True,
            text=True,
            encoding="utf-8",
            # Decompiler output may contain bytes that are not valid UTF-8;
            # substitute them rather than abort the whole scan.
            errors="replace",
        )
    except OSError:
        # The file exists but is not something the OS can execute.
        return "invalid"
    return result.stdout
def disassemble_pyc(file_path: str, pycdc_addr=None) -> str:
    """
    Decompile a .pyc file to source text, trying uncompyle6 first.

    Args:
        file_path (str): The path to the .pyc file.
        pycdc_addr: Optional path to a pycdc executable used as a fallback
            when uncompyle6 raises.

    Returns:
        str: The text written by uncompyle6 on success. If uncompyle6 raises
        and no pycdc path was given, the sentinel string "none". Otherwise
        whatever ``run_pycdc`` returns for the given pycdc path.
    """
    buffer = io.StringIO()
    try:
        uncompyle6.main.decompile_file(file_path, buffer)
    except Exception:
        # Any decompiler failure triggers the fallback; without a pycdc path
        # the caller is told so via the "none" sentinel.
        return "none" if pycdc_addr is None else run_pycdc(pycdc_addr, file_path)
    return buffer.getvalue()

View File

@@ -4,7 +4,7 @@ import sys
def read_file_content(file_path: str) -> str: def read_file_content(file_path: str) -> str:
try: try:
with open(file_path, "r", encoding="utf-8") as file: with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
return file.read() return file.read()
except FileNotFoundError: except FileNotFoundError:
print("Error: File not found.") print("Error: File not found.")

View File

@@ -83,6 +83,30 @@ class TestBackdoorDetection(unittest.TestCase):
self.assertEqual(len(results["medium"]), 0) self.assertEqual(len(results["medium"]), 0)
self.assertEqual(len(results["low"]), 0) self.assertEqual(len(results["low"]), 0)
def test_gpt_env_no_set(self):
    """Expect detectGPT to raise ValueError when OPENAI_API_KEY is unset."""
    # With a key present the missing-key path cannot be exercised, so skip.
    api_key = os.getenv("OPENAI_API_KEY")
    if api_key is not None:
        self.skipTest("OPENAI_API_KEY is setted")
    self.assertRaises(ValueError, detectGPT, "print('test test')")
def test_find_dangerous_functions_pyc(self):
    """Scan source text under the ".pyc" extension and expect one high-risk hit."""
    # NOTE(review): the literal below is kept flush-left exactly as it appears
    # in this view; confirm its whitespace against the real test file.
    file_content = """import os
os.system('rm -rf /')
"""
    file_extension = ".pyc"
    # Only the os.system call on line 2 should be reported, at "high" risk.
    expected_result = {
        "high": [(2, "os.system('rm -rf /')")],
        "medium": [],
        "low": [],
        "none": [],
    }
    result = find_dangerous_functions(file_content, file_extension)
    self.assertEqual(result, expected_result)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()