Compare commits
	
		
			14 Commits
		
	
	
		
			89b37ddfd6
			...
			49408eda9f
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 49408eda9f | |||
| d1ac4594e4 | |||
| 62b77812af | |||
| 7eb4de8e6c | |||
| b99334ed12 | |||
| 17245a9bcf | |||
| b673575fe4 | |||
| df65fff2c7 | |||
| aeb4a33d98 | |||
| e80e83ad51 | |||
| 8a14ef4341 | |||
| e418bbf380 | |||
| d30ea0ca61 | |||
| 40f5c07fa1 | 
| @@ -25,9 +25,20 @@ def find_dangerous_functions( | |||||||
|         ".cpp": { |         ".cpp": { | ||||||
|             r"\bsystem\(": "high", |             r"\bsystem\(": "high", | ||||||
|         }, |         }, | ||||||
|  |         ".pyc": { | ||||||
|  |             r"\bexec\b": "high", | ||||||
|  |             r"\beval\b": "high", | ||||||
|  |             r"\bos\.system\b": "high", | ||||||
|  |             r"\bos\.exec\b": "high", | ||||||
|  |             r"\bos\.fork\b": "high", | ||||||
|  |             r"\bos\.kill\b": "high", | ||||||
|  |             r"\bos\.popen\b": "medium", | ||||||
|  |             r"\bos\.spawn\b": "medium", | ||||||
|  |         }, | ||||||
|     } |     } | ||||||
|     risk_patterns = patterns.get(file_extension, {}) |     risk_patterns = patterns.get(file_extension, {}) | ||||||
|     classified_results = {"high": [], "medium": [], "low": [], "none": []} |     classified_results = {"high": [], "medium": [], "low": [], "none": []} | ||||||
|  |     if file_content is not None: | ||||||
|         for line_number, line in enumerate(file_content.split("\n"), start=1): |         for line_number, line in enumerate(file_content.split("\n"), start=1): | ||||||
|             clean_line = remove_comments(line, file_extension) |             clean_line = remove_comments(line, file_extension) | ||||||
|             if not clean_line: |             if not clean_line: | ||||||
| @@ -36,4 +47,3 @@ def find_dangerous_functions( | |||||||
|                 if re.search(pattern, clean_line): |                 if re.search(pattern, clean_line): | ||||||
|                     classified_results[risk_level].append((line_number, clean_line)) |                     classified_results[risk_level].append((line_number, clean_line)) | ||||||
|     return classified_results |     return classified_results | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,11 +5,16 @@ from reportlab.lib.styles import getSampleStyleSheet | |||||||
| from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate | from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate | ||||||
| from .Regexdetection import find_dangerous_functions | from .Regexdetection import find_dangerous_functions | ||||||
| from .GPTdetection import detectGPT | from .GPTdetection import detectGPT | ||||||
|  | from .pyc_detection import disassemble_pyc | ||||||
| from .utils import * | from .utils import * | ||||||
| import sys | import sys | ||||||
| from colorama import init, Fore, Style | from colorama import init, Fore, Style | ||||||
|  | from tqdm import tqdm | ||||||
|  | from pathlib import Path | ||||||
|  |  | ||||||
| SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp"} | PYCDC_FLAG = True | ||||||
|  | PYCDC_ADDR_FLAG = True | ||||||
|  | SUPPORTED_EXTENSIONS = {".py", ".js", ".cpp", ".pyc"} | ||||||
| OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] | OUTPUT_FORMATS = ["html", "md", "txt", "pdf"] | ||||||
| ORDERS = [ | ORDERS = [ | ||||||
|     "__import__", |     "__import__", | ||||||
| @@ -325,26 +330,53 @@ def output_text(results: Dict[str, List[Tuple[int, str]]], file_name=None): | |||||||
|         return text_output |         return text_output | ||||||
|  |  | ||||||
|  |  | ||||||
| def checkModeAndDetect(mode: str, filePath: str, fileExtension: str): | def checkModeAndDetect(mode: str, filePath: str, fileExtension: str, pycdc_addr: str): | ||||||
|     # TODO:添加更多方式,这里提高代码的复用性和扩展性 |     # TODO:添加更多方式,这里提高代码的复用性和扩展性 | ||||||
|  |     if fileExtension == ".pyc": | ||||||
|  |         # 反汇编pyc文件 | ||||||
|  |         file_content = disassemble_pyc(filePath, pycdc_addr) | ||||||
|  |         if file_content == "none": | ||||||
|  |             global PYCDC_FLAG | ||||||
|  |             PYCDC_FLAG = False | ||||||
|  |             return "" | ||||||
|  |         elif file_content == "invalid": | ||||||
|  |             global PYCDC_ADDR_FLAG | ||||||
|  |             PYCDC_ADDR_FLAG = False | ||||||
|         if mode == "regex": |         if mode == "regex": | ||||||
|         return find_dangerous_functions(read_file_content(filePath), fileExtension) |             return find_dangerous_functions(file_content, fileExtension) | ||||||
|         elif mode == "llm": |         elif mode == "llm": | ||||||
|         return detectGPT(read_file_content(filePath)) |             return detectGPT(file_content) | ||||||
|         else: |         else: | ||||||
|         return find_dangerous_functions(read_file_content(filePath), fileExtension) |             return find_dangerous_functions(file_content, fileExtension) | ||||||
|  |     else: | ||||||
|  |         file_content = read_file_content(filePath) | ||||||
|  |         if mode == "regex": | ||||||
|  |             return find_dangerous_functions(file_content, fileExtension) | ||||||
|  |         elif mode == "llm": | ||||||
|  |             return detectGPT(file_content) | ||||||
|  |         else: | ||||||
|  |             return find_dangerous_functions(file_content, fileExtension) | ||||||
|  |  | ||||||
|  |  | ||||||
| def process_path(path: str, output_format: str, mode: str, output_file=None): | def process_path( | ||||||
|  |     path: str, output_format: str, mode: str, pycdc_addr: str, output_file=None | ||||||
|  | ): | ||||||
|     results = {"high": [], "medium": [], "low": [], "none": []} |     results = {"high": [], "medium": [], "low": [], "none": []} | ||||||
|     if os.path.isdir(path): |     if os.path.isdir(path): | ||||||
|         for root, dirs, files in os.walk(path): |         # 使用rglob获取所有文件 | ||||||
|             for file in files: |         all_files = [ | ||||||
|                 file_extension = os.path.splitext(file)[1] |             file_path | ||||||
|                 if file_extension in SUPPORTED_EXTENSIONS: |             for file_path in Path(path).rglob("*") | ||||||
|                     file_path = os.path.join(root, file) |             if file_path.suffix in SUPPORTED_EXTENSIONS | ||||||
|  |         ] | ||||||
|  |  | ||||||
|                     file_results = checkModeAndDetect(mode, file_path, file_extension) |         # 扫描动画 | ||||||
|  |         for file_path in tqdm(all_files, desc="Scanning files", unit="file"): | ||||||
|  |             file_extension = file_path.suffix | ||||||
|  |             file_results = checkModeAndDetect( | ||||||
|  |                 mode, str(file_path), file_extension, pycdc_addr | ||||||
|  |             ) | ||||||
|  |             if file_results is not None: | ||||||
|                 for key in file_results: |                 for key in file_results: | ||||||
|                     if key != "none":  # Exclude 'none' risk level |                     if key != "none":  # Exclude 'none' risk level | ||||||
|                         results[key].extend( |                         results[key].extend( | ||||||
| @@ -356,7 +388,8 @@ def process_path(path: str, output_format: str, mode: str, output_file=None): | |||||||
|     elif os.path.isfile(path): |     elif os.path.isfile(path): | ||||||
|         file_extension = os.path.splitext(path)[1] |         file_extension = os.path.splitext(path)[1] | ||||||
|         if file_extension in SUPPORTED_EXTENSIONS: |         if file_extension in SUPPORTED_EXTENSIONS: | ||||||
|             file_results = checkModeAndDetect(mode, path, file_extension) |             file_results = checkModeAndDetect(mode, path, file_extension, pycdc_addr) | ||||||
|  |             if file_results is not None: | ||||||
|                 for key in file_results: |                 for key in file_results: | ||||||
|                     if key != "none":  # Exclude 'none' risk level |                     if key != "none":  # Exclude 'none' risk level | ||||||
|                         results[key].extend( |                         results[key].extend( | ||||||
| @@ -386,6 +419,9 @@ def main(): | |||||||
|     parser.add_argument( |     parser.add_argument( | ||||||
|         "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" |         "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" | ||||||
|     ) |     ) | ||||||
|  |     parser.add_argument( | ||||||
|  |         "-p", "--pycdc", help="Path to pycdc.exe to decompile", default=None | ||||||
|  |     ) | ||||||
|     args = parser.parse_args() |     args = parser.parse_args() | ||||||
|     output_format = "txt"  # Default output format |     output_format = "txt"  # Default output format | ||||||
|     output_file = None |     output_file = None | ||||||
| @@ -401,7 +437,15 @@ def main(): | |||||||
|             ) |             ) | ||||||
|             output_file = args.output.rsplit(".", 1)[0] + ".txt" |             output_file = args.output.rsplit(".", 1)[0] + ".txt" | ||||||
|     # 如果未指定输出文件,则输出到 stdout;否则写入文件 |     # 如果未指定输出文件,则输出到 stdout;否则写入文件 | ||||||
|     process_path(args.path, output_format, args.mode, output_file) |     process_path(args.path, output_format, args.mode, args.pycdc, output_file) | ||||||
|  |     if PYCDC_FLAG == False: | ||||||
|  |         print( | ||||||
|  |             "ERROR: Detected Python 3.11 or above .pyc files. You need to install pycdc and compile it yourself to obtain pycdc." | ||||||
|  |         ) | ||||||
|  |         print("Repo: https://github.com/zrax/pycdc.git") | ||||||
|  |     if PYCDC_ADDR_FLAG == False: | ||||||
|  |         print("ERROR: The specified pycdc.exe path is not valid") | ||||||
|  |         print("Please check your pycdc path.") | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|   | |||||||
							
								
								
									
										49
									
								
								detection/pyc_detection.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								detection/pyc_detection.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | |||||||
|  | from typing import List, Tuple | ||||||
|  | import uncompyle6 | ||||||
|  | import io | ||||||
|  | import os | ||||||
|  | import subprocess | ||||||
|  | from contextlib import redirect_stdout, redirect_stderr | ||||||
|  |  | ||||||
|  |  | ||||||
def run_pycdc(exe_path: str, pyc_file: str) -> str:
    """
    Run the pycdc decompiler on a .pyc file and return what it prints.

    The executable and the file are handed to ``subprocess.run`` as an
    argument list with no shell involved, so spaces, quotes, ``$`` or other
    shell metacharacters in either path are treated as literal characters
    and cannot change the command that is executed.

    Args:
        exe_path (str): Path to the pycdc executable.
        pyc_file (str): Path to the .pyc file to decompile.

    Returns:
        str: Standard output of pycdc, or the sentinel string "invalid" when
        ``exe_path`` is not an existing file or the program cannot be started.
    """
    if not os.path.isfile(exe_path):
        return "invalid"

    try:
        result = subprocess.run(
            [exe_path, pyc_file],
            capture_output=True,
            text=True,
            encoding="utf-8",
            # Undecodable bytes in the tool's output must not abort the scan.
            errors="replace",
        )
    except OSError:
        # The path exists but cannot be launched (not executable, wrong
        # format, ...): report it the same way as a missing executable.
        return "invalid"

    return result.stdout
|  |  | ||||||
|  |  | ||||||
def disassemble_pyc(file_path: str, pycdc_addr=None) -> str:
    """
    Decompile a .pyc file to source text, trying uncompyle6 first.

    If uncompyle6 raises for any reason, the file is passed to pycdc through
    ``run_pycdc``, provided a pycdc path was supplied.

    Args:
        file_path (str): The path to the .pyc file.
        pycdc_addr (str, optional): Path to the pycdc executable used as a
            fallback. Defaults to None, meaning no fallback is available.

    Returns:
        str: The decompiled source when uncompyle6 succeeds. Otherwise the
        sentinel string "none" when no pycdc path was given, or whatever
        ``run_pycdc`` returns (its output, or its "invalid" sentinel for an
        unusable pycdc path).
    """
    output = io.StringIO()
    try:
        uncompyle6.main.decompile_file(file_path, output)
    except Exception:
        # Deliberately broad: any decompiler failure triggers the fallback
        # rather than aborting the whole scan.
        if pycdc_addr is None:
            return "none"
        return run_pycdc(pycdc_addr, file_path)
    return output.getvalue()
| @@ -4,7 +4,7 @@ import sys | |||||||
|  |  | ||||||
| def read_file_content(file_path: str) -> str: | def read_file_content(file_path: str) -> str: | ||||||
|     try: |     try: | ||||||
|         with open(file_path, "r", encoding="utf-8") as file: |         with open(file_path, "r", encoding="utf-8", errors="ignore") as file: | ||||||
|             return file.read() |             return file.read() | ||||||
|     except FileNotFoundError: |     except FileNotFoundError: | ||||||
|         print("Error: File not found.") |         print("Error: File not found.") | ||||||
|   | |||||||
| @@ -83,6 +83,30 @@ class TestBackdoorDetection(unittest.TestCase): | |||||||
|         self.assertEqual(len(results["medium"]), 0) |         self.assertEqual(len(results["medium"]), 0) | ||||||
|         self.assertEqual(len(results["low"]), 0) |         self.assertEqual(len(results["low"]), 0) | ||||||
|  |  | ||||||
|  |     def test_gpt_env_no_set(self): | ||||||
|  |         if os.getenv("OPENAI_API_KEY") is not None: | ||||||
|  |             self.skipTest("OPENAI_API_KEY is setted") | ||||||
|  |         content = "print('test test')" | ||||||
|  |         with self.assertRaises(ValueError): | ||||||
|  |             detectGPT(content) | ||||||
|  |  | ||||||
|  |     def test_find_dangerous_functions_pyc(self): | ||||||
|  |         file_content = """import os | ||||||
|  |         os.system('rm -rf /') | ||||||
|  |         """ | ||||||
|  |         file_extension = ".pyc" | ||||||
|  |  | ||||||
|  |         expected_result = { | ||||||
|  |             "high": [(2, "os.system('rm -rf /')")], | ||||||
|  |             "medium": [], | ||||||
|  |             "low": [], | ||||||
|  |             "none": [], | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         result = find_dangerous_functions(file_content, file_extension) | ||||||
|  |  | ||||||
|  |         self.assertEqual(result, expected_result) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user