From fa86f12a4810b1e2db8ec4ad0c76ac9568aa367c Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Tue, 14 May 2024 21:02:45 +0800 Subject: [PATCH 1/9] =?UTF-8?q?feat:=E6=B7=BB=E5=8A=A0=E4=BA=86pickle?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=89=AB=E6=8F=8F=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/pickle_detection.py | 157 ++++++++++++++++++++++++++++++++++ detection/test.pkl | Bin 0 -> 33 bytes 2 files changed, 157 insertions(+) create mode 100644 detection/pickle_detection.py create mode 100644 detection/test.pkl diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py new file mode 100644 index 0000000..b833c1c --- /dev/null +++ b/detection/pickle_detection.py @@ -0,0 +1,157 @@ +import io +import os +import pickletools +import pickle +import nt + +class _Unframer: + + def __init__(self, file_read, file_readline, file_tell=None): + self.file_read = file_read + self.file_readline = file_readline + self.current_frame = None + + def readinto(self, buf): + if self.current_frame: + n = self.current_frame.readinto(buf) + if n == 0 and len(buf) != 0: + self.current_frame = None + n = len(buf) + buf[:] = self.file_read(n) + return n + if n < len(buf): + raise pickle.UnpicklingError( + "pickle exhausted before end of frame") + return n + else: + n = len(buf) + buf[:] = self.file_read(n) + return n + + def read(self, n): + if self.current_frame: + data = self.current_frame.read(n) + if not data and n != 0: + self.current_frame = None + return self.file_read(n) + if len(data) < n: + raise pickle.UnpicklingError( + "pickle exhausted before end of frame") + return data + else: + return self.file_read(n) + + def readline(self): + if self.current_frame: + data = self.current_frame.readline() + if not data: + self.current_frame = None + return self.file_readline() + if data[-1] != b'\n'[0]: + raise pickle.UnpicklingError( + "pickle exhausted before end of frame") + return data + else: + return self.file_readline() + + def load_frame(self, frame_size): + if self.current_frame and self.current_frame.read() != b'': + raise pickle.UnpicklingError( + "beginning of a new frame before end of current frame") + self.current_frame = io.BytesIO(self.file_read(frame_size)) + + + + + + +dangerous_modules = ["os", "subprocess","builtins","nt"] +dangerous_names = ["system", "popen", "run", "call", "check_output", "check_call",] + +class pickleScanner(): + + ReduceCount = 0 + maliciousModule = [] + dispatch = {} + + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict", buffers=None): + self._buffers = iter(buffers) if buffers is not None else None + self._file_readline = file.readline + self._file_read = file.read + self.memo = {} + self.encoding = encoding + self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports + self.file = file + + def find_class(self, module, name): + print(module, name) + if module.decode() in dangerous_modules or name.decode() in dangerous_names: + # self.maliciousCount += 1 + self.maliciousModule.append((module, name)) + + def load(self): + self._unframer = _Unframer(self._file_read, self._file_readline) + self.read = self._unframer.read + self.readinto = self._unframer.readinto + self.readline = self._unframer.readline + self.seek = self.file.seek + self.metastack = [] + self.stack = [] + self.append = self.stack.append + self.proto = 0 + read = self.read + dispatch = self.dispatch + # 扫描所有的opcodes + opcode = self.read(1) + while opcode: + if opcode == b'c': + self.seek(-2,1) + codeN1 = self.read(1) + if 65<= ord(codeN1) <=90 or 97<= ord(codeN1) <=122 or ord(codeN1) == 0: + self.read(1) + else: + self.read(1) + module = self.readline()[:-1] + name = self.readline()[:-1] + self.find_class(module, name) + elif opcode in self.unsafe_opcodes: + self.ReduceCount += 1 + opcode = self.read(1) + + unsafe_opcodes = { + b'r', # REDUCE - call a callable with arguments + b'R', # REDUCE - same as 'r', but for args tuple + } + + + def output(self): + if self.ReduceCount > 0 or len(self.maliciousModule) > 0: + print("The pickle file maybe contains malicious code") + print(f"The number of REDUCE opcodes is {self.ReduceCount}") + print("The malicious options are: ", self.maliciousModule) + else: + print("The pickle file is safe") + + + + +class test: + a = 1 + b = 2 + def __reduce__(self): + return (__import__("os").system,('calc',)) + + +data = pickle.dumps(test(),protocol=2) +print(data) +print(pickletools.dis(data)) +with open("test.pkl", "wb") as file: + file.write(data) + +with open("test.pkl", "rb") as file: + pickscan = pickleScanner(file) + pickscan.load() +pickscan.output() \ No newline at end of file diff --git a/detection/test.pkl b/detection/test.pkl new file mode 100644 index 0000000000000000000000000000000000000000..30c49be8d5204a05722eb8901cdb2eb5afb0575f GIT binary patch literal 33 ocmZo*O3o|cDy}RpNzLUdWQbs4U|>j2%t Date: Tue, 14 May 2024 21:31:31 +0800 Subject: [PATCH 2/9] =?UTF-8?q?update:=E5=AE=8C=E5=96=84=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E6=96=B9=E5=BC=8F=EF=BC=8C=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/backdoor_detection.py | 7 +++++ detection/pickle_detection.py | 48 +++++++++++++++------------------ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index ef3e32b..e50963c 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -3,6 +3,8 @@ from typing import Dict, List, Tuple from reportlab.lib.pagesizes import letter from reportlab.lib.styles import getSampleStyleSheet from reportlab.platypus import Paragraph, Spacer, SimpleDocTemplate + +from detection.pickle_detection import pickleDataDetection from .Regexdetection import find_dangerous_functions from .GPTdetection import detectGPT from .utils import * @@ -176,6 +178,9 @@ def main(): parser.add_argument( "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" ) + parser.add_argument( + "-p","--pickle",help="analyze the pickle file",default=None + ) args = parser.parse_args() output_format = "txt" # Default output format output_file = None @@ -191,6 +196,8 @@ def main(): ) output_file = args.output.rsplit(".", 1)[0] + ".txt" # 如果未指定输出文件,则输出到 stdout;否则写入文件 + if args.pickle: + pickleDataDetection(args.pickle, output_file) process_path(args.path, output_format, args.mode, output_file) diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py index b833c1c..4730f79 100644 --- a/detection/pickle_detection.py +++ b/detection/pickle_detection.py @@ -1,4 +1,5 @@ import io +import json import os import pickletools import pickle @@ -87,10 +88,9 @@ class pickleScanner(): self.file = file def find_class(self, module, name): - print(module, name) if module.decode() in dangerous_modules or name.decode() in dangerous_names: # self.maliciousCount += 1 - self.maliciousModule.append((module, name)) + self.maliciousModule.append((module.decode(), name.decode())) def load(self): self._unframer = _Unframer(self._file_read, self._file_readline) @@ -128,30 +128,24 @@ class pickleScanner(): def output(self): - if self.ReduceCount > 0 or len(self.maliciousModule) > 0: - print("The pickle file maybe contains malicious code") - print(f"The number of REDUCE opcodes is {self.ReduceCount}") - print("The malicious options are: ", self.maliciousModule) - else: - print("The pickle file is safe") + return { + "ReduceCount": self.ReduceCount, + "maliciousModule": self.maliciousModule + } +def pickleDataDetection(file,output_file=None): + ''' + :param file: pickle file path + ''' + with open(file, "rb") as file: + pickscan = pickleScanner(file) + pickscan.load() + res = pickscan.output() + if output_file: + with open(output_file, "w") as file: + file.writelines(json.dumps(res)) + else: + print(json.dumps(res)) - - -class test: - a = 1 - b = 2 - def __reduce__(self): - return (__import__("os").system,('calc',)) - - -data = pickle.dumps(test(),protocol=2) -print(data) -print(pickletools.dis(data)) -with open("test.pkl", "wb") as file: - file.write(data) - -with open("test.pkl", "rb") as file: - pickscan = pickleScanner(file) - pickscan.load() -pickscan.output() \ No newline at end of file +if __name__ == '__main__': + pickleDataDetection("test.pkl") \ No newline at end of file -- 2.47.2 From d073cfad31dbca927d1b5d79dc1a123d7e0dd7c0 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Tue, 14 May 2024 21:33:08 +0800 Subject: [PATCH 3/9] =?UTF-8?q?del=EF=BC=9A=E5=88=A0=E9=99=A4=E6=97=A0?= =?UTF-8?q?=E7=94=A8=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/pickle_detection.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py index 4730f79..808d06e 100644 --- a/detection/pickle_detection.py +++ b/detection/pickle_detection.py @@ -1,9 +1,6 @@ import io import json -import os -import pickletools import pickle -import nt class _Unframer: -- 2.47.2 From db3244f55a03079ef7e4af1a23b4417dcbbd057e Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Tue, 14 May 2024 21:34:54 +0800 Subject: [PATCH 4/9] =?UTF-8?q?fix=EF=BC=9A=E9=80=BB=E8=BE=91=E5=B0=8F?= =?UTF-8?q?=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/backdoor_detection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index e50963c..e0c6398 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -198,7 +198,8 @@ def main(): # 如果未指定输出文件,则输出到 stdout;否则写入文件 if args.pickle: pickleDataDetection(args.pickle, output_file) - process_path(args.path, output_format, args.mode, output_file) + else: + process_path(args.path, output_format, args.mode, output_file) if __name__ == "__main__": -- 2.47.2 From 97fbf649a8eb823397ebd6901f376710433a28e0 Mon Sep 17 00:00:00 2001 From: tritium0041 Date: Tue, 14 May 2024 21:37:16 +0800 Subject: [PATCH 5/9] =?UTF-8?q?del:=E5=88=A0=E9=99=A4=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- detection/test.pkl | Bin 33 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 detection/test.pkl diff --git a/detection/test.pkl b/detection/test.pkl deleted file mode 100644 index 30c49be8d5204a05722eb8901cdb2eb5afb0575f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 33 ocmZo*O3o|cDy}RpNzLUdWQbs4U|>j2%t Date: Wed, 15 May 2024 19:10:35 +0800 Subject: [PATCH 6/9] style: format code style --- detection/backdoor_detection.py | 4 +- detection/pickle_detection.py | 72 +++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/detection/backdoor_detection.py b/detection/backdoor_detection.py index e0c6398..b4786d6 100644 --- a/detection/backdoor_detection.py +++ b/detection/backdoor_detection.py @@ -178,9 +178,7 @@ def main(): parser.add_argument( "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" ) - parser.add_argument( - "-p","--pickle",help="analyze the pickle file",default=None - ) + parser.add_argument("-p", "--pickle", help="analyze the pickle file", default=None) args = parser.parse_args() output_format = "txt" # Default output format output_file = None diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py index 808d06e..b94dd91 100644 --- a/detection/pickle_detection.py +++ b/detection/pickle_detection.py @@ -2,6 +2,7 @@ import io import json import pickle + class _Unframer: def __init__(self, file_read, file_readline, file_tell=None): @@ -18,8 +19,7 @@ class _Unframer: buf[:] = self.file_read(n) return n if n < len(buf): - raise pickle.UnpicklingError( - "pickle exhausted before end of frame") + raise pickle.UnpicklingError("pickle exhausted before end of frame") return n else: n = len(buf) @@ -33,8 +33,7 @@ class _Unframer: self.current_frame = None return self.file_read(n) if len(data) < n: - raise pickle.UnpicklingError( - "pickle exhausted before end of frame") + raise pickle.UnpicklingError("pickle exhausted before end of frame") return data else: return self.file_read(n) @@ -45,35 +44,40 @@ class _Unframer: if not data: self.current_frame = None return self.file_readline() - if data[-1] != b'\n'[0]: - raise pickle.UnpicklingError( - "pickle exhausted before end of frame") + if data[-1] != b"\n"[0]: + raise pickle.UnpicklingError("pickle exhausted before end of frame") return data else: return self.file_readline() def load_frame(self, frame_size): - if self.current_frame and self.current_frame.read() != b'': + if self.current_frame and self.current_frame.read() != b"": raise pickle.UnpicklingError( - "beginning of a new frame before end of current frame") + "beginning of a new frame before end of current frame" + ) self.current_frame = io.BytesIO(self.file_read(frame_size)) +dangerous_modules = ["os", "subprocess", "builtins", "nt"] +dangerous_names = [ + "system", + "popen", + "run", + "call", + "check_output", + "check_call", +] - - -dangerous_modules = ["os", "subprocess","builtins","nt"] -dangerous_names = ["system", "popen", "run", "call", "check_output", "check_call",] - -class pickleScanner(): +class pickleScanner: ReduceCount = 0 maliciousModule = [] dispatch = {} - def __init__(self, file, *, fix_imports=True, - encoding="ASCII", errors="strict", buffers=None): + def __init__( + self, file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None + ): self._buffers = iter(buffers) if buffers is not None else None self._file_readline = file.readline self._file_read = file.read @@ -104,10 +108,14 @@ class pickleScanner(): # 扫描所有的opcodes opcode = self.read(1) while opcode: - if opcode == b'c': - self.seek(-2,1) + if opcode == b"c": + self.seek(-2, 1) codeN1 = self.read(1) - if 65<= ord(codeN1) <=90 or 97<= ord(codeN1) <=122 or ord(codeN1) == 0: + if ( + 65 <= ord(codeN1) <= 90 + or 97 <= ord(codeN1) <= 122 + or ord(codeN1) == 0 + ): self.read(1) else: self.read(1) @@ -119,21 +127,21 @@ class pickleScanner(): opcode = self.read(1) unsafe_opcodes = { - b'r', # REDUCE - call a callable with arguments - b'R', # REDUCE - same as 'r', but for args tuple + b"r", # REDUCE - call a callable with arguments + b"R", # REDUCE - same as 'r', but for args tuple } - def output(self): return { - "ReduceCount": self.ReduceCount, - "maliciousModule": self.maliciousModule - } + "ReduceCount": self.ReduceCount, + "maliciousModule": self.maliciousModule, + } -def pickleDataDetection(file,output_file=None): - ''' + +def pickleDataDetection(file, output_file=None): + """ :param file: pickle file path - ''' + """ with open(file, "rb") as file: pickscan = pickleScanner(file) pickscan.load() @@ -144,5 +152,7 @@ def pickleDataDetection(file,output_file=None): else: print(json.dumps(res)) -if __name__ == '__main__': - pickleDataDetection("test.pkl") \ No newline at end of file + +if __name__ == "__main__": + pickleDataDetection("test.pkl") + -- 2.47.2 From fab5e680ef1f2ac31de4b425e95e2e522333ea4d Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 27 May 2024 17:08:12 +0800 Subject: [PATCH 7/9] style: format code --- detection/pickle_detection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py index b94dd91..7016641 100644 --- a/detection/pickle_detection.py +++ b/detection/pickle_detection.py @@ -155,4 +155,3 @@ def pickleDataDetection(file, output_file=None): if __name__ == "__main__": pickleDataDetection("test.pkl") - -- 2.47.2 From accd50e8ce0c4aeb9629f1e7c00fa61d6e6c0480 Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 27 May 2024 20:35:13 +0800 Subject: [PATCH 8/9] fix: fix some error --- detection/pickle_detection.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/detection/pickle_detection.py b/detection/pickle_detection.py index 7016641..cfbd258 100644 --- a/detection/pickle_detection.py +++ b/detection/pickle_detection.py @@ -71,10 +71,6 @@ dangerous_names = [ class pickleScanner: - ReduceCount = 0 - maliciousModule = [] - dispatch = {} - def __init__( self, file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None ): @@ -87,6 +83,8 @@ class pickleScanner: self.proto = 0 self.fix_imports = fix_imports self.file = file + self.ReduceCount = 0 + self.maliciousModule = [] def find_class(self, module, name): if module.decode() in dangerous_modules or name.decode() in dangerous_names: @@ -103,8 +101,6 @@ class pickleScanner: self.stack = [] self.append = self.stack.append self.proto = 0 - read = self.read - dispatch = self.dispatch # 扫描所有的opcodes opcode = self.read(1) while opcode: @@ -131,24 +127,24 @@ class pickleScanner: b"R", # REDUCE - same as 'r', but for args tuple } - def output(self): + def output(self) -> dict: return { "ReduceCount": self.ReduceCount, "maliciousModule": self.maliciousModule, } -def pickleDataDetection(file, output_file=None): +def pickleDataDetection(filename: str, output_file=None): """ :param file: pickle file path """ - with open(file, "rb") as file: + with open(filename, "rb") as file: pickscan = pickleScanner(file) pickscan.load() res = pickscan.output() if output_file: with open(output_file, "w") as file: - file.writelines(json.dumps(res)) + json.dump(res, file, indent=4) else: print(json.dumps(res)) -- 2.47.2 From b518fef6d25aa4f632bc31df1c5a5eac79c6e93a Mon Sep 17 00:00:00 2001 From: sangge-redmi <2251250136@qq.com> Date: Mon, 27 May 2024 20:36:18 +0800 Subject: [PATCH 9/9] test: add pickle unittest --- tests/test_pickle_detection.py | 56 ++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/test_pickle_detection.py diff --git a/tests/test_pickle_detection.py b/tests/test_pickle_detection.py new file mode 100644 index 0000000..34521e1 --- /dev/null +++ b/tests/test_pickle_detection.py @@ -0,0 +1,56 @@ +import unittest +import pickle +import tempfile +from detection.pickle_detection import pickleScanner, pickleDataDetection +from unittest.mock import patch + + +class TestPickleScanner(unittest.TestCase): + + def setUp(self): + # Create temporary files with valid and malicious data + self.valid_data = {"key": "value"} + self.malicious_data = b"\x80\x03csubprocess\ncheck_output\nq\x00X\x05\x00\x00\x00echo 1q\x01\x85q\x02Rq\x03." + + self.valid_file = tempfile.NamedTemporaryFile(delete=False) + self.valid_file.write(pickle.dumps(self.valid_data)) + self.valid_file.close() + + self.malicious_file = tempfile.NamedTemporaryFile(delete=False) + self.malicious_file.write(self.malicious_data) + self.malicious_file.close() + + def tearDown(self): + # Clean up temporary files + import os + + os.remove(self.valid_file.name) + os.remove(self.malicious_file.name) + + def test_valid_pickle(self): + with open(self.valid_file.name, "rb") as file: + scanner = pickleScanner(file) + print(scanner.maliciousModule) + scanner.load() + output = scanner.output() + self.assertEqual(output["ReduceCount"], 0) + self.assertEqual(output["maliciousModule"], []) + + def test_malicious_pickle(self): + with open(self.malicious_file.name, "rb") as file: + scanner = pickleScanner(file) + scanner.load() + output = scanner.output() + self.assertEqual(output["ReduceCount"], 1) + self.assertIn(("subprocess", "check_output"), output["maliciousModule"]) + + @patch("builtins.print") + def test_pickleDataDetection_no_output_file(self, mock_print): + # test output to stdout if filename is not given + with patch("builtins.print") as mock_print: + pickleDataDetection(self.valid_file.name) + mock_print.assert_called_once() + + +if __name__ == "__main__": + unittest.main() -- 2.47.2