feature/pickle-data #20

Merged
sangge merged 10 commits from feature/pickle-data into main 2024-06-03 20:31:35 +08:00
2 changed files with 42 additions and 34 deletions
Showing only changes of commit 79a605a6b4 - Show all commits

View File

@ -178,9 +178,7 @@ def main():
parser.add_argument( parser.add_argument(
"-m", "--mode", help="Mode of operation:[regex,llm]", default="regex" "-m", "--mode", help="Mode of operation:[regex,llm]", default="regex"
) )
parser.add_argument( parser.add_argument("-p", "--pickle", help="analyze the pickle file", default=None)
"-p","--pickle",help="analyze the pickle file",default=None
)
args = parser.parse_args() args = parser.parse_args()
output_format = "txt" # Default output format output_format = "txt" # Default output format
output_file = None output_file = None

View File

@ -2,6 +2,7 @@ import io
import json import json
import pickle import pickle
class _Unframer: class _Unframer:
def __init__(self, file_read, file_readline, file_tell=None): def __init__(self, file_read, file_readline, file_tell=None):
@ -18,8 +19,7 @@ class _Unframer:
buf[:] = self.file_read(n) buf[:] = self.file_read(n)
return n return n
if n < len(buf): if n < len(buf):
raise pickle.UnpicklingError( raise pickle.UnpicklingError("pickle exhausted before end of frame")
"pickle exhausted before end of frame")
return n return n
else: else:
n = len(buf) n = len(buf)
@ -33,8 +33,7 @@ class _Unframer:
self.current_frame = None self.current_frame = None
return self.file_read(n) return self.file_read(n)
if len(data) < n: if len(data) < n:
raise pickle.UnpicklingError( raise pickle.UnpicklingError("pickle exhausted before end of frame")
"pickle exhausted before end of frame")
return data return data
else: else:
return self.file_read(n) return self.file_read(n)
@ -45,35 +44,40 @@ class _Unframer:
if not data: if not data:
self.current_frame = None self.current_frame = None
return self.file_readline() return self.file_readline()
if data[-1] != b'\n'[0]: if data[-1] != b"\n"[0]:
raise pickle.UnpicklingError( raise pickle.UnpicklingError("pickle exhausted before end of frame")
"pickle exhausted before end of frame")
return data return data
else: else:
return self.file_readline() return self.file_readline()
def load_frame(self, frame_size): def load_frame(self, frame_size):
if self.current_frame and self.current_frame.read() != b'': if self.current_frame and self.current_frame.read() != b"":
raise pickle.UnpicklingError( raise pickle.UnpicklingError(
"beginning of a new frame before end of current frame") "beginning of a new frame before end of current frame"
)
self.current_frame = io.BytesIO(self.file_read(frame_size)) self.current_frame = io.BytesIO(self.file_read(frame_size))
dangerous_modules = ["os", "subprocess", "builtins", "nt"] dangerous_modules = ["os", "subprocess", "builtins", "nt"]
dangerous_names = ["system", "popen", "run", "call", "check_output", "check_call",] dangerous_names = [
"system",
"popen",
"run",
"call",
"check_output",
"check_call",
]
class pickleScanner():
class pickleScanner:
ReduceCount = 0 ReduceCount = 0
maliciousModule = [] maliciousModule = []
dispatch = {} dispatch = {}
def __init__(self, file, *, fix_imports=True, def __init__(
encoding="ASCII", errors="strict", buffers=None): self, file, *, fix_imports=True, encoding="ASCII", errors="strict", buffers=None
):
self._buffers = iter(buffers) if buffers is not None else None self._buffers = iter(buffers) if buffers is not None else None
self._file_readline = file.readline self._file_readline = file.readline
self._file_read = file.read self._file_read = file.read
@ -104,10 +108,14 @@ class pickleScanner():
# 扫描所有的opcodes # 扫描所有的opcodes
opcode = self.read(1) opcode = self.read(1)
while opcode: while opcode:
if opcode == b'c': if opcode == b"c":
self.seek(-2, 1) self.seek(-2, 1)
codeN1 = self.read(1) codeN1 = self.read(1)
if 65<= ord(codeN1) <=90 or 97<= ord(codeN1) <=122 or ord(codeN1) == 0: if (
65 <= ord(codeN1) <= 90
or 97 <= ord(codeN1) <= 122
or ord(codeN1) == 0
):
self.read(1) self.read(1)
else: else:
self.read(1) self.read(1)
@ -119,21 +127,21 @@ class pickleScanner():
opcode = self.read(1) opcode = self.read(1)
unsafe_opcodes = { unsafe_opcodes = {
b'r', # REDUCE - call a callable with arguments b"r", # REDUCE - call a callable with arguments
b'R', # REDUCE - same as 'r', but for args tuple b"R", # REDUCE - same as 'r', but for args tuple
} }
def output(self): def output(self):
return { return {
"ReduceCount": self.ReduceCount, "ReduceCount": self.ReduceCount,
"maliciousModule": self.maliciousModule "maliciousModule": self.maliciousModule,
} }
def pickleDataDetection(file, output_file=None): def pickleDataDetection(file, output_file=None):
''' """
:param file: pickle file path :param file: pickle file path
''' """
with open(file, "rb") as file: with open(file, "rb") as file:
pickscan = pickleScanner(file) pickscan = pickleScanner(file)
pickscan.load() pickscan.load()
@ -144,5 +152,7 @@ def pickleDataDetection(file,output_file=None):
else: else:
print(json.dumps(res)) print(json.dumps(res))
if __name__ == '__main__':
if __name__ == "__main__":
pickleDataDetection("test.pkl") pickleDataDetection("test.pkl")