From 5a59c748c3ee47c95d23c8a7c6f5d6ae2f1b182e Mon Sep 17 00:00:00 2001 From: hincheung Date: Thu, 6 Aug 2020 16:22:15 +0800 Subject: [PATCH 1/2] Recover .py source codes --- .gitignore | 1 + crash_test.py | 7 +- pydump.py | 137 +++++++++++++++-------- run_test.py | 32 ++++++ tests/__init__.py | 0 tests/folderA/__init__.py | 0 tests/folderA/folderB/__init__.py | 0 tests/folderA/folderB/raise_exception.py | 2 + tests/folderA/multiple_layout_test.py | 5 + tests/pyc_source/__init__.py | 0 tests/pyc_source/pyc_error.py | 2 + tests/pyc_test.py | 64 +++++++++++ 12 files changed, 200 insertions(+), 50 deletions(-) create mode 100644 run_test.py create mode 100644 tests/__init__.py create mode 100644 tests/folderA/__init__.py create mode 100644 tests/folderA/folderB/__init__.py create mode 100644 tests/folderA/folderB/raise_exception.py create mode 100644 tests/folderA/multiple_layout_test.py create mode 100644 tests/pyc_source/__init__.py create mode 100644 tests/pyc_source/pyc_error.py create mode 100644 tests/pyc_test.py diff --git a/.gitignore b/.gitignore index 023368a..fb9e7e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.py[cod] *.dump +tests/pyc_error.py # C extensions *.so diff --git a/crash_test.py b/crash_test.py index d7cc3a5..ffb28d6 100644 --- a/crash_test.py +++ b/crash_test.py @@ -1,12 +1,13 @@ if __name__ == '__main__': + def foo(): foovar = 7 bar() def bar(): barvar = "hello" - list_sample = [1,2,3,4] - dict_sample = {'a':1, 'b':2} + list_sample = [1, 2, 3, 4] + dict_sample = {'a': 1, 'b': 2} baz() def baz(): @@ -22,7 +23,7 @@ def raiser(self): try: foo() - except: + except Exception: import pydump filename = __file__ + '.dump' print("Exception caught, writing %s" % filename) diff --git a/pydump.py b/pydump.py index e1418bc..7ed41b8 100644 --- a/pydump.py +++ b/pydump.py @@ -26,12 +26,12 @@ import pdb import gzip import linecache + try: import cPickle as pickle except ImportError: import pickle - PY2 = (sys.version_info.major == 2) if PY2: @@ -44,9 +44,9 @@ except ImportError: dill = None - __version__ = "1.2.0" DUMP_VERSION = 1 +PYC_FILE_MARKER = "__THIS_IS_PYC_FILE__" def save_dump(filename, tb=None): @@ -89,7 +89,7 @@ def load_dump(filename): try: with open(filename, "rb") as f: return dill.load(f) - except: + except Exception: pass # dill load failed, try pickle instead try: return pickle.load(f) @@ -98,15 +98,22 @@ def load_dump(filename): return pickle.load(f) -def debug_dump(dump_filename, post_mortem_func=pdb.post_mortem): +def debug_dump(dump_filename, + py_source_directory, + post_mortem_func=pdb.post_mortem): # monkey patching for pdb's longlist command - import inspect, types - inspect.isframe = lambda obj: isinstance(obj, types.FrameType) or obj.__class__.__name__ == "FakeFrame" - inspect.iscode = lambda obj: isinstance(obj, types.CodeType) or obj.__class__.__name__ == "FakeCode" - inspect.isclass = lambda obj: isinstance(obj, type) or obj.__class__.__name__ == "FakeClass" - inspect.istraceback = lambda obj: isinstance(obj, types.TracebackType) or obj.__class__.__name__ == "FakeTraceback" + import inspect + import types + inspect.isframe = lambda obj: isinstance( + obj, types.FrameType) or obj.__class__.__name__ == "FakeFrame" + inspect.iscode = lambda obj: isinstance( + obj, types.CodeType) or obj.__class__.__name__ == "FakeCode" + inspect.isclass = lambda obj: isinstance( + obj, type) or obj.__class__.__name__ == "FakeClass" + inspect.istraceback = lambda obj: isinstance( + obj, types.TracebackType) or obj.__class__.__name__ == "FakeTraceback" dump = load_dump(dump_filename) - _cache_files(dump["files"]) + _cache_files(dump["files"], py_source_directory) tb = dump["traceback"] _inject_builtins(tb) _old_checkcache = linecache.checkcache @@ -116,7 +123,6 @@ def debug_dump(dump_filename, post_mortem_func=pdb.post_mortem): class FakeClass(object): - def __init__(self, repr, vars): self.__repr = repr self.__dict__.update(vars) @@ -126,14 +132,13 @@ def __repr__(self): class FakeCode(object): - def __init__(self, code): self.co_filename = os.path.abspath(code.co_filename) self.co_name = code.co_name self.co_argcount = code.co_argcount self.co_consts = tuple( - FakeCode(c) if hasattr(c, "co_filename") else c for c in code.co_consts - ) + FakeCode(c) if hasattr(c, "co_filename") else c + for c in code.co_consts) self.co_firstlineno = code.co_firstlineno self.co_lnotab = code.co_lnotab self.co_varnames = code.co_varnames @@ -141,7 +146,6 @@ def __init__(self, code): class FakeFrame(object): - def __init__(self, frame): self.f_code = FakeCode(frame.f_code) self.f_locals = _convert_dict(frame.f_locals) @@ -152,13 +156,13 @@ def __init__(self, frame): if "self" in self.f_locals: self.f_locals["self"] = _convert_obj(frame.f_locals["self"]) - -class FakeTraceback(object): +class FakeTraceback(object): def __init__(self, traceback): self.tb_frame = FakeFrame(traceback.tb_frame) self.tb_lineno = traceback.tb_lineno - self.tb_next = FakeTraceback(traceback.tb_next) if traceback.tb_next else None + self.tb_next = FakeTraceback( + traceback.tb_next) if traceback.tb_next else None self.tb_lasti = 0 @@ -167,9 +171,8 @@ def _remove_builtins(fake_tb): while traceback: frame = traceback.tb_frame while frame: - frame.f_globals = dict( - (k, v) for k, v in frame.f_globals.items() if k not in dir(builtins) - ) + frame.f_globals = dict((k, v) for k, v in frame.f_globals.items() + if k not in dir(builtins)) frame = frame.f_back traceback = traceback.tb_next @@ -192,11 +195,17 @@ def _get_traceback_files(traceback): filename = os.path.abspath(frame.f_code.co_filename) if filename not in files: try: - files[filename] = open(filename).read() + with open(filename) as f: + files[filename] = f.read() except IOError: - files[ - filename - ] = "couldn't locate '%s' during dump" % frame.f_code.co_filename + root, _ = os.path.splitext(filename) + pyc_path = "".join((root, ".pyc")) + pyc_file_exists = os.path.exists(pyc_path) + if pyc_file_exists: + files[filename] = PYC_FILE_MARKER + else: + files[ + filename] = "couldn't locate '%s' during dump" % frame.f_code.co_filename frame = frame.f_back traceback = traceback.tb_next return files @@ -212,7 +221,7 @@ def _safe_repr(v): def _convert_obj(obj): try: return FakeClass(_safe_repr(obj), _convert_dict(obj.__dict__)) - except: + except Exception: return _convert(obj) @@ -229,51 +238,83 @@ def _convert(v): try: dill.dumps(v) return v - except: + except Exception: return _safe_repr(v) else: from datetime import date, time, datetime, timedelta - + if PY2: - BUILTIN = (str, unicode, int, long, float, date, time, datetime, timedelta) + BUILTIN = (str, unicode, int, long, float, date, time, datetime, + timedelta) else: BUILTIN = (str, int, float, date, time, datetime, timedelta) # XXX: what about bytes and bytearray? - + if v is None: return v - + if type(v) in BUILTIN: return v - + if type(v) is tuple: return tuple(_convert_seq(v)) - + if type(v) is list: return list(_convert_seq(v)) - + if type(v) is set: return set(_convert_seq(v)) - + if type(v) is dict: return _convert_dict(v) - + return _safe_repr(v) - -def _cache_files(files): - for name, data in files.items(): - lines = [line + "\n" for line in data.splitlines()] - linecache.cache[name] = (len(data), None, lines, name) + +def _get_expect_file_paths(py_source_directory, name): + expect_file_paths = [] + os.path.join(py_source_directory, name) + dir_parts = name.split(os.path.sep) + for index in range(len(dir_parts)): + expect_file_path = os.path.join(py_source_directory, + *dir_parts[index:]) + expect_file_paths.append(expect_file_path) + + return expect_file_paths + + +def _find_py_file_path(name, py_source_directory): + expect_file_paths = _get_expect_file_paths(py_source_directory, name) + for expect_file_path in expect_file_paths: + if os.path.exists(expect_file_path): + return expect_file_path + raise Exception("Cannot recover pyc files") + + +def _recover_py_source_codes(name, source, py_source_directory): + if source == PYC_FILE_MARKER: + py_file_path = _find_py_file_path(name, py_source_directory) + with open(py_file_path) as f: + data = f.read() + return data + + return source + + +def _cache_files(files, py_source_directory): + for name, source in files.items(): + source_codes = \ + _recover_py_source_codes(name, source, py_source_directory) + lines = [line + "\n" for line in source_codes.splitlines()] + linecache.cache[name] = (len(source_codes), None, lines, name) def main(): import argparse parser = argparse.ArgumentParser( - description="%s v%s: post-mortem debugging for Python programs" - % (sys.executable, __version__) - ) + description="%s v%s: post-mortem debugging for Python programs" % + (sys.executable, __version__)) debugger_group = parser.add_mutually_exclusive_group(required=False) debugger_group.add_argument( "--pdb", @@ -297,15 +338,17 @@ def main(): help="Use ipdb IPython debugger", ) parser.add_argument("filename", help="dumped file") + parser.add_argument("--directory", + dest="py_source_directory", + default=".", + help="Py source directory") args = parser.parse_args() if not args.debugger: args.debugger = "pdb" print("Starting %s..." % args.debugger, file=sys.stderr) dbg = __import__(args.debugger) - return debug_dump( - args.filename, dbg.post_mortem - ) + return debug_dump(args.filename, args.py_source_directory, dbg.post_mortem) if __name__ == "__main__": diff --git a/run_test.py b/run_test.py new file mode 100644 index 0000000..f1ee61d --- /dev/null +++ b/run_test.py @@ -0,0 +1,32 @@ +import pydump +from tests.folderA.multiple_layout_test import multiple_layout_test +from tests.pyc_test import PycTest + + +def save_pydump(func, dump_filename, extend_command=""): + try: + func() + except Exception: + filename = dump_filename + ".dump" + print("Exception caught, writing {}".format(filename)) + pydump.save_dump(filename) + print("Run 'python -m pydump {0}{1}' to debug".format( + filename, extend_command)) + + +def run_pyc_test(): + pyc_test = PycTest() + pyc_test.init_pyc_test() + py_source_path = pyc_test.get_py_source_path() + extend_command = " --directory {}".format(py_source_path) + save_pydump(pyc_test.run_pyc_file, "pyc_test", extend_command) + pyc_test.remove_dst_pyc_file() + + +def run_multiple_layout_test(): + save_pydump(multiple_layout_test, "multiple_layout") + + +if __name__ == "__main__": + run_pyc_test() + run_multiple_layout_test() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/folderA/__init__.py b/tests/folderA/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/folderA/folderB/__init__.py b/tests/folderA/folderB/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/folderA/folderB/raise_exception.py b/tests/folderA/folderB/raise_exception.py new file mode 100644 index 0000000..aff875f --- /dev/null +++ b/tests/folderA/folderB/raise_exception.py @@ -0,0 +1,2 @@ +def error(): + raise Exception("Multiple layout test error") diff --git a/tests/folderA/multiple_layout_test.py b/tests/folderA/multiple_layout_test.py new file mode 100644 index 0000000..48bbfcd --- /dev/null +++ b/tests/folderA/multiple_layout_test.py @@ -0,0 +1,5 @@ +from tests.folderA.folderB.raise_exception import error + + +def multiple_layout_test(): + error() diff --git a/tests/pyc_source/__init__.py b/tests/pyc_source/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/pyc_source/pyc_error.py b/tests/pyc_source/pyc_error.py new file mode 100644 index 0000000..efac029 --- /dev/null +++ b/tests/pyc_source/pyc_error.py @@ -0,0 +1,2 @@ +def error(): + raise Exception("Pyc exception") diff --git a/tests/pyc_test.py b/tests/pyc_test.py new file mode 100644 index 0000000..ca01b5e --- /dev/null +++ b/tests/pyc_test.py @@ -0,0 +1,64 @@ +import os +import sys +from shutil import copyfile + + +class PycTest: + def __init__(self): + self.workspace = os.path.dirname(os.path.abspath(__file__)) + self.base_pyc_filename = "pyc_error{}.pyc" + self.py_filename = "pyc_error.py" + + def _get_src_and_dst(self, filename): + src = os.path.join(self.workspace, "pyc_source", filename) + dst = os.path.join(self.workspace, filename) + return src, dst + + def _init_py_file(self): + src, dst = self._get_src_and_dst(self.py_filename) + copyfile(src, dst) + + def _get_pycache_filename(self): + major = sys.version_info.major + minor = sys.version_info.minor + pycache_filename = self.base_pyc_filename.format( + ".cpython-{0}{1}".format(major, minor)) + return pycache_filename + + def _get_test_pyc_filename(self): + return self.base_pyc_filename.format("") + + def _create_pycache_file(self): + from tests import pyc_error + python_version = sys.version_info.major + if python_version == 3: + pycache_filename = self._get_pycache_filename() + pycache_file_path = os.path.join(self.workspace, "__pycache__", + pycache_filename) + pyc_filename = self._get_test_pyc_filename() + src, pyc_file_path = self._get_src_and_dst(pyc_filename) + copyfile(pycache_file_path, pyc_file_path) + + def _remove_dst_py_file(self): + _, dst = self._get_src_and_dst(self.py_filename) + return os.remove(dst) + + def init_pyc_test(self): + self._init_py_file() + self._create_pycache_file() + self._remove_dst_py_file() + + def get_py_source_path(self): + pyc_filename = self._get_test_pyc_filename() + src, _ = self._get_src_and_dst(pyc_filename) + return os.path.dirname(src) + + @staticmethod + def run_pyc_file(): + from tests.pyc_error import error + error() + + def remove_dst_pyc_file(self): + pyc_filename = self._get_test_pyc_filename() + _, dst = self._get_src_and_dst(pyc_filename) + return os.remove(dst) From bf0af6dede6c569ae180626e096fc4d41df15036 Mon Sep 17 00:00:00 2001 From: hincheung Date: Tue, 11 Jan 2022 22:50:40 +0800 Subject: [PATCH 2/2] Fix Windows path error --- pydump.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pydump.py b/pydump.py index 7ed41b8..c445ed6 100644 --- a/pydump.py +++ b/pydump.py @@ -26,6 +26,7 @@ import pdb import gzip import linecache +from pathlib import PureWindowsPath try: import cPickle as pickle @@ -47,6 +48,8 @@ __version__ = "1.2.0" DUMP_VERSION = 1 PYC_FILE_MARKER = "__THIS_IS_PYC_FILE__" +PY_FILE_MARKER = "__THIS_IS_PY_FILE__" +UNKNOWN_FILE_MARKER = "__THIS_IS_UNKNOWN_FILE__" def save_dump(filename, tb=None): @@ -195,17 +198,17 @@ def _get_traceback_files(traceback): filename = os.path.abspath(frame.f_code.co_filename) if filename not in files: try: - with open(filename) as f: + with open(filename, encoding="utf-8") as f: files[filename] = f.read() except IOError: - root, _ = os.path.splitext(filename) - pyc_path = "".join((root, ".pyc")) - pyc_file_exists = os.path.exists(pyc_path) - if pyc_file_exists: + isEndWithPyc = filename.endswith(".pyc") + isEndWithPy = filename.endswith(".py") + if isEndWithPyc: files[filename] = PYC_FILE_MARKER + elif isEndWithPy: + files[filename] = PY_FILE_MARKER else: - files[ - filename] = "couldn't locate '%s' during dump" % frame.f_code.co_filename + files[filename] = UNKNOWN_FILE_MARKER frame = frame.f_back traceback = traceback.tb_next return files @@ -274,7 +277,8 @@ def _convert(v): def _get_expect_file_paths(py_source_directory, name): expect_file_paths = [] os.path.join(py_source_directory, name) - dir_parts = name.split(os.path.sep) + dir_parts = PureWindowsPath(name).parts + for index in range(len(dir_parts)): expect_file_path = os.path.join(py_source_directory, *dir_parts[index:]) @@ -292,9 +296,11 @@ def _find_py_file_path(name, py_source_directory): def _recover_py_source_codes(name, source, py_source_directory): - if source == PYC_FILE_MARKER: + expectSourceSet = {PYC_FILE_MARKER, PY_FILE_MARKER} + isExpectedSource = source in expectSourceSet + if isExpectedSource: py_file_path = _find_py_file_path(name, py_source_directory) - with open(py_file_path) as f: + with open(py_file_path, encoding="utf-8") as f: data = f.read() return data