diff options
author | Sophie Brun <sophie@offensive-security.com> | 2021-01-27 07:58:24 +0100 |
---|---|---|
committer | Raphaël Hertzog <raphael@offensive-security.com> | 2021-01-27 06:48:04 -0800 |
commit | c9aea7efe0c8223f4b6f9742bcfe95c67f151ec5 (patch) | |
tree | 6042d0667414448027a1cca2bf5eaf95d1813514 | |
parent | 8bc3335deae3e4ad41a23eed79007a654224653a (diff) |
Changes to lexer to better handle recursive structs of libtsk
Origin: https://github.com/py4n6/pytsk/commit/923dd422cd1152548db79192db2dff8324ab4d50
Bug: https://github.com/py4n6/pytsk/issues/71
Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=971149
sleuthkit (libtsk) version 4.8 breaks the generation of TSK_FS_INFO.
Gbp-Pq: Name change-lexer.patch
-rw-r--r-- | class_parser.py | 18 | ||||
-rw-r--r-- | lexer.py | 21 |
2 files changed, 22 insertions, 17 deletions
diff --git a/class_parser.py b/class_parser.py index db140f3..be994dc 100644 --- a/class_parser.py +++ b/class_parser.py @@ -235,7 +235,7 @@ import lexer DEBUG = 0 # The pytsk3 version. -VERSION = "20200117" +VERSION = "20210126" # These functions are used to manage library memory. FREE = "aff4_free" @@ -2022,7 +2022,7 @@ class Wrapper(Type): class PointerWrapper(Wrapper): - """ A pointer to a wrapped class """ + """A pointer to a wrapped class """ def __init__(self, name, type, *args, **kwargs): type = type.split()[0] @@ -2062,7 +2062,7 @@ class PointerWrapper(Wrapper): class StructWrapper(Wrapper): - """ A wrapper for struct classes """ + """A wrapper for struct classes """ active = False def __init__(self, name, type, *args, **kwargs): @@ -3316,7 +3316,7 @@ class ProxiedMethod(Method): class StructConstructor(ConstructorMethod): - """ A constructor for struct wrappers - basically just allocate + """A constructor for struct wrappers - basically just allocate memory for the struct. """ @@ -3414,7 +3414,7 @@ class ClassGenerator(object): return result def prepare(self): - """ This method is called just before we need to write the + """This method is called just before we need to write the output and allows us to do any last minute fixups. 
""" pass @@ -4147,6 +4147,7 @@ class HeaderParser(lexer.SelfFeederMixIn): # we cant handle them at all) ["(RECURSIVE_)?STRUCT", "(struct|union)\s+([_A-Za-z0-9]+)?\s*{", "PUSH_STATE", "RECURSIVE_STRUCT"], ["RECURSIVE_STRUCT", "}\s+[0-9A-Za-z]+", "POP_STATE", None], + ["RECURSIVE_STRUCT", "};", "POP_STATE", None], # Process enums (2 forms - named and typedefed) ["INITIAL", r"enum\s+([0-9A-Za-z_]+)\s+{", "PUSH_STATE,ENUM_START", "ENUM"], @@ -4168,10 +4169,13 @@ class HeaderParser(lexer.SelfFeederMixIn): ] - def __init__(self, name, verbose=1, base=""): + def __init__(self, name, verbose=0, base=""): + if DEBUG > 0: + verbose = 1 + self.module = Module(name) self.base = base - super(HeaderParser, self).__init__(verbose=0) + super(HeaderParser, self).__init__(verbose=verbose) file_object = io.BytesIO( b"// Base object\n" @@ -16,6 +16,7 @@ """A simple feed lexer.""" import re +import sys class Lexer(object): @@ -75,7 +76,7 @@ class Lexer(object): error = self.error, ) if self.verbose > 1: - print("Saving state {0:s}".format(self.processed)) + sys.stderr.write("Saving state {0:s}\n".format(self.processed)) def restore_state(self): state = self.saved_state @@ -92,7 +93,7 @@ class Lexer(object): self.error = state["error"] if self.verbose > 1: - print("Restoring state to offset {0:s}".format(self.processed)) + sys.stderr.write("Restoring state to offset {0:s}\n".format(self.processed)) def next_token(self, end=True): ## Now try to match any of the regexes in order: @@ -101,12 +102,12 @@ class Lexer(object): ## Does the rule apply for us now? 
if state.match(current_state): if self.verbose > 2: - print("{0:s}: Trying to match {1:s} with {2:s}".format( + sys.stderr.write("{0:s}: Trying to match {1:s} with {2:s}\n".format( self.state, repr(self.buffer[:10]), repr(re_str))) match = regex.match(self.buffer) if match: if self.verbose > 3: - print("{0:s} matched {1:s}".format( + sys.stderr.write("{0:s} matched {1:s}\n".format( re_str, match.group(0).encode("utf8"))) ## The match consumes the data off the buffer (the @@ -119,7 +120,7 @@ class Lexer(object): for t in token.split(","): try: if self.verbose > 0: - print("0x{0:X}: Calling {1:s} {2:s}".format( + sys.stderr.write("0x{0:X}: Calling {1:s} {2:s}\n".format( self.processed, t, repr(match.group(0)))) cb = getattr(self, t, self.default_handler) except AttributeError: @@ -165,18 +166,18 @@ class Lexer(object): def default_handler(self, token, match): if self.verbose > 2: - print("Default handler: {0:s} with {1:s}".format( + sys.stderr.write("Default handler: {0:s} with {1:s}\n".format( token, repr(match.group(0)))) def ERROR(self, message=None, weight=1): if self.verbose > 0 and message: - print("Error({0:s}): {1:s}".format(weight, message)) + sys.stderr.write("Error({0:d}): {1!s}\n".format(weight, message)) self.error += weight def PUSH_STATE(self, dummy_token=None, dummy_match=None): if self.verbose > 1: - print("Storing state {0:s}".format(self.state)) + sys.stderr.write("Storing state {0:s}\n".format(self.state)) self.state_stack.append(self.state) @@ -184,9 +185,9 @@ class Lexer(object): try: state = self.state_stack.pop() if self.verbose > 1: - print("Returned state to {0:s}".format(state)) + sys.stderr.write("Returned state to {0:s}\n".format(state)) except IndexError: - print("Tried to pop the state but failed - possible recursion error") + sys.stderr.write("Tried to pop the state but failed - possible recursion error\n") state = None return state |