diff options
Diffstat (limited to 'pdfrw/objects/pdfstring.py')
-rw-r--r-- | pdfrw/objects/pdfstring.py | 31 |
1 files changed, 16 insertions, 15 deletions
diff --git a/pdfrw/objects/pdfstring.py b/pdfrw/objects/pdfstring.py
index 7a7d1e4..5c35d70 100644
--- a/pdfrw/objects/pdfstring.py
+++ b/pdfrw/objects/pdfstring.py
@@ -1,9 +1,10 @@
-# A part of pdfrw (pdfrw.googlecode.com)
-# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
+# A part of pdfrw (https://github.com/pmaupin/pdfrw)
+# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas
 # MIT license -- See LICENSE.txt for details
 
 import re
 
+
 class PdfString(str):
     ''' A PdfString is an encoded string. It has a decode
         method to get the actual string data out, and there
@@ -12,18 +13,20 @@ class PdfString(str):
         defaults to being a direct object.
     '''
     indirect = False
-    unescape_dict = {'\\b':'\b', '\\f':'\f', '\\n':'\n',
-                     '\\r':'\r', '\\t':'\t',
-                     '\\\r\n': '', '\\\r':'', '\\\n':'',
-                     '\\\\':'\\', '\\':'',
-                    }
-    unescape_pattern = r'(\\\\|\\b|\\f|\\n|\\r|\\t|\\\r\n|\\\r|\\\n|\\[0-9]+|\\)'
+    unescape_dict = {'\\b': '\b', '\\f': '\f', '\\n': '\n',
+                     '\\r': '\r', '\\t': '\t',
+                     '\\\r\n': '', '\\\r': '', '\\\n': '',
+                     '\\\\': '\\', '\\': '',
+                     }
+    unescape_pattern = (r'(\\\\|\\b|\\f|\\n|\\r|\\t'
+                        r'|\\\r\n|\\\r|\\\n|\\[0-9]+|\\)')
     unescape_func = re.compile(unescape_pattern).split
 
     hex_pattern = '([a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
     hex_func = re.compile(hex_pattern).split
 
-    hex_pattern2 = '([a-fA-F0-9][a-fA-F0-9][a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
+    hex_pattern2 = ('([a-fA-F0-9][a-fA-F0-9][a-fA-F0-9][a-fA-F0-9]|'
+                    '[a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])')
     hex_func2 = re.compile(hex_pattern2).split
 
     hex_funcs = hex_func, hex_func2
@@ -50,7 +53,9 @@ class PdfString(str):
         data = self.hex_funcs[twobytes](data)
         chars = data[1::2]
         other = data[0::2]
-        assert other[0] == '<' and other[-1] == '>' and ''.join(other) == '<>', self
+        assert (other[0] == '<' and
+                other[-1] == '>' and
+                ''.join(other) == '<>'), self
         return ''.join([remap(int(x, 16)) for x in chars])
 
     def decode(self, remap=chr, twobytes=False):
@@ -62,12 +67,8 @@ class PdfString(str):
 
     def encode(cls, source, usehex=False):
         assert not usehex, "Not supported yet"
-        if isinstance(source, unicode):
-            source = source.encode('utf-8')
-        else:
-            source = str(source)
         source = source.replace('\\', '\\\\')
         source = source.replace('(', '\\(')
         source = source.replace(')', '\\)')
-        return cls('(' +source + ')')
+        return cls('(' + source + ')')
     encode = classmethod(encode)