pdfrw (0.1-3) unstable; urgency=medium

  * QA upload.
  * Build using dh_python2

# imported from the archive
author: Matthias Klose <doko@debian.org> 2014-07-13 17:50:59 +0200
committer: Matthias Klose <doko@debian.org> 2014-07-13 17:50:59 +0200
commit: a1959ba9c0c9f3881c3e593e5aef1046750880f2 (patch)
tree: e4fc630e9e26b227d9a7e41db65d80f6158e8ae9 /pdfrw/objects/pdfstring.py
1 file changed, 73 insertions, 0 deletions
diff --git a/pdfrw/objects/pdfstring.py b/pdfrw/objects/pdfstring.py
new file mode 100644
index 0000000..7a7d1e4
--- /dev/null
+++ b/pdfrw/objects/pdfstring.py
@@ -0,0 +1,73 @@
+# A part of pdfrw (pdfrw.googlecode.com)
+# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
+# MIT license -- See LICENSE.txt for details
+
+import re
+
+class PdfString(str):
+    ''' A PdfString is an encoded string.  It has a decode
+        method to get the actual string data out, and there
+        is an encode class method to create such a string.
+        Like any PDF object, it could be indirect, but it
+        defaults to being a direct object.
+    '''
+    indirect = False
+    # Backslash-newline is a continuation; a stray backslash is dropped.
+    unescape_dict = {'\\b':'\b', '\\f':'\f', '\\n':'\n',
+                     '\\r':'\r', '\\t':'\t',
+                     '\\\r\n': '', '\\\r':'', '\\\n':'',
+                     '\\\\':'\\', '\\':'',
+                    }
+    # An octal escape is one to three digits, each in the range 0-7.
+    unescape_pattern = r'(\\\\|\\b|\\f|\\n|\\r|\\t|\\\r\n|\\\r|\\\n|\\[0-7]{1,3}|\\)'
+    unescape_func = re.compile(unescape_pattern).split
+
+    hex_pattern = '([a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
+    hex_func = re.compile(hex_pattern).split
+    hex_pattern2 = '([a-fA-F0-9][a-fA-F0-9][a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9][a-fA-F0-9]|[a-fA-F0-9])'
+    hex_func2 = re.compile(hex_pattern2).split
+    hex_funcs = hex_func, hex_func2  # indexed by the twobytes flag
+
+    def decode_regular(self, remap=chr):
+        ''' Decode a (...) literal string.  remap converts the value of
+            each octal escape into a character. '''
+        assert self[0] == '(' and self[-1] == ')'
+        unescape = self.unescape_dict.get
+        result = []
+        for chunk in self.unescape_func(self[1:-1]):
+            chunk = unescape(chunk, chunk)
+            if chunk.startswith('\\') and len(chunk) > 1:
+                # Overflow above one byte is ignored, as the PDF spec requires.
+                chunk = remap(int(chunk[1:], 8) & 0xFF)
+            if chunk:
+                result.append(chunk)
+        return ''.join(result)
+
+    def decode_hex(self, remap=chr, twobytes=False):
+        ''' Decode a <...> hex string, ignoring whitespace.  With twobytes,
+            up to four digits form each character passed to remap. '''
+        data = self.hex_funcs[twobytes](''.join(self.split()))
+        chars = data[1::2]
+        other = data[0::2]
+        assert other[0] == '<' and other[-1] == '>' and ''.join(other) == '<>', self
+        # A lone final digit is the high half of a byte: <A> means A0.
+        return ''.join([remap(int(x + '0' * (len(x) % 2), 16)) for x in chars])
+
+    def decode(self, remap=chr, twobytes=False):
+        ''' Decode either form, chosen by the opening delimiter. '''
+        if self.startswith('('):
+            return self.decode_regular(remap)
+        return self.decode_hex(remap, twobytes)
+
+    @classmethod
+    def encode(cls, source, usehex=False):
+        ''' Build a (...) literal PdfString from source, escaping
+            backslashes and parentheses.  usehex is not supported yet. '''
+        assert not usehex, "Not supported yet"
+        if not isinstance(source, str):
+            # Text objects are stored as UTF-8; the rest are stringified.
+            encoder = getattr(source, 'encode', None)
+            source = encoder('utf-8') if encoder else str(source)
+        for char in '\\()':
+            source = source.replace(char, '\\' + char)
+        return cls('(' + source + ')')
author	Matthias Klose <doko@debian.org>	2014-07-13 17:50:59 +0200
committer	Matthias Klose <doko@debian.org>	2014-07-13 17:50:59 +0200
commit	a1959ba9c0c9f3881c3e593e5aef1046750880f2 (patch)
tree	e4fc630e9e26b227d9a7e41db65d80f6158e8ae9 /pdfrw/objects/pdfstring.py