summary | refs | log | tree | commit | diff
path: root/pdfrw/objects/pdfname.py
diff options
context:
space:
mode:
Diffstat (limited to 'pdfrw/objects/pdfname.py')
-rw-r--r-- pdfrw/objects/pdfname.py | 83
1 files changed, 73 insertions, 10 deletions
diff --git a/pdfrw/objects/pdfname.py b/pdfrw/objects/pdfname.py
index 1adae9d..1fdf5b5 100644
--- a/pdfrw/objects/pdfname.py
+++ b/pdfrw/objects/pdfname.py
@@ -1,17 +1,80 @@
-# A part of pdfrw (pdfrw.googlecode.com)
-# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
+# A part of pdfrw (https://github.com/pmaupin/pdfrw)
+# Copyright (C) 2006-2015 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
-from pdfrw.objects.pdfobject import PdfObject
+import re
+
+from ..errors import log
+
+warn = log.warning
+
+
+class BasePdfName(str):
+ ''' A PdfName is an identifier that starts with
+ a slash.
+
+ If a PdfName has illegal space or delimiter characters,
+ then it will be decorated with an "encoded" attribute that
+ has those characters properly escaped as #<hex><hex>
+
+ The "encoded" attribute is what is sent out to a PDF file,
+ the non-encoded main object is what is compared for equality
+ in a PDF dictionary.
+ '''
+
+ indirect = False
+
+ whitespace = '\x00 \t\f\r\n'
+ delimiters = '()<>{}[]/%'
+ forbidden = list(whitespace) + list('\\' + x for x in delimiters)
+ remap = dict((x, '#%02X' % ord(x)) for x in (whitespace + delimiters))
+ split_to_encode = re.compile('(%s)' % '|'.join(forbidden)).split
+ split_to_decode = re.compile(r'\#([0-9A-Fa-f]{2})').split
+
+ def __new__(cls, name, pre_encoded=True, remap=remap,
+ join=''.join, new=str.__new__, chr=chr, int=int,
+ split_to_encode=split_to_encode,
+ split_to_decode=split_to_decode,
+ ):
+ ''' We can build a PdfName from scratch, or from
+ a pre-encoded name (e.g. coming in from a file).
+ '''
+ # Optimization for normal case
+ if name[1:].isalnum():
+ return new(cls, name)
+ encoded = name
+ if pre_encoded:
+ if '#' in name:
+ substrs = split_to_decode(name)
+ substrs[1::2] = (chr(int(x, 16)) for x in substrs[1::2])
+ name = join(substrs)
+ else:
+ encoded = split_to_encode(encoded)
+ encoded[3::2] = (remap[x] for x in encoded[3::2])
+ encoded = join(encoded)
+ self = new(cls, name)
+ if encoded != name:
+ self.encoded = encoded
+ return self
+
+
+# We could have used a metaclass, but this matches what
+# we were doing historically.
class PdfName(object):
- ''' PdfName is a simple way to get a PDF name from a string:
+ ''' Two simple ways to get a PDF name from a string:
+
+ x = PdfName.FooBar
+ x = PdfName('FooBar')
+
+ Either technique will return "/FooBar"
- PdfName.FooBar == PdfObject('/FooBar')
'''
- def __getattr__(self, name):
- return self(name)
- def __call__(self, name, PdfObject=PdfObject):
- return PdfObject('/' + name)
-PdfName = PdfName()
+ def __getattr__(self, name, BasePdfName=BasePdfName):
+ return BasePdfName('/' + name, False)
+
+ def __call__(self, name, BasePdfName=BasePdfName):
+ return BasePdfName('/' + name, False)
+
+PdfName = PdfName()