summary | refs | log | tree | commit | diff
path: root/pdfrw/objects/pdfdict.py
diff options
context:
space:
mode:
Diffstat (limited to 'pdfrw/objects/pdfdict.py')
-rw-r--r-- pdfrw/objects/pdfdict.py 205
1 files changed, 205 insertions, 0 deletions
diff --git a/pdfrw/objects/pdfdict.py b/pdfrw/objects/pdfdict.py
new file mode 100644
index 0000000..1b3f6a0
--- /dev/null
+++ b/pdfrw/objects/pdfdict.py
@@ -0,0 +1,205 @@
+# A part of pdfrw (pdfrw.googlecode.com)
+# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
+# MIT license -- See LICENSE.txt for details
+
+from pdfrw.objects.pdfname import PdfName
+from pdfrw.objects.pdfindirect import PdfIndirect
+from pdfrw.objects.pdfobject import PdfObject
+
class _DictSearch(object):
    ''' Used to search for inheritable attributes.

        Wraps a dictionary and, on lookup, walks up the chain of
        .Parent links until one of the dictionaries in the chain
        supplies a value for the requested key.
    '''

    def __init__(self, basedict):
        # Every search starts at this dictionary.
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute access is shorthand for item access: the
            attribute name is converted with PdfName() and then
            looked up through __getitem__.
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first value that is not None found for name,
            starting at basedict and then following .Parent links.
            Returns None if the chain ends without supplying a value.

            Raises AssertionError if the .Parent chain loops back
            onto a dictionary that was already searched.

            The set, getattr and id parameters are not meant to be
            supplied by callers (presumably they only bind the
            builtins as locals); getattr is unused and is kept so
            that the signature stays unchanged.
        '''
        seen = set()
        current = self.basedict
        while True:
            value = current[name]
            if value is not None:
                return value
            current_id = id(current)
            # Raise explicitly instead of using an assert statement:
            # asserts are removed under "python -O", and without this
            # check a circular /Parent chain would never terminate.
            if current_id in seen:
                raise AssertionError(
                    'circular Parent chain while searching for %r' % (name,))
            seen.add(current_id)
            current = current.Parent
            if current is None:
                return None
+
+class _Private(object):
+ ''' Used to store private attributes (not output to PDF files)
+ on PdfDict classes
+ '''
+ def __init__(self, pdfdict):
+ vars(self)['pdfdict'] = pdfdict
+ def __setattr__(self, name, value):
+ vars(self.pdfdict)[name] = value
+
class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Keys that (after the initial "/") conform to Python naming conventions
          can also be accessed (set and retrieved) as attributes of the dictionary.
          E.g.  mydict.Page is the same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set on the dictionary
          object attribute dictionary by using the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard Adobe dictionary keys start with an upper case letter,
          so to avoid conflicts, it is best to start private attributes with
          lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to dictionary's
                         attributes
            - inheritable -- this creates and returns a "view" attribute that
                         will search through the object hierarchy for any desired
                         attribute, such as /Rotate or /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the object's
              attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary without
              updating the stream length.

            It is possible, for example, to have a PDF name such as "/indirect"
            or "/stream", but you cannot access such a name as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary
    '''
    indirect = False
    stream = None

    # Maps each specially handled attribute name to a pair:
    # (name used in the instance attribute dictionary,
    #  whether assigning it must also update Length).
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__):
        ''' Store value under name.  Assigning None deletes the item
            if it is present and is otherwise a no-op.
        '''
        assert name.startswith('/'), name
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Positional arguments initialize the dictionary contents:
            a single argument is handed to dict.update() as is, and
            if it is a PdfDict its indirect and stream attributes
            are copied as well.  Keyword arguments are applied with
            setattr(), so they follow the rules of __setattr__.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream copies the stream without touching Length.
                self._stream = args.stream
        for key, value in kw.iteritems():
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.

            Returns None (instead of raising AttributeError) when the
            dictionary has no such key.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance, PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary, after resolving any indirect objects.

            A resolved value is stored back into the dictionary, so the
            indirect object is only resolved once.  Returns None when
            the key is missing.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            self[key] = value = value.real_value()
        return value

    def __getitem__(self, key):
        ''' Same as get(): a missing key yields None, not KeyError.
        '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get, PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects)
        '''
        info = special(name)
        if info is None:
            # Ordinary attribute: it is really a PDF dictionary item.
            self[PdfName(name)] = value
            return

        name, setlen = info
        vars(self)[name] = value
        if setlen:
            # Explicit branches instead of "cond and x or None", which
            # would silently produce None if the PdfObject were falsy.
            if value is None:
                self.Length = None
            else:
                self.Length = PdfObject(len(value))

    def iteritems(self, dictiter=dict.iteritems, isinstance=isinstance, PdfIndirect=PdfIndirect):
        ''' Iterate over the dictionary, resolving any unresolved objects

            Items whose value resolves to None are skipped.
        '''
        # Iterate over a snapshot, because storing a resolved value
        # modifies the dictionary while the loop is running.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                assert key.startswith('/'), (key, value)
                yield key, value

    def items(self):
        ''' Return a list of the (key, value) pairs from iteritems().
        '''
        return list(self.iteritems())

    def itervalues(self):
        ''' Iterate over the values produced by iteritems().
        '''
        for key, value in self.iteritems():
            yield value

    def values(self):
        ''' Return a list of the values produced by iteritems().
        '''
        return [value for key, value in self.iteritems()]

    def keys(self):
        ''' Return a list of the keys produced by iteritems().
        '''
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        ''' Iterate over the keys produced by iteritems().
        '''
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        ''' Same as iterating over the dictionary itself.
        '''
        return iter(self)

    def copy(self):
        ''' Return a new object of the same class, built from this one.
        '''
        return type(self)(self)

    def pop(self, key):
        ''' Remove key and return its value, with any indirect
            object resolved.  Raises KeyError if key is not present.
        '''
        value = self.get(key)
        del self[key]
        return value

    def popitem(self):
        ''' Remove an arbitrary item and return it as a (key, value)
            pair, with an indirect value resolved.  Raises KeyError
            if the dictionary is empty.
        '''
        # dict.popitem is the call that takes no key; calling
        # dict.pop(self) here always failed with TypeError.
        key, value = dict.popitem(self)
        if isinstance(value, PdfIndirect):
            value = value.real_value()
        return key, value

    @property
    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)

    @property
    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            A new helper object is created on every access rather
            than cached, so that it can never end up pointing to
            the wrong dictionary after the object has been copied.
        '''
        return _Private(self)
+
class IndirectPdfDict(PdfDict):
    ''' Convenience subclass of PdfDict whose indirect attribute
        defaults to True.

        Creating an IndirectPdfDict gives the same result as creating
        a direct PdfDict and then setting indirect = True on it.
    '''
    indirect = True