diff options
Diffstat (limited to 'pdfrw/objects/pdfdict.py')
-rw-r--r-- | pdfrw/objects/pdfdict.py | 205 |
1 files changed, 205 insertions, 0 deletions
# Source: pdfrw/objects/pdfdict.py (new file, mode 100644, index 0000000..1b3f6a0)
# A part of pdfrw (pdfrw.googlecode.com)
# Copyright (C) 2006-2012 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details

from pdfrw.objects.pdfname import PdfName
from pdfrw.objects.pdfindirect import PdfIndirect
from pdfrw.objects.pdfobject import PdfObject


class _DictSearch(object):
    ''' Used to search for inheritable attributes.

        Wraps a base dictionary; attribute or item lookups on the wrapper
        are answered by the base dictionary or, failing that, by the
        chain of dictionaries reached through successive .Parent links.
    '''

    def __init__(self, basedict):
        self.basedict = basedict

    def __getattr__(self, name, PdfName=PdfName):
        ''' Attribute access is item access on PdfName(name).
        '''
        return self[PdfName(name)]

    def __getitem__(self, name, set=set, getattr=getattr, id=id):
        ''' Return the first value for name that is not None, starting
            at the base dictionary and following .Parent links.
            Returns None once a dictionary without a Parent is reached.

            The extra keyword parameters only bind builtins as locals;
            getattr is unused but kept so the signature is unchanged.
        '''
        visited = set()
        mydict = self.basedict
        while 1:
            value = mydict[name]
            if value is not None:
                return value
            # Remember every dictionary already examined (by identity) so
            # that a Parent chain which loops back on itself trips the
            # assertion instead of spinning forever.
            myid = id(mydict)
            assert myid not in visited
            visited.add(myid)
            mydict = mydict.Parent
            if mydict is None:
                return


class _Private(object):
    ''' Used to store private attributes (not output to PDF files)
        on PdfDict classes
    '''

    def __init__(self, pdfdict):
        # Write straight into our own attribute dictionary: a normal
        # assignment would go through __setattr__ below, which needs
        # self.pdfdict to exist already.
        vars(self)['pdfdict'] = pdfdict

    def __setattr__(self, name, value):
        ''' Store value in the wrapped dictionary object's attribute
            dictionary, bypassing that object's own __setattr__.
        '''
        vars(self.pdfdict)[name] = value


class PdfDict(dict):
    ''' PdfDict objects are subclassed dictionaries with the following features:

        - Every key in the dictionary starts with "/"

        - A dictionary item can be deleted by assigning it to None

        - Keys that (after the initial "/") conform to Python naming conventions
          can also be accessed (set and retrieved) as attributes of the dictionary.
          E.g.  mydict.Page is the same thing as mydict['/Page']

        - Private attributes (not in the PDF space) can be set on the dictionary
          object attribute dictionary by using the private attribute:

                mydict.private.foo = 3
                mydict.foo = 5
                x = mydict.foo       # x will now contain 3
                y = mydict['/foo']   # y will now contain 5

          Most standard Adobe dictionary keys start with an upper case letter,
          so to avoid conflicts, it is best to start private attributes with
          lower case letters.

        - PdfDicts have the following read-only properties:

            - private -- as discussed above, provides write access to dictionary's
                         attributes
            - inheritable -- this creates and returns a "view" attribute that
                         will search through the object hierarchy for any desired
                         attribute, such as /Rotate or /MediaBox

        - PdfDicts also have the following special attributes:
            - indirect is not stored in the PDF dictionary, but in the object's
              attribute dictionary
            - stream is also stored in the object's attribute dictionary
              and will also update the stream length.
            - _stream will store in the object's attribute dictionary without
              updating the stream length.

            It is possible, for example, to have a PDF name such as "/indirect"
            or "/stream", but you cannot access such a name as an attribute:

                mydict.indirect -- accesses object's attribute dictionary
                mydict["/indirect"] -- accesses actual PDF dictionary
    '''
    indirect = False
    stream = None

    # Special attribute name -> (name used in the object's attribute
    # dictionary, whether assigning it also updates Length).
    _special = dict(indirect=('indirect', False),
                    stream=('stream', True),
                    _stream=('stream', False),
                    )

    def __setitem__(self, name, value, setter=dict.__setitem__):
        ''' Store value under name.  Assigning None removes the key
            if it is present and is otherwise a no-op.
            The key must start with "/".
        '''
        assert name.startswith('/'), name
        if value is not None:
            setter(self, name, value)
        elif name in self:
            del self[name]

    def __init__(self, *args, **kw):
        ''' Positional arguments are handed to dict.update (a single
            argument is unwrapped first).  If that single argument is a
            PdfDict, its indirect flag and stream are copied as well.
            Keyword arguments are applied with setattr, so they follow
            the same rules as attribute assignment.
        '''
        if args:
            if len(args) == 1:
                args = args[0]
            self.update(args)
            if isinstance(args, PdfDict):
                self.indirect = args.indirect
                # _stream rather than stream: copy the stream without
                # recomputing Length.
                self._stream = args.stream
        for key, value in kw.iteritems():
            setattr(self, key, value)

    def __getattr__(self, name, PdfName=PdfName):
        ''' If the attribute doesn't exist on the dictionary object,
            try to slap a '/' in front of it and get it out
            of the actual dictionary itself.
        '''
        return self.get(PdfName(name))

    def get(self, key, dictget=dict.get, isinstance=isinstance,
            PdfIndirect=PdfIndirect):
        ''' Get a value out of the dictionary, after resolving any
            indirect objects.  Returns None for a missing key.
        '''
        value = dictget(self, key)
        if isinstance(value, PdfIndirect):
            # Write the resolved value back so later lookups see it
            # directly.
            self[key] = value = value.real_value()
        return value

    def __getitem__(self, key):
        ''' Same as get(key): a missing key yields None, not KeyError.
        '''
        return self.get(key)

    def __setattr__(self, name, value, special=_special.get,
                    PdfName=PdfName, vars=vars):
        ''' Set an attribute on the dictionary.  Handle the keywords
            indirect, stream, and _stream specially (for content objects)
        '''
        info = special(name)
        if info is None:
            # Ordinary name: becomes a key in the PDF dictionary.
            self[PdfName(name)] = value
            return
        attrname, setlen = info
        vars(self)[attrname] = value
        if setlen:
            # Explicit branch instead of "cond and x or None", so the
            # outcome never depends on the truthiness of the PdfObject.
            if value is None:
                self.Length = None
            else:
                self.Length = PdfObject(len(value))

    def iteritems(self, dictiter=dict.iteritems, isinstance=isinstance,
                  PdfIndirect=PdfIndirect):
        ''' Iterate over the dictionary, resolving any unresolved objects
        '''
        # Iterate over a snapshot: resolving an entry assigns back into
        # the dictionary while the loop is running.
        for key, value in list(dictiter(self)):
            if isinstance(value, PdfIndirect):
                self[key] = value = value.real_value()
            if value is not None:
                assert key.startswith('/'), (key, value)
                yield key, value

    def items(self):
        return list(self.iteritems())

    def itervalues(self):
        for key, value in self.iteritems():
            yield value

    def values(self):
        return [value for key, value in self.iteritems()]

    def keys(self):
        return [key for key, value in self.iteritems()]

    def __iter__(self):
        for key, value in self.iteritems():
            yield key

    def iterkeys(self):
        return iter(self)

    def copy(self):
        ''' Return a new object of the same class built from this one.
        '''
        return type(self)(self)

    def pop(self, key):
        ''' Remove key and return its (resolved) value.
            Raises KeyError if the key is not present.
        '''
        value = self.get(key)
        del self[key]
        return value

    def popitem(self):
        ''' Remove an arbitrary item and return it as a (key, value)
            pair, with the value resolved if it was an indirect object.
            Raises KeyError if the dictionary is empty.

            This previously called dict.pop(self) without a key, which
            always raised TypeError.
        '''
        key, value = dict.popitem(self)
        if isinstance(value, PdfIndirect):
            value = value.real_value()
        return key, value

    @property
    def inheritable(self):
        ''' Search through ancestors as needed for inheritable
            dictionary items.
            NOTE:  You might think it would be a good idea
            to cache this class, but then you'd have to worry
            about it pointing to the wrong dictionary if you
            made a copy of the object...
        '''
        return _DictSearch(self)

    @property
    def private(self):
        ''' Allows setting private metadata for use in
            processing (not sent to PDF file).
            A new helper object is created on every access and is
            not cached.
        '''
        return _Private(self)


class IndirectPdfDict(PdfDict):
    ''' IndirectPdfDict is a convenience class.  You could
        create a direct PdfDict and then set indirect = True on it,
        or you could just create an IndirectPdfDict.
    '''
    indirect = True