summaryrefslogtreecommitdiff
path: root/vobject/base.py
diff options
context:
space:
mode:
Diffstat (limited to 'vobject/base.py')
-rw-r--r--vobject/base.py1106
1 files changed, 1106 insertions, 0 deletions
diff --git a/vobject/base.py b/vobject/base.py
new file mode 100644
index 0000000..107e71c
--- /dev/null
+++ b/vobject/base.py
@@ -0,0 +1,1106 @@
+"""vobject module for reading vCard and vCalendar files."""
+
+import copy
+import re
+import sys
+import logging
+import StringIO, cStringIO
+import string
+import exceptions
+import codecs
+
+#------------------------------------ Logging ----------------------------------
# Module logger.  A stream handler is attached only when the root logger has no
# handlers yet, i.e. when the application has not configured logging itself.
logger = logging.getLogger(__name__)
if len(logging.getLogger().handlers) == 0:
    formatter = logging.Formatter('%(name)s %(levelname)s %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)
logger.setLevel(logging.ERROR)  # Log errors
DEBUG = False  # Guards logger.debug calls so their messages are never built
#----------------------------------- Constants ---------------------------------
CR, LF = '\r', '\n'
CRLF = '\r\n'
SPACE, TAB = ' ', '\t'
SPACEORTAB = ' \t'
+#-------------------------------- Useful modules -------------------------------
+# use doctest, it kills two birds with one stone and docstrings often become
+# more readable to boot (see parseLine's docstring).
+# use logging, then when debugging we can just set our verbosity.
+# use epydoc syntax for documenting code, please document every class and non-
+# trivial method (see http://epydoc.sourceforge.net/epytext.html
+# and http://epydoc.sourceforge.net/fields.html). Also, please
+# follow http://www.python.org/peps/pep-0257.html for docstrings.
+#-------------------------------------------------------------------------------
+
+#--------------------------------- Main classes --------------------------------
+class VBase(object):
+ """Base class for ContentLine and Component.
+
+ @ivar behavior:
+ The Behavior class associated with this object, which controls
+ validation, transformations, and encoding.
+ @ivar parentBehavior:
+ The object's parent's behavior, or None if no behaviored parent exists.
+ @ivar isNative:
+ Boolean describing whether this component is a Native instance.
+ @ivar group:
+ An optional group prefix, should be used only to indicate sort order in
+ vCards, according to RFC2426
+ """
+ def __init__(self, group=None, *args, **kwds):
+ super(VBase, self).__init__(*args, **kwds)
+ self.group = group
+ self.behavior = None
+ self.parentBehavior = None
+ self.isNative = False
+
+ def copy(self, copyit):
+ self.group = copyit.group
+ self.behavior = copyit.behavior
+ self.parentBehavior = copyit.parentBehavior
+ self.isNative = copyit.isNative
+
+ def validate(self, *args, **kwds):
+ """Call the behavior's validate method, or return True."""
+ if self.behavior:
+ return self.behavior.validate(self, *args, **kwds)
+ else: return True
+
+ def getChildren(self):
+ """Return an iterable containing the contents of the object."""
+ return []
+
+ def clearBehavior(self, cascade=True):
+ """Set behavior to None. Do for all descendants if cascading."""
+ self.behavior=None
+ if cascade: self.transformChildrenFromNative()
+
+ def autoBehavior(self, cascade=False):
+ """Set behavior if name is in self.parentBehavior.knownChildren.
+
+ If cascade is True, unset behavior and parentBehavior for all
+ descendants, then recalculate behavior and parentBehavior.
+
+ """
+ parentBehavior = self.parentBehavior
+ if parentBehavior is not None:
+ knownChildTup = parentBehavior.knownChildren.get(self.name, None)
+ if knownChildTup is not None:
+ behavior = getBehavior(self.name, knownChildTup[2])
+ if behavior is not None:
+ self.setBehavior(behavior, cascade)
+ if isinstance(self, ContentLine) and self.encoded:
+ self.behavior.decode(self)
+ elif isinstance(self, ContentLine):
+ self.behavior = parentBehavior.defaultBehavior
+ if self.encoded and self.behavior:
+ self.behavior.decode(self)
+
+ def setBehavior(self, behavior, cascade=True):
+ """Set behavior. If cascade is True, autoBehavior all descendants."""
+ self.behavior=behavior
+ if cascade:
+ for obj in self.getChildren():
+ obj.parentBehavior=behavior
+ obj.autoBehavior(True)
+
+ def transformToNative(self):
+ """Transform this object into a custom VBase subclass.
+
+ transformToNative should always return a representation of this object.
+ It may do so by modifying self in place then returning self, or by
+ creating a new object.
+
+ """
+ if self.isNative or not self.behavior or not self.behavior.hasNative:
+ return self
+ else:
+ try:
+ return self.behavior.transformToNative(self)
+ except Exception, e:
+ # wrap errors in transformation in a ParseError
+ lineNumber = getattr(self, 'lineNumber', None)
+ if isinstance(e, ParseError):
+ if lineNumber is not None:
+ e.lineNumber = lineNumber
+ raise
+ else:
+ msg = "In transformToNative, unhandled exception: %s: %s"
+ msg = msg % (sys.exc_info()[0], sys.exc_info()[1])
+ new_error = ParseError(msg, lineNumber)
+ raise ParseError, new_error, sys.exc_info()[2]
+
+
+ def transformFromNative(self):
+ """Return self transformed into a ContentLine or Component if needed.
+
+ May have side effects. If it does, transformFromNative and
+ transformToNative MUST have perfectly inverse side effects. Allowing
+ such side effects is convenient for objects whose transformations only
+ change a few attributes.
+
+ Note that it isn't always possible for transformFromNative to be a
+ perfect inverse of transformToNative, in such cases transformFromNative
+ should return a new object, not self after modifications.
+
+ """
+ if self.isNative and self.behavior and self.behavior.hasNative:
+ try:
+ return self.behavior.transformFromNative(self)
+ except Exception, e:
+ # wrap errors in transformation in a NativeError
+ lineNumber = getattr(self, 'lineNumber', None)
+ if isinstance(e, NativeError):
+ if lineNumber is not None:
+ e.lineNumber = lineNumber
+ raise
+ else:
+ msg = "In transformFromNative, unhandled exception: %s: %s"
+ msg = msg % (sys.exc_info()[0], sys.exc_info()[1])
+ new_error = NativeError(msg, lineNumber)
+ raise NativeError, new_error, sys.exc_info()[2]
+ else: return self
+
+ def transformChildrenToNative(self):
+ """Recursively replace children with their native representation."""
+ pass
+
+ def transformChildrenFromNative(self, clearBehavior=True):
+ """Recursively transform native children to vanilla representations."""
+ pass
+
+ def serialize(self, buf=None, lineLength=75, validate=True, behavior=None):
+ """Serialize to buf if it exists, otherwise return a string.
+
+ Use self.behavior.serialize if behavior exists.
+
+ """
+ if not behavior:
+ behavior = self.behavior
+
+ if behavior:
+ if DEBUG: logger.debug("serializing %s with behavior" % self.name)
+ return behavior.serialize(self, buf, lineLength, validate)
+ else:
+ if DEBUG: logger.debug("serializing %s without behavior" % self.name)
+ return defaultSerialize(self, buf, lineLength)
+
+def ascii(s):
+ """Turn s into a printable string. Won't work for 8-bit ASCII."""
+ return unicode(s).encode('ascii', 'replace')
+
def toVName(name, stripNum = 0, upper = False):
    """
    Convert a Python identifier into an iCalendar style name.

    The name is uppercased first when upper is true, then the last stripNum
    characters are removed (when stripNum is non-zero), and finally every
    underscore is turned into a dash.
    """
    vname = name
    if upper:
        vname = vname.upper()
    if stripNum != 0:
        vname = vname[:-stripNum]
    return '-'.join(vname.split('_'))
+
+class ContentLine(VBase):
+ """Holds one content line for formats like vCard and vCalendar.
+
+ For example::
+ <SUMMARY{u'param1' : [u'val1'], u'param2' : [u'val2']}Bastille Day Party>
+
+ @ivar name:
+ The uppercased name of the contentline.
+ @ivar params:
+ A dictionary of parameters and associated lists of values (the list may
+ be empty for empty parameters).
+ @ivar value:
+ The value of the contentline.
+ @ivar singletonparams:
+ A list of parameters for which it's unclear if the string represents the
+ parameter name or the parameter value. In vCard 2.1, "The value string
+ can be specified alone in those cases where the value is unambiguous".
+ This is crazy, but we have to deal with it.
+ @ivar encoded:
+ A boolean describing whether the data in the content line is encoded.
+ Generally, text read from a serialized vCard or vCalendar should be
+ considered encoded. Data added programmatically should not be encoded.
+ @ivar lineNumber:
+ An optional line number associated with the contentline.
+ """
+ def __init__(self, name, params, value, group=None,
+ encoded=False, isNative=False,
+ lineNumber = None, *args, **kwds):
+ """Take output from parseLine, convert params list to dictionary."""
+ # group is used as a positional argument to match parseLine's return
+ super(ContentLine, self).__init__(group, *args, **kwds)
+ self.name = name.upper()
+ self.value = value
+ self.encoded = encoded
+ self.params = {}
+ self.singletonparams = []
+ self.isNative = isNative
+ self.lineNumber = lineNumber
+ def updateTable(x):
+ if len(x) == 1:
+ self.singletonparams += x
+ else:
+ paramlist = self.params.setdefault(x[0].upper(), [])
+ paramlist.extend(x[1:])
+ map(updateTable, params)
+ qp = False
+ if 'ENCODING' in self.params:
+ if 'QUOTED-PRINTABLE' in self.params['ENCODING']:
+ qp = True
+ self.params['ENCODING'].remove('QUOTED-PRINTABLE')
+ if 0==len(self.params['ENCODING']):
+ del self.params['ENCODING']
+ if 'QUOTED-PRINTABLE' in self.singletonparams:
+ qp = True
+ self.singletonparams.remove('QUOTED-PRINTABLE')
+ if qp:
+ self.value = str(self.value).decode('quoted-printable')
+
+ # self.value should be unicode for iCalendar, but if quoted-printable
+ # is used, or if the quoted-printable state machine is used, text may be
+ # encoded
+ if type(self.value) is str:
+ charset = 'iso-8859-1'
+ if 'CHARSET' in self.params:
+ charsets = self.params.pop('CHARSET')
+ if charsets:
+ charset = charsets[0]
+ self.value = unicode(self.value, charset)
+
+ @classmethod
+ def duplicate(clz, copyit):
+ newcopy = clz('', {}, '')
+ newcopy.copy(copyit)
+ return newcopy
+
+ def copy(self, copyit):
+ super(ContentLine, self).copy(copyit)
+ self.name = copyit.name
+ self.value = copy.copy(copyit.value)
+ self.encoded = self.encoded
+ self.params = copy.copy(copyit.params)
+ self.singletonparams = copy.copy(copyit.singletonparams)
+ self.lineNumber = copyit.lineNumber
+
+ def __eq__(self, other):
+ try:
+ return (self.name == other.name) and (self.params == other.params) and (self.value == other.value)
+ except:
+ return False
+
+ def __getattr__(self, name):
+ """Make params accessible via self.foo_param or self.foo_paramlist.
+
+ Underscores, legal in python variable names, are converted to dashes,
+ which are legal in IANA tokens.
+
+ """
+ try:
+ if name.endswith('_param'):
+ return self.params[toVName(name, 6, True)][0]
+ elif name.endswith('_paramlist'):
+ return self.params[toVName(name, 10, True)]
+ else:
+ raise exceptions.AttributeError, name
+ except KeyError:
+ raise exceptions.AttributeError, name
+
+ def __setattr__(self, name, value):
+ """Make params accessible via self.foo_param or self.foo_paramlist.
+
+ Underscores, legal in python variable names, are converted to dashes,
+ which are legal in IANA tokens.
+
+ """
+ if name.endswith('_param'):
+ if type(value) == list:
+ self.params[toVName(name, 6, True)] = value
+ else:
+ self.params[toVName(name, 6, True)] = [value]
+ elif name.endswith('_paramlist'):
+ if type(value) == list:
+ self.params[toVName(name, 10, True)] = value
+ else:
+ raise VObjectError("Parameter list set to a non-list")
+ else:
+ prop = getattr(self.__class__, name, None)
+ if isinstance(prop, property):
+ prop.fset(self, value)
+ else:
+ object.__setattr__(self, name, value)
+
+ def __delattr__(self, name):
+ try:
+ if name.endswith('_param'):
+ del self.params[toVName(name, 6, True)]
+ elif name.endswith('_paramlist'):
+ del self.params[toVName(name, 10, True)]
+ else:
+ object.__delattr__(self, name)
+ except KeyError:
+ raise exceptions.AttributeError, name
+
+ def valueRepr( self ):
+ """transform the representation of the value according to the behavior,
+ if any"""
+ v = self.value
+ if self.behavior:
+ v = self.behavior.valueRepr( self )
+ return ascii( v )
+
+ def __str__(self):
+ return "<"+ascii(self.name)+ascii(self.params)+self.valueRepr()+">"
+
+ def __repr__(self):
+ return self.__str__().replace('\n', '\\n')
+
+ def prettyPrint(self, level = 0, tabwidth=3):
+ pre = ' ' * level * tabwidth
+ print pre, self.name + ":", self.valueRepr()
+ if self.params:
+ lineKeys= self.params.keys()
+ print pre, "params for ", self.name +':'
+ for aKey in lineKeys:
+ print pre + ' ' * tabwidth, aKey, ascii(self.params[aKey])
+
+class Component(VBase):
+ """A complex property that can contain multiple ContentLines.
+
+ For our purposes, a component must start with a BEGIN:xxxx line and end with
+ END:xxxx, or have a PROFILE:xxx line if a top-level component.
+
+ @ivar contents:
+ A dictionary of lists of Component or ContentLine instances. The keys
+ are the lowercased names of child ContentLines or Components.
+ Note that BEGIN and END ContentLines are not included in contents.
+ @ivar name:
+ Uppercase string used to represent this Component, i.e VCARD if the
+ serialized object starts with BEGIN:VCARD.
+ @ivar useBegin:
+ A boolean flag determining whether BEGIN: and END: lines should
+ be serialized.
+
+ """
+ def __init__(self, name=None, *args, **kwds):
+ super(Component, self).__init__(*args, **kwds)
+ self.contents = {}
+ if name:
+ self.name=name.upper()
+ self.useBegin = True
+ else:
+ self.name = ''
+ self.useBegin = False
+
+ self.autoBehavior()
+
+ @classmethod
+ def duplicate(clz, copyit):
+ newcopy = clz()
+ newcopy.copy(copyit)
+ return newcopy
+
+ def copy(self, copyit):
+ super(Component, self).copy(copyit)
+
+ # deep copy of contents
+ self.contents = {}
+ for key, lvalue in copyit.contents.items():
+ newvalue = []
+ for value in lvalue:
+ newitem = value.duplicate(value)
+ newvalue.append(newitem)
+ self.contents[key] = newvalue
+
+ self.name = copyit.name
+ self.useBegin = copyit.useBegin
+
+ def setProfile(self, name):
+ """Assign a PROFILE to this unnamed component.
+
+ Used by vCard, not by vCalendar.
+
+ """
+ if self.name or self.useBegin:
+ if self.name == name: return
+ raise VObjectError("This component already has a PROFILE or uses BEGIN.")
+ self.name = name.upper()
+
+ def __getattr__(self, name):
+ """For convenience, make self.contents directly accessible.
+
+ Underscores, legal in python variable names, are converted to dashes,
+ which are legal in IANA tokens.
+
+ """
+ try:
+ if name.endswith('_list'):
+ return self.contents[toVName(name, 5)]
+ else:
+ return self.contents[toVName(name)][0]
+ except KeyError:
+ raise exceptions.AttributeError, name
+
+ normal_attributes = ['contents','name','behavior','parentBehavior','group']
+ def __setattr__(self, name, value):
+ """For convenience, make self.contents directly accessible.
+
+ Underscores, legal in python variable names, are converted to dashes,
+ which are legal in IANA tokens.
+
+ """
+ if name not in self.normal_attributes and name.lower()==name:
+ if type(value) == list:
+ if name.endswith('_list'):
+ name = name[:-5]
+ self.contents[toVName(name)] = value
+ elif name.endswith('_list'):
+ raise VObjectError("Component list set to a non-list")
+ else:
+ self.contents[toVName(name)] = [value]
+ else:
+ prop = getattr(self.__class__, name, None)
+ if isinstance(prop, property):
+ prop.fset(self, value)
+ else:
+ object.__setattr__(self, name, value)
+
+ def __delattr__(self, name):
+ try:
+ if name not in self.normal_attributes and name.lower()==name:
+ if name.endswith('_list'):
+ del self.contents[toVName(name, 5)]
+ else:
+ del self.contents[toVName(name)]
+ else:
+ object.__delattr__(self, name)
+ except KeyError:
+ raise exceptions.AttributeError, name
+
+ def getChildValue(self, childName, default = None, childNumber = 0):
+ """Return a child's value (the first, by default), or None."""
+ child = self.contents.get(toVName(childName))
+ if child is None:
+ return default
+ else:
+ return child[childNumber].value
+
+ def add(self, objOrName, group = None):
+ """Add objOrName to contents, set behavior if it can be inferred.
+
+ If objOrName is a string, create an empty component or line based on
+ behavior. If no behavior is found for the object, add a ContentLine.
+
+ group is an optional prefix to the name of the object (see
+ RFC 2425).
+ """
+ if isinstance(objOrName, VBase):
+ obj = objOrName
+ if self.behavior:
+ obj.parentBehavior = self.behavior
+ obj.autoBehavior(True)
+ else:
+ name = objOrName.upper()
+ try:
+ id=self.behavior.knownChildren[name][2]
+ behavior = getBehavior(name, id)
+ if behavior.isComponent:
+ obj = Component(name)
+ else:
+ obj = ContentLine(name, [], '', group)
+ obj.parentBehavior = self.behavior
+ obj.behavior = behavior
+ obj = obj.transformToNative()
+ except (KeyError, AttributeError):
+ obj = ContentLine(objOrName, [], '', group)
+ if obj.behavior is None and self.behavior is not None:
+ if isinstance(obj, ContentLine):
+ obj.behavior = self.behavior.defaultBehavior
+ self.contents.setdefault(obj.name.lower(), []).append(obj)
+ return obj
+
+ def remove(self, obj):
+ """Remove obj from contents."""
+ named = self.contents.get(obj.name.lower())
+ if named:
+ try:
+ named.remove(obj)
+ if len(named) == 0:
+ del self.contents[obj.name.lower()]
+ except ValueError:
+ pass;
+
+ def getChildren(self):
+ """Return an iterable of all children."""
+ for objList in self.contents.values():
+ for obj in objList: yield obj
+
+ def components(self):
+ """Return an iterable of all Component children."""
+ return (i for i in self.getChildren() if isinstance(i, Component))
+
+ def lines(self):
+ """Return an iterable of all ContentLine children."""
+ return (i for i in self.getChildren() if isinstance(i, ContentLine))
+
+ def sortChildKeys(self):
+ try:
+ first = [s for s in self.behavior.sortFirst if s in self.contents]
+ except:
+ first = []
+ return first + sorted(k for k in self.contents.keys() if k not in first)
+
+ def getSortedChildren(self):
+ return [obj for k in self.sortChildKeys() for obj in self.contents[k]]
+
+ def setBehaviorFromVersionLine(self, versionLine):
+ """Set behavior if one matches name, versionLine.value."""
+ v=getBehavior(self.name, versionLine.value)
+ if v: self.setBehavior(v)
+
+ def transformChildrenToNative(self):
+ """Recursively replace children with their native representation."""
+ #sort to get dependency order right, like vtimezone before vevent
+ for childArray in (self.contents[k] for k in self.sortChildKeys()):
+ for i in xrange(len(childArray)):
+ childArray[i]=childArray[i].transformToNative()
+ childArray[i].transformChildrenToNative()
+
+ def transformChildrenFromNative(self, clearBehavior=True):
+ """Recursively transform native children to vanilla representations."""
+ for childArray in self.contents.values():
+ for i in xrange(len(childArray)):
+ childArray[i]=childArray[i].transformFromNative()
+ childArray[i].transformChildrenFromNative(clearBehavior)
+ if clearBehavior:
+ childArray[i].behavior = None
+ childArray[i].parentBehavior = None
+
+ def __str__(self):
+ if self.name:
+ return "<" + self.name + "| " + str(self.getSortedChildren()) + ">"
+ else:
+ return '<' + '*unnamed*' + '| ' + str(self.getSortedChildren()) + '>'
+
+ def __repr__(self):
+ return self.__str__()
+
+ def prettyPrint(self, level = 0, tabwidth=3):
+ pre = ' ' * level * tabwidth
+ print pre, self.name
+ if isinstance(self, Component):
+ for line in self.getChildren():
+ line.prettyPrint(level + 1, tabwidth)
+ print
+
class VObjectError(Exception):
    """Base class of the errors raised by this module.

    @ivar message:
        The error message.
    @ivar lineNumber:
        The line the error refers to; the attribute only exists when a line
        number was supplied.
    """
    def __init__(self, message, lineNumber=None):
        self.message = message
        if lineNumber is not None:
            self.lineNumber = lineNumber

    def __str__(self):
        try:
            lineNumber = self.lineNumber
        except AttributeError:
            # no line number known: show just the quoted message
            return repr(self.message)
        return "At line %s: %s" % (lineNumber, self.message)

class ParseError(VObjectError):
    """Raised when input cannot be parsed or transformed to native form."""

class ValidateError(VObjectError):
    """VObjectError subclass for validation failures."""

class NativeError(VObjectError):
    """Raised when a native object cannot be transformed back."""
+
+#-------------------------- Parsing functions ----------------------------------
+
+# parseLine regular expressions
+
patterns = {}

# Note that underscore is not legal for names, it's included because
# Lotus Notes uses it
# (raw string: "\-" is a regex escape, not a Python string escape)
patterns['name'] = r'[a-zA-Z0-9\-_]+'
patterns['safe_char'] = '[^";:,]'
patterns['qsafe_char'] = '[^"]'

# The combined Python string replacement and regex syntax is a little
# confusing; remember that %(foobar)s is replaced with patterns['foobar'], so
# for instance param_value is any number of safe_chars or any number of
# qsafe_chars surrounded by double quotes.  All of these patterns are meant to
# be compiled with re.VERBOSE, which ignores the whitespace between tokens.

patterns['param_value'] = ' "%(qsafe_char)s * " | %(safe_char)s * ' % patterns


# get a tuple of two elements, one will be empty, the other will have the value
patterns['param_value_grouped'] = """
" ( %(qsafe_char)s * )" | ( %(safe_char)s + )
""" % patterns

# get a parameter and its values, without any saved groups
patterns['param'] = r"""
; (?: %(name)s ) # parameter name
(?:
 (?: = (?: %(param_value)s ) )? # 0 or more parameter values, multiple
 (?: , (?: %(param_value)s ) )* # parameters are comma separated
)*
""" % patterns

# get a parameter, saving groups for name and value (value still needs parsing)
patterns['params_grouped'] = r"""
; ( %(name)s )

(?: =
 (
 (?: (?: %(param_value)s ) )? # 0 or more parameter values, multiple
 (?: , (?: %(param_value)s ) )* # parameters are comma separated
 )
)?
""" % patterns

# get a full content line, break it up into group, name, parameters, and value
patterns['line'] = r"""
^ ((?P<group> %(name)s)\.)?(?P<name> %(name)s) # name group
 (?P<params> (?: %(param)s )* ) # params group (may be empty)
: (?P<value> .* )$ # value group
""" % patterns

param_values_re = re.compile(patterns['param_value_grouped'], re.VERBOSE)
params_re = re.compile(patterns['params_grouped'], re.VERBOSE)
line_re = re.compile(patterns['line'], re.DOTALL | re.VERBOSE)
begin_re = re.compile('BEGIN', re.IGNORECASE)
+
+
def parseParams(string):
    """
    Split a parameter string into a list of [name, value, ...] lists.

    >>> parseParams(';ALTREP="http://www.wiz.org"')
    [['ALTREP', 'http://www.wiz.org']]
    >>> parseParams('')
    []
    >>> parseParams(';ALTREP="http://www.wiz.org;;",Blah,Foo;NEXT=Nope;BAR')
    [['ALTREP', 'http://www.wiz.org;;', 'Blah', 'Foo'], ['NEXT', 'Nope'], ['BAR']]
    """
    allParameters = []
    for paramName, valuesString in params_re.findall(string):
        paramList = [paramName]
        # each match is (quoted, '') or ('', unquoted); keep the filled half
        for quoted, unquoted in param_values_re.findall(valuesString):
            paramList.append(quoted or unquoted)
        allParameters.append(paramList)
    return allParameters
+
+
def parseLine(line, lineNumber = None):
    """
    Split a content line into a (name, params, value, group) tuple.

    Raises ParseError, carrying lineNumber, when the line does not match.

    >>> parseLine("BLAH:")
    ('BLAH', [], '', None)
    >>> parseLine("RDATE:VALUE=DATE:19970304,19970504,19970704,19970904")
    ('RDATE', [], 'VALUE=DATE:19970304,19970504,19970704,19970904', None)
    >>> parseLine('DESCRIPTION;ALTREP="http://www.wiz.org":The Fall 98 Wild Wizards Conference - - Las Vegas, NV, USA')
    ('DESCRIPTION', [['ALTREP', 'http://www.wiz.org']], 'The Fall 98 Wild Wizards Conference - - Las Vegas, NV, USA', None)
    >>> parseLine("EMAIL;PREF;INTERNET:john@nowhere.com")
    ('EMAIL', [['PREF'], ['INTERNET']], 'john@nowhere.com', None)
    >>> parseLine('EMAIL;TYPE="blah",hah;INTERNET="DIGI",DERIDOO:john@nowhere.com')
    ('EMAIL', [['TYPE', 'blah', 'hah'], ['INTERNET', 'DIGI', 'DERIDOO']], 'john@nowhere.com', None)
    >>> parseLine('item1.ADR;type=HOME;type=pref:;;Reeperbahn 116;Hamburg;;20359;')
    ('ADR', [['type', 'HOME'], ['type', 'pref']], ';;Reeperbahn 116;Hamburg;;20359;', 'item1')
    >>> parseLine(":")
    Traceback (most recent call last):
    ...
    ParseError: 'Failed to parse line: :'
    """

    match = line_re.match(line)
    if match is None:
        raise ParseError("Failed to parse line: %s" % line, lineNumber)
    name, params, value, group = match.group('name', 'params', 'value', 'group')
    # Underscores are replaced with dash to work around Lotus Notes
    return (name.replace('_','-'), parseParams(params), value, group)
+
+# logical line regular expressions
+
# Each entry is formatted against the entries added before it, so the order of
# these statements matters.  The patterns are compiled with re.VERBOSE.
patterns.update(lineend=r'(?:\r\n|\r|\n|$)')
patterns.update(wrap=r'%(lineend)s [\t ]' % patterns)
patterns.update(logicallines=r"""
(
 (?: [^\r\n] | %(wrap)s )*
 %(lineend)s
)
""" % patterns)

patterns.update(wraporend=r'(%(wrap)s | %(lineend)s )' % patterns)

# wrap_re matches a fold (line end followed by one space or tab) or a line end
wrap_re = re.compile(patterns['wraporend'], re.VERBOSE)
# logical_lines_re matches one logical line including its folded continuations
logical_lines_re = re.compile(patterns['logicallines'], re.VERBOSE)
+
+testLines="""
+Line 0 text
+ , Line 0 continued.
+Line 1;encoding=quoted-printable:this is an evil=
+ evil=
+ format.
+Line 2 is a new line, it does not start with whitespace.
+"""
+
def getLogicalLines(fp, allowQP=True, findBegin=False):
    """Iterate through a stream, yielding one logical line at a time.

    Because many applications still use vCard 2.1, we have to deal with the
    quoted-printable encoding for long lines, as well as the vCard 3.0 and
    vCalendar line folding technique, a whitespace character at the start
    of the line.

    Quoted-printable data will be decoded in the Behavior decoding phase.

    Each item yielded is a (logicalLine, lineNumber) tuple, where lineNumber is
    the 1-based number of the physical line the logical line starts on.

    With allowQP false the whole stream is read at once and decoded (as utf-8,
    or, when findBegin is true, with the first of several encodings in which
    BEGIN can be found); ParseError is raised when no encoding fits.

    >>> import StringIO
    >>> f=StringIO.StringIO(testLines)
    >>> for n, l in enumerate(getLogicalLines(f)):
    ...     print "Line %s: %s" % (n, l[0])
    ...
    Line 0: Line 0 text, Line 0 continued.
    Line 1: Line 1;encoding=quoted-printable:this is an evil=
     evil=
     format.
    Line 2: Line 2 is a new line, it does not start with whitespace.

    """
    if not allowQP:
        data = fp.read(-1)
        if len(data) > 0:
            if type(data[0]) == unicode:
                val = data
            elif not findBegin:
                val = data.decode('utf-8')
            else:
                for encoding in 'utf-8', 'utf-16-LE', 'utf-16-BE', 'iso-8859-1':
                    try:
                        val = data.decode(encoding)
                        if begin_re.search(val) is not None:
                            break
                    except UnicodeDecodeError:
                        pass
                else:
                    # no encoding produced text containing BEGIN
                    raise ParseError('Could not find BEGIN when trying to determine encoding')
        else:
            val = data

        # strip off any UTF8 BOMs which Python's UTF8 decoder leaves

        val = val.lstrip( unicode( codecs.BOM_UTF8, "utf8" ) )

        lineNumber = 1
        for match in logical_lines_re.finditer(val):
            # n counts the folds and line ends removed, i.e. physical lines used
            line, n = wrap_re.subn('', match.group())
            if line != '':
                yield line, lineNumber
            lineNumber += n

    else:
        quotedPrintable=False
        newbuffer = StringIO.StringIO
        logicalLine = newbuffer()
        lineNumber = 0
        lineStartNumber = 0
        while True:
            line = fp.readline()
            if line == '':
                break
            else:
                line = line.rstrip(CRLF)
                lineNumber += 1
            if line.rstrip() == '':
                # a blank line ends the current logical line
                if logicalLine.pos > 0:
                    yield logicalLine.getvalue(), lineStartNumber
                lineStartNumber = lineNumber
                logicalLine = newbuffer()
                quotedPrintable=False
                continue

            if quotedPrintable and allowQP:
                # soft line break of a quoted-printable value: keep the newline
                logicalLine.write('\n')
                logicalLine.write(line)
                quotedPrintable=False
            elif line[0] in SPACEORTAB:
                # folded continuation: drop the single leading whitespace char
                logicalLine.write(line[1:])
            elif logicalLine.pos > 0:
                yield logicalLine.getvalue(), lineStartNumber
                lineStartNumber = lineNumber
                logicalLine = newbuffer()
                logicalLine.write(line)
            else:
                # first physical line of a new logical line: record where it
                # starts (previously left at 0 or at the preceding blank line)
                lineStartNumber = lineNumber
                logicalLine = newbuffer()
                logicalLine.write(line)

            # hack to deal with the fact that vCard 2.1 allows parameters to be
            # encoded without a parameter name. False positives are unlikely, but
            # possible.
            val = logicalLine.getvalue()
            if val[-1]=='=' and val.lower().find('quoted-printable') >= 0:
                quotedPrintable=True

        if logicalLine.pos > 0:
            yield logicalLine.getvalue(), lineStartNumber
+
+
def textLineToContentLine(text, n=None):
    """Parse the logical line text into an encoded ContentLine.

    n is the line number; it is handed to parseLine for error reporting and
    stored on the resulting ContentLine.
    """
    name, params, value, group = parseLine(text, n)
    return ContentLine(name, params, value, group, encoded=True, lineNumber=n)
+
+
def dquoteEscape(param):
    """Return param, or "param" if ',' or ';' or ':' is in param.

    Raises VObjectError when param itself contains a double quote.
    """
    if '"' in param:
        raise VObjectError("Double quotes aren't allowed in parameter values.")
    needsQuoting = False
    for special in (',', ';', ':'):
        if special in param:
            needsQuoting = True
            break
    if needsQuoting:
        return '"' + param + '"'
    return param
+
def foldOneLine(outbuf, input, lineLength = 75):
    """Write input to outbuf, folded into lines and terminated by CRLF.

    Input shorter than lineLength is written unchanged.  Longer input is cut
    into pieces of at most lineLength - 1 characters; every piece but the last
    is followed by CRLF and a single space, the fold marker.  A cut is moved
    backwards while it would land on a UTF-8 continuation byte (10xxxxxx), so
    multi-byte utf-8 sequences are not broken across lines.
    """
    if len(input) < lineLength:
        # Optimize for unfolded line case
        outbuf.write(input)
    else:
        # Look for valid utf8 range and write that out
        start = 0
        written = 0
        while written < len(input):
            # Start max length -1 chars on from where we are
            offset = start + lineLength - 1
            if offset >= len(input):
                outbuf.write(input[start:])
                written = len(input)
            else:
                # Step back until the cut is not in front of a continuation
                # byte.  The mask test alone is sufficient; the former extra
                # "input[offset] > 0x7F" compared a character with an integer.
                while (ord(input[offset]) & 0xC0) == 0x80:
                    offset -= 1

                outbuf.write(input[start:offset])
                outbuf.write("\r\n ")
                written += offset - start
                start = offset
    outbuf.write("\r\n")
+
def defaultSerialize(obj, buf, lineLength):
    """Encode and fold obj and its children, write to buf or return a string.

    A Component is written as an optional BEGIN line, its sorted children and
    an optional END line.  A ContentLine is written as
    [group.]NAME[;param=values]:value, encoded by its behavior for the
    duration of the call if it was not encoded already.
    """
    outbuf = buf or cStringIO.StringIO()

    if isinstance(obj, Component):
        prefix = ''
        if obj.group is not None:
            prefix = obj.group + '.'
        if obj.useBegin:
            foldOneLine(outbuf, str(prefix + u"BEGIN:" + obj.name), lineLength)
        for child in obj.getSortedChildren():
            #validate is recursive, we only need to validate once
            child.serialize(outbuf, lineLength, validate=False)
        if obj.useBegin:
            foldOneLine(outbuf, str(prefix + u"END:" + obj.name), lineLength)

    elif isinstance(obj, ContentLine):
        wasEncoded = obj.encoded
        if not wasEncoded and obj.behavior:
            obj.behavior.encode(obj)
        # the whole line is assembled unfolded, as utf-8, before it is folded
        unfolded = codecs.getwriter('utf-8')(cStringIO.StringIO())
        if obj.group is not None:
            unfolded.write(obj.group + '.')
        unfolded.write(obj.name.upper())
        for key, paramvals in obj.params.iteritems():
            escaped = [dquoteEscape(p) for p in paramvals]
            unfolded.write(';' + key + '=' + ','.join(escaped))
        unfolded.write(':' + obj.value)
        # put the object back into the state it was handed over in
        if not wasEncoded and obj.behavior:
            obj.behavior.decode(obj)
        foldOneLine(outbuf, unfolded.getvalue(), lineLength)

    if buf:
        return buf
    return outbuf.getvalue()
+
+
+testVCalendar="""
+BEGIN:VCALENDAR
+BEGIN:VEVENT
+SUMMARY;blah=hi!:Bastille Day Party
+END:VEVENT
+END:VCALENDAR"""
+
class Stack:
    """Stack of the components that are currently open while parsing."""
    def __init__(self):
        self.stack = []

    def __len__(self):
        return len(self.stack)

    def top(self):
        """Return the innermost open item, or None when the stack is empty."""
        if not self.stack:
            return None
        return self.stack[-1]

    def topName(self):
        """Return the name of the innermost open item, or None when empty."""
        if not self.stack:
            return None
        return self.stack[-1].name

    def modifyTop(self, item):
        """Add item to the top of the stack, pushing an unnamed Component first if needed."""
        target = self.top()
        if not target:
            target = Component()
            self.push(target)
        target.add(item) #add sets behavior for item and children

    def push(self, obj):
        self.stack.append(obj)

    def pop(self):
        return self.stack.pop()
+
+
+def readComponents(streamOrString, validate=False, transform=True,
+ findBegin=True, ignoreUnreadable=False,
+ allowQP=False):
+ """Generate one Component at a time from a stream.
+
+ >>> import StringIO
+ >>> f = StringIO.StringIO(testVCalendar)
+ >>> cal=readComponents(f).next()
+ >>> cal
+ <VCALENDAR| [<VEVENT| [<SUMMARY{u'BLAH': [u'hi!']}Bastille Day Party>]>]>
+ >>> cal.vevent.summary
+ <SUMMARY{u'BLAH': [u'hi!']}Bastille Day Party>
+
+ """
+ if isinstance(streamOrString, basestring):
+ stream = StringIO.StringIO(streamOrString)
+ else:
+ stream = streamOrString
+
+ try:
+ stack = Stack()
+ versionLine = None
+ n = 0
+ for line, n in getLogicalLines(stream, allowQP, findBegin):
+ if ignoreUnreadable:
+ try:
+ vline = textLineToContentLine(line, n)
+ except VObjectError, e:
+ if e.lineNumber is not None:
+ msg = "Skipped line %(lineNumber)s, message: %(msg)s"
+ else:
+ msg = "Skipped a line, message: %(msg)s"
+ logger.error(msg % {'lineNumber' : e.lineNumber,
+ 'msg' : e.message})
+ continue
+ else:
+ vline = textLineToContentLine(line, n)
+ if vline.name == "VERSION":
+ versionLine = vline
+ stack.modifyTop(vline)
+ elif vline.name == "BEGIN":
+ stack.push(Component(vline.value, group=vline.group))
+ elif vline.name == "PROFILE":
+ if not stack.top(): stack.push(Component())
+ stack.top().setProfile(vline.value)
+ elif vline.name == "END":
+ if len(stack) == 0:
+ err = "Attempted to end the %s component, \
+ but it was never opened" % vline.value
+ raise ParseError(err, n)
+ if vline.value.upper() == stack.topName(): #START matches END
+ if len(stack) == 1:
+ component=stack.pop()
+ if versionLine is not None:
+ component.setBehaviorFromVersionLine(versionLine)
+ if validate: component.validate(raiseException=True)
+ if transform: component.transformChildrenToNative()
+ yield component #EXIT POINT
+ else: stack.modifyTop(stack.pop())
+ else:
+ err = "%s component wasn't closed"
+ raise ParseError(err % stack.topName(), n)
+ else: stack.modifyTop(vline) #not a START or END line
+ if stack.top():
+ if stack.topName() is None:
+ logger.warning("Top level component was never named")
+ elif stack.top().useBegin:
+ raise ParseError("Component %s was never closed" % (stack.topName()), n)
+ yield stack.pop()
+
+ except ParseError, e:
+ e.input = streamOrString
+ raise
+
+
def readOne(stream, validate=False, transform=True, findBegin=True,
            ignoreUnreadable=False, allowQP=False):
    """Return the first component from stream.

    All arguments are passed on to readComponents.  StopIteration propagates
    when the stream contains no component.
    """
    components = readComponents(stream, validate=validate, transform=transform,
                                findBegin=findBegin,
                                ignoreUnreadable=ignoreUnreadable,
                                allowQP=allowQP)
    return components.next()
+
+#--------------------------- version registry ----------------------------------
# Maps an uppercased name to a list of (id, behavior) tuples; the first entry
# of each list is the default behavior for that name.
__behaviorRegistry={}

def registerBehavior(behavior, name=None, default=False, id=None):
    """Register the given behavior.

    If default is True (or if this is the first version registered with this
    name), the version will be the default if no id is given.

    name defaults to behavior.name and id to behavior.versionString.  The name
    is always stored uppercased, because getBehavior uppercases the name it
    looks up.

    """
    if not name: name=behavior.name
    name = name.upper()
    if id is None: id=behavior.versionString
    if name in __behaviorRegistry:
        if default:
            __behaviorRegistry[name].insert(0, (id, behavior))
        else:
            __behaviorRegistry[name].append((id, behavior))
    else:
        __behaviorRegistry[name]=[(id, behavior)]
+
def getBehavior(name, id=None):
    """Return a matching behavior if it exists, or None.

    If id is None, return the default for name.

    name is looked up uppercased.
    """
    versions = __behaviorRegistry.get(name.upper())
    if versions is None:
        return None
    if not id:
        # the first registered entry is the default
        return versions[0][1]
    for versionId, behavior in versions:
        if versionId == id:
            return behavior
    return None
+
def newFromBehavior(name, id=None):
    """Given a name, return a behaviored ContentLine or Component.

    Raises VObjectError when no behavior is registered for name (and id).
    """
    upperName = name.upper()
    behavior = getBehavior(upperName, id)
    if behavior is None:
        raise VObjectError("No behavior found named %s" % upperName)
    if not behavior.isComponent:
        obj = ContentLine(upperName, [], '')
    else:
        obj = Component(upperName)
    obj.behavior = behavior
    obj.isNative = False
    return obj
+
+
+#--------------------------- Helper function -----------------------------------
def backslashEscape(s):
    """Return s with backslashes, semicolons, commas and line breaks escaped.

    Backslashes are doubled first so that the backslashes added afterwards are
    not escaped again; CRLF, LF and CR each become a literal backslash-n.
    """
    # "\\;" and "\\," spell out the backslash instead of relying on the
    # unrecognised escape sequences "\;" and "\,"
    s = s.replace("\\", "\\\\").replace(";", "\\;").replace(",", "\\,")
    return s.replace("\r\n", "\\n").replace("\n", "\\n").replace("\r", "\\n")
+
+#------------------- Testing and running functions -----------------------------
+if __name__ == '__main__':
+ import tests
+ tests._test()