diff options
Diffstat (limited to 'ofxparse/ofxparse.py')
-rw-r--r-- | ofxparse/ofxparse.py | 337 |
1 files changed, 250 insertions, 87 deletions
diff --git a/ofxparse/ofxparse.py b/ofxparse/ofxparse.py index a66df88..5686030 100644 --- a/ofxparse/ofxparse.py +++ b/ofxparse/ofxparse.py @@ -22,13 +22,37 @@ else: from . import mcc + +def skip_headers(fh): + ''' + Prepare `fh` for parsing by BeautifulSoup by skipping its OFX + headers. + ''' + if fh is None or isinstance(fh, six.string_types): + return + fh.seek(0) + header_re = re.compile(r"^\s*\w+:\s*\w+\s*$") + while True: + pos = fh.tell() + line = fh.readline() + if not line: + break + if header_re.search(line) is None: + fh.seek(pos) + return + + def soup_maker(fh): + skip_headers(fh) try: from bs4 import BeautifulSoup - return BeautifulSoup(fh) + soup = BeautifulSoup(fh, "xml") + for tag in soup.findAll(): + tag.name = tag.name.lower() except ImportError: from BeautifulSoup import BeautifulStoneSoup - return BeautifulStoneSoup(fh) + soup = BeautifulStoneSoup(fh) + return soup def try_decode(string, encoding): @@ -36,11 +60,13 @@ def try_decode(string, encoding): string = string.decode(encoding) return string + def is_iterable(candidate): - if sys.version_info < (2,6): + if sys.version_info < (2, 6): return hasattr(candidate, 'next') return isinstance(candidate, collections.Iterable) + @contextlib.contextmanager def save_pos(fh): """ @@ -54,6 +80,7 @@ def save_pos(fh): finally: fh.seek(orig_pos) + class OfxFile(object): def __init__(self, fh): """ @@ -76,7 +103,7 @@ class OfxFile(object): head_data = self.fh.read(1024 * 10) head_data = head_data[:head_data.find(six.b('<'))] - for line in re.split(six.b('\r?\n?'), head_data): + for line in head_data.splitlines(): # Newline? if line.strip() == six.b(""): break @@ -138,7 +165,7 @@ class OfxFile(object): class OfxPreprocessedFile(OfxFile): def __init__(self, fh): - super(OfxPreprocessedFile,self).__init__(fh) + super(OfxPreprocessedFile, self).__init__(fh) if self.fh is None: return @@ -146,19 +173,21 @@ class OfxPreprocessedFile(OfxFile): ofx_string = self.fh.read() # find all closing tags as hints - closing_tags = [ t.upper() for t in re.findall(r'(?i)</([a-z0-9_\.]+)>', ofx_string) ] + closing_tags = [t.upper() for t in re.findall(r'(?i)</([a-z0-9_\.]+)>', + ofx_string)] # close all tags that don't have closing tags and # leave all other data intact last_open_tag = None - tokens = re.split(r'(?i)(</?[a-z0-9_\.]+>)', ofx_string) - new_fh = StringIO() - for idx,token in enumerate(tokens): + tokens = re.split(r'(?i)(</?[a-z0-9_\.]+>)', ofx_string) + new_fh = StringIO() + for idx, token in enumerate(tokens): is_closing_tag = token.startswith('</') is_processing_tag = token.startswith('<?') is_cdata = token.startswith('<!') is_tag = token.startswith('<') and not is_cdata - is_open_tag = is_tag and not is_closing_tag and not is_processing_tag + is_open_tag = is_tag and not is_closing_tag \ + and not is_processing_tag if is_tag: if last_open_tag is not None: new_fh.write("</%s>" % last_open_tag) @@ -175,7 +204,8 @@ class OfxPreprocessedFile(OfxFile): class Ofx(object): def __str__(self): return "" -# headers = "\r\n".join(":".join(el if el else "NONE" for el in item) for item in six.iteritems(self.headers)) +# headers = "\r\n".join(":".join(el if el else "NONE" for el in item) +# for item in six.iteritems(self.headers)) # headers += "\r\n\r\n" # # return headers + str(self.signon) @@ -187,6 +217,7 @@ class AccountType(object): class Account(object): def __init__(self): + self.curdef = None self.statement = None self.account_id = '' self.routing_number = '' @@ -216,25 +247,52 @@ class Security: self.ticker = ticker self.memo = memo + class Signon: - def __init__(self, code, severity, message): - self.code = code - self.severity = severity - self.message = message - if int(code) == 0: + def __init__(self, keys): + self.code = keys['code'] + self.severity = keys['severity'] + self.message = keys['message'] + self.dtserver = keys['dtserver'] + self.language = keys['language'] + self.dtprofup = keys['dtprofup'] + self.fi_org = keys['org'] + self.fi_fid = keys['fid'] + self.intu_bid = keys['intu.bid'] + + if int(self.code) == 0: self.success = True else: self.success = False def __str__(self): - ret = "\t<SIGNONMSGSRSV1>\r\n" + "\t\t<SONRS>\r\n" + "\t\t\t<STATUS>\r\n" + ret = "\t<SIGNONMSGSRSV1>\r\n" + "\t\t<SONRS>\r\n" + \ + "\t\t\t<STATUS>\r\n" ret += "\t\t\t\t<CODE>%s\r\n" % self.code ret += "\t\t\t\t<SEVERITY>%s\r\n" % self.severity if self.message: ret += "\t\t\t\t<MESSAGE>%s\r\n" % self.message - ret += "\t\t\t</STATUS>\r\n" + "\t\t</SONRS>\r\n" + "\t</SIGNONMSGSRSV1>\r\n" + ret += "\t\t\t</STATUS>\r\n" + if self.dtserver is not None: + ret += "\t\t\t<DTSERVER>" + self.dtserver + "\r\n" + if self.language is not None: + ret += "\t\t\t<LANGUAGE>" + self.language + "\r\n" + if self.dtprofup is not None: + ret += "\t\t\t<DTPROFUP>" + self.dtprofup + "\r\n" + if (self.fi_org is not None) or (self.fi_fid is not None): + ret += "\t\t\t<FI>\r\n" + if self.fi_org is not None: + ret += "\t\t\t\t<ORG>" + self.fi_org + "\r\n" + if self.fi_fid is not None: + ret += "\t\t\t\t<FID>" + self.fi_fid + "\r\n" + ret += "\t\t\t</FI>\r\n" + if self.intu_bid is not None: + ret += "\t\t\t<INTU.BID>" + self.intu_bid + "\r\n" + ret += "\t\t</SONRS>\r\n" + ret += "\t</SIGNONMSGSRSV1>\r\n" return ret + class Statement(object): def __init__(self): self.start_date = '' @@ -272,20 +330,29 @@ class Transaction(object): class InvestmentTransaction(object): - (Unknown, BuyMF, SellMF, Reinvest, BuyStock, SellStock) = [x for x in range(-1, 5)] + AGGREGATE_TYPES = ['buydebt', 'buymf', 'buyopt', 'buyother', + 'buystock', 'closureopt', 'income', + 'invexpense', 'jrnlfund', 'jrnlsec', + 'margininterest', 'reinvest', 'retofcap', + 'selldebt', 'sellmf', 'sellopt', 'sellother', + 'sellstock', 'split', 'transfer'] + def __init__(self, type): - try: - self.type = ['buymf', 'sellmf', 'reinvest', 'buystock', 'sellstock'].index(type.lower()) - except ValueError: - self.type = InvestmentTransaction.Unknown + self.type = type.lower() self.tradeDate = None self.settleDate = None + self.memo = '' self.security = '' + self.income_type = '' self.units = decimal.Decimal(0) self.unit_price = decimal.Decimal(0) + self.commission = decimal.Decimal(0) + self.fees = decimal.Decimal(0) + self.total = decimal.Decimal(0) def __repr__(self): - return "<InvestmentTransaction type=" + str(self.type) + ", units=" + str(self.units) + ">" + return "<InvestmentTransaction type=" + str(self.type) + ", \ + units=" + str(self.units) + ">" class Position(object): @@ -321,8 +388,9 @@ class OfxParser(object): ''' cls_.fail_fast = fail_fast - if isinstance(file_handle, type('')): - raise RuntimeError(six.u("parse() takes in a file handle, not a string")) + if not hasattr(file_handle, 'seek'): + raise TypeError(six.u('parse() accepts a seek-able file handle\ + , not %s' % type(file_handle).__name__)) ofx_obj = Ofx() @@ -332,14 +400,30 @@ class OfxParser(object): ofx_obj.accounts = [] ofx_obj.signon = None + skip_headers(ofx_file.fh) ofx = soup_maker(ofx_file.fh) - if len(ofx.contents) == 0: + if ofx.find('ofx') is None: raise OfxParserException('The ofx file is empty!') sonrs_ofx = ofx.find('sonrs') if sonrs_ofx: ofx_obj.signon = cls_.parseSonrs(sonrs_ofx) + stmttrnrs = ofx.find('stmttrnrs') + if stmttrnrs: + stmttrnrs_trnuid = stmttrnrs.find('trnuid') + if stmttrnrs_trnuid: + ofx_obj.trnuid = stmttrnrs_trnuid.contents[0].strip() + + stmttrnrs_status = stmttrnrs.find('status') + if stmttrnrs_status: + ofx_obj.status = {} + ofx_obj.status['code'] = int( + stmttrnrs_status.find('code').contents[0].strip() + ) + ofx_obj.status['severity'] = \ + stmttrnrs_status.find('severity').contents[0].strip() + stmtrs_ofx = ofx.findAll('stmtrs') if stmtrs_ofx: ofx_obj.accounts += cls_.parseStmtrs(stmtrs_ofx, AccountType.Bank) @@ -386,14 +470,23 @@ class OfxParser(object): timeZoneOffset = datetime.timedelta(hours=tz) + res = re.search("^[0-9]*\.([0-9]{0,5})", ofxDateTime) + if res: + msec = datetime.timedelta(seconds=float("0." + res.group(1))) + else: + msec = datetime.timedelta(seconds=0) + try: local_date = datetime.datetime.strptime( ofxDateTime[:14], '%Y%m%d%H%M%S' ) - return local_date - timeZoneOffset + return local_date - timeZoneOffset + msec except: + if ofxDateTime[:8] == "00000000": + return None + return datetime.datetime.strptime( - ofxDateTime[:8], '%Y%m%d') - timeZoneOffset + ofxDateTime[:8], '%Y%m%d') - timeZoneOffset + msec @classmethod def parseAcctinfors(cls_, acctinfors_ofx, ofx): @@ -462,7 +555,12 @@ class OfxParser(object): name_tag = secinfo_ofx.find('secname') ticker_tag = secinfo_ofx.find('ticker') memo_tag = secinfo_ofx.find('memo') - if uniqueid_tag and name_tag and ticker_tag: + if uniqueid_tag and name_tag: + try: + ticker = ticker_tag.contents[0].strip() + except AttributeError: + # ticker can be empty + ticker = None try: memo = memo_tag.contents[0].strip() except AttributeError: @@ -471,7 +569,7 @@ class OfxParser(object): securityList.append( Security(uniqueid_tag.contents[0].strip(), name_tag.contents[0].strip(), - ticker_tag.contents[0].strip(), + ticker, memo)) return securityList @@ -483,10 +581,10 @@ class OfxParser(object): position.security = tag.contents[0].strip() tag = ofx.find('units') if (hasattr(tag, 'contents')): - position.units = decimal.Decimal(tag.contents[0].strip()) + position.units = cls_.toDecimal(tag) tag = ofx.find('unitprice') if (hasattr(tag, 'contents')): - position.unit_price = decimal.Decimal(tag.contents[0].strip()) + position.unit_price = cls_.toDecimal(tag) tag = ofx.find('dtpriceasof') if (hasattr(tag, 'contents')): try: @@ -521,12 +619,27 @@ class OfxParser(object): tag = ofx.find('uniqueid') if (hasattr(tag, 'contents')): transaction.security = tag.contents[0].strip() + tag = ofx.find('incometype') + if (hasattr(tag, 'contents')): + transaction.income_type = tag.contents[0].strip() tag = ofx.find('units') if (hasattr(tag, 'contents')): - transaction.units = decimal.Decimal(tag.contents[0].strip()) + transaction.units = cls_.toDecimal(tag) tag = ofx.find('unitprice') if (hasattr(tag, 'contents')): - transaction.unit_price = decimal.Decimal(tag.contents[0].strip()) + transaction.unit_price = cls_.toDecimal(tag) + tag = ofx.find('commission') + if (hasattr(tag, 'contents')): + transaction.commission = cls_.toDecimal(tag) + tag = ofx.find('fees') + if (hasattr(tag, 'contents')): + transaction.fees = cls_.toDecimal(tag) + tag = ofx.find('total') + if (hasattr(tag, 'contents')): + transaction.total = cls_.toDecimal(tag) + tag = ofx.find('inv401ksource') + if (hasattr(tag, 'contents')): + transaction.inv401ksource = tag.contents[0].strip() return transaction @classmethod @@ -548,7 +661,8 @@ class OfxParser(object): raise except ValueError: e = sys.exc_info()[1] - statement.warnings.append(six.u('Invalid start date: %s') % e) + statement.warnings.append(six.u('Invalid start date:\ + %s') % e) if cls_.fail_fast: raise @@ -561,7 +675,8 @@ class OfxParser(object): statement.warnings.append(six.u('Empty end date.')) except ValueError: e = sys.exc_info()[1] - statement.warnings.append(six.u('Invalid end date: %s') % e) + statement.warnings.append(six.u('Invalid end date: \ + %s') % e) if cls_.fail_fast: raise @@ -576,12 +691,11 @@ class OfxParser(object): if cls_.fail_fast: raise statement.discarded_entries.append( - {six.u('error'): six.u("Error parsing positions: ") + str(e), - six.u('content'): investment_ofx} + {six.u('error'): six.u("Error parsing positions: \ + ") + str(e), six.u('content'): investment_ofx} ) - for transaction_type in ['buymf', 'sellmf', 'reinvest', 'buystock', - 'sellstock', 'buyopt', 'sellopt']: + for transaction_type in InvestmentTransaction.AGGREGATE_TYPES: try: for investment_ofx in invstmtrs_ofx.findAll(transaction_type): statement.transactions.append( @@ -613,14 +727,28 @@ class OfxParser(object): @classmethod def parseSonrs(cls_, sonrs): - code = int(sonrs.find('code').contents[0].strip()) - severity = sonrs.find('severity').contents[0].strip() - try: - message = sonrs.find('message').contents[0].strip() - except: - message = '' + items = [ + 'code', + 'severity', + 'dtserver', + 'language', + 'dtprofup', + 'org', + 'fid', + 'intu.bid', + 'message' + ] + idict = {} + for i in items: + try: + idict[i] = sonrs.find(i).contents[0].strip() + except: + idict[i] = None + idict['code'] = int(idict['code']) + if idict['message'] is None: + idict['message'] = '' - return Signon(code,severity,message) + return Signon(idict) @classmethod def parseStmtrs(cls_, stmtrs_list, accountType): @@ -628,6 +756,9 @@ class OfxParser(object): ret = [] for stmtrs_ofx in stmtrs_list: account = Account() + act_curdef = stmtrs_ofx.find('curdef') + if act_curdef: + account.curdef = act_curdef.contents[0].strip() acctid_tag = stmtrs_ofx.find('acctid') if hasattr(acctid_tag, 'contents'): account.account_id = acctid_tag.contents[0].strip() @@ -648,6 +779,41 @@ class OfxParser(object): return ret @classmethod + def parseBalance(cls_, statement, stmt_ofx, bal_tag_name, bal_attr, + bal_date_attr, bal_type_string): + bal_tag = stmt_ofx.find(bal_tag_name) + if hasattr(bal_tag, "contents"): + balamt_tag = bal_tag.find('balamt') + dtasof_tag = bal_tag.find('dtasof') + if hasattr(balamt_tag, "contents"): + try: + setattr(statement, bal_attr, cls_.toDecimal(balamt_tag)) + except (IndexError, decimal.InvalidOperation): + ex = sys.exc_info()[1] + statement.warnings.append( + six.u("%s balance amount was empty for \ + %s") % (bal_type_string, stmt_ofx)) + if cls_.fail_fast: + raise OfxParserException("Empty %s balance\ + " % bal_type_string) + if hasattr(dtasof_tag, "contents"): + try: + setattr(statement, bal_date_attr, cls_.parseOfxDateTime( + dtasof_tag.contents[0].strip())) + except IndexError: + statement.warnings.append( + six.u("%s balance date was empty for %s\ + ") % (bal_type_string, stmt_ofx)) + if cls_.fail_fast: + raise + except ValueError: + statement.warnings.append( + six.u("%s balance date was not allowed for \ + %s") % (bal_type_string, stmt_ofx)) + if cls_.fail_fast: + raise + + @classmethod def parseStatement(cls_, stmt_ofx): ''' Parse a statement in ofx-land and return a Statement object. @@ -665,7 +831,8 @@ class OfxParser(object): raise except ValueError: statement.warnings.append( - six.u("Statement start date was not allowed for %s") % stmt_ofx) + six.u("Statement start date was not allowed for \ + %s") % stmt_ofx) if cls_.fail_fast: raise @@ -682,13 +849,14 @@ class OfxParser(object): except ValueError: ve = sys.exc_info()[1] msg = six.u("Statement start date was not formatted " - "correctly for %s") + "correctly for %s") statement.warnings.append(msg % stmt_ofx) if cls_.fail_fast: raise except TypeError: statement.warnings.append( - six.u("Statement start date was not allowed for %s") % stmt_ofx) + six.u("Statement start date was not allowed for \ + %s") % stmt_ofx) if cls_.fail_fast: raise @@ -702,33 +870,11 @@ class OfxParser(object): if cls_.fail_fast: raise - ledger_bal_tag = stmt_ofx.find('ledgerbal') - if hasattr(ledger_bal_tag, "contents"): - balamt_tag = ledger_bal_tag.find('balamt') - if hasattr(balamt_tag, "contents"): - try: - statement.balance = decimal.Decimal( - balamt_tag.contents[0].strip()) - except (IndexError, decimal.InvalidOperation): - ex = sys.exc_info()[1] - statement.warnings.append( - six.u("Ledger balance amount was empty for %s") % stmt_ofx) - if cls_.fail_fast: - raise OfxParserException("Empty ledger balance") + cls_.parseBalance(statement, stmt_ofx, 'ledgerbal', + 'balance', 'balance_date', 'ledger') - avail_bal_tag = stmt_ofx.find('availbal') - if hasattr(avail_bal_tag, "contents"): - balamt_tag = avail_bal_tag.find('balamt') - if hasattr(balamt_tag, "contents"): - try: - statement.available_balance = decimal.Decimal( - balamt_tag.contents[0].strip()) - except (IndexError, decimal.InvalidOperation): - ex = sys.exc_info()[1] - msg = six.u("Available balance amount was empty for %s") - statement.warnings.append(msg % stmt_ofx) - if cls_.fail_fast: - raise OfxParserException("Empty available balance") + cls_.parseBalance(statement, stmt_ofx, 'availbal', 'available_balance', + 'available_balance_date', 'ledger') for transaction_ofx in stmt_ofx.findAll('stmttrn'): try: @@ -783,13 +929,17 @@ class OfxParser(object): amt_tag = txn_ofx.find('trnamt') if hasattr(amt_tag, "contents"): try: - transaction.amount = decimal.Decimal( - amt_tag.contents[0].strip()) + transaction.amount = cls_.toDecimal(amt_tag) except IndexError: raise OfxParserException("Invalid Transaction Date") except decimal.InvalidOperation: - raise OfxParserException( - six.u("Invalid Transaction Amount: '%s'") % amt_tag.contents[0]) + # Some banks use a null transaction for including interest + # rate changes on your statement. + if amt_tag.contents[0].strip() in ('null', '-null'): + transaction.amount = 0 + else: + raise OfxParserException( + six.u("Invalid Transaction Amount: '%s'") % amt_tag.contents[0]) except TypeError: raise OfxParserException( six.u("No Transaction Amount (a required field)")) @@ -819,24 +969,29 @@ class OfxParser(object): try: transaction.id = id_tag.contents[0].strip() except IndexError: - raise OfxParserException(six.u("Empty FIT id (a required field)")) + raise OfxParserException(six.u("Empty FIT id (a required \ + field)")) except TypeError: raise OfxParserException(six.u("No FIT id (a required field)")) else: - raise OfxParserException(six.u("Missing FIT id (a required field)")) + raise OfxParserException(six.u("Missing FIT id (a required \ + field)")) sic_tag = txn_ofx.find('sic') if hasattr(sic_tag, 'contents'): try: transaction.sic = sic_tag.contents[0].strip() except IndexError: - raise OfxParserException(six.u("Empty transaction Standard Industry Code (SIC)")) + raise OfxParserException(six.u("Empty transaction Standard \ + Industry Code (SIC)")) if transaction.sic is not None and transaction.sic in mcc.codes: try: - transaction.mcc = mcc.codes.get(transaction.sic, '').get('combined description') + transaction.mcc = mcc.codes.get(transaction.sic, '').get('combined \ + description') except IndexError: - raise OfxParserException(six.u("Empty transaction Merchant Category Code (MCC)")) + raise OfxParserException(six.u("Empty transaction Merchant Category \ + Code (MCC)")) except AttributeError: if cls._fail_fast: raise @@ -846,6 +1001,14 @@ class OfxParser(object): try: transaction.checknum = checknum_tag.contents[0].strip() except IndexError: - raise OfxParserException(six.u("Empty Check (or other reference) number")) + raise OfxParserException(six.u("Empty Check (or other reference) \ + number")) return transaction + + @classmethod + def toDecimal(cls_, tag): + d = tag.contents[0].strip() + if '.' not in d and ',' in d: + d = d.replace(',', '.') + return decimal.Decimal(d) |