diff options
author | David Drysdale <dmd@lurklurk.org> | 2013-10-10 12:01:32 +0100 |
---|---|---|
committer | David Drysdale <dmd@lurklurk.org> | 2013-10-10 12:32:55 +0100 |
commit | 077f8a975e52ae09df630ecfb2abe098042af0ca (patch) | |
tree | 89021da27c58c5456d18222f0715d3fdd98c4759 /tools | |
parent | a623ded7f038fb35d8f69c4554c71194d3c99afb (diff) |
Add support for flat (not per-locale) prefix data, and for multiple values.
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/python/buildprefixdata.py | 64 |
1 files changed, 51 insertions, 13 deletions
diff --git a/tools/python/buildprefixdata.py b/tools/python/buildprefixdata.py index 5ade0e4c..10c1ae2e 100755 --- a/tools/python/buildprefixdata.py +++ b/tools/python/buildprefixdata.py @@ -2,10 +2,15 @@ """Script to read the libphonenumber per-prefix metadata and generate Python code. Invocation: - buildprefixdata.py indir outfile + buildprefixdata.py [options] input outfile Processes all of the per-prefix data under the given input directory and emit generated Python code. + +Options: + --var XXX : use this prefix for variable names in generated code + --flat : don't do per-locale processing + --sep C : expect metadata to be a list with C as separator """ # Based on original metadata data files from libphonenumber: @@ -54,7 +59,12 @@ COMMENT_LINE_RE = re.compile(r'^\s*#.*$', re.UNICODE) DATA_LINE_RE = re.compile(r'^\+?(?P<prefix>\d+)\|(?P<stringdata>.*)$', re.UNICODE) # Boilerplate header -PREFIXDATA_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a dict of locale:name. +PREFIXDATA_LOCALE_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a dict of locale:name. + +Auto-generated file, do not edit by hand. +""" +''' +PREFIXDATA_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a name. Auto-generated file, do not edit by hand. """ @@ -77,7 +87,7 @@ COPYRIGHT_NOTICE = """# Copyright (C) 2011-%s The Libphonenumber Authors """ % datetime.datetime.now().year -def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix): +def load_locale_prefixdata_file(prefixdata, filename, locale=None, overall_prefix=None, separator=None): """Load per-prefix data from the given file, for the given locale and prefix. We assume that this file: @@ -85,6 +95,13 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix): - may have comment lines (starting with #) and blank lines - has data lines of the form '<prefix>|<stringdata>' - contains only data for prefixes that are extensions of the filename. + + If overall_prefix is specified, lines are checked to ensure their prefix falls within this value. + + If locale is specified, prefixdata[prefix][locale] is filled in; otherwise, just prefixdata[prefix]. + + If separator is specified, the string data will be split on this separator, and the output values + in the dict will be tuples of strings rather than strings. """ with open(filename, "rb") as infile: lineno = 0 @@ -98,12 +115,17 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix): if stringdata != stringdata.rstrip(): print ("%s:%d: Warning: stripping trailing whitespace" % (filename, lineno)) stringdata = stringdata.rstrip() - if not prefix.startswith(overall_prefix): + if overall_prefix is not None and not prefix.startswith(overall_prefix): raise Exception("%s:%d: Prefix %s is not within %s" % (filename, lineno, prefix, overall_prefix)) + if separator is not None: + stringdata = tuple(stringdata.split(separator)) if prefix not in prefixdata: prefixdata[prefix] = {} - prefixdata[prefix][locale] = stringdata + if locale is not None: + prefixdata[prefix][locale] = stringdata + else: + prefixdata[prefix] = stringdata elif BLANK_LINE_RE.match(uline): pass elif COMMENT_LINE_RE.match(uline): @@ -113,7 +135,7 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix): (filename, lineno, line)) -def load_locale_prefixdata(indir): +def load_locale_prefixdata(indir, separator=None): """Load per-prefix data from the given top-level directory. Prefix data is assumed to be held in files <indir>/<locale>/<prefix>.txt. @@ -126,7 +148,7 @@ def load_locale_prefixdata(indir): continue for filename in glob.glob(os.path.join(indir, locale, "*%s" % PREFIXDATA_SUFFIX)): overall_prefix, ext = os.path.splitext(os.path.basename(filename)) - load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix) + load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix, separator) return prefixdata @@ -138,17 +160,23 @@ def _stable_dict_repr(strdict): return "{%s}" % ", ".join(lines) -def output_prefixdata_code(prefixdata, outfilename, varprefix): +def output_prefixdata_code(prefixdata, outfilename, varprefix, per_locale): """Output the per-prefix data in Python form to the given file """ with open(outfilename, "w") as outfile: longest_prefix = 0 - prnt(PREFIXDATA_FILE_PROLOG, file=outfile) + if per_locale: + prnt(PREFIXDATA_LOCALE_FILE_PROLOG, file=outfile) + else: + prnt(PREFIXDATA_FILE_PROLOG, file=outfile) prnt(COPYRIGHT_NOTICE, file=outfile) prnt("%s_DATA = {" % varprefix, file=outfile) for prefix in sorted(prefixdata.keys()): if len(prefix) > longest_prefix: longest_prefix = len(prefix) - prnt(" '%s':%s," % (prefix, _stable_dict_repr(prefixdata[prefix])), file=outfile) + if per_locale: + prnt(" '%s':%s," % (prefix, _stable_dict_repr(prefixdata[prefix])), file=outfile) + else: + prnt(" '%s':%r," % (prefix, prefixdata[prefix]), file=outfile) prnt("}", file=outfile) prnt("%s_LONGEST_PREFIX = %d" % (varprefix, longest_prefix), file=outfile) @@ -156,8 +184,10 @@ def output_prefixdata_code(prefixdata, outfilename, varprefix): def _standalone(argv): """Parse the given input directory and emit generated code.""" varprefix = "GEOCODE" + per_locale = True + separator = None try: - opts, args = getopt.getopt(argv, "hv:", ("help", "var=")) + opts, args = getopt.getopt(argv, "hv:fs:", ("help", "var=", "flat", "sep=")) except getopt.GetoptError: prnt(__doc__, file=sys.stderr) sys.exit(1) @@ -167,6 +197,10 @@ def _standalone(argv): sys.exit(1) elif opt in ("-v", "--var"): varprefix = arg + elif opt in ("-f", "--flat"): + per_locale = False + elif opt in ("-s", "--sep"): + separator = arg else: prnt("Unknown option %s" % opt, file=sys.stderr) prnt(__doc__, file=sys.stderr) @@ -174,8 +208,12 @@ def _standalone(argv): if len(args) != 2: prnt(__doc__, file=sys.stderr) sys.exit(1) - prefixdata = load_locale_prefixdata(args[0]) - output_prefixdata_code(prefixdata, args[1], varprefix) + if per_locale: + prefixdata = load_locale_prefixdata(args[0], separator=separator) + else: + prefixdata = {} + load_locale_prefixdata_file(prefixdata, args[0], separator=separator) + output_prefixdata_code(prefixdata, args[1], varprefix, per_locale) if __name__ == "__main__": |