summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorDavid Drysdale <dmd@lurklurk.org>2013-10-10 12:01:32 +0100
committerDavid Drysdale <dmd@lurklurk.org>2013-10-10 12:32:55 +0100
commit077f8a975e52ae09df630ecfb2abe098042af0ca (patch)
tree89021da27c58c5456d18222f0715d3fdd98c4759 /tools
parenta623ded7f038fb35d8f69c4554c71194d3c99afb (diff)
Add support for flat (not per-locale) prefix data, and for multiple values.
Diffstat (limited to 'tools')
-rwxr-xr-xtools/python/buildprefixdata.py64
1 files changed, 51 insertions, 13 deletions
diff --git a/tools/python/buildprefixdata.py b/tools/python/buildprefixdata.py
index 5ade0e4c..10c1ae2e 100755
--- a/tools/python/buildprefixdata.py
+++ b/tools/python/buildprefixdata.py
@@ -2,10 +2,15 @@
"""Script to read the libphonenumber per-prefix metadata and generate Python code.
Invocation:
- buildprefixdata.py indir outfile
+ buildprefixdata.py [options] input outfile
Processes all of the per-prefix data under the given input directory and emit
generated Python code.
+
+Options:
+ --var XXX : use this prefix for variable names in generated code
+ --flat : don't do per-locale processing
+ --sep C : expect metadata to be a list with C as separator
"""
# Based on original metadata data files from libphonenumber:
@@ -54,7 +59,12 @@ COMMENT_LINE_RE = re.compile(r'^\s*#.*$', re.UNICODE)
DATA_LINE_RE = re.compile(r'^\+?(?P<prefix>\d+)\|(?P<stringdata>.*)$', re.UNICODE)
# Boilerplate header
-PREFIXDATA_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a dict of locale:name.
+PREFIXDATA_LOCALE_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a dict of locale:name.
+
+Auto-generated file, do not edit by hand.
+"""
+'''
+PREFIXDATA_FILE_PROLOG = '''"""Per-prefix data, mapping each prefix to a name.
Auto-generated file, do not edit by hand.
"""
@@ -77,7 +87,7 @@ COPYRIGHT_NOTICE = """# Copyright (C) 2011-%s The Libphonenumber Authors
""" % datetime.datetime.now().year
-def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix):
+def load_locale_prefixdata_file(prefixdata, filename, locale=None, overall_prefix=None, separator=None):
"""Load per-prefix data from the given file, for the given locale and prefix.
We assume that this file:
@@ -85,6 +95,13 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix):
- may have comment lines (starting with #) and blank lines
- has data lines of the form '<prefix>|<stringdata>'
- contains only data for prefixes that are extensions of the filename.
+
+ If overall_prefix is specified, lines are checked to ensure their prefix falls within this value.
+
+ If locale is specified, prefixdata[prefix][locale] is filled in; otherwise, just prefixdata[prefix].
+
+ If separator is specified, the string data will be split on this separator, and the output values
+ in the dict will be tuples of strings rather than strings.
"""
with open(filename, "rb") as infile:
lineno = 0
@@ -98,12 +115,17 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix):
if stringdata != stringdata.rstrip():
print ("%s:%d: Warning: stripping trailing whitespace" % (filename, lineno))
stringdata = stringdata.rstrip()
- if not prefix.startswith(overall_prefix):
+ if overall_prefix is not None and not prefix.startswith(overall_prefix):
raise Exception("%s:%d: Prefix %s is not within %s" %
(filename, lineno, prefix, overall_prefix))
+ if separator is not None:
+ stringdata = tuple(stringdata.split(separator))
if prefix not in prefixdata:
prefixdata[prefix] = {}
- prefixdata[prefix][locale] = stringdata
+ if locale is not None:
+ prefixdata[prefix][locale] = stringdata
+ else:
+ prefixdata[prefix] = stringdata
elif BLANK_LINE_RE.match(uline):
pass
elif COMMENT_LINE_RE.match(uline):
@@ -113,7 +135,7 @@ def load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix):
(filename, lineno, line))
-def load_locale_prefixdata(indir):
+def load_locale_prefixdata(indir, separator=None):
"""Load per-prefix data from the given top-level directory.
Prefix data is assumed to be held in files <indir>/<locale>/<prefix>.txt.
@@ -126,7 +148,7 @@ def load_locale_prefixdata(indir):
continue
for filename in glob.glob(os.path.join(indir, locale, "*%s" % PREFIXDATA_SUFFIX)):
overall_prefix, ext = os.path.splitext(os.path.basename(filename))
- load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix)
+ load_locale_prefixdata_file(prefixdata, filename, locale, overall_prefix, separator)
return prefixdata
@@ -138,17 +160,23 @@ def _stable_dict_repr(strdict):
return "{%s}" % ", ".join(lines)
-def output_prefixdata_code(prefixdata, outfilename, varprefix):
+def output_prefixdata_code(prefixdata, outfilename, varprefix, per_locale):
"""Output the per-prefix data in Python form to the given file """
with open(outfilename, "w") as outfile:
longest_prefix = 0
- prnt(PREFIXDATA_FILE_PROLOG, file=outfile)
+ if per_locale:
+ prnt(PREFIXDATA_LOCALE_FILE_PROLOG, file=outfile)
+ else:
+ prnt(PREFIXDATA_FILE_PROLOG, file=outfile)
prnt(COPYRIGHT_NOTICE, file=outfile)
prnt("%s_DATA = {" % varprefix, file=outfile)
for prefix in sorted(prefixdata.keys()):
if len(prefix) > longest_prefix:
longest_prefix = len(prefix)
- prnt(" '%s':%s," % (prefix, _stable_dict_repr(prefixdata[prefix])), file=outfile)
+ if per_locale:
+ prnt(" '%s':%s," % (prefix, _stable_dict_repr(prefixdata[prefix])), file=outfile)
+ else:
+ prnt(" '%s':%r," % (prefix, prefixdata[prefix]), file=outfile)
prnt("}", file=outfile)
prnt("%s_LONGEST_PREFIX = %d" % (varprefix, longest_prefix), file=outfile)
@@ -156,8 +184,10 @@ def output_prefixdata_code(prefixdata, outfilename, varprefix):
def _standalone(argv):
"""Parse the given input directory and emit generated code."""
varprefix = "GEOCODE"
+ per_locale = True
+ separator = None
try:
- opts, args = getopt.getopt(argv, "hv:", ("help", "var="))
+ opts, args = getopt.getopt(argv, "hv:fs:", ("help", "var=", "flat", "sep="))
except getopt.GetoptError:
prnt(__doc__, file=sys.stderr)
sys.exit(1)
@@ -167,6 +197,10 @@ def _standalone(argv):
sys.exit(1)
elif opt in ("-v", "--var"):
varprefix = arg
+ elif opt in ("-f", "--flat"):
+ per_locale = False
+ elif opt in ("-s", "--sep"):
+ separator = arg
else:
prnt("Unknown option %s" % opt, file=sys.stderr)
prnt(__doc__, file=sys.stderr)
@@ -174,8 +208,12 @@ def _standalone(argv):
if len(args) != 2:
prnt(__doc__, file=sys.stderr)
sys.exit(1)
- prefixdata = load_locale_prefixdata(args[0])
- output_prefixdata_code(prefixdata, args[1], varprefix)
+ if per_locale:
+ prefixdata = load_locale_prefixdata(args[0], separator=separator)
+ else:
+ prefixdata = {}
+ load_locale_prefixdata_file(prefixdata, args[0], separator=separator)
+ output_prefixdata_code(prefixdata, args[1], varprefix, per_locale)
if __name__ == "__main__":