summary refs log tree commit diff
diff options
context:
space:
mode:
author David Drysdale <dmd@lurklurk.org> 2013-08-16 14:41:24 +0100
committer David Drysdale <dmd@lurklurk.org> 2013-08-16 15:00:49 +0100
commit 2ea483bfa9ac19144183dadaa7cafb2772a72bfe (patch)
tree fd8d61e092ffc8fee3cc43ffec8667b01c147579
parent c265fd4d3ed2dcf25c11a5185298be5d37a09505 (diff)
Add LOCALE_NORMALIZATION_MAP for locale equivalences
-rw-r--r-- python/phonenumbers/geocoder.py 39
-rw-r--r-- python/tests/geocodertest.py 12
2 files changed, 44 insertions, 7 deletions
diff --git a/python/phonenumbers/geocoder.py b/python/phonenumbers/geocoder.py
index 521ba5df..4297de92 100644
--- a/python/phonenumbers/geocoder.py
+++ b/python/phonenumbers/geocoder.py
@@ -64,6 +64,9 @@ except ImportError: # pragma no cover
raise
+_LOCALE_NORMALIZATION_MAP = {"zh_TW": "zh_Hant", "zh_HK": "zh_Hant", "zh_MO": "zh_Hant"}
+
+
def _may_fall_back_to_english(lang):
# Don't fall back to English if the requested language is among the following:
# - Chinese
@@ -72,16 +75,38 @@ def _may_fall_back_to_english(lang):
return lang != "zh" and lang != "ja" and lang != "ko"
+def _full_locale(lang, script, region):
+ if script is not None:
+ if region is not None:
+ return "%s_%s_%s" % (lang, script, region)
+ else:
+ return "%s_%s" % (lang, script)
+ elif region is not None:
+ return "%s_%s" % (lang, region)
+ else:
+ return lang
+
+
def _find_lang(langdict, lang, script, region):
"""Return the entry in the dictionary for the given language information."""
- # First look for lang, script as a combination
- lang_script = "%s_%s" % (lang, script)
- if lang_script in langdict:
- return langdict[lang_script]
+ # Check if we should map this to a different locale.
+ full_locale = _full_locale(lang, script, region)
+ if (full_locale in _LOCALE_NORMALIZATION_MAP and
+ _LOCALE_NORMALIZATION_MAP[full_locale] in langdict):
+ return langdict[_LOCALE_NORMALIZATION_MAP[full_locale]]
+ # First look for the full locale
+ if full_locale in langdict:
+ return langdict[full_locale]
+ # Then look for lang, script as a combination
+ if script is not None:
+ lang_script = "%s_%s" % (lang, script)
+ if lang_script in langdict:
+ return langdict[lang_script]
# Next look for lang, region as a combination
- lang_region = "%s_%s" % (lang, region)
- if lang_region in langdict:
- return langdict[lang_region]
+ if region is not None:
+ lang_region = "%s_%s" % (lang, region)
+ if lang_region in langdict:
+ return langdict[lang_region]
# Fall back to bare language code lookup
if lang in langdict:
return langdict[lang]
diff --git a/python/tests/geocodertest.py b/python/tests/geocodertest.py
index 4a9ecfe1..9cd136c3 100644
--- a/python/tests/geocodertest.py
+++ b/python/tests/geocodertest.py
@@ -169,6 +169,7 @@ class PhoneNumberGeocoderTest(unittest.TestCase):
TEST_GEOCODE_DATA['1650960'] = {'en': u'Mountain View, CA',
"en_GB": u'Mountain View California',
"en_US": u'Mountain View, Sunny California',
+ "en_Xyzz_US": u'MTV - xyzz',
"en_Latn": u'MountainView'}
# The following test might one day return "Mountain View California"
self.assertEqual("United States",
@@ -179,7 +180,18 @@ class PhoneNumberGeocoderTest(unittest.TestCase):
description_for_number(US_NUMBER2, _ENGLISH, script="Latn"))
self.assertEqual("United States",
description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="GB"))
+ self.assertEqual("MTV - xyzz",
+ description_for_number(US_NUMBER2, _ENGLISH, script="Xyzz", region="US"))
+ self.assertEqual("Mountain View, Sunny California",
+ description_for_number(US_NUMBER2, _ENGLISH, script="Zazz", region="US"))
# Get a different result when there is a script-specific variant
self.assertEqual("MountainView",
description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="US"))
TEST_GEOCODE_DATA['1650960'] = {'en': u'Mountain View, CA'}
+
+ # Test the locale mapping
+ TEST_GEOCODE_DATA['8868'] = {'zh': u'Chinese', 'zh_Hant': u'Hant-specific'}
+ tw_number = FrozenPhoneNumber(country_code=886, national_number=810080123)
+ self.assertEqual("Hant-specific",
+ description_for_number(tw_number, "zh", region="TW"))
+ del TEST_GEOCODE_DATA['8868']