diff options
author | David Drysdale <dmd@lurklurk.org> | 2013-08-16 14:41:24 +0100 |
---|---|---|
committer | David Drysdale <dmd@lurklurk.org> | 2013-08-16 15:00:49 +0100 |
commit | 2ea483bfa9ac19144183dadaa7cafb2772a72bfe (patch) | |
tree | fd8d61e092ffc8fee3cc43ffec8667b01c147579 | |
parent | c265fd4d3ed2dcf25c11a5185298be5d37a09505 (diff) |
Add LOCALE_NORMALIZATION_MAP for locale equivalences
-rw-r--r-- | python/phonenumbers/geocoder.py | 39 |
-rw-r--r-- | python/tests/geocodertest.py | 12 |
2 files changed, 44 insertions, 7 deletions
diff --git a/python/phonenumbers/geocoder.py b/python/phonenumbers/geocoder.py index 521ba5df..4297de92 100644 --- a/python/phonenumbers/geocoder.py +++ b/python/phonenumbers/geocoder.py @@ -64,6 +64,9 @@ except ImportError: # pragma no cover raise +_LOCALE_NORMALIZATION_MAP = {"zh_TW": "zh_Hant", "zh_HK": "zh_Hant", "zh_MO": "zh_Hant"} + + def _may_fall_back_to_english(lang): # Don't fall back to English if the requested language is among the following: # - Chinese @@ -72,16 +75,38 @@ def _may_fall_back_to_english(lang): return lang != "zh" and lang != "ja" and lang != "ko" +def _full_locale(lang, script, region): + if script is not None: + if region is not None: + return "%s_%s_%s" % (lang, script, region) + else: + return "%s_%s" % (lang, script) + elif region is not None: + return "%s_%s" % (lang, region) + else: + return lang + + def _find_lang(langdict, lang, script, region): """Return the entry in the dictionary for the given language information.""" - # First look for lang, script as a combination - lang_script = "%s_%s" % (lang, script) - if lang_script in langdict: - return langdict[lang_script] + # Check if we should map this to a different locale. 
+ full_locale = _full_locale(lang, script, region) + if (full_locale in _LOCALE_NORMALIZATION_MAP and + _LOCALE_NORMALIZATION_MAP[full_locale] in langdict): + return langdict[_LOCALE_NORMALIZATION_MAP[full_locale]] + # First look for the full locale + if full_locale in langdict: + return langdict[full_locale] + # Then look for lang, script as a combination + if script is not None: + lang_script = "%s_%s" % (lang, script) + if lang_script in langdict: + return langdict[lang_script] # Next look for lang, region as a combination - lang_region = "%s_%s" % (lang, region) - if lang_region in langdict: - return langdict[lang_region] + if region is not None: + lang_region = "%s_%s" % (lang, region) + if lang_region in langdict: + return langdict[lang_region] # Fall back to bare language code lookup if lang in langdict: return langdict[lang] diff --git a/python/tests/geocodertest.py b/python/tests/geocodertest.py index 4a9ecfe1..9cd136c3 100644 --- a/python/tests/geocodertest.py +++ b/python/tests/geocodertest.py @@ -169,6 +169,7 @@ class PhoneNumberGeocoderTest(unittest.TestCase): TEST_GEOCODE_DATA['1650960'] = {'en': u'Mountain View, CA', "en_GB": u'Mountain View California', "en_US": u'Mountain View, Sunny California', + "en_Xyzz_US": u'MTV - xyzz', "en_Latn": u'MountainView'} # The following test might one day return "Mountain View California" self.assertEqual("United States", @@ -179,7 +180,18 @@ class PhoneNumberGeocoderTest(unittest.TestCase): description_for_number(US_NUMBER2, _ENGLISH, script="Latn")) self.assertEqual("United States", description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="GB")) + self.assertEqual("MTV - xyzz", + description_for_number(US_NUMBER2, _ENGLISH, script="Xyzz", region="US")) + self.assertEqual("Mountain View, Sunny California", + description_for_number(US_NUMBER2, _ENGLISH, script="Zazz", region="US")) # Get a different result when there is a script-specific variant self.assertEqual("MountainView", 
description_for_number(US_NUMBER2, _ENGLISH, script="Latn", region="US")) TEST_GEOCODE_DATA['1650960'] = {'en': u'Mountain View, CA'} + + # Test the locale mapping + TEST_GEOCODE_DATA['8868'] = {'zh': u'Chinese', 'zh_Hant': u'Hant-specific'} + tw_number = FrozenPhoneNumber(country_code=886, national_number=810080123) + self.assertEqual("Hant-specific", + description_for_number(tw_number, "zh", region="TW")) + del TEST_GEOCODE_DATA['8868'] |