summaryrefslogtreecommitdiff
path: root/icu/translit_Any_Latin1.txt
diff options
context:
space:
mode:
Diffstat (limited to 'icu/translit_Any_Latin1.txt')
-rw-r--r--icu/translit_Any_Latin1.txt89
1 files changed, 89 insertions, 0 deletions
diff --git a/icu/translit_Any_Latin1.txt b/icu/translit_Any_Latin1.txt
new file mode 100644
index 0000000..d049845
--- /dev/null
+++ b/icu/translit_Any_Latin1.txt
@@ -0,0 +1,89 @@
+// Any_Latin1
+
+translit_Any_Latin1 {
+ Rule {
+
+ //NFKC (compatibility decomposition, then canonical composition); then NFD for all non-Latin-1 codepoints
+ ":: NFKC ();"
+ ":: [^\u0000-\u00FF] NFD ();"
+
+ //remove non-spacing marks
+ "[:Mn:] > ;"
+
+ //change some non-Latin-1 codepoints to similar characters in Latin-1 range (rules are tried in order, so the first rule listing a codepoint wins)
+ " [\u2000-\u200A\u3000] > ' ' ;"
+ " [\u01C3\u2762]> '!' ;"
+ " \u203C > '!!' ;"
+ " [\u203D\u2048] > '?!' ;"
+ " [\u02BA\u030B\u030E\u2033\u3003\u201C-\u201F] > '\"' ;"
+ " [\u066A\u2030\u2031] > '%' ;"
+ " [\u02B9\u02BC\u02C8\u0301\u2032\u2018-\u201B] > '' ;"
+ " [\u066D\u2217\u2731] > '*' ;"
+ " [\u060C\u201A\u3001] > ',' ;"
+ " [\u2010-\u2013\u2212] > '-' ;"
+ " [\u2023\u06D4\u3002\u0589] > '.' ;"
+ " [\u0338\u2044\u2215] > '/' ;"
+ " \u2236 > ':' ;"
+ " \u061B > ';' ;"
+ " [\u2039\u2329\u3008] > '<' ;"
+ " \u2261 > '=' ;"
+ " [\u203A\u232A\u3009] > '>' ;"
+ " [\u037E\u061F] > '?' ;"
+ " \u2049 > '!?' ;"
+ " \u212C > B ;"
+ " [\u2102\u212D] > C ;"
+ " [\u2107\u2130] > E ;"
+ " [\u2131\u2132] > F ;"
+ " [\u210B\u210C\u210D] > H ;"
+ " [\u2110\u2111\u2160] > I ;"
+ " \u212A > K ;"
+ " \u2112 > L ;"
+ " \u2133 > M ;"
+ " \u2115 > N ;"
+ " \u2119 > P ;"
+ " \u211A > Q ;"
+ " [\u211B\u211C\u211D] > R ;"
+ " [\u2124\u2128] > Z ;"
+ " \u2216 > '\\' ;"
+ " [\u02C4\u02C6\u0302\u2303] > '^' ;"
+ " [\u02CD\u0331\u0332\u2017] > '_' ;"
+ " [\u02CB\u0300\u2035] > '`' ;"
+ " [\u212E\u212F] > e ;"
+ " [\u0261\u210A] > g ;"
+ " [\u04BB\u210E] > h ;"
+ " \u0131 > i ;"
+ " \u207F > n ;"
+ " \u2134 > o ;"
+ " \u01B6 > z ;"
+ " [\u01C0\u2223\u2758] > '|' ;"
+ " [\u02DC\u0303\u223C\uFF5E] > '~' ;"
+ " \u202F > \u00A0 ;"
+ " \uFFFD > \u001A;"
+ " \u20A4 > \u00A3 ;"
+ " [\u20A0-\u20AF] > \u00A4 ;"
+ " \u0308 > \u00A8 ;"
+ " \u2117 > \u00A9 ;"
+ " [\u226A\u300A] > \u00AB ;"
+ " \u2310 > \u00AC ;"
+ " \u1806 > \u00AD ;"
+ " [\u02C9\u0304\u0305] > \u00AF ;"
+ " [\u02DA\u030A\u2070\u2218] > \u00B0 ;"
+ " \u2213 > \u00B1 ;"
+ " [\u02B9\u02CA\u0301\u2032] > \u00B4 ;"
+ " [\u204B\u2761] > \u00B6 ;"
+ " [\u2022\u2024\u2027\u2219\u22C5\u30FB] > \u00B7 ;"
+ " \u0327 > \u00B8 ;"
+ " [\u226B\u300B] > \u00BB ;"
+ " \u2014 > '--';"
+ " \u2015 > '---';"
+ " \u2025 > '..';"
+ " \u2026 > '...';"
+ " \u2016 > '||';"
+ " \uFEFF > ;"
+
+ //replace every remaining non-Latin-1 codepoint with a space
+ "[^\u0000-\u00FF] > ' ';"
+
+ }
+}
+