summaryrefslogtreecommitdiff
path: root/src/modules/filters/utf8transliterator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/filters/utf8transliterator.cpp')
-rw-r--r--src/modules/filters/utf8transliterator.cpp889
1 files changed, 0 insertions, 889 deletions
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
deleted file mode 100644
index b753c0c..0000000
--- a/src/modules/filters/utf8transliterator.cpp
+++ /dev/null
@@ -1,889 +0,0 @@
-/******************************************************************************
-*
-* utf8transliterators - SWFilter descendant to transliterate between
-* ICU-supported scripts.
-*/
-
-#ifdef _ICU_
-
-#include <stdlib.h>
-
-#ifdef __GNUC__
-#include <unixstr.h>
-#endif
-
-#include <unicode/ucnv.h>
-#include <unicode/uchar.h>
-#include <utf8transliterator.h>
-
-#ifndef _ICUSWORD_
-#include "unicode/resbund.h"
-#endif
-#include <swlog.h>
-
-SWORD_NAMESPACE_START
-
-const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
- "Off",
- "Latin",
- "IPA",
- "Basic Latin",
- "SBL",
- "TC",
- "Beta",
- "BGreek",
- "SERA",
- "Hugoye",
- "UNGEGN",
- "ISO",
- "ALA-LC",
- "BGN-PCGN",
- "Greek",
- "Hebrew",
- "Cyrillic",
- "Arabic",
- "Syriac",
- "Katakana",
- "Hiragana",
- "Hangul",
- "Devanagari",
- "Tamil",
- "Bengali",
- "Gurmukhi",
- "Gujarati",
- "Oriya",
- "Telugu",
- "Kannada",
- "Malayalam",
- "Thai",
- "Georgian",
- "Armenian",
- "Ethiopic",
- "Gothic",
- "Ugaritic",
- "Coptic",
- "Meroitic",
- "Linear B",
- "Cypriot",
- "Runic",
- "Ogham",
- "Thaana",
- "Glagolitic",
- "Tengwar",
- "Cirth"
-};
-
-const char UTF8Transliterator::optName[] = "Transliteration";
-const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
-
-SWTransMap UTF8Transliterator::transMap;
-
-#ifndef _ICUSWORD_
-
-const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
-const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
-#ifdef SWICU_DATA
-const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
-#else
-const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/";
-#endif
-
-class SWCharString {
- public:
- inline SWCharString(const UnicodeString& str);
- inline ~SWCharString();
- inline operator const char*() { return ptr; }
- private:
- char buf[128];
- char* ptr;
-};
-SWCharString::SWCharString(const UnicodeString& str) {
- // TODO This isn't quite right -- we should probably do
- // preflighting here to determine the real length.
- if (str.length() >= (int32_t)sizeof(buf)) {
- ptr = new char[str.length() + 8];
- } else {
- ptr = buf;
- }
- str.extract(0, 0x7FFFFFFF, ptr, "");
-}
-
-SWCharString::~SWCharString() {
- if (ptr != buf) {
- delete[] ptr;
- }
-}
-
-#endif // _ICUSWORD_
-
-
-UTF8Transliterator::UTF8Transliterator() {
- option = 0;
- unsigned long i;
- for (i = 0; i < NUMTARGETSCRIPTS; i++) {
- options.push_back(optionstring[i]);
- }
-#ifndef _ICUSWORD_
- utf8status = U_ZERO_ERROR;
- Load(utf8status);
-#endif
-}
-
-void UTF8Transliterator::Load(UErrorCode &status)
-{
-#ifndef _ICUSWORD_
- static const char translit_swordindex[] = "translit_swordindex";
-
- UResourceBundle *bundle, *transIDs, *colBund;
- bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
- if (U_FAILURE(status)) {
- SWLog::systemlog->LogError("no resource index to load");
- SWLog::systemlog->LogError("status %s", u_errorName(status));
- return;
- }
-
- transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);
- UParseError parseError;
-
- int32_t row, maxRows;
- if (U_SUCCESS(status)) {
- maxRows = ures_getSize(transIDs);
- for (row = 0; row < maxRows; row++) {
- colBund = ures_getByIndex(transIDs, row, 0, &status);
-
- if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
- UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
- UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
- UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
- SWLog::systemlog->LogInformation("ok so far");
-
- if (U_SUCCESS(status)) {
- switch (type) {
- case 0x66: // 'f'
- case 0x69: // 'i'
- // 'file' or 'internal';
- // row[2]=resource, row[3]=direction
- {
- UBool visible = (type == 0x0066 /*f*/);
- UTransDirection dir =
- (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
- 0x0046 /*F*/) ?
- UTRANS_FORWARD : UTRANS_REVERSE;
- //registry->put(id, resString, dir, visible);
- SWLog::systemlog->LogInformation("instantiating %s ...", resString.getBuffer());
- registerTrans(id, resString, dir, status);
- SWLog::systemlog->LogInformation("done.");
- }
- break;
- case 0x61: // 'a'
- // 'alias'; row[2]=createInstance argument
- //registry->put(id, resString, TRUE);
- break;
- }
- }
- else SWLog::systemlog->LogError("Failed to get resString");
- }
- else SWLog::systemlog->LogError("Failed to get row");
- ures_close(colBund);
- }
- }
- else
- {
- SWLog::systemlog->LogError("no resource index to load");
- SWLog::systemlog->LogError("status %s", u_errorName(status));
- }
-
- ures_close(transIDs);
- ures_close(bundle);
-
-#endif // _ICUSWORD_
-}
-
-void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
- UTransDirection dir, UErrorCode &status )
-{
-#ifndef _ICUSWORD_
- SWLog::systemlog->LogInformation("registering ID locally %s", ID.getBuffer());
- SWTransData swstuff;
- swstuff.resource = resource;
- swstuff.dir = dir;
- SWTransPair swpair;
- swpair.first = ID;
- swpair.second = swstuff;
- transMap.insert(swpair);
-#endif
-}
-
-bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
-{
-#ifndef _ICUSWORD_
- Transliterator *trans = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
- if (!U_FAILURE(status))
- {
- // already have it, clean up and return true
- SWLog::systemlog->LogInformation("already have it %s", ID.getBuffer());
- delete trans;
- return true;
- }
- status = U_ZERO_ERROR;
-
- SWTransMap::iterator swelement;
- if ((swelement = transMap.find(ID)) != transMap.end())
- {
- SWLog::systemlog->LogInformation("found element in map");
- SWTransData swstuff = (*swelement).second;
- UParseError parseError;
- //UErrorCode status;
- //std::cout << "unregistering " << ID << std::endl;
- //Transliterator::unregister(ID);
- SWLog::systemlog->LogInformation("resource is %s", swstuff.resource.getBuffer());
-
- // Get the rules
- //std::cout << "importing: " << ID << ", " << resource << std::endl;
- SWCharString ch(swstuff.resource);
- UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
- const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
- ures_close(bundle);
- //parser.parse(rules, isReverse ? UTRANS_REVERSE : UTRANS_FORWARD,
- // parseError, status);
- if (U_FAILURE(status)) {
- SWLog::systemlog->LogError("Failed to get rules");
- SWLog::systemlog->LogError("status %s", u_errorName(status));
- return false;
- }
-
-
- Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
- parseError,status);
- if (U_FAILURE(status)) {
- SWLog::systemlog->LogError("Failed to create transliterator");
- SWLog::systemlog->LogError("status %s", u_errorName(status));
- SWLog::systemlog->LogError("Parse error: line %s", parseError.line);
- SWLog::systemlog->LogError("Parse error: offset %d", parseError.offset);
- SWLog::systemlog->LogError("Parse error: preContext %s", *parseError.preContext);
- SWLog::systemlog->LogError("Parse error: postContext %s", *parseError.postContext);
- SWLog::systemlog->LogError("rules were");
-// SWLog::systemlog->LogError((const char *)rules);
- return false;
- }
-
- Transliterator::registerInstance(trans);
- return true;
-
- //Transliterator *trans = instantiateTrans(ID, swstuff.resource, swstuff.dir, parseError, status);
- //return trans;
- }
- else
- {
- return false;
- }
-#else
-return true;
-#endif // _ICUSWORD_
-}
-
-bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
-#ifdef _ICUSWORD_
- UErrorCode status;
- if (checkTrans(UnicodeString(newTrans), status)) {
-#endif
- *transList += newTrans;
- *transList += ";";
- return true;
-#ifdef _ICUSWORD_
- }
- else {
- return false;
- }
-#endif
-}
-
-Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
-{
- Transliterator *trans = Transliterator::createInstance(ID,UTRANS_FORWARD,status);
- if (U_FAILURE(status)) {
- delete trans;
- return NULL;
- }
- else {
- return trans;
- }
-}
-
-void UTF8Transliterator::setOptionValue(const char *ival)
-{
- unsigned char i = option = NUMTARGETSCRIPTS;
- while (i && stricmp(ival, optionstring[i])) {
- i--;
- option = i;
- }
-}
-
-const char *UTF8Transliterator::getOptionValue()
-{
- return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
-}
-
-char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
-{
- if (option) { // if we want transliteration
- unsigned long i, j;
- UErrorCode err = U_ZERO_ERROR;
- UConverter * conv = NULL;
- conv = ucnv_open("UTF-8", &err);
- SWBuf ID;
-
- bool compat = false;
-
- // Convert UTF-8 string to UTF-16 (UChars)
- j = strlen(text);
- int32_t len = (j * 2) + 1;
- UChar *source = new UChar[len];
- err = U_ZERO_ERROR;
- len = ucnv_toUChars(conv, source, len, text, j, &err);
- source[len] = 0;
-
- // Figure out which scripts are used in the string
- unsigned char scripts[NUMSCRIPTS];
-
- for (i = 0; i < NUMSCRIPTS; i++) {
- scripts[i] = false;
- }
-
- for (i = 0; i < len; i++) {
- j = ublock_getCode(source[i]);
- scripts[SE_LATIN] = true;
- switch (j) {
- //case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
- case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
- case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
- case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
- case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
- case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
- case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
- case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
- case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
- case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
- case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
- case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
- case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
- case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
- case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
- case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
- case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
- case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
- case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
- case UBLOCK_THAI: scripts[SE_THAI] = true; break;
- case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
- case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
- case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
- case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
- case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
-// case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
-// case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break;
-// case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break;
- case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
- case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
- case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
-// case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
-// case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
-// case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
- case UBLOCK_CJK_RADICALS_SUPPLEMENT:
- case UBLOCK_KANGXI_RADICALS:
- case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
- case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
- case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
- case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
- scripts[SE_HAN] = true;
- break;
- case UBLOCK_CJK_COMPATIBILITY:
- case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
- case UBLOCK_CJK_COMPATIBILITY_FORMS:
- scripts[SE_HAN] = true;
- compat = true;
- break;
- case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
- scripts[SE_HANGUL] = true;
- compat = true;
- break;
-
- //default: scripts[SE_LATIN] = true;
- }
- }
- scripts[option] = false; //turn off the reflexive transliteration
-
- //return if we have no transliteration to do for this text
- j = 0;
- for (i = 0; !j && i < NUMSCRIPTS; i++) {
- if (scripts[i]) j++;
- }
- if (!j) {
- ucnv_close(conv);
- return 0;
- }
-
- if (compat) {
- addTrans("NFKD", &ID);
- }
- else {
- addTrans("NFD", &ID);
- }
-
- //Simple X to Latin transliterators
- if (scripts[SE_GREEK]) {
- if (strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
- if (option == SE_SBL)
- addTrans("Greek-Latin/SBL", &ID);
- else if (option == SE_TC)
- addTrans("Greek-Latin/TC", &ID);
- else if (option == SE_BETA)
- addTrans("Greek-Latin/Beta", &ID);
- else if (option == SE_BGREEK)
- addTrans("Greek-Latin/BGreek", &ID);
- else if (option == SE_UNGEGN)
- addTrans("Greek-Latin/UNGEGN", &ID);
- else if (option == SE_ISO)
- addTrans("Greek-Latin/ISO", &ID);
- else if (option == SE_ALALC)
- addTrans("Greek-Latin/ALALC", &ID);
- else if (option == SE_BGNPCGN)
- addTrans("Greek-Latin/BGNPCGN", &ID);
- else if (option == SE_IPA)
- addTrans("Greek-IPA/Ancient", &ID);
- else {
- addTrans("Greek-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- else {
- if (option == SE_SBL)
- addTrans("Coptic-Latin/SBL", &ID);
- else if (option == SE_TC)
- addTrans("Coptic-Latin/TC", &ID);
- else if (option == SE_BETA)
- addTrans("Coptic-Latin/Beta", &ID);
- else if (option == SE_IPA)
- addTrans("Coptic-IPA", &ID);
- else {
- addTrans("Coptic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- }
- if (scripts[SE_HEBREW]) {
- if (option == SE_SBL)
- addTrans("Hebrew-Latin/SBL", &ID);
- else if (option == SE_TC)
- addTrans("Hebrew-Latin/TC", &ID);
- else if (option == SE_BETA)
- addTrans("Hebrew-Latin/Beta", &ID);
- else if (option == SE_UNGEGN)
- addTrans("Hebrew-Latin/UNGEGN", &ID);
- else if (option == SE_ALALC)
- addTrans("Hebrew-Latin/ALALC", &ID);
- else if (option == SE_SYRIAC)
- addTrans("Hebrew-Syriac", &ID);
- else {
- addTrans("Hebrew-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_CYRILLIC]) {
- if (option == SE_GLAGOLITIC)
- addTrans("Cyrillic-Glagolitic", &ID);
- else {
- addTrans("Cyrillic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_ARABIC]) {
- addTrans("Arabic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_SYRIAC]) {
- if (option == SE_TC)
- addTrans("Syriac-Latin/TC", &ID);
- else if (option == SE_BETA)
- addTrans("Syriac-Latin/Beta", &ID);
- else if (option == SE_HUGOYE)
- addTrans("Syriac-Latin/Hugoye", &ID);
- else if (option == SE_HEBREW)
- addTrans("Syriac-Hebrew", &ID);
- else {
- addTrans("Syriac-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_THAI]) {
- addTrans("Thai-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_GEORGIAN]) {
- if (option == SE_ISO)
- addTrans("Georgian-Latin/ISO", &ID);
- else if (option == SE_ALALC)
- addTrans("Georgian-Latin/ALALC", &ID);
- else if (option == SE_BGNPCGN)
- addTrans("Georgian-Latin/BGNPCGN", &ID);
- else if (option == SE_IPA)
- addTrans("Georgian-IPA", &ID);
- else {
- addTrans("Georgian-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_ARMENIAN]) {
- if (option == SE_ISO)
- addTrans("Armenian-Latin/ISO", &ID);
- else if (option == SE_ALALC)
- addTrans("Armenian-Latin/ALALC", &ID);
- else if (option == SE_BGNPCGN)
- addTrans("Armenian-Latin/BGNPCGN", &ID);
- else if (option == SE_IPA)
- addTrans("Armenian-IPA", &ID);
- else {
- addTrans("Armenian-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_ETHIOPIC]) {
- if (option == SE_UNGEGN)
- addTrans("Ethiopic-Latin/UNGEGN", &ID);
- else if (option == SE_ISO)
- addTrans("Ethiopic-Latin/ISO", &ID);
- else if (option == SE_ALALC)
- addTrans("Ethiopic-Latin/ALALC", &ID);
- else if (option == SE_SERA)
- addTrans("Ethiopic-Latin/SERA", &ID);
- else {
- addTrans("Ethiopic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_GOTHIC]) {
- if (option == SE_BASICLATIN)
- addTrans("Gothic-Latin/Basic", &ID);
- else if (option == SE_IPA)
- addTrans("Gothic-IPA", &ID);
- else {
- addTrans("Gothic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_UGARITIC]) {
- if (option == SE_SBL)
- addTrans("Ugaritic-Latin/SBL", &ID);
- else {
- addTrans("Ugaritic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_MEROITIC]) {
- addTrans("Meroitic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_LINEARB]) {
- addTrans("LinearB-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_CYPRIOT]) {
- addTrans("Cypriot-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_RUNIC]) {
- addTrans("Runic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_OGHAM]) {
- addTrans("Ogham-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_THAANA]) {
- if (option == SE_ALALC)
- addTrans("Thaana-Latin/ALALC", &ID);
- else if (option == SE_BGNPCGN)
- addTrans("Thaana-Latin/BGNPCGN", &ID);
- else {
- addTrans("Thaana-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_GLAGOLITIC]) {
- if (option == SE_ISO)
- addTrans("Glagolitic-Latin/ISO", &ID);
- else if (option == SE_ALALC)
- addTrans("Glagolitic-Latin/ALALC", &ID);
- else if (option == SE_ALALC)
- addTrans("Glagolitic-Cyrillic", &ID);
- else {
- addTrans("Glagolitic-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- if (scripts[SE_THAI]) {
- addTrans("Thai-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_THAI]) {
- addTrans("Thai-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
-
- if (scripts[SE_HAN]) {
- if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
- addTrans("Kanji-Romaji", &ID);
- }
- else {
- addTrans("Han-Latin", &ID);
- }
- scripts[SE_LATIN] = true;
- }
-
- // Inter-Kana and Kana to Latin transliterators
- if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
- addTrans("Katakana-Hiragana", &ID);
- scripts[SE_HIRAGANA] = true;
- }
- else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
- addTrans("Hiragana-Katakana", &ID);
- scripts[SE_KATAKANA] = true;
- }
- else {
- if (scripts[SE_KATAKANA]) {
- addTrans("Katakana-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_HIRAGANA]) {
- addTrans("Hiragana-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
-
- // Korean to Latin transliterators
- if (scripts[SE_HANGUL]) {
- addTrans("Hangul-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_JAMO]) {
- addTrans("Jamo-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
-
- // Indic-Latin
- if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
- // Indic to Latin
- if (scripts[SE_TAMIL]) {
- addTrans("Tamil-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_BENGALI]) {
- addTrans("Bengali-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_GURMUKHI]) {
- addTrans("Gurmukhi-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_GUJARATI]) {
- addTrans("Gujarati-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_ORIYA]) {
- addTrans("Oriya-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_TELUGU]) {
- addTrans("Telugu-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_KANNADA]) {
- addTrans("Kannada-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_MALAYALAM]) {
- addTrans("Malayalam-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- }
- else {
- if (scripts[SE_LATIN]) {
- addTrans("Latin-InterIndic", &ID);
- }
- if (scripts[SE_DEVANAGARI]) {
- addTrans("Devanagari-InterIndic", &ID);
- }
- if (scripts[SE_TAMIL]) {
- addTrans("Tamil-InterIndic", &ID);
- }
- if (scripts[SE_BENGALI]) {
- addTrans("Bengali-InterIndic", &ID);
- }
- if (scripts[SE_GURMUKHI]) {
- addTrans("Gurmurkhi-InterIndic", &ID);
- }
- if (scripts[SE_GUJARATI]) {
- addTrans("Gujarati-InterIndic", &ID);
- }
- if (scripts[SE_ORIYA]) {
- addTrans("Oriya-InterIndic", &ID);
- }
- if (scripts[SE_TELUGU]) {
- addTrans("Telugu-InterIndic", &ID);
- }
- if (scripts[SE_KANNADA]) {
- addTrans("Kannada-InterIndic", &ID);
- }
- if (scripts[SE_MALAYALAM]) {
- addTrans("Malayalam-InterIndic", &ID);
- }
-
- switch(option) {
- case SE_DEVANAGARI:
- addTrans("InterIndic-Devanagari", &ID);
- break;
- case SE_TAMIL:
- addTrans("InterIndic-Tamil", &ID);
- break;
- case SE_BENGALI:
- addTrans("InterIndic-Bengali", &ID);
- break;
- case SE_GURMUKHI:
- addTrans("InterIndic-Gurmukhi", &ID);
- break;
- case SE_GUJARATI:
- addTrans("InterIndic-Gujarati", &ID);
- break;
- case SE_ORIYA:
- addTrans("InterIndic-Oriya", &ID);
- break;
- case SE_TELUGU:
- addTrans("InterIndic-Telugu", &ID);
- break;
- case SE_KANNADA:
- addTrans("InterIndic-Kannada", &ID);
- break;
- case SE_MALAYALAM:
- addTrans("InterIndic-Malayalam", &ID);
- break;
- default:
- addTrans("InterIndic-Latin", &ID);
- scripts[SE_LATIN] = true;
- break;
- }
- }
-
- if (scripts[SE_TENGWAR]) {
- addTrans("Tengwar-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
- if (scripts[SE_CIRTH]) {
- addTrans("Cirth-Latin", &ID);
- scripts[SE_LATIN] = true;
- }
-
- if (scripts[SE_LATIN]) {
- switch (option) {
- case SE_GREEK:
- addTrans("Latin-Greek", &ID);
- break;
- case SE_HEBREW:
- addTrans("Latin-Hebrew", &ID);
- break;
- case SE_CYRILLIC:
- addTrans("Latin-Cyrillic", &ID);
- break;
- case SE_ARABIC:
- addTrans("Latin-Arabic", &ID);
- break;
- case SE_SYRIAC:
- addTrans("Latin-Syriac", &ID);
- break;
- case SE_THAI:
- addTrans("Latin-Thai", &ID);
- break;
- case SE_GEORGIAN:
- addTrans("Latin-Georgian", &ID);
- break;
- case SE_ARMENIAN:
- addTrans("Latin-Armenian", &ID);
- break;
- case SE_ETHIOPIC:
- addTrans("Latin-Ethiopic", &ID);
- break;
- case SE_GOTHIC:
- addTrans("Latin-Gothic", &ID);
- break;
- case SE_UGARITIC:
- addTrans("Latin-Ugaritic", &ID);
- break;
- case SE_COPTIC:
- addTrans("Latin-Coptic", &ID);
- break;
- case SE_KATAKANA:
- addTrans("Latin-Katakana", &ID);
- break;
- case SE_HIRAGANA:
- addTrans("Latin-Hiragana", &ID);
- break;
- case SE_JAMO:
- addTrans("Latin-Jamo", &ID);
- break;
- case SE_HANGUL:
- addTrans("Latin-Hangul", &ID);
- break;
- case SE_MEROITIC:
- addTrans("Latin-Meroitic", &ID);
- break;
- case SE_LINEARB:
- addTrans("Latin-LinearB", &ID);
- break;
- case SE_CYPRIOT:
- addTrans("Latin-Cypriot", &ID);
- break;
- case SE_RUNIC:
- addTrans("Latin-Runic", &ID);
- break;
- case SE_OGHAM:
- addTrans("Latin-Ogham", &ID);
- break;
- case SE_THAANA:
- addTrans("Latin-Thaana", &ID);
- break;
- case SE_GLAGOLITIC:
- addTrans("Latin-Glagolitic", &ID);
- break;
- case SE_TENGWAR:
- addTrans("Latin-Tengwar", &ID);
- break;
- case SE_CIRTH:
- addTrans("Latin-Cirth", &ID);
- break;
- }
- }
-
- if (option == SE_BASICLATIN) {
- addTrans("Any-Latin1", &ID);
- }
-
- addTrans("NFC", &ID);
-
- err = U_ZERO_ERROR;
- Transliterator * trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
- if (trans && !U_FAILURE(err)) {
- UnicodeString target = UnicodeString(source);
- trans->transliterate(target);
- text.setSize(text.size()*2);
- len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
- text.setSize(len);
- delete trans;
- }
- ucnv_close(conv);
- }
- return 0;
-}
-
-SWORD_NAMESPACE_END
-#endif
-
-
-