diff options
Diffstat (limited to 'src/modules/filters/utf8transliterator.cpp')
-rw-r--r-- | src/modules/filters/utf8transliterator.cpp | 889 |
1 files changed, 0 insertions, 889 deletions
/******************************************************************************
*
* utf8transliterators - SWFilter descendant to transliterate between
* ICU-supported scripts.
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <unicode/ucnv.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
#include <utf8transliterator.h>

#ifndef _ICUSWORD_
#include "unicode/resbund.h"
#endif
#include <swlog.h>

SWORD_NAMESPACE_START

/*
 * User-visible names of the selectable target scripts / transliteration
 * schemes.  The index into this table is the value stored in 'option'.
 */
const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
	"Off",
	"Latin",
	"IPA",
	"Basic Latin",
	"SBL",
	"TC",
	"Beta",
	"BGreek",
	"SERA",
	"Hugoye",
	"UNGEGN",
	"ISO",
	"ALA-LC",
	"BGN-PCGN",
	"Greek",
	"Hebrew",
	"Cyrillic",
	"Arabic",
	"Syriac",
	"Katakana",
	"Hiragana",
	"Hangul",
	"Devanagari",
	"Tamil",
	"Bengali",
	"Gurmukhi",
	"Gujarati",
	"Oriya",
	"Telugu",
	"Kannada",
	"Malayalam",
	"Thai",
	"Georgian",
	"Armenian",
	"Ethiopic",
	"Gothic",
	"Ugaritic",
	"Coptic",
	"Meroitic",
	"Linear B",
	"Cypriot",
	"Runic",
	"Ogham",
	"Thaana",
	"Glagolitic",
	"Tengwar",
	"Cirth"
};

const char UTF8Transliterator::optName[] = "Transliteration";
const char UTF8Transliterator::optTip[] = "Transliterates between scripts";

SWTransMap UTF8Transliterator::transMap;

/*
 * Returns the language code of 'module', or "" when no module (or no language)
 * is available, so callers can compare it without a NULL check.
 */
static const char *moduleLang(const SWModule *module) {
	// Lang() is not const-qualified, hence the cast (as in the original code)
	const char *lang = module ? ((SWModule *)module)->Lang() : 0;
	return lang ? lang : "";
}

#ifndef _ICUSWORD_

const char UTF8Transliterator::SW_RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
const char UTF8Transliterator::SW_RB_RULE[] = "Rule";
#ifdef SWICU_DATA
const char UTF8Transliterator::SW_RESDATA[] = SWICU_DATA;
#else
const char UTF8Transliterator::SW_RESDATA[] = "/usr/local/lib/sword/"; 
#endif

/*
 * Exposes a UnicodeString as a NUL-terminated char string.  Short strings
 * live in an in-object buffer, longer ones on the heap.  The conversion is
 * done with the "" codepage of UnicodeString::extract(), so it is only meant
 * for identifiers and resource names (plain ASCII-style text).
 */
class SWCharString {
 public:
	inline SWCharString(const UnicodeString& str);
	inline ~SWCharString();
	inline operator const char*() { return ptr; }
 private:
	// Not copyable: a copy would alias 'ptr' and delete the heap buffer twice.
	SWCharString(const SWCharString&);
	SWCharString &operator=(const SWCharString&);

	char buf[128];
	char* ptr;
};

SWCharString::SWCharString(const UnicodeString& str) {
	// TODO This isn't quite right -- we should probably do
	// preflighting here to determine the real length.
	if (str.length() >= (int32_t)sizeof(buf)) {
		ptr = new char[str.length() + 8];
	} else {
		ptr = buf;
	}
	str.extract(0, 0x7FFFFFFF, ptr, "");
}

SWCharString::~SWCharString() {
	if (ptr != buf) {
		delete[] ptr;
	}
}

/*
 * Converts a UParseError context snippet (at most U_PARSE_CONTEXT_LEN UTF-16
 * units) to UTF-8 so it can be given to the printf-style logger.
 * 'out' must hold at least U_PARSE_CONTEXT_LEN * 3 + 1 bytes; it is always
 * NUL-terminated and is left empty when the conversion fails.
 */
static void contextToUTF8(const UChar *context, char *out, int32_t outCapacity) {
	// do not rely on the terminator being present inside the fixed array
	int32_t units = 0;
	while (units < U_PARSE_CONTEXT_LEN && context[units]) {
		units++;
	}

	UErrorCode convStatus = U_ZERO_ERROR;
	int32_t written = 0;
	u_strToUTF8(out, outCapacity - 1, &written, context, units, &convStatus);
	out[U_SUCCESS(convStatus) ? written : 0] = 0;
}

#endif // _ICUSWORD_


/*
 * Publishes every entry of optionstring as a selectable option value, starts
 * with transliteration switched off and, unless the transliterators are built
 * into ICU (_ICUSWORD_), loads the index of SWORD's own rule resources.
 */
UTF8Transliterator::UTF8Transliterator() {
	option = 0;
	unsigned long i;
	for (i = 0; i < NUMTARGETSCRIPTS; i++) {
		options.push_back(optionstring[i]);
	}
#ifndef _ICUSWORD_
	utf8status = U_ZERO_ERROR;
	Load(utf8status);
#endif
}

/*
 * Reads the "translit_swordindex" resource bundle found under SW_RESDATA and
 * records every 'file' / 'internal' rule entry in transMap (see
 * registerTrans).  Failures are logged; 'status' carries the ICU error code
 * back to the caller.
 */
void UTF8Transliterator::Load(UErrorCode &status)
{
#ifndef _ICUSWORD_
	static const char translit_swordindex[] = "translit_swordindex";

	UResourceBundle *bundle = ures_openDirect(SW_RESDATA, translit_swordindex, &status);
	if (U_FAILURE(status)) {
		SWLog::systemlog->LogError("no resource index to load");
		SWLog::systemlog->LogError("status %s", u_errorName(status));
		return;
	}

	UResourceBundle *transIDs = ures_getByKey(bundle, SW_RB_RULE_BASED_IDS, 0, &status);

	if (U_SUCCESS(status)) {
		const int32_t maxRows = ures_getSize(transIDs);
		for (int32_t row = 0; row < maxRows; row++) {
			UResourceBundle *colBund = ures_getByIndex(transIDs, row, 0, &status);

			// each row is expected to be { id, type, resource, direction }
			if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
				UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
				UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
				UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
				SWLog::systemlog->LogInformation("ok so far");

				if (U_SUCCESS(status)) {
					switch (type) {
					case 0x66: // 'f'
					case 0x69: // 'i'
						// 'file' or 'internal';
						// row[2]=resource, row[3]=direction
						{
							UTransDirection dir =
								(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
								 0x0046 /*F*/) ?
								UTRANS_FORWARD : UTRANS_REVERSE;
							// the logger is printf-style, so hand it a char string,
							// not the UTF-16 buffer of the UnicodeString
							SWLog::systemlog->LogInformation("instantiating %s ...", (const char *)SWCharString(resString));
							registerTrans(id, resString, dir, status);
							SWLog::systemlog->LogInformation("done.");
						}
						break;
					case 0x61: // 'a'
						// 'alias'; row[2]=createInstance argument
						// (aliases are not registered)
						break;
					}
				}
				else SWLog::systemlog->LogError("Failed to get resString");
			}
			else SWLog::systemlog->LogError("Failed to get row");
			ures_close(colBund);
		}
	}
	else
	{
		SWLog::systemlog->LogError("no resource index to load");
		SWLog::systemlog->LogError("status %s", u_errorName(status));
	}

	ures_close(transIDs);
	ures_close(bundle);

#endif // _ICUSWORD_
}

/*
 * Remembers which resource and direction belong to transliterator 'ID' so
 * that checkTrans can build it from its rules later.  'status' is not
 * modified.
 */
void UTF8Transliterator::registerTrans(const UnicodeString& ID, const UnicodeString& resource,
		UTransDirection dir, UErrorCode &status )
{
#ifndef _ICUSWORD_
	SWLog::systemlog->LogInformation("registering ID locally %s", (const char *)SWCharString(ID));
	SWTransData swstuff;
	swstuff.resource = resource;
	swstuff.dir = dir;
	SWTransPair swpair;
	swpair.first = ID;
	swpair.second = swstuff;
	transMap.insert(swpair);
#endif
}

/*
 * Makes sure a transliterator called 'ID' can be instantiated.
 * If ICU already knows the ID nothing else happens; otherwise the rules
 * recorded for it in transMap are loaded, compiled and registered with ICU.
 * Returns true when the transliterator is available afterwards.  With
 * _ICUSWORD_ defined the function always returns true.
 */
bool UTF8Transliterator::checkTrans(const UnicodeString& ID, UErrorCode &status )
{
#ifndef _ICUSWORD_
	Transliterator *existing = Transliterator::createInstance(ID, UTRANS_FORWARD, status);
	const bool known = !U_FAILURE(status);
	delete existing;	// only probing; harmless when createInstance returned NULL
	if (known) {
		SWLog::systemlog->LogInformation("already have it %s", (const char *)SWCharString(ID));
		return true;
	}
	status = U_ZERO_ERROR;

	SWTransMap::iterator swelement = transMap.find(ID);
	if (swelement == transMap.end()) {
		return false;
	}

	SWLog::systemlog->LogInformation("found element in map");
	SWTransData swstuff = (*swelement).second;

	// Get the rules
	SWCharString ch(swstuff.resource);
	SWLog::systemlog->LogInformation("resource is %s", (const char *)ch);
	UResourceBundle *bundle = ures_openDirect(SW_RESDATA, ch, &status);
	const UnicodeString rules = ures_getUnicodeStringByKey(bundle, SW_RB_RULE, &status);
	ures_close(bundle);
	if (U_FAILURE(status)) {
		SWLog::systemlog->LogError("Failed to get rules");
		SWLog::systemlog->LogError("status %s", u_errorName(status));
		return false;
	}

	// start from a defined state in case ICU fails before filling it in
	UParseError parseError;
	parseError.line = 0;
	parseError.offset = 0;
	parseError.preContext[0] = 0;
	parseError.postContext[0] = 0;

	Transliterator *trans = Transliterator::createFromRules(ID, rules, swstuff.dir,
			parseError, status);
	if (U_FAILURE(status)) {
		char context[U_PARSE_CONTEXT_LEN * 3 + 1];
		SWLog::systemlog->LogError("Failed to create transliterator");
		SWLog::systemlog->LogError("status %s", u_errorName(status));
		// line/offset are integers and the contexts are UTF-16 arrays;
		// none of them may be passed to %s directly
		SWLog::systemlog->LogError("Parse error: line %d", (int)parseError.line);
		SWLog::systemlog->LogError("Parse error: offset %d", (int)parseError.offset);
		contextToUTF8(parseError.preContext, context, (int32_t)sizeof(context));
		SWLog::systemlog->LogError("Parse error: preContext %s", context);
		contextToUTF8(parseError.postContext, context, (int32_t)sizeof(context));
		SWLog::systemlog->LogError("Parse error: postContext %s", context);
		delete trans;
		return false;
	}

	Transliterator::registerInstance(trans);
	return true;
#else
	return true;
#endif // _ICUSWORD_
}

/*
 * Appends "newTrans;" to the compound transliterator ID being assembled in
 * 'transList'.  Returns false (and appends nothing) only when checkTrans
 * rejects the ID.
 *
 * NOTE(review): checkTrans is consulted only when _ICUSWORD_ is defined, which
 * is exactly the configuration in which checkTrans is a stub returning true;
 * the rules collected by Load() are therefore never instantiated from here.
 * The guard looks inverted (#ifndef expected) -- confirm before changing it.
 */
bool UTF8Transliterator::addTrans(const char* newTrans, SWBuf* transList) {
#ifdef _ICUSWORD_
	UErrorCode status = U_ZERO_ERROR;	// ICU calls do nothing when handed a failure code
	if (checkTrans(UnicodeString(newTrans), status)) {
#endif
		*transList += newTrans;
		*transList += ";";
		return true;
#ifdef _ICUSWORD_
	}
	else {
		return false;
	}
#endif
}

/*
 * Instantiates the (possibly compound) transliterator 'ID' in direction 'dir'.
 * Returns NULL when ICU reports a failure in 'status'; otherwise the caller
 * owns the returned object.
 */
Transliterator * UTF8Transliterator::createTrans(const UnicodeString& ID, UTransDirection dir, UErrorCode &status )
{
	Transliterator *trans = Transliterator::createInstance(ID, dir, status);
	if (U_FAILURE(status)) {
		delete trans;
		return NULL;
	}
	return trans;
}

/*
 * Selects the target script by its (case-insensitive) name from optionstring.
 * Unknown names and NULL select entry 0 ("Off").
 */
void UTF8Transliterator::setOptionValue(const char *ival)
{
	option = 0;
	if (!ival) {
		return;
	}
	// Search from the last valid entry downwards; entry 0 is the fallback and
	// therefore needs no comparison.
	for (unsigned char i = NUMTARGETSCRIPTS - 1; i > 0; i--) {
		if (!stricmp(ival, optionstring[i])) {
			option = i;
			break;
		}
	}
}

/*
 * Returns the name of the currently selected target script, or 0 when
 * 'option' does not index optionstring.
 */
const char *UTF8Transliterator::getOptionValue()
{
	return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
}

/*
 * Transliterates the UTF-8 'text' in place into the selected target script.
 *
 * The text is converted to UTF-16, the Unicode blocks it uses are detected,
 * a compound ICU transliterator ID ("NFD;<script>-<target>;...;NFC;") is
 * assembled from them and applied, and the result is converted back to UTF-8.
 * 'module' (may be NULL) supplies the language used to tell Coptic from Greek
 * and Japanese from other Han text; 'key' is not used.
 * Always returns 0; when nothing can be done 'text' is left unchanged.
 */
char UTF8Transliterator::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
	if (!option) {
		return 0;	// transliteration is switched off
	}

	unsigned long i;
	UErrorCode err = U_ZERO_ERROR;
	UConverter *conv = ucnv_open("UTF-8", &err);
	if (U_FAILURE(err) || !conv) {
		SWLog::systemlog->LogError("status %s", u_errorName(err));
		ucnv_close(conv);
		return 0;
	}

	SWBuf ID;
	bool compat = false;

	// Convert UTF-8 string to UTF-16 (UChars)
	const unsigned long srcBytes = strlen(text);
	const int32_t capacity = (int32_t)(srcBytes * 2) + 1;
	UChar *source = new UChar[capacity];
	err = U_ZERO_ERROR;
	int32_t len = ucnv_toUChars(conv, source, capacity, text, (int32_t)srcBytes, &err);
	if (U_FAILURE(err) || len >= capacity) {
		delete[] source;
		ucnv_close(conv);
		return 0;
	}
	source[len] = 0;

	// Figure out which scripts are used in the string
	unsigned char scripts[NUMSCRIPTS];
	for (i = 0; i < NUMSCRIPTS; i++) {
		scripts[i] = false;
	}

	// Latin is assumed to be present in any non-empty text
	if (len > 0) {
		scripts[SE_LATIN] = true;
	}

	// Walk code points rather than UTF-16 units: blocks outside the BMP
	// (Gothic, Ugaritic, ...) are encoded as surrogate pairs.
	for (int32_t pos = 0; pos < len; ) {
		UChar32 c;
		U16_NEXT(source, pos, len, c);
		switch (ublock_getCode(c)) {
		case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
		case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
		case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
		case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
		case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
		case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
		case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
		case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
		case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
		case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
		case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
		case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
		case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
		case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
		case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
		case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
		case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
		case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
		case UBLOCK_THAI: scripts[SE_THAI] = true; break;
		case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
		case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
		case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
		case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
		case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
//		case UBLOCK_MEROITIC: scripts[SE_MEROITIC] = true; break;
//		case UBLOCK_LINEARB: scripts[SE_LINEARB] = true; break;
//		case UBLOCK_CYPRIOT: scripts[SE_CYPRIOT] = true; break;
		case UBLOCK_RUNIC: scripts[SE_RUNIC] = true; break;
		case UBLOCK_OGHAM: scripts[SE_OGHAM] = true; break;
		case UBLOCK_THAANA: scripts[SE_THAANA] = true; break;
//		case UBLOCK_GLAGOLITIC: scripts[SE_GLAGOLITIC] = true; break;
//		case UBLOCK_TENGWAR: scripts[SE_TENGWAR] = true; break;
//		case UBLOCK_CIRTH: scripts[SE_CIRTH] = true; break;
		case UBLOCK_CJK_RADICALS_SUPPLEMENT:
		case UBLOCK_KANGXI_RADICALS:
		case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
		case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
			scripts[SE_HAN] = true;
			break;
		case UBLOCK_CJK_COMPATIBILITY:
		case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
		case UBLOCK_CJK_COMPATIBILITY_FORMS:
			scripts[SE_HAN] = true;
			compat = true;
			break;
		case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
			scripts[SE_HANGUL] = true;
			compat = true;
			break;
		default:
			break;
		}
	}

	// turn off the reflexive transliteration
	if (option < NUMSCRIPTS) {
		scripts[option] = false;
	}

	// return if we have no transliteration to do for this text
	bool anyScript = false;
	for (i = 0; !anyScript && i < NUMSCRIPTS; i++) {
		if (scripts[i]) anyScript = true;
	}
	if (!anyScript) {
		delete[] source;
		ucnv_close(conv);
		return 0;
	}

	// decompose first; compatibility characters need the compatibility form
	if (compat) {
		addTrans("NFKD", &ID);
	}
	else {
		addTrans("NFD", &ID);
	}

	// Simple X to Latin transliterators
	if (scripts[SE_GREEK]) {
		// Coptic modules use the Greek block, so the module language decides
		if (strnicmp(moduleLang(module), "cop", 3)) {
			if (option == SE_SBL)
				addTrans("Greek-Latin/SBL", &ID);
			else if (option == SE_TC)
				addTrans("Greek-Latin/TC", &ID);
			else if (option == SE_BETA)
				addTrans("Greek-Latin/Beta", &ID);
			else if (option == SE_BGREEK)
				addTrans("Greek-Latin/BGreek", &ID);
			else if (option == SE_UNGEGN)
				addTrans("Greek-Latin/UNGEGN", &ID);
			else if (option == SE_ISO)
				addTrans("Greek-Latin/ISO", &ID);
			else if (option == SE_ALALC)
				addTrans("Greek-Latin/ALALC", &ID);
			else if (option == SE_BGNPCGN)
				addTrans("Greek-Latin/BGNPCGN", &ID);
			else if (option == SE_IPA)
				addTrans("Greek-IPA/Ancient", &ID);
			else {
				addTrans("Greek-Latin", &ID);
				scripts[SE_LATIN] = true;
			}
		}
		else {
			if (option == SE_SBL)
				addTrans("Coptic-Latin/SBL", &ID);
			else if (option == SE_TC)
				addTrans("Coptic-Latin/TC", &ID);
			else if (option == SE_BETA)
				addTrans("Coptic-Latin/Beta", &ID);
			else if (option == SE_IPA)
				addTrans("Coptic-IPA", &ID);
			else {
				addTrans("Coptic-Latin", &ID);
				scripts[SE_LATIN] = true;
			}
		}
	}
	if (scripts[SE_HEBREW]) {
		if (option == SE_SBL)
			addTrans("Hebrew-Latin/SBL", &ID);
		else if (option == SE_TC)
			addTrans("Hebrew-Latin/TC", &ID);
		else if (option == SE_BETA)
			addTrans("Hebrew-Latin/Beta", &ID);
		else if (option == SE_UNGEGN)
			addTrans("Hebrew-Latin/UNGEGN", &ID);
		else if (option == SE_ALALC)
			addTrans("Hebrew-Latin/ALALC", &ID);
		else if (option == SE_SYRIAC)
			addTrans("Hebrew-Syriac", &ID);
		else {
			addTrans("Hebrew-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_CYRILLIC]) {
		if (option == SE_GLAGOLITIC)
			addTrans("Cyrillic-Glagolitic", &ID);
		else {
			addTrans("Cyrillic-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_ARABIC]) {
		addTrans("Arabic-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_SYRIAC]) {
		if (option == SE_TC)
			addTrans("Syriac-Latin/TC", &ID);
		else if (option == SE_BETA)
			addTrans("Syriac-Latin/Beta", &ID);
		else if (option == SE_HUGOYE)
			addTrans("Syriac-Latin/Hugoye", &ID);
		else if (option == SE_HEBREW)
			addTrans("Syriac-Hebrew", &ID);
		else {
			addTrans("Syriac-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_THAI]) {
		addTrans("Thai-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_GEORGIAN]) {
		if (option == SE_ISO)
			addTrans("Georgian-Latin/ISO", &ID);
		else if (option == SE_ALALC)
			addTrans("Georgian-Latin/ALALC", &ID);
		else if (option == SE_BGNPCGN)
			addTrans("Georgian-Latin/BGNPCGN", &ID);
		else if (option == SE_IPA)
			addTrans("Georgian-IPA", &ID);
		else {
			addTrans("Georgian-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_ARMENIAN]) {
		if (option == SE_ISO)
			addTrans("Armenian-Latin/ISO", &ID);
		else if (option == SE_ALALC)
			addTrans("Armenian-Latin/ALALC", &ID);
		else if (option == SE_BGNPCGN)
			addTrans("Armenian-Latin/BGNPCGN", &ID);
		else if (option == SE_IPA)
			addTrans("Armenian-IPA", &ID);
		else {
			addTrans("Armenian-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_ETHIOPIC]) {
		if (option == SE_UNGEGN)
			addTrans("Ethiopic-Latin/UNGEGN", &ID);
		else if (option == SE_ISO)
			addTrans("Ethiopic-Latin/ISO", &ID);
		else if (option == SE_ALALC)
			addTrans("Ethiopic-Latin/ALALC", &ID);
		else if (option == SE_SERA)
			addTrans("Ethiopic-Latin/SERA", &ID);
		else {
			addTrans("Ethiopic-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_GOTHIC]) {
		if (option == SE_BASICLATIN)
			addTrans("Gothic-Latin/Basic", &ID);
		else if (option == SE_IPA)
			addTrans("Gothic-IPA", &ID);
		else {
			addTrans("Gothic-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_UGARITIC]) {
		if (option == SE_SBL)
			addTrans("Ugaritic-Latin/SBL", &ID);
		else {
			addTrans("Ugaritic-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_MEROITIC]) {
		addTrans("Meroitic-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_LINEARB]) {
		addTrans("LinearB-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_CYPRIOT]) {
		addTrans("Cypriot-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_RUNIC]) {
		addTrans("Runic-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_OGHAM]) {
		addTrans("Ogham-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_THAANA]) {
		if (option == SE_ALALC)
			addTrans("Thaana-Latin/ALALC", &ID);
		else if (option == SE_BGNPCGN)
			addTrans("Thaana-Latin/BGNPCGN", &ID);
		else {
			addTrans("Thaana-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_GLAGOLITIC]) {
		if (option == SE_ISO)
			addTrans("Glagolitic-Latin/ISO", &ID);
		else if (option == SE_ALALC)
			addTrans("Glagolitic-Latin/ALALC", &ID);
		else if (option == SE_CYRILLIC)	// counterpart of Cyrillic-Glagolitic above
			addTrans("Glagolitic-Cyrillic", &ID);
		else {
			addTrans("Glagolitic-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}

	if (scripts[SE_HAN]) {
		if (!strnicmp(moduleLang(module), "ja", 2)) {
			addTrans("Kanji-Romaji", &ID);
		}
		else {
			addTrans("Han-Latin", &ID);
		}
		scripts[SE_LATIN] = true;
	}

	// Inter-Kana and Kana to Latin transliterators
	if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
		addTrans("Katakana-Hiragana", &ID);
		scripts[SE_HIRAGANA] = true;
	}
	else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
		addTrans("Hiragana-Katakana", &ID);
		scripts[SE_KATAKANA] = true;
	}
	else {
		if (scripts[SE_KATAKANA]) {
			addTrans("Katakana-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_HIRAGANA]) {
			addTrans("Hiragana-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}

	// Korean to Latin transliterators
	if (scripts[SE_HANGUL]) {
		addTrans("Hangul-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_JAMO]) {
		addTrans("Jamo-Latin", &ID);
		scripts[SE_LATIN] = true;
	}

	// Indic-Latin
	if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
		// Indic to Latin
		if (scripts[SE_DEVANAGARI]) {
			addTrans("Devanagari-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_TAMIL]) {
			addTrans("Tamil-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_BENGALI]) {
			addTrans("Bengali-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_GURMUKHI]) {
			addTrans("Gurmukhi-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_GUJARATI]) {
			addTrans("Gujarati-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_ORIYA]) {
			addTrans("Oriya-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_TELUGU]) {
			addTrans("Telugu-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_KANNADA]) {
			addTrans("Kannada-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_MALAYALAM]) {
			addTrans("Malayalam-Latin", &ID);
			scripts[SE_LATIN] = true;
		}
	}
	else {
		// Indic target: funnel everything through InterIndic, then out again
		if (scripts[SE_LATIN]) {
			addTrans("Latin-InterIndic", &ID);
		}
		if (scripts[SE_DEVANAGARI]) {
			addTrans("Devanagari-InterIndic", &ID);
		}
		if (scripts[SE_TAMIL]) {
			addTrans("Tamil-InterIndic", &ID);
		}
		if (scripts[SE_BENGALI]) {
			addTrans("Bengali-InterIndic", &ID);
		}
		if (scripts[SE_GURMUKHI]) {
			addTrans("Gurmukhi-InterIndic", &ID);
		}
		if (scripts[SE_GUJARATI]) {
			addTrans("Gujarati-InterIndic", &ID);
		}
		if (scripts[SE_ORIYA]) {
			addTrans("Oriya-InterIndic", &ID);
		}
		if (scripts[SE_TELUGU]) {
			addTrans("Telugu-InterIndic", &ID);
		}
		if (scripts[SE_KANNADA]) {
			addTrans("Kannada-InterIndic", &ID);
		}
		if (scripts[SE_MALAYALAM]) {
			addTrans("Malayalam-InterIndic", &ID);
		}

		switch (option) {
		case SE_DEVANAGARI:
			addTrans("InterIndic-Devanagari", &ID);
			break;
		case SE_TAMIL:
			addTrans("InterIndic-Tamil", &ID);
			break;
		case SE_BENGALI:
			addTrans("InterIndic-Bengali", &ID);
			break;
		case SE_GURMUKHI:
			addTrans("InterIndic-Gurmukhi", &ID);
			break;
		case SE_GUJARATI:
			addTrans("InterIndic-Gujarati", &ID);
			break;
		case SE_ORIYA:
			addTrans("InterIndic-Oriya", &ID);
			break;
		case SE_TELUGU:
			addTrans("InterIndic-Telugu", &ID);
			break;
		case SE_KANNADA:
			addTrans("InterIndic-Kannada", &ID);
			break;
		case SE_MALAYALAM:
			addTrans("InterIndic-Malayalam", &ID);
			break;
		default:
			addTrans("InterIndic-Latin", &ID);
			scripts[SE_LATIN] = true;
			break;
		}
	}

	if (scripts[SE_TENGWAR]) {
		addTrans("Tengwar-Latin", &ID);
		scripts[SE_LATIN] = true;
	}
	if (scripts[SE_CIRTH]) {
		addTrans("Cirth-Latin", &ID);
		scripts[SE_LATIN] = true;
	}

	// Everything is Latin by now; go from Latin to the requested script
	if (scripts[SE_LATIN]) {
		switch (option) {
		case SE_GREEK:
			addTrans("Latin-Greek", &ID);
			break;
		case SE_HEBREW:
			addTrans("Latin-Hebrew", &ID);
			break;
		case SE_CYRILLIC:
			addTrans("Latin-Cyrillic", &ID);
			break;
		case SE_ARABIC:
			addTrans("Latin-Arabic", &ID);
			break;
		case SE_SYRIAC:
			addTrans("Latin-Syriac", &ID);
			break;
		case SE_THAI:
			addTrans("Latin-Thai", &ID);
			break;
		case SE_GEORGIAN:
			addTrans("Latin-Georgian", &ID);
			break;
		case SE_ARMENIAN:
			addTrans("Latin-Armenian", &ID);
			break;
		case SE_ETHIOPIC:
			addTrans("Latin-Ethiopic", &ID);
			break;
		case SE_GOTHIC:
			addTrans("Latin-Gothic", &ID);
			break;
		case SE_UGARITIC:
			addTrans("Latin-Ugaritic", &ID);
			break;
		case SE_COPTIC:
			addTrans("Latin-Coptic", &ID);
			break;
		case SE_KATAKANA:
			addTrans("Latin-Katakana", &ID);
			break;
		case SE_HIRAGANA:
			addTrans("Latin-Hiragana", &ID);
			break;
		case SE_JAMO:
			addTrans("Latin-Jamo", &ID);
			break;
		case SE_HANGUL:
			addTrans("Latin-Hangul", &ID);
			break;
		case SE_MEROITIC:
			addTrans("Latin-Meroitic", &ID);
			break;
		case SE_LINEARB:
			addTrans("Latin-LinearB", &ID);
			break;
		case SE_CYPRIOT:
			addTrans("Latin-Cypriot", &ID);
			break;
		case SE_RUNIC:
			addTrans("Latin-Runic", &ID);
			break;
		case SE_OGHAM:
			addTrans("Latin-Ogham", &ID);
			break;
		case SE_THAANA:
			addTrans("Latin-Thaana", &ID);
			break;
		case SE_GLAGOLITIC:
			addTrans("Latin-Glagolitic", &ID);
			break;
		case SE_TENGWAR:
			addTrans("Latin-Tengwar", &ID);
			break;
		case SE_CIRTH:
			addTrans("Latin-Cirth", &ID);
			break;
		}
	}

	if (option == SE_BASICLATIN) {
		addTrans("Any-Latin1", &ID);
	}

	addTrans("NFC", &ID);

	err = U_ZERO_ERROR;
	Transliterator *trans = createTrans(UnicodeString(ID), UTRANS_FORWARD, err);
	if (trans && !U_FAILURE(err)) {
		UnicodeString target = UnicodeString(source);
		trans->transliterate(target);

		// First try with twice the input size; transliteration can expand the
		// text beyond that, in which case ICU reports the size it needs.
		text.setSize(text.size() * 2);
		len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
		if (err == U_BUFFER_OVERFLOW_ERROR) {
			err = U_ZERO_ERROR;
			text.setSize(len);
			len = ucnv_fromUChars(conv, text.getRawData(), text.size(), target.getBuffer(), target.length(), &err);
		}
		text.setSize(len);
	}
	delete trans;
	delete[] source;
	ucnv_close(conv);
	return 0;
}

SWORD_NAMESPACE_END
#endif