diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:54:01 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:54:01 -0400 |
commit | 71a39f4652cd51df814c930dd268f3c9ad2aee86 (patch) | |
tree | 5994350a603908c4e4d660bc9d72c4ec43dd648e /src/modules/filters/utf8utf16.cpp | |
parent | 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (diff) |
Imported Upstream version 1.6.0+dfsg
Diffstat (limited to 'src/modules/filters/utf8utf16.cpp')
-rw-r--r-- | src/modules/filters/utf8utf16.cpp | 79 |
1 files changed, 36 insertions, 43 deletions
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp index 5c1614c..a770d5f 100644 --- a/src/modules/filters/utf8utf16.cpp +++ b/src/modules/filters/utf8utf16.cpp @@ -2,12 +2,30 @@ * * UTF8UTF16 - SWFilter descendant to convert UTF-8 to UTF-16 * + * + * + * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * */ #include <stdlib.h> #include <stdio.h> +#include <sysdata.h> #include <utf8utf16.h> +#include <utilstr.h> #include <swbuf.h> SWORD_NAMESPACE_START @@ -15,61 +33,36 @@ SWORD_NAMESPACE_START UTF8UTF16::UTF8UTF16() { } + char UTF8UTF16::processText(SWBuf &text, const SWKey *key, const SWModule *module) { const unsigned char *from; - unsigned long ch; - signed short utf16; - unsigned char from2[7]; - SWBuf orig = text; from = (const unsigned char *)orig.c_str(); // ------------------------------- - for (text = ""; *from; from++) { - ch = 0; - //case: ANSI - if ((*from & 128) != 128) { + text = ""; + while (*from) { + + __u32 ch = getUniCharFromUTF8(&from); + + if (!ch) continue; // invalid char + + if (ch < 0x10000) { text.setSize(text.size()+2); - *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)*from; - continue; - } - //case: Invalid UTF-8 (illegal continuing byte in initial position) - if ((*from & 128) && ((*from & 64) != 64)) { - continue; + *((__u16 *)(text.getRawData()+(text.size()-2))) = (__u16)ch; } - //case: 2+ byte codepoint - from2[0] = *from; - from2[0] <<= 1; - int subsequent; - for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) { - from2[0] <<= 1; - from2[subsequent] = from[subsequent]; - from2[subsequent] &= 63; - ch <<= 6; - ch |= from2[subsequent]; + else { + __u16 utf16; + utf16 = (__s16)((ch - 0x10000) / 0x400 + 0xD800); + text.setSize(text.size()+4); + *((__u16 *)(text.getRawData()+(text.size()-4))) = utf16; + utf16 = (__s16)((ch - 0x10000) % 0x400 + 0xDC00); + *((__u16 *)(text.getRawData()+(text.size()-2))) = utf16; } - subsequent--; - from2[0] <<= 1; - char significantFirstBits = 8 - (2+subsequent); - - ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8)); - from += subsequent; - if (ch < 0x10000) { - text.setSize(text.size()+2); - *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)ch; - } - else { - utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800); - text.setSize(text.size()+2); - *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16; - utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00); - text.setSize(text.size()+2); - *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)utf16; - } } text.setSize(text.size()+2); - *((unsigned short *)(text.getRawData()+(text.size()-2))) = (unsigned short)0; + *((__u16 *)(text.getRawData()+(text.size()-2))) = (__u16)0; return 0; |