diff options
Diffstat (limited to 'src/modules/filters/unicodertf.cpp')
-rw-r--r-- | src/modules/filters/unicodertf.cpp | 114 |
1 files changed, 65 insertions, 49 deletions
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp index b53a2d7..0a2bca8 100644 --- a/src/modules/filters/unicodertf.cpp +++ b/src/modules/filters/unicodertf.cpp @@ -1,6 +1,6 @@ /****************************************************************************** * - * unicodertf - SWFilter decendant to convert a double byte unicode file + * unicodertf - SWFilter descendant to convert a double byte unicode file * to RTF tags */ @@ -9,62 +9,78 @@ #include <stdio.h> #include <unicodertf.h> +SWORD_NAMESPACE_START + UnicodeRTF::UnicodeRTF() { } -char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { - unsigned char *to, *from, *maxto; - int len; - char digit[10]; - short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + const unsigned char *from; + char digit[10]; + unsigned long ch; + signed short utf16; + unsigned char from2[7]; - len = strlenw(text) + 2; // shift string to right of buffer - if (len < maxlen) { - memmove(&text[maxlen - len], text, len); - from = (unsigned char*)&text[maxlen - len]; - } - else from = (unsigned char*)text; - maxto =(unsigned char*)text + maxlen; + SWBuf orig = text; + + from = (const unsigned char *)orig.c_str(); // ------------------------------- - for (to = (unsigned char*)text; *from && (to <= maxto); from++) { - ch = 0; - if ((*from & 128) != 128) { - *to++ = *from; - continue; - } - if ((*from & 128) && ((*from & 64) != 64)) { - // error - *from = 'x'; - continue; - } - *from <<= 1; - int subsequent; - for (subsequent = 1; (*from & 128); subsequent++) { - *from <<= 1; - from[subsequent] &= 63; - ch <<= 6; - ch |= from[subsequent]; - } - subsequent--; - *from <<=1; - char significantFirstBits = 8 - (2+subsequent); - - ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); - from += subsequent; - *to++ = '\\'; - *to++ = 'u'; - sprintf(digit, "%d", ch); - for (char *dig = digit; *dig; dig++) - *to++ = *dig; - *to++ = '?'; + for (text = ""; *from; from++) { + ch = 0; + //case: ANSI + if ((*from & 128) != 128) { + text += *from; + continue; + } + //case: Invalid UTF-8 (illegal continuing byte in initial position) + if ((*from & 128) && ((*from & 64) != 64)) { + continue; + } + //case: 2+ byte codepoint + from2[0] = *from; + from2[0] <<= 1; + int subsequent; + for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) { + from2[0] <<= 1; + from2[subsequent] = from[subsequent]; + from2[subsequent] &= 63; + ch <<= 6; + ch |= from2[subsequent]; + } + subsequent--; + from2[0] <<= 1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + if (ch < 0x10000) { + utf16 = (signed short)ch; + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } + else { + utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00); + text += '\\'; + text += 'u'; + sprintf(digit, "%d", utf16); + text += digit; + text += '?'; + } } - - if (to != maxto) { - *to++ = 0; - } - *to = 0; + return 0; } + +SWORD_NAMESPACE_END |