1 files changed, 65 insertions, 49 deletions
diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp
index b53a2d7..0a2bca8 100644
--- a/src/modules/filters/unicodertf.cpp
+++ b/src/modules/filters/unicodertf.cpp
@@ -1,6 +1,6 @@
 /******************************************************************************
  *
- * unicodertf -	SWFilter decendant to convert a double byte unicode file
+ * unicodertf -	SWFilter descendant to convert a double byte unicode file
  *				 to RTF tags
  */
 
@@ -9,62 +9,78 @@
 #include <stdio.h>
 #include <unicodertf.h>
 
+SWORD_NAMESPACE_START
+
 UnicodeRTF::UnicodeRTF() {
 }
 
 
-char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
 {
-	unsigned char *to, *from, *maxto;
-	int len;
-        char digit[10];
-        short ch;	// must be signed per unicode spec (negative is ok for big numbers > 32768)
+	const unsigned char *from;
+	char digit[10];
+	unsigned long ch;
+        signed short utf16;
+	unsigned char from2[7];
 
-	len = strlenw(text) + 2;						// shift string to right of buffer
-	if (len < maxlen) {
-	        memmove(&text[maxlen - len], text, len);
-		from = (unsigned char*)&text[maxlen - len];
-	}
-	else	from = (unsigned char*)text;
-        maxto =(unsigned char*)text + maxlen;
+	SWBuf orig = text;
+
+	from = (const unsigned char *)orig.c_str();
 
 	// -------------------------------
-	for (to = (unsigned char*)text; *from && (to <= maxto); from++) {
-	  ch = 0;
-          if ((*from & 128) != 128) {
-	       *to++ = *from;
-               continue;
-          }
-          if ((*from & 128) && ((*from & 64) != 64)) {
-	    // error
-               *from = 'x';
-               continue;
-          }
-          *from <<= 1;
-          int subsequent;
-          for (subsequent = 1; (*from & 128); subsequent++) {
-          	*from <<= 1;
-               from[subsequent] &= 63;
-               ch <<= 6;
-               ch |= from[subsequent];
-          }
-          subsequent--;
-          *from <<=1;
-          char significantFirstBits = 8 - (2+subsequent);
-          
-          ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
-          from += subsequent;
-          *to++ = '\\';
-          *to++ = 'u';
-	  sprintf(digit, "%d", ch);
-		for (char *dig = digit; *dig; dig++)
-			*to++ = *dig;
-		*to++ = '?';
+	for (text = ""; *from; from++) {
+		ch = 0;
+		// case: 7-bit ASCII -- copied through unchanged
+		if ((*from & 128) != 128) {
+			text += *from;
+			continue;
+		}
+		// case: invalid UTF-8 (continuation byte in lead position) -- dropped
+		if ((*from & 128) && ((*from & 64) != 64)) {
+			continue;
+		}
+		// case: lead byte; each 1 bit below the top one announces a 6-bit continuation byte
+		from2[0] = *from;
+		from2[0] <<= 1;
+		int subsequent;
+		for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
+			// A continuation byte must be 10xxxxxx.  Testing it before use also stops at
+			// the NUL ending a truncated sequence, so we never read or skip past the end.
+			if ((from[subsequent] & 0xC0) != 0x80)
+				break;
+			from2[0] <<= 1;
+			from2[subsequent] = from[subsequent];
+			from2[subsequent] &= 63;
+			ch <<= 6;
+			ch |= from2[subsequent];
+		}
+		if (from2[0] & 128)
+			continue;	// announced byte missing/malformed: drop the lead byte and rescan
+		subsequent--;
+		from2[0] <<= 1;
+		char significantFirstBits = 8 - (2+subsequent);
+		ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
+		from += subsequent;
+		if (subsequent > 3 || ch > 0x10FFFF)
+			continue;	// 5+ byte forms and values past U+10FFFF have no UTF-16 encoding
+		if (ch >= 0x10000) {
+			// outside the BMP: emit the high surrogate here, the low surrogate below
+			utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
+			sprintf(digit, "%d", utf16);
+			text += "\\u";
+			text += digit;
+			text += '?';
+			ch = (ch - 0x10000) % 0x400 + 0xDC00;
+		}
+		// RTF \uN takes a signed 16-bit decimal, followed by a '?' fallback character
+		utf16 = (signed short)ch;
+		sprintf(digit, "%d", utf16);
+		text += "\\u";
+		text += digit;
+		text += '?';
 	}
-        
-        if (to != maxto) {
-              	*to++ = 0;
-        }
-        *to = 0;
+	   
 	return 0;
 }
+
+SWORD_NAMESPACE_END