summaryrefslogtreecommitdiff
path: root/src/modules/filters/unicodertf.cpp
blob: bdd2ae11bd12c3a2d4fa99aea0303142f7d84e31 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/******************************************************************************
 *
 *  unicodertf.cpp -	SWFilter descendant to convert UTF-8 to RTF tags
 *
 * $Id: unicodertf.cpp 3081 2014-03-05 19:52:08Z chrislit $
 *
 * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
 *	CrossWire Bible Society
 *	P. O. Box 2528
 *	Tempe, AZ  85280-2528
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 */

#include <stdio.h>
#include <unicodertf.h>
#include <swbuf.h>

SWORD_NAMESPACE_START

UnicodeRTF::UnicodeRTF() {
}


char UnicodeRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
	const unsigned char *from;
	char digit[10];
	unsigned long ch;
        signed short utf16;
	unsigned char from2[7];

	SWBuf orig = text;

	from = (const unsigned char *)orig.c_str();

	// -------------------------------
	for (text = ""; *from; from++) {
		ch = 0;
                //case: ANSI
		if ((*from & 128) != 128) {
			text += *from;
			continue;
		}
                //case: Invalid UTF-8 (illegal continuing byte in initial position)
		if ((*from & 128) && ((*from & 64) != 64)) {
			continue;
		}
                //case: 2+ byte codepoint
		from2[0] = *from;
		from2[0] <<= 1;
		int subsequent;
		for (subsequent = 1; (from2[0] & 128) && (subsequent < 7); subsequent++) {
			from2[0] <<= 1;
			from2[subsequent] = from[subsequent];
			from2[subsequent] &= 63;
			ch <<= 6;
			ch |= from2[subsequent];
		}
		subsequent--;
		from2[0] <<= 1;
		char significantFirstBits = 8 - (2+subsequent);
		
		ch |= (((short)from2[0]) << (((6*subsequent)+significantFirstBits)-8));
		from += subsequent;
                if (ch < 0x10000) {
				utf16 = (signed short)ch;
				text += '\\';
				text += 'u';
				sprintf(digit, "%d", utf16);
				text += digit;
				text += '?';
			 }
			else {
				utf16 = (signed short)((ch - 0x10000) / 0x400 + 0xD800);
				text += '\\';
				text += 'u';
				sprintf(digit, "%d", utf16);
				text += digit;
				text += '?';
				utf16 = (signed short)((ch - 0x10000) % 0x400 + 0xDC00);
				text += '\\';
				text += 'u';
				sprintf(digit, "%d", utf16);
				text += digit;
				text += '?';
			}
	}
	   
	return 0;
}

SWORD_NAMESPACE_END