// path: root/src/backend/managers/btstringmgr.cpp
// blob: 9f5725823cd4cb2f3fb4048be4be50e219566d94
/*********
*
* This file is part of BibleTime's source code, http://www.bibletime.info/.
*
* Copyright 1999-2008 by the BibleTime developers.
* The BibleTime source code is licensed under the GNU General Public License version 2.0.
*
**********/

#include "btstringmgr.h"

/**
 * Upper-cases a NUL-terminated string in place.
 *
 * When isUtf8() accepts the text, it is decoded with QString::fromUtf8(),
 * upper-cased with QString::toUpper() and re-encoded as UTF-8. Otherwise
 * every byte is passed through the C library's toupper().
 *
 * @param text    Buffer to convert; it is modified in place.
 * @param maxlen  Maximum number of bytes written into @p text on the UTF-8
 *                path. 0 means "the current length of text". Bytes of the
 *                original string that lie beyond this window are left
 *                untouched.
 * @return @p text, for convenience.
 *
 * Upper-casing may change the encoded length of the text. A result that does
 * not fit into the window is cut; the cut is moved back to a character
 * boundary and the buffer is always left NUL-terminated.
 */
char* BTStringMgr::upperUTF8(char* text, unsigned int maxlen) const {
	const size_t origLen = strlen(text);
	const size_t max = (maxlen > 0) ? maxlen : origLen;

	if (!isUtf8(text)) {
		for (char* p = text; *p; ++p) {
			// toupper() is only defined for values representable as
			// unsigned char (or EOF); a plain char >= 0x80 may be negative.
			*p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
		}

		return text;
	}

	// The QByteArray temporary lives until the end of this statement, so the
	// pointer handed to strncpy() stays valid for the whole copy.
	strncpy(text, QString::fromUtf8(text).toUpper().toUtf8().constData(), max);

	// strncpy() writes no terminator when the source fills all max bytes.
	// If the original string reached the end of the window (origLen >= max),
	// its own terminator at text[origLen] is still in place. Otherwise the
	// last byte of the window is the only position known to be writable, so
	// terminate there. This is a no-op when the result fitted, because
	// strncpy() has then already zero-padded up to text[max - 1].
	if (origLen < max) {
		text[max - 1] = '\0';
	}

	// A result cut by the byte limit may end inside a multi-byte sequence.
	// Only the copy window is examined; an untouched tail of the original
	// string beyond it is left exactly as it was.
	const size_t len = strlen(text);

	if (len > 0 && len <= max) {
		// Count the continuation bytes (10xxxxxx) at the very end.
		size_t tail = 0;

		while (tail < len && tail < 3
				&& (static_cast<unsigned char>(text[len - 1 - tail]) & 0xC0) == 0x80) {
			++tail;
		}

		if (tail < len) {
			const unsigned char lead = static_cast<unsigned char>(text[len - 1 - tail]);
			size_t needed = 1;

			if ((lead & 0xE0) == 0xC0) {
				needed = 2;
			}
			else if ((lead & 0xF0) == 0xE0) {
				needed = 3;
			}
			else if ((lead & 0xF8) == 0xF0) {
				needed = 4;
			}

			// Drop the final sequence if its lead byte announces more bytes
			// than are actually present.
			if (needed > tail + 1) {
				text[len - 1 - tail] = '\0';
			}
		}
	}

	return text;
}

/**
 * Upper-cases a NUL-terminated single-byte string in place.
 *
 * Each byte is converted with the C library's toupper(), so which bytes
 * change depends on the current C locale.
 *
 * @param text  Buffer to convert; it is modified in place.
 * @param max   Unused; the conversion never changes the string length.
 * @return @p text, for convenience.
 */
char* BTStringMgr::upperLatin1(char* text, unsigned int /*max*/) const {
	for (char* p = text; *p; ++p) {
		// toupper() is only defined for values representable as unsigned
		// char (or EOF). Latin-1 bytes >= 0x80 are negative where plain char
		// is signed, so convert through unsigned char first.
		*p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
	}

	return text;
}

/**
 * Tells callers whether this string manager can handle Unicode text.
 *
 * @return Always true.
 */
bool BTStringMgr::supportsUnicode() const {
	const bool unicodeCapable = true;

	return unicodeCapable;
}

/**
 * Heuristically decides whether a NUL-terminated buffer contains UTF-8 text.
 *
 * The buffer is scanned byte by byte:
 *  - A 7-bit byte must be classified as plain ASCII text (T) in the table
 *    below, otherwise the buffer is rejected.
 *  - A byte of the form 10xxxxxx may not start a character.
 *  - A lead byte 110xxxxx .. 1111110x must be followed by 1 .. 5 bytes of
 *    the form 10xxxxxx; the lead bytes 0xFE and 0xFF are rejected.
 *  - If the terminator is reached in the middle of a multi-byte sequence,
 *    scanning simply stops; the incomplete sequence neither counts as a
 *    match nor causes rejection.
 *
 * @param buf  NUL-terminated bytes to inspect.
 * @return true if no offending byte was found and at least one complete
 *         multi-byte sequence was seen; false otherwise. Pure 7-bit text is
 *         therefore reported as "not UTF-8".
 */
bool BTStringMgr::isUtf8(const char *buf) const {
	// Byte classes used by the table. Only entries below 0x80 are ever
	// looked up, and only the distinction "T or not T" matters.
	enum {
		F = 0,   /* character never appears in text */
		T = 1,   /* character appears in plain ASCII text */
		I = 2,   /* character appears in ISO-8859 text */
		X = 3    /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
	};

	static const unsigned char text_chars[256] = {
		/*                  BEL BS HT LF    FF CR    */
		F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
		/*                              ESC          */
		F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
		T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
		/*            NEL                            */
		X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
		X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
		I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
	};

	bool gotone = false;   /* set once a complete multi-byte sequence is seen */
	unsigned char c;

	for (int i = 0; (c = static_cast<unsigned char>(buf[i])) != 0; i++) {
		if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
			/*
			 * Even if everything else is valid UTF-8, still reject
			 * the text if it uses weird control characters.
			 */
			if (text_chars[c] != T)
				return false;
		}
		else if ((c & 0x40) == 0) {   /* 10xxxxxx never 1st byte */
			return false;
		}
		else {                        /* 11xxxxxx begins UTF-8 */
			int following;

			if ((c & 0x20) == 0) {             /* 110xxxxx */
				following = 1;
			}
			else if ((c & 0x10) == 0) {        /* 1110xxxx */
				following = 2;
			}
			else if ((c & 0x08) == 0) {        /* 11110xxx */
				following = 3;
			}
			else if ((c & 0x04) == 0) {        /* 111110xx */
				following = 4;
			}
			else if ((c & 0x02) == 0) {        /* 1111110x */
				following = 5;
			}
			else {
				return false;
			}

			for (int n = 0; n < following; n++) {
				i++;
				c = static_cast<unsigned char>(buf[i]);

				/* Terminator inside a sequence: stop scanning here. */
				if (c == 0)
					return gotone;

				/* Every continuation byte must look like 10xxxxxx. */
				if ((c & 0x80) == 0 || (c & 0x40))
					return false;
			}

			gotone = true;
		}
	}

	return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
}