diff options
Diffstat (limited to 'src/backend/managers/btstringmgr.cpp')
-rw-r--r-- | src/backend/managers/btstringmgr.cpp | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/src/backend/managers/btstringmgr.cpp b/src/backend/managers/btstringmgr.cpp new file mode 100644 index 0000000..9f57258 --- /dev/null +++ b/src/backend/managers/btstringmgr.cpp @@ -0,0 +1,139 @@ +/********* +* +* This file is part of BibleTime's source code, http://www.bibletime.info/. +* +* Copyright 1999-2008 by the BibleTime developers. +* The BibleTime source code is licensed under the GNU General Public License version 2.0. +* +**********/ + +#include "btstringmgr.h" + +char* BTStringMgr::upperUTF8(char* text, unsigned int maxlen) const { + const int max = (maxlen>0) ? maxlen : strlen(text); + + if (isUtf8(text)) { + strncpy(text, (const char*)QString::fromUtf8(text).toUpper().toUtf8(), max); + + return text; + } + else { + char* ret = text; + + while (*text) { + *text = toupper(*text); + text++; + } + + return ret; + } + + return text; +} + +char* BTStringMgr::upperLatin1(char* text, unsigned int /*max*/) const { + char* ret = text; + + while (*text) { + *text = toupper(*text); + text++; + } + + return ret; +} + +bool BTStringMgr::supportsUnicode() const { + return true; +} + +bool BTStringMgr::isUtf8(const char *buf) const { + int i, n; + register unsigned char c; + bool gotone = false; + + #define F 0 /* character never appears in text */ + #define T 1 /* character appears in plain ASCII text */ + #define I 2 /* character appears in ISO-8859 text */ + #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ + + static const unsigned char text_chars[256] = { + /* BEL BS HT LF FF CR */ + F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ + /* ESC */ + F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ + /* NEL */ + X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ + }; + + /* *ulen = 0; */ + + for (i = 0; (c = buf[i]); i++) { + if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ + /* + * Even if the whole file is valid UTF-8 sequences, + * still reject it if it uses weird control characters. + */ + + if (text_chars[c] != T) + return false; + + } + else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */ + return false; + } + else { /* 11xxxxxx begins UTF-8 */ + int following; + + if ((c & 0x20) == 0) { /* 110xxxxx */ + following = 1; + } + else if ((c & 0x10) == 0) { /* 1110xxxx */ + following = 2; + } + else if ((c & 0x08) == 0) { /* 11110xxx */ + following = 3; + } + else if ((c & 0x04) == 0) { /* 111110xx */ + following = 4; + } + else if ((c & 0x02) == 0) { /* 1111110x */ + following = 5; + } + else + return false; + + for (n = 0; n < following; n++) { + i++; + + if (!(c = buf[i])) + goto done; + + if ((c & 0x80) == 0 || (c & 0x40)) + return false; + } + + gotone = true; + } + } + +done: + return gotone; /* don't claim it's UTF-8 if it's all 7-bit */ +} + +#undef F +#undef T +#undef I +#undef X |