diff options
Diffstat (limited to 'src/utilfuns/utilstr.cpp')
-rw-r--r-- | src/utilfuns/utilstr.cpp | 135 |
1 files changed, 66 insertions, 69 deletions
/******************************************************************************
 * stdstr - clones a string into a freshly allocated buffer, releasing
 *	whatever buffer the destination pointer held before
 *
 * ENT:	ipstr		- address of the destination pointer; *ipstr must be
 *				either 0 or a buffer obtained from new char[],
 *				because it is released with delete []
 *	istr		- string to clone, or 0 to simply clear *ipstr;
 *				may point into the buffer currently held by
 *				*ipstr
 *	memPadFactor	- the new buffer is (strlen(istr) + 1) * memPadFactor
 *				bytes long; a factor of 0 is treated as 1
 *
 * RET:	*ipstr (the new buffer, or 0 when istr is 0)
 */

char *stdstr(char **ipstr, const char *istr, unsigned int memPadFactor) {
	char *fresh = 0;

	if (istr) {
		// a factor of 0 would allocate an empty buffer and let the
		// memcpy below write past it, so fall back to an exact fit
		const size_t factor = memPadFactor ? memPadFactor : 1;
		const size_t len = strlen(istr) + 1;
		fresh = new char [ len * factor ];
		memcpy(fresh, istr, len);
	}

	// release the old buffer only after the copy is complete: istr is
	// allowed to alias *ipstr, and freeing first would read freed memory
	delete [] *ipstr;
	*ipstr = fresh;
	return *ipstr;
}
codepoint value (0 with buf incremented is invalid UTF8 byte */ -// char *toupperstr(char *buf) { -// char *ret = buf; -// -// /*if (StringHelper::getSystemStringHelper()) { -// StringHelper::getSystemStringHelper()->upperStringLatin1( ret ); -// } -// else*/ { -// while (*buf) { -// *buf++ = SW_toupper(*buf); -// } -// // } -// return ret; -// } +__u32 getUniCharFromUTF8(const unsigned char **buf) { + __u32 ch = 0; + unsigned char multibuf[7]; + + //case: We're at the end + if (!(**buf)) { + return ch; + } + //case: ANSI + if (!(**buf & 128)) { + ch = **buf; + (*buf)++; + return ch; + } -/****************************************************************************** - * toupperstr - converts a string to uppercase string - * - * ENT: target - string to convert - * - * RET: target - */ + //case: Invalid UTF-8 (illegal continuing byte in initial position) + if ((**buf & 128) && (!(**buf & 64))) { + (*buf)++; + return ch; + } + + //case: 2+ byte codepoint + multibuf[0] = **buf; + multibuf[0] <<= 1; + int subsequent; + for (subsequent = 1; (multibuf[0] & 128) && (subsequent < 7); subsequent++) { + multibuf[0] <<= 1; + multibuf[subsequent] = (*buf)[subsequent]; + multibuf[subsequent] &= 63; + ch <<= 6; + ch |= multibuf[subsequent]; + } + subsequent--; + multibuf[0] <<= 1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((__s16)multibuf[0]) << (((6*subsequent)+significantFirstBits)-8)); + *buf += (subsequent+1); + return ch; +} -// char *toupperstr_utf8(char *buf, unsigned int max) { -// char *ret = buf; -// -// /* if (StringHelper::getSystemStringHelper()) { -// StringHelper::getSystemStringHelper()->upperStringUtf8( ret ); -// return ret; -// }*/ -// -// #ifndef _ICU_ -// // try to decide if it's worth trying to toupper. Do we have more -// // characters that are probably lower latin than not? -// long performOp = 0; -// for (const char *ch = buf; *ch; ch++) -// performOp += (*ch > 0) ? 
1 : -1; -// -// if (performOp > 0) { -// while (*buf) -// *buf = SW_toupper(*buf++); -// } -// #else -// if (!max) -// max = strlen(ret); -// UErrorCode err = U_ZERO_ERROR; -// UConverter *conv = ucnv_open("UTF-8", &err); -// UnicodeString str(buf, -1, conv, err); -// UnicodeString ustr = str.toUpper(); -// ustr.extract(ret, max, conv, err); -// ucnv_close(conv); -// #endif -// -// return ret; -// } SWORD_NAMESPACE_END |