1 files changed, 66 insertions, 69 deletions
diff --git a/src/utilfuns/utilstr.cpp b/src/utilfuns/utilstr.cpp
index 52dc286..9a0ddc6 100644
--- a/src/utilfuns/utilstr.cpp
+++ b/src/utilfuns/utilstr.cpp
@@ -1,20 +1,26 @@
+/*
+ * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ  85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
 #include <utilstr.h>
 #include <ctype.h>
 #include <string.h>
 
-#include <localemgr.h>
-
+#include <sysdata.h>
 
-#ifdef _ICU_
-#include <unicode/utypes.h>
-#include <unicode/ucnv.h>
-#include <unicode/ustring.h>
-#include <unicode/uchar.h>
-
-#include <unicode/unistr.h>
-#include <unicode/translit.h>
-
-#endif
 
 SWORD_NAMESPACE_START
 
@@ -67,13 +73,14 @@ const unsigned char SW_toupper_array[256] =
  */
 
 char *stdstr(char **ipstr, const char *istr, unsigned int memPadFactor) {
+	if (*ipstr)	// the old buffer is released on every call, even when istr is null
+		delete [] *ipstr;	// NOTE(review): istr is read below after this delete -- confirm callers never pass a pointer into *ipstr
 	if (istr) {
-		if (*ipstr)
-			delete [] *ipstr;
 		int len = strlen(istr) + 1;
 		*ipstr = new char [ len * memPadFactor ];
 		memcpy(*ipstr, istr, len);
 	}
+	else *ipstr = 0;	// null input clears the pointer instead of leaving it pointing at the freed buffer
 	return *ipstr;
 }
 
@@ -174,68 +181,58 @@ int stricmp(const char *s1, const char *s2) {
 #endif
 }
 
+
 /******************************************************************************
- * toupperstr - converts a string to uppercase string
+ * getUniCharFromUTF8 - retrieves the next Unicode codepoint from a UTF8 string
+ * 					and increments buf to start of next codepoint
  *
- * ENT:	target - string to convert
+ * ENT:	buf - address of a utf8 buffer
  *
- * RET:	target
+ * RET:	buf - incremented past the last byte used in computing the current codepoint
+ * 		unicode codepoint value (0 with buf incremented indicates an invalid UTF-8 byte)
  */
 
-// char *toupperstr(char *buf) {
-// 	char *ret = buf;
-// 
-// 	/*if (StringHelper::getSystemStringHelper()) {
-// 		StringHelper::getSystemStringHelper()->upperStringLatin1( ret );
-// 	}
-// 	else*/ {
-// 		while (*buf) {
-// 			*buf++ = SW_toupper(*buf);
-// 		}
-// // 	}
-// 	return ret;
-// }
+__u32 getUniCharFromUTF8(const unsigned char **buf) {
+	__u32 ch = 0;
+	unsigned char multibuf[7];	// scratch: [0] holds the lead byte, [1..] the masked continuation bytes
+
+	//case: We're at the end (NUL byte) -- return 0 and leave buf where it is
+	if (!(**buf)) {
+		return ch;
+	}
 
+	//case: 7-bit ASCII (high bit clear) -- the byte itself is the codepoint
+	if (!(**buf & 128)) {
+		ch = **buf;
+		(*buf)++;
+		return ch;
+	}
 
-/******************************************************************************
- * toupperstr - converts a string to uppercase string
- *
- * ENT:	target - string to convert
- *
- * RET:	target
- */
+	//case: Invalid UTF-8 (illegal continuing byte in initial position) -- skip it and return 0
+	if ((**buf & 128) && (!(**buf & 64))) {
+		(*buf)++;
+		return ch;
+	}
+
+	//case: 2+ byte codepoint (lead byte has its top two bits set)
+	multibuf[0] = **buf;
+	multibuf[0] <<= 1;	// drop the first leading 1 bit of the lead byte
+	int subsequent;
+	for (subsequent = 1; (multibuf[0] & 128) && (subsequent < 7); subsequent++) {	// one pass per remaining leading 1 bit, capped at 6 passes
+		multibuf[0] <<= 1;
+		multibuf[subsequent] = (*buf)[subsequent];	// NOTE(review): read without checking for NUL or a 10xxxxxx byte -- a truncated sequence can carry buf past the terminator; confirm input is always well formed
+		multibuf[subsequent] &= 63;	// keep the low 6 payload bits
+		ch <<= 6;
+		ch |= multibuf[subsequent];
+	}
+	subsequent--;	// now equals the number of continuation bytes folded into ch
+	multibuf[0] <<= 1;	// shift out the bit that ended the loop, leaving the lead byte's payload at the top of the byte
+	char significantFirstBits = 8 - (2+subsequent);	// count of payload bits contributed by the lead byte
+	
+	ch |= (((__s16)multibuf[0]) << (((6*subsequent)+significantFirstBits)-8));	// position the lead byte's payload above the 6*subsequent continuation bits
+	*buf += (subsequent+1);	// step over the lead byte and every continuation byte read above
+	return ch;
+}
 
-// char *toupperstr_utf8(char *buf, unsigned int max) {
-// 	char *ret = buf;
-// 
-// /*	if (StringHelper::getSystemStringHelper()) {
-// 		StringHelper::getSystemStringHelper()->upperStringUtf8( ret );
-// 		return ret;
-// 	}*/
-// 	
-// #ifndef _ICU_
-// 	// try to decide if it's worth trying to toupper.  Do we have more
-// 	// characters that are probably lower latin than not?
-// 	long performOp = 0;
-// 	for (const char *ch = buf; *ch; ch++)
-// 		performOp += (*ch > 0) ? 1 : -1;
-// 
-// 	if (performOp > 0) {
-// 		while (*buf)
-// 			*buf = SW_toupper(*buf++);
-// 	}
-// #else
-// 	if (!max)
-// 		max = strlen(ret);
-// 	UErrorCode err = U_ZERO_ERROR;
-// 	UConverter *conv = ucnv_open("UTF-8", &err);
-// 	UnicodeString str(buf, -1, conv, err);
-// 	UnicodeString ustr = str.toUpper();
-// 	ustr.extract(ret, max, conv, err);
-// 	ucnv_close(conv);
-// #endif
-// 
-// 	return ret;
-// }
 
 SWORD_NAMESPACE_END