summaryrefslogtreecommitdiff
path: root/src/modules/filters/utf8nfc.cpp
diff options
context:
space:
mode:
author Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
committer Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
commit 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch)
tree 847326a4de82f0241ac87cbbc427a1b92a696a02 /src/modules/filters/utf8nfc.cpp
parent d7469385b05b9510338407fa123e9ad090f80af6 (diff)
Imported Upstream version 1.5.11
Diffstat (limited to 'src/modules/filters/utf8nfc.cpp')
-rw-r--r-- src/modules/filters/utf8nfc.cpp 50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp
new file mode 100644
index 0000000..15b76b5
--- /dev/null
+++ b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,50 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+#include <unicode/unistr.h>
+#include <unicode/normlzr.h>
+#include <unicode/unorm.h>
+
+#include <utf8nfc.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Opens the ICU converter used to move text between UTF-8 and UTF-16.
+ *
+ * The result is stored in conv and the ICU status in err (both declared
+ * outside this file section). ucnv_open() yields NULL on failure, so conv
+ * may be NULL after construction.
+ */
+UTF8NFC::UTF8NFC() {
+	// ICU functions return immediately, doing nothing, when the incoming
+	// UErrorCode already holds a failure value; clear it so that a stale or
+	// uninitialized err cannot make the open fail spuriously.
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+/**
+ * Releases the ICU converter obtained in the constructor.
+ */
+UTF8NFC::~UTF8NFC() {
+	// ucnv_close() itself ignores NULL; the guard only makes that explicit.
+	if (conv) {
+		ucnv_close(conv);
+	}
+}
+
+/**
+ * Rewrites text in place as its NFC (canonical composition) normalization.
+ *
+ * The buffer is decoded from UTF-8 with conv, normalized by ICU, and encoded
+ * back to UTF-8 with the same converter.
+ *
+ * @param text   UTF-8 buffer; replaced by the normalized text on success
+ * @param key    key being processed; the values 0 and 1 are sentinels, see below
+ * @param module not used by this filter
+ * @return 0 when text was normalized; -1 when nothing was done (sentinel key,
+ *         converter unavailable, or an ICU conversion/normalization error)
+ */
+char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	// hack, we're en(1)/de(0)ciphering: key is a flag here, not a real pointer.
+	// size_t is used because unsigned long is narrower than a pointer on
+	// LLP64 targets and would truncate the value before the comparison.
+	if ((size_t)key < 2)
+		return -1;
+
+	// With a NULL converter ICU silently falls back to the default codepage,
+	// which is not necessarily UTF-8, so refuse to touch the text instead.
+	if (!conv)
+		return -1;
+
+	UErrorCode status = U_ZERO_ERROR;
+	UnicodeString source(text.getRawData(), text.length(), conv, status);
+	if (U_FAILURE(status))
+		return -1;
+
+	UnicodeString target;
+	Normalizer::normalize(source, UNORM_NFC, 0, target, status);
+	if (U_FAILURE(status))
+		return -1;
+
+	// Preflight: ask ICU for the exact encoded length rather than guessing a
+	// growth factor (NFC can expand UTF-8 by more than 2x for some characters).
+	// U_BUFFER_OVERFLOW_ERROR is the expected outcome of a zero-capacity call.
+	int32_t needed = target.extract(0, 0, conv, status);
+	if (status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(status))
+		return -1;
+
+	// text is only modified from here on, after every fallible step succeeded.
+	status = U_ZERO_ERROR;
+	text.setSize(needed);
+	target.extract(text.getRawData(), needed, conv, status);
+
+	// Not expected to fail after a successful preflight with the same
+	// converter and input; reported anyway rather than ignored.
+	return U_FAILURE(status) ? -1 : 0;
+}
+
+SWORD_NAMESPACE_END
+#endif