diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:59 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:59 -0400 |
commit | 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch) | |
tree | 847326a4de82f0241ac87cbbc427a1b92a696a02 /src/modules/filters/utf8nfc.cpp | |
parent | d7469385b05b9510338407fa123e9ad090f80af6 (diff) |
Imported Upstream version 1.5.11
Diffstat (limited to 'src/modules/filters/utf8nfc.cpp')
-rw-r--r-- | src/modules/filters/utf8nfc.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
/******************************************************************************
 *
 * utf8nfc - SWFilter descendant to perform NFC (canonical composition
 *           normalization) on UTF-8 text
 */

#ifdef _ICU_

#include <stdlib.h>

#include <utilstr.h>
#include <unicode/unistr.h>
#include <unicode/normlzr.h>
#include <unicode/unorm.h>

#include <utf8nfc.h>
#include <swbuf.h>

SWORD_NAMESPACE_START

/**
 * Opens the ICU "UTF-8" converter that processText() uses to move text
 * into and out of ICU's UnicodeString.
 */
UTF8NFC::UTF8NFC() {
	// ICU functions return immediately, doing nothing, when the incoming
	// status already holds a failure code, so it must be reset before use.
	err = U_ZERO_ERROR;
	conv = ucnv_open("UTF-8", &err);
}

/**
 * Releases the converter opened by the constructor.
 */
UTF8NFC::~UTF8NFC() {
	ucnv_close(conv);
}

/**
 * Rewrites text in place as its NFC-normalized form.
 *
 * @param text   buffer to normalize; it is decoded and re-encoded through
 *               the converter opened in the constructor
 * @param key    pointer values 0 and 1 are treated as the
 *               deciphering/enciphering markers and cause an early return
 * @param module not used by this filter
 * @return 0 when text was normalized; -1 when nothing was done because key
 *         is a cipher marker or because ICU reported a failure.  If the
 *         failure happens before re-encoding starts, text is left
 *         untouched; if re-encoding itself fails, text is emptied rather
 *         than left holding a partial result.
 */
char UTF8NFC::processText(SWBuf &text, const SWKey *key, const SWModule *module)
{
	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
		return -1;

	// UnicodeString keeps its own copy of the decoded text, so the buffer
	// behind text may be resized freely further down.
	UErrorCode status = U_ZERO_ERROR;
	UnicodeString source(text.getRawData(), (int32_t)text.length(), conv, status);
	if (U_FAILURE(status))
		return -1;

	UnicodeString target;
	status = U_ZERO_ERROR;
	Normalizer::normalize(source, UNORM_NFC, 0, target, status);
	if (U_FAILURE(status))
		return -1;

	// Twice the input size is only an estimate of the space required.  When
	// it is too small, extract() reports U_BUFFER_OVERFLOW_ERROR and returns
	// the length that is actually needed, so grow to that size and retry.
	status = U_ZERO_ERROR;
	text.setSize(text.size() * 2);
	int32_t len = target.extract(text.getRawData(), (int32_t)text.size(), conv, status);
	if (status == U_BUFFER_OVERFLOW_ERROR) {
		status = U_ZERO_ERROR;
		text.setSize(len);
		len = target.extract(text.getRawData(), (int32_t)text.size(), conv, status);
	}
	if (U_FAILURE(status)) {
		// The buffer has already been overwritten; do not expose a
		// truncated or partially converted result as valid text.
		text.setSize(0);
		return -1;
	}

	// Trim the working buffer down to the bytes actually produced.
	text.setSize(len);

	return 0;
}

SWORD_NAMESPACE_END
#endif