summaryrefslogtreecommitdiff
path: root/src/modules/filters/utf8arshaping.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/filters/utf8arshaping.cpp')
-rw-r--r-- src/modules/filters/utf8arshaping.cpp 51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
new file mode 100644
index 0000000..702fb62
--- /dev/null
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -0,0 +1,51 @@
+/******************************************************************************
+*
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
+* UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8arshaping.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Opens the ICU converter for UTF-8 and stores it in conv.
+ *
+ * If the converter cannot be opened, ucnv_open() returns NULL and the
+ * failure code is left in err.
+ */
+UTF8arShaping::UTF8arShaping() {
+ // ICU functions return without doing any work when the incoming error
+ // code already signals a failure, so err must start out as U_ZERO_ERROR.
+ err = U_ZERO_ERROR;
+ conv = ucnv_open("UTF-8", &err);
+}
+
+/**
+ * Releases the ICU converter held in conv.
+ */
+UTF8arShaping::~UTF8arShaping()
+{
+ ucnv_close(conv);
+}
+
+/**
+ * Performs Arabic shaping on UTF-8 text, in place.
+ *
+ * The text is converted to UTF-16, passed through u_shapeArabic() with
+ * U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, and converted back to UTF-8.
+ *
+ * @param text   buffer to shape; replaced with the shaped text on success
+ *               and left untouched on any failure
+ * @param key    values below 2 mark an en/deciphering pass, which is skipped
+ * @param module unused
+ * @return 0 if the text was shaped (or was empty), -1 if the call was skipped
+ *         or an ICU step failed
+ */
+char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+ if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
+  return -1;
+
+ // Without a converter nothing can be done; leave the text alone.
+ if (!conv)
+  return -1;
+
+ const int32_t srcLen = text.length();
+ if (srcLen <= 0)
+  return 0;
+
+ // A fresh status per call: ICU functions do nothing once the status holds
+ // a failure, so a stale error would silently empty every later text.
+ UErrorCode status = U_ZERO_ERROR;
+
+ // A UTF-8 string never needs more UTF-16 units than it has bytes.
+ UChar *ustr = new UChar[srcLen];
+ UChar *ustr2 = new UChar[srcLen];
+ char *out = 0;
+ char retVal = -1;
+
+ // Convert UTF-8 string to UTF-16 (UChars)
+ int32_t len = ucnv_toUChars(conv, ustr, srcLen, text.c_str(), -1, &status);
+
+ if (U_SUCCESS(status))
+  len = u_shapeArabic(ustr, len, ustr2, srcLen, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &status);
+
+ if (U_SUCCESS(status)) {
+  // Convert into a scratch buffer first so that a failed conversion
+  // cannot clobber the caller's text. Same capacity as before: twice
+  // the original byte length.
+  const int32_t outCap = srcLen * 2;
+  out = new char[outCap];
+  const int32_t outLen = ucnv_fromUChars(conv, out, outCap, ustr2, len, &status);
+
+  if (U_SUCCESS(status)) {
+   text.setSize(outLen);
+   char *dst = text.getRawData();
+   for (int32_t i = 0; i < outLen; ++i)
+    dst[i] = out[i];
+   retVal = 0;
+  }
+ }
+
+ delete [] out;
+ delete [] ustr2;
+ delete [] ustr;
+ return retVal;
+}
+
+SWORD_NAMESPACE_END
+#endif