summaryrefslogtreecommitdiff
path: root/src/modules/filters/utf8arshaping.cpp
diff options
context:
space:
mode:
author Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
committer Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
commit 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch)
tree 847326a4de82f0241ac87cbbc427a1b92a696a02 /src/modules/filters/utf8arshaping.cpp
parent d7469385b05b9510338407fa123e9ad090f80af6 (diff)
Imported Upstream version 1.5.11
Diffstat (limited to 'src/modules/filters/utf8arshaping.cpp')
-rw-r--r-- src/modules/filters/utf8arshaping.cpp 51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp
new file mode 100644
index 0000000..702fb62
--- /dev/null
+++ b/src/modules/filters/utf8arshaping.cpp
@@ -0,0 +1,51 @@
+/******************************************************************************
+*
+* utf8arshaping - SWFilter descendant to perform Arabic shaping on
+* UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include <stdlib.h>
+
+#include <utilstr.h>
+
+#include <utf8arshaping.h>
+#include <swbuf.h>
+
+SWORD_NAMESPACE_START
+
+/**
+ * Opens the ICU UTF-8 converter that processText() uses for both the
+ * UTF-8 -> UTF-16 and the UTF-16 -> UTF-8 conversion.
+ *
+ * The status member is cleared before the call: ICU functions return
+ * immediately, without doing any work, when the UErrorCode they are handed
+ * already indicates a failure, so it must never be passed in uninitialized.
+ * If the converter cannot be opened, ucnv_open() returns NULL and leaves the
+ * failure code in err.
+ */
+UTF8arShaping::UTF8arShaping() {
+	err = U_ZERO_ERROR;
+	conv = ucnv_open("UTF-8", &err);
+}
+
+/**
+ * Hands the converter stored in conv back to ICU via ucnv_close().
+ */
+UTF8arShaping::~UTF8arShaping()
+{
+	ucnv_close(this->conv);
+}
+
+/**
+ * Applies ICU Arabic shaping (letter shaping plus European -> Arabic-Indic
+ * digit substitution) to a UTF-8 buffer, in place.
+ *
+ * The text is converted to UTF-16, run through u_shapeArabic(), and converted
+ * back to UTF-8.
+ *
+ * @param text   UTF-8 text to shape; replaced by the shaped text on success.
+ * @param key    Current key. The values 0 and 1 are not real keys but markers
+ *               for the (de)ciphering pass, in which case nothing is done.
+ * @param module Unused by this filter.
+ * @return 0 when the text was shaped (or was empty); -1 when the filter was
+ *         skipped for the ciphering pass or an ICU call failed. After a
+ *         failure in the first two steps text is left untouched.
+ */
+char UTF8arShaping::processText(SWBuf &text, const SWKey *key, const SWModule *module)
+{
+	if ((unsigned long)key < 2)	// hack, we're en(1)/de(0)ciphering
+		return -1;
+
+	const int32_t srcLen = text.length();
+	if (srcLen <= 0)
+		return 0;	// nothing to shape
+
+	// Start every call from a clean status.  ICU functions are no-ops when
+	// the incoming UErrorCode already signals a failure, so a stale error
+	// from a previous call would otherwise disable the filter for good.
+	err = U_ZERO_ERROR;
+
+	// A UTF-8 string never needs more UTF-16 units than it has bytes, and
+	// the shaping options used here do not lengthen the text, so srcLen
+	// units are enough for both work buffers.
+	UChar *ustr  = new UChar[srcLen];
+	UChar *ustr2 = new UChar[srcLen];
+	char retVal  = -1;
+
+	// Convert UTF-8 string to UTF-16 (UChars).  The explicit byte count
+	// avoids a strlen() and keeps the length consistent with the buffers.
+	int32_t ulen = ucnv_toUChars(conv, ustr, srcLen, text.c_str(), srcLen, &err);
+
+	if (U_SUCCESS(err)) {
+		// Give ICU the real capacity of the destination buffer.
+		ulen = u_shapeArabic(ustr, ulen, ustr2, srcLen, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);
+	}
+
+	if (U_SUCCESS(err)) {
+		// One UTF-16 unit becomes at most 3 UTF-8 bytes (a surrogate pair
+		// becomes 4 bytes for 2 units), so this capacity cannot overflow.
+		// Doubling the input size is not enough: a 1-byte digit becomes a
+		// 2-byte Arabic-Indic digit and an invalid input byte becomes the
+		// 3-byte U+FFFD.
+		text.setSize((unsigned long)ulen * 3);
+		const int32_t outLen = ucnv_fromUChars(conv, text.getRawData(), text.size(), ustr2, ulen, &err);
+		if (U_SUCCESS(err)) {
+			text.setSize(outLen);
+			retVal = 0;
+		}
+		else {
+			// Not expected once the two steps above succeeded with the same
+			// converter.  The buffer may be partially overwritten, so hand
+			// back an empty text rather than a corrupted one.
+			text.setSize(0);
+		}
+	}
+
+	delete [] ustr2;
+	delete [] ustr;
+	return retVal;
+}
+
+SWORD_NAMESPACE_END
+#endif