summary refs log tree commit diff
path: root/utilities/imp2gbs.cpp
diff options
context:
space:
mode:
author Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
committer Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:59 -0400
commit 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch)
tree 847326a4de82f0241ac87cbbc427a1b92a696a02 /utilities/imp2gbs.cpp
parent d7469385b05b9510338407fa123e9ad090f80af6 (diff)
Imported Upstream version 1.5.11
Diffstat (limited to 'utilities/imp2gbs.cpp')
-rw-r--r-- utilities/imp2gbs.cpp 254
1 files changed, 254 insertions, 0 deletions
diff --git a/utilities/imp2gbs.cpp b/utilities/imp2gbs.cpp
new file mode 100644
index 0000000..9488eef
--- /dev/null
+++ b/utilities/imp2gbs.cpp
@@ -0,0 +1,254 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include <entriesblk.h>
+#include <iostream>
+#include <treekeyidx.h>
+#include <rawgenbook.h>
+#include <utilstr.h>
+#include <filemgr.h>
+#include <utf8greekaccents.h>
+#include <stringmgr.h>
+
+#ifdef _ICU_
+
+#include <unicode/utypes.h>
+#include <unicode/ucnv.h>
+#include <unicode/ustring.h>
+#include <unicode/uchar.h>
+
+#include <unicode/unistr.h>
+#include <unicode/translit.h>
+
+#include <unicode/locid.h>
+
+#endif
+
+#ifndef NO_SWORD_NAMESPACE
+using namespace sword;
+#endif
+
+// Command-line state shared by parseParams(), writeEntry() and main().
+
+// Module output path: taken from "-o", otherwise derived from inFile.
+SWBuf outPath;
+// Input file name (argv[1]).
+SWBuf inFile;
+// "-U": upper-case every key before it is written.
+bool toUpper = false;
+// "-g": run every key through greekAccentsFilter before it is written.
+bool greekFilter = false;
+// "-a": open an existing module instead of creating a new one first.
+bool augMod = false;
+// Cleared by "-O".  NOTE(review): nothing in this file reads this flag.
+bool augEnt = true;
+// "-l <levels>": number of pseudo-lexicon prefix levels added to each key (0 = none).
+int lexLevels = 0;
+// Filter applied to keys when greekFilter is set; main() sets its option to "Off".
+UTF8GreekAccents greekAccentsFilter;
+
+
+/**
+ * Write the command-line help text to stderr and terminate the process
+ * with exit status -1.  This function does not return.
+ *
+ * @param app program name shown in the "usage:" line
+ */
+void usage(const char *app) {
+	fputs("imp2gbs 1.0 General Book module creation tool for the SWORD Project\n\n", stderr);
+	fprintf(stderr, "usage: %s <inFile> [OPTIONS]\n", app);
+
+	// The option descriptions contain no format specifiers, so they are
+	// emitted verbatim as one concatenated literal.
+	fputs(
+		"\t-o <outPath>\n\t\tSpecify an output Path other than inFile location.\n"
+		"\t-a\n\t\tAugment Module [default: create new]\n"
+		"\t-O\n\t\tOverwrite entries of same key [default: append to]\n"
+		"\t-U\n\t\tKey filter: Convert toUpper\n"
+		"\t-g\n\t\tKey filter: Strip Greek diacritics\n"
+		"\t-l <levels>\n\t\tKey filter: Pseudo-Lexicon n-level generation using first character\n"
+		"\t\te.g. -l 2 \"Abbey\" -> \"A/AB/Abbey\"\n"
+		"\n",
+		stderr);
+
+	exit(-1);
+}
+
+
+/**
+ * Parse the command line into the file-level option globals.
+ *
+ * argv[1] is taken as the input file name.  The remaining arguments are
+ * scanned for -o <outPath>, -U, -g, -O, -a and -l <levels>; arguments that
+ * match none of these are ignored.  A malformed option (missing or
+ * dash-prefixed -o value, -U without UTF-8 support, -l without a positive
+ * level count) prints the help text through usage(), which exits.
+ *
+ * When no -o is given, outPath defaults to inFile with everything from the
+ * first '.' of its final path component removed, so "./dir.d/book.imp"
+ * yields "./dir.d/book".
+ *
+ * @param argc argument count as received by main()
+ * @param argv argument vector as received by main()
+ */
+void parseParams(int argc, char **argv) {
+
+	if (argc < 2) {
+		usage(*argv);
+	}
+
+	inFile = argv[1];
+
+	for (int i = 2; i < argc; i++) {
+		if (!strcmp(argv[i], "-o")) {
+			// the value must exist and must not look like another option
+			if ((i+1 < argc) && (argv[i+1][0] != '-')) {
+				outPath = argv[i+1];
+				i++;
+			}
+			else usage(*argv);
+		}
+		else if (!strcmp(argv[i], "-U")) {
+			if (StringMgr::hasUTF8Support()) {
+				toUpper = true;
+			}
+			else {
+				fprintf(stderr, "Error: %s.  Cannot reliably toUpper without UTF8 support\n\t(recompile with ICU enabled)\n\n", *argv);
+				usage(*argv);
+			}
+		}
+		else if (!strcmp(argv[i], "-g")) {
+			greekFilter = true;
+		}
+		else if (!strcmp(argv[i], "-O")) {
+			augEnt = false;
+		}
+		else if (!strcmp(argv[i], "-a")) {
+			augMod = true;
+		}
+		else if (!strcmp(argv[i], "-l")) {
+			if (i+1 < argc) {
+				lexLevels = atoi(argv[i+1]);
+				i++;
+			}
+			// atoi() gives 0 for non-numeric text, and a negative count
+			// would be used to size buffers in writeEntry(); reject both.
+			if (lexLevels < 1) usage(*argv);
+		}
+	}
+	if (!outPath.size()) {
+		outPath = inFile;
+
+		// Find the start of the final path component so that dots in
+		// directory names ("./", "../", "dir.d/") are left alone.
+		const char *path = outPath.c_str();
+		const char *base = path;
+		for (const char *p = path; *p; p++) {
+			if (*p == '/' || *p == '\\') base = p + 1;
+		}
+
+		// Cut at the first '.' of the file name, if it has one.
+		const char *dot = strchr(base, '.');
+		if (dot) {
+			outPath.size(static_cast<unsigned long>(dot - path));
+		}
+	}
+}
+
+
+/**
+ * Apply the configured key filters to keyBuffer and store entBuffer in the
+ * book under the resulting key.
+ *
+ * Filters run in this order: Greek accent stripping (greekFilter),
+ * upper-casing (toUpper), then -- only when built with _ICU_ -- the
+ * pseudo-lexicon prefixing controlled by lexLevels, which turns e.g.
+ * "Abbey" into "A/AB/Abbey" for two levels.  Keys starting with "/Intro"
+ * are not prefixed.
+ *
+ * After positioning the book on the key, " {2}", " {3}", ... suffixes are
+ * tried for as long as the book's key does not report KEYERR_OUTOFBOUNDS
+ * (presumably meaning the key already exists -- confirm against the
+ * TreeKey documentation).  NOTE(review): the augEnt flag set by "-O" is
+ * not consulted here.
+ *
+ * @param book      module that receives the entry
+ * @param keyBuffer key text; taken by value because it is modified locally
+ * @param entBuffer entry text stored under the final key
+ */
+void writeEntry(SWModule *book, SWBuf keyBuffer, SWBuf entBuffer) {
+
+
+	if (greekFilter) {
+		greekAccentsFilter.processText(keyBuffer);
+	}
+
+	if (toUpper) {
+		// leave generous zero-filled room: upper-casing may lengthen the text
+		unsigned size = (keyBuffer.size()+5)*3;
+		keyBuffer.setFillByte(0);
+		keyBuffer.resize(size);
+		StringMgr::getSystemStringMgr()->upperUTF8(keyBuffer.getRawData(), size-2);
+	}
+
+// Added for Hesychius, but this stuff should be pushed back into new StringMgr
+// functionality
+#ifdef _ICU_
+	if (lexLevels && !keyBuffer.startsWith("/Intro")) {
+		unsigned size = (keyBuffer.size()+(lexLevels*2));
+		keyBuffer.setFillByte(0);
+		keyBuffer.resize(size);
+
+		UErrorCode err = U_ZERO_ERROR;
+
+		// working capacity, in UChars for ubuffer and in bytes for the
+		// UTF-8 result written back into keyBuffer
+		int max = (size+5)*3;
+		UChar *ubuffer = new UChar[max+10];
+		int32_t len;
+
+		u_strFromUTF8(ubuffer, max+9, &len, keyBuffer.c_str(), -1, &err);
+		if (err == U_ZERO_ERROR) {
+			// upper-case a copy of the first lexLevels code units
+			UChar *upper = new UChar[(lexLevels+1)*3];
+			memcpy(upper, ubuffer, lexLevels*sizeof(UChar));
+			upper[lexLevels] = 0;
+			len = u_strToUpper(upper, (lexLevels+1)*3, upper, -1, 0, &err);
+
+			// prepend "<UPPER>/" (the deepest level) to the key
+			memmove(ubuffer+len+1, ubuffer, (max-len)*sizeof(UChar));
+			memcpy(ubuffer, upper, len*sizeof(UChar));
+			ubuffer[len] = '/';
+			delete [] upper;
+
+			// Each shorter level duplicates the first `shift` code units
+			// in front of the buffer and terminates them with '/'.
+			for (int i = lexLevels-1; i; i--) {
+				int shift = (i < len)? i : len;
+				memmove(ubuffer+(shift+1), ubuffer, (max-shift)*sizeof(UChar));
+				ubuffer[shift] = '/';
+			}
+
+			// Grow the key so it really holds the `max` bytes promised to
+			// ICU below; previously only `size` bytes were reserved.
+			keyBuffer.resize(max);
+			u_strToUTF8(keyBuffer.getRawData(), max, 0, ubuffer, -1, &err);
+		}
+
+		delete [] ubuffer;
+	}
+#endif
+
+	std::cout << keyBuffer << std::endl;
+
+	book->setKey(keyBuffer.c_str());
+
+	// check to see if we already have an entry
+	for (int i = 2; book->getKey()->Error() != KEYERR_OUTOFBOUNDS; i++) {
+		SWBuf key;
+		key.setFormatted("%s {%d}", keyBuffer.c_str(), i);
+		std::cout << "dup key, trying: " << key << std::endl;
+		book->setKey(key.c_str());
+	}
+
+	book->setEntry(entBuffer);
+}
+
+
+/**
+ * Program entry point: read the input file and write its entries into a
+ * RawGenBook module at outPath.
+ *
+ * A line beginning with "$$$" starts a new entry; the remainder of that
+ * line, trimmed, is the key.  Every following line up to the next "$$$"
+ * line is appended, with a newline, to the entry text.  Entries with an
+ * empty key or empty text are not written.
+ *
+ * Exits with status -2 when the input file cannot be opened; returns 0
+ * otherwise.
+ */
+int main(int argc, char **argv) {
+	greekAccentsFilter.setOptionValue("Off");		// off = accents off
+	parseParams(argc, argv);
+
+	// Make sure the input file is readable before touching the module.
+	FileDesc *input = FileMgr::getSystemFileMgr()->open(inFile, FileMgr::RDONLY);
+	if (input->getFd() < 0) {
+		fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], inFile.c_str());
+		exit(-2);
+	}
+
+	// A fresh module is created unless we were asked to augment one.
+	if (!augMod) {
+		RawGenBook::createModule(outPath);
+	}
+	RawGenBook *book = new RawGenBook(outPath);
+
+	SWBuf line;
+	SWBuf currentKey;
+	SWBuf currentEntry;
+
+	// The body runs once more after getLine() reports the end of input, so
+	// a final line without a trailing newline is still processed.
+	for (bool more = true; more; ) {
+		more = FileMgr::getLine(input, line);
+
+		if (!line.startsWith("$$$")) {
+			// body line: only collected once a key has been seen
+			if (currentKey.size()) {
+				currentEntry += line;
+				currentEntry += "\n";
+			}
+			continue;
+		}
+
+		// key line: flush the entry collected so far, then start a new one
+		if (currentKey.size() && currentEntry.size()) {
+			writeEntry(book, currentKey, currentEntry);
+		}
+		currentKey = line;
+		currentKey << 3;		// presumably drops the leading "$$$" -- see SWBuf::operator<<
+		currentKey.trim();
+		currentEntry.size(0);
+	}
+
+	// flush the last entry of the file
+	if (currentKey.size() && currentEntry.size()) {
+		writeEntry(book, currentKey, currentEntry);
+	}
+
+	delete book;
+
+	FileMgr::getSystemFileMgr()->close(input);
+
+	return 0;
+}
+
+
+