summaryrefslogtreecommitdiff
path: root/utilities/tei2mod.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'utilities/tei2mod.cpp')
-rw-r--r--utilities/tei2mod.cpp103
1 files changed, 88 insertions, 15 deletions
diff --git a/utilities/tei2mod.cpp b/utilities/tei2mod.cpp
index a5ae6f4..e8c6e95 100644
--- a/utilities/tei2mod.cpp
+++ b/utilities/tei2mod.cpp
@@ -25,6 +25,24 @@
*
* author DM Smith
*/
+
+/*
+ * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
#include <string>
#include <vector>
#include <fstream>
@@ -58,11 +76,14 @@ Latin1UTF8 converter;
int converted = 0;
#endif
-//#define DEBUG
+#define DEBUG
SWLD *module = NULL;
SWKey *currentKey = NULL;
bool normalize = true;
+SWBuf keyStr;
+
+unsigned long entryCount = 0;
/**
* Determine whether the string contains a valid unicode sequence.
@@ -172,7 +193,7 @@ void normalizeInput(SWKey &key, SWBuf &text) {
void writeEntry(SWKey &key, SWBuf &text) {
#ifdef DEBUG
- cout << key << endl;
+ cout << "(" << entryCount << ") " << key << endl;
#endif
module->setKey(key);
@@ -182,16 +203,12 @@ void writeEntry(SWKey &key, SWBuf &text) {
module->setEntry(text);
}
-void linkToEntry(SWBuf &keyBuf, vector<string> &linkBuf) {
-
-/*
- char links = linkBuf.size();
- for (int i = 0; i < links; i++) {
- SWKey tmpkey = linkBuf[i].c_str();
- module->linkEntry(&tmpkey);
- cout << "Linking: " << linkBuf[i] << endl;
- }
-*/
+void linkToEntry(const SWBuf &keyBuf, const SWBuf &linkBuf) {
+ SWKey tmpkey = linkBuf.c_str();
+ module->linkEntry(&tmpkey);
+#ifdef DEBUG
+ cout << "(" << entryCount << ") " << "Linking: " << linkBuf << endl;
+#endif
}
// Return true if the content was handled or is to be ignored.
@@ -199,7 +216,6 @@ void linkToEntry(SWBuf &keyBuf, vector<string> &linkBuf) {
bool handleToken(SWBuf &text, XMLTag *token) {
// The start token for the current entry;
static XMLTag startTag;
- static SWBuf keyBuf;
// Flags to indicate whether we are in a entry, entryFree or superentry
static bool inEntry = false;
@@ -207,6 +223,10 @@ bool handleToken(SWBuf &text, XMLTag *token) {
static bool inSuperEntry = false;
const char *tokenName = token->getName();
+
+ static const char *splitPtr, *splitPtr2 = NULL;
+ static char *splitBuffer = new char[4096];
+ static SWKey tmpKey;
//-- START TAG -------------------------------------------------------------------------
if (!token->isEndTag()) {
@@ -221,7 +241,14 @@ bool handleToken(SWBuf &text, XMLTag *token) {
#endif
startTag = *token;
text = "";
- *currentKey = token->getAttribute("key");
+
+ keyStr = token->getAttribute("n"); // P5 with linking and/or non-URI chars
+ if (!strlen(keyStr)) {
+ keyStr = token->getAttribute("sortKey"); // P5 otherwise
+ if (!strlen(keyStr)) {
+ keyStr = token->getAttribute("key"); // P4
+ }
+ }
return false; // make tag be part of the output
}
@@ -245,7 +272,52 @@ bool handleToken(SWBuf &text, XMLTag *token) {
inEntryFree = false;
inSuperEntry = false;
text += token->toString();
- writeEntry(*currentKey, text);
+
+ entryCount++;
+#ifdef DEBUG
+ cout << "keyStr: " << keyStr << endl;
+#endif
+ splitPtr = strstr(keyStr, "|");
+ if (splitPtr) {
+ strncpy (splitBuffer, keyStr.c_str(), splitPtr - keyStr.c_str());
+ splitBuffer[splitPtr - keyStr.c_str()] = 0;
+ *currentKey = splitBuffer;
+#ifdef DEBUG
+ cout << "splitBuffer: " << splitBuffer << endl;
+ cout << "currentKey: " << *currentKey << endl;
+#endif
+ writeEntry(*currentKey, text);
+#if 1
+ while (splitPtr) {
+ splitPtr += 1;
+ splitPtr2 = strstr(splitPtr, "|");
+ entryCount++;
+ if (splitPtr2) {
+ strncpy (splitBuffer, splitPtr, splitPtr2 - splitPtr);
+ splitBuffer[splitPtr2 - splitPtr] = 0;
+#ifdef DEBUG
+ cout << "splitBuffer: " << splitBuffer << endl;
+ cout << "currentKey: " << *currentKey << endl;
+#endif
+ linkToEntry(currentKey->getText(), splitBuffer);
+ splitPtr = splitPtr2;
+ }
+ else {
+ strcpy (splitBuffer, splitPtr);
+#ifdef DEBUG
+ cout << "splitBuffer: " << splitBuffer << endl;
+ cout << "currentKey: " << *currentKey << endl;
+#endif
+ linkToEntry(currentKey->getText(), splitBuffer);
+ splitPtr = 0;
+ }
+ }
+#endif
+ }
+ else {
+ *currentKey = keyStr;
+ writeEntry(*currentKey, text);
+ }
// Since we consumed the text, clear it
// and tell the caller that the tag was consumed.
@@ -448,6 +520,7 @@ int main(int argc, char **argv) {
if (!handleToken(text, t)) {
text.append(*t);
}
+ delete t;
continue;
}