Imported Upstream version 1.6.0+dfsg

author: Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:54:01 -0400
committer: Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:54:01 -0400
commit: 71a39f4652cd51df814c930dd268f3c9ad2aee86 (patch)
tree: 5994350a603908c4e4d660bc9d72c4ec43dd648e /utilities/osis2mod.cpp
parent: 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (diff)
1 files changed, 1154 insertions, 644 deletions
diff --git a/utilities/osis2mod.cpp b/utilities/osis2mod.cpp
index 69d984d..473a90f 100644
--- a/utilities/osis2mod.cpp
+++ b/utilities/osis2mod.cpp
@@ -1,24 +1,40 @@
+/*
+ * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
+ *	CrossWire Bible Society
+ *	P. O. Box 2528
+ *	Tempe, AZ  85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ */
+
 #include <ctype.h>
 #include <stdio.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <stdlib.h>
-#include <string>
 #include <stack>
+#include <vector>
 #include <iostream>
 #include <fstream>
 
 #include <utilstr.h>
 #include <swmgr.h>
 #include <rawtext.h>
+#include <rawtext4.h>
 #include <swbuf.h>
 #include <utilxml.h>
 #include <listkey.h>
 #include <versekey.h>
 
 #include <ztext.h>
-// #include <zld.h>
-// #include <zcom.h>
 #include <lzsscomprs.h>
 #include <zipcomprs.h>
 #include <cipherfil.h>
@@ -28,61 +44,61 @@
 #include <latin1utf8.h>
 #endif
 
-//#define DEBUG
-
-// Debug for simple transformation stack
-//#define DEBUG_XFORM
-
-// Debug for parsing osisRefs
-//#define DEBUG_REF
-
-// Debug for tag stack
-//#define DEBUG_STACK
-
 #ifndef NO_SWORD_NAMESPACE
 using namespace sword;
 #endif
 
 using namespace std;
 
-#ifdef _ICU_
-UTF8NFC normalizer;
-int normalized = 0;
+// Turn debugging on and off
+//#define DEBUG
+int       debug            =   0;
+const int DEBUG_WRITE      =   1; // writing to module
+const int DEBUG_VERSE      =   2; // verse start and end
+const int DEBUG_QUOTE      =   4; // quotes, especially Words of Christ (WOC)
+const int DEBUG_TITLE      =   8; // titles
+const int DEBUG_INTERVERSE =  16; // inter-verse material
+const int DEBUG_XFORM      =  32; // transformations
+const int DEBUG_REV11N     =  64; // versification
+const int DEBUG_REF        = 128; // parsing of osisID and osisRef
+const int DEBUG_STACK      = 256; // cleanup of references
+const int DEBUG_OTHER      = 512; // ins and outs of books, chapters and verses
+
+// Exit codes
+const int EXIT_BAD_ARG     =   1; // Bad parameter given for program
+const int EXIT_NO_WRITE    =   2; // Could not open the module for writing
+const int EXIT_NO_CREATE   =   3; // Could not create the module
+const int EXIT_NO_READ     =   4; // Could not open the input file for reading.
+const int EXIT_BAD_NESTING =   5; // BSP or BCV nesting is bad
 
+#ifdef _ICU_
+UTF8NFC    normalizer;
 Latin1UTF8 converter;
-int converted = 0;
 #endif
+int normalized = 0;
+int converted  = 0;
 
 SWText *module = 0;
-VerseKey *currentVerse = 0;
+VerseKey currentVerse;
+SWBuf v11n     = "KJV";
 char activeOsisID[255];
 char currentOsisID[255];
-const char *osisabbrevs[] = {"Gen", "Exod", "Lev", "Num", "Deut", "Josh", "Judg",
-	"Ruth", "1Sam", "2Sam", "1Kgs", "2Kgs", "1Chr", "2Chr", "Ezra", "Neh",
-	"Esth", "Job", "Ps", "Prov", "Eccl", "Song", "Isa", "Jer", "Lam", "Ezek",
-	"Dan", "Hos", "Joel", "Amos", "Obad", "Jonah", "Mic", "Nah", "Hab",
-	"Zeph", "Hag", "Zech", "Mal",
 
-	"Matt", "Mark", "Luke", "John", "Acts", "Rom", "1Cor", "2Cor", "Gal",
-	"Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Titus",
-	"Phlm", "Heb", "Jas", "1Pet", "2Pet", "1John", "2John", "3John",
-	"Jude", "Rev"};
+SWBuf activeVerseText;
+
+ListKey currentKeyIDs = ListKey();
+
+std::vector<ListKey> linkedVerses;
 
 static bool inCanonicalOSISBook = true; // osisID is for a book that is not in Sword's canon
-static bool normalize = true; // Whether to normalize UTF-8 to NFC
+static bool normalize           = true; // Whether to normalize UTF-8 to NFC
 
 bool isOSISAbbrev(const char *buf) {
-	bool match = false;
-	for (int i = 0; i < 66; i++) {
-		if (!strcmp(buf, osisabbrevs[i])) {
-			match = true;
-			break;
-		}
-	}
-	return match;
+	VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
+	const VerseMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
+	return av11n->getBookNumberByOSISName(buf) >= 0;
 }
 
-
 /**
  * Determine whether the string contains a valid unicode sequence.
  * The following table give the pattern of a valid UTF-8 character.
@@ -109,51 +125,92 @@ bool isOSISAbbrev(const char *buf) {
  * author DM Smith
  */
 int detectUTF8(const char *txt) {
-    unsigned int  countUTF8 = 0;
-    int count = 0;
-    
-    // Cast it to make masking and shifting easier
-    const unsigned char *p = (const unsigned char*) txt;
-    while (*p) {
-        // Is the high order bit set?
-        if (*p & 0x80) {
-            // Then count the number of high order bits that are set.
-            // This determines the number of following bytes
-            // that are a part of the unicode character
-            unsigned char i = *p;
-            for (count = 0; i & 0x80; count++) {
-                i <<= 1;
-            }
-
-            // Validate count:
-            // Count 0: bug in code that would cause core walking
-            // Count 1: is a pattern of 10nnnnnn,
-            //          which does not signal the start of a unicode character
-            // Count 5 to 8: 111110nn, 1111110n and 11111110 and 11111111
-            //          are not legal starts, either
-            if (count < 2 || count > 4) return 0;
-
-            // At this point we expect (count - 1) following characters
-            // of the pattern 10nnnnnn
-            while (--count && *++p) {
-                // The pattern of each following character must be: 10nnnnnn
-                // So, compare the top 2 bits.
-                if ((0xc0 & *p) != 0x80) return  0;
-            }
-
-            // Oops, we've run out of bytes too soon: Cannot be UTF-8
-            if (count) return 0;
-
-            // We have a valid UTF-8 character, so count it
-            countUTF8++;
-        }
+	unsigned int  countUTF8 = 0;
+	int           count     = 0;
+	
+	// Cast it to make masking and shifting easier
+	const unsigned char *p = (const unsigned char*) txt;
+	while (*p) {
+		// Is the high order bit set?
+		if (*p & 0x80) {
+			// Then count the number of high order bits that are set.
+			// This determines the number of following bytes
+			// that are a part of the unicode character
+			unsigned char i = *p;
+			for (count = 0; i & 0x80; count++) {
+				i <<= 1;
+			}
+
+			// Validate count:
+			// Count 0: bug in code that would cause core walking
+			// Count 1: is a pattern of 10nnnnnn,
+			//          which does not signal the start of a unicode character
+			// Count 5 to 8: 111110nn, 1111110n and 11111110 and 11111111
+			//          are not legal starts, either
+			if (count < 2 || count > 4) return 0;
+
+			// At this point we expect (count - 1) following characters
+			// of the pattern 10nnnnnn
+			while (--count && *++p) {
+				// The pattern of each following character must be: 10nnnnnn
+				// So, compare the top 2 bits.
+				if ((0xc0 & *p) != 0x80) return  0;
+			}
+
+			// Oops, we've run out of bytes too soon: Cannot be UTF-8
+			if (count) return 0;
+
+			// We have a valid UTF-8 character, so count it
+			countUTF8++;
+		}
 
-        // Advance to the next character to examine.
-        p++;
-    }
-    
-    // At this point it is either UTF-8 or 7-bit ascii
-    return countUTF8 ? 1 : -1;
+		// Advance to the next character to examine.
+		p++;
+	}
+	
+	// At this point it is either UTF-8 or 7-bit ascii
+	return countUTF8 ? 1 : -1;
+}
+
+void prepareSWText(const char *osisID, SWBuf &text)
+{
+	// Always check on UTF8 and report on non-UTF8 entries
+	int utf8State = detectUTF8(text.c_str());
+
+	// Trust, but verify.
+	if (!normalize && !utf8State) {
+		cout << "WARNING(UTF8): " << osisID << ": Should be converted to UTF-8 (" << text << ")" << endl;
+	}
+
+#ifdef _ICU_
+	if (normalize) {
+		// Don't need to normalize text that is ASCII
+		// But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
+		if (!utf8State) {
+			cout << "INFO(UTF8): " << osisID << ": Converting to UTF-8 (" << text << ")" << endl;
+			converter.processText(text, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			converted++;
+
+			// Prepare for double check. This probably can be removed.
+			// But for now we are running the check again.
+			// This is to determine whether we need to normalize output of the conversion.
+			utf8State = detectUTF8(text.c_str());
+		}
+
+		// Double check. This probably can be removed.
+		if (!utf8State) {
+			cout << "ERROR(UTF8): " << osisID << ": Converting to UTF-8 (" << text << ")" << endl;
+		}
+
+		if (utf8State > 0) {
+			SWBuf before = text;
+			normalizer.processText(text, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
+			if (before != text) {
+				normalized++;
+			}
+		}
+	}
+#endif
 }
 
 // This routine converts an osisID or osisRef into one that SWORD can parse into a verse list
@@ -175,8 +232,10 @@ void prepareSWVerseKey(SWBuf &buf) {
 	bool inRange = false;
 	while (*p) {
 		if (inRange) {
-#ifdef DEBUG_REF
-		cout << "Copy range marker:" << *p << endl;;
+#ifdef DEBUG
+		if (debug & DEBUG_REF) {
+			cout << "DEBUG(REF): Copy range marker:" << *p << endl;;
+		}
 #endif
 			// Range markers are copied as is
 			*s++ = *p++;
@@ -193,28 +252,36 @@ void prepareSWVerseKey(SWBuf &buf) {
 		if (*n == ':') {
 			// set p to skip the work prefix
 			p = n + 1;
-#ifdef DEBUG_REF
-			cout << "Found a work prefix ";
-			for (char *x = s; x <= n; x++) {
-				cout << *x;
+#ifdef DEBUG
+			if (debug & DEBUG_REF) {
+				cout << "DEBUG(REF): Found a work prefix ";
+				for (char *x = s; x <= n; x++) {
+					cout << *x;
+				}
+				cout << endl;
 			}
-			cout << endl;
 #endif
 		}
 
 		// Now we are in the meat of an osisID.
 		// Copy it to its end but stop on a grain marker of '!'
-#ifdef DEBUG_REF
-		cout << "Copy osisID:";
+#ifdef DEBUG
+		if (debug & DEBUG_REF) {
+			cout << "DEBUG(REF): Copy osisID:";
+		}
 #endif
 		while (*p && *p != '!' && *p != ' ' && *p != '-') {
-#ifdef DEBUG_REF
-			cout << *p;
+#ifdef DEBUG
+			if (debug & DEBUG_REF) {
+				cout << *p;
+			}
 #endif
 			*s++ = *p++;
 		}
-#ifdef DEBUG_REF
-		cout << endl;
+#ifdef DEBUG
+		if (debug & DEBUG_REF) {
+			cout << endl;
+		}
 #endif
 
 		// The ! and everything following until we hit
@@ -224,12 +291,14 @@ void prepareSWVerseKey(SWBuf &buf) {
 			while (*n && *n != ' ' && *n != '-') {
 				n++;
 			}
-#ifdef DEBUG_REF
-			cout << "Found a grain suffix ";
-			for (char *x = p; x < n; x++) {
-				cout << *x;
+#ifdef DEBUG
+			if (debug & DEBUG_REF) {
+				cout << "DEBUG(REF): Found a grain suffix ";
+				for (char *x = p; x < n; x++) {
+					cout << *x;
+				}
+				cout << endl;
 			}
-			cout << endl;
 #endif
 			p = n;
 		}
@@ -240,9 +309,11 @@ void prepareSWVerseKey(SWBuf &buf) {
 		// then we are entering a range
 		inRange = !inRange && *p == '-';
 
-#ifdef DEBUG_REF
-		if (inRange) {
-			cout << "Found a range" << endl;
+#ifdef DEBUG
+		if (debug & DEBUG_REF) {
+			if (inRange) {
+				cout << "DEBUG(REF): Found a range" << endl;
+			}
 		}
 #endif
 
@@ -254,8 +325,10 @@ void prepareSWVerseKey(SWBuf &buf) {
 			}
 			// replacing them all with a ';'
 			*s++ = ';';
-#ifdef DEBUG_REF
-			cout << "replacing space with ;. Remaining: " << p << endl;
+#ifdef DEBUG
+			if (debug & DEBUG_REF) {
+				cout << "DEBUG(REF): replacing space with ;. Remaining: " << p << endl;
+			}
 #endif
 		}
 	}
@@ -267,607 +340,921 @@ void prepareSWVerseKey(SWBuf &buf) {
 		*s = '\0';
 		// Since we modified the swbuf, we need to tell it what we have done
 		buf.setSize(s - buf.c_str());
-#ifdef DEBUG_REF
-		cout << "shortended keyVal to`" << buf.c_str() << "`"<< endl;
+#ifdef DEBUG
+		if (debug & DEBUG_REF) {
+			cout << "DEBUG(REF): shortended keyVal to`" << buf.c_str() << "`"<< endl;
+		}
 #endif
 	}
 }
 
-bool isKJVRef(const char *buf) {
-	VerseKey vk, test;
-	vk.AutoNormalize(0);
-	vk.Headings(1);	// turn on mod/testmnt/book/chap headings
-	vk.Persist(1);
-	// lets do some tests on the verse --------------
-	vk = buf;
-	test = buf;
+/**
+ * Determine whether a verse as given is valid for the versification.
+ * This is done by comparing the before and after of normalization.
+ */
+bool isValidRef(const char *buf) {
+	// Create a VerseKey that does not do auto normalization
+	// Note: need to turn on headings so that a heading does not get normalized anyway
+	// And set it to the reference under question
+	VerseKey before;
+	before.setVersificationSystem(currentVerse.getVersificationSystem());
+	before.AutoNormalize(0);
+	before.Headings(1);
+	before.setText(buf);
+
+	// If we are a heading we must bail
+	// These will autonormalize to the last verse of the prior chapter
+	if (!before.Testament() || !before.Book() || !before.Chapter() || !before.Verse()) {
+		return true;
+	}
+
+	// Create a VerseKey that does do auto normalization
+	// And set it to the reference under question
+	VerseKey after;
+	after.setVersificationSystem(currentVerse.getVersificationSystem());
+	after.AutoNormalize(1);
+	after.setText(buf);
+
+	if (before == after)
+	{
+		return true;
+	}
+
+	// If we have gotten here the reference is not in the selected versification.
+	cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
 
-	if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading
 #ifdef DEBUG
-		cout << (const char*)vk << " == "  << (const char*)test << endl;
-#endif
-		return (vk == test);
+	if (debug & DEBUG_REV11N) {
+		cout << "DEBUG(V11N): " << before << " normalizes to "  << after << endl;
 	}
-	else return true;	// no check if we're a heading... Probably bad.
+#endif
+
+	return false;
 }
 
+/**
+ * This routine is used to ensure that all the text in the input is saved to the module.
+ * Assumption: The input orders all the verses for a chapter in numerical order. Thus, any
+ * verses that are not in the chosen versification (v11n) follow those that are.
+ *
+ * The prior implementation of this adjusted the verse to the last one that is in the chosen v11n.
+ * If the chapter were extra, then it is appended to the last verse of the last
+ * chapter in the chosen v11n for that book. If it is just extra verses for a chapter, then it is
+ * appended to the last verse of the chapter.
+ *
+ * The problem with this is when an OSIS verse refers to more than one verse, e.g.
+ * osisID="Gen.1.29 Gen.1.30 Gen.1.31" (Gen.1.31 is the last verse of the chapter in the chosen v11n)
+ * and then it is followed by Gen.1.32.
+ *
+ * This routine assumes that linking is postponed to the end so that in the example Gen.1.30-31
+ * are not linked but rather empty. This routine will then find the last verse in the computed
+ * chapter that has content.
+ *
+ * Alternatively, we could have done linking as we went, but this routine would have needed
+ * to find the first entry in the link set and elsewhere in the code when appending to a
+ * verse, it would need to be checked for adjacent links and those would have needed to be adjusted.
+ *
+ * param key the key that may need to be adjusted
+ */
+void makeValidRef(VerseKey &key) {
+
+	int chapterMax = key.getChapterMax();
+	int verseMax   = key.getVerseMax();
 
-void makeKJVRef(VerseKey &key) {
-	cout << "re-versified " << key;
 #ifdef DEBUG
-	cout << "\tC" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) << ":V" << (int)(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
+	if (debug & DEBUG_REV11N) {
+		cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
+	}
 #endif
-	if (key.Chapter() > key.builtin_books[key.Testament()-1][key.Book()-1].chapmax) {
-		key.Chapter(key.builtin_books[key.Testament()-1][key.Book()-1].chapmax);
-		key.Verse(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
+
+	cout << "INFO(V11N): " << key.getOSISRef() << " is not in the " << key.getVersificationSystem() << " versification.";
+	// Since isValidRef returned false constrain the key to the nearest prior reference.
+	// If we are past the last chapter set the reference to the last chapter
+	if (key.Chapter() > chapterMax) {
+		key.Chapter(chapterMax);
 	}
-	else if (key.Verse() > key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]) {
-		key.Verse(key.builtin_books[key.Testament()-1][key.Book()-1].versemax[key.Chapter()-1]);
+
+	// Either we set the chapter to the last chapter and now need to set to the last verse in the chapter
+	// Or the verse is beyond the end of the chapter.
+	// In any case we need to constrain the verse to its chapter.
+	key.Verse(verseMax);
+
+	// There are three cases we want to handle:
+	// In the examples we are using the KJV versification where the last verse of Matt.7 is Matt.7.29.
+	// In each of these cases the out-of-versification, extra verse is Matt.7.30.
+	// 1) The "extra" verse follows the last verse in the chapter.
+	//      <verse osisID="Matt.7.29">...</verse><verse osisID="Matt.7.30">...</verse>
+	//    In this case re-versify Matt.7.30 as Matt.7.29.
+	//
+	// 2) The "extra" verse follows a range (a set of linked verses).
+	//      <verse osisID="Matt.7.28-Matt.7.29">...</verse><verse osisID="Matt.7.30">...</verse>
+	//    In this case, re-versify Matt.7.30 as Matt.7.28, the first verse in the linked set.
+	//    Since we are postponing linking, we want to re-versify to the last entry in the module.
+	//
+	// 3) The last verse in the chapter is not in the input. There may be other verses missing as well.
+	//      <verse osisID="Matt.7.8">...</verse><verse osisID="Matt.7.30">...</verse>
+	//    In this case we should re-versify Matt.7.30 as Matt.7.29.
+	//    However, since this and 2) are ambiguous, we'll re-versify to the last entry in the module.
+	
+	while (!key.Error() && !module->hasEntry(&key)) {
+		key.decrement(1);
 	}
-	cout << "\tas " << key << endl;
-}
 
+	cout << " Appending content to " << key.getOSISRef() << endl;
+}
 
-void writeEntry(VerseKey &key, SWBuf &text, bool force = false) {
-	static SWBuf activeVerseText;
+void writeEntry(SWBuf &text, bool force = false) {
 	char keyOsisID[255];
 
-	if (inCanonicalOSISBook) {
-		strcpy(keyOsisID, key.getOSISRef());
+	static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2400 $\"/>";
+	static bool firstOT = true;
+	static bool firstNT = true;
 
-		// set keyOsisID to anything that an osisID cannot be.
-		if (force) {
-			strcpy(keyOsisID, "-force");
-		}
-
-		static VerseKey lastKey;
-		lastKey.AutoNormalize(0);
-		lastKey.Headings(1);
+	if (!inCanonicalOSISBook) {
+		return;
+	}
 
-		VerseKey saveKey;
-		saveKey.AutoNormalize(0);
-		saveKey.Headings(1);
-		saveKey = key;
+	strcpy(keyOsisID, currentVerse.getOSISRef());
 
-		// If we have seen a verse and the supplied one is different then we output the collected one.
-		if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
+	// set keyOsisID to anything that an osisID cannot be.
+	if (force) {
+		strcpy(keyOsisID, "-force");
+	}
 
-			key = lastKey;
+	static VerseKey lastKey;
+	lastKey.setVersificationSystem(currentVerse.getVersificationSystem());
+	lastKey.AutoNormalize(0);
+	lastKey.Headings(1);
 
-			if (!isKJVRef(key)) {
-				makeKJVRef(key);
-			}
+	VerseKey saveKey;
+	saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
+	saveKey.AutoNormalize(0);
+	saveKey.Headings(1);
+	saveKey = currentVerse;
 
-#ifdef _ICU_
-			int utf8State = detectUTF8(activeVerseText.c_str());
-			if (normalize) {
-				// Don't need to normalize text that is ASCII
-				// But assume other non-UTF-8 text is Latin1 (cp1252) and convert it to UTF-8
-				if (!utf8State) {
-					cout << "Warning: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
-					converter.processText(activeVerseText, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
-					converted++;
-
-					// Prepare for double check. This probably can be removed.
-					// But for now we are running the check again.
-					// This is to determine whether we need to normalize output of the conversion.
-					utf8State = detectUTF8(activeVerseText.c_str());
-				}
+	// If we have seen a verse and the supplied one is different then we output the collected one.
+	if (*activeOsisID && strcmp(activeOsisID, keyOsisID)) {
 
-				// Double check. This probably can be removed.
-				if (!utf8State) {
-					cout << "Error: " << activeOsisID << ": Converting to UTF-8 (" << activeVerseText << ")" << endl;
-				}
+		if (!isValidRef(lastKey)) {
+			makeValidRef(lastKey);
+		}
 
-				if (utf8State > 0) {
-					SWBuf before = activeVerseText;
-					normalizer.processText(activeVerseText, (SWKey *)2);  // note the hack of 2 to mimic a real key. TODO: remove all hacks
-					if (before != activeVerseText) {
-						normalized++;
-					}
-				}
+		currentVerse = lastKey;
+
+		prepareSWText(activeOsisID, activeVerseText);
+
+		// Put the revision into the module
+		int testmt = currentVerse.Testament();
+		if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {
+			VerseKey t;
+			t.setVersificationSystem(currentVerse.getVersificationSystem());
+			t.AutoNormalize(0);
+			t.Headings(1);
+			t = currentVerse;
+			currentVerse.Book(0);
+			currentVerse.Chapter(0);
+			currentVerse.Verse(0);
+			module->setEntry(revision);
+			currentVerse = t;
+			switch (testmt) {
+			case 1:
+				firstOT = false;
+				break;
+			case 2:
+				firstNT = false;
+				break;
 			}
-#endif
+		}
 
-			SWBuf currentText = module->getRawEntry();
-			if (currentText.length()) {
-				cout << "Appending entry: " << key.getOSISRef() << ": " << activeVerseText << endl;
-				activeVerseText = currentText + " " + activeVerseText;
-			}
+		// If the entry already exists, then append this entry to the text.
+		// This is for verses that are outside the chosen versification. They are appended to the prior verse.
+		// The space should not be needed if we retained verse tags.
+		SWBuf currentText = module->getRawEntry();
+		if (currentText.length()) {
+			cout << "INFO(WRITE): Appending entry: " << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
+			activeVerseText = currentText + " " + activeVerseText;
+		}
 
 #ifdef DEBUG
-			cout << "Write: " << activeOsisID << ":" << key.getOSISRef() << ": " << activeVerseText << endl;
-#endif
-
-			module->setEntry(activeVerseText);
-			activeVerseText = "";
+		if (debug & DEBUG_WRITE) {
+			cout << "DEBUG(WRITE): " << activeOsisID << ":" << currentVerse.getOSISRef() << ": " << activeVerseText << endl;
 		}
+#endif
 
-		// eliminate leading whitespace on the beginning of each verse and
-		// before we append to current content, since we just added one
-		text.trimStart();
-		if (activeVerseText.length()) {
-			activeVerseText += " ";
-			activeVerseText += text;
-		}
-		else {
-			activeVerseText = text;
-		}
+		module->setEntry(activeVerseText);
+		activeVerseText = "";
+	}
 
-		key = saveKey;
-		lastKey = key;
-		strcpy(activeOsisID, keyOsisID);
+	// The following is for initial verse content and for appending interverse content.
+	// Eliminate leading whitespace on the beginning of each verse and
+	// before we append to current content, since we just added one
+	text.trimStart();
+	if (activeVerseText.length()) {
+		activeVerseText += " ";
+		activeVerseText += text;
+	}
+	else {
+		activeVerseText = text;
 	}
+	// text has been consumed so clear it out.
+	text = "";
+
+	currentVerse = saveKey;
+	lastKey = currentVerse;
+	strcpy(activeOsisID, keyOsisID);
 }
 
+void linkToEntry(VerseKey &linkKey, VerseKey &dest) {
+
+	// Only link verses that are in the versification.
+	if (!isValidRef(linkKey)) {
+		return;
+	}
 
-void linkToEntry(VerseKey& dest) {
-	//cout << "Verse: " << key << "\n";
-	//cout << "TEXT: " << text << "\n\n";
-	//SWBuf currentText = module->getRawEntry();
-	//if (currentText.length())
-	//	text = currentText + " " + text;
 	VerseKey saveKey;
+	saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
 	saveKey.AutoNormalize(0);
 	saveKey.Headings(1);
-	saveKey = *currentVerse;
+	saveKey = currentVerse;
+	currentVerse = linkKey;
 
-	if (!isKJVRef(*currentVerse)) {
-		makeKJVRef(*currentVerse);
-	}
-
-	cout << "Linking " << module->KeyText() << " to " << dest.getText() << "\n";
+	cout << "INFO(LINK): Linking " << currentVerse.getOSISRef() << " to " << dest.getOSISRef() << "\n";
 	module->linkEntry(&dest);
 
-	*currentVerse = saveKey;
+	currentVerse = saveKey;
 }
 
 // Return true if the content was handled or is to be ignored.
 //		false if the what has been seen is to be accumulated and considered later.
-bool handleToken(SWBuf &text, XMLTag *token) {
+bool handleToken(SWBuf &text, XMLTag token) {
 
 	// Everything between the begin book tag and the first begin chapter tag is inBookHeader
-	static bool inBookHeader = false;
+	static bool               inBookHeader    = false;
+
 	// Everything between the begin chapter tag and the first begin verse tag is inChapterHeader
-	static bool inChapterHeader = false;
+	static bool               inChapterHeader = false;
 
-	// Flags to indicate whether we are in a book, chapter and/or verse
-	//static bool inBook = false;
-	//static bool inChapter = false;
-	static bool inVerse = true;
+	// Flags indicating whether we are processing the content of a chapter
+	static bool               inChapter       = false;
 
-	static SWBuf header = "";
+	// Flags indicating whether we are processing the content of a verse
+	static bool               inVerse         = false;
 
-	// Used to remember titles that need to be handle specially
-	static SWBuf lastTitle = "";
-	static int titleOffset = -1;
-	static bool inTitle = false;
-	static int titleDepth = 0;
+	// Flag indicating whether we are processing content to be prepended to a verse
+	static bool               inPreVerse      = false;
+	static int                genID           = 1;
 
-	static ListKey lastVerseIDs = ListKey();
+	// Flag indicating whether we are in "Words of Christ"
+	static bool               inWOC           = false;
+	// Tag for WOC quotes within a verse
+	static XMLTag             wocTag          = "<q who=\"Jesus\" marker=\"\">";
+
+	// Flag used to indicate where useful text begins
+	static bool               firstDiv        = false;
+
+	// Stack of quote elements used to handle Words of Christ
+	static std::stack<XMLTag> quoteStack;
 
 	// Stack of elements used to validate that books, chapters and verses are well-formed
 	// This goes beyond simple xml well-formed and also considers milestoned div, chapter and verse
 	// to be begin and end tags, too.
 	// It is an error if books and chapters are not well formed (though not required by OSIS)
 	// It is a warning that verses are not well formed (because some clients are not ready)
-	static std::stack<XMLTag*> tagStack;
-	// The following are used to validate well-formedness
-	static int chapterDepth = 0;
-	static int bookDepth = 0;
-	static int verseDepth = 0;
-
-	int tagDepth = tagStack.size();
-	const char *tokenName = token->getName();
-	bool isEndTag = token->isEndTag() || token->getAttribute("eID");
-	const char *typeAttr = token->getAttribute("type");
-
-	//Titles are treated specially.
-	// If the title has an attribute type of "main" or "chapter"
-	// it belongs to its <div> or <chapter> and is treated as part of its heading
-	// Otherwise if it a title in a chapter before the first the first verse it
-	// is put into the verse as a preverse title.
-	if (!token->isEmpty() && !isEndTag && titleDepth == 0 && (!strcmp(tokenName, "title")) && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter")))) {
-		titleOffset = text.length(); //start of the title tag
-		lastTitle = "";
-		inTitle = true;
-		tagStack.push(token);
-#ifdef DEBUG_STACK
-		cout << currentOsisID << ": push (" << tagStack.size() << ") " << token->getName() << endl;
-#endif
-		titleDepth = tagStack.size();
-		return false;
-	}
-	// Check titleDepth since titles can be nested. Don't want to quit too early.
-	else if (isEndTag && tagDepth == titleDepth && (!strcmp(tokenName, "title"))) {
-		lastTitle.append(text.c_str() + titleOffset); //<title ...> up to the end </title>
-		lastTitle.append(*token); //</title>
-
-#ifdef DEBUG
-		cout << currentOsisID << ":" << endl;
- 		cout << "\tlastTitle:      " << lastTitle.c_str() << endl;
- 		cout << "\ttext-lastTitle: " << text.c_str()+titleOffset << endl;
-		cout << "\ttext:	   " << text.c_str() << endl;
-#endif
-		inTitle = false;
-		titleDepth = 0;
-#ifdef DEBUG_STACK
-		cout << currentOsisID << ": pop(" << tagStack.size() << ") " << tagStack.top()->getName() << endl;
-#endif
-		tagStack.pop();
-		return false; // don't add </title> to the text itself
-	}
-
+	static std::stack<XMLTag> tagStack;
 
+	// The following are used to validate well-formedness
+	static int                chapterDepth    = 0;
+	static int                bookDepth       = 0;
+	static int                verseDepth      = 0;
 
-//-- START TAG -------------------------------------------------------------------------
+	int                       tagDepth        = tagStack.size();
+	const char               *tokenName       = token.getName();
+	bool                      isEndTag        = token.isEndTag() || token.getAttribute("eID");
+	const char               *typeAttr        = token.getAttribute("type");
 
+	// process start tags
 	if (!isEndTag) {
 
 		// Remember non-empty start tags
-		if (!token->isEmpty()) {
+		if (!token.isEmpty()) {
 			tagStack.push(token);
-#ifdef DEBUG_STACK
-			cout << currentOsisID << ": push (" << tagStack.size() << ") " << token->getName() << endl;
+#ifdef DEBUG
+			if (debug & DEBUG_STACK) {
+				cout << "DEBUG(STACK): " << currentOsisID << ": push (" << tagStack.size() << ") " << token.getName() << endl;
+			}
 #endif
 		}
 
-		//-- WITH OSIS ID      -------------------------------------------------------------------------
-		//--   OR ANNOTATE REF -------------------------------------------------------------------------
-		if (token->getAttribute("osisID") || token->getAttribute("annotateRef")) {
+		// throw away everything up to the first div
+		if (!firstDiv) {
+			if (!strcmp(tokenName, "div")) {
+#ifdef DEBUG
+				if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
+				}
+#endif
+				// TODO: Save off the content to use it to suggest the module's conf.
+				firstDiv = true;
+				text     = "";
+			}
+			else {
+				// Collect the content so it can be used to suggest the module's conf.
+				return false;
+			}
+		}
+
+		//-- WITH osisID OR annotateRef -------------------------------------------------------------------------
+		// Handle Book, Chapter, and Verse (or commentary equivalent)
+		if (token.getAttribute("osisID") || token.getAttribute("annotateRef")) {
 
-			// BOOK START
+			// BOOK START, <div type="book" ...>
 			if ((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "book"))) {
-				inVerse = false;
 				if (inBookHeader || inChapterHeader) {	// this one should never happen, but just in case
 #ifdef DEBUG
-					cout << currentOsisID << ": HEADING ";
+					if (debug & DEBUG_TITLE) {
+						cout << "DEBUG(TITLE): " << currentOsisID << ": OOPS HEADING " << endl;
+						cout << "\tinChapterHeader = " << inChapterHeader << endl;
+						cout << "\tinBookHeader = " << inBookHeader << endl;
+					}
 #endif
-					currentVerse->Testament(0);
-					currentVerse->Book(0);
-					currentVerse->Chapter(0);
-					currentVerse->Verse(0);
-					writeEntry(*currentVerse, text);
+					currentVerse.Testament(0);
+					currentVerse.Book(0);
+					currentVerse.Chapter(0);
+					currentVerse.Verse(0);
+					writeEntry(text);
 				}
-				strcpy(currentOsisID, token->getAttribute("osisID"));
-				*currentVerse = currentOsisID;
-				currentVerse->Chapter(0);
-				currentVerse->Verse(0);
-				inBookHeader = true;
+				currentVerse = token.getAttribute("osisID");
+				currentVerse.Chapter(0);
+				currentVerse.Verse(0);
+				strcpy(currentOsisID, currentVerse.getOSISRef());
+
+				inChapter       = false;
+				inVerse         = false;
+				inPreVerse      = false;
+				inBookHeader    = true;
 				inChapterHeader = false;
-				lastTitle = "";
-				text = "";
-				bookDepth = tagStack.size();
-				chapterDepth = 0;
-				verseDepth = 0;
 
-				inCanonicalOSISBook = isOSISAbbrev(token->getAttribute("osisID"));
+				bookDepth       = tagStack.size();
+				chapterDepth    = 0;
+				verseDepth      = 0;
 
-				return true;
+				inCanonicalOSISBook = isOSISAbbrev(token.getAttribute("osisID"));
+				if (!inCanonicalOSISBook) {
+					cout << "WARNING(V11N): New book is " << token.getAttribute("osisID") << " and is not in " << v11n << " versification, ignoring" << endl;
+				}
+#ifdef DEBUG
+				else if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): New book is " << currentVerse.getOSISRef() << endl;
+				}
+#endif
+
+				return false;
 			}
 
-			// CHAPTER START
-			else if (((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "chapter")))
-					 || (!strcmp(tokenName, "chapter"))
-					 ) {
-				inVerse = false;
+			// CHAPTER START, <div type="chapter" ...> or <chapter ...>
+			if (((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "chapter"))) ||
+			     (!strcmp(tokenName, "chapter"))
+			   ) {
 				if (inBookHeader) {
 #ifdef DEBUG
-					cout << currentOsisID << ": BOOK HEADING "<< text.c_str() << endl;
+					if (debug & DEBUG_TITLE) {
+						cout << "DEBUG(TITLE): " << currentOsisID << ": BOOK HEADING "<< text.c_str() << endl;
+					}
 #endif
-					writeEntry(*currentVerse, text);
+					writeEntry(text);
 				}
 
-				strcpy(currentOsisID, token->getAttribute("osisID"));
-				*currentVerse = currentOsisID;
-				currentVerse->Verse(0);
-				inBookHeader = false;
+				currentVerse = token.getAttribute("osisID");
+				currentVerse.Verse(0);
+#ifdef DEBUG
+				if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl;
+				}
+#endif
+				strcpy(currentOsisID, currentVerse.getOSISRef());
+
+				inChapter       = true;
+				inVerse         = false;
+				inPreVerse      = false;
+				inBookHeader    = false;
 				inChapterHeader = true;
-				lastTitle = "";
-				text = "";
-				chapterDepth = tagStack.size();
-				verseDepth = 0;
 
-				return true;
+				chapterDepth    = tagStack.size();
+				verseDepth      = 0;
+
+				return false;
 			}
 
-			// VERSE OR COMMENTARY START
-			else if (!strcmp(tokenName, "verse") ||
-				 (!strcmp(tokenName, "div") &&
-				   token->getAttribute("annotateType"))) {
+			// VERSE, <verse ...> OR COMMENTARY START, <div annotateType="xxx" ...>
+			if (!strcmp(tokenName, "verse") ||
+			   (!strcmp(tokenName, "div") && token.getAttribute("annotateType"))) {
 #ifdef DEBUG
-				cout << "Entering verse" << endl;
+				if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): Entering verse" << endl;
+				}
 #endif
-				inVerse = true;
 				if (inChapterHeader) {
 					SWBuf heading = text;
-
-					//make sure we don't insert the preverse title which belongs to the first verse of this chapter!
-					// Did we have a preverse title?
-					if (lastTitle.length())
-					{
-						//Was the preVerse title in the header (error if not)?
-						const char* header = heading.c_str();
-						const char* preVerse = strstr(header, lastTitle);
-						if (preVerse) {
-							if (preVerse == header) {
-								heading = ""; // do nothing
-							}
-							else {
-								// remove everything before the title from the beginning.
-								text = preVerse;
-								// Remove text from the end of the header.
-								heading.setSize(preVerse - header);
-							}
-						}
-						else {
-							cout << currentOsisID << ": Warning: Bug in code. Could not find title." << endl;
-						}
-					}
-					else {
-						text = "";
-					}
+					text = "";
 
 					if (heading.length()) {
 #ifdef DEBUG
-						cout << currentOsisID << ": CHAPTER HEADING "<< heading.c_str() << endl;
+						if (debug & DEBUG_TITLE) {
+							cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER HEADING "<< heading.c_str() << endl;
+						}
 #endif
-						writeEntry(*currentVerse, heading);
+						writeEntry(heading);
 					}
 
 					inChapterHeader = false;
 				}
 
-				SWBuf keyVal = token->getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID");
+				// Did we have pre-verse material that needs to be marked?
+				if (inPreVerse) {
+					char genBuf[200];
+					sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
+					text.append(genBuf);
+				}
+
+				// Get osisID for verse or annotateRef for commentary
+				SWBuf keyVal = token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID");
+
+				// Massage the key into a form that ParseVerseList can accept
 				prepareSWVerseKey(keyVal);
-				lastVerseIDs = currentVerse->ParseVerseList(keyVal, *currentVerse, true);
 
-				// set currentVerse to the first value in the keyVal
-				VerseKey *element = SWDYNAMIC_CAST(VerseKey, lastVerseIDs.GetElement(0));
-				if (element) {
-					*currentVerse = element->LowerBound().getText();
+				// The osisID or annotateRef can be more than a single verse
+				// The first or only one is the currentVerse
+				// Use the last verse seen (i.e. the currentVerse) as the basis for recovering from bad parsing.
+				// This should never happen if the references are valid OSIS references
+				ListKey verseKeys = currentVerse.ParseVerseList(keyVal, currentVerse, true);
+				int memberKeyCount = verseKeys.Count();
+				if (memberKeyCount) {
+					currentVerse = verseKeys.getElement(0);
+					// See if this osisID or annotateRef refers to more than one verse.
+					// If it does, save it until all verses have been seen.
+					// At that point we will output links.
+					// This can be done by incrementing, which will produce an error
+					// if there is only one verse.
+					verseKeys.setPosition(TOP);
+					verseKeys.increment(1);
+					if (!verseKeys.Error()) {
+						linkedVerses.push_back(verseKeys);
+					}
 				}
 				else {
-					*currentVerse = lastVerseIDs.GetElement(0)->getText();
+					cout << "ERROR(REF): Invalid osisID/annotateRef: " << token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID") << endl;
 				}
 
-				strcpy(currentOsisID, currentVerse->getOSISRef());
+				strcpy(currentOsisID, currentVerse.getOSISRef());
 #ifdef DEBUG
-				cout << "Current verse is " << *currentVerse << endl;
-				cout << "osisID/annotateRef is adjusted to" << keyVal << endl;
+				if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): New current verse is " << currentVerse.getOSISRef() << endl;
+					cout << "DEBUG(FOUND): osisID/annotateRef is adjusted to: " << keyVal << endl;
+				}
 #endif
 
-				verseDepth = tagStack.size();
+				inVerse         = true;
+				inPreVerse      = false;
+				inBookHeader    = false;
+				inChapterHeader = false;
+				verseDepth      = tagStack.size();
 
-				return true;
-			}
-		}
-		// Handle stuff between the verses
-		// Whitespace producing empty tokens are appended to prior entry
-		// Also the quote
-		// This is a hack to get ESV to work
-		else if (!inTitle && !inVerse && token->isEmpty()) { // && !inBookHeader && !inChapterHeader) {
-			if (!strcmp(tokenName, "p") ||
-					!strcmp(tokenName, "div") ||
-					!strcmp(tokenName, "q")  ||
-					!strcmp(tokenName, "l") ||
-					!strcmp(tokenName, "lb") ||
-					!strcmp(tokenName, "lg")
-					) {
+				// Include the token if it is not a verse
+				if (strcmp(tokenName, "verse")) {
+					text.append(token);
+				}
 #ifdef DEBUG
-					if (token) {
-						cout << currentOsisID << ": appending interverse start token " << *token << ":" << text.c_str() << endl;
+				else if (debug & DEBUG_VERSE)
+				{
+					// transform the verse into a milestone
+					XMLTag t = "<milestone resp=\"v\" />";
+					// copy all the attributes of the verse element to the milestone
+					StringList attrNames = token.getAttributeNames();
+					for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) {
+						const char* attr = (*loop).c_str();
+						t.setAttribute(attr, token.getAttribute(attr));
 					}
+					text.append(t);
+				}
 #endif
-				SWBuf tmp = token->toString();
-				writeEntry(*currentVerse, tmp);
+
+				if (inWOC) {
+					text.append(wocTag);
+				}
 				return true;
 			}
+		} // done with Handle Book, Chapter, and Verse (or commentary equivalent)
+
+		// Now consider everything else.
+
+		// Handle WOC quotes.
+		// Note this requires transformBSP to make them into milestones
+		// Otherwise have to do it here
+		if (!strcmp(tokenName, "q")) {
+			quoteStack.push(token);
 #ifdef DEBUG
-			else {
-				if (token) {
-					cout << currentOsisID << ": interverse start token " << *token << ":" << text.c_str() << endl;
-				}
+			if (debug & DEBUG_QUOTE) {
+				cout << "DEBUG(QUOTE): " << currentOsisID << ": quote top(" << quoteStack.size() << ") " << token << endl;
 			}
 #endif
+			if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) {
+				inWOC = true;
+
+				// Output per verse WOC markup.
+				text.append(wocTag);
+
+				// Output the quotation mark if appropriate, inside the WOC.
+				// If there is no marker attribute, let the SWORD engine manufacture one.
+				// If there is a marker attribute and it has content, then output that.
+				// If the marker attribute is present and empty, then there is nothing to do.
+				// And have it within the WOC markup
+				if (!token.getAttribute("marker") || token.getAttribute("marker")[0]) {
+					token.setAttribute("who", 0); // remove the who="Jesus"
+					text.append(token);
+				}
+				return true;
+			}
+			return false;
+		}
+
+		// Have we found the start of pre-verse material?
+		// Pre-verse material follows the following rules
+		// 1) Between the opening of a book and the first chapter, all the material is handled as an introduction to the book.
+		// 2) Between the opening of a chapter and the first verse, the material is split between the introduction of the chapter
+		//    and the first verse of the chapter.
+		//    A <div> with a type other than section will be taken as a chapter introduction.
+		//    A <title> of type acrostic, psalm or no type, will be taken as a title for the verse.
+		//    A <title> of type main or chapter will be seen as a chapter title.
+		// 3) Between verses, the material is split between the prior verse and the next verse.
+		//    Basically, while end and empty tags are found, they belong to the prior verse.
+		//    Once a begin tag is found, it belongs to the next verse.
+		// If the title has an attribute type of "main" or "chapter"
+		// it belongs to its <div> or <chapter> and is treated as part of its heading
+		// Otherwise, if it is a title in a chapter before the first verse, it
+		// is put into the verse as a preverse title.
+
+		if (!inPreVerse && !inBookHeader) {
+			if (inChapterHeader) {
+				// Determine when we are no longer in a chapter heading, but in pre-verse material:
+				// If we see one of the following:
+				// 	a section div
+				// 	a title that is not main or chapter
+				if ((!strcmp(tokenName, "div") && (typeAttr && !strcmp(typeAttr, "section"))) ||
+				    (!strcmp(tokenName, "title") && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter"))))
+				   ) {
+					// Since we have found the boundary, we need to write out the chapter heading
+					writeEntry(text);
+					// And we are no longer in the chapter heading
+					inChapterHeader = false;
+					// But rather, we are now in pre-verse material
+					inPreVerse      = true;
+				}
+			}
+			else if (!inVerse && inChapter) {
+				inPreVerse = true;
+			}
+
+			if (inPreVerse) {
+				char genBuf[200];
+				sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID++);
+				text.append(genBuf);
+			}
 		}
-	}
 
-//-- EMPTY and END TAG ---------------------------------------------------------------------------------------------
+#ifdef DEBUG
+		if (debug & DEBUG_INTERVERSE) {
+			if (!inVerse && !inBookHeader && !inChapterHeader) {
+				cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse start token " << token << ":" << text.c_str() << endl;
+			}
+		}
+#endif
 
+		return false;
+	} // Done with processing start and empty tags
+
+	// Process end tags
 	else {
 
 		if (tagStack.empty()) {
-			cout << currentOsisID << ": tag expected" << endl;
-			exit(1);
+			cout << "FATAL(NESTING): " << currentOsisID << ": tag expected" << endl;
+			exit(EXIT_BAD_NESTING);
 		}
 
-		XMLTag* topToken = 0;
-		if (!token->isEmpty()) {
-			topToken = tagStack.top();
+		// Note: empty end tags have the eID attribute
+		if (!token.isEmpty()) {
+			XMLTag topToken = tagStack.top();
 			tagDepth = tagStack.size();
-#ifdef DEBUG_STACK
-			cout << currentOsisID << ": pop(" << tagDepth << ") " << topToken->getName() << endl;
+#ifdef DEBUG
+			if (debug & DEBUG_STACK) {
+				cout << "DEBUG(STACK): " << currentOsisID << ": pop(" << tagDepth << ") " << topToken.getName() << endl;
+			}
 #endif
 			tagStack.pop();
 
-			if (strcmp(topToken->getName(), tokenName)) {
-				cout << "Error: " << currentOsisID << ": Expected " << topToken->getName() << " found " << tokenName << endl;
-//				exit(1);	// I'm sure this validity check is a good idea, but there's a but somewhere that's killing the converter here.
+			if (strcmp(topToken.getName(), tokenName)) {
+				cout << "FATAL(NESTING): " << currentOsisID << ": Expected " << topToken.getName() << " found " << tokenName << endl;
+//				exit(EXIT_BAD_NESTING);	// (OSK) I'm sure this validity check is a good idea, but there's a bug somewhere that's killing the converter here.
 						// So I'm disabling this line. Unvalidated OSIS files shouldn't be run through the converter anyway.
+						// (DM) This has nothing to do with being well-formed or valid. It checks milestoned elements for proper nesting.
 			}
 		}
 
+		// We haven't seen the first div so there is nothing to do.
+		if (!firstDiv) {
+			// Collect the content so it can be used to suggest the module's conf.
+			return false;
+		}
+
 		// VERSE and COMMENTARY END
 		if (!strcmp(tokenName, "verse") || (inVerse && !strcmp(tokenName, "div"))) {
-			inVerse = false;
 
 			if (tagDepth != verseDepth) {
-				cout << "Warning verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl;
+				cout << "WARNING(NESTING): verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl;
 			}
 
-			if (lastTitle.length()) {
-				const char* end = strchr(lastTitle, '>');
+			// If we are in WOC then we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse.
+			if (inWOC) {
+				text.append("</q>");
+			}
+
+
+			// Include the token if it is not a verse
+			if (strcmp(tokenName, "verse")) {
+				text.append(token);
+			}
 #ifdef DEBUG
-				cout << currentOsisID << ":" << endl;
-				cout << "\t" << lastTitle << endl;
-	 			cout << "\tlength=" << int(end+1 - lastTitle.c_str()) << ", tag:" << lastTitle.c_str() << endl;
+			else if (debug & DEBUG_VERSE)
+			{
+				// transform the verse into a milestone
+				XMLTag t = "<milestone resp=\"v\" />";
+				// copy all the attributes of the verse element to the milestone
+				StringList attrNames = token.getAttributeNames();
+				for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) {
+					const char* attr = (*loop).c_str();
+					t.setAttribute(attr, token.getAttribute(attr));
+				}
+				text.append(t);
+			}
 #endif
 
-				SWBuf titleTagText;
-				titleTagText.append(lastTitle.c_str(), end+1 - lastTitle.c_str());
+			writeEntry(text);
+
+			inVerse     = false;
+			inPreVerse  = false;
+			verseDepth  = 0;
+
+			return true;
+		}
+		
+		// Handle WOC quotes.
+		// Note this requires transformBSP to make them into milestones
+		// Otherwise have to manage it here
+		if (!strcmp(tokenName, "q")) {
+			XMLTag topToken = quoteStack.top();
 #ifdef DEBUG
-				cout << currentOsisID << ": tagText: " << titleTagText.c_str() << endl;;
+			if (debug & DEBUG_QUOTE) {
+				cout << "DEBUG(QUOTE): " << currentOsisID << ": quote pop(" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
+			}
 #endif
+			quoteStack.pop();
 
-				XMLTag titleTag(titleTagText);
-				titleTag.setAttribute("type", "section");
-				titleTag.setAttribute("subType", "x-preverse");
-
-				//we insert the title into the text again - make sure to remove the old title text
-				const char* pos = strstr(text, lastTitle);
-				if (pos) {
-					SWBuf temp;
-					temp.append(text, pos-text.c_str());
-					temp.append(pos+lastTitle.length());
-					text = temp;
+			// If we have found an end tag for a <q who="Jesus"> then we are done with the WOC
+			// and we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse.
+			if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) {
+#ifdef DEBUG
+				if (debug & DEBUG_QUOTE) {
+					cout << "DEBUG(QUOTE): " << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl;
 				}
-
-				//if a title was already inserted at the beginning insert this one after that first title
-				int titlePos = 0;
-				if (!strncmp(text.c_str(),"<title ",7)) {
-					const char* tmp = strstr(text.c_str(), "</title>");
-					if (tmp) {
-						titlePos = (tmp-text.c_str()) + 8;
-					}
+#endif
+				inWOC = false;
+				const char *sID = topToken.getAttribute("sID");
+				const char *eID = token.getAttribute("eID");
+				if (!sID) {
+					sID = "";
 				}
-				text.insert(titlePos, end+1);
-				text.insert(titlePos, titleTag);
-			}
-	//		text += token;
-			writeEntry(*currentVerse, text);
-
-			// If we found an osisID like osisID="Gen.1.1 Gen.1.2 Gen.1.3" we have to link Gen.1.2 and Gen.1.3 to Gen.1.1
-			VerseKey dest = *currentVerse;
-			VerseKey linkKey;
-			linkKey.AutoNormalize(0);
-			linkKey.Headings(1);	// turn on mod/testmnt/book/chap headings
-			linkKey.Persist(1);
-			for (lastVerseIDs = TOP; !lastVerseIDs.Error(); lastVerseIDs++) {
-				linkKey = lastVerseIDs;
-
-				if (linkKey.Verse()     != dest.Verse()   ||
-				    linkKey.Chapter()   != dest.Chapter() ||
-				    linkKey.Book()      != dest.Book()    ||
-				    linkKey.Testament() != dest.Testament())
-				{
-					*currentVerse = linkKey;
-					linkToEntry(dest);
+				if (!eID) {
+					eID = "";
+				}
+				if (strcmp(sID, eID)) {
+					cout << "ERROR(NESTING): improper nesting " << currentOsisID << ": matching (sID,eID) not found. Looking at (" << sID << "," << eID << ")" << endl;
 				}
-			}
 
-			lastTitle = "";
-			text = "";
-			verseDepth = 0;
-			return true;
+
+				// Output the quotation mark if appropriate, inside the WOC.
+				// If there is no marker attribute, let the SWORD engine manufacture one.
+				// If there is a marker attribute and it has content, then output that.
+				// If the marker attribute is present and empty, then there is nothing to do.
+				// And have it within the WOC markup
+				if (!token.getAttribute("marker") || token.getAttribute("marker")[0]) {
+					token.setAttribute("who", 0); // remove the who="Jesus"
+					text.append(token);
+				}
+
+				// Now close the WOC
+				text.append("</q>");
+				return true;
+			}
+			return false;
 		}
-		else if (!inTitle && !inVerse && !inBookHeader && !inChapterHeader) {
+
+		// Look for the end of document, book and chapter
+		// Also for material that goes with last entry
+		if (!inVerse && !inBookHeader && !inChapterHeader) {
 			// Is this the end of a chapter.
 			if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) {
+				text.append(token);
+				writeEntry(text);
+				inChapter    = false;
 				chapterDepth = 0;
-				verseDepth = 0;
-				text = "";
+				verseDepth   = 0;
 				return true;
 			}
-			// Or is it the end of a book
-			else if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) {
-				bookDepth = 0;
+
+			// Is it the end of a book
+			if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) {
+				text.append(token);
+				writeEntry(text);
+				bookDepth    = 0;
 				chapterDepth = 0;
-				verseDepth = 0;
-				text = "";
+				verseDepth   = 0;
 				return true;
 			}
-			// Or is it the end of an osis document
-			else if (!strcmp(tokenName, "osisText") || !strcmp(tokenName, "osis")) {
-				bookDepth = 0;
+
+			// Do not include the end of an osis document
+			if (!strcmp(tokenName, "osisText") || !strcmp(tokenName, "osis")) {
+				bookDepth    = 0;
 				chapterDepth = 0;
-				verseDepth = 0;
-				text = "";
+				verseDepth   = 0;
+				text         = "";
 				return true;
 			}
-			// OTHER MISC END TAGS WHEN !INVERSE
-			// Test that is between verses, or after the last is appended to the preceeding verse.
-			else if (!strcmp(tokenName, "p") ||
-					!strcmp(tokenName, "div") ||
-					!strcmp(tokenName, "q")  ||
-					!strcmp(tokenName, "l") ||
-					!strcmp(tokenName, "lb") ||
-					!strcmp(tokenName, "lg")
-					) {
-				text.append(*token);
-				writeEntry(*currentVerse, text);
-				text = "";
+
+			// When we are not inPreVerse, the interverse tags get appended to the preceding verse.
+			if (!inPreVerse) {
+				text.append(token);
+				writeEntry(text);
 #ifdef DEBUG
-				cout << currentOsisID << ": appending interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+				if (debug & DEBUG_INTERVERSE) {
+					cout << "DEBUG(INTERVERSE): " << currentOsisID << ": appending interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+				}
 #endif
 				return true;
 			}
+
 #ifdef DEBUG
-			cout << currentOsisID << ": interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+			if (debug & DEBUG_INTERVERSE) {
+				cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl;
+			}
 #endif
+			return false;
+
 		}
-	}
+
+		return false;
+	} // done with Processing end tags
+
 	return false;
 }
 
-XMLTag* transform(XMLTag* t) {
-	static std::stack<XMLTag*> tagStack;
+/**
+ * Support normalizations necessary for a SWORD module.
+ * OSIS allows document structure (Book, Section, Paragraph or BSP)
+ * to overlap Bible versification (Book, Chapter, Verse).
+ * Most SWORD applications need to display verses in isolation or in HTML table cells,
+ * requiring each stored entry (i.e. each verse) to be well-formed XML.
+ * This routine normalizes container elements which could cross verse boundaries into milestones:
+ * a start tag becomes an empty element with a generated sID and its end tag a copy carrying that id as eID.
+ * p has no milestone form, so it is rewritten as a milestoned <div type="paragraph">.
+ * @param t the tag to transform; a tag that is already empty is returned unchanged
+ * @return the transformed tag or the original one
+ */
+XMLTag transformBSP(XMLTag t) {
+	static std::stack<XMLTag> bspTagStack;	// start tags, as emitted, waiting for their end tags
 	static int sID = 1;
 	char buf[11];
 
 	// Support simplification transformations
-	if (!t->isEmpty()) {
-		if (!t->isEndTag()) {
-			tagStack.push(t);
-#ifdef DEBUG_XFORM
-			cout << currentOsisID << ": xform push (" << tagStack.size() << ") " << t->getName() << endl;
+	if (t.isEmpty()) {	// empty elements are passed through untouched and never pushed
+#ifdef DEBUG
+		if (debug & DEBUG_XFORM) {
+			cout << "DEBUG(XFORM): " << currentOsisID << ": xform empty " << t << endl;
+		}
 #endif
-			// Transform <q> into <q sID=""/> except for <q who="Jesus">
-			if ((!strcmp(t->getName(), "q")) && (!t->getAttribute("who") || strcmp(t->getAttribute("who"), "Jesus"))) {
-				t->setEmpty(true);
-				sprintf(buf, "q%d", sID++);
-				t->setAttribute("sID", buf);
-			}
+		return t;
+	}
 
-			// Transform <p> into <lb type="x-begin-paragraph"/>
-			else if (!strcmp(t->getName(), "p")) {
-				// note there is no process that should care about type, it is there for reversability
-				t->setText("<lb type=\"x-begin-paragraph\" />");
-			}
+	const char* tagName = t.getName();
+	if (!t.isEndTag()) {
+		// Transform <p> into <div type="paragraph" /> and milestone it with a generated sID
+		if (!strcmp(tagName, "p")) {
+			t.setText("<div type=\"paragraph\" />");
+			sprintf(buf, "gen%d", sID++);	// NOTE(review): buf is char[11]; "gen%d" needs 12 bytes once sID reaches 10000000 - confirm that is unreachable
+			t.setAttribute("sID", buf);
+		}
+
+		// Transform <tag> into <tag sID="" />, where tag is a milestoneable element.
+		// The following containers are milestoneable.
+		// abbr, closer, div, foreign, l, lg, salute, signed, speech
+		// Leaving out:
+		//   abbr	When would this ever cross a boundary?
+		//   seg	as it is used for a divineName hack
+		//   foreign	so that it can be easily italicized
+		else if (!strcmp(tagName, "chapter") ||
+			 !strcmp(tagName, "closer")  ||
+			 !strcmp(tagName, "div")     ||
+			 !strcmp(tagName, "l")       ||
+			 !strcmp(tagName, "lg")      ||
+			 !strcmp(tagName, "q")       ||
+			 !strcmp(tagName, "salute")  ||
+			 !strcmp(tagName, "signed")  ||
+			 !strcmp(tagName, "speech")  ||
+			 !strcmp(tagName, "verse")
+			) {
+			t.setEmpty(true);
+			sprintf(buf, "gen%d", sID++);	// shares the sID counter (and the buf size caveat) with the <p> case
+			t.setAttribute("sID", buf);
+		}
+		bspTagStack.push(t);	// every non-empty start tag is pushed, transformed or not, so end tags pair up by position
+#ifdef DEBUG
+		if (debug & DEBUG_XFORM) {
+			cout << "DEBUG(XFORM): " << currentOsisID << ": xform push (" << bspTagStack.size() << ") " << t << " (tagname=" << tagName << ")" << endl;
+			XMLTag topToken = bspTagStack.top();
+			cout << "DEBUG(XFORM): " << currentOsisID << ": xform top(" << bspTagStack.size() << ") " << topToken << endl;
 		}
-		else {
-			XMLTag *topToken = tagStack.top();
-#ifdef DEBUG_XFORM
-			cout << currentOsisID << ": xform pop(" << tagStack.size() << ") " << topToken->getName() << endl;
 #endif
-			tagStack.pop();
+	}
+	else {
+		XMLTag topToken = bspTagStack.top();	// NOTE(review): no empty() check; an end tag with no pushed start tag calls top() on an empty stack
+#ifdef DEBUG
+		if (debug & DEBUG_XFORM) {
+			cout << "DEBUG(XFORM): " << currentOsisID << ": xform pop(" << bspTagStack.size() << ") " << topToken << endl;
+		}
+#endif
+		bspTagStack.pop();
+
+		// Look for the milestoneable container tags handled above, plus p (which was turned into a div milestone).
+		if (!strcmp(tagName, "chapter") ||
+			 !strcmp(tagName, "closer")  ||
+			 !strcmp(tagName, "div")     ||
+			 !strcmp(tagName, "l")       ||
+			 !strcmp(tagName, "lg")      ||
+			 !strcmp(tagName, "p")       ||
+			 !strcmp(tagName, "q")       ||
+			 !strcmp(tagName, "salute")  ||
+			 !strcmp(tagName, "signed")  ||
+			 !strcmp(tagName, "speech")  ||
+			 !strcmp(tagName, "verse")
+			) {
+			// make this a clone of the start tag with sID changed to eID
+			// Note: in the case of </p> the topToken is a <div type="paragraph">
+			t = topToken;
+			t.setAttribute("eID", t.getAttribute("sID"));
+			t.setAttribute("sID", 0);	// passing 0 drops the attribute, leaving only eID on the end milestone
+		}
+	}
 
-			// If we have found an end tag for a <q> that was transformed then transform this one as well.
-			if ((!strcmp(t->getName(), "q")) && (!strcmp(topToken->getName(), "q")) && (!topToken->getAttribute("who") || strcmp(topToken->getAttribute("who"), "Jesus"))) {
-				// make this a clone of the start tag with sID changed to eID
-				*t = *topToken;
-				t->setAttribute("eID", t->getAttribute("sID"));
-				t->setAttribute("sID", 0);
-			}
+	return t;
+}
 
-			// Look for paragraph tags.
-			// If we have found an end tag for a <p> that was transformed then transform this as well.
-			else if ((!strcmp(t->getName(), "p")) && (!strcmp(topToken->getName(), "lb"))) {
-				t->setText("<lb type=\"x-end-paragraph\" />");
-			}
+/**
+ * Write out all links in the module.
+ * Waiting is necessary because writeEntry might ultimately append
+ * text to a verse, moving its offset in the data file.
+ * While we are minimizing it by postponing the write until we have
+ * gathered the next verse, the following scenario is happening:
+ * A module is using linked verses and has some verses that are not
+ * in the chosen versification. If the out-of-canon verse happens following
+ * a linked verse, the out-of-canon verse is appended to the prior
+ * verse. Care has to be taken that the linked verses all point to
+ * the first of the set.
+ */
+void writeLinks()
+{
+	// Link all the verses collected in linkedVerses; both keys use currentVerse's versification
+	VerseKey destKey;
+	destKey.setVersificationSystem(currentVerse.getVersificationSystem());
+	destKey.AutoNormalize(0);
+	destKey.Headings(1);	// turn on mod/testmnt/book/chap headings
+
+	VerseKey linkKey;
+	linkKey.setVersificationSystem(currentVerse.getVersificationSystem());
+	linkKey.AutoNormalize(0);
+	linkKey.Headings(1);	// same settings as destKey
+	for (unsigned int i = 0; i < linkedVerses.size(); i++) {
+		// The verseKeys is a list of verses
+		// where the first is the real verse
+		// and the others link to it.
+		ListKey verseKeys = linkedVerses[i];
+		verseKeys.setPosition(TOP);
+		destKey = verseKeys.getElement();	// first element: the verse the others will link to
+		verseKeys.increment(1);
+
+		while (!verseKeys.Error()) {	// incrementing past the last element sets Error() and ends the loop
+			linkKey = verseKeys.getElement();
+			verseKeys.increment(1);
+			linkToEntry(linkKey, destKey);	// linkToEntry is defined outside this view; presumably links linkKey to destKey's entry - confirm
 		}
 	}
-	return t;
 }
 
 void usage(const char *app, const char *error = 0) {
@@ -875,6 +1262,8 @@ void usage(const char *app, const char *error = 0) {
 	if (error) fprintf(stderr, "\n%s: %s\n", app, error);
 
 	fprintf(stderr, "\nusage: %s <output/path> <osisDoc> [OPTIONS]\n", app);
+	fprintf(stderr, "  <output/path>\t\t an existing folder that the module will be written\n");
+	fprintf(stderr, "  <osisDoc>\t\t path to the validated OSIS document, or '-' to read from standard input\n");
 	fprintf(stderr, "  -a\t\t\t augment module if exists (default is to create new)\n");
 	fprintf(stderr, "  -z\t\t\t use ZIP compression (default no compression)\n");
 	fprintf(stderr, "  -Z\t\t\t use LZSS compression (default no compression)\n");
@@ -882,15 +1271,133 @@ void usage(const char *app, const char *error = 0) {
 	fprintf(stderr, "\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n");
 	fprintf(stderr, "  -c <cipher_key>\t encipher module using supplied key\n");
 	fprintf(stderr, "\t\t\t\t (default no enciphering)\n");
-	fprintf(stderr, "  -N\t\t\t Do not convert UTF-8 or normalize UTF-8 to NFC\n");
-	fprintf(stderr, "\t\t\t\t (default is to convert to UTF-8, if needed, and then normalize to NFC");
-	fprintf(stderr, "\t\t\t\t Note: all UTF-8 texts should be normalized to NFC\n");
-	exit(-1);
+	fprintf(stderr, "  -N\t\t\t do not convert UTF-8 or normalize UTF-8 to NFC\n");
+	fprintf(stderr, "\t\t\t\t (default is to convert to UTF-8, if needed,\n");
+	fprintf(stderr, "\t\t\t\t  and then normalize to NFC)\n");
+	fprintf(stderr, "\t\t\t\t Note: UTF-8 texts should be normalized to NFC.\n");
+	fprintf(stderr, "  -s <2|4>\t\t max text size per entry (default is 2).\n");
+	fprintf(stderr, "\t\t\t\t Note: useful for commentaries with very large entries\n");
+	fprintf(stderr, "\t\t\t\t       in uncompressed modules (default is 65535 bytes)\n");
+	fprintf(stderr, "  -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
+	fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n");
+	VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();	// the list of valid -v values is queried at run time
+	StringList av11n = vmgr->getVersificationSystems();
+	for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) {
+		fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str());	// one versification name per line
+        }
+#ifdef DEBUG	// the -d help text is printed only in DEBUG builds
+	fprintf(stderr, "  -d <flags>\t\t turn on debugging (default is 0)\n");
+	fprintf(stderr, "\t\t\t\t Note: This flag may change in the future.\n");
+	fprintf(stderr, "\t\t\t\t Flags: The following are valid values:\n");
+	fprintf(stderr, "\t\t\t\t\t0   - no debugging\n");
+	fprintf(stderr, "\t\t\t\t\t1   - writes to module, very verbose\n");
+	fprintf(stderr, "\t\t\t\t\t2   - verse start and end\n");
+	fprintf(stderr, "\t\t\t\t\t4   - quotes, especially Words of Christ (WOC)\n");
+	fprintf(stderr, "\t\t\t\t\t8   - titles\n");
+	fprintf(stderr, "\t\t\t\t\t16  - inter-verse material\n");
+	fprintf(stderr, "\t\t\t\t\t32  - BSP to BCV transformations\n");
+	fprintf(stderr, "\t\t\t\t\t64  - v11n exceptions\n");
+	fprintf(stderr, "\t\t\t\t\t128 - parsing of osisID and osisRef\n");
+	fprintf(stderr, "\t\t\t\t\t256 - internal stack\n");
+	fprintf(stderr, "\t\t\t\t\t512 - miscellaneous\n");
+	fprintf(stderr, "\t\t\t\t This flag can be used more than once.\n");
+#endif
+	fprintf(stderr, "\n");
+	fprintf(stderr, "See http://www.crosswire.org/wiki/osis2mod for more details.\n");
+	fprintf(stderr, "\n");
+	exit(EXIT_BAD_ARG);	// usage() never returns to its caller
+}
+// Reads the OSIS document from infile byte by byte: tags go through transformBSP() and handleToken(), other text is whitespace-collapsed into the entry buffer; finally flushes the last entry and writes the verse links.
+void processOSIS(istream& infile) {
+	activeOsisID[0] = '\0';
+
+	strcpy(currentOsisID,"N/A");
+
+	currentVerse.setVersificationSystem(v11n);
+	currentVerse.AutoNormalize(0);
+	currentVerse.Headings(1);	// turn on mod/testmnt/book/chap headings
+	currentVerse.Persist(1);
+
+	module->setKey(currentVerse);
+	module->setPosition(TOP);
+
+	SWBuf token;	// the tag being collected, from '<' through '>'
+	SWBuf text;	// character data gathered outside of tags; also handed to handleToken()
+	bool intoken = false;	// true while between '<' and '>'
+	bool inWhitespace = false;	// true when the previous text character was whitespace
+	bool seeingSpace = false;
+	int curChar = 0;	// int, not char: get() returns int so that EOF stays distinct from the byte 0xFF
+
+	while (infile.good()) {
+		// get() yields the next byte as a non-negative value, or EOF when the read fails
+		curChar = infile.get();
+
+		// skip the failed read at end of input. infile.good() will catch the problem
+		if (curChar == EOF) {
+			continue;
+		}
+
+		if (!intoken && curChar == '<') {
+			intoken = true;
+			token = "<";
+			continue;
+		}
+
+		// Outside of tokens merge adjacent whitespace
+		if (!intoken) {
+			seeingSpace = isspace(curChar);	// well defined now: curChar is a non-negative byte value here
+			if (seeingSpace) {
+				if (inWhitespace) {
+					continue;
+				}
+				// convert all whitespace to blanks
+				curChar = ' ';
+			}
+			inWhitespace = seeingSpace;
+		}
+
+		if (intoken && curChar == '>') {
+			intoken = false;
+			inWhitespace = false;
+			token.append('>');
+			// take this isalpha if out to check for bugs in text
+			if ((isalpha(token[1])) || (isalpha(token[2]))) {
+				//cout << "Handle:" << token.c_str() << endl;
+				XMLTag t = transformBSP(token.c_str());
+
+				if (!handleToken(text, t)) {	// tags that handleToken() does not claim are kept in the text
+					text.append(t);
+				}
+			}
+			continue;
+		}
+
+		if (intoken) {
+			token.append(static_cast<char>(curChar));
+		}
+		else {
+			switch (curChar) {
+				case '>' : text.append("&gt;"); break;
+				case '<' : text.append("&lt;"); break;
+				default  : text.append(static_cast<char>(curChar)); break;
+			}
+		}
+	}
+
+	// Force the last entry from the text buffer.
+	text = "";
+	writeEntry(text, true);
+	writeLinks();
+
+#ifdef _ICU_
+	if (converted)  fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted);
+	if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized);
+#endif
+}
 
 int main(int argc, char **argv) {
 
-	fprintf(stderr, "You are running osis2mod: $Rev: 2169 $\n");
+	fprintf(stderr, "You are running osis2mod: $Rev: 2400 $\n");
 
 	// Let's test our command line arguments
 	if (argc < 3) {
@@ -898,14 +1405,15 @@ int main(int argc, char **argv) {
 	}
 
 	// variables for arguments, holding defaults
-	const char* program = argv[0];
-	const char* path    = argv[1];
-	const char* osisDoc = argv[2];
-	int append          = 0;
-	int compType        = 0;
-	int iType           = 4;
-	string cipherKey    = "";
-
+	const char* program    = argv[0];
+	const char* path       = argv[1];
+	const char* osisDoc    = argv[2];
+	int append             = 0;
+	SWBuf compType         = "";
+	bool isCommentary      = false;
+	int iType              = 4;
+	int entrySize          = 0;
+	SWBuf cipherKey        = "";
 	SWCompress *compressor = 0;
 
 	for (int i = 3; i < argc; i++) {
@@ -913,12 +1421,14 @@ int main(int argc, char **argv) {
 			append = 1;
 		}
 		else if (!strcmp(argv[i], "-z")) {
-			if (compType) usage(*argv, "Cannot specify both -z and -Z");
-			compType = 2;
+			if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
+			if (entrySize) usage(*argv, "Cannot specify both -z and -s");
+			compType = "ZIP";
 		}
 		else if (!strcmp(argv[i], "-Z")) {
-			if (compType) usage(*argv, "Cannot specify both -z and -Z");
-			compType = 1;
+			if (compType.size()) usage(*argv, "Cannot specify both -z and -Z");
+			if (entrySize) usage(*argv, "Cannot specify both -Z and -s");
+			compType = "LZSS";
 		}
 		else if (!strcmp(argv[i], "-b")) {
 			if (i+1 < argc) {
@@ -934,62 +1444,127 @@ int main(int argc, char **argv) {
 			if (i+1 < argc) cipherKey = argv[++i];
 			else usage(*argv, "-c requires <cipher_key>");
 		}
+		else if (!strcmp(argv[i], "-v")) {	// -v <v11n>: name of the versification system
+			if (i+1 < argc) v11n = argv[++i];
+			else usage(*argv, "-v requires <v11n>");
+		}
+		else if (!strcmp(argv[i], "-s")) {	// -s <2|4>: entry size; rejected together with -z/-Z
+			if (compType.size()) usage(*argv, "Cannot specify -s and -z or -Z");
+                        if (i+1 < argc) {
+                                entrySize = atoi(argv[++i]);
+                                if (entrySize == 2 || entrySize == 4) {
+                                        continue;	// valid value consumed; go on to the next argument
+                                }
+                        }
+                        usage(*argv, "-s requires one of <2|4>");	// value missing or not 2/4; usage() exits
+		}
+		else if (!strcmp(argv[i], "-C")) {	// NOTE(review): -C does not appear in the usage() text visible in this patch - confirm
+			isCommentary = true;
+		}
+#ifdef DEBUG
+		else if (!strcmp(argv[i], "-d")) {
+			if (i+1 < argc) debug |= atoi(argv[++i]);	// flags from repeated -d options are OR-ed together
+			else usage(*argv, "-d requires <flags>");
+		}
+#endif
 		else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
 	}
 
-	switch (compType) {	// these are deleted by zText
-		case 0: break;
-		case 1: compressor = new LZSSCompress(); break;
-		case 2: compressor = new ZipCompress(); break;
-	}
+        if (compType == "ZIP") {	// set by -z
+                compressor = new ZipCompress();
+        }
+        else if (compType == "LZSS") {	// set by -Z; must compare, not assign, so that no flag leaves compressor at 0
+                compressor = new LZSSCompress();
+        }
 
 #ifndef _ICU_
 	if (normalize) {
 		normalize = false;
-		cout << program << " is not compiled with support for ICU. Ignoring -n flag." << endl;
+		cout << "WARNING(UTF8): " << program << " is not compiled with support for ICU. Assuming -N." << endl;
 	}
 #endif
 
 #ifdef DEBUG
-	cout << "path: " << path << " osisDoc: " << osisDoc << " create: " << append << " compressType: " << compType << " blockType: " << iType << " cipherKey: " << cipherKey.c_str() << " normalize: " << normalize << "\n";
-	cout << "";
-//	exit(-3);
+	if (debug & DEBUG_OTHER) {
+		cout << "DEBUG(ARGS):\n\tpath: " << path << "\n\tosisDoc: " << osisDoc << "\n\tcreate: " << append << "\n\tcompressType: " << compType << "\n\tblockType: " << iType << "\n\tcipherKey: " << cipherKey.c_str() << "\n\tnormalize: " << normalize << endl;
+	}
 #endif
 
-
 	if (!append) {	// == 0 then create module
 	// Try to initialize a default set of datafiles and indicies at our
 	// datapath location passed to us from the user.
-		if ( compressor ) {
-			if ( zText::createModule(path, iType) ) {
-				fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program, path);
-				exit(-3);
+		if (compressor) {
+			if (zText::createModule(path, iType, v11n)) {
+				fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path);
+				exit(EXIT_NO_CREATE);
+			}
+		}
+		else if (entrySize == 4) {
+			if (RawText4::createModule(path, v11n)) {
+				fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path);
+				exit(EXIT_NO_CREATE);
 			}
 		}
-		else if (RawText::createModule(path)) {
-			fprintf(stderr, "error: %s: couldn't create module at path: %s \n", program, path);
-			exit(-3);
+		else {
+			if (RawText::createModule(path, v11n)) {
+				fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path);
+				exit(EXIT_NO_CREATE);
+			}
 		}
 	}
 
-	// Let's see if we can open our input file
-	ifstream infile(osisDoc);
-	if (infile.fail()) {
-		fprintf(stderr, "error: %s: couldn't open input file: %s \n", program, osisDoc);
-		exit(-2);
-	}
-
 	// Do some initialization stuff
 	if (compressor) {
-		module = new zText(path, 0, 0, iType, compressor);
+		// Create a compressed text module allowing very large entries
+		// Taking defaults except for first, fourth, fifth and last argument
+		module = new zText(
+				path,		// ipath
+				0,		// iname
+				0,		// idesc
+				iType,		// iblockType
+				compressor,	// icomp
+				0,		// idisp
+				ENC_UNKNOWN,	// enc
+				DIRECTION_LTR,	// dir
+				FMT_UNKNOWN,	// markup
+				0,		// lang
+				v11n		// versification
+                       );
+	}
+	else if (entrySize == 4) {
+		// Create a raw text module allowing very large entries
+		// Taking defaults except for first and last argument
+		module = new RawText4(
+				path,		// ipath
+				0,		// iname
+				0,		// idesc
+				0,		// idisp
+				ENC_UNKNOWN,	// encoding
+				DIRECTION_LTR,	// dir
+				FMT_UNKNOWN,	// markup
+				0,		// ilang
+				v11n		// versification
+			);
 	}
-	else{
-		module = new RawText(path);	// open our datapath with our RawText driver.
+	else {
+		// Create a raw text module allowing reasonable sized entries
+		// Taking defaults except for first and last argument
+		module = new RawText(
+				path,		// ipath
+				0,		// iname
+				0,		// idesc
+				0,		// idisp
+				ENC_UNKNOWN,	// encoding
+				DIRECTION_LTR,	// dir
+				FMT_UNKNOWN,	// markup
+				0,		// ilang
+				v11n		// versification
+			);
 	}
 
 	SWFilter *cipherFilter = 0;
 
-	if (!cipherKey.empty()) {
+	if (cipherKey.length()) {
 		fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
 		cipherFilter = new CipherFilter(cipherKey.c_str());
 		module->AddRawFilter(cipherFilter);
@@ -997,94 +1572,29 @@ int main(int argc, char **argv) {
 
 	if (!module->isWritable()) {
 		fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" );
-		exit(-1);
+		exit(EXIT_NO_WRITE);
 	}
 
-	activeOsisID[0] = '\0';
-	strcpy(currentOsisID,"N/A");
-
-	currentVerse = new VerseKey();
-	currentVerse->AutoNormalize(0);
-	currentVerse->Headings(1);	// turn on mod/testmnt/book/chap headings
-	currentVerse->Persist(1);
-
-	module->setKey(*currentVerse);
-
-	(*module) = TOP;
-
-	SWBuf token;
-	SWBuf text;
-	bool intoken = false;
-	bool inWhitespace = false;
-	bool seeingSpace = false;
-	char curChar = '\0';
-
-	while (infile.good()) {
-		
-		curChar = infile.get();
-
-		// skip the character if it is bad. infile.good() will catch the problem
-		if (curChar == -1) {
-			continue;
-		}
-
-		if (!intoken && curChar == '<') {
-			intoken = true;
-			token = "<";
-			continue;
-		}
-
-		// Outside of tokens merge adjacent whitespace
-		if (!intoken) {
-			seeingSpace = isspace(curChar);
-			if (seeingSpace) {
-				if (inWhitespace) {
-					continue;
-				}
-				// convert all whitespace to blanks
-				curChar = ' ';
-			}
-			inWhitespace = seeingSpace;
-		}
-
-		if (intoken && curChar == '>') {
-			intoken = false;
-			inWhitespace = false;
-			token.append('>');
-			// take this isalpha if out to check for bugs in text
-			if ((isalpha(token[1])) || (isalpha(token[2]))) {
-				//cout << "Handle:" << token.c_str() << endl;
-				XMLTag *t = new XMLTag(token.c_str());
-
-				if (!handleToken(text, transform(t))) {
-					text.append(*t);
-				}
-			}
-			continue;
+	// Either read from std::cin (aka stdin), when the argument is a '-'
+	// or from a specified file.
+	if (!strcmp(osisDoc, "-")) {
+		processOSIS(cin);
+	}
+	else {
+		// Let's see if we can open our input file
+		ifstream infile(osisDoc);
+		if (infile.fail()) {
+			fprintf(stderr, "ERROR: %s: couldn't open input file: %s \n", program, osisDoc);
+			exit(EXIT_NO_READ);
 		}
-
-		if (intoken)
-			token.append(curChar);
-		else
-			switch (curChar) {
-				case '>' : text.append("&gt;"); break;
-				case '<' : text.append("&lt;"); break;
-				default  : text.append(curChar); break;
-			}
+		processOSIS(infile);
+		infile.close();
 	}
 
-	// Force the last entry from the text buffer.
-	text = "";
-	writeEntry(*currentVerse, text, true);
 	delete module;
-	delete currentVerse;
 	if (cipherFilter)
 		delete cipherFilter;
-	infile.close();
 
-#ifdef _ICU_
-	if (converted)  fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted);
-	if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized);
-#endif
+	exit(0); // success
 }
author	Roberto C. Sanchez <roberto@connexer.com>	2014-03-29 10:54:01 -0400
committer	Roberto C. Sanchez <roberto@connexer.com>	2014-03-29 10:54:01 -0400
commit	71a39f4652cd51df814c930dd268f3c9ad2aee86 (patch)
tree	5994350a603908c4e4d660bc9d72c4ec43dd648e /utilities/osis2mod.cpp
parent	03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (diff)