Imported Upstream version 1.7.2+dfsg

author: Roberto C. Sanchez <roberto@connexer.com> 2014-05-12 08:21:30 -0400
committer: Roberto C. Sanchez <roberto@connexer.com> 2014-05-12 08:21:30 -0400
commit: 7a00574163029c0c2b649878c95d5acbd083564a (patch)
tree: c13cc5736025834df2874ed87ee8598070025ea6 /utilities/osis2mod.cpp
parent: b745315323de9f27538edac9453205ca70e6186e (diff)
1 files changed, 209 insertions, 78 deletions
diff --git a/utilities/osis2mod.cpp b/utilities/osis2mod.cpp
index 7b937fd..eab2ee0 100644
--- a/utilities/osis2mod.cpp
+++ b/utilities/osis2mod.cpp
@@ -1,5 +1,10 @@
-/*
- * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
+/******************************************************************************
+ *
+ *  osis2mod.cpp -	Utility to import a module in OSIS format
+ *
+ * $Id: osis2mod.cpp 2893 2013-07-16 03:07:02Z scribe $
+ *
+ * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
  *	P. O. Box 2528
  *	Tempe, AZ  85280-2528
@@ -96,8 +101,8 @@ static bool inCanonicalOSISBook = true; // osisID is for a book that is not in S
 static bool normalize           = true; // Whether to normalize UTF-8 to NFC
 
 bool isOSISAbbrev(const char *buf) {
-	VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
-	const VerseMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
+	VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
+	const VersificationMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
 	return av11n->getBookNumberByOSISName(buf) >= 0;
 }
 
@@ -352,13 +357,13 @@ bool isValidRef(const char *buf) {
 	// And set it to the reference under question
 	VerseKey before;
 	before.setVersificationSystem(currentVerse.getVersificationSystem());
-	before.AutoNormalize(0);
-	before.Headings(1);
+	before.setAutoNormalize(false);
+	before.setIntros(true);
 	before.setText(buf);
 
 	// If we are a heading we must bail
 	// These will autonormalize to the last verse of the prior chapter
-	if (!before.Testament() || !before.Book() || !before.Chapter() || !before.Verse()) {
+	if (!before.getTestament() || !before.getBook() || !before.getChapter() || !before.getVerse()) {
 		return true;
 	}
 
@@ -366,7 +371,7 @@ bool isValidRef(const char *buf) {
 	// And set it to the reference under question
 	VerseKey after;
 	after.setVersificationSystem(currentVerse.getVersificationSystem());
-	after.AutoNormalize(1);
+	after.setAutoNormalize(true);
 	after.setText(buf);
 
 	if (before == after)
@@ -375,8 +380,7 @@ bool isValidRef(const char *buf) {
 	}
 
 	// If we have gotten here the reference is not in the selected versification.
-	cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
-
+	// cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
 	if (debug & DEBUG_REV11N) {
 		cout << "DEBUG(V11N): " << before << " normalizes to "  << after << endl;
 	}
@@ -409,25 +413,28 @@ bool isValidRef(const char *buf) {
  * param key the key that may need to be adjusted
  */
 void makeValidRef(VerseKey &key) {
+	VerseKey saveKey;
+	saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
+	saveKey.setAutoNormalize(false);
+	saveKey.setIntros(true);
+	saveKey = currentVerse;
 
-	int chapterMax = key.getChapterMax();
-	int verseMax   = key.getVerseMax();
-
-	if (debug & DEBUG_REV11N) {
-		cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
-	}
-
-	cout << "INFO(V11N): " << key.getOSISRef() << " is not in the " << key.getVersificationSystem() << " versification.";
 	// Since isValidRef returned false constrain the key to the nearest prior reference.
 	// If we are past the last chapter set the reference to the last chapter
-	if (key.Chapter() > chapterMax) {
-		key.Chapter(chapterMax);
+	int chapterMax = key.getChapterMax();
+	if (key.getChapter() > chapterMax) {
+		key.setChapter(chapterMax);
 	}
 
 	// Either we set the chapter to the last chapter and now need to set to the last verse in the chapter
 	// Or the verse is beyond the end of the chapter.
 	// In any case we need to constrain the verse to it's chapter.
-	key.Verse(verseMax);
+	int verseMax   = key.getVerseMax();
+	key.setVerse(verseMax);
+
+	if (debug & DEBUG_REV11N) {
+		cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
+	}
 
 	// There are three cases we want to handle:
 	// In the examples we are using the KJV versification where the last verse of Matt.7 is Matt.7.29.
@@ -446,17 +453,19 @@ void makeValidRef(VerseKey &key) {
 	//    In this case we should re-versify Matt.7.30 as Matt.7.29.
 	//    However, since this and 2) are ambiguous, we'll re-reversify to the last entry in the module.
 	
-	while (!key.Error() && !module->hasEntry(&key)) {
+	while (!key.popError() && !module->hasEntry(&key)) {
 		key.decrement(1);
 	}
 
-	cout << " Appending content to " << key.getOSISRef() << endl;
+	cout << "INFO(V11N): " << saveKey.getOSISRef()
+	     << " is not in the " << key.getVersificationSystem()
+	     << " versification. Appending content to " << key.getOSISRef() << endl;
 }
 
 void writeEntry(SWBuf &text, bool force = false) {
 	char keyOsisID[255];
 
-	static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2562 $\"/>";
+	static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2893 $\"/>";
 	static bool firstOT = true;
 	static bool firstNT = true;
 
@@ -473,13 +482,13 @@ void writeEntry(SWBuf &text, bool force = false) {
 
 	static VerseKey lastKey;
 	lastKey.setVersificationSystem(currentVerse.getVersificationSystem());
-	lastKey.AutoNormalize(0);
-	lastKey.Headings(1);
+	lastKey.setAutoNormalize(0);
+	lastKey.setIntros(1);
 
 	VerseKey saveKey;
 	saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
-	saveKey.AutoNormalize(0);
-	saveKey.Headings(1);
+	saveKey.setAutoNormalize(0);
+	saveKey.setIntros(1);
 	saveKey = currentVerse;
 
 	// If we have seen a verse and the supplied one is different then we output the collected one.
@@ -494,16 +503,16 @@ void writeEntry(SWBuf &text, bool force = false) {
 		prepareSWText(activeOsisID, activeVerseText);
 
 		// Put the revision into the module
-		int testmt = currentVerse.Testament();
+		int testmt = currentVerse.getTestament();
 		if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {
 			VerseKey t;
 			t.setVersificationSystem(currentVerse.getVersificationSystem());
-			t.AutoNormalize(0);
-			t.Headings(1);
+			t.setAutoNormalize(0);
+			t.setIntros(1);
 			t = currentVerse;
-			currentVerse.Book(0);
-			currentVerse.Chapter(0);
-			currentVerse.Verse(0);
+			currentVerse.setBook(0);
+			currentVerse.setChapter(0);
+			currentVerse.setVerse(0);
 			module->setEntry(revision);
 			currentVerse = t;
 			switch (testmt) {
@@ -534,14 +543,12 @@ void writeEntry(SWBuf &text, bool force = false) {
 	}
 
 	// The following is for initial verse content and for appending interverse content.
-	// Eliminate leading whitespace on the beginning of each verse and
-	// before we append to current content, since we just added one
-	text.trimStart();
 	if (activeVerseText.length()) {
-		activeVerseText += " ";
 		activeVerseText += text;
 	}
 	else {
+		// Eliminate leading whitespace on the beginning of each verse
+		text.trimStart();
 		activeVerseText = text;
 	}
 	// text has been consumed so clear it out.
@@ -561,8 +568,8 @@ void linkToEntry(VerseKey &linkKey, VerseKey &dest) {
 
 	VerseKey saveKey;
 	saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
-	saveKey.AutoNormalize(0);
-	saveKey.Headings(1);
+	saveKey.setAutoNormalize(0);
+	saveKey.setIntros(1);
 	saveKey = currentVerse;
 	currentVerse = linkKey;
 
@@ -590,7 +597,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
 
 	// Flags indicating whether we are processing the content of to be prepended to a verse
 	static bool               inPreVerse      = false;
-//	static int                genID           = 1;
+	static int                genID           = 1;
 
 	// Flag indicating whether we are in "Words of Christ"
 	static bool               inWOC           = false;
@@ -599,6 +606,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
 
 	// Flag used to indicate where useful text begins
 	static bool               firstDiv        = false;
+	static bool               headerEnded     = false;
 
 	// Retain the sID of book, chapter and verse (commentary) divs so that we can find them again.
 	// This relies on transformBSP.
@@ -639,9 +647,9 @@ bool handleToken(SWBuf &text, XMLTag token) {
 			}
 		}
 
-		// throw away everything up to the first div
+		// throw away everything up to the first div (that is outside the header)
 		if (!firstDiv) {
-			if (tokenName == "div") {
+			if (headerEnded && (tokenName == "div")) {
 				if (debug & DEBUG_OTHER) {
 					cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
 				}
@@ -670,15 +678,15 @@ bool handleToken(SWBuf &text, XMLTag token) {
 						cout << "\tinBookIntro = " << inBookIntro << endl;
 					}
 
-					currentVerse.Testament(0);
-					currentVerse.Book(0);
-					currentVerse.Chapter(0);
-					currentVerse.Verse(0);
+					currentVerse.setTestament(0);
+					currentVerse.setBook(0);
+					currentVerse.setChapter(0);
+					currentVerse.setVerse(0);
 					writeEntry(text);
 				}
 				currentVerse = token.getAttribute("osisID");
-				currentVerse.Chapter(0);
-				currentVerse.Verse(0);
+				currentVerse.setChapter(0);
+				currentVerse.setVerse(0);
 				strcpy(currentOsisID, currentVerse.getOSISRef());
 
 				sidBook         = token.getAttribute("sID");
@@ -720,7 +728,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
 				}
 
 				currentVerse = token.getAttribute("osisID");
-				currentVerse.Verse(0);
+				currentVerse.setVerse(0);
 
 				if (debug & DEBUG_OTHER) {
 					cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl;
@@ -769,23 +777,23 @@ bool handleToken(SWBuf &text, XMLTag token) {
 
 				// Did we have pre-verse material that needs to be marked?
 				if (inPreVerse) {
-//					char genBuf[200];
-//					sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
-//					text.append(genBuf);
+					char genBuf[200];
+					sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
+					text.append(genBuf);
 				}
 
 				// Get osisID for verse or annotateRef for commentary
 				SWBuf keyVal = token.getAttribute(tokenName == "verse" ? "osisID" : "annotateRef");
 
-				// Massage the key into a form that ParseVerseList can accept
+				// Massage the key into a form that parseVerseList can accept
 				prepareSWVerseKey(keyVal);
 
 				// The osisID or annotateRef can be more than a single verse
 				// The first or only one is the currentVerse
 				// Use the last verse seen (i.e. the currentVerse) as the basis for recovering from bad parsing.
 				// This should never happen if the references are valid OSIS references
-				ListKey verseKeys = currentVerse.ParseVerseList(keyVal, currentVerse, true);
-				int memberKeyCount = verseKeys.Count();
+				ListKey verseKeys = currentVerse.parseVerseList(keyVal, currentVerse, true);
+				int memberKeyCount = verseKeys.getCount();
 				if (memberKeyCount) {
 					currentVerse = verseKeys.getElement(0);
 					// See if this osisID or annotateRef refers to more than one verse.
@@ -795,7 +803,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
 					// if there is only one verse.
 					verseKeys.setPosition(TOP);
 					verseKeys.increment(1);
-					if (!verseKeys.Error()) {
+					if (!verseKeys.popError()) {
 						linkedVerses.push_back(verseKeys);
 					}
 				}
@@ -916,9 +924,9 @@ bool handleToken(SWBuf &text, XMLTag token) {
 			}
 
 			if (inPreVerse) {
-//				char genBuf[200];
-//				sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
-//				text.append(genBuf);
+				char genBuf[200];
+				sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
+				text.append(genBuf);
 			}
 		}
 
@@ -958,8 +966,16 @@ bool handleToken(SWBuf &text, XMLTag token) {
 			}
 		}
 
-		// We haven't seen the first div so there is nothing to do.
+		// We haven't seen the first div outside the header so there is little to do.
 		if (!firstDiv) {
+			if (tokenName == "header") {
+				headerEnded = true;
+
+				if (debug & DEBUG_OTHER) {
+					cout << "DEBUG(FOUND): End of header found" << endl;
+				}
+			}
+
 			// Collect the content so it can be used to suggest the module's conf.
 			return false;
 		}
@@ -1237,13 +1253,13 @@ void writeLinks()
 	// Link all the verses
 	VerseKey destKey;
 	destKey.setVersificationSystem(currentVerse.getVersificationSystem());
-	destKey.AutoNormalize(0);
-	destKey.Headings(1);
+	destKey.setAutoNormalize(0);
+	destKey.setIntros(1);
 
 	VerseKey linkKey;
 	linkKey.setVersificationSystem(currentVerse.getVersificationSystem());
-	linkKey.AutoNormalize(0);
-	linkKey.Headings(1);
+	linkKey.setAutoNormalize(0);
+	linkKey.setIntros(1);
 	for (unsigned int i = 0; i < linkedVerses.size(); i++) {
 		// The verseKeys is a list of verses
 		// where the first is the real verse
@@ -1253,7 +1269,7 @@ void writeLinks()
 		destKey = verseKeys.getElement();
 		verseKeys.increment(1);
 
-		while (!verseKeys.Error()) {
+		while (!verseKeys.popError()) {
 			linkKey = verseKeys.getElement();
 			verseKeys.increment(1);
 			linkToEntry(linkKey, destKey);
@@ -1287,7 +1303,7 @@ void usage(const char *app, const char *error = 0) {
 	fprintf(stderr, "\t\t\t\t (2 bytes to store size equal 65535 characters)\n");
 	fprintf(stderr, "  -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
 	fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n");
-	VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
+	VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
 	StringList av11n = vmgr->getVersificationSystems();
 	for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) {
 		fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str());
@@ -1315,40 +1331,149 @@ void usage(const char *app, const char *error = 0) {
 }
 
 void processOSIS(istream& infile) {
+	typedef enum {
+		CS_NOT_IN_COMMENT,		// or seen starting "<"
+		CS_SEEN_STARTING_EXCLAMATION,
+		CS_SEEN_STARTING_HYPHEN,
+		CS_IN_COMMENT,
+		CS_SEEN_ENDING_HYPHEN,
+		CS_SEEN_SECOND_ENDING_HYPHEN,
+		CS_SEEN_ENDING_GREATER_THAN
+	} t_commentstate;
+
 	activeOsisID[0] = '\0';
 
 	strcpy(currentOsisID,"N/A");
 
 	currentVerse.setVersificationSystem(v11n);
-	currentVerse.AutoNormalize(0);
-	currentVerse.Headings(1);	// turn on mod/testmnt/book/chap headings
-	currentVerse.Persist(1);
+	currentVerse.setAutoNormalize(false);
+	currentVerse.setIntros(true);	// turn on mod/testmnt/book/chap headings
+	currentVerse.setPersist(true);
 
 	module->setKey(currentVerse);
 	module->setPosition(TOP);
 
 	SWBuf token;
 	SWBuf text;
+	bool incomment = false;
+	t_commentstate commentstate = CS_NOT_IN_COMMENT;
 	bool intoken = false;
 	bool inWhitespace = false;
 	bool seeingSpace = false;
 	unsigned char curChar = '\0';
 
 	while (infile.good()) {
-		
-		curChar = infile.get();
+
+		int possibleChar = infile.get();
 
 		// skip the character if it is bad. infile.good() will catch the problem
-		if (curChar == -1) {
+		if (possibleChar == -1) {
 			continue;
 		}
 
+		curChar = (unsigned char) possibleChar;
+
+		// All newlines are simply whitespace
+		// Does a SWORD module actually require this?
+		if (curChar == '\n') {
+			curChar = ' ';
+		}
+
 		if (!intoken && curChar == '<') {
 			intoken = true;
 			token = "<";
 			continue;
 		}
 
+		// Handle XML comments starting with "<!--", ending with "-->"
+
+		if (intoken && !incomment) {
+			switch (commentstate) {
+				case CS_NOT_IN_COMMENT :
+					if (curChar == '!') {
+						commentstate = CS_SEEN_STARTING_EXCLAMATION;
+						token.append((char) curChar);
+						continue;
+					} else {
+						break;
+					}
+
+				case CS_SEEN_STARTING_EXCLAMATION :
+					if (curChar == '-') {
+						commentstate = CS_SEEN_STARTING_HYPHEN;
+						token.append((char) curChar);
+						continue;
+					} else {
+						commentstate = CS_NOT_IN_COMMENT;
+						break;
+					}
+
+				case CS_SEEN_STARTING_HYPHEN :
+					if (curChar == '-') {
+						incomment = true;
+						commentstate = CS_IN_COMMENT;
+						token.append((char) curChar);
+
+						if (debug & DEBUG_OTHER) {
+							cout << "DEBUG(COMMENTS): in comment" << endl;
+						}
+
+						continue;
+					} else {
+						commentstate = CS_NOT_IN_COMMENT;
+						break;
+					}
+
+				default:
+					cout << "FATAL(COMMENTS): unknown commentstate on comment start: " << commentstate << endl;
+					exit(EXIT_BAD_NESTING);
+			}
+		}
+
+		if (incomment) {
+			switch (commentstate) {
+				case CS_IN_COMMENT:
+					if (curChar == '-') {
+						commentstate = CS_SEEN_ENDING_HYPHEN;
+						continue;
+					} else {
+						// ignore the character
+						continue;
+					}
+
+				case CS_SEEN_ENDING_HYPHEN :
+					if (curChar == '-') {
+						commentstate = CS_SEEN_SECOND_ENDING_HYPHEN;
+						continue;
+					} else {
+						// ignore character
+						commentstate = CS_IN_COMMENT;
+						continue;
+					}
+
+				case CS_SEEN_SECOND_ENDING_HYPHEN :
+					if (curChar == '>') {
+						intoken = false;
+						incomment = false;
+						commentstate = CS_NOT_IN_COMMENT;
+
+						if (debug & DEBUG_OTHER) {
+							cout << "DEBUG(COMMENTS): out of comment" << endl;
+						}
+
+						continue;
+					} else {
+						// ignore character
+						commentstate = CS_IN_COMMENT;
+						continue;
+					}
+
+				default:
+					cout << "FATAL(COMMENTS): unknown commentstate on comment end: " << commentstate << endl;
+					exit(EXIT_BAD_NESTING);
+			}
+		}
+
 		// Outside of tokens merge adjacent whitespace
 		if (!intoken) {
 			seeingSpace = isspace(curChar)!=0;
@@ -1367,25 +1492,28 @@ void processOSIS(istream& infile) {
 			inWhitespace = false;
 			token.append('>');
 			// take this isalpha if out to check for bugs in text
-			if ((isalpha(token[1])) || (isalpha(token[2]))) {
+			if (isalpha(token[1]) ||
+			    (((token[1] == '/') || (token[1] == '?')) && isalpha(token[2]))) {
 				//cout << "Handle:" << token.c_str() << endl;
 				XMLTag t = transformBSP(token.c_str());
 
 				if (!handleToken(text, t)) {
 					text.append(t);
 				}
+			} else {
+				cout << "WARNING(PARSE): malformed token: " << token << endl;
 			}
 			continue;
 		}
 
 		if (intoken) {
-			token.append(curChar);
+			token.append((char) curChar);
 		}
 		else {
 			switch (curChar) {
 				case '>' : text.append("&gt;"); break;
 				case '<' : text.append("&lt;"); break;
-				default  : text.append(curChar); break;
+				default  : text.append((char) curChar); break;
 			}
 		}
 	}
@@ -1403,7 +1531,7 @@ void processOSIS(istream& infile) {
 
 int main(int argc, char **argv) {
 
-	fprintf(stderr, "You are running osis2mod: $Rev: 2562 $\n");
+	fprintf(stderr, "You are running osis2mod: $Rev: 2893 $\n");
 
 	// Let's test our command line arguments
 	if (argc < 3) {
@@ -1474,6 +1602,8 @@ int main(int argc, char **argv) {
 		else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
 	}
 
+	if (isCommentary) isCommentary = true;	// avoid unused warning for now
+
 	if (compType == "ZIP") {
 #ifndef EXCLUDEZLIB
 		compressor = new ZipCompress();
@@ -1573,7 +1703,7 @@ int main(int argc, char **argv) {
 	if (cipherKey.length()) {
 		fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
 		cipherFilter = new CipherFilter(cipherKey.c_str());
-		module->AddRawFilter(cipherFilter);
+		module->addRawFilter(cipherFilter);
 	}
 
 	if (!module->isWritable()) {
@@ -1601,6 +1731,7 @@ int main(int argc, char **argv) {
 	if (cipherFilter)
 		delete cipherFilter;
 
+	fprintf(stderr, "SUCCESS: %s: has finished its work and will now rest\n", program);
 	exit(0); // success
 }
author	Roberto C. Sanchez <roberto@connexer.com>	2014-05-12 08:21:30 -0400
committer	Roberto C. Sanchez <roberto@connexer.com>	2014-05-12 08:21:30 -0400
commit	7a00574163029c0c2b649878c95d5acbd083564a (patch)
tree	c13cc5736025834df2874ed87ee8598070025ea6 /utilities/osis2mod.cpp
parent	b745315323de9f27538edac9453205ca70e6186e (diff)