summaryrefslogtreecommitdiff
path: root/utilities/osis2mod.cpp
diff options
context:
space:
mode:
authorRoberto C. Sanchez <roberto@connexer.com>2014-05-12 08:21:30 -0400
committerRoberto C. Sanchez <roberto@connexer.com>2014-05-12 08:21:30 -0400
commit7a00574163029c0c2b649878c95d5acbd083564a (patch)
treec13cc5736025834df2874ed87ee8598070025ea6 /utilities/osis2mod.cpp
parentb745315323de9f27538edac9453205ca70e6186e (diff)
Imported Upstream version 1.7.2+dfsg
Diffstat (limited to 'utilities/osis2mod.cpp')
-rw-r--r--utilities/osis2mod.cpp287
1 files changed, 209 insertions, 78 deletions
diff --git a/utilities/osis2mod.cpp b/utilities/osis2mod.cpp
index 7b937fd..eab2ee0 100644
--- a/utilities/osis2mod.cpp
+++ b/utilities/osis2mod.cpp
@@ -1,5 +1,10 @@
-/*
- * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org)
+/******************************************************************************
+ *
+ * osis2mod.cpp - Utility to import a module in OSIS format
+ *
+ * $Id: osis2mod.cpp 2893 2013-07-16 03:07:02Z scribe $
+ *
+ * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
* P. O. Box 2528
* Tempe, AZ 85280-2528
@@ -96,8 +101,8 @@ static bool inCanonicalOSISBook = true; // osisID is for a book that is not in S
static bool normalize = true; // Whether to normalize UTF-8 to NFC
bool isOSISAbbrev(const char *buf) {
- VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
- const VerseMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
+ VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
+ const VersificationMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem());
return av11n->getBookNumberByOSISName(buf) >= 0;
}
@@ -352,13 +357,13 @@ bool isValidRef(const char *buf) {
// And set it to the reference under question
VerseKey before;
before.setVersificationSystem(currentVerse.getVersificationSystem());
- before.AutoNormalize(0);
- before.Headings(1);
+ before.setAutoNormalize(false);
+ before.setIntros(true);
before.setText(buf);
// If we are a heading we must bail
// These will autonormalize to the last verse of the prior chapter
- if (!before.Testament() || !before.Book() || !before.Chapter() || !before.Verse()) {
+ if (!before.getTestament() || !before.getBook() || !before.getChapter() || !before.getVerse()) {
return true;
}
@@ -366,7 +371,7 @@ bool isValidRef(const char *buf) {
// And set it to the reference under question
VerseKey after;
after.setVersificationSystem(currentVerse.getVersificationSystem());
- after.AutoNormalize(1);
+ after.setAutoNormalize(true);
after.setText(buf);
if (before == after)
@@ -375,8 +380,7 @@ bool isValidRef(const char *buf) {
}
// If we have gotten here the reference is not in the selected versification.
- cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
-
+ // cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl;
if (debug & DEBUG_REV11N) {
cout << "DEBUG(V11N): " << before << " normalizes to " << after << endl;
}
@@ -409,25 +413,28 @@ bool isValidRef(const char *buf) {
* param key the key that may need to be adjusted
*/
void makeValidRef(VerseKey &key) {
+ VerseKey saveKey;
+ saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
+ saveKey.setAutoNormalize(false);
+ saveKey.setIntros(true);
+ saveKey = currentVerse;
- int chapterMax = key.getChapterMax();
- int verseMax = key.getVerseMax();
-
- if (debug & DEBUG_REV11N) {
- cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
- }
-
- cout << "INFO(V11N): " << key.getOSISRef() << " is not in the " << key.getVersificationSystem() << " versification.";
// Since isValidRef returned false constrain the key to the nearest prior reference.
// If we are past the last chapter set the reference to the last chapter
- if (key.Chapter() > chapterMax) {
- key.Chapter(chapterMax);
+ int chapterMax = key.getChapterMax();
+ if (key.getChapter() > chapterMax) {
+ key.setChapter(chapterMax);
}
// Either we set the chapter to the last chapter and now need to set to the last verse in the chapter
// Or the verse is beyond the end of the chapter.
// In any case we need to constrain the verse to its chapter.
- key.Verse(verseMax);
+ int verseMax = key.getVerseMax();
+ key.setVerse(verseMax);
+
+ if (debug & DEBUG_REV11N) {
+ cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl;
+ }
// There are three cases we want to handle:
// In the examples we are using the KJV versification where the last verse of Matt.7 is Matt.7.29.
@@ -446,17 +453,19 @@ void makeValidRef(VerseKey &key) {
// In this case we should re-versify Matt.7.30 as Matt.7.29.
// However, since this and 2) are ambiguous, we'll re-versify to the last entry in the module.
- while (!key.Error() && !module->hasEntry(&key)) {
+ while (!key.popError() && !module->hasEntry(&key)) {
key.decrement(1);
}
- cout << " Appending content to " << key.getOSISRef() << endl;
+ cout << "INFO(V11N): " << saveKey.getOSISRef()
+ << " is not in the " << key.getVersificationSystem()
+ << " versification. Appending content to " << key.getOSISRef() << endl;
}
void writeEntry(SWBuf &text, bool force = false) {
char keyOsisID[255];
- static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2562 $\"/>";
+ static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2893 $\"/>";
static bool firstOT = true;
static bool firstNT = true;
@@ -473,13 +482,13 @@ void writeEntry(SWBuf &text, bool force = false) {
static VerseKey lastKey;
lastKey.setVersificationSystem(currentVerse.getVersificationSystem());
- lastKey.AutoNormalize(0);
- lastKey.Headings(1);
+ lastKey.setAutoNormalize(0);
+ lastKey.setIntros(1);
VerseKey saveKey;
saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
- saveKey.AutoNormalize(0);
- saveKey.Headings(1);
+ saveKey.setAutoNormalize(0);
+ saveKey.setIntros(1);
saveKey = currentVerse;
// If we have seen a verse and the supplied one is different then we output the collected one.
@@ -494,16 +503,16 @@ void writeEntry(SWBuf &text, bool force = false) {
prepareSWText(activeOsisID, activeVerseText);
// Put the revision into the module
- int testmt = currentVerse.Testament();
+ int testmt = currentVerse.getTestament();
if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) {
VerseKey t;
t.setVersificationSystem(currentVerse.getVersificationSystem());
- t.AutoNormalize(0);
- t.Headings(1);
+ t.setAutoNormalize(0);
+ t.setIntros(1);
t = currentVerse;
- currentVerse.Book(0);
- currentVerse.Chapter(0);
- currentVerse.Verse(0);
+ currentVerse.setBook(0);
+ currentVerse.setChapter(0);
+ currentVerse.setVerse(0);
module->setEntry(revision);
currentVerse = t;
switch (testmt) {
@@ -534,14 +543,12 @@ void writeEntry(SWBuf &text, bool force = false) {
}
// The following is for initial verse content and for appending interverse content.
- // Eliminate leading whitespace on the beginning of each verse and
- // before we append to current content, since we just added one
- text.trimStart();
if (activeVerseText.length()) {
- activeVerseText += " ";
activeVerseText += text;
}
else {
+ // Eliminate leading whitespace on the beginning of each verse
+ text.trimStart();
activeVerseText = text;
}
// text has been consumed so clear it out.
@@ -561,8 +568,8 @@ void linkToEntry(VerseKey &linkKey, VerseKey &dest) {
VerseKey saveKey;
saveKey.setVersificationSystem(currentVerse.getVersificationSystem());
- saveKey.AutoNormalize(0);
- saveKey.Headings(1);
+ saveKey.setAutoNormalize(0);
+ saveKey.setIntros(1);
saveKey = currentVerse;
currentVerse = linkKey;
@@ -590,7 +597,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
// Flag indicating whether we are processing content that is to be prepended to a verse
static bool inPreVerse = false;
-// static int genID = 1;
+ static int genID = 1;
// Flag indicating whether we are in "Words of Christ"
static bool inWOC = false;
@@ -599,6 +606,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
// Flag used to indicate where useful text begins
static bool firstDiv = false;
+ static bool headerEnded = false;
// Retain the sID of book, chapter and verse (commentary) divs so that we can find them again.
// This relies on transformBSP.
@@ -639,9 +647,9 @@ bool handleToken(SWBuf &text, XMLTag token) {
}
}
- // throw away everything up to the first div
+ // throw away everything up to the first div (that is outside the header)
if (!firstDiv) {
- if (tokenName == "div") {
+ if (headerEnded && (tokenName == "div")) {
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl;
}
@@ -670,15 +678,15 @@ bool handleToken(SWBuf &text, XMLTag token) {
cout << "\tinBookIntro = " << inBookIntro << endl;
}
- currentVerse.Testament(0);
- currentVerse.Book(0);
- currentVerse.Chapter(0);
- currentVerse.Verse(0);
+ currentVerse.setTestament(0);
+ currentVerse.setBook(0);
+ currentVerse.setChapter(0);
+ currentVerse.setVerse(0);
writeEntry(text);
}
currentVerse = token.getAttribute("osisID");
- currentVerse.Chapter(0);
- currentVerse.Verse(0);
+ currentVerse.setChapter(0);
+ currentVerse.setVerse(0);
strcpy(currentOsisID, currentVerse.getOSISRef());
sidBook = token.getAttribute("sID");
@@ -720,7 +728,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
}
currentVerse = token.getAttribute("osisID");
- currentVerse.Verse(0);
+ currentVerse.setVerse(0);
if (debug & DEBUG_OTHER) {
cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl;
@@ -769,23 +777,23 @@ bool handleToken(SWBuf &text, XMLTag token) {
// Did we have pre-verse material that needs to be marked?
if (inPreVerse) {
-// char genBuf[200];
-// sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
-// text.append(genBuf);
+ char genBuf[200];
+ sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++);
+ text.append(genBuf);
}
// Get osisID for verse or annotateRef for commentary
SWBuf keyVal = token.getAttribute(tokenName == "verse" ? "osisID" : "annotateRef");
- // Massage the key into a form that ParseVerseList can accept
+ // Massage the key into a form that parseVerseList can accept
prepareSWVerseKey(keyVal);
// The osisID or annotateRef can be more than a single verse
// The first or only one is the currentVerse
// Use the last verse seen (i.e. the currentVerse) as the basis for recovering from bad parsing.
// This should never happen if the references are valid OSIS references
- ListKey verseKeys = currentVerse.ParseVerseList(keyVal, currentVerse, true);
- int memberKeyCount = verseKeys.Count();
+ ListKey verseKeys = currentVerse.parseVerseList(keyVal, currentVerse, true);
+ int memberKeyCount = verseKeys.getCount();
if (memberKeyCount) {
currentVerse = verseKeys.getElement(0);
// See if this osisID or annotateRef refers to more than one verse.
@@ -795,7 +803,7 @@ bool handleToken(SWBuf &text, XMLTag token) {
// if there is only one verse.
verseKeys.setPosition(TOP);
verseKeys.increment(1);
- if (!verseKeys.Error()) {
+ if (!verseKeys.popError()) {
linkedVerses.push_back(verseKeys);
}
}
@@ -916,9 +924,9 @@ bool handleToken(SWBuf &text, XMLTag token) {
}
if (inPreVerse) {
-// char genBuf[200];
-// sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
-// text.append(genBuf);
+ char genBuf[200];
+ sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID);
+ text.append(genBuf);
}
}
@@ -958,8 +966,16 @@ bool handleToken(SWBuf &text, XMLTag token) {
}
}
- // We haven't seen the first div so there is nothing to do.
+ // We haven't seen the first div outside the header so there is little to do.
if (!firstDiv) {
+ if (tokenName == "header") {
+ headerEnded = true;
+
+ if (debug & DEBUG_OTHER) {
+ cout << "DEBUG(FOUND): End of header found" << endl;
+ }
+ }
+
// Collect the content so it can be used to suggest the module's conf.
return false;
}
@@ -1237,13 +1253,13 @@ void writeLinks()
// Link all the verses
VerseKey destKey;
destKey.setVersificationSystem(currentVerse.getVersificationSystem());
- destKey.AutoNormalize(0);
- destKey.Headings(1);
+ destKey.setAutoNormalize(0);
+ destKey.setIntros(1);
VerseKey linkKey;
linkKey.setVersificationSystem(currentVerse.getVersificationSystem());
- linkKey.AutoNormalize(0);
- linkKey.Headings(1);
+ linkKey.setAutoNormalize(0);
+ linkKey.setIntros(1);
for (unsigned int i = 0; i < linkedVerses.size(); i++) {
// The verseKeys is a list of verses
// where the first is the real verse
@@ -1253,7 +1269,7 @@ void writeLinks()
destKey = verseKeys.getElement();
verseKeys.increment(1);
- while (!verseKeys.Error()) {
+ while (!verseKeys.popError()) {
linkKey = verseKeys.getElement();
verseKeys.increment(1);
linkToEntry(linkKey, destKey);
@@ -1287,7 +1303,7 @@ void usage(const char *app, const char *error = 0) {
fprintf(stderr, "\t\t\t\t (2 bytes to store size equal 65535 characters)\n");
fprintf(stderr, " -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n");
fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n");
- VerseMgr *vmgr = VerseMgr::getSystemVerseMgr();
+ VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr();
StringList av11n = vmgr->getVersificationSystems();
for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) {
fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str());
@@ -1315,40 +1331,149 @@ void usage(const char *app, const char *error = 0) {
}
void processOSIS(istream& infile) {
+ typedef enum {
+ CS_NOT_IN_COMMENT, // or seen starting "<"
+ CS_SEEN_STARTING_EXCLAMATION,
+ CS_SEEN_STARTING_HYPHEN,
+ CS_IN_COMMENT,
+ CS_SEEN_ENDING_HYPHEN,
+ CS_SEEN_SECOND_ENDING_HYPHEN,
+ CS_SEEN_ENDING_GREATER_THAN
+ } t_commentstate;
+
activeOsisID[0] = '\0';
strcpy(currentOsisID,"N/A");
currentVerse.setVersificationSystem(v11n);
- currentVerse.AutoNormalize(0);
- currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings
- currentVerse.Persist(1);
+ currentVerse.setAutoNormalize(false);
+ currentVerse.setIntros(true); // turn on mod/testmnt/book/chap headings
+ currentVerse.setPersist(true);
module->setKey(currentVerse);
module->setPosition(TOP);
SWBuf token;
SWBuf text;
+ bool incomment = false;
+ t_commentstate commentstate = CS_NOT_IN_COMMENT;
bool intoken = false;
bool inWhitespace = false;
bool seeingSpace = false;
unsigned char curChar = '\0';
while (infile.good()) {
-
- curChar = infile.get();
+
+ int possibleChar = infile.get();
// skip the character if it is bad. infile.good() will catch the problem
- if (curChar == -1) {
+ if (possibleChar == -1) {
continue;
}
+ curChar = (unsigned char) possibleChar;
+
+ // All newlines are simply whitespace
+ // Does a SWORD module actually require this?
+ if (curChar == '\n') {
+ curChar = ' ';
+ }
+
if (!intoken && curChar == '<') {
intoken = true;
token = "<";
continue;
}
+ // Handle XML comments starting with "<!--", ending with "-->"
+
+ if (intoken && !incomment) {
+ switch (commentstate) {
+ case CS_NOT_IN_COMMENT :
+ if (curChar == '!') {
+ commentstate = CS_SEEN_STARTING_EXCLAMATION;
+ token.append((char) curChar);
+ continue;
+ } else {
+ break;
+ }
+
+ case CS_SEEN_STARTING_EXCLAMATION :
+ if (curChar == '-') {
+ commentstate = CS_SEEN_STARTING_HYPHEN;
+ token.append((char) curChar);
+ continue;
+ } else {
+ commentstate = CS_NOT_IN_COMMENT;
+ break;
+ }
+
+ case CS_SEEN_STARTING_HYPHEN :
+ if (curChar == '-') {
+ incomment = true;
+ commentstate = CS_IN_COMMENT;
+ token.append((char) curChar);
+
+ if (debug & DEBUG_OTHER) {
+ cout << "DEBUG(COMMENTS): in comment" << endl;
+ }
+
+ continue;
+ } else {
+ commentstate = CS_NOT_IN_COMMENT;
+ break;
+ }
+
+ default:
+ cout << "FATAL(COMMENTS): unknown commentstate on comment start: " << commentstate << endl;
+ exit(EXIT_BAD_NESTING);
+ }
+ }
+
+ if (incomment) {
+ switch (commentstate) {
+ case CS_IN_COMMENT:
+ if (curChar == '-') {
+ commentstate = CS_SEEN_ENDING_HYPHEN;
+ continue;
+ } else {
+ // ignore the character
+ continue;
+ }
+
+ case CS_SEEN_ENDING_HYPHEN :
+ if (curChar == '-') {
+ commentstate = CS_SEEN_SECOND_ENDING_HYPHEN;
+ continue;
+ } else {
+ // ignore character
+ commentstate = CS_IN_COMMENT;
+ continue;
+ }
+
+ case CS_SEEN_SECOND_ENDING_HYPHEN :
+ if (curChar == '>') {
+ intoken = false;
+ incomment = false;
+ commentstate = CS_NOT_IN_COMMENT;
+
+ if (debug & DEBUG_OTHER) {
+ cout << "DEBUG(COMMENTS): out of comment" << endl;
+ }
+
+ continue;
+ } else {
+ // ignore character
+ commentstate = CS_IN_COMMENT;
+ continue;
+ }
+
+ default:
+ cout << "FATAL(COMMENTS): unknown commentstate on comment end: " << commentstate << endl;
+ exit(EXIT_BAD_NESTING);
+ }
+ }
+
// Outside of tokens merge adjacent whitespace
if (!intoken) {
seeingSpace = isspace(curChar)!=0;
@@ -1367,25 +1492,28 @@ void processOSIS(istream& infile) {
inWhitespace = false;
token.append('>');
// take this isalpha if out to check for bugs in text
- if ((isalpha(token[1])) || (isalpha(token[2]))) {
+ if (isalpha(token[1]) ||
+ (((token[1] == '/') || (token[1] == '?')) && isalpha(token[2]))) {
//cout << "Handle:" << token.c_str() << endl;
XMLTag t = transformBSP(token.c_str());
if (!handleToken(text, t)) {
text.append(t);
}
+ } else {
+ cout << "WARNING(PARSE): malformed token: " << token << endl;
}
continue;
}
if (intoken) {
- token.append(curChar);
+ token.append((char) curChar);
}
else {
switch (curChar) {
case '>' : text.append("&gt;"); break;
case '<' : text.append("&lt;"); break;
- default : text.append(curChar); break;
+ default : text.append((char) curChar); break;
}
}
}
@@ -1403,7 +1531,7 @@ void processOSIS(istream& infile) {
int main(int argc, char **argv) {
- fprintf(stderr, "You are running osis2mod: $Rev: 2562 $\n");
+ fprintf(stderr, "You are running osis2mod: $Rev: 2893 $\n");
// Let's test our command line arguments
if (argc < 3) {
@@ -1474,6 +1602,8 @@ int main(int argc, char **argv) {
else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str());
}
+ if (isCommentary) isCommentary = true; // avoid unused warning for now
+
if (compType == "ZIP") {
#ifndef EXCLUDEZLIB
compressor = new ZipCompress();
@@ -1573,7 +1703,7 @@ int main(int argc, char **argv) {
if (cipherKey.length()) {
fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() );
cipherFilter = new CipherFilter(cipherKey.c_str());
- module->AddRawFilter(cipherFilter);
+ module->addRawFilter(cipherFilter);
}
if (!module->isWritable()) {
@@ -1601,6 +1731,7 @@ int main(int argc, char **argv) {
if (cipherFilter)
delete cipherFilter;
+ fprintf(stderr, "SUCCESS: %s: has finished its work and will now rest\n", program);
exit(0); // success
}