diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-05-12 08:21:30 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-05-12 08:21:30 -0400 |
commit | 7a00574163029c0c2b649878c95d5acbd083564a (patch) | |
tree | c13cc5736025834df2874ed87ee8598070025ea6 /utilities/osis2mod.cpp | |
parent | b745315323de9f27538edac9453205ca70e6186e (diff) |
Imported Upstream version 1.7.2+dfsg
Diffstat (limited to 'utilities/osis2mod.cpp')
-rw-r--r-- | utilities/osis2mod.cpp | 287 |
1 files changed, 209 insertions, 78 deletions
diff --git a/utilities/osis2mod.cpp b/utilities/osis2mod.cpp index 7b937fd..eab2ee0 100644 --- a/utilities/osis2mod.cpp +++ b/utilities/osis2mod.cpp @@ -1,5 +1,10 @@ -/* - * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org) +/****************************************************************************** + * + * osis2mod.cpp - Utility to import a module in OSIS format + * + * $Id: osis2mod.cpp 2893 2013-07-16 03:07:02Z scribe $ + * + * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society * P. O. Box 2528 * Tempe, AZ 85280-2528 @@ -96,8 +101,8 @@ static bool inCanonicalOSISBook = true; // osisID is for a book that is not in S static bool normalize = true; // Whether to normalize UTF-8 to NFC bool isOSISAbbrev(const char *buf) { - VerseMgr *vmgr = VerseMgr::getSystemVerseMgr(); - const VerseMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem()); + VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr(); + const VersificationMgr::System *av11n = vmgr->getVersificationSystem(currentVerse.getVersificationSystem()); return av11n->getBookNumberByOSISName(buf) >= 0; } @@ -352,13 +357,13 @@ bool isValidRef(const char *buf) { // And set it to the reference under question VerseKey before; before.setVersificationSystem(currentVerse.getVersificationSystem()); - before.AutoNormalize(0); - before.Headings(1); + before.setAutoNormalize(false); + before.setIntros(true); before.setText(buf); // If we are a heading we must bail // These will autonormalize to the last verse of the prior chapter - if (!before.Testament() || !before.Book() || !before.Chapter() || !before.Verse()) { + if (!before.getTestament() || !before.getBook() || !before.getChapter() || !before.getVerse()) { return true; } @@ -366,7 +371,7 @@ bool isValidRef(const char *buf) { // And set it to the reference under question VerseKey after; after.setVersificationSystem(currentVerse.getVersificationSystem()); - after.AutoNormalize(1); + after.setAutoNormalize(true); after.setText(buf); if (before == after) @@ -375,8 +380,7 @@ bool isValidRef(const char *buf) { } // If we have gotten here the reference is not in the selected versification. - cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl; - + // cout << "INFO(V11N): " << before << " is not in the " << currentVerse.getVersificationSystem() << " versification." << endl; if (debug & DEBUG_REV11N) { cout << "DEBUG(V11N): " << before << " normalizes to " << after << endl; } @@ -409,25 +413,28 @@ bool isValidRef(const char *buf) { * param key the key that may need to be adjusted */ void makeValidRef(VerseKey &key) { + VerseKey saveKey; + saveKey.setVersificationSystem(currentVerse.getVersificationSystem()); + saveKey.setAutoNormalize(false); + saveKey.setIntros(true); + saveKey = currentVerse; - int chapterMax = key.getChapterMax(); - int verseMax = key.getVerseMax(); - - if (debug & DEBUG_REV11N) { - cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl; - } - - cout << "INFO(V11N): " << key.getOSISRef() << " is not in the " << key.getVersificationSystem() << " versification."; // Since isValidRef returned false constrain the key to the nearest prior reference. // If we are past the last chapter set the reference to the last chapter - if (key.Chapter() > chapterMax) { - key.Chapter(chapterMax); + int chapterMax = key.getChapterMax(); + if (key.getChapter() > chapterMax) { + key.setChapter(chapterMax); } // Either we set the chapter to the last chapter and now need to set to the last verse in the chapter // Or the verse is beyond the end of the chapter. // In any case we need to constrain the verse to it's chapter. - key.Verse(verseMax); + int verseMax = key.getVerseMax(); + key.setVerse(verseMax); + + if (debug & DEBUG_REV11N) { + cout << "DEBUG(V11N) Chapter max:" << chapterMax << ", Verse Max:" << verseMax << endl; + } // There are three cases we want to handle: // In the examples we are using the KJV versification where the last verse of Matt.7 is Matt.7.29. @@ -446,17 +453,19 @@ void makeValidRef(VerseKey &key) { // In this case we should re-versify Matt.7.30 as Matt.7.29. // However, since this and 2) are ambiguous, we'll re-reversify to the last entry in the module. - while (!key.Error() && !module->hasEntry(&key)) { + while (!key.popError() && !module->hasEntry(&key)) { key.decrement(1); } - cout << " Appending content to " << key.getOSISRef() << endl; + cout << "INFO(V11N): " << saveKey.getOSISRef() + << " is not in the " << key.getVersificationSystem() + << " versification. Appending content to " << key.getOSISRef() << endl; } void writeEntry(SWBuf &text, bool force = false) { char keyOsisID[255]; - static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2562 $\"/>"; + static const char* revision = "<milestone type=\"x-importer\" subType=\"x-osis2mod\" n=\"$Rev: 2893 $\"/>"; static bool firstOT = true; static bool firstNT = true; @@ -473,13 +482,13 @@ void writeEntry(SWBuf &text, bool force = false) { static VerseKey lastKey; lastKey.setVersificationSystem(currentVerse.getVersificationSystem()); - lastKey.AutoNormalize(0); - lastKey.Headings(1); + lastKey.setAutoNormalize(0); + lastKey.setIntros(1); VerseKey saveKey; saveKey.setVersificationSystem(currentVerse.getVersificationSystem()); - saveKey.AutoNormalize(0); - saveKey.Headings(1); + saveKey.setAutoNormalize(0); + saveKey.setIntros(1); saveKey = currentVerse; // If we have seen a verse and the supplied one is different then we output the collected one. @@ -494,16 +503,16 @@ void writeEntry(SWBuf &text, bool force = false) { prepareSWText(activeOsisID, activeVerseText); // Put the revision into the module - int testmt = currentVerse.Testament(); + int testmt = currentVerse.getTestament(); if ((testmt == 1 && firstOT) || (testmt == 2 && firstNT)) { VerseKey t; t.setVersificationSystem(currentVerse.getVersificationSystem()); - t.AutoNormalize(0); - t.Headings(1); + t.setAutoNormalize(0); + t.setIntros(1); t = currentVerse; - currentVerse.Book(0); - currentVerse.Chapter(0); - currentVerse.Verse(0); + currentVerse.setBook(0); + currentVerse.setChapter(0); + currentVerse.setVerse(0); module->setEntry(revision); currentVerse = t; switch (testmt) { @@ -534,14 +543,12 @@ void writeEntry(SWBuf &text, bool force = false) { } // The following is for initial verse content and for appending interverse content. - // Eliminate leading whitespace on the beginning of each verse and - // before we append to current content, since we just added one - text.trimStart(); if (activeVerseText.length()) { - activeVerseText += " "; activeVerseText += text; } else { + // Eliminate leading whitespace on the beginning of each verse + text.trimStart(); activeVerseText = text; } // text has been consumed so clear it out. @@ -561,8 +568,8 @@ void linkToEntry(VerseKey &linkKey, VerseKey &dest) { VerseKey saveKey; saveKey.setVersificationSystem(currentVerse.getVersificationSystem()); - saveKey.AutoNormalize(0); - saveKey.Headings(1); + saveKey.setAutoNormalize(0); + saveKey.setIntros(1); saveKey = currentVerse; currentVerse = linkKey; @@ -590,7 +597,7 @@ bool handleToken(SWBuf &text, XMLTag token) { // Flags indicating whether we are processing the content of to be prepended to a verse static bool inPreVerse = false; -// static int genID = 1; + static int genID = 1; // Flag indicating whether we are in "Words of Christ" static bool inWOC = false; @@ -599,6 +606,7 @@ bool handleToken(SWBuf &text, XMLTag token) { // Flag used to indicate where useful text begins static bool firstDiv = false; + static bool headerEnded = false; // Retain the sID of book, chapter and verse (commentary) divs so that we can find them again. // This relies on transformBSP. @@ -639,9 +647,9 @@ bool handleToken(SWBuf &text, XMLTag token) { } } - // throw away everything up to the first div + // throw away everything up to the first div (that is outside the header) if (!firstDiv) { - if (tokenName == "div") { + if (headerEnded && (tokenName == "div")) { if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): Found first div and pitching prior material: " << text << endl; } @@ -670,15 +678,15 @@ bool handleToken(SWBuf &text, XMLTag token) { cout << "\tinBookIntro = " << inBookIntro << endl; } - currentVerse.Testament(0); - currentVerse.Book(0); - currentVerse.Chapter(0); - currentVerse.Verse(0); + currentVerse.setTestament(0); + currentVerse.setBook(0); + currentVerse.setChapter(0); + currentVerse.setVerse(0); writeEntry(text); } currentVerse = token.getAttribute("osisID"); - currentVerse.Chapter(0); - currentVerse.Verse(0); + currentVerse.setChapter(0); + currentVerse.setVerse(0); strcpy(currentOsisID, currentVerse.getOSISRef()); sidBook = token.getAttribute("sID"); @@ -720,7 +728,7 @@ bool handleToken(SWBuf &text, XMLTag token) { } currentVerse = token.getAttribute("osisID"); - currentVerse.Verse(0); + currentVerse.setVerse(0); if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl; @@ -769,23 +777,23 @@ bool handleToken(SWBuf &text, XMLTag token) { // Did we have pre-verse material that needs to be marked? if (inPreVerse) { -// char genBuf[200]; -// sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++); -// text.append(genBuf); + char genBuf[200]; + sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" eID=\"pv%d\"/>", genID++); + text.append(genBuf); } // Get osisID for verse or annotateRef for commentary SWBuf keyVal = token.getAttribute(tokenName == "verse" ? "osisID" : "annotateRef"); - // Massage the key into a form that ParseVerseList can accept + // Massage the key into a form that parseVerseList can accept prepareSWVerseKey(keyVal); // The osisID or annotateRef can be more than a single verse // The first or only one is the currentVerse // Use the last verse seen (i.e. the currentVerse) as the basis for recovering from bad parsing. // This should never happen if the references are valid OSIS references - ListKey verseKeys = currentVerse.ParseVerseList(keyVal, currentVerse, true); - int memberKeyCount = verseKeys.Count(); + ListKey verseKeys = currentVerse.parseVerseList(keyVal, currentVerse, true); + int memberKeyCount = verseKeys.getCount(); if (memberKeyCount) { currentVerse = verseKeys.getElement(0); // See if this osisID or annotateRef refers to more than one verse. @@ -795,7 +803,7 @@ bool handleToken(SWBuf &text, XMLTag token) { // if there is only one verse. verseKeys.setPosition(TOP); verseKeys.increment(1); - if (!verseKeys.Error()) { + if (!verseKeys.popError()) { linkedVerses.push_back(verseKeys); } } @@ -916,9 +924,9 @@ bool handleToken(SWBuf &text, XMLTag token) { } if (inPreVerse) { -// char genBuf[200]; -// sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID); -// text.append(genBuf); + char genBuf[200]; + sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID); + text.append(genBuf); } } @@ -958,8 +966,16 @@ bool handleToken(SWBuf &text, XMLTag token) { } } - // We haven't seen the first div so there is nothing to do. + // We haven't seen the first div outside the header so there is little to do. if (!firstDiv) { + if (tokenName == "header") { + headerEnded = true; + + if (debug & DEBUG_OTHER) { + cout << "DEBUG(FOUND): End of header found" << endl; + } + } + // Collect the content so it can be used to suggest the module's conf. return false; } @@ -1237,13 +1253,13 @@ void writeLinks() // Link all the verses VerseKey destKey; destKey.setVersificationSystem(currentVerse.getVersificationSystem()); - destKey.AutoNormalize(0); - destKey.Headings(1); + destKey.setAutoNormalize(0); + destKey.setIntros(1); VerseKey linkKey; linkKey.setVersificationSystem(currentVerse.getVersificationSystem()); - linkKey.AutoNormalize(0); - linkKey.Headings(1); + linkKey.setAutoNormalize(0); + linkKey.setIntros(1); for (unsigned int i = 0; i < linkedVerses.size(); i++) { // The verseKeys is a list of verses // where the first is the real verse @@ -1253,7 +1269,7 @@ void writeLinks() destKey = verseKeys.getElement(); verseKeys.increment(1); - while (!verseKeys.Error()) { + while (!verseKeys.popError()) { linkKey = verseKeys.getElement(); verseKeys.increment(1); linkToEntry(linkKey, destKey); @@ -1287,7 +1303,7 @@ void usage(const char *app, const char *error = 0) { fprintf(stderr, "\t\t\t\t (2 bytes to store size equal 65535 characters)\n"); fprintf(stderr, " -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n"); fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n"); - VerseMgr *vmgr = VerseMgr::getSystemVerseMgr(); + VersificationMgr *vmgr = VersificationMgr::getSystemVersificationMgr(); StringList av11n = vmgr->getVersificationSystems(); for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) { fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str()); @@ -1315,40 +1331,149 @@ void usage(const char *app, const char *error = 0) { } void processOSIS(istream& infile) { + typedef enum { + CS_NOT_IN_COMMENT, // or seen starting "<" + CS_SEEN_STARTING_EXCLAMATION, + CS_SEEN_STARTING_HYPHEN, + CS_IN_COMMENT, + CS_SEEN_ENDING_HYPHEN, + CS_SEEN_SECOND_ENDING_HYPHEN, + CS_SEEN_ENDING_GREATER_THAN + } t_commentstate; + activeOsisID[0] = '\0'; strcpy(currentOsisID,"N/A"); currentVerse.setVersificationSystem(v11n); - currentVerse.AutoNormalize(0); - currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings - currentVerse.Persist(1); + currentVerse.setAutoNormalize(false); + currentVerse.setIntros(true); // turn on mod/testmnt/book/chap headings + currentVerse.setPersist(true); module->setKey(currentVerse); module->setPosition(TOP); SWBuf token; SWBuf text; + bool incomment = false; + t_commentstate commentstate = CS_NOT_IN_COMMENT; bool intoken = false; bool inWhitespace = false; bool seeingSpace = false; unsigned char curChar = '\0'; while (infile.good()) { - - curChar = infile.get(); + + int possibleChar = infile.get(); // skip the character if it is bad. infile.good() will catch the problem - if (curChar == -1) { + if (possibleChar == -1) { continue; } + curChar = (unsigned char) possibleChar; + + // All newlines are simply whitespace + // Does a SWORD module actually require this? + if (curChar == '\n') { + curChar = ' '; + } + if (!intoken && curChar == '<') { intoken = true; token = "<"; continue; } + // Handle XML comments starting with "<!--", ending with "-->" + + if (intoken && !incomment) { + switch (commentstate) { + case CS_NOT_IN_COMMENT : + if (curChar == '!') { + commentstate = CS_SEEN_STARTING_EXCLAMATION; + token.append((char) curChar); + continue; + } else { + break; + } + + case CS_SEEN_STARTING_EXCLAMATION : + if (curChar == '-') { + commentstate = CS_SEEN_STARTING_HYPHEN; + token.append((char) curChar); + continue; + } else { + commentstate = CS_NOT_IN_COMMENT; + break; + } + + case CS_SEEN_STARTING_HYPHEN : + if (curChar == '-') { + incomment = true; + commentstate = CS_IN_COMMENT; + token.append((char) curChar); + + if (debug & DEBUG_OTHER) { + cout << "DEBUG(COMMENTS): in comment" << endl; + } + + continue; + } else { + commentstate = CS_NOT_IN_COMMENT; + break; + } + + default: + cout << "FATAL(COMMENTS): unknown commentstate on comment start: " << commentstate << endl; + exit(EXIT_BAD_NESTING); + } + } + + if (incomment) { + switch (commentstate) { + case CS_IN_COMMENT: + if (curChar == '-') { + commentstate = CS_SEEN_ENDING_HYPHEN; + continue; + } else { + // ignore the character + continue; + } + + case CS_SEEN_ENDING_HYPHEN : + if (curChar == '-') { + commentstate = CS_SEEN_SECOND_ENDING_HYPHEN; + continue; + } else { + // ignore character + commentstate = CS_IN_COMMENT; + continue; + } + + case CS_SEEN_SECOND_ENDING_HYPHEN : + if (curChar == '>') { + intoken = false; + incomment = false; + commentstate = CS_NOT_IN_COMMENT; + + if (debug & DEBUG_OTHER) { + cout << "DEBUG(COMMENTS): out of comment" << endl; + } + + continue; + } else { + // ignore character + commentstate = CS_IN_COMMENT; + continue; + } + + default: + cout << "FATAL(COMMENTS): unknown commentstate on comment end: " << commentstate << endl; + exit(EXIT_BAD_NESTING); + } + } + // Outside of tokens merge adjacent whitespace if (!intoken) { seeingSpace = isspace(curChar)!=0; @@ -1367,25 +1492,28 @@ void processOSIS(istream& infile) { inWhitespace = false; token.append('>'); // take this isalpha if out to check for bugs in text - if ((isalpha(token[1])) || (isalpha(token[2]))) { + if (isalpha(token[1]) || + (((token[1] == '/') || (token[1] == '?')) && isalpha(token[2]))) { //cout << "Handle:" << token.c_str() << endl; XMLTag t = transformBSP(token.c_str()); if (!handleToken(text, t)) { text.append(t); } + } else { + cout << "WARNING(PARSE): malformed token: " << token << endl; } continue; } if (intoken) { - token.append(curChar); + token.append((char) curChar); } else { switch (curChar) { case '>' : text.append(">"); break; case '<' : text.append("<"); break; - default : text.append(curChar); break; + default : text.append((char) curChar); break; } } } @@ -1403,7 +1531,7 @@ void processOSIS(istream& infile) { int main(int argc, char **argv) { - fprintf(stderr, "You are running osis2mod: $Rev: 2562 $\n"); + fprintf(stderr, "You are running osis2mod: $Rev: 2893 $\n"); // Let's test our command line arguments if (argc < 3) { @@ -1474,6 +1602,8 @@ int main(int argc, char **argv) { else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } + if (isCommentary) isCommentary = true; // avoid unused warning for now + if (compType == "ZIP") { #ifndef EXCLUDEZLIB compressor = new ZipCompress(); @@ -1573,7 +1703,7 @@ int main(int argc, char **argv) { if (cipherKey.length()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); - module->AddRawFilter(cipherFilter); + module->addRawFilter(cipherFilter); } if (!module->isWritable()) { @@ -1601,6 +1731,7 @@ int main(int argc, char **argv) { if (cipherFilter) delete cipherFilter; + fprintf(stderr, "SUCCESS: %s: has finished its work and will now rest\n", program); exit(0); // success } |