of type acrostic, psalm or no type, will be taken as a title for the verse. // A <title> of type main or chapter will be seen as a chapter title. // 3) Between verses, the material is split between the prior verse and the next verse. // Basically, while end and empty tags are found, they belong to the prior verse. // Once a begin tag is found, it belongs to the next verse. // If the title has an attribute type of "main" or "chapter" // it belongs to its <div> or <chapter> and is treated as part of its heading // Otherwise if it a title in a chapter before the first the first verse it // is put into the verse as a preverse title. if (!inPreVerse && !inBookHeader) { if (inChapterHeader) { // Determine when we are no longer in a chapter heading, but in pre-verse material: // If we see one of the following: // a section div // a title that is not main or chapter if ((!strcmp(tokenName, "div") && (typeAttr && !strcmp(typeAttr, "section"))) || (!strcmp(tokenName, "title") && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter")))) ) { // Since we have found the boundary, we need to write out the chapter heading writeEntry(text); // And we are no longer in the chapter heading inChapterHeader = false; // But rather, we are now in pre-verse material inPreVerse = true; } } else if (!inVerse && inChapter) { inPreVerse = true; } if (inPreVerse) { char genBuf[200]; sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID++); text.append(genBuf); } } #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { if (!inVerse && !inBookHeader && !inChapterHeader) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse start token " << token << ":" << text.c_str() << endl; } } #endif return false; } // Done with procesing start and empty tags // Process end tags else { if (tagStack.empty()) { cout << "FATAL(NESTING): " << currentOsisID << ": tag expected" << endl; exit(EXIT_BAD_NESTING); } // Note: empty end tags have the eID attribute if (!token.isEmpty()) { XMLTag topToken = tagStack.top(); tagDepth = tagStack.size(); #ifdef DEBUG if (debug & DEBUG_STACK) { cout << "DEBUG(STACK): " << currentOsisID << ": pop(" << tagDepth << ") " << topToken.getName() << endl; } #endif tagStack.pop(); if (strcmp(topToken.getName(), tokenName)) { cout << "FATAL(NESTING): " << currentOsisID << ": Expected " << topToken.getName() << " found " << tokenName << endl; // exit(EXIT_BAD_NESTING); // (OSK) I'm sure this validity check is a good idea, but there's a but somewhere that's killing the converter here. // So I'm disabling this line. Unvalidated OSIS files shouldn't be run through the converter anyway. // (DM) This has nothing to do with well-form or valid. It checks milestoned elements for proper nesting. } } // We haven't seen the first div so there is nothing to do. if (!firstDiv) { // Collect the content so it can be used to suggest the module's conf. return false; } // VERSE and COMMENTARY END if (!strcmp(tokenName, "verse") || (inVerse && !strcmp(tokenName, "div"))) { if (tagDepth != verseDepth) { cout << "WARNING(NESTING): verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl; } // If we are in WOC then we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse. if (inWOC) { text.append("</q>"); } // Include the token if it is not a verse if (strcmp(tokenName, "verse")) { text.append(token); } #ifdef DEBUG else if (debug & DEBUG_VERSE) { // transform the verse into a milestone XMLTag t = "<milestone resp=\"v\" />"; // copy all the attributes of the verse element to the milestone StringList attrNames = token.getAttributeNames(); for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) { const char* attr = (*loop).c_str(); t.setAttribute(attr, token.getAttribute(attr)); } text.append(t); } #endif writeEntry(text); inVerse = false; inPreVerse = false; verseDepth = 0; return true; } // Handle WOC quotes. // Note this requires transformBSP to make them into milestones // Otherwise have to manage it here if (!strcmp(tokenName, "q")) { XMLTag topToken = quoteStack.top(); #ifdef DEBUG if (debug & DEBUG_QUOTE) { cout << "DEBUG(QUOTE): " << currentOsisID << ": quote pop(" << quoteStack.size() << ") " << topToken << " -- " << token << endl; } #endif quoteStack.pop(); // If we have found an end tag for a <q who="Jesus"> then we are done with the WOC // and we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse. if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) { #ifdef DEBUG if (debug & DEBUG_QUOTE) { cout << "DEBUG(QUOTE): " << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl; } #endif inWOC = false; const char *sID = topToken.getAttribute("sID"); const char *eID = token.getAttribute("eID"); if (!sID) { sID = ""; } if (!eID) { eID = ""; } if (strcmp(sID, eID)) { cout << "ERROR(NESTING): improper nesting " << currentOsisID << ": matching (sID,eID) not found. Looking at (" << sID << "," << eID << ")" << endl; } // Output the quotation mark if appropriate, inside the WOC. // If there is no marker attribute, let the SWORD engine manufacture one. // If there is a marker attribute and it has content, then output that. // If the marker attribute is present and empty, then there is nothing to do. // And have it within the WOC markup if (!token.getAttribute("marker") || token.getAttribute("marker")[0]) { token.setAttribute("who", 0); // remove the who="Jesus" text.append(token); } // Now close the WOC text.append("</q>"); return true; } return false; } // Look for the end of document, book and chapter // Also for material that goes with last entry if (!inVerse && !inBookHeader && !inChapterHeader) { // Is this the end of a chapter. if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) { text.append(token); writeEntry(text); inChapter = false; chapterDepth = 0; verseDepth = 0; return true; } // Is it the end of a book if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) { text.append(token); writeEntry(text); bookDepth = 0; chapterDepth = 0; verseDepth = 0; return true; } // Do not include the end of an osis document if (!strcmp(tokenName, "osisText") || !strcmp(tokenName, "osis")) { bookDepth = 0; chapterDepth = 0; verseDepth = 0; text = ""; return true; } // When we are not inPreVerse, the interverse tags get appended to the preceeding verse. if (!inPreVerse) { text.append(token); writeEntry(text); #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": appending interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl; } #endif return true; } #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl; } #endif return false; } return false; } // done with Processing end tags return false; } /** * Support normalizations necessary for a SWORD module. * OSIS allows for document structure (Book, Section, Paragraph or BSP) * to overlap Bible versification (Book, Chapter, Verse). * Most SWORD applications need to display verses in isolation or in HTML table cells, * requiring each stored entry (i.e. verses) to be well-formed xml. * This routine normalizes container elements which could cross verse boundaries into milestones. * For most of these OSIS elements, there is a milestone form. However, p is not milestoneable. * For this reason, p is transformed into lb elements. * param t the tag to transform * return the transformed tag or the original one */ XMLTag transformBSP(XMLTag t) { static std::stack<XMLTag> bspTagStack; static int sID = 1; char buf[11]; // Support simplification transformations if (t.isEmpty()) { #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform empty " << t << endl; } #endif return t; } const char* tagName = t.getName(); if (!t.isEndTag()) { // Transform <p> into <div type="paragraph"> and milestone it if (!strcmp(tagName, "p")) { t.setText("<div type=\"paragraph\" />"); sprintf(buf, "gen%d", sID++); t.setAttribute("sID", buf); } // Transform <tag> into <tag sID="">, where tag is a milestoneable element. // The following containers are milestoneable. // abbr, closer, div, foreign, l, lg, salute, signed, speech // Leaving out: // abbr When would this ever cross a boundary? // seg as it is used for a divineName hack // foreign so that it can be easily italicized else if (!strcmp(tagName, "chapter") || !strcmp(tagName, "closer") || !strcmp(tagName, "div") || !strcmp(tagName, "l") || !strcmp(tagName, "lg") || !strcmp(tagName, "q") || !strcmp(tagName, "salute") || !strcmp(tagName, "signed") || !strcmp(tagName, "speech") || !strcmp(tagName, "verse") ) { t.setEmpty(true); sprintf(buf, "gen%d", sID++); t.setAttribute("sID", buf); } bspTagStack.push(t); #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform push (" << bspTagStack.size() << ") " << t << " (tagname=" << tagName << ")" << endl; XMLTag topToken = bspTagStack.top(); cout << "DEBUG(XFORM): " << currentOsisID << ": xform top(" << bspTagStack.size() << ") " << topToken << endl; } #endif } else { XMLTag topToken = bspTagStack.top(); #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform pop(" << bspTagStack.size() << ") " << topToken << endl; } #endif bspTagStack.pop(); // Look for the milestoneable container tags handled above. if (!strcmp(tagName, "chapter") || !strcmp(tagName, "closer") || !strcmp(tagName, "div") || !strcmp(tagName, "l") || !strcmp(tagName, "lg") || !strcmp(tagName, "p") || !strcmp(tagName, "q") || !strcmp(tagName, "salute") || !strcmp(tagName, "signed") || !strcmp(tagName, "speech") || !strcmp(tagName, "verse") ) { // make this a clone of the start tag with sID changed to eID // Note: in the case of </p> the topToken is a <div type="paragraph"> t = topToken; t.setAttribute("eID", t.getAttribute("sID")); t.setAttribute("sID", 0); } } return t; } /** * Write out all links in the module. * Waiting is necessary because writeEntry might ultimately append * text to a verse moving it's offset in the data file. * While we are minimizing it by postponing the write until we have * gathered the next verse, the following scenario is happening: * A module is using linked verses and has some verses that are not * in the chosen versification. If the out-of-canon verse happens following * a linked verse, the out-of-canon verse is appended to the prior * verse. Care has to be taken that the linked verses all point to * the first of the set. */ void writeLinks() { // Link all the verses VerseKey destKey; destKey.setVersificationSystem(currentVerse.getVersificationSystem()); destKey.AutoNormalize(0); destKey.Headings(1); VerseKey linkKey; linkKey.setVersificationSystem(currentVerse.getVersificationSystem()); linkKey.AutoNormalize(0); linkKey.Headings(1); for (unsigned int i = 0; i < linkedVerses.size(); i++) { // The verseKeys is a list of verses // where the first is the real verse // and the others link to it. ListKey verseKeys = linkedVerses[i]; verseKeys.setPosition(TOP); destKey = verseKeys.getElement(); verseKeys.increment(1); while (!verseKeys.Error()) { linkKey = verseKeys.getElement(); verseKeys.increment(1); linkToEntry(linkKey, destKey); } } } void usage(const char *app, const char *error = 0) { if (error) fprintf(stderr, "\n%s: %s\n", app, error); fprintf(stderr, "\nusage: %s <output/path> <osisDoc> [OPTIONS]\n", app); fprintf(stderr, " <output/path>\t\t an existing folder that the module will be written\n"); fprintf(stderr, " <osisDoc>\t\t path to the validated OSIS document, or '-' to read from standard input\n"); fprintf(stderr, " -a\t\t\t augment module if exists (default is to create new)\n"); fprintf(stderr, " -z\t\t\t use ZIP compression (default no compression)\n"); fprintf(stderr, " -Z\t\t\t use LZSS compression (default no compression)\n"); fprintf(stderr, " -b <2|3|4>\t\t compression block size (default 4):\n"); fprintf(stderr, "\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n"); fprintf(stderr, " -c <cipher_key>\t encipher module using supplied key\n"); fprintf(stderr, "\t\t\t\t (default no enciphering)\n"); fprintf(stderr, " -N\t\t\t do not convert UTF-8 or normalize UTF-8 to NFC\n"); fprintf(stderr, "\t\t\t\t (default is to convert to UTF-8, if needed,\n"); fprintf(stderr, "\t\t\t\t and then normalize to NFC)\n"); fprintf(stderr, "\t\t\t\t Note: UTF-8 texts should be normalized to NFC.\n"); fprintf(stderr, " -s <2|4>\t\t max text size per entry (default is 2).\n"); fprintf(stderr, "\t\t\t\t Note: useful for commentaries with very large entries\n"); fprintf(stderr, "\t\t\t\t in uncompressed modules (default is 65535 bytes)\n"); fprintf(stderr, " -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n"); fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n"); VerseMgr *vmgr = VerseMgr::getSystemVerseMgr(); StringList av11n = vmgr->getVersificationSystems(); for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) { fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str()); } #ifdef DEBUG fprintf(stderr, " -d <flags>\t\t turn on debugging (default is 0)\n"); fprintf(stderr, "\t\t\t\t Note: This flag may change in the future.\n"); fprintf(stderr, "\t\t\t\t Flags: The following are valid values:\n"); fprintf(stderr, "\t\t\t\t\t0 - no debugging\n"); fprintf(stderr, "\t\t\t\t\t1 - writes to module, very verbose\n"); fprintf(stderr, "\t\t\t\t\t2 - verse start and end\n"); fprintf(stderr, "\t\t\t\t\t4 - quotes, especially Words of Christ (WOC)\n"); fprintf(stderr, "\t\t\t\t\t8 - titles\n"); fprintf(stderr, "\t\t\t\t\t16 - inter-verse material\n"); fprintf(stderr, "\t\t\t\t\t32 - BSP to BCV transformations\n"); fprintf(stderr, "\t\t\t\t\t64 - v11n exceptions\n"); fprintf(stderr, "\t\t\t\t\t128 - parsing of osisID and osisRef\n"); fprintf(stderr, "\t\t\t\t\t256 - internal stack\n"); fprintf(stderr, "\t\t\t\t\t512 - miscellaneous\n"); fprintf(stderr, "\t\t\t\t This flag can be used more than once.\n"); #endif fprintf(stderr, "\n"); fprintf(stderr, "See http://www.crosswire.org/wiki/osis2mod for more details.\n"); fprintf(stderr, "\n"); exit(EXIT_BAD_ARG); } void processOSIS(istream& infile) { activeOsisID[0] = '\0'; strcpy(currentOsisID,"N/A"); currentVerse.setVersificationSystem(v11n); currentVerse.AutoNormalize(0); currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings currentVerse.Persist(1); module->setKey(currentVerse); module->setPosition(TOP); SWBuf token; SWBuf text; bool intoken = false; bool inWhitespace = false; bool seeingSpace = false; char curChar = '\0'; while (infile.good()) { curChar = infile.get(); // skip the character if it is bad. infile.good() will catch the problem if (curChar == -1) { continue; } if (!intoken && curChar == '<') { intoken = true; token = "<"; continue; } // Outside of tokens merge adjacent whitespace if (!intoken) { seeingSpace = isspace(curChar); if (seeingSpace) { if (inWhitespace) { continue; } // convert all whitespace to blanks curChar = ' '; } inWhitespace = seeingSpace; } if (intoken && curChar == '>') { intoken = false; inWhitespace = false; token.append('>'); // take this isalpha if out to check for bugs in text if ((isalpha(token[1])) || (isalpha(token[2]))) { //cout << "Handle:" << token.c_str() << endl; XMLTag t = transformBSP(token.c_str()); if (!handleToken(text, t)) { text.append(t); } } continue; } if (intoken) { token.append(curChar); } else { switch (curChar) { case '>' : text.append(">"); break; case '<' : text.append("<"); break; default : text.append(curChar); break; } } } // Force the last entry from the text buffer. text = ""; writeEntry(text, true); writeLinks(); #ifdef _ICU_ if (converted) fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted); if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized); #endif } int main(int argc, char **argv) { fprintf(stderr, "You are running osis2mod: $Rev: 2400 $\n"); // Let's test our command line arguments if (argc < 3) { usage(*argv); } // variables for arguments, holding defaults const char* program = argv[0]; const char* path = argv[1]; const char* osisDoc = argv[2]; int append = 0; SWBuf compType = ""; bool isCommentary = false; int iType = 4; int entrySize = 0; SWBuf cipherKey = ""; SWCompress *compressor = 0; for (int i = 3; i < argc; i++) { if (!strcmp(argv[i], "-a")) { append = 1; } else if (!strcmp(argv[i], "-z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (entrySize) usage(*argv, "Cannot specify both -z and -s"); compType = "ZIP"; } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (entrySize) usage(*argv, "Cannot specify both -Z and -s"); compType = "LZSS"; } else if (!strcmp(argv[i], "-b")) { if (i+1 < argc) { iType = atoi(argv[++i]); if ((iType >= 2) && (iType <= 4)) continue; } usage(*argv, "-b requires one of <2|3|4>"); } else if (!strcmp(argv[i], "-N")) { normalize = false; } else if (!strcmp(argv[i], "-c")) { if (i+1 < argc) cipherKey = argv[++i]; else usage(*argv, "-c requires <cipher_key>"); } else if (!strcmp(argv[i], "-v")) { if (i+1 < argc) v11n = argv[++i]; else usage(*argv, "-v requires <v11n>"); } else if (!strcmp(argv[i], "-s")) { if (compType.size()) usage(*argv, "Cannot specify -s and -z or -Z"); if (i+1 < argc) { entrySize = atoi(argv[++i]); if (entrySize == 2 || entrySize == 4) { continue; } } usage(*argv, "-s requires one of <2|4>"); } else if (!strcmp(argv[i], "-C")) { isCommentary = true; } #ifdef DEBUG else if (!strcmp(argv[i], "-d")) { if (i+1 < argc) debug |= atoi(argv[++i]); else usage(*argv, "-d requires <flags>"); } #endif else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } if (compType == "ZIP") { compressor = new ZipCompress(); } else if (compType = "LZSS") { compressor = new LZSSCompress(); } #ifndef _ICU_ if (normalize) { normalize = false; cout << "WARNING(UTF8): " << program << " is not compiled with support for ICU. Assuming -N." << endl; } #endif #ifdef DEBUG if (debug & DEBUG_OTHER) { cout << "DEBUG(ARGS):\n\tpath: " << path << "\n\tosisDoc: " << osisDoc << "\n\tcreate: " << append << "\n\tcompressType: " << compType << "\n\tblockType: " << iType << "\n\tcipherKey: " << cipherKey.c_str() << "\n\tnormalize: " << normalize << endl; } #endif if (!append) { // == 0 then create module // Try to initialize a default set of datafiles and indicies at our // datapath location passed to us from the user. if (compressor) { if (zText::createModule(path, iType, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } else if (entrySize == 4) { if (RawText4::createModule(path, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } else { if (RawText::createModule(path, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } } // Do some initialization stuff if (compressor) { // Create a compressed text module allowing very large entries // Taking defaults except for first, fourth, fifth and last argument module = new zText( path, // ipath 0, // iname 0, // idesc iType, // iblockType compressor, // icomp 0, // idisp ENC_UNKNOWN, // enc DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // lang v11n // versification ); } else if (entrySize == 4) { // Create a raw text module allowing very large entries // Taking defaults except for first and last argument module = new RawText4( path, // ipath 0, // iname 0, // idesc 0, // idisp ENC_UNKNOWN, // encoding DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // ilang v11n // versification ); } else { // Create a raw text module allowing reasonable sized entries // Taking defaults except for first and last argument module = new RawText( path, // ipath 0, // iname 0, // idesc 0, // idisp ENC_UNKNOWN, // encoding DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // ilang v11n // versification ); } SWFilter *cipherFilter = 0; if (cipherKey.length()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->AddRawFilter(cipherFilter); } if (!module->isWritable()) { fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" ); exit(EXIT_NO_WRITE); } // Either read from std::cin (aka stdin), when the argument is a '-' // or from a specified file. if (!strcmp(osisDoc, "-")) { processOSIS(cin); } else { // Let's see if we can open our input file ifstream infile(osisDoc); if (infile.fail()) { fprintf(stderr, "ERROR: %s: couldn't open input file: %s \n", program, osisDoc); exit(EXIT_NO_READ); } processOSIS(infile); infile.close(); } delete module; if (cipherFilter) delete cipherFilter; exit(0); // success }

if ((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "book"))) { if (inBookHeader || inChapterHeader) { // this one should never happen, but just in case #ifdef DEBUG if (debug & DEBUG_TITLE) { cout << "DEBUG(TITLE): " << currentOsisID << ": OOPS HEADING " << endl; cout << "\tinChapterHeader = " << inChapterHeader << endl; cout << "\tinBookHeader = " << inBookHeader << endl; } #endif currentVerse.Testament(0); currentVerse.Book(0); currentVerse.Chapter(0); currentVerse.Verse(0); writeEntry(text); } currentVerse = token.getAttribute("osisID"); currentVerse.Chapter(0); currentVerse.Verse(0); strcpy(currentOsisID, currentVerse.getOSISRef()); inChapter = false; inVerse = false; inPreVerse = false; inBookHeader = true; inChapterHeader = false; bookDepth = tagStack.size(); chapterDepth = 0; verseDepth = 0; inCanonicalOSISBook = isOSISAbbrev(token.getAttribute("osisID")); if (!inCanonicalOSISBook) { cout << "WARNING(V11N): New book is " << token.getAttribute("osisID") << " and is not in " << v11n << " versification, ignoring" << endl; } #ifdef DEBUG else if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): New book is " << currentVerse.getOSISRef() << endl; } #endif return false; } // CHAPTER START,

or if (((!strcmp(tokenName, "div")) && (typeAttr && !strcmp(typeAttr, "chapter"))) || (!strcmp(tokenName, "chapter")) ) { if (inBookHeader) { #ifdef DEBUG if (debug & DEBUG_TITLE) { cout << "DEBUG(TITLE): " << currentOsisID << ": BOOK HEADING "<< text.c_str() << endl; } #endif writeEntry(text); } currentVerse = token.getAttribute("osisID"); currentVerse.Verse(0); #ifdef DEBUG if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): Current chapter is " << currentVerse.getOSISRef() << " (" << token.getAttribute("osisID") << ")" << endl; } #endif strcpy(currentOsisID, currentVerse.getOSISRef()); inChapter = true; inVerse = false; inPreVerse = false; inBookHeader = false; inChapterHeader = true; chapterDepth = tagStack.size(); verseDepth = 0; return false; } // VERSE, OR COMMENTARY START,

if (!strcmp(tokenName, "verse") || (!strcmp(tokenName, "div") && token.getAttribute("annotateType"))) { #ifdef DEBUG if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): Entering verse" << endl; } #endif if (inChapterHeader) { SWBuf heading = text; text = ""; if (heading.length()) { #ifdef DEBUG if (debug & DEBUG_TITLE) { cout << "DEBUG(TITLE): " << currentOsisID << ": CHAPTER HEADING "<< heading.c_str() << endl; } #endif writeEntry(heading); } inChapterHeader = false; } // Did we have pre-verse material that needs to be marked? if (inPreVerse) { char genBuf[200]; sprintf(genBuf, "

", genID++); text.append(genBuf); } // Get osisID for verse or annotateRef for commentary SWBuf keyVal = token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID"); // Massage the key into a form that ParseVerseList can accept prepareSWVerseKey(keyVal); // The osisID or annotateRef can be more than a single verse // The first or only one is the currentVerse // Use the last verse seen (i.e. the currentVerse) as the basis for recovering from bad parsing. // This should never happen if the references are valid OSIS references ListKey verseKeys = currentVerse.ParseVerseList(keyVal, currentVerse, true); int memberKeyCount = verseKeys.Count(); if (memberKeyCount) { currentVerse = verseKeys.getElement(0); // See if this osisID or annotateRef refers to more than one verse. // If it does, save it until all verses have been seen. // At that point we will output links. // This can be done by incrementing, which will produce an error // if there is only one verse. verseKeys.setPosition(TOP); verseKeys.increment(1); if (!verseKeys.Error()) { linkedVerses.push_back(verseKeys); } } else { cout << "ERROR(REF): Invalid osisID/annotateRef: " << token.getAttribute(strcmp(tokenName, "verse") ? "annotateRef" : "osisID") << endl; } strcpy(currentOsisID, currentVerse.getOSISRef()); #ifdef DEBUG if (debug & DEBUG_OTHER) { cout << "DEBUG(FOUND): New current verse is " << currentVerse.getOSISRef() << endl; cout << "DEBUG(FOUND): osisID/annotateRef is adjusted to: " << keyVal << endl; } #endif inVerse = true; inPreVerse = false; inBookHeader = false; inChapterHeader = false; verseDepth = tagStack.size(); // Include the token if it is not a verse if (strcmp(tokenName, "verse")) { text.append(token); } #ifdef DEBUG else if (debug & DEBUG_VERSE) { // transform the verse into a milestone XMLTag t = ""; // copy all the attributes of the verse element to the milestone StringList attrNames = token.getAttributeNames(); for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) { const char* attr = (*loop).c_str(); t.setAttribute(attr, token.getAttribute(attr)); } text.append(t); } #endif if (inWOC) { text.append(wocTag); } return true; } } // done with Handle Book, Chapter, and Verse (or commentary equivalent) // Now consider everything else. // Handle WOC quotes. // Note this requires transformBSP to make them into milestones // Otherwise have to do it here if (!strcmp(tokenName, "q")) { quoteStack.push(token); #ifdef DEBUG if (debug & DEBUG_QUOTE) { cout << "DEBUG(QUOTE): " << currentOsisID << ": quote top(" << quoteStack.size() << ") " << token << endl; } #endif if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) { inWOC = true; // Output per verse WOC markup. text.append(wocTag); // Output the quotation mark if appropriate, inside the WOC. // If there is no marker attribute, let the SWORD engine manufacture one. // If there is a marker attribute and it has content, then output that. // If the marker attribute is present and empty, then there is nothing to do. // And have it within the WOC markup if (!token.getAttribute("marker") || token.getAttribute("marker")[0]) { token.setAttribute("who", 0); // remove the who="Jesus" text.append(token); } return true; } return false; } // Have we found the start of pre-verse material? // Pre-verse material follows the following rules // 1) Between the opening of a book and the first chapter, all the material is handled as an introduction to the book. // 2) Between the opening of a chapter and the first verse, the material is split between the introduction of the chapter // and the first verse of the chapter. // A

with a type other than section will be taken as a chapter introduction. // A of type acrostic, psalm or no type, will be taken as a title for the verse. // A <title> of type main or chapter will be seen as a chapter title. // 3) Between verses, the material is split between the prior verse and the next verse. // Basically, while end and empty tags are found, they belong to the prior verse. // Once a begin tag is found, it belongs to the next verse. // If the title has an attribute type of "main" or "chapter" // it belongs to its <div> or <chapter> and is treated as part of its heading // Otherwise if it a title in a chapter before the first the first verse it // is put into the verse as a preverse title. if (!inPreVerse && !inBookHeader) { if (inChapterHeader) { // Determine when we are no longer in a chapter heading, but in pre-verse material: // If we see one of the following: // a section div // a title that is not main or chapter if ((!strcmp(tokenName, "div") && (typeAttr && !strcmp(typeAttr, "section"))) || (!strcmp(tokenName, "title") && (!typeAttr || (strcmp(typeAttr, "main") && strcmp(typeAttr, "chapter")))) ) { // Since we have found the boundary, we need to write out the chapter heading writeEntry(text); // And we are no longer in the chapter heading inChapterHeader = false; // But rather, we are now in pre-verse material inPreVerse = true; } } else if (!inVerse && inChapter) { inPreVerse = true; } if (inPreVerse) { char genBuf[200]; sprintf(genBuf, "<div type=\"x-milestone\" subType=\"x-preverse\" sID=\"pv%d\"/>", genID++); text.append(genBuf); } } #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { if (!inVerse && !inBookHeader && !inChapterHeader) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse start token " << token << ":" << text.c_str() << endl; } } #endif return false; } // Done with procesing start and empty tags // Process end tags else { if (tagStack.empty()) { cout << "FATAL(NESTING): " << currentOsisID << ": tag expected" << endl; exit(EXIT_BAD_NESTING); } // Note: empty end tags have the eID attribute if (!token.isEmpty()) { XMLTag topToken = tagStack.top(); tagDepth = tagStack.size(); #ifdef DEBUG if (debug & DEBUG_STACK) { cout << "DEBUG(STACK): " << currentOsisID << ": pop(" << tagDepth << ") " << topToken.getName() << endl; } #endif tagStack.pop(); if (strcmp(topToken.getName(), tokenName)) { cout << "FATAL(NESTING): " << currentOsisID << ": Expected " << topToken.getName() << " found " << tokenName << endl; // exit(EXIT_BAD_NESTING); // (OSK) I'm sure this validity check is a good idea, but there's a but somewhere that's killing the converter here. // So I'm disabling this line. Unvalidated OSIS files shouldn't be run through the converter anyway. // (DM) This has nothing to do with well-form or valid. It checks milestoned elements for proper nesting. } } // We haven't seen the first div so there is nothing to do. if (!firstDiv) { // Collect the content so it can be used to suggest the module's conf. return false; } // VERSE and COMMENTARY END if (!strcmp(tokenName, "verse") || (inVerse && !strcmp(tokenName, "div"))) { if (tagDepth != verseDepth) { cout << "WARNING(NESTING): verse " << currentOsisID << " is not well formed:(" << verseDepth << "," << tagDepth << ")" << endl; } // If we are in WOC then we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse. if (inWOC) { text.append("</q>"); } // Include the token if it is not a verse if (strcmp(tokenName, "verse")) { text.append(token); } #ifdef DEBUG else if (debug & DEBUG_VERSE) { // transform the verse into a milestone XMLTag t = "<milestone resp=\"v\" />"; // copy all the attributes of the verse element to the milestone StringList attrNames = token.getAttributeNames(); for (StringList::iterator loop = attrNames.begin(); loop != attrNames.end(); loop++) { const char* attr = (*loop).c_str(); t.setAttribute(attr, token.getAttribute(attr)); } text.append(t); } #endif writeEntry(text); inVerse = false; inPreVerse = false; verseDepth = 0; return true; } // Handle WOC quotes. // Note this requires transformBSP to make them into milestones // Otherwise have to manage it here if (!strcmp(tokenName, "q")) { XMLTag topToken = quoteStack.top(); #ifdef DEBUG if (debug & DEBUG_QUOTE) { cout << "DEBUG(QUOTE): " << currentOsisID << ": quote pop(" << quoteStack.size() << ") " << topToken << " -- " << token << endl; } #endif quoteStack.pop(); // If we have found an end tag for a <q who="Jesus"> then we are done with the WOC // and we need to terminate the <q who="Jesus" marker=""> that was added earlier in the verse. if (token.getAttribute("who") && !strcmp(token.getAttribute("who"), "Jesus")) { #ifdef DEBUG if (debug & DEBUG_QUOTE) { cout << "DEBUG(QUOTE): " << currentOsisID << ": (" << quoteStack.size() << ") " << topToken << " -- " << token << endl; } #endif inWOC = false; const char *sID = topToken.getAttribute("sID"); const char *eID = token.getAttribute("eID"); if (!sID) { sID = ""; } if (!eID) { eID = ""; } if (strcmp(sID, eID)) { cout << "ERROR(NESTING): improper nesting " << currentOsisID << ": matching (sID,eID) not found. Looking at (" << sID << "," << eID << ")" << endl; } // Output the quotation mark if appropriate, inside the WOC. // If there is no marker attribute, let the SWORD engine manufacture one. // If there is a marker attribute and it has content, then output that. // If the marker attribute is present and empty, then there is nothing to do. // And have it within the WOC markup if (!token.getAttribute("marker") || token.getAttribute("marker")[0]) { token.setAttribute("who", 0); // remove the who="Jesus" text.append(token); } // Now close the WOC text.append("</q>"); return true; } return false; } // Look for the end of document, book and chapter // Also for material that goes with last entry if (!inVerse && !inBookHeader && !inChapterHeader) { // Is this the end of a chapter. if (tagDepth == chapterDepth && (!strcmp(tokenName, "div") || !strcmp(tokenName, "chapter"))) { text.append(token); writeEntry(text); inChapter = false; chapterDepth = 0; verseDepth = 0; return true; } // Is it the end of a book if (tagDepth == bookDepth && (!strcmp(tokenName, "div"))) { text.append(token); writeEntry(text); bookDepth = 0; chapterDepth = 0; verseDepth = 0; return true; } // Do not include the end of an osis document if (!strcmp(tokenName, "osisText") || !strcmp(tokenName, "osis")) { bookDepth = 0; chapterDepth = 0; verseDepth = 0; text = ""; return true; } // When we are not inPreVerse, the interverse tags get appended to the preceeding verse. if (!inPreVerse) { text.append(token); writeEntry(text); #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": appending interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl; } #endif return true; } #ifdef DEBUG if (debug & DEBUG_INTERVERSE) { cout << "DEBUG(INTERVERSE): " << currentOsisID << ": interverse end tag: " << tokenName << "(" << tagDepth << "," << chapterDepth << "," << bookDepth << ")" << endl; } #endif return false; } return false; } // done with Processing end tags return false; } /** * Support normalizations necessary for a SWORD module. * OSIS allows for document structure (Book, Section, Paragraph or BSP) * to overlap Bible versification (Book, Chapter, Verse). * Most SWORD applications need to display verses in isolation or in HTML table cells, * requiring each stored entry (i.e. verses) to be well-formed xml. * This routine normalizes container elements which could cross verse boundaries into milestones. * For most of these OSIS elements, there is a milestone form. However, p is not milestoneable. * For this reason, p is transformed into lb elements. * param t the tag to transform * return the transformed tag or the original one */ XMLTag transformBSP(XMLTag t) { static std::stack<XMLTag> bspTagStack; static int sID = 1; char buf[11]; // Support simplification transformations if (t.isEmpty()) { #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform empty " << t << endl; } #endif return t; } const char* tagName = t.getName(); if (!t.isEndTag()) { // Transform <p> into <div type="paragraph"> and milestone it if (!strcmp(tagName, "p")) { t.setText("<div type=\"paragraph\" />"); sprintf(buf, "gen%d", sID++); t.setAttribute("sID", buf); } // Transform <tag> into <tag sID="">, where tag is a milestoneable element. // The following containers are milestoneable. // abbr, closer, div, foreign, l, lg, salute, signed, speech // Leaving out: // abbr When would this ever cross a boundary? // seg as it is used for a divineName hack // foreign so that it can be easily italicized else if (!strcmp(tagName, "chapter") || !strcmp(tagName, "closer") || !strcmp(tagName, "div") || !strcmp(tagName, "l") || !strcmp(tagName, "lg") || !strcmp(tagName, "q") || !strcmp(tagName, "salute") || !strcmp(tagName, "signed") || !strcmp(tagName, "speech") || !strcmp(tagName, "verse") ) { t.setEmpty(true); sprintf(buf, "gen%d", sID++); t.setAttribute("sID", buf); } bspTagStack.push(t); #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform push (" << bspTagStack.size() << ") " << t << " (tagname=" << tagName << ")" << endl; XMLTag topToken = bspTagStack.top(); cout << "DEBUG(XFORM): " << currentOsisID << ": xform top(" << bspTagStack.size() << ") " << topToken << endl; } #endif } else { XMLTag topToken = bspTagStack.top(); #ifdef DEBUG if (debug & DEBUG_XFORM) { cout << "DEBUG(XFORM): " << currentOsisID << ": xform pop(" << bspTagStack.size() << ") " << topToken << endl; } #endif bspTagStack.pop(); // Look for the milestoneable container tags handled above. if (!strcmp(tagName, "chapter") || !strcmp(tagName, "closer") || !strcmp(tagName, "div") || !strcmp(tagName, "l") || !strcmp(tagName, "lg") || !strcmp(tagName, "p") || !strcmp(tagName, "q") || !strcmp(tagName, "salute") || !strcmp(tagName, "signed") || !strcmp(tagName, "speech") || !strcmp(tagName, "verse") ) { // make this a clone of the start tag with sID changed to eID // Note: in the case of </p> the topToken is a <div type="paragraph"> t = topToken; t.setAttribute("eID", t.getAttribute("sID")); t.setAttribute("sID", 0); } } return t; } /** * Write out all links in the module. * Waiting is necessary because writeEntry might ultimately append * text to a verse moving it's offset in the data file. * While we are minimizing it by postponing the write until we have * gathered the next verse, the following scenario is happening: * A module is using linked verses and has some verses that are not * in the chosen versification. If the out-of-canon verse happens following * a linked verse, the out-of-canon verse is appended to the prior * verse. Care has to be taken that the linked verses all point to * the first of the set. */ void writeLinks() { // Link all the verses VerseKey destKey; destKey.setVersificationSystem(currentVerse.getVersificationSystem()); destKey.AutoNormalize(0); destKey.Headings(1); VerseKey linkKey; linkKey.setVersificationSystem(currentVerse.getVersificationSystem()); linkKey.AutoNormalize(0); linkKey.Headings(1); for (unsigned int i = 0; i < linkedVerses.size(); i++) { // The verseKeys is a list of verses // where the first is the real verse // and the others link to it. ListKey verseKeys = linkedVerses[i]; verseKeys.setPosition(TOP); destKey = verseKeys.getElement(); verseKeys.increment(1); while (!verseKeys.Error()) { linkKey = verseKeys.getElement(); verseKeys.increment(1); linkToEntry(linkKey, destKey); } } } void usage(const char *app, const char *error = 0) { if (error) fprintf(stderr, "\n%s: %s\n", app, error); fprintf(stderr, "\nusage: %s <output/path> <osisDoc> [OPTIONS]\n", app); fprintf(stderr, " <output/path>\t\t an existing folder that the module will be written\n"); fprintf(stderr, " <osisDoc>\t\t path to the validated OSIS document, or '-' to read from standard input\n"); fprintf(stderr, " -a\t\t\t augment module if exists (default is to create new)\n"); fprintf(stderr, " -z\t\t\t use ZIP compression (default no compression)\n"); fprintf(stderr, " -Z\t\t\t use LZSS compression (default no compression)\n"); fprintf(stderr, " -b <2|3|4>\t\t compression block size (default 4):\n"); fprintf(stderr, "\t\t\t\t 2 - verse; 3 - chapter; 4 - book\n"); fprintf(stderr, " -c <cipher_key>\t encipher module using supplied key\n"); fprintf(stderr, "\t\t\t\t (default no enciphering)\n"); fprintf(stderr, " -N\t\t\t do not convert UTF-8 or normalize UTF-8 to NFC\n"); fprintf(stderr, "\t\t\t\t (default is to convert to UTF-8, if needed,\n"); fprintf(stderr, "\t\t\t\t and then normalize to NFC)\n"); fprintf(stderr, "\t\t\t\t Note: UTF-8 texts should be normalized to NFC.\n"); fprintf(stderr, " -s <2|4>\t\t max text size per entry (default is 2).\n"); fprintf(stderr, "\t\t\t\t Note: useful for commentaries with very large entries\n"); fprintf(stderr, "\t\t\t\t in uncompressed modules (default is 65535 bytes)\n"); fprintf(stderr, " -v <v11n>\t\t specify a versification scheme to use (default is KJV)\n"); fprintf(stderr, "\t\t\t\t Note: The following are valid values for v11n:\n"); VerseMgr *vmgr = VerseMgr::getSystemVerseMgr(); StringList av11n = vmgr->getVersificationSystems(); for (StringList::iterator loop = av11n.begin(); loop != av11n.end(); loop++) { fprintf(stderr, "\t\t\t\t\t%s\n", (*loop).c_str()); } #ifdef DEBUG fprintf(stderr, " -d <flags>\t\t turn on debugging (default is 0)\n"); fprintf(stderr, "\t\t\t\t Note: This flag may change in the future.\n"); fprintf(stderr, "\t\t\t\t Flags: The following are valid values:\n"); fprintf(stderr, "\t\t\t\t\t0 - no debugging\n"); fprintf(stderr, "\t\t\t\t\t1 - writes to module, very verbose\n"); fprintf(stderr, "\t\t\t\t\t2 - verse start and end\n"); fprintf(stderr, "\t\t\t\t\t4 - quotes, especially Words of Christ (WOC)\n"); fprintf(stderr, "\t\t\t\t\t8 - titles\n"); fprintf(stderr, "\t\t\t\t\t16 - inter-verse material\n"); fprintf(stderr, "\t\t\t\t\t32 - BSP to BCV transformations\n"); fprintf(stderr, "\t\t\t\t\t64 - v11n exceptions\n"); fprintf(stderr, "\t\t\t\t\t128 - parsing of osisID and osisRef\n"); fprintf(stderr, "\t\t\t\t\t256 - internal stack\n"); fprintf(stderr, "\t\t\t\t\t512 - miscellaneous\n"); fprintf(stderr, "\t\t\t\t This flag can be used more than once.\n"); #endif fprintf(stderr, "\n"); fprintf(stderr, "See http://www.crosswire.org/wiki/osis2mod for more details.\n"); fprintf(stderr, "\n"); exit(EXIT_BAD_ARG); } void processOSIS(istream& infile) { activeOsisID[0] = '\0'; strcpy(currentOsisID,"N/A"); currentVerse.setVersificationSystem(v11n); currentVerse.AutoNormalize(0); currentVerse.Headings(1); // turn on mod/testmnt/book/chap headings currentVerse.Persist(1); module->setKey(currentVerse); module->setPosition(TOP); SWBuf token; SWBuf text; bool intoken = false; bool inWhitespace = false; bool seeingSpace = false; char curChar = '\0'; while (infile.good()) { curChar = infile.get(); // skip the character if it is bad. infile.good() will catch the problem if (curChar == -1) { continue; } if (!intoken && curChar == '<') { intoken = true; token = "<"; continue; } // Outside of tokens merge adjacent whitespace if (!intoken) { seeingSpace = isspace(curChar); if (seeingSpace) { if (inWhitespace) { continue; } // convert all whitespace to blanks curChar = ' '; } inWhitespace = seeingSpace; } if (intoken && curChar == '>') { intoken = false; inWhitespace = false; token.append('>'); // take this isalpha if out to check for bugs in text if ((isalpha(token[1])) || (isalpha(token[2]))) { //cout << "Handle:" << token.c_str() << endl; XMLTag t = transformBSP(token.c_str()); if (!handleToken(text, t)) { text.append(t); } } continue; } if (intoken) { token.append(curChar); } else { switch (curChar) { case '>' : text.append(">"); break; case '<' : text.append("<"); break; default : text.append(curChar); break; } } } // Force the last entry from the text buffer. text = ""; writeEntry(text, true); writeLinks(); #ifdef _ICU_ if (converted) fprintf(stderr, "osis2mod converted %d verses to UTF-8\n", converted); if (normalized) fprintf(stderr, "osis2mod normalized %d verses to NFC\n", normalized); #endif } int main(int argc, char **argv) { fprintf(stderr, "You are running osis2mod: $Rev: 2400 $\n"); // Let's test our command line arguments if (argc < 3) { usage(*argv); } // variables for arguments, holding defaults const char* program = argv[0]; const char* path = argv[1]; const char* osisDoc = argv[2]; int append = 0; SWBuf compType = ""; bool isCommentary = false; int iType = 4; int entrySize = 0; SWBuf cipherKey = ""; SWCompress *compressor = 0; for (int i = 3; i < argc; i++) { if (!strcmp(argv[i], "-a")) { append = 1; } else if (!strcmp(argv[i], "-z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (entrySize) usage(*argv, "Cannot specify both -z and -s"); compType = "ZIP"; } else if (!strcmp(argv[i], "-Z")) { if (compType.size()) usage(*argv, "Cannot specify both -z and -Z"); if (entrySize) usage(*argv, "Cannot specify both -Z and -s"); compType = "LZSS"; } else if (!strcmp(argv[i], "-b")) { if (i+1 < argc) { iType = atoi(argv[++i]); if ((iType >= 2) && (iType <= 4)) continue; } usage(*argv, "-b requires one of <2|3|4>"); } else if (!strcmp(argv[i], "-N")) { normalize = false; } else if (!strcmp(argv[i], "-c")) { if (i+1 < argc) cipherKey = argv[++i]; else usage(*argv, "-c requires <cipher_key>"); } else if (!strcmp(argv[i], "-v")) { if (i+1 < argc) v11n = argv[++i]; else usage(*argv, "-v requires <v11n>"); } else if (!strcmp(argv[i], "-s")) { if (compType.size()) usage(*argv, "Cannot specify -s and -z or -Z"); if (i+1 < argc) { entrySize = atoi(argv[++i]); if (entrySize == 2 || entrySize == 4) { continue; } } usage(*argv, "-s requires one of <2|4>"); } else if (!strcmp(argv[i], "-C")) { isCommentary = true; } #ifdef DEBUG else if (!strcmp(argv[i], "-d")) { if (i+1 < argc) debug |= atoi(argv[++i]); else usage(*argv, "-d requires <flags>"); } #endif else usage(*argv, (((SWBuf)"Unknown argument: ")+ argv[i]).c_str()); } if (compType == "ZIP") { compressor = new ZipCompress(); } else if (compType = "LZSS") { compressor = new LZSSCompress(); } #ifndef _ICU_ if (normalize) { normalize = false; cout << "WARNING(UTF8): " << program << " is not compiled with support for ICU. Assuming -N." << endl; } #endif #ifdef DEBUG if (debug & DEBUG_OTHER) { cout << "DEBUG(ARGS):\n\tpath: " << path << "\n\tosisDoc: " << osisDoc << "\n\tcreate: " << append << "\n\tcompressType: " << compType << "\n\tblockType: " << iType << "\n\tcipherKey: " << cipherKey.c_str() << "\n\tnormalize: " << normalize << endl; } #endif if (!append) { // == 0 then create module // Try to initialize a default set of datafiles and indicies at our // datapath location passed to us from the user. if (compressor) { if (zText::createModule(path, iType, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } else if (entrySize == 4) { if (RawText4::createModule(path, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } else { if (RawText::createModule(path, v11n)) { fprintf(stderr, "ERROR: %s: couldn't create module at path: %s \n", program, path); exit(EXIT_NO_CREATE); } } } // Do some initialization stuff if (compressor) { // Create a compressed text module allowing very large entries // Taking defaults except for first, fourth, fifth and last argument module = new zText( path, // ipath 0, // iname 0, // idesc iType, // iblockType compressor, // icomp 0, // idisp ENC_UNKNOWN, // enc DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // lang v11n // versification ); } else if (entrySize == 4) { // Create a raw text module allowing very large entries // Taking defaults except for first and last argument module = new RawText4( path, // ipath 0, // iname 0, // idesc 0, // idisp ENC_UNKNOWN, // encoding DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // ilang v11n // versification ); } else { // Create a raw text module allowing reasonable sized entries // Taking defaults except for first and last argument module = new RawText( path, // ipath 0, // iname 0, // idesc 0, // idisp ENC_UNKNOWN, // encoding DIRECTION_LTR, // dir FMT_UNKNOWN, // markup 0, // ilang v11n // versification ); } SWFilter *cipherFilter = 0; if (cipherKey.length()) { fprintf(stderr, "Adding cipher filter with phrase: %s\n", cipherKey.c_str() ); cipherFilter = new CipherFilter(cipherKey.c_str()); module->AddRawFilter(cipherFilter); } if (!module->isWritable()) { fprintf(stderr, "The module is not writable. Writing text to it will not work.\nExiting.\n" ); exit(EXIT_NO_WRITE); } // Either read from std::cin (aka stdin), when the argument is a '-' // or from a specified file. if (!strcmp(osisDoc, "-")) { processOSIS(cin); } else { // Let's see if we can open our input file ifstream infile(osisDoc); if (infile.fail()) { fprintf(stderr, "ERROR: %s: couldn't open input file: %s \n", program, osisDoc); exit(EXIT_NO_READ); } processOSIS(infile); infile.close(); } delete module; if (cipherFilter) delete cipherFilter; exit(0); // success }