diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
commit | 8c8aa6b07e595cfac56838b5964ab3e96051f1b2 (patch) | |
tree | da38e2c1979148dbd3b0c7b87f930746f5ba7f44 /utilities/osis2mod.cpp | |
parent | 8d3fc864d094eeadc721f8e93436b37a5fab173e (diff) |
Imported Upstream version 1.5.7
Diffstat (limited to 'utilities/osis2mod.cpp')
-rw-r--r-- | utilities/osis2mod.cpp | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/utilities/osis2mod.cpp b/utilities/osis2mod.cpp new file mode 100644 index 0000000..241eff5 --- /dev/null +++ b/utilities/osis2mod.cpp @@ -0,0 +1,345 @@ +#include <ctype.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdlib.h> + +#ifndef __GNUC__ +#include <io.h> +#else +#include <unistd.h> +#endif + +#include <swmgr.h> +#include <rawtext.h> +#include <iostream> +#include <swbuf.h> +#include <utilxml.h> + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifndef NO_SWORD_NAMESPACE +using namespace sword; +#endif + +using namespace std; + +RawText *module; +VerseKey *currentVerse = 0; + +char readline(int fd, char **buf) { + char ch; + if (*buf) + delete [] *buf; + *buf = 0; + int len; + + + long index = lseek(fd, 0, SEEK_CUR); + // clean up any preceding white space + while ((len = read(fd, &ch, 1)) == 1) { + if ((ch != 13) && (ch != ' ') && (ch != '\t')) + break; + else index++; + } + + + while (ch != 10) { + if ((len = read(fd, &ch, 1)) != 1) + break; + } + + int size = (lseek(fd, 0, SEEK_CUR) - index) - 1; + + *buf = new char [ size + 1 ]; + + if (size > 0) { + lseek(fd, index, SEEK_SET); + read(fd, *buf, size); + read(fd, &ch, 1); //pop terminating char + (*buf)[size] = 0; + + // clean up any trailing junk on buf + for (char *it = *buf+(strlen(*buf)-1); it > *buf; it--) { + if ((*it != 10) && (*it != 13) && (*it != ' ') && (*it != '\t')) + break; + else *it = 0; + } + } + else **buf = 0; + return !len; +} + + +bool isKJVRef(const char *buf) { + VerseKey vk, test; + vk.AutoNormalize(0); + vk.Headings(1); // turn on mod/testmnt/book/chap headings + vk.Persist(1); + // lets do some tests on the verse -------------- + vk = buf; + test = buf; + + if (vk.Testament() && vk.Book() && vk.Chapter() && vk.Verse()) { // if we're not a heading +// std::cerr << (const char*)vk << " == " << (const char*)test << std::endl; + return (vk == test); + } + else return true; // no check if we're a heading... Probably bad. +} + + +void writeEntry(VerseKey &key, SWBuf &text) { +// cout << "Verse: " << key << "\n"; +// cout << "TEXT: " << text << "\n\n"; + SWBuf currentText = module->getRawEntry(); + if (currentText.length()) + text = currentText + " " + text; + module->setEntry(text); +} + + +bool handleToken(SWBuf &text, XMLTag token) { + static bool inHeader = false; + static SWBuf headerType = ""; + static SWBuf header = ""; + static SWBuf lastTitle = ""; + static int titleOffset = -1; + + if ((!strcmp(token.getName(), "title")) && (!token.isEndTag())) { + titleOffset = text.length(); + return false; + } + if ((!strcmp(token.getName(), "title")) && (token.isEndTag())) { + lastTitle = (text.c_str() + titleOffset); + lastTitle += token; + return false; + } + if (((!strcmp(token.getName(), "div")) && (!token.isEndTag()) && (token.getAttribute("osisID"))) && (!strcmp(token.getAttribute("type"), "book"))) { + if (inHeader) { // this one should never happen, but just in case +// cout << "HEADING "; + writeEntry(*currentVerse, text); + inHeader = false; + } + *currentVerse = token.getAttribute("osisID"); + currentVerse->Chapter(0); + currentVerse->Verse(0); + inHeader = true; + headerType = "book"; + lastTitle = ""; + text = ""; + } + else if ((((!strcmp(token.getName(), "div")) && (!token.isEndTag()) && (token.getAttribute("osisID"))) && (!strcmp(token.getAttribute("type"), "chapter"))) || ((!strcmp(token.getName(), "chapter")) && (!token.isEndTag()) && (token.getAttribute("osisID")))) { + if (inHeader) { +// cout << "HEADING "; + writeEntry(*currentVerse, text); + inHeader = false; + } + + *currentVerse = token.getAttribute("osisID"); + currentVerse->Verse(0); + inHeader = true; + headerType = "chap"; + lastTitle = ""; + text = ""; + } + if ((!strcmp(token.getName(), "verse")) && (!token.isEndTag())) { + if (inHeader) { +// cout << "HEADING "; + writeEntry(*currentVerse, text); + inHeader = false; + } + + *currentVerse = token.getAttribute("osisID"); + text = ""; + return true; + } + if ((!strcmp(token.getName(), "verse")) && (token.isEndTag())) { + if (lastTitle.length()) { + SWBuf titleHead = lastTitle; + char *end = strchr(lastTitle.getRawData(), '>'); + titleHead.setSize((end - lastTitle.getRawData())+1); + XMLTag titleTag(titleHead); + titleTag.setAttribute("type", "section"); + titleTag.setAttribute("subtype", "x-preverse"); + text = SWBuf(titleTag) + SWBuf(end+1) + text; + } + writeEntry(*currentVerse, text); + lastTitle = ""; + text = ""; + return true; + } + return false; +} + + + + +int main(int argc, char **argv) { + + // Let's test our command line arguments + if (argc < 3) { + fprintf(stderr, "usage: %s <path/to/mod/files> <osisDoc> [0|1 - create module|augment]\n\n", argv[0]); + exit(-1); + } + + + if ((argc<4)||(!strcmp(argv[3], "0"))) { // == 0 then create module + // Try to initialize a default set of datafiles and indicies at our + // datapath location passed to us from the user. + if (RawText::createModule(argv[1])) { + fprintf(stderr, "error: %s: couldn't create module at path: %s \n", argv[0], argv[1]); + exit(-3); + } + } + + // Let's see if we can open our input file + int fd = open(argv[2], O_RDONLY|O_BINARY); + if (fd < 0) { + fprintf(stderr, "error: %s: couldn't open input file: %s \n", argv[0], argv[2]); + exit(-2); + } + + // Do some initialization stuff + char *buffer = 0; + module = new RawText(argv[1]); // open our datapath with our RawText driver. + currentVerse = new VerseKey(); + currentVerse->AutoNormalize(0); + currentVerse->Headings(1); // turn on mod/testmnt/book/chap headings + currentVerse->Persist(1); + + module->setKey(*currentVerse); + + (*module) = TOP; + + int successive = 0; //part of hack below + + char *from; + SWBuf token; + SWBuf text; + bool intoken = false; + + while (!readline(fd, &buffer)) { + for (from = buffer; *from; from++) { + if (*from == '<') { + intoken = true; + token = "<"; + continue; + } + + if (*from == '>') { + intoken = false; + token += ">"; + // take this isalpha if out to check for bugs in text + if ((isalpha(token[1])) || (isalpha(token[2]))) { + if (!handleToken(text, token.c_str())) { + text += token; + } + } + continue; + } + + if (intoken) + token += *from; + else text += *from; + } + } + // clear up our buffer that readline might have allocated + if (buffer) + delete [] buffer; + delete module; + delete currentVerse; + close(fd); +} + + + + +/* + string verseText = ""; + + // chapter number + if (!strncmp("$$$ ", buffer, 4)) { + buffer[7] = 0; + chapter = atoi(buffer+4); + continue; + } + // header + if (!strncmp("<TD COLSPAN=4 VALIGN=TOP><FONT SIZE=2><B>", buffer, 41)) { + char *end = strstr(buffer+41, "</B>"); + *end = 0; + header = buffer+41; + continue; + } + // verse number + if (!strncmp("<TD VALIGN=TOP ALIGN=RIGHT WIDTH=12><FONT SIZE=2 COLOR=RED><B><SUP>", buffer, 67)) { + char *end = strstr(buffer+67, "</SUP>"); + *end = 0; + verse = atoi(buffer+67); + continue; + } + // Actual verse data + if (!strncmp("<TD VALIGN=TOP><FONT SIZE=2>", buffer, 28)) { + char *end = strstr(buffer+28, "</FONT>"); + *end = 0; + } + // extra + else { + continue; + } + + verseText = buffer + 28; + + if (header.length()) { + verseText = "<title type=\"section\" subtype=\"x-preverse\">" + header + "</title>" + verseText; + header = ""; + } + + string vsbuf = argv[3]; + sprintf(tmpBuf, "%i", chapter); + vsbuf += ((string)" ") + tmpBuf; + sprintf(tmpBuf, "%i", verse); + vsbuf += ((string)":") + tmpBuf; + vk = vsbuf.c_str(); + if (vk.Error()) { + std::cerr << "Error parsing key: " << vsbuf << "\n"; + exit(-5); + } + string orig = mod.getRawEntry(); + + if (!isKJVRef(vsbuf.c_str())) { + VerseKey origVK = vk; +// This block is functioning improperly -- problem with AutoNormalize??? +// do { +// vk--; +// } +// while (!vk.Error() && !isKJVRef(vk)); + + //hack to replace above: + successive++; + vk -= successive; + orig = mod.getRawEntry(); + + std::cerr << "Not a valid KJV ref: " << origVK << "\n"; + std::cerr << "appending to ref: " << vk << "\n"; + orig += " [ ("; + orig += origVK; + orig += ") "; + orig += verseText; + orig += " ] "; + verseText = orig.c_str(); + } + else { + successive = 0; + } + + if (orig.length() > 1) + std::cerr << "Warning, overwriting verse: " << vk << std::endl; + + // ------------- End verse tests ----------------- + std::cout << "adding "<< vk << "\n"; + mod << verseText.c_str(); // save text to module at current position + } + +*/ |