diff options
Diffstat (limited to 'src/modules/filters/osisrtf.cpp')
-rw-r--r-- | src/modules/filters/osisrtf.cpp | 520 |
1 files changed, 520 insertions, 0 deletions
diff --git a/src/modules/filters/osisrtf.cpp b/src/modules/filters/osisrtf.cpp new file mode 100644 index 0000000..0352335 --- /dev/null +++ b/src/modules/filters/osisrtf.cpp @@ -0,0 +1,520 @@ +/*************************************************************************** + osisrtf.cpp - OSIS to RTF filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation version 2 of the License. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <ctype.h> +#include <osisrtf.h> +#include <utilxml.h> +#include <utilstr.h> +#include <versekey.h> +#include <swmodule.h> +#include <stringmgr.h> +#include <stack> + +SWORD_NAMESPACE_START + +namespace { + class MyUserData : public BasicFilterUserData { + public: + bool osisQToTick; + bool BiblicalText; + bool inXRefNote; + int suspendLevel; + std::stack<char *> quoteStack; + SWBuf w; + SWBuf version; + MyUserData(const SWModule *module, const SWKey *key); + ~MyUserData(); + }; + + + MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + inXRefNote = false; + BiblicalText = false; + suspendLevel = 0; + if (module) { + version = module->Name(); + BiblicalText = (!strcmp(module->Type(), "Biblical Texts")); + } + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); + } + + + MyUserData::~MyUserData() { + // Just in case the quotes are not well formed + while (!quoteStack.empty()) { + char *tagData = quoteStack.top(); + quoteStack.pop(); + delete [] tagData; + } + } +static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +}; + + +OSISRTF::OSISRTF() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + addTokenSubstitute("lg", "{\\par}"); + addTokenSubstitute("/lg", "{\\par}"); + + setTokenCaseSensitive(true); +} + + +BasicFilterUserData *OSISRTF::createUserData(const SWModule *module, const SWKey *key) { + return new MyUserData(module, key); +} + + +char OSISRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + + // preprocess text buffer to escape RTF control codes + const char *from; + SWBuf orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + switch (*from) { + case '{': + case '}': + case '\\': + text += "\\"; + text += *from; + break; + default: + text += *from; + } + } + text += (char)0; + + SWBasicFilter::processText(text, key, module); //handle tokens as usual + + orig = text; + from = orig.c_str(); + for (text = ""; *from; from++) { //loop to remove extra spaces + if ((strchr(" \t\n\r", *from))) { + while (*(from+1) && (strchr(" \t\n\r", *(from+1)))) { + from++; + } + text += " "; + } + else { + text += *from; + } + } + text += (char)0; // probably not needed, but don't want to remove without investigating (same as above) + return 0; +} + + +bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + MyUserData *u = (MyUserData *)userData; + SWBuf scratch; + bool sub = (u->suspendTextPassThru) ? substituteToken(scratch, token) : substituteToken(buf, token); + if (!sub) { + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + outText('{', buf, u); + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("xlit"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + if ((attrib = tag.getAttribute("gloss"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + if ((attrib = tag.getAttribute("lemma"))) { + int count = tag.getAttributePartCount("lemma", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + show = false; + else { + scratch.setFormatted(" {\\cf3 \\sub <%s>}", val2); + outText(scratch.c_str(), buf, u); + } + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + show = false; + if (show) { + int count = tag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + scratch.setFormatted(" {\\cf4 \\sub (%s)}", val2); + outText(scratch.c_str(), buf, u); + } while (++i < count); + } + } + if ((attrib = tag.getAttribute("POS"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + scratch.setFormatted(" {\\fs15 <%s>}", val); + outText(scratch.c_str(), buf, u); + } + + if (endTag) + outText('}', buf, u); + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + SWBuf type = tag.getAttribute("type"); + + if ( (type != "x-strongsMarkup") // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + && (type != "strongsMarkup") // deprecated + ) { + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + VerseKey *vkey = NULL; + // see if we have a VerseKey * or descendant + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, u->key); + } + SWCATCH ( ... ) { } + if (vkey) { + char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 'x':'n'; + scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->Verse(), footnoteNumber.c_str()); + outText(scratch.c_str(), buf, u); + u->inXRefNote = (ch == 'x'); + } + } + u->suspendTextPassThru = (++u->suspendLevel); + } + } + if (tag.isEndTag()) { + u->suspendTextPassThru = (--u->suspendLevel); + u->inXRefNote = false; + } + } + + // <p> paragraph tag + else if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + outText("{\\fi200\\par}", buf, u); + } + else if (tag.isEndTag()) { // end tag + outText("{\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + outText("{\\pard\\par\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + } + + // <reference> tag + else if (!strcmp(tag.getName(), "reference")) { + if (!u->inXRefNote) { // only show these if we're not in an xref note + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{<a href=\"\">", buf, u); + } + else if (tag.isEndTag()) { + outText("</a>}", buf, u); + } + } + } + + // <l> poetry + else if (!strcmp(tag.getName(), "l")) { + // end line marker + if (tag.getAttribute("eID")) { + outText("{\\par}", buf, u); + } + // <l/> without eID or sID + // Note: this is improper osis. This should be <lb/> + else if (tag.isEmpty() && !tag.getAttribute("sID")) { + outText("{\\par}", buf, u); + } + // end of the line + else if (tag.isEndTag()) { + outText("{\\par}", buf, u); + } + } + + // <milestone type="line"/> or <lb.../> + else if ((!strcmp(tag.getName(), "lb")) || ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line")))) { + outText("{\\par}", buf, u); + userData->supressAdjacentWhitespace = true; + } + + // <title> + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{\\par\\i1\\b1 ", buf, u); + } + else if (tag.isEndTag()) { + outText("\\par}", buf, u); + } + } + + // <catchWord> & <rdg> tags (italicize) + else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <hi> + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "b" || type == "x-b") + outText("{\\b1 ", buf, u); + else // all other types + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <q> quote + // Rules for a quote element: + // If the tag is empty with an sID or an eID then use whatever it specifies for quoting. + // Note: empty elements without sID or eID are ignored. + // If the tag is <q> then use it's specifications and push it onto a stack for </q> + // If the tag is </q> then use the pushed <q> for specification + // If there is a marker attribute, possibly empty, this overrides osisQToTick. + // If osisQToTick, then output the marker, using level to determine the type of mark. + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + + // open <q> or <q sID... /> + if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) { + // if <q> then remember it for the </q> + if (!tag.isEmpty()) { + char *tagData = 0; + stdstr(&tagData, tag.toString()); + u->quoteStack.push(tagData); + } + + // Do this first so quote marks are included as WoC + if (who == "Jesus") + outText("\\cf6 ", buf, u); + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + //alternate " and ' + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + // close </q> or <q eID... /> + else if ((tag.isEndTag()) || (tag.getAttribute("eID"))) { + // if it is </q> then pop the stack for the attributes + if (tag.isEndTag() && !u->quoteStack.empty()) { + char *tagData = u->quoteStack.top(); + u->quoteStack.pop(); + XMLTag qTag(tagData); + delete [] tagData; + + type = qTag.getAttribute("type"); + who = qTag.getAttribute("who"); + tmp = qTag.getAttribute("level"); + level = (tmp) ? atoi(tmp) : 1; + tmp = qTag.getAttribute("marker"); + hasMark = tmp; + mark = tmp; + } + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + + // Do this last so quote marks are included as WoC + if (who == "Jesus") + outText("\\cf0 ", buf, u); + } + } + + + // <milestone type="cQuote" marker="x"/> + else if (!strcmp(tag.getName(), "milestone") && tag.getAttribute("type") && !strcmp(tag.getAttribute("type"), "cQuote")) { + const char *tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + SWBuf type = tag.getAttribute("type"); + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + +// just do all transChange tags this way for now +// if (type == "supplied") + outText("{\\i1 ", buf, u); + } + else if (tag.isEndTag()) { + outText('}', buf, u); + } + } + + // <divineName> + else if (!strcmp(tag.getName(), "divineName")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + u->suspendTextPassThru = (++u->suspendLevel); + } + else if (tag.isEndTag()) { + SWBuf lastText = u->lastSuspendSegment.c_str(); + u->suspendTextPassThru = (--u->suspendLevel); + if (lastText.size()) { + toupperstr(lastText); + scratch.setFormatted("{\\fs19%c\\fs16%s}", lastText[0], lastText.c_str()+1); + outText(scratch.c_str(), buf, u); + } + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("\\par\\par\\pard ", buf, u); + } + else if (tag.isEndTag()) { + } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (!src) // assert we have a src attribute + return false; + + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + *filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + outText("<img src=\"", buf, u); + outText(filepath, buf, u); + outText("\" />", buf, u); +/* + char imgc; + for (c = filepath + strlen(filepath); c > filepath && *c != '.'; c--); + c++; + FILE* imgfile; + if (stricmp(c, "jpg") || stricmp(c, "jpeg")) { + imgfile = fopen(filepath, "r"); + if (imgfile != NULL) { + outText("{\\nonshppict {\\pict\\jpegblip ", buf, u); + while (feof(imgfile) != EOF) { + scratch.setFormatted("%2x", fgetc(imgfile)); + outText(scratch.c_str(), buf, u); + + } + fclose(imgfile); + outText("}}", buf, u); + } + } + else if (stricmp(c, "png")) { + outText("{\\*\\shppict {\\pict\\pngblip ", buf, u); + + outText("}}", buf, u); + } +*/ + delete [] filepath; + } + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END |