diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
commit | 8c8aa6b07e595cfac56838b5964ab3e96051f1b2 (patch) | |
tree | da38e2c1979148dbd3b0c7b87f930746f5ba7f44 /src/modules/filters/osisplain.cpp | |
parent | 8d3fc864d094eeadc721f8e93436b37a5fab173e (diff) |
Imported Upstream version 1.5.7
Diffstat (limited to 'src/modules/filters/osisplain.cpp')
-rw-r--r-- | src/modules/filters/osisplain.cpp | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp new file mode 100644 index 0000000..7a12a27 --- /dev/null +++ b/src/modules/filters/osisplain.cpp @@ -0,0 +1,151 @@ +/*************************************************************************** + osisplain.cpp - OSIS to Plaintext filter + ------------------- + begin : 2003-02-15 + copyright : 2003 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <osisplain.h> +#include <utilxml.h> +#include <ctype.h> + +SWORD_NAMESPACE_START + +OSISPlain::OSISPlain() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + + addTokenSubstitute("title", "\n"); + addTokenSubstitute("/title", "\n"); + addTokenSubstitute("/l", "\n"); + addTokenSubstitute("lg", "\n"); + addTokenSubstitute("/lg", "\n"); + + setTokenCaseSensitive(true); +} + + +bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + if (((*token == 'w') && (token[1] == ' ')) || + ((*token == '/') && (token[1] == 'w') && (!token[2]))) { + bool start = false; + if (*token == 'w') { + if (token[strlen(token)-1] != '/') { + u->w = token; + return true; + } + start = true; + } + tag = (start) ? token : u->w.c_str(); + bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + SWBuf lastText = (start) ? "stuff" : u->lastTextNode.c_str(); + + const char *attrib; + const char *val; + if (attrib = tag.getAttribute("xlit")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + if (attrib = tag.getAttribute("gloss")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + if (attrib = tag.getAttribute("lemma")) { + int count = tag.getAttributePartCount("lemma"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val++; + if ((!strcmp(val, "3588")) && (lastText.length() < 1)) + show = false; + else buf.appendFormatted(" <%s>}", val); + } while (++i < count); + } + if ((attrib = tag.getAttribute("morph")) && (show)) { + int count = tag.getAttributePartCount("morph"); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val+=2; + buf.appendFormatted(" (%s)", val); + } while (++i < count); + } + if (attrib = tag.getAttribute("POS")) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + buf.appendFormatted(" <%s>", val); + } + } + + // <note> tag + else if (!strncmp(token, "note", 4)) { + if (!strstr(token, "strongsMarkup")) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + buf += " ("; + } + else u->suspendTextPassThru = true; + } + else if (!strncmp(token, "/note", 5)) { + if (!u->suspendTextPassThru) + buf += ")"; + else u->suspendTextPassThru = false; + } + + // <p> paragraph tag + else if (((*token == 'p') && ((token[1] == ' ') || (!token[1]))) || + ((*token == '/') && (token[1] == 'p') && (!token[2]))) { + userData->supressAdjacentWhitespace = true; + buf += "\n"; + } + + // <milestone type="line"/> + else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type")) && (!strcmp(tag.getAttribute("type"), "line"))) { + userData->supressAdjacentWhitespace = true; + buf += "\n"; + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END |