diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:59 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:59 -0400 |
commit | 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc (patch) | |
tree | 847326a4de82f0241ac87cbbc427a1b92a696a02 /src/modules/filters/teiplain.cpp | |
parent | d7469385b05b9510338407fa123e9ad090f80af6 (diff) |
Imported Upstream version 1.5.11
Diffstat (limited to 'src/modules/filters/teiplain.cpp')
-rw-r--r-- | src/modules/filters/teiplain.cpp | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/src/modules/filters/teiplain.cpp b/src/modules/filters/teiplain.cpp new file mode 100644 index 0000000..c721d84 --- /dev/null +++ b/src/modules/filters/teiplain.cpp @@ -0,0 +1,116 @@ +/*************************************************************************** + teiplain.cpp - TEI to Plaintext filter + ------------------- + begin : 2006-07-05 + copyright : 2006 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <teiplain.h> +#include <ctype.h> + +SWORD_NAMESPACE_START + +TEIPlain::TEIPlain() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("apos", "'"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("quot", "\""); + + setTokenCaseSensitive(true); +} + + +bool TEIPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + //MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + // <p> paragraph tag + if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + buf += "\n"; + } + else if (tag.isEndTag()) { // end tag + buf += "\n"; + userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + buf += "\n\n"; + userData->supressAdjacentWhitespace = true; + } + } + + // <entryFree> + else if (!strcmp(tag.getName(), "entryFree")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += n; + buf += ". "; + } + } + } + + // <sense> + else if (!strcmp(tag.getName(), "sense")) { + SWBuf n = tag.getAttribute("n"); + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (n != "") { + buf += n; + buf += ". "; + } + } + else if (tag.isEndTag()) { + buf += "\n"; + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf.append("\n\n\n"); + } + else if (tag.isEndTag()) { + } + } + + // <etym> + else if (!strcmp(tag.getName(), "etym")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "["; + } + else if (tag.isEndTag()) { + buf += "]"; + } + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + +SWORD_NAMESPACE_END |