summaryrefslogtreecommitdiff
path: root/src/modules/filters/swbasicfilter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/filters/swbasicfilter.cpp')
-rw-r--r--src/modules/filters/swbasicfilter.cpp406
1 files changed, 406 insertions, 0 deletions
diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp
new file mode 100644
index 0000000..ef10e45
--- /dev/null
+++ b/src/modules/filters/swbasicfilter.cpp
@@ -0,0 +1,406 @@
+/******************************************************************************
+ * swbasicfilter.cpp - definition of class SWBasicFilter. An SWFilter
+ * impl that provides some basic methods that
+ * many filters will need and can use as a starting
+ * point.
+ *
+ * $Id: swbasicfilter.cpp 2167 2008-05-16 23:23:39Z scribe $
+ *
+ * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org)
+ * CrossWire Bible Society
+ * P. O. Box 2528
+ * Tempe, AZ 85280-2528
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+#include <stdlib.h>
+#include <swbasicfilter.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <utilstr.h>
+#include <stringmgr.h>
+#include <map>
+#include <set>
+
+SWORD_NAMESPACE_START
+
+typedef std::map<SWBuf, SWBuf> DualStringMap; // maps a find-string to its replacement string
+typedef std::set<SWBuf> StringSet; // collection of unique strings
+
+/* Private implementation data for SWBasicFilter (bridge pattern). Per the
+ * original author's note, keeping these containers in a class defined only
+ * in this file isolates std::map from the many filters built on
+ * SWBasicFilter. */
+class SWBasicFilter::Private {
+public:
+ DualStringMap tokenSubMap; // token -> replacement text, consulted by substituteToken()
+ DualStringMap escSubMap; // escape string -> replacement text, consulted by substituteEscapeString()
+ StringSet escPassSet; // escape strings that passAllowedEscapeString() re-emits unchanged
+};
+
+const char SWBasicFilter::INITIALIZE = 1; // stage bit: processText() calls processStage() once before the character loop
+const char SWBasicFilter::PRECHAR = 2; // stage bit: processText() calls processStage() before handling each character
+const char SWBasicFilter::POSTCHAR = 4; // stage bit: processText() calls processStage() after handling a character
+const char SWBasicFilter::FINALIZE = 8; // stage bit: processText() calls processStage() once after the character loop
+
+/**
+ * Builds a filter with the default configuration: tokens are delimited by
+ * "<" and ">", escape strings by "&" and ";", no optional processing stage
+ * is enabled, matching is case-insensitive and nothing unrecognised is
+ * passed through to the output.
+ */
+SWBasicFilter::SWBasicFilter() {
+    p = new Private;
+
+    // No optional processing stages by default.
+    processStages = 0;
+
+    // Start from null delimiter buffers, then install the defaults through
+    // the setters so the cached lengths are filled in as well.
+    tokenStart = tokenEnd = 0;
+    escStart = escEnd = 0;
+    setTokenStart("<");
+    setTokenEnd(">");
+    setEscapeStart("&");
+    setEscapeEnd(";");
+
+    // Case-insensitive matching; unknown constructs are not passed through.
+    tokenCaseSensitive = escStringCaseSensitive = false;
+    passThruUnknownToken = passThruUnknownEsc = passThruNumericEsc = false;
+}
+
+
+/**
+ * Releases the four delimiter buffers and the private implementation data.
+ */
+SWBasicFilter::~SWBasicFilter() {
+    // delete [] on a null pointer is a no-op, so no guards are needed.
+    delete [] tokenStart;
+    delete [] tokenEnd;
+    delete [] escStart;
+    delete [] escEnd;
+
+    delete p;
+}
+
+
+/**
+ * Chooses what processText() does with a token that handleToken() reports
+ * as unhandled: when enabled, the token is copied to the output re-wrapped
+ * in the token delimiters; otherwise nothing is emitted for it.
+ *
+ * @param val true to pass unknown tokens through
+ */
+void SWBasicFilter::setPassThruUnknownToken(bool val) {
+    this->passThruUnknownToken = val;
+}
+
+
+/**
+ * Chooses what processText() does with an escape string that
+ * handleEscapeString() reports as unhandled: when enabled, the escape string
+ * is copied to the output re-wrapped in the escape delimiters; otherwise
+ * nothing is emitted for it.
+ *
+ * @param val true to pass unknown escape strings through
+ */
+void SWBasicFilter::setPassThruUnknownEscapeString(bool val) {
+    this->passThruUnknownEsc = val;
+}
+
+/**
+ * Chooses whether numeric escape strings (those whose first character is
+ * '#') are copied to the output unchanged. The flag is read by
+ * handleNumericEscapeString().
+ *
+ * @param val true to pass numeric escape strings through
+ */
+void SWBasicFilter::setPassThruNumericEscapeString(bool val) {
+    // Store into the numeric flag; writing passThruUnknownEsc here would
+    // leave this setting without effect and clobber the unknown-escape flag.
+    passThruNumericEsc = val;
+}
+
+
+/**
+ * Selects case-sensitive or case-insensitive token matching. The flag is
+ * read when a substitute is added (keys are upper-cased in case-insensitive
+ * mode) and again when a token is looked up, so it should be set before any
+ * token substitutes are registered.
+ *
+ * @param val true for case-sensitive matching
+ */
+void SWBasicFilter::setTokenCaseSensitive(bool val) {
+    this->tokenCaseSensitive = val;
+}
+
+
+/**
+ * Selects case-sensitive or case-insensitive escape-string matching. The
+ * flag is read when an escape string is registered (keys are upper-cased in
+ * case-insensitive mode) and again when one is looked up, so it should be
+ * set before any escape strings are registered.
+ *
+ * @param val true for case-sensitive matching
+ */
+void SWBasicFilter::setEscapeStringCaseSensitive(bool val) {
+    this->escStringCaseSensitive = val;
+}
+
+
+/**
+ * Registers replacement text for a token, replacing any entry already
+ * stored under the same key.
+ *
+ * @param findString    token text to match (without delimiters)
+ * @param replaceString text emitted in place of the token
+ */
+void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) {
+    if (tokenCaseSensitive) {
+        p->tokenSubMap[findString] = replaceString;
+        return;
+    }
+
+    // Case-insensitive mode: store the entry under the upper-cased key.
+    char *upperKey = 0;
+    stdstr(&upperKey, findString);
+    toupperstr(upperKey);
+    p->tokenSubMap[upperKey] = replaceString;
+    delete [] upperKey;
+}
+
+
+/**
+ * Removes the substitute registered for a token, if there is one.
+ *
+ * The key is normalised the same way addTokenSubstitute() normalises it:
+ * in case-insensitive mode it is upper-cased before the lookup, so an entry
+ * can be removed with the same spelling that was used to add it.
+ *
+ * @param findString token text whose substitute should be removed
+ */
+void SWBasicFilter::removeTokenSubstitute(const char *findString) {
+    SWBuf key(findString);
+
+    if (!tokenCaseSensitive) {
+        char *upperKey = 0;
+        stdstr(&upperKey, findString);
+        toupperstr(upperKey);
+        key = upperKey;
+        delete [] upperKey;
+    }
+
+    // erase-by-key is a no-op when the key is absent and needs one lookup.
+    p->tokenSubMap.erase(key);
+}
+
+/**
+ * Registers an escape string that passAllowedEscapeString() should copy to
+ * the output unchanged.
+ *
+ * @param findString escape string text (without delimiters)
+ */
+void SWBasicFilter::addAllowedEscapeString(const char *findString) {
+    if (escStringCaseSensitive) {
+        p->escPassSet.insert(StringSet::value_type(findString));
+        return;
+    }
+
+    // Case-insensitive mode: store the upper-cased spelling.
+    char *upperKey = 0;
+    stdstr(&upperKey, findString);
+    toupperstr(upperKey);
+    p->escPassSet.insert(StringSet::value_type(upperKey));
+    delete [] upperKey;
+}
+
+/**
+ * Removes an escape string from the pass-through set, if it is present.
+ *
+ * The key is normalised the same way addAllowedEscapeString() normalises
+ * it: in case-insensitive mode it is upper-cased before the lookup, so an
+ * entry can be removed with the same spelling that was used to add it.
+ *
+ * @param findString escape string text to remove
+ */
+void SWBasicFilter::removeAllowedEscapeString(const char *findString) {
+    SWBuf key(findString);
+
+    if (!escStringCaseSensitive) {
+        char *upperKey = 0;
+        stdstr(&upperKey, findString);
+        toupperstr(upperKey);
+        key = upperKey;
+        delete [] upperKey;
+    }
+
+    // erase-by-key is a no-op when the key is absent and needs one lookup.
+    p->escPassSet.erase(key);
+}
+
+/**
+ * Registers replacement text for an escape string. The entry is added with
+ * std::map::insert, so an entry already stored under the same key is left
+ * as it is.
+ *
+ * @param findString    escape string text to match (without delimiters)
+ * @param replaceString text emitted in place of the escape string
+ */
+void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) {
+    if (escStringCaseSensitive) {
+        p->escSubMap.insert(DualStringMap::value_type(findString, replaceString));
+        return;
+    }
+
+    // Case-insensitive mode: store the entry under the upper-cased key.
+    char *upperKey = 0;
+    stdstr(&upperKey, findString);
+    toupperstr(upperKey);
+    p->escSubMap.insert(DualStringMap::value_type(upperKey, replaceString));
+    delete [] upperKey;
+}
+
+/**
+ * Removes the substitute registered for an escape string, if there is one.
+ *
+ * The key is normalised the same way addEscapeStringSubstitute() normalises
+ * it: in case-insensitive mode it is upper-cased before the lookup, so an
+ * entry can be removed with the same spelling that was used to add it.
+ *
+ * @param findString escape string text whose substitute should be removed
+ */
+void SWBasicFilter::removeEscapeStringSubstitute(const char *findString) {
+    SWBuf key(findString);
+
+    if (!escStringCaseSensitive) {
+        char *upperKey = 0;
+        stdstr(&upperKey, findString);
+        toupperstr(upperKey);
+        key = upperKey;
+        delete [] upperKey;
+    }
+
+    // erase-by-key is a no-op when the key is absent and needs one lookup.
+    p->escSubMap.erase(key);
+}
+
+/**
+ * Looks the token up in the token substitution table and, when an entry
+ * exists, appends its replacement text to buf.
+ *
+ * @param buf   output buffer that receives the replacement
+ * @param token token text (without delimiters)
+ * @return true when a substitute was found and appended, false otherwise
+ */
+bool SWBasicFilter::substituteToken(SWBuf &buf, const char *token) {
+    DualStringMap::iterator match;
+
+    if (tokenCaseSensitive) {
+        match = p->tokenSubMap.find(token);
+    }
+    else {
+        // Case-insensitive mode: look up the upper-cased spelling.
+        char *upperToken = 0;
+        stdstr(&upperToken, token);
+        toupperstr(upperToken);
+        match = p->tokenSubMap.find(upperToken);
+        delete [] upperToken;
+    }
+
+    if (match == p->tokenSubMap.end())
+        return false;
+
+    buf += match->second.c_str();
+    return true;
+}
+
+/**
+ * Appends an escape string to buf, wrapped in the configured escape start
+ * and end delimiters.
+ *
+ * @param buf       output buffer
+ * @param escString escape string text (without delimiters)
+ */
+void SWBasicFilter::appendEscapeString(SWBuf &buf, const char *escString) {
+    buf += this->escStart;
+    buf += escString;
+    buf += this->escEnd;
+}
+
+/**
+ * Copies the escape string to buf (with its delimiters, in the spelling it
+ * was given) when it is a member of the allowed pass-through set.
+ *
+ * @param buf       output buffer
+ * @param escString escape string text (without delimiters)
+ * @return true when the escape string was allowed and appended
+ */
+bool SWBasicFilter::passAllowedEscapeString(SWBuf &buf, const char *escString) {
+    StringSet::iterator match;
+
+    if (escStringCaseSensitive) {
+        match = p->escPassSet.find(escString);
+    }
+    else {
+        // Case-insensitive mode: look up the upper-cased spelling.
+        char *upperEsc = 0;
+        stdstr(&upperEsc, escString);
+        toupperstr(upperEsc);
+        match = p->escPassSet.find(upperEsc);
+        delete [] upperEsc;
+    }
+
+    if (match == p->escPassSet.end())
+        return false;
+
+    appendEscapeString(buf, escString);
+    return true;
+}
+
+/**
+ * Handles a numeric escape string: when numeric pass-through is enabled the
+ * escape string is appended to buf with its delimiters.
+ *
+ * @param buf       output buffer
+ * @param escString escape string text (without delimiters)
+ * @return true when the escape string was appended, false otherwise
+ */
+bool SWBasicFilter::handleNumericEscapeString(SWBuf &buf, const char *escString) {
+    if (!passThruNumericEsc)
+        return false;
+
+    appendEscapeString(buf, escString);
+    return true;
+}
+
+/**
+ * Resolves an escape string in three steps: numeric escape strings (first
+ * character '#') go to handleNumericEscapeString(); allowed escape strings
+ * are passed through; anything else is looked up in the escape substitution
+ * table and replaced when an entry exists.
+ *
+ * @param buf       output buffer
+ * @param escString escape string text (without delimiters)
+ * @return true when something was appended to buf for this escape string
+ */
+bool SWBasicFilter::substituteEscapeString(SWBuf &buf, const char *escString) {
+    // Numeric escape strings are decided entirely by the numeric handler.
+    if (escString[0] == '#')
+        return handleNumericEscapeString(buf, escString);
+
+    if (passAllowedEscapeString(buf, escString))
+        return true;
+
+    DualStringMap::iterator match;
+    if (escStringCaseSensitive) {
+        match = p->escSubMap.find(escString);
+    }
+    else {
+        // Case-insensitive mode: look up the upper-cased spelling.
+        char *upperEsc = 0;
+        stdstr(&upperEsc, escString);
+        toupperstr(upperEsc);
+        match = p->escSubMap.find(upperEsc);
+        delete [] upperEsc;
+    }
+
+    if (match == p->escSubMap.end())
+        return false;
+
+    buf += match->second.c_str();
+    return true;
+}
+
+
+/**
+ * Handler that processText() calls for every completed token. This
+ * implementation only applies the substitution table; userData is not used.
+ *
+ * @param buf      output buffer
+ * @param token    token text (without delimiters)
+ * @param userData per-call state created by processText() (unused here)
+ * @return true when the token was handled
+ */
+bool SWBasicFilter::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+    const bool handled = substituteToken(buf, token);
+    return handled;
+}
+
+
+/**
+ * Handler that processText() calls for every completed escape string. This
+ * implementation only delegates to substituteEscapeString(); userData is
+ * not used.
+ *
+ * @param buf       output buffer
+ * @param escString escape string text (without delimiters)
+ * @param userData  per-call state created by processText() (unused here)
+ * @return true when the escape string was handled
+ */
+bool SWBasicFilter::handleEscapeString(SWBuf &buf, const char *escString, BasicFilterUserData *userData) {
+    const bool handled = substituteEscapeString(buf, escString);
+    return handled;
+}
+
+
+/**
+ * Sets the delimiter that opens an escape string and caches its length.
+ *
+ * @param escStart delimiter text; handed to stdstr() to store in the member
+ *                 of the same name
+ */
+void SWBasicFilter::setEscapeStart(const char *escStart) {
+    // The parameter shadows the member, hence the explicit this->.
+    stdstr(&this->escStart, escStart);
+    this->escStartLen = strlen(escStart);
+}
+
+
+/**
+ * Sets the delimiter that closes an escape string and caches its length.
+ *
+ * @param escEnd delimiter text; handed to stdstr() to store in the member
+ *               of the same name
+ */
+void SWBasicFilter::setEscapeEnd(const char *escEnd) {
+    // The parameter shadows the member, hence the explicit this->.
+    stdstr(&this->escEnd, escEnd);
+    this->escEndLen = strlen(escEnd);
+}
+
+
+/**
+ * Sets the delimiter that opens a token and caches its length.
+ *
+ * @param tokenStart delimiter text; handed to stdstr() to store in the
+ *                   member of the same name
+ */
+void SWBasicFilter::setTokenStart(const char *tokenStart) {
+    // The parameter shadows the member, hence the explicit this->.
+    stdstr(&this->tokenStart, tokenStart);
+    this->tokenStartLen = strlen(tokenStart);
+}
+
+
+/**
+ * Sets the delimiter that closes a token and caches its length.
+ *
+ * @param tokenEnd delimiter text; handed to stdstr() to store in the member
+ *                 of the same name
+ */
+void SWBasicFilter::setTokenEnd(const char *tokenEnd) {
+    // The parameter shadows the member, hence the explicit this->.
+    stdstr(&this->tokenEnd, tokenEnd);
+    this->tokenEndLen = strlen(tokenEnd);
+}
+
+/**
+ * Filters text in place. The input is copied, text is emptied, and the
+ * output is rebuilt one character at a time: plain characters are copied
+ * through, while the body of each token or escape string is collected and
+ * handed to handleToken() / handleEscapeString(), which append whatever
+ * should replace it. The optional INITIALIZE, PRECHAR, POSTCHAR and FINALIZE
+ * stages are forwarded to processStage() when enabled in processStages.
+ *
+ * @param text   in: text to filter; out: the filtered result
+ * @param key    passed to createUserData()
+ * @param module passed to createUserData()
+ * @return always 0
+ */
+char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
+ char *from;
+ char token[4096]; // body of the token / escape string currently being collected
+ int tokpos = 0; // next write index into token[]
+ bool intoken = false; // true between a start delimiter and its matching end delimiter
+ bool inEsc = false; // true when the open construct is an escape string rather than a token
+ int escStartPos = 0, escEndPos = 0; // match progress through the escape delimiters
+ int tokenStartPos = 0, tokenEndPos = 0; // match progress through the token delimiters
+ SWBuf lastTextNode; // plain text copied since the previous token / escape string ended
+ BasicFilterUserData *userData = createUserData(module, key); // deleted on both return paths below
+ // Read from a copy of the input and rebuild the output in text from scratch.
+ SWBuf orig = text;
+ from = orig.getRawData();
+ text = "";
+ // INITIALIZE stage: a true return means the stage produced the whole result.
+ if (processStages & INITIALIZE) {
+ if (processStage(INITIALIZE, text, from, userData)) { // processStage handled it all
+ delete userData;
+ return 0;
+ }
+ }
+
+ for (;*from; from++) { // walk the input up to its terminating NUL
+ // PRECHAR stage: a true return consumes this character.
+ if (processStages & PRECHAR) {
+ if (processStage(PRECHAR, text, from, userData)) // processStage handled this char
+ continue;
+ }
+ // Token start delimiter: its characters are never copied to the output.
+ if (*from == tokenStart[tokenStartPos]) {
+ if (tokenStartPos == (tokenStartLen - 1)) { // last delimiter character: a token begins
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0; // three leading zero bytes, relied on by the append logic further down
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = false;
+ }
+ else tokenStartPos++;
+ continue;
+ }
+ // Escape start delimiter: same as above, but marks the construct as an escape string.
+ if (*from == escStart[escStartPos]) {
+ if (escStartPos == (escStartLen - 1)) { // last delimiter character: an escape string begins
+ intoken = true;
+ tokpos = 0;
+ token[0] = 0;
+ token[1] = 0;
+ token[2] = 0;
+ inEsc = true;
+ }
+ else escStartPos++;
+ continue;
+ }
+ // End of an escape string: hand the collected body to handleEscapeString().
+ if (inEsc) {
+ if (*from == escEnd[escEndPos]) {
+ if (escEndPos == (escEndLen - 1)) { // NOTE(review): escEndPos is never advanced in this function, so an end delimiter longer than one character looks unable to complete - confirm
+ intoken = inEsc = false;
+ userData->lastTextNode = lastTextNode;
+
+ if (!userData->suspendTextPassThru) { //if text through is disabled no tokens should pass, too
+ if ((!handleEscapeString(text, token, userData)) && (passThruUnknownEsc)) { // unhandled: re-emit verbatim when pass-through is on
+ appendEscapeString(text, token);
+ }
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; // restart all delimiter matching
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+ // End of a token: hand the collected body to handleToken().
+ if (!inEsc) {
+ if (*from == tokenEnd[tokenEndPos]) {
+ if (tokenEndPos == (tokenEndLen - 1)) { // NOTE(review): tokenEndPos is never advanced either; same multi-character concern as for escEnd - confirm
+ intoken = false;
+ userData->lastTextNode = lastTextNode;
+ if ((!handleToken(text, token, userData)) && (passThruUnknownToken)) { // unlike the escape path, not gated on suspendTextPassThru
+ text += tokenStart;
+ text += token;
+ text += tokenEnd;
+ }
+ escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; // restart all delimiter matching
+ lastTextNode = "";
+ continue;
+ }
+ }
+ }
+ // Any other character is either token content or plain text.
+ if (intoken) {
+ if (tokpos < 4090) { // characters past this limit are silently dropped
+ token[tokpos++] = *from;
+ token[tokpos+2] = 0; // keeps zero bytes ahead of the write position so token stays NUL-terminated
+ }
+ }
+ else {
+ if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) { // drop a space when whitespace suppression was requested
+ if (!userData->suspendTextPassThru) {
+ text.append(*from);
+ userData->lastSuspendSegment.size(0); // presumably truncates the segment to empty - confirm against SWBuf
+ }
+ else userData->lastSuspendSegment.append(*from); // suspended: keep the text aside instead of emitting it
+ lastTextNode.append(*from);
+ }
+ userData->supressAdjacentWhitespace = false; // the request is cleared after one text character
+ }
+ // POSTCHAR stage: only reached for characters not consumed by a 'continue' above.
+ if (processStages & POSTCHAR)
+ processStage(POSTCHAR, text, from, userData);
+
+ }
+ // FINALIZE stage: runs once after the loop has consumed the whole input.
+ if (processStages & FINALIZE)
+ processStage(FINALIZE, text, from, userData);
+
+ delete userData;
+ return 0;
+}
+
+SWORD_NAMESPACE_END