diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:33 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:33 -0400 |
commit | 8d3fc864d094eeadc721f8e93436b37a5fab173e (patch) | |
tree | 05e201c67dca55b4ccdf90ad479a25d95e3b1e63 /src/modules/filters |
Imported Upstream version 1.5.3
Diffstat (limited to 'src/modules/filters')
48 files changed, 7295 insertions, 0 deletions
diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/filters/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am new file mode 100644 index 0000000..c58fb5f --- /dev/null +++ b/src/modules/filters/Makefile.am @@ -0,0 +1,65 @@ +filtersdir = $(top_srcdir)/src/modules/filters + +libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp +libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp +libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp +libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp +libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp +libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp +libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp +libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp +libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp +libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp +libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp +libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp +libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp +libsword_la_SOURCES += $(filtersdir)/thmlmorph.cpp +libsword_la_SOURCES += $(filtersdir)/thmllemma.cpp +libsword_la_SOURCES += $(filtersdir)/thmlscripref.cpp +libsword_la_SOURCES += $(filtersdir)/thmlvariants.cpp + +libsword_la_SOURCES += $(filtersdir)/gbfthml.cpp +libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp +libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp +libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp +libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp +libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp + +libsword_la_SOURCES += 
$(filtersdir)/unicodertf.cpp +libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp +libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp +libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp +libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp +libsword_la_SOURCES += $(filtersdir)/utf8html.cpp +libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp + +libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp + +libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp + +if ICU +ICUDEFS = -D_ICU_ +SWICUSRC = $(filtersdir)/utf8transliterator.cpp +SWICUSRC += $(filtersdir)/utf8nfc.cpp +SWICUSRC += $(filtersdir)/utf8nfkd.cpp +SWICUSRC += $(filtersdir)/utf8arshaping.cpp +SWICUSRC += $(filtersdir)/utf8bidireorder.cpp +else +SWICUSRC = +ICUDEFS = +endif +libsword_la_SOURCES += $(SWICUSRC) +DEFS += $(ICUDEFS) + +libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp +libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp +libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp + diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp new file mode 100644 index 0000000..ad55396 --- /dev/null +++ b/src/modules/filters/cipherfil.cpp @@ -0,0 +1,38 @@ +/****************************************************************************** + * + * cipherfil - SWFilter decendant to decipher a module + */ + + +#include <stdlib.h> +#include <string.h> +#include <cipherfil.h> + + +CipherFilter::CipherFilter(const char *key) { + cipher = new SWCipher((unsigned char *)key); +} + + +CipherFilter::~CipherFilter() { + delete cipher; +} + + +SWCipher *CipherFilter::getCipher() { + return cipher; +} + + +char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + unsigned int len; +// len = strlen(text); + len = maxlen; + if (len > 0) { + cipher->cipherBuf(&len, text); + strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? 
len : maxlen); + } + text[maxlen] = 0; + text[maxlen+1] = 0; + return 0; +} diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp new file mode 100644 index 0000000..c5b7b90 --- /dev/null +++ b/src/modules/filters/gbffootnotes.cpp @@ -0,0 +1,118 @@ +/****************************************************************************** + * + * gbffootnotes - SWFilter decendant to hide or show footnotes + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbffootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFFootnotes::on[] = "On"; +const char GBFFootnotes::off[] = "Off"; +const char GBFFootnotes::optName[] = "Footnotes"; +const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +GBFFootnotes::GBFFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFFootnotes::~GBFFootnotes() { +} + +void GBFFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFFootnotes::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[4096]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 4096); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'R': // Reference + switch(token[1]) { + case 'F': // Begin footnote + hide = true; + break; + case 'f': // end footnote + hide = false; + break; + } + continue; // skip token + case 'W': + if (token[1] == 'T') { + switch (token[2]) { + case 'P': + case 'S': + case 'A': + continue; // remove this token + default: + break; + } + } + } + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp new file mode 100644 index 0000000..590e2fa --- /dev/null +++ b/src/modules/filters/gbfheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * gbfheadings - SWFilter decendant to hide or show headings + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFHeadings::on[] = "On"; +const char GBFHeadings::off[] = "Off"; +const char GBFHeadings::optName[] = "Headings"; +const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +GBFHeadings::GBFHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFHeadings::~GBFHeadings() { +} + +void GBFHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFHeadings::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; +// memset(token, 0, 2048); + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + switch (*token) { + case 'T': // Reference + switch(token[1]) { + case 'S': // Begin heading + hide = true; + break; + case 's': // end heading + hide = false; + break; + } + continue; // skip token + } + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfhtml.cpp 
b/src/modules/filters/gbfhtml.cpp new file mode 100644 index 0000000..73d445a --- /dev/null +++ b/src/modules/filters/gbfhtml.cpp @@ -0,0 +1,536 @@ +/*************************************************************************** + gbfhtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtml.h> + + +GBFHTML::GBFHTML() +{ +} + + +char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + bool hasFootnotePreTag = false; + bool isRightJustified = false; + bool isCentered = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else + from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '\n') { + *from = ' '; + } + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) + { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; 
+ *to++ = '>'; + *to++ = '<'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + for (tok = token+2; *tok; tok++) + *to++ = *tok; + *to++ = '<'; + *to++ = '/'; + *to++ = 'e'; + *to++ = 'm'; + *to++ = '>'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'B': //word(s) explained in footnote + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + hasFootnotePreTag = true; //we have the RB tag + continue; + case 'F': // footnote begin + if (hasFootnotePreTag) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + *to++ = ' '; + } + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + + *to++ = ' '; + *to++ = '<'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = '('; + + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'm'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'l'; + *to++ = '>'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + hasFootnotePreTag = false; + continue; + } + break; + + case 'F': // font tags + switch(token[1]) + { + case 'I': // italic start + 
*to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '#'; + *to++ = 'F'; + *to++ = 'F'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + 
*to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'J': //Justification + switch(token[1]) + { + case 'R': //right + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'r'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'h'; + *to++ = 't'; + *to++ = '\"'; + *to++ = '>'; + isRightJustified = true; + continue; + + case 'C': //center + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = 'n'; + *to++ = '='; + *to++ = '\"'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '\"'; + *to++ = '>'; + isCentered = true; + continue; + + case 'L': //left, reset right and center + if (isCentered) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = 'n'; + *to++ = 't'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = '>'; + isCentered = false; + } + if (isRightJustified) { + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + isRightJustified = false; + } + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + 
*to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue;/* + case 'S': + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue;*/ + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) { + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp new file mode 100644 index 0000000..30b27ba --- /dev/null +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -0,0 +1,148 @@ +/*************************************************************************** + gbfhtmlhref.cpp - GBF to HTML filter with hrefs + for strongs and morph tags + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later 
version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfhtmlhref.h> + +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("Rx", "</a>"); + addTokenSubstitute("FI", "<i>"); // italics begin + addTokenSubstitute("Fi", "</i>"); + addTokenSubstitute("FB", "<n>"); // bold begin + addTokenSubstitute("Fb", "</n>"); + addTokenSubstitute("FR", "<font color=\"#FF0000\">"); // words of Jesus begin + addTokenSubstitute("Fr", "</font>"); + addTokenSubstitute("FU", "<u>"); // underline begin + addTokenSubstitute("Fu", "</u>"); + addTokenSubstitute("FO", "<cite>"); // Old Testament quote begin + addTokenSubstitute("Fo", "</cite>"); + addTokenSubstitute("FS", "<sup>"); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", "</sup>"); + addTokenSubstitute("FV", "<sub>"); // Subscript begin + addTokenSubstitute("Fv", "</sub>"); + addTokenSubstitute("TT", "<big>"); // Book title begin + addTokenSubstitute("Tt", "</big>"); + addTokenSubstitute("PP", "<cite>"); // poetry begin + addTokenSubstitute("Pp", "</cite>"); + addTokenSubstitute("Fn", "</font>"); // font end + addTokenSubstitute("CL", "<br />"); // new line + addTokenSubstitute("CM", "<!P><br />"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? 
+ addTokenSubstitute("JR", "<div align=\"right\">"); // right align begin + addTokenSubstitute("JC", "<div align=\"center\">"); // center align begin + addTokenSubstitute("JL", "</div>"); // align end + +} + + +bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + pushString(buf, " <small><em><<a href=\"#"); + for (tok = token+1; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>></em></small>"); + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + pushString(buf, " <small><em>(<A HREF=\"#"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + pushString(buf, " <small><em>(<a href=\"M"); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>)</em></small>"); + } + + else if (!strncmp(token, "RX", 2)) { + pushString(buf, "<a href=\""); + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *(*buf)++ = *tok; + } + else { + break; + } + } + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "RB", 2)) { + pushString(buf, "<i>"); + userData["hasFootnotePreTag"] = "true"; + } + + else if (!strncmp(token, "RF", 2)) { + if(userData["hasFootnotePreTag"] == "true") { + 
userData["hasFootnotePreTag"] = "false"; + pushString(buf, "</i> "); + } + pushString(buf, "<font color=\"#800000\"><small> ("); + } + + else if (!strncmp(token, "FN", 2)) { + pushString(buf, "<font face=\""); + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + *(*buf)++ = (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp new file mode 100644 index 0000000..f8d336e --- /dev/null +++ b/src/modules/filters/gbfmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * gbfmorph - SWFilter decendant to hide or show morph tags + * in a GBF module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFMorph::on[] = "On"; +const char GBFMorph::off[] = "Off"; +const char GBFMorph::optName[] = "Morphological Tags"; +const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +GBFMorph::GBFMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFMorph::~GBFMorph() { +} + +void GBFMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..65766d3 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfplain.h> + + +GBFPlain::GBFPlain() { +} + + +char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + case 'T': // Tense + *to++ = ' '; + *to++ = '<'; + for (char *tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = ' '; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'F': // footnote begin + *to++ = ' '; + *to++ = '['; + continue; + case 'f': // footnote end + *to++ = ']'; + *to++ = ' '; + continue; + } + break; + case 'C': + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; +/* Bug in WEB + case 'L': + *to++ = '<'; + continue; +*/ + case 'L': // Bug in WEB. 
Use above entry when fixed + case 'N': // new line + *to++ = '\n'; + continue; + case 'M': // new paragraph + *to++ = '\n'; + *to++ = '\n'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp new file mode 100644 index 0000000..5f7d064 --- /dev/null +++ b/src/modules/filters/gbfrtf.cpp @@ -0,0 +1,277 @@ +/****************************************************************************** + * + * gbfrtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfrtf.h> +#include <ctype.h> + +GBFRTF::GBFRTF() { +} + + +char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + char token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char *)&text[maxlen - len]; + } + else from = (unsigned char *)text; // ------------------------------- + for (to = (unsigned char *)text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': // Greek + case 'H': // Hebrew + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '<'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + *to++ = '}'; + continue; + + case 'T': // Tense + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '('; + bool separate = 
false; + for (tok = token + 2; *tok; tok++) { + if (separate) { + *to++ = ';'; + *to++ = ' '; + separate = false; + } + switch (*tok) { + case 'G': + case 'H': + for (tok++; *tok; tok++) { + if (isdigit(*tok)) { + *to++ = *tok; + separate = true; + } + else { + tok--; + break; + } + } + break; + default: + for (; *tok; tok++) { + *to++ = *tok; + } + } + } + *to++ = ')'; + *to++ = '}'; + continue; + } + break; + case 'R': + switch(token[1]) { + case 'X': + *to++ = '#'; + continue; + case 'x': + *to++ = '|'; + continue; + case 'F': // footnote begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '1'; + *to++ = '7'; + *to++ = ' '; + *to++ = '('; + continue; + case 'f': // footnote end + *to++ = ')'; + *to++ = ' '; + *to++ = '}'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) { + case 'I': // italic start + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'i': // italic end + *to++ = '\\'; + *to++ = 'i'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'B': // bold start + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 'b': // bold end + *to++ = '\\'; + *to++ = 'b'; + *to++ = '0'; + *to++ = ' '; + continue; + case 'N': + *to++ = '{'; + if (!strnicmp(token+2, "Symbol", 6)) { + *to++ = '\\'; + *to++ = 'f'; + *to++ = '7'; + *to++ = ' '; + } + continue; + case 'n': + *to++ = '}'; + continue; + case 'S': + *to++ = '{'; + *to++ = '\\'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + continue; + case 'R': + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '6'; + *to++ = ' '; + continue; + case 'r': + *to++ = '}'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + *to++ = '>'; + continue; + case 
'L': // line break + *to++ = '\\'; + *to++ = 'l'; + *to++ = 'i'; + *to++ = 'n'; + *to++ = 'e'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + case 'T': + *to++ = '<'; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = 's'; + *to++ = '2'; + *to++ = '2'; + *to++ = ' '; + continue; + case 't': + *to++ = '}'; + continue; + case 'S': + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'i'; + *to++ = '1'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = '1'; + *to++ = ' '; + continue; + case 's': + *to++ = '}'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + break; + + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp new file mode 100644 index 0000000..40fc958 --- /dev/null +++ b/src/modules/filters/gbfstrongs.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * gbfstrongs - SWFilter decendant to hide or show strongs number + * in a GBF module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <gbfstrongs.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char GBFStrongs::on[] = "On"; +const char GBFStrongs::off[] = "Off"; +const char GBFStrongs::optName[] = "Strong's Numbers"; +const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +GBFStrongs::GBFStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFStrongs::~GBFStrongs() { +} + +void GBFStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want strongs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) { // Strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to 
= 0; + } + return 0; +} diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp new file mode 100644 index 0000000..ca03e71 --- /dev/null +++ b/src/modules/filters/gbfthml.cpp @@ -0,0 +1,463 @@ +/*************************************************************************** + gbfthml.cpp - GBF to ThML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <gbfthml.h> + + +GBFThML::GBFThML() +{ +} + + +char GBFThML::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'S'; + *to++ = 't'; + *to++ = 'r'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 'g'; + 
*to++ = 's'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 1; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + + case 'T': // Tense + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'M'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = 'p'; + *to++ = 'h'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '<'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = ' '; + *to++ = 'p'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'o'; + *to++ = 't'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'f': // footnote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + 
for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = 'f'; + *to++ = 'f'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // 
Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'p'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 'S': + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 's'; + *to++ = 's'; + *to++ = '='; + *to++ = '\"'; + *to++ = 's'; + *to++ = 'e'; + *to++ = 'c'; + *to++ = 'h'; + *to++ = 'e'; + *to++ = 'a'; + *to++ = 'd'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/greeklexattribs.cpp 
b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..fb166df --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,58 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter decendant to set entry attributes for greek + * lexicons + */ + + +#include <stdlib.h> +#include <string.h> +#include <greeklexattribs.h> +#include <swmodule.h> + + +GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + char *from; + bool inAV = false; + string phrase; + string freq; + char *currentPhrase = 0; + + + for (from = text; *from; from++) { + if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ')) { + phrase = ""; + phrase.append(currentPhrase, (int)(from - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"] = freq; + currentPhrase = 0; + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = true; + from+=2; + } + } + } + return 0; +} + + diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..75ee998 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf16.h> + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short 
*to; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + switch (*from) { + case 0x80: // '€' + *to++ = 0x20AC; + break; + case 0x82: // '‚' + *to++ = 0x201A; + break; + case 0x83: // 'ƒ' + *to++ = 0x0192; + break; + case 0x84: // '„' + *to++ = 0x201E; + break; + case 0x85: // '…' + *to++ = 0x2026; + break; + case 0x86: // '†' + *to++ = 0x2020; + break; + case 0x87: // '‡' + *to++ = 0x2021; + break; + case 0x88: // 'ˆ' + *to++ = 0x02C6; + break; + case 0x89: // '‰' + *to++ = 0x2030; + break; + case 0x8A: // 'Š' + *to++ = 0x0160; + break; + case 0x8B: // '‹' + *to++ = 0x2039; + break; + case 0x8C: // 'Œ' + *to++ = 0x0152; + break; + case 0x8E: // 'Ž' + *to++ = 0x017D; + break; + case 0x91: // '‘' + *to++ = 0x2018; + break; + case 0x92: // '’' + *to++ = 0x2019; + break; + case 0x93: // '“' + *to++ = 0x201C; + break; + case 0x94: // '”' + *to++ = 0x201D; + break; + case 0x95: // '•' + *to++ = 0x2022; + break; + case 0x96: // '–' + *to++ = 0x2013; + break; + case 0x97: // '—' + *to++ = 0x2014; + break; + case 0x98: // '˜' + *to++ = 0x02DC; + break; + case 0x99: // '™' + *to++ = 0x2122; + break; + case 0x9A: // 'š' + *to++ = 0x0161; + break; + case 0x9B: // '›' + *to++ = 0x203A; + break; + case 0x9C: // 'œ' + *to++ = 0x0153; + break; + case 0x9E: // 'ž' + *to++ = 0x017E; + break; + case 0x9F: // 'Ÿ' + *to++ = 0x0178; + break; + default: + *to++ = (unsigned short)*from; + } + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..91af8dc --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Latin1UTF8 - 
SWFilter decendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <latin1utf8.h> +#include <swmodule.h> + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + + len = strlen(text) + 1; + if (len == maxlen + 1) + maxlen = (maxlen + 1) * FILTERPAD; + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; // ------------------------------- + + + + for (to = (unsigned char*)text; *from; from++) { + if (*from < 0x80) { + *to++ = *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + *to++ = 0xe2; // 'â' + *to++ = 0x82; // '‚' + *to++ = 0xac; // '¬' + break; + case 0x82: // '‚' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + *to++ = 0xc6; // 'Æ' + *to++ = 0x92; // '’' + break; + case 0x84: // '„' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9e; // 'ž' + break; + case 0x85: // '…' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa6; // '¦' + break; + case 0x86: // '†' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa0; // ' ' + break; + case 0x87: // '‡' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + *to++ = 0xcb; // 'Ë' + *to++ = 0x86; // '†' + break; + case 0x89: // '‰' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb0; // '°' + break; + case 0x8A: // 'Š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa0; // ' ' + break; + case 0x8B: // '‹' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x92; // '’' + break; + case 0x8E: // 'Ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbd; // '½' + break; + case 0x91: // '‘' + *to++ = 0xe2; // 'â' + 
*to++ = 0x80; // '€' + *to++ = 0x98; // '˜' + break; + case 0x92: // '’' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x99; // '™' + break; + case 0x93: // '“' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9c; // 'œ' + break; + case 0x94: // '”' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9d; // '' + break; + case 0x95: // '•' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa2; // '¢' + break; + case 0x96: // '–' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x93; // '“' + break; + case 0x97: // '—' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x94; // '”' + break; + case 0x98: // '˜' + *to++ = 0xcb; // 'Ë' + *to++ = 0x9c; // 'œ' + break; + case 0x99: // '™' + *to++ = 0xe2; // 'â' + *to++ = 0x84; // '„' + *to++ = 0xa2; // '¢' + break; + case 0x9A: // 'š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa1; // '¡' + break; + case 0x9B: // '›' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xba; // 'º' + break; + case 0x9C: // 'œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x93; // '“' + break; + case 0x9E: // 'ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + *to++ = 0xc5; // 'Å' + *to++ = 0xb8; // '¸' + break; + default: + *to++ = 0xC2; + *to++ = *from; + } + } + else { + *to++ = 0xC3; + *to++ = (*from - 0x40); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..96fc4d8 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is 
free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <plainfootnotes.h> +#include <swkey.h> + +#include <stdlib.h> +#include <string.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + +const char PLAINFootnotes::on[] = "On"; +const char PLAINFootnotes::off[] = "Off"; +const char PLAINFootnotes::optName[] = "Footnotes"; +const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +PLAINFootnotes::PLAINFootnotes(){ + option = false; + options.push_back(on); + options.push_back(off); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +void PLAINFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *PLAINFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + + +char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + if (!option) { // if we don't want footnotes + char *to, *from; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) + { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + if (*from == '}') // Footnote end + { + hide=false; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} + diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..fefb029 --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <plainhtml.h> + + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + int count = 0; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // paragraph + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // new line + { + *to++ = '<'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + } + + if (*from == '{') { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = ' '; + *to++ = 'C'; + *to++ = 'O'; + *to++ = 'L'; + *to++ = 'O'; + *to++ = 'R'; + *to++ = '='; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + + *to++ = '<'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + *to++ = ' '; + *to++ = '('; + continue; + } + + if (*from == '}') + { + *to++ = ')'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + + *to++ = '<'; + *to++ = '/'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = '>'; + continue; + } + + if ((*from == ' ') && (count > 5000)) + { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + count = 0; + continue; + } + + *to++ = *from; + count++; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..f0b842b 
--- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,99 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <rtfhtml.h> + + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + bool center = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') // a RTF command + { + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifier off + if (center) + { + *to++ = '<'; + *to++ = '/'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = false; + } + from += 4; + continue; + } + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + *to++ = '\n'; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ((from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + *to++ = '<'; + *to++ = 'C'; 
+ *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = true; + } + from += 2; + continue; + } + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp new file mode 100644 index 0000000..6f8ae4f --- /dev/null +++ b/src/modules/filters/rwphtml.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <rwphtml.h> + +RWPHTML::RWPHTML() +{ +} + + +char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + signed char greek_str[500]; + bool inverse = false; + bool first_letter = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } else + from = text; + for (to = text; *from; from++) { + if (*from == '\\') { + ++from; + int i=0; + first_letter = true; + greek_str[0] = '\0'; + while (*from != '\\') { /* get the greek word or phrase */ + greek_str[i++] = *from; + greek_str[i + 1] = '\0'; + from++; + } /* convert to symbol font as best we can */ + strcpy(to,"<I> </I><FONT FACE=\"symbol\">"); + to += strlen(to); + for (int j = 0; j < i; j++) { + if ((first_letter) + && (greek_str[j] == 'h')) { + if (greek_str[j + 1] == 'o') { + *to++ = 'o'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'a') { + *to++ = 'a'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'w') { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'u') { + *to++ = 'u'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -109) { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -120) { + *to++ = 'h'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'i') { + *to++ = 'i'; + first_letter = false; + ++j; + continue; + }else if (greek_str[j + 1] == 'e') { + *to++ = 'e'; + first_letter = false; + ++j; + continue; + } + first_letter = false; + } + if ((greek_str[j] == 't') + && (greek_str[j + 1] == 'h')) { + *to++ = 'q'; + ++j; + continue; + } + if ((greek_str[j] == 'c') + && 
(greek_str[j + 1] == 'h')) { + *to++ = 'c'; + ++j; + continue; + } + if ((greek_str[j] == 'p') + && (greek_str[j + 1] == 'h')) { + ++j; + *to++ = 'f'; + continue; + } + if (greek_str[j] == -120) { + *to++ = 'h'; + continue; + } + if (greek_str[j] == -125) { + *to++ = 'a'; + continue; + } + if (greek_str[j] == -109) { + if(greek_str[j+1] == 'i') ++j; + *to++ = 'w'; + continue; + } + if (greek_str[j] == ' ') + first_letter = true; + if (greek_str[j] == 's') { + if(isalpha(greek_str[j + 1])) *to++ = 's'; + else if(!isprint(greek_str[j] )) *to++ = 's'; + else *to++ = 'V'; + continue; + } + if (greek_str[j] == '\'') { + continue; + } + *to++ = greek_str[j]; + } + strcpy(to,"</FONT><I> </I>"); + to += strlen(to); + continue; + } + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + strcpy(to,"<FONT COLOR=#0000FF>"); + to += strlen(to); + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + strcpy(to,"</FONT>"); + to += strlen(to); + continue; + } + if (*from == '{') { + strcpy(to,"<BR><STRONG>"); + to += strlen(to); + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + strcpy(to,"<P>"); + to += strlen(to); + } + continue; + } + if (*from == '}') { + strcpy(to," </STRONG>"); + to += strlen(to); + continue; + } + if ((*from == '\n') && (from[1] == '\n')) { + strcpy(to,"<P>"); + to += strlen(to); + continue; + } + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp new file mode 100644 index 0000000..8f7b074 --- /dev/null +++ b/src/modules/filters/rwprtf.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include <stdlib.h> +#include <string.h> +#include <rwprtf.h> + + +RWPRTF::RWPRTF() { + +} + + +char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + 
char *to, *from; + bool ingreek = false; + bool inverse = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') { + if(!ingreek) { + ingreek = true; + *to++ = '['; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = '8'; + *to++ = ' '; + continue; + } + else { + ingreek = false; + *to++ = '}'; + *to++ = ']'; + continue; + } + } + + if ((ingreek) && ((*from == 'h') || (*from == 'H'))) + continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. + + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '2'; + *to++ = ' '; + *to++ = '#'; + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + *to++ = '|'; + *to++ = '}'; + continue; + } + + if (*from == '{') { + *to++ = '{'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = ' '; + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + } + continue; + } + + if (*from == '}') { + // this is kinda neat... 
DO NOTHING + } + if ((*from == '\n') && (from[1] == '\n')) { + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..d0d5ceb --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * + * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include <stdlib.h> +#include <stdio.h> +#include <swmodule.h> + +#include <scsuutf8.h> + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 
0x80 | uchar >> 12 & 0x3f; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + + return text; +} + +char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // ------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) /* SQn */ + { + if (i >= len) break; + /* single quote */ d = from[i++]; + + to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) /* SCn */ + { + /* change window */ active = c - 0x10; + } + else if (c >= 0x18 && c <= 0x1F) /* SDn */ + { + /* define window */ active = c - 0x18; + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) /* SDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) /* SQU */ + { + if (i >= len) break; + /* SQU */ c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) /* SCU */ + { + /* change to Unicode mode */ mode = 1; + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) /* UQU */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) /* UDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; + return 0; +} + diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..2865085 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,299 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. 
+ * + * $Id: swbasicfilter.cpp,v 1.17 2002/03/11 19:01:28 scribe Exp $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <string.h> +#include <swbasicfilter.h> +#include <stdio.h> +#include <stdarg.h> + +SWBasicFilter::SWBasicFilter() { + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else 
tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +bool SWBasicFilter::substituteToken(char **buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = tokenSubMap.find(tmp); + delete [] tmp; + } else + it = tokenSubMap.find(token); + + if (it != tokenSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { + DualStringMap::iterator it; + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = escSubMap.find(tmp); + delete [] tmp; + } else + it = escSubMap.find(escString); + + if (it != escSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); +} + + +void 
SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); +} + + +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); +} + + +char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + this->key = key; + this->module = module; + char *to, *from, token[4096]; + int tokpos = 0; + bool intoken = false; + int len; + bool inEsc = false; + char escStartLen = strlen(escStart); + char escEndLen = strlen(escEnd); + char escStartPos = 0, escEndPos = 0; + char tokenStartLen = strlen(tokenStart); + char tokenEndLen = strlen(tokenEnd); + char tokenStartPos = 0, tokenEndPos = 0; + DualStringMap userData; + string lastTextNode; + + bool suspendTextPassThru = false; + userData["suspendTextPassThru"] = "false"; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + + for (to = text; *from; from++) { + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { + pushString(&to, escStart); + pushString(&to, token); + pushString(&to, escEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = 
(!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if (tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { + pushString(&to, tokenStart); + pushString(&to, token); + pushString(&to, tokenEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!suspendTextPassThru) + *to++ = *from; + lastTextNode += *from; + } + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..d9b1f0e --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter decendant to hide or show footnotes + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlfootnotes.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLFootnotes::on[] = "On"; +const char ThMLFootnotes::off[] = "Off"; +const char ThMLFootnotes::optName[] = "Footnotes"; +const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +ThMLFootnotes::ThMLFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + +void ThMLFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLFootnotes::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4)) { + hide = true; + continue; + } + else if (!strncmp(token, "/note", 5)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..66d9a20 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,330 @@ +/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + 
* (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlgbf.h> + + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + bool sechead = false; + bool title = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + 
else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if 
(!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + 
continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = '<'; + *to++ = 'W'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'T'; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'X'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'x'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "note", 4)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'F'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'f'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sup", 3)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'S'; + *to++ = '>'; + } + else if (!strncmp(token, "/sup", 4)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 's'; + *to++ = '>'; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'r'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'S'; + *to++ = '>'; + sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 's'; + *to++ = '>'; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'T'; + *to++ = '>'; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 't'; + *to++ 
= '>'; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { + *to++ = '<'; + *to++ = 'C'; + *to++ = 'L'; + *to++ = '>'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'I'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'B'; + *to++ = '>'; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + *to++ = '<'; + *to++ = 'C'; + *to++ = 'M'; + *to++ = '>'; + continue; + case 'I': + case 'i': // italic end + *to++ = '<'; + *to++ = 'F'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..00b8a23 --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter decendant to hide or show headings + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlheadings.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLHeadings::on[] = "On"; +const char ThMLHeadings::off[] = "Off"; +const char ThMLHeadings::optName[] = "Headings"; +const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +ThMLHeadings::ThMLHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLHeadings::~ThMLHeadings() { +} + +void ThMLHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLHeadings::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "div class=\"sechead\"", 19)) { + hide = true; + continue; + } + if (!strnicmp(token, "div class=\"title\"", 17)) { + hide = true; + continue; + } + else if (hide && !strnicmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp new file mode 100644 index 0000000..9cb8679 --- /dev/null +++ b/src/modules/filters/thmlhtml.cpp @@ -0,0 +1,211 @@ +/*************************************************************************** + thmlhtml.cpp - ThML to HTML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtml.h> +#include <swmodule.h> + + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + 
addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + 
addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", " </a>"); + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); +} + + +bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, "<small><em>"); + for (const char *tok = token + 5; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</em></small>"); + } + else if (token[27] == 'T') { + pushString(buf, "<small><i>"); + for (unsigned int i = 29; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</i></small>"); + } + } + else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + pushString(buf, "<small><em>"); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, "</em></small>"); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "<small><em>("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")</em></small>"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *tok; tok++) + if(*tok != '\"') + 
*(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if(!strncmp(token, "note", 4)) { + pushString(buf, " <font color=\"#800000\"><small>("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..ce7e3fd --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,269 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlhtmlhref.h> +#include <swmodule.h> + + +ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + 
addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + 
addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " <font color=\"#800000\"><small>("); + addTokenSubstitute("/note", ")</small></font> "); + addTokenSubstitute("/scripture", "</i> "); +} + + +bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync ", 5)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + + //scan for value and add it to the buffer + for (tok = token + 5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + pushString(buf, "</a>"); + } + + else if (!strncmp(token, "scripture ", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<i>"); + } + + else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "<a href=\""); + for (const char *tok = token + 9; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + } + + // we're starting a scripRef like "<scripRef>John 3:16</scripRef>" + else if (!strcmp(token, "scripRef")) { + userData["inscriptRef"] = "false"; + // let's stop text from going to output + userData["suspendTextPassThru"] = "true"; + } + + // we've ended a scripRef + else if (!strcmp(token, "/scripRef")) { + if 
(userData["inscriptRef"] == "true") { // like "<scripRef passage="John 3:16">John 3:16</scripRef>" + userData["inscriptRef"] = "false"; + pushString(buf, "</a>"); + } + + else { // like "<scripRef>John 3:16</scripRef>" + pushString(buf, "<a href=\"passage="); + //char *strbuf = (char *)userData["lastTextNode"].c_str(); + pushString(buf, userData["lastTextNode"].c_str()); + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + pushString(buf, userData["lastTextNode"].c_str()); + // let's let text resume to output again + userData["suspendTextPassThru"] = "false"; + pushString(buf, "</a>"); + } + } + + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "<br /><b><i>"); + } + else if (!strncmp(token, "/div", 4)) { + if (userData["SecHead"] == "true") { + pushString(buf, "</i></b><br />"); + userData["SecHead"] = "false"; + } + } + + else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { + pushString(buf, "<a href=\""); + for (tok = token + 5; *(tok+1); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + *(*buf)++ = '\"'; + *(*buf)++ = '>'; + for (tok = token + 29; *(tok+2); tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, "</a>"); + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " <small><font color=\"#800000\">("); + } 
+ else { + *(*buf)++ = '<'; + for (const char *tok = token; *tok; tok++) + *(*buf)++ = *tok; + *(*buf)++ = '>'; + //return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp new file mode 100644 index 0000000..33856db --- /dev/null +++ b/src/modules/filters/thmllemma.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * thmllemma - SWFilter decendant to hide or show lemmas + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmllemma.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLLemma::on[] = "On"; +const char ThMLLemma::off[] = "Off"; +const char ThMLLemma::optName[] = "Lemmas"; +const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; + +ThMLLemma::ThMLLemma() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLLemma::~ThMLLemma() { +} + +void ThMLLemma::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLLemma::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want lemmas + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a lemma token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..f95bede --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter decendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlmorph.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLMorph::on[] = "On"; +const char ThMLMorph::off[] = "Off"; +const char ThMLMorph::optName[] = "Morphological Tags"; +const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +ThMLMorph::ThMLMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLMorph::~ThMLMorph() { +} + +void ThMLMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + 
return 0; +} diff --git a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp new file mode 100644 index 0000000..2b31fab --- /dev/null +++ b/src/modules/filters/thmlolb.cpp @@ -0,0 +1,243 @@ +/*************************************************************************** + thmlolb.cpp - ThML to OLB filter + ------------------- + begin : 2001-05-10 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlolb.h> + + +ThMLOLB::ThMLOLB() +{ +} + + +char ThMLOLB::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + int i; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + 
else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", 
token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if 
(!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) + { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '#'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = ' '; + continue; + } + else if (!strncmp(token, "note ", 5)) { + *to++ = '{'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '}'; + continue; + } + else if (!strnicmp(token, "font", 4)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + case '/': + switch(token[1]) { + case 'I': + case 'i': // italic end + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + 
*to++ = '\\'; + *to++ = '$'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2047) + token[tokpos++] = *from; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..5609f16 --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** + * + * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlplain.h> + + +ThMLPlain::ThMLPlain() { +} + + +char ThMLPlain::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == 10 || *from == 13) + from++; + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", 
token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + 
else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 
6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = ''; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = ' '; + *to++ = '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = ' '; + *to++ = '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = ')'; + continue; + } + if (!strncmp("note", token, 4)) { + *to++ = ' '; + *to++ = '('; + } + else if (!strncmp("br", token, 2)) + *to++ = '\n'; + else if (!strncmp("/p", token, 2)) + *to++ = '\n'; + else if (!strncmp("/note", token, 5)) { + *to++ = ')'; + *to++ = ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + + return 0; +} + + diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..76289ec --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,219 @@ +/*************************************************************************** + thmlrtf.cpp - ThML to RTF filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free 
software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include <stdlib.h> +#include <string.h> +#include <thmlrtf.h> + + +ThMLRTF::ThMLRTF() +{ + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", 
"Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + 
addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", ""); + addEscapeStringSubstitute("macr", "¯"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", "|}"); + addTokenSubstitute("/note", ") }"); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); +} + +bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { +/* if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, " {\\fs15 <"); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ">}"); + } + else if (token[27] == 'T') { + pushString(buf, " {\\fs15 ("); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + } + else if (!strncmp(token, "sync type=\"morph\" ", 18)) { + pushString(buf, " 
{\\fs15 ("); + for (const char *tok = token + 5; *tok; tok++) { + if (!strncmp(tok, "value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + + pushString(buf, ")}"); +*/ } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "{\\fs15 ("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "{\\cf2 #"); + } + else if (!strncmp(token, "div", 3)) { + *(*buf)++ = '{'; + if (!strncmp(token, "div class=\"title\"", 17)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + } + else if (!strncmp(token, "/div", 4)) { + *(*buf)++ = '}'; + if (userData["sechead"] == "true") { + pushString(buf, "\\par "); + userData["sechead"] == "false"; + } + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " {\\i1\\fs15 ("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp new file mode 100644 index 0000000..23edd6d --- /dev/null +++ b/src/modules/filters/thmlscripref.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlscripref - SWFilter decendant to hide or show scripture references + * in a ThML module. 
+ */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlscripref.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLScripref::on[] = "On"; +const char ThMLScripref::off[] = "Off"; +const char ThMLScripref::optName[] = "Scripture Cross-references"; +const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + + +ThMLScripref::ThMLScripref() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLScripref::~ThMLScripref() { +} + +void ThMLScripref::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLScripref::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want scriprefs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "scripRef", 8)) { + hide = true; + continue; + } + else if (!strnicmp(token, "/scripRef", 9)) { + hide = false; + continue; + } + + // if not a scripref token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlstrongs.cpp 
b/src/modules/filters/thmlstrongs.cpp new file mode 100644 index 0000000..8d0466c --- /dev/null +++ b/src/modules/filters/thmlstrongs.cpp @@ -0,0 +1,138 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <thmlstrongs.h> +#include <swmodule.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLStrongs::on[] = "On"; +const char ThMLStrongs::off[] = "Off"; +const char ThMLStrongs::optName[] = "Strong's Numbers"; +const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +ThMLStrongs::ThMLStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLStrongs::~ThMLStrongs() { +} + +void ThMLStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + } + + if (!option) { // if we don't want strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + 
} + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp new file mode 100644 index 0000000..fda0950 --- /dev/null +++ b/src/modules/filters/thmlvariants.cpp @@ -0,0 +1,183 @@ +/****************************************************************************** + * + * thmlvariants - SWFilter decendant to hide or show textual variants + * in a ThML module. + */ + + +#include <stdlib.h> +#include <string.h> +#include <thmlvariants.h> +#ifndef __GNUC__ +#else +#include <unixstr.h> +#endif + + +const char ThMLVariants::primary[] = "Primary Reading"; +const char ThMLVariants::secondary[] = "Secondary Reading"; +const char ThMLVariants::all[] = "All Readings"; + +const char ThMLVariants::optName[] = "Textual Variants"; +const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; + + +ThMLVariants::ThMLVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +ThMLVariants::~ThMLVariants() { +} + +void ThMLVariants::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, primary)); +} + +const char *ThMLVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option == 0) { //we want primary only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\"", 19)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + else if (option == 1) { //we want variant only + char *to, *from, token[2048]; // cheese. Fix. 
/******************************************************************************
 *
 * unicodertf -	SWFilter descendant to convert a double byte unicode file
 *				 to RTF tags
 *
 * NOTE(review): despite the description above, the decoding loop below reads
 * the buffer as UTF-8 (lead byte + 6-bit continuation bytes) - confirm which
 * input encoding callers actually supply.
 */


#include <stdlib.h>
#include <stdio.h>
#include <unicodertf.h>

UnicodeRTF::UnicodeRTF() {
}


/**
 * Rewrites text in place: bytes without the high bit are copied unchanged,
 * multi-byte sequences are replaced by the RTF escape "\uN?" where N is the
 * decoded value printed as a signed decimal.
 *
 * The input is modified while it is decoded (lead and continuation bytes are
 * shifted/masked in the buffer), so the statements below depend on their
 * exact order.
 *
 * @param text   buffer, rewritten in place
 * @param maxlen size used for the shift-right step and the output limit
 * @return always 0
 */
char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	unsigned char *to, *from, *maxto;
	int len;
	char digit[10];
	short ch;	// must be signed per unicode spec (negative is ok for big numbers > 32768)

	// NOTE(review): strlenw and memmove are not declared by the headers
	// included in this file; presumably they arrive via unicodertf.h - verify.
	len = strlenw(text) + 2;						// shift string to right of buffer
	if (len < maxlen) {
		memmove(&text[maxlen - len], text, len);
		from = (unsigned char*)&text[maxlen - len];
	}
	else	from = (unsigned char*)text;
	maxto =(unsigned char*)text + maxlen;

	// -------------------------------
	// NOTE(review): the bound is only tested once per input character and
	// uses <=, while one escape writes several bytes - looks like it can
	// write past text + maxlen; confirm.
	for (to = (unsigned char*)text; *from && (to <= maxto); from++) {
		ch = 0;
		// high bit clear: plain 7-bit character, copy as is
		if ((*from & 128) != 128) {
			*to++ = *from;
			continue;
		}
		// 10xxxxxx where a lead byte was expected: the byte is overwritten
		// with 'x' in the input and skipped; nothing is written to the output
		if ((*from & 128) && ((*from & 64) != 64)) {
			// error
			*from = 'x';
			continue;
		}
		// Shift the lead byte left one bit at a time; every further leading
		// 1 bit stands for one continuation byte, whose low 6 bits are
		// appended to ch.
		*from <<= 1;
		int subsequent;
		for (subsequent = 1; (*from & 128); subsequent++) {
			*from <<= 1;
			from[subsequent] &= 63;
			ch <<= 6;
			ch |= from[subsequent];
		}
		subsequent--;	// now the number of continuation bytes consumed
		*from <<=1;	// drop the 0 bit that ends the length prefix
		char significantFirstBits = 8 - (2+subsequent);

		// the lead byte's payload bits now sit at the top of *from; move
		// them above the 6*subsequent bits already collected in ch
		ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
		from += subsequent;
		*to++ = '\\';
		*to++ = 'u';
		sprintf(digit, "%d", ch);
		for (char *dig = digit; *dig; dig++)
			*to++ = *dig;
		*to++ = '?';
	}

	// terminate with two NULs unless the output already ends at maxto
	if (to != maxto) {
		*to++ = 0;
	}
	*to = 0;
	return 0;
}
maxto =(unsigned char*)text + maxlen; + + // ------------------------------- + for (to = (unsigned char*)text; *from && (to <= maxto); from++) { + ch = 0; + if ((*from & 128) != 128) { + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '\\'; + *to++ = 'u'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = '?'; + } + + if (to != maxto) { + *to++ = 0; + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..5a7719f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf16utf8.h> + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + unsigned char *to; + + int len; + unsigned long uchar; + unsigned short schar; + + len = 0; + from = (unsigned short*) text; + while (*from) { + len += 2; + from++; + } + + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned short*)&text[maxlen - len]; + } + else + from = (unsigned short*)text; + + + // ------------------------------- + + for (to = (unsigned char*)text; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && 
/******************************************************************************
*
* utf8arshaping - SWFilter descendant to perform Arabic shaping on
*                 UTF-8 text
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <utf8arshaping.h>

/** Opens the UTF-8 converter used by ProcessText. */
UTF8arShaping::UTF8arShaping() {

	// ICU calls do nothing when the error code already signals a failure,
	// so it has to start out as U_ZERO_ERROR.
	err = U_ZERO_ERROR;
	conv = ucnv_open("UTF-8", &err);

}

UTF8arShaping::~UTF8arShaping() {
	ucnv_close(conv);
}

/**
 * Applies Arabic letter shaping and European-to-Arabic digit conversion to
 * the UTF-8 string in text. If any ICU step fails, text is left unchanged.
 *
 * @param text   NUL-terminated UTF-8 buffer, rewritten in place
 * @param maxlen capacity handed to the UTF-16 -> UTF-8 conversion
 * @return always 0
 */
char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	UChar *ustr, *ustr2;

	// a UTF-8 string never decodes to more UTF-16 units than it has bytes
	int32_t len = strlen(text);
	ustr = new UChar[len];
	ustr2 = new UChar[len];

	err = U_ZERO_ERROR;	// forget failures from earlier calls

	// Convert UTF-8 string to UTF-16 (UChars)
	len = ucnv_toUChars(conv, ustr, len, text, -1, &err);

	if (!U_FAILURE(err))
		len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err);

	// only overwrite the caller's text with a complete result
	if (!U_FAILURE(err))
		ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);

	delete [] ustr2;
	delete [] ustr;
	return 0;
}

#endif
/******************************************************************************
*
* utf8bidireorder - SWFilter descendant to perform reordering of UTF-8
*                   text to visual order according to Unicode BiDi
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <utf8bidireorder.h>

/** Opens the UTF-8 converter used by ProcessText. */
UTF8BiDiReorder::UTF8BiDiReorder() {

	// ICU calls do nothing when the error code already signals a failure,
	// so it has to start out as U_ZERO_ERROR.
	err = U_ZERO_ERROR;
	conv = ucnv_open("UTF-8", &err);

}

UTF8BiDiReorder::~UTF8BiDiReorder() {
	ucnv_close(conv);
}

/**
 * Reorders the UTF-8 string in text from logical to visual order (with
 * mirroring, BiDi control characters removed). If any ICU step fails, text
 * is left unchanged.
 *
 * @param text   NUL-terminated UTF-8 buffer, rewritten in place
 * @param maxlen capacity handed to the UTF-16 -> UTF-8 conversion
 * @return always 0
 */
char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	UChar *ustr, *ustr2;

	// a UTF-8 string never decodes to more UTF-16 units than it has bytes
	int32_t len = strlen(text);
	ustr = new UChar[len];

	err = U_ZERO_ERROR;	// forget failures from earlier calls

	// Convert UTF-8 string to UTF-16 (UChars)
	len = ucnv_toUChars(conv, ustr, len, text, -1, &err);
	if (U_FAILURE(err)) {
		delete [] ustr;
		return 0;
	}
	ustr2 = new UChar[len];

	UBiDi* bidi = ubidi_openSized(len + 1, 0, &err);
	if (bidi) {
		ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err);
		len = ubidi_writeReordered(bidi, ustr2, len,
			UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err);
		ubidi_close(bidi);
	}

	// only overwrite the caller's text with a complete result
	if (bidi && !U_FAILURE(err))
		ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err);

	delete [] ustr2;
	delete [] ustr;
	return 0;
}

#endif
<utf8cantillation.h> + + +const char UTF8Cantillation::on[] = "On"; +const char UTF8Cantillation::off[] = "Off"; +const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; +const char UTF8Cantillation::optTip[] = "Toggles Hebrew Cantillation Marks"; + +UTF8Cantillation::UTF8Cantillation() { + option = false; + options.push_back(on); + options.push_back(off); +} + +UTF8Cantillation::~UTF8Cantillation(){}; + +void UTF8Cantillation::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8Cantillation::getOptionValue() +{ + return (option) ? on:off; +} + +char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if (*from != 0xD6) { + if (*from == 0xD7 && *(from + 1) == 0x84) { + from++; + } + else { + *to++ = *from; + } + } + else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { + *to++ = *from; + from++; + *to++ = *from; + } + else { + from++; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp new file mode 100644 index 0000000..b0e5dc8 --- /dev/null +++ b/src/modules/filters/utf8greekaccents.cpp @@ -0,0 +1,252 @@ +/****************************************************************************** + * + * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8greekaccents.h> + + +const char UTF8GreekAccents::on[] = "On"; +const char UTF8GreekAccents::off[] = "Off"; +const char UTF8GreekAccents::optName[] = "Greek Accents"; +const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents"; + +UTF8GreekAccents::UTF8GreekAccents() { + option = true; + options.push_back(on); + 
options.push_back(off); +} + +UTF8GreekAccents::~UTF8GreekAccents(){}; + +void UTF8GreekAccents::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8GreekAccents::getOptionValue() +{ + return (option) ? on:off; +} + +char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + for (from = (unsigned char*)text; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) + from += 2; + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) + from++; + } + else if (*from == 0xCD && *(from + 1) == 0xBA) + from++; + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + *to++ = 0xCE; + *to++ = 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + *to++ = 0xCE; + *to++ = 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + *to++ = 0xCE; + *to++ = 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + *to++ = 0xCE; + *to++ = 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + *to++ = 0xCE; + *to++ = 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + *to++ = 0xCE; + *to++ = 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + *to++ = 0xCE; + *to++ = 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && 
*(from + 1) == 0xAD)) { + *to++ = 0xCE; + *to++ = 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + *to++ = 0xCE; + *to++ = 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) { + *to++ = 0xCE; + *to++ = 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0xBF; + from++; + } + //upsilon + else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + *to++ = 0xCF; + *to++ = 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + *to++ = 0xCF; + *to++ = 0x89; + from++; + } + + //Extended Greek + //capital alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { + *to++ = 0xCE; + *to++ = 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + *to++ = 0xCE; + *to++ = 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + *to++ = 0xCE; + *to++ = 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + *to++ = 0xCE; + *to++ = 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { + *to++ = 0xCE; + 
*to++ = 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + *to++ = 0xCE; + *to++ = 0xA5; + from+=2; + } + //capital omega + else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + *to++ = 0xCE; + *to++ = 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + *to++ = 0xCE; + *to++ = 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { + *to++ = 0xCE; + *to++ = 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + *to++ = 0xCE; + *to++ = 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + *to++ = 0xCE; + *to++ = 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + *to++ = 0xCE; + *to++ = 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) 
<= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + *to++ = 0xCE; + *to++ = 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + *to++ = 0xCF; + *to++ = 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { + *to++ = 0xCF; + *to++ = 0x89; + from+=2; + } + //rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { + *to++ = 0xCF; + *to++ = 0x81; + from+=2; + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + } + return 0; +} + + + + + + diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp new file mode 100644 index 0000000..e5b50e1 --- /dev/null +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * + * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8hebrewpoints.h> + + +const char UTF8HebrewPoints::on[] = "On"; +const char UTF8HebrewPoints::off[] = "Off"; +const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points"; +const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points"; + +UTF8HebrewPoints::UTF8HebrewPoints() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8HebrewPoints::~UTF8HebrewPoints(){}; + +void UTF8HebrewPoints::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char 
*UTF8HebrewPoints::getOptionValue() +{ + return (option) ? on:off; +} + +char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + *to++ = *from; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..7487815 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <utf8html.h> + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + char digit[10]; + unsigned long ch; + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + // ------------------------------- + for (to = (unsigned char*)text; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= 
(((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '&'; + *to++ = '#'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = ';'; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..6cc1acd --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8latin1.h> + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0xff) { + *to++ = (unsigned char)uchar; + } + else { + *to++ = replacementChar; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + diff --git 
/******************************************************************************
*
* utf8nfc - SWFilter descendant to perform NFC (canonical composition
*		normalization) on UTF-8 text
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <utf8nfc.h>

// Opens the UTF-8 converter used by ProcessText.
UTF8NFC::UTF8NFC() {
	err = U_ZERO_ERROR;	// ICU refuses to act on an uninitialised/failed status
	conv = ucnv_open("UTF-8", &err);
}

UTF8NFC::~UTF8NFC() {
	ucnv_close(conv);
}

// Normalizes `text` (NUL-terminated UTF-8) to NFC in place.
//
// maxlen is the capacity handed to ucnv_fromUChars for the result; key and
// module are unused.  If any ICU step fails the text is left as it was.
// If the normalized text does not fit in maxlen bytes it is truncated and
// NUL-terminated at text[maxlen - 1].
//
// Always returns 0.
char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	// A failure code left over from an earlier call would turn every ICU
	// call below into a no-op, so start from a clean status each time.
	err = U_ZERO_ERROR;

	int32_t len = strlen(text) * 2;
	source = new UChar[len + 1]; //each char could become a surrogate pair

	// Convert UTF-8 string to UTF-16 (UChars)
	len = ucnv_toUChars(conv, source, len + 1, text, -1, &err);

	if (U_SUCCESS(err)) {
		//canonical composition
		int32_t capacity = len + 1;
		target = new UChar[capacity];
		int32_t outlen = unorm_normalize(source, len, UNORM_NFC, 0, target, capacity, &err);

		if (err == U_BUFFER_OVERFLOW_ERROR) {
			// normalization can lengthen the text; outlen is the size needed
			delete [] target;
			capacity = outlen + 1;
			target = new UChar[capacity];
			err = U_ZERO_ERROR;
			outlen = unorm_normalize(source, len, UNORM_NFC, 0, target, capacity, &err);
		}

		if (U_SUCCESS(err)) {
			err = U_ZERO_ERROR;
			// explicit length: target need not be NUL-terminated
			int32_t written = ucnv_fromUChars(conv, text, maxlen, target, outlen, &err);
			if (maxlen > 0 && written >= maxlen)
				text[maxlen - 1] = 0;	// result filled the buffer: force termination
		}

		delete [] target;
	}

	delete [] source;

	return 0;
}

#endif

/******************************************************************************
*
* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
*		normalization) on UTF-8 text
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <utf8nfkd.h>

// Opens the UTF-8 converter used by ProcessText.
UTF8NFKD::UTF8NFKD() {
	err = U_ZERO_ERROR;	// ICU refuses to act on an uninitialised/failed status
	conv = ucnv_open("UTF-8", &err);
}

UTF8NFKD::~UTF8NFKD() {
	ucnv_close(conv);
}

// Normalizes `text` (NUL-terminated UTF-8) to NFKD in place.
//
// maxlen is the capacity handed to ucnv_fromUChars for the result; key and
// module are unused.  If any ICU step fails the text is left as it was.
// If the decomposed text does not fit in maxlen bytes it is truncated and
// NUL-terminated at text[maxlen - 1].
//
// Always returns 0.
char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	// A failure code left over from an earlier call would turn every ICU
	// call below into a no-op, so start from a clean status each time.
	err = U_ZERO_ERROR;

	int32_t len = strlen(text) * 2;
	source = new UChar[len + 1]; //each char could become a surrogate pair

	// Convert UTF-8 string to UTF-16 (UChars)
	len = ucnv_toUChars(conv, source, len + 1, text, -1, &err);

	if (U_SUCCESS(err)) {
		//compatibility decomposition
		int32_t capacity = len + 1;
		target = new UChar[capacity];
		int32_t outlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, capacity, &err);

		if (err == U_BUFFER_OVERFLOW_ERROR) {
			// decomposition routinely lengthens the text; outlen is the size needed
			delete [] target;
			capacity = outlen + 1;
			target = new UChar[capacity];
			err = U_ZERO_ERROR;
			outlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, capacity, &err);
		}

		if (U_SUCCESS(err)) {
			err = U_ZERO_ERROR;
			// explicit length: target need not be NUL-terminated
			int32_t written = ucnv_fromUChars(conv, text, maxlen, target, outlen, &err);
			if (maxlen > 0 && written >= maxlen)
				text[maxlen - 1] = 0;	// result filled the buffer: force termination
		}

		delete [] target;
	}

	delete [] source;

	return 0;
}

#endif
/******************************************************************************
*
* utf8transliterators - SWFilter descendant to transliterate between
*			 ICU-supported scripts.
*/

#ifdef _ICU_

#include <stdlib.h>
#include <string.h>

#ifdef __GNUC__
#include <unixstr.h>
#endif

#include <utf8transliterator.h>

// Display names of the selectable targets; the array index is the option value.
const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
        "Off",
        "Latin",
        "Basic Latin",
        "Beta",
        "BGreek",
/*
        "Greek",
        "Hebrew",
        "Cyrillic",
        "Arabic",
        "Syriac",
        "Katakana",
        "Hiragana",
        "Jamo",
        "Hangul",
        "Devanagari",
        "Tamil",
        "Bengali",
        "Gurmukhi",
        "Gujarati",
        "Oriya",
        "Telugu",
        "Kannada",
        "Malayalam",
        "Thai",
        "Georgian",
        "Armenian",
        "Ethiopic",
        "Gothic",
        "Ugaritic",
        "Coptic"
        */
};

const char UTF8Transliterator::optName[] = "Transliteration";
const char UTF8Transliterator::optTip[] = "Transliterates between scripts";

// One "script index -> transliterator ID fragment" association.
struct UTF8TranslitRule {
	int script;
	const char *fragment;
};

#define UTF8TRANSLIT_COUNT(a) ((unsigned int)(sizeof(a) / sizeof((a)[0])))

// Appends, in table order, the fragment of every rule whose script is flagged
// in scripts[].  Returns true if at least one fragment was appended.
static bool appendDetected(UnicodeString &id, const unsigned char *scripts, const UTF8TranslitRule *rules, unsigned int count)
{
	bool any = false;
	for (unsigned int r = 0; r < count; r++) {
		if (scripts[rules[r].script]) {
			id += UnicodeString(rules[r].fragment);
			any = true;
		}
	}
	return any;
}

// Returns the fragment registered for `target`, or 0 if the table has none.
static const char *fragmentFor(const UTF8TranslitRule *rules, unsigned int count, int target)
{
	for (unsigned int r = 0; r < count; r++) {
		if (rules[r].script == target)
			return rules[r].fragment;
	}
	return 0;
}

// True when a module is supplied and its Lang() begins (case-insensitively)
// with the first n characters of prefix.
static bool moduleLangStartsWith(const SWModule *module, const char *prefix, unsigned int n)
{
	return module && !strnicmp(((SWModule*)module)->Lang(), prefix, n);
}

// Starts with transliteration off and registers every option string.
UTF8Transliterator::UTF8Transliterator() {
	option = 0;
        unsigned long i;
	for (i = 0; i < NUMTARGETSCRIPTS; i++) {
		options.push_back(optionstring[i]);
	}
}

// Selects the target whose option string equals ival (case-insensitively).
// An unrecognised value selects index 0 ("Off").
void UTF8Transliterator::setOptionValue(const char *ival)
{
	option = 0;
	// Valid indices are 0 .. NUMTARGETSCRIPTS-1; index 0 is the fallback and
	// needs no comparison.  Searching downwards keeps the original
	// "highest matching index wins" order.
	for (unsigned int i = NUMTARGETSCRIPTS; i-- > 1; ) {
		if (!stricmp(ival, optionstring[i])) {
			option = i;
			break;
		}
	}
}

// Returns the option string of the current target, or 0 if option is out of range.
const char *UTF8Transliterator::getOptionValue()
{
	return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
}

// Transliterates `text` (NUL-terminated UTF-8) in place into the script
// selected by `option`; does nothing when the option is 0 ("Off").
//
// The text is converted to UTF-16, the Unicode blocks it uses are recorded,
// and a compound ICU transliterator ID is assembled from those scripts and
// the chosen target.  The result is converted back into `text`, using at
// most maxlen bytes, and is always NUL-terminated inside the buffer.
//
// module, when given, supplies the language used to pick Coptic over Greek
// ("cop") and Kanji over Han ("ja"); a null module selects the generic rule.
// key is unused.
//
// Always returns 0.
char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
{
	if (!option)		// no transliteration requested
		return 0;

	// Scripts that only ever go to Latin, listed in the order their
	// fragments must appear in the compound ID.
	static const UTF8TranslitRule cyrillicArabicToLatin[] = {
		{ SE_CYRILLIC,	";Cyrillic-Latin" },
		{ SE_ARABIC,	";Arabic-Latin" }
	};
	static const UTF8TranslitRule miscToLatin[] = {
		{ SE_THAI,	";Thai-Latin" },
		{ SE_GEORGIAN,	";Georgian-Latin" },
		{ SE_ARMENIAN,	";Armenian-Latin" },
		{ SE_ETHIOPIC,	";Ethiopic-Latin" },
		{ SE_GOTHIC,	";Gothic-Latin" },
		{ SE_UGARITIC,	";Ugaritic-Latin" }
	};
	// NOTE(review): Devanagari has no entry here, so Devanagari text is not
	// romanised for non-Indic targets -- looks like an omission; confirm
	// before adding ";Devanagari-Latin".
	static const UTF8TranslitRule indicToLatin[] = {
		{ SE_TAMIL,	";Tamil-Latin" },
		{ SE_BENGALI,	";Bengali-Latin" },
		{ SE_GURMUKHI,	";Gurmukhi-Latin" },
		{ SE_GUJARATI,	";Gujarati-Latin" },
		{ SE_ORIYA,	";Oriya-Latin" },
		{ SE_TELUGU,	";Telugu-Latin" },
		{ SE_KANNADA,	";Kannada-Latin" },
		{ SE_MALAYALAM,	";Malayalam-Latin" }
	};
	static const UTF8TranslitRule anyToInterIndic[] = {
		{ SE_LATIN,	";Latin-InterIndic" },
		{ SE_DEVANAGARI, ";Devanagari-InterIndic" },
		{ SE_TAMIL,	";Tamil-InterIndic" },
		{ SE_BENGALI,	";Bengali-InterIndic" },
		{ SE_GURMUKHI,	";Gurmukhi-InterIndic" },	// was misspelt "Gurmurkhi", which is not an ICU ID
		{ SE_GUJARATI,	";Gujarati-InterIndic" },
		{ SE_ORIYA,	";Oriya-InterIndic" },
		{ SE_TELUGU,	";Telugu-InterIndic" },
		{ SE_KANNADA,	";Kannada-InterIndic" },
		{ SE_MALAYALAM,	";Malayalam-InterIndic" }
	};
	static const UTF8TranslitRule interIndicToTarget[] = {
		{ SE_DEVANAGARI, ";InterIndic-Devanagari" },
		{ SE_TAMIL,	";InterIndic-Tamil" },
		{ SE_BENGALI,	";InterIndic-Bengali" },
		{ SE_GURMUKHI,	";InterIndic-Gurmukhi" },
		{ SE_GUJARATI,	";InterIndic-Gujarati" },
		{ SE_ORIYA,	";InterIndic-Oriya" },
		{ SE_TELUGU,	";InterIndic-Telugu" },
		{ SE_KANNADA,	";InterIndic-Kannada" },
		{ SE_MALAYALAM,	";InterIndic-Malayalam" }
	};
	static const UTF8TranslitRule latinToTarget[] = {
		{ SE_GREEK,	";Latin-Greek" },
		{ SE_HEBREW,	";Latin-Hebrew" },
		{ SE_CYRILLIC,	";Latin-Cyrillic" },
		{ SE_ARABIC,	";Latin-Arabic" },
		{ SE_SYRIAC,	";Latin-Syriac" },
		{ SE_THAI,	";Latin-Thai" },
		{ SE_GEORGIAN,	";Latin-Georgian" },
		{ SE_ARMENIAN,	";Latin-Armenian" },
		{ SE_ETHIOPIC,	";Latin-Ethiopic" },
		{ SE_GOTHIC,	";Latin-Gothic" },
		{ SE_UGARITIC,	";Latin-Ugaritic" },
		{ SE_COPTIC,	";Latin-Coptic" },
		{ SE_KATAKANA,	";Latin-Katakana" },
		{ SE_HIRAGANA,	";Latin-Hiragana" },
		{ SE_JAMO,	";Latin-Jamo" },
		{ SE_HANGUL,	";Latin-Hangul" }
	};

	unsigned long i, j;
	UErrorCode err = U_ZERO_ERROR;
	UConverter * conv = ucnv_open("UTF-8", &err);

	bool compat = false;			// compatibility characters seen: decompose with NFKD
	bool noNFC = (option == SE_JAMO);	// Jamo output must stay decomposed

	// Convert UTF-8 string to UTF-16 (UChars)
	j = strlen(text);
	int32_t len = (j * 2) + 1;
	UChar *source = new UChar[len];
	err = U_ZERO_ERROR;
	len = ucnv_toUChars(conv, source, len, text, j, &err);
	source[len] = 0;

	// Figure out which scripts are used in the string
	unsigned char scripts[NUMSCRIPTS];

	for (i = 0; i < NUMSCRIPTS; i++) {
		scripts[i] = false;
	}

	for (i = 0; i < (unsigned long)len; i++) {
		j = ublock_getCode(source[i]);
		switch (j) {
		case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
		case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
		case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
		case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
		case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
		case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
		case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
		case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
		case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
		case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
		case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
		case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
		case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
		case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
		case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
		case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
		case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
		case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
		case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
		case UBLOCK_THAI: scripts[SE_THAI] = true; break;
		case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
		case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
		case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
		case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
		// needs Unicode 3.2? or 4.0? support from ICU
		//case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
		case UBLOCK_CJK_RADICALS_SUPPLEMENT:
		case UBLOCK_KANGXI_RADICALS:
		case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
		case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
		case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
			scripts[SE_HAN] = true;
			break;
		case UBLOCK_CJK_COMPATIBILITY:
		case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
		case UBLOCK_CJK_COMPATIBILITY_FORMS:
			scripts[SE_HAN] = true;
			compat = true;
			break;
		case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
			scripts[SE_HANGUL] = true;
			compat = true;
			break;

		default: scripts[SE_LATIN] = true;
		}
	}
	scripts[option] = false; //turn off the reflexive transliteration

	//return if we have no transliteration to do for this text
	bool anythingToDo = false;
	for (i = 0; i < NUMSCRIPTS; i++) {
		if (scripts[i]) {
			anythingToDo = true;
			break;
		}
	}
	if (!anythingToDo) {
		delete [] source;	// was leaked on this path
		ucnv_close(conv);
		return 0;
	}

	// The compound ID always starts with a decomposition step.
	UnicodeString id;
	if (compat) {
		id = UnicodeString("NFKD");
	}
	else {
		id = UnicodeString("NFD");
	}

	//Simple X to Latin transliterators
	if (scripts[SE_GREEK]) {
		if (option == SE_BETA)
			id += UnicodeString(";Greek-Beta");
		else if (option == SE_BGREEK)
			id += UnicodeString(";Greek-BGreek");
		else {
			if (moduleLangStartsWith(module, "cop", 3)) {
				id += UnicodeString(";Coptic-Latin");
			}
			else {
				id += UnicodeString(";Greek-Latin");
			}
			scripts[SE_LATIN] = true;
		}
	}
	if (scripts[SE_HEBREW]) {
		if (option == SE_BETA)
			id += UnicodeString(";Hebrew-CCAT");
		else if (option == SE_SYRIAC)
			id += UnicodeString(";Hebrew-Syriac");
		else {
			id += UnicodeString(";Hebrew-Latin");
			scripts[SE_LATIN] = true;
		}
	}
	if (appendDetected(id, scripts, cyrillicArabicToLatin, UTF8TRANSLIT_COUNT(cyrillicArabicToLatin)))
		scripts[SE_LATIN] = true;
	if (scripts[SE_SYRIAC]) {
		if (option == SE_BETA)
			id += UnicodeString(";Syriac-CCAT");
		else if (option == SE_HEBREW)
			id += UnicodeString(";Syriac-Hebrew");
		else {
			id += UnicodeString(";Syriac-Latin");
			scripts[SE_LATIN] = true;
		}
	}
	if (appendDetected(id, scripts, miscToLatin, UTF8TRANSLIT_COUNT(miscToLatin)))
		scripts[SE_LATIN] = true;
	if (scripts[SE_HAN]) {
		if (moduleLangStartsWith(module, "ja", 2)) {
			id += UnicodeString(";Kanji-OnRomaji");
		}
		else {
			id += UnicodeString(";Han-Pinyin");
		}
		scripts[SE_LATIN] = true;
	}

	// Inter-Kana and Kana to Latin transliterators
	if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
		id += UnicodeString(";Katakana-Hiragana");
		scripts[SE_HIRAGANA] = true;
	}
	else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
		id += UnicodeString(";Hiragana-Katakana");
		scripts[SE_KATAKANA] = true;
	}
	else {
		if (scripts[SE_KATAKANA]) {
			id += UnicodeString(";Katakana-Latin");
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_HIRAGANA]) {
			id += UnicodeString(";Hiragana-Latin");
			scripts[SE_LATIN] = true;
		}
	}

	// Inter-Korean and Korean to Latin transliterators
	// (Jamo <-> Hangul needs no rule of its own: the closing NFC/NFD step does it.)
	if (option == SE_HANGUL && scripts[SE_JAMO]) {
		noNFC = false;
		scripts[SE_HANGUL] = true;
	}
	else if (option == SE_JAMO && scripts[SE_HANGUL]) {
		noNFC = true;
		scripts[SE_JAMO] = true;
	}
	else {
		if (scripts[SE_HANGUL]) {
			id += UnicodeString(";Hangul-Latin");
			scripts[SE_LATIN] = true;
		}
		if (scripts[SE_JAMO]) {
			id += UnicodeString(";Jamo-Latin");
			scripts[SE_LATIN] = true;
		}
	}

	// Indic-Latin
	if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
		// Indic to Latin
		if (appendDetected(id, scripts, indicToLatin, UTF8TRANSLIT_COUNT(indicToLatin)))
			scripts[SE_LATIN] = true;
	}
	else {
		// Indic target: route everything through the InterIndic pivot
		appendDetected(id, scripts, anyToInterIndic, UTF8TRANSLIT_COUNT(anyToInterIndic));

		const char *fromPivot = fragmentFor(interIndicToTarget, UTF8TRANSLIT_COUNT(interIndicToTarget), option);
		if (fromPivot) {
			id += UnicodeString(fromPivot);
		}
		else {
			id += UnicodeString(";InterIndic-Latin");
			scripts[SE_LATIN] = true;
		}
	}

	// Everything that ended up as Latin now goes on to the requested script.
	if (scripts[SE_LATIN]) {
		const char *fromLatin = fragmentFor(latinToTarget, UTF8TRANSLIT_COUNT(latinToTarget), option);
		if (fromLatin)
			id += UnicodeString(fromLatin);
	}

	if (option == SE_BASICLATIN) {
		id += UnicodeString(";Any-Latin1");
	}

	if (noNFC) {
		id += UnicodeString(";NFD");
	} else {
		id += UnicodeString(";NFC");
	}

	UParseError perr;

	err = U_ZERO_ERROR;
	Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err);
	if (trans) {
		UnicodeString target = UnicodeString(source);
		trans->transliterate(target);
		err = U_ZERO_ERROR;
		len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err);
		if (len < maxlen) *(text + len) = 0;
		else if (maxlen > 0) *(text + maxlen - 1) = 0;	// last byte of the buffer, not one past it
		delete trans;
	}
	delete [] source;	// was leaked on this path
	ucnv_close(conv);
	return 0;
}
#endif
SE_BASICLATIN) { + id += UnicodeString(";Any-Latin1"); + } + + if (noNFC) { + id += UnicodeString(";NFD"); + } else { + id += UnicodeString(";NFC"); + } + + UParseError perr; + + err = U_ZERO_ERROR; + Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); + if (trans) { + UnicodeString target = UnicodeString(source); + trans->transliterate(target); + len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); + if (len < maxlen) *(text + len) = 0; + else *(text + maxlen) = 0; + delete trans; + } + ucnv_close(conv); + } + return 0; +} +#endif diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp new file mode 100644 index 0000000..9aea6fe --- /dev/null +++ b/src/modules/filters/utf8utf16.cpp @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * UTF8UTF16 - SWFilter decendant to convert UTF-8 to UTF-16 + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8utf16.h> + +UTF8UTF16::UTF8UTF16() { +} + + +char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + unsigned short schar; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from 
<<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0x1ffff) { + *to++ = (unsigned short)uchar; + } + else { + uchar -= 0x10000; + schar = 0xD800 | (uchar & 0x03ff); + uchar >>= 10; + uchar |= 0xDC00; + *to++ = (unsigned short)schar; + *to++ = (unsigned short)uchar; + } + } + *to = (unsigned short)0; + + return 0; +} + |