diff options
author | Teus Benschop <teusjannette@gmail.com> | 2018-10-28 11:51:26 +0100 |
---|---|---|
committer | Teus Benschop <teusjannette@gmail.com> | 2018-10-28 11:51:26 +0100 |
commit | 1d0ff54794b5edea7cdf1d2d66710a0fa885bcc5 (patch) | |
tree | 8ece5f9ef437fbb151f2b22ed0c6e1a714879c7c /src/modules/filters | |
parent | c7dbdc9161a7c460526b80fe01af49d714856126 (diff) |
New upstream version 1.8.1
Diffstat (limited to 'src/modules/filters')
56 files changed, 2917 insertions, 820 deletions
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am index a3d1154..3e8556b 100644 --- a/src/modules/filters/Makefile.am +++ b/src/modules/filters/Makefile.am @@ -15,6 +15,7 @@ GBFFIL += $(filtersdir)/gbfheadings.cpp GBFFIL += $(filtersdir)/gbfredletterwords.cpp GBFFIL += $(filtersdir)/gbfmorph.cpp GBFFIL += $(filtersdir)/gbfwordjs.cpp +GBFFIL += $(filtersdir)/gbflatex.cpp THMLFIL = $(filtersdir)/thmlstrongs.cpp THMLFIL += $(filtersdir)/thmlfootnotes.cpp @@ -30,11 +31,13 @@ THMLFIL += $(filtersdir)/thmlhtmlhref.cpp THMLFIL += $(filtersdir)/thmlxhtml.cpp THMLFIL += $(filtersdir)/thmlwebif.cpp THMLFIL += $(filtersdir)/thmlwordjs.cpp +THMLFIL += $(filtersdir)/thmllatex.cpp TEIFIL = $(filtersdir)/teiplain.cpp TEIFIL += $(filtersdir)/teirtf.cpp TEIFIL += $(filtersdir)/teihtmlhref.cpp TEIFIL += $(filtersdir)/teixhtml.cpp +TEIFIL += $(filtersdir)/teilatex.cpp CONVFIL = $(filtersdir)/gbfthml.cpp CONVFIL += $(filtersdir)/gbfosis.cpp @@ -61,6 +64,7 @@ OSISFIL += $(filtersdir)/osisglosses.cpp OSISFIL += $(filtersdir)/osisenum.cpp OSISFIL += $(filtersdir)/osisxlit.cpp OSISFIL += $(filtersdir)/osisreferencelinks.cpp +OSISFIL += $(filtersdir)/osislatex.cpp libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp @@ -70,6 +74,7 @@ libsword_la_SOURCES += $(filtersdir)/utf8html.cpp libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp +libsword_la_SOURCES += $(filtersdir)/utf8scsu.cpp libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp index 7e19ea1..a679924 100644 --- a/src/modules/filters/gbfheadings.cpp +++ b/src/modules/filters/gbfheadings.cpp @@ -3,7 +3,7 @@ * gbfheadings.cpp - SWFilter descendant to hide or show headings * in a GBF module * - * $Id: 
gbfheadings.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfheadings.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -93,11 +93,13 @@ char GBFHeadings::processText(SWBuf &text, const SWKey *key, const SWModule *mod continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } - else { + else { if (!hide) { text += *from; } diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp index 6df038e..723137f 100644 --- a/src/modules/filters/gbfhtmlhref.cpp +++ b/src/modules/filters/gbfhtmlhref.cpp @@ -2,7 +2,7 @@ * * gbfhtmlhref.cpp - GBF to HTML filter with hrefs * - * $Id: gbfhtmlhref.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: gbfhtmlhref.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -232,19 +232,13 @@ bool GBFHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup class=\"n\">*n%s</sup></small></a> ", URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str(): "")); } u->suspendTextPassThru = true; diff --git a/src/modules/filters/gbflatex.cpp b/src/modules/filters/gbflatex.cpp new file mode 100644 index 0000000..d3c726a --- /dev/null +++ b/src/modules/filters/gbflatex.cpp @@ -0,0 +1,198 @@ +/****************************************************************************** + * + * gbflatex.cpp - GBF to LaTeX + * + * $Id: gbflatex.cpp 3548 2017-12-10 05:11:38Z scribe $ + * + * Copyright 2011-2014 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + */ + + +#include <stdlib.h> +#include <gbflatex.h> +#include <swmodule.h> +#include <utilxml.h> +#include <versekey.h> +#include <ctype.h> +#include <url.h> + +SWORD_NAMESPACE_START + +const char *GBFLaTeX::getHeader() const { + return "\\usepackage{color}"; +} + +GBFLaTeX::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + if (module) { + version = module->getName(); + } +} + +GBFLaTeX::GBFLaTeX() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + //addTokenSubstitute("Rf", ")</small></font>"); + addTokenSubstitute("FA", "{\\color{maroon}"); // for ASV footnotes to mark text + addTokenSubstitute("Rx", "}"); + addTokenSubstitute("FI", "\\emph{"); // italics begin + addTokenSubstitute("Fi", "}"); + addTokenSubstitute("FB", "\\bold{"); // bold begin + addTokenSubstitute("Fb", "}"); + addTokenSubstitute("FR", "{\\swordwoj{"); // words of Jesus begin + addTokenSubstitute("Fr", "}"); + addTokenSubstitute("FU", "\\underline{"); // underline begin + addTokenSubstitute("Fu", "}"); + addTokenSubstitute("FO", "\\begin{quote}"); // Old Testament quote begin + addTokenSubstitute("Fo", "\\end{quote}"); + addTokenSubstitute("FS", "\\textsuperscript{"); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", "}"); + addTokenSubstitute("FV", "\\textsubscript{"); // Subscript begin + addTokenSubstitute("Fv", "}"); + addTokenSubstitute("TT", "\\section*{"); // Book title begin + addTokenSubstitute("Tt", "}"); + addTokenSubstitute("PP", "\\begin{swordpoetry}"); // poetry begin + addTokenSubstitute("Pp", "\\end{swordpoetry}"); + addTokenSubstitute("Fn", ""); // font end + addTokenSubstitute("CL", "\\\\"); // new line + addTokenSubstitute("CM", "\\\\"); // paragraph <!P> is a non showing comment that can be changed in the front end to <P> if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? 
+ addTokenSubstitute("JR", "{\\raggedright{}"); // right align begin + addTokenSubstitute("JC", "{\\raggedcenter{}"); // center align begin + addTokenSubstitute("JL", "}"); // align end + + renderNoteNumbers = false; +} + + +bool GBFLaTeX::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + const char *tok; + MyUserData *u = (MyUserData *)userData; + + if (!substituteToken(buf, token)) { + XMLTag tag(token); + + if (!strncmp(token, "WG", 2)) { // strong's numbers + //buf += " <small><em><<a href=\"type=Strongs value="; + buf += " \\swordstrong[Greek]{"; + for (tok = token+2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += ", "; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += "}"; + } + else if (!strncmp(token, "WH", 2)) { // strong's numbers + buf += " \\swordstrong[Hebrew]{"; + for (tok = token+2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += ", "; + for (tok = token + 2; *tok; tok++) + //if(token[i] != '\"') + buf += *tok; + buf += "}"; + } + else if (!strncmp(token, "WTG", 3)) { // strong's numbers tense + buf += " \\swordstrong[Greektense]{"; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ", "; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "}"; + } + else if (!strncmp(token, "WTH", 3)) { // strong's numbers tense + buf += " \\swordstrong[Hebrewtense]{"; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ","; + for (tok = token + 3; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "}"; + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + buf += " \\swordmorph{"; + + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += ", >"; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "}"; + } + + else if (!strcmp(tag.getName(), "RX")) { + buf += "\\swordxref{"; 
+ for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + buf += *tok; + } + else { + break; + } + } + buf += "}"; + } + else if (!strcmp(tag.getName(), "RF")) { + SWBuf type = tag.getAttribute("type"); + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + SWBuf noteName = tag.getAttribute("n"); + if (u->vkey) { + + buf.appendFormatted("\\swordfootnote{%s}{%s}{%s}{", + footnoteNumber.c_str(), + u->version.c_str(), + u->vkey->getText()).c_str(); + } + u->suspendTextPassThru = false; + } + else if (!strcmp(tag.getName(), "Rf")) { + u->suspendTextPassThru = false; + buf += "}"; + } + else if (!strncmp(token, "FN", 2)) { + buf += "\\swordfont{"; + for (tok = token + 2; *tok; tok++) + if(*tok != '\"') + buf += *tok; + buf += "}"; + } + + else if (!strncmp(token, "CA", 2)) { // ASCII value + buf += (char)atoi(&token[2]); + } + + else { + return false; + } + } + return true; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/gbfmorph.cpp b/src/modules/filters/gbfmorph.cpp index 3399ae8..cf1405b 100644 --- a/src/modules/filters/gbfmorph.cpp +++ b/src/modules/filters/gbfmorph.cpp @@ -3,7 +3,7 @@ * gbfmorph.cpp - SWFilter descendant to hide or show morph tags * in a GBF module * - * $Id: gbfmorph.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfmorph.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -84,11 +84,13 @@ char GBFMorph::processText(SWBuf &text, const SWKey *key, const SWModule *module continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? 
token[tokpos+2] = 0; + } } - else { + else { text += *from; lastspace = (*from == ' '); } diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp index b2a1ae4..45588af 100644 --- a/src/modules/filters/gbfplain.cpp +++ b/src/modules/filters/gbfplain.cpp @@ -3,7 +3,7 @@ * gbfplain.cpp - SWFilter descendant to strip out all GBF tags or * convert to ASCII rendered symbols * - * $Id: gbfplain.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfplain.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 1997-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -103,9 +103,11 @@ char GBFPlain::processText (SWBuf &text, const SWKey *key, const SWModule *modul continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } else text.append(*from); } diff --git a/src/modules/filters/gbfredletterwords.cpp b/src/modules/filters/gbfredletterwords.cpp index cac3cd9..f3e74f3 100644 --- a/src/modules/filters/gbfredletterwords.cpp +++ b/src/modules/filters/gbfredletterwords.cpp @@ -3,7 +3,7 @@ * gbfredletterwords.cpp - SWFilter descendant to toggle red coloring of * words of Christ in a GBF module * - * $Id: gbfredletterwords.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfredletterwords.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -101,9 +101,12 @@ char GBFRedLetterWords::processText(SWBuf &text, const SWKey *key, const SWModul continue; } if (intoken) { - if (tokpos < 4090) + if (tokpos < 4090) { token[tokpos++] = *from; + // TODO: why is this + 2 ? The below comment still doesn't help me understand. 
The switch statment + // is commented out in this filter token[tokpos+2] = 0; // +2 cuz we init token with 2 extra '0' because of switch statement + } } else { text += *from; diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp index 8a8e1b1..c327c82 100644 --- a/src/modules/filters/gbfrtf.cpp +++ b/src/modules/filters/gbfrtf.cpp @@ -2,7 +2,7 @@ * * gbfrtf.cpp - SWFilter descendant to convert all GBF tags to RTF tags * - * $Id: gbfrtf.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: gbfrtf.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 1997-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -307,9 +307,11 @@ char GBFRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } else { if (!hideText) { diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp index dd4e167..65473de 100644 --- a/src/modules/filters/gbfstrongs.cpp +++ b/src/modules/filters/gbfstrongs.cpp @@ -3,7 +3,7 @@ * gbfstrongs.cpp - SWFilter descendant to hide or show Strong's number * in a GBF module * - * $Id: gbfstrongs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfstrongs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -58,7 +58,7 @@ char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *modu bool lastspace = false; int word = 1; char val[128]; - char wordstr[5]; + char wordstr[11]; char *valto; unsigned int textStart = 0, textEnd = 0; bool newText = false; @@ -75,7 +75,7 @@ char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *modu token[0] = 0; token[1] = 0; token[2] = 0; - textEnd = text.size(); + textEnd = (unsigned int)text.size(); continue; } if (*from == '>') { // process tokens @@ -110,7 +110,7 
@@ char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *modu if (lastspace) text--; } - if (newText) {textStart = text.size(); newText = false; } + if (newText) {textStart = (unsigned int)text.size(); newText = false; } continue; } } @@ -130,15 +130,17 @@ char GBFStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *modu text += '<'; text += token; text += '>'; - if (newText) {textStart = text.size(); newText = false; } + if (newText) {textStart = (unsigned int)text.size(); newText = false; } continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } - else { + else { text += *from; lastspace = (*from == ' '); } diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp index 303b240..d3dd611 100644 --- a/src/modules/filters/gbfthml.cpp +++ b/src/modules/filters/gbfthml.cpp @@ -2,7 +2,7 @@ * * gbfthml.cpp - GBF to ThML filter * - * $Id: gbfthml.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfthml.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -52,8 +52,7 @@ char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) token[2] = 0; continue; } - if (*from == '>') - { + if (*from == '>') { intoken = false; // process desired tokens switch (*token) { @@ -210,9 +209,15 @@ char GBFThML::processText(SWBuf &text, const SWKey *key, const SWModule *module) continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + //TODO: why is this + 2? Are we trying to keep 2 or 3 nulls after the last valid char? + // tokpos has been incremented past the last valid token. it should be pointing to null + // +1 should give us 2 nulls, but we're +2 here, which actually keeps 3 nulls after the + // last valid char. Why are we doing any of this? 
These were written before SWBuf and should + // probably be switched to SWBuf, but perf tests before and after the switch should be run token[tokpos+2] = 0; + } } else text += *from; } diff --git a/src/modules/filters/gbfwordjs.cpp b/src/modules/filters/gbfwordjs.cpp index c9d1bd3..b0f352e 100644 --- a/src/modules/filters/gbfwordjs.cpp +++ b/src/modules/filters/gbfwordjs.cpp @@ -2,7 +2,7 @@ * * gbfwordjs.cpp - SWFilter descendant for ??? * - * $Id: gbfwordjs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: gbfwordjs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2005-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -88,7 +88,7 @@ char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modul token[0] = 0; token[1] = 0; token[2] = 0; - textEnd = text.length(); + textEnd = (unsigned int)text.length(); continue; } if (*from == '>') { // process tokens @@ -137,7 +137,7 @@ char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modul text += token; text += '>'; if (needWordOut) { - char wstr[10]; + char wstr[11]; sprintf(wstr, "%03d", word-2); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; @@ -212,27 +212,29 @@ char GBFWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modul else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); - lastAppendLen = spanStart.length(); + lastAppendLen = (unsigned int)spanStart.length(); } } } if (newText) { - textStart = text.length(); newText = false; + textStart = (unsigned int)text.length(); newText = false; } continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? 
token[tokpos+2] = 0; + } } - else { + else { text += *from; } } - char wstr[10]; + char wstr[11]; sprintf(wstr, "%03d", word-1); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; diff --git a/src/modules/filters/gbfxhtml.cpp b/src/modules/filters/gbfxhtml.cpp index 56d7663..b62fe3a 100644 --- a/src/modules/filters/gbfxhtml.cpp +++ b/src/modules/filters/gbfxhtml.cpp @@ -2,7 +2,7 @@ * * gbfxhtml.cpp - GBF to classed XHTML * - * $Id: gbfxhtml.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: gbfxhtml.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2011-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -171,19 +171,13 @@ bool GBFXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. //char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=n&value=%s&module=%s&passage=%s\"><small><sup class=\"n\">*n%s</sup></small></a> ", URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), (renderNoteNumbers ? 
URL::encode(noteName.c_str()).c_str(): "")); } u->suspendTextPassThru = true; diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp index 1c2533d..1d98db2 100644 --- a/src/modules/filters/greeklexattribs.cpp +++ b/src/modules/filters/greeklexattribs.cpp @@ -3,7 +3,7 @@ * greeklexattribs.cpp - SWFilter descendant to set entry attributes * for greek lexicons * - * $Id: greeklexattribs.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: greeklexattribs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2002-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -44,7 +44,7 @@ char GreekLexAttribs::processText(SWBuf &text, const SWKey *key, const SWModule string phrase; string freq; char val[128], *valto; - char wordstr[7]; + char wordstr[11]; const char *currentPhrase = 0; const char *currentPhraseEnd = 0; int number = 0; diff --git a/src/modules/filters/osisheadings.cpp b/src/modules/filters/osisheadings.cpp index 7a5f525..74ed5ce 100644 --- a/src/modules/filters/osisheadings.cpp +++ b/src/modules/filters/osisheadings.cpp @@ -3,7 +3,7 @@ * osisheadings.cpp - SWFilter descendant to hide or show headings * in an OSIS module * - * $Id: osisheadings.cpp 3194 2014-04-24 03:02:47Z greg.hellings $ + * $Id: osisheadings.cpp 3423 2016-06-25 13:21:48Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -51,6 +51,7 @@ namespace { SWBuf heading; int depth; int headerNum; + bool canonical; MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { clear(); @@ -62,6 +63,7 @@ namespace { heading = ""; depth = 0; headerNum = 0; + canonical=false; } }; } @@ -88,16 +90,16 @@ bool OSISHeadings::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat // are we currently in a heading? 
if (u->currentHeadingName.size()) { u->heading.append(u->lastTextNode); + if (SWBuf("true") == tag.getAttribute("canonical")) u->canonical = true; if (name == u->currentHeadingName) { if (tag.isEndTag(u->sID)) { if (!u->depth-- || u->sID) { // see comment below about preverse div changed and needing to preserve the <title> container tag for old school pre-verse titles // we've just finished a heading. It's all stored up in u->heading - bool canonical = (SWBuf("true") == u->currentHeadingTag.getAttribute("canonical")); bool preverse = (SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subType") || SWBuf("x-preverse") == u->currentHeadingTag.getAttribute("subtype")); // do we want to put anything in EntryAttributes? - if (u->module->isProcessEntryAttributes() && (option || canonical || !preverse)) { + if (u->module->isProcessEntryAttributes() && (option || u->canonical || !preverse)) { SWBuf buf; buf.appendFormatted("%i", u->headerNum++); // leave the actual <title...> wrapper in if we're part of an old school preverse title // because now frontend have to deal with preverse as a div which may or may not include <title> elements @@ -121,7 +123,7 @@ bool OSISHeadings::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat } // do we want the heading in the body? 
- if (!preverse && (option || canonical)) { + if (!preverse && (option || u->canonical)) { buf.append(u->currentHeadingTag); buf.append(u->heading); buf.append(tag); @@ -148,6 +150,7 @@ bool OSISHeadings::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat u->sID = u->currentHeadingTag.getAttribute("sID"); u->depth = 0; u->suspendTextPassThru = true; + u->canonical = (SWBuf("true") == tag.getAttribute("canonical")); return true; } diff --git a/src/modules/filters/osishtmlhref.cpp b/src/modules/filters/osishtmlhref.cpp index 9cda9ab..94194ac 100644 --- a/src/modules/filters/osishtmlhref.cpp +++ b/src/modules/filters/osishtmlhref.cpp @@ -2,7 +2,7 @@ * * osishtmlhref.cpp - OSIS to HTML with hrefs filter * - * $Id: osishtmlhref.cpp 3173 2014-04-17 04:20:33Z greg.hellings $ + * $Id: osishtmlhref.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2003-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -117,14 +117,12 @@ OSISHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : tagStacks = new TagStacks(); wordsOfChristStart = "<font color=\"red\"> "; wordsOfChristEnd = "</font> "; + osisQToTick = true; // default + isBiblicalText = false; if (module) { osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); - } - else { - osisQToTick = true; // default - version = ""; + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -201,9 +199,9 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat outText("<ruby><rb>", buf, u); outText(lastText, buf, u); + outText("</rb><rp>(</rp><rt>", buf, u); val = strchr(attrib, ':'); val = (val) ? 
(val + 1) : attrib; - outText("</rb><rp>(</rp><rt>", buf, u); outText(val, buf, u); outText("</rt><rp>)</rp></ruby>", buf, u); } @@ -241,22 +239,15 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes // u->inXRefNote = (ch == 'x'); - - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", ch, URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey ? vkey->getText() : u->key->getText()).c_str(), + URL::encode(u->vkey ? u->vkey->getText() : u->key->getText()).c_str(), ch, ch, (renderNoteNumbers ? 
noteName.c_str() : "")); @@ -267,7 +258,7 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat if (tag.isEndTag()) { u->suspendTextPassThru = (--u->suspendLevel); u->inXRefNote = false; - u->lastSuspendSegment = ""; // fix/work-around for nasb devineName in note bug + u->lastSuspendSegment = ""; // fix/work-around for nasb divineName in note bug } } @@ -322,7 +313,7 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat // Compensate for starting : ref = the_ref + 1; - int size = target.size() - ref.size() - 1; + int size = (int)(target.size() - ref.size() - 1); work.setSize(size); strncpy(work.getRawData(), target, size); @@ -460,7 +451,7 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat const unsigned char *tmpBuf = (const unsigned char *)lastText.c_str(); getUniCharFromUTF8(&tmpBuf); - int char_length = (tmpBuf - (const unsigned char *)lastText.c_str()); + int char_length = (int)(tmpBuf - (const unsigned char *)lastText.c_str()); scratch.setFormatted("%.*s<font size=\"-1\">%s</font>", char_length, lastText.c_str(), @@ -509,7 +500,7 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat else if (type == "ol") { outText("</span>", buf, u); } - else if (type == "sup") { + else if (type == "super") { outText("</sup>", buf, u); } else if (type == "sub") { @@ -595,7 +586,7 @@ bool OSISHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat if ((type == "added") || (type == "supplied")) outText("<i>", buf, u); else if (type == "tenseChange") - buf += "*"; + outText( "*", buf, u); } else if (tag.isEndTag()) { SWBuf type = u->lastTransChange; diff --git a/src/modules/filters/osislatex.cpp b/src/modules/filters/osislatex.cpp new file mode 100644 index 0000000..e642f07 --- /dev/null +++ b/src/modules/filters/osislatex.cpp @@ -0,0 +1,766 @@ +/****************************************************************************** + * + * 
osislatex.cpp - Render filter for LaTeX of an OSIS module + * + * $Id: osislatex.cpp 3548 2017-12-10 05:11:38Z scribe $ + * + * Copyright 2011-2014 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <ctype.h> +#include <osislatex.h> +#include <utilxml.h> +#include <utilstr.h> +#include <versekey.h> +#include <swmodule.h> +#include <url.h> +#include <stringmgr.h> +#include <stack> + +SWORD_NAMESPACE_START + +const char *OSISLaTeX::getHeader() const { +// can be used to return static start-up info, like packages to load. Not sure yet if I want to retain it. 
+ + const static char *header = "\ + \\LoadClass[11pt,a4paper,twoside,headinclude=true,footinclude=true,BCOR=0mm,DIV=calc]{scrbook}\n\ + \\LoadClass[11pt,a4paper,twoside,headinclude=true,footinclude=true,BCOR=0mm,DIV=calc]{scrbook}\n\ + \\NeedsTeXFormat{LaTeX2e}\n\ + \\ProvidesClass{sword}[2015/03/29 CrossWire LaTeX class for Biblical texts]\n\ + %\\sworddiclink{%s}{%s}{\n\ + %\\sworddictref{%s}{%s}{\n\ + %\\sworddict{%s}{\n\ + %\\sworddivinename}{%s}{\n\ + %\\swordfont{\n\ + %\\swordfootnote[%c]{%s}{%s}{%s}{%s}{\n\ + %\\swordfootnote{%s}{%s}{%s}{\n\ + %\\swordfootnote{%s}{%s}{%s}{%s}{\n\ + %\\swordmorph{\n\ + %\\swordmorph[Greek]{%s}\n\ + %\\swordmorph[lemma]{%s}\n\ + %\\swordmorph{%s}\n\ + %\\swordpoetryline{\n\ + %\\swordquote{\n\ + %\\swordref{%s}{%s}{\n\ + %\\swordsection{\n\ + %\\swordsection{}{\n\ + %\\swordsection{book}{\n\ + %\\swordsection{sechead}{\n\ + %\\swordstrong[Greek]{\n\ + %\\swordstrong[Greektense]{\n\ + %\\swordstrong[Hebrew]{\n\ + %\\swordstrong[Hebrewtense]{\n\ + %\\swordstrong[%s]{%s}{\n\ + %\\swordstrong{%s}{%s}\n\ + %\\swordtitle{\n\ + %\\swordtranschange{supplied}{\n\ + %\\swordtranschange{tense}{\n\ + %\\swordwoj{\n\ + %\\swordxref{\n\ + %\\swordxref{%s}{\n\ + %\\swordxref{%s}{%s}{\n\ + "; + return header; +} + + +namespace { + +// though this might be slightly slower, possibly causing an extra bool check, this is a renderFilter +// so speed isn't the absolute highest priority, and this is a very minor possible hit +static inline void outText(const char *t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } +static inline void outText(char t, SWBuf &o, BasicFilterUserData *u) { if (!u->suspendTextPassThru) o += t; else u->lastSuspendSegment += t; } + +void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("lemma"))) { + int count = tag.getAttributePartCount("lemma", ' '); + int i = (count > 1) 
? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("lemma", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + SWBuf gh; + if(*val == 'G') + gh = "Greek"; + if(*val == 'H') + gh = "Hebrew"; + const char *val2 = val; + if ((strchr("GH", *val)) && (isdigit(val[1]))) + val2++; + //if ((!strcmp(val2, "3588")) && (lastText.length() < 1)) + // show = false; + //else { + if (!suspendTextPassThru) { + buf.appendFormatted("\\swordstrong{%s}{%s}", + (gh.length()) ? gh.c_str() : "", + val2); + } + //} + + } while (++i < count); + } +} + + + +void processMorph(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { + const char * attrib; + const char *val; + if ((attrib = tag.getAttribute("morph"))) { // && (show)) { + SWBuf savelemma = tag.getAttribute("savlm"); + //if ((strstr(savelemma.c_str(), "3588")) && (lastText.length() < 1)) + // show = false; + //if (show) { + int count = tag.getAttributePartCount("morph", ' '); + int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 + do { + attrib = tag.getAttribute("morph", i, ' '); + if (i < 0) i = 0; // to handle our -1 condition + val = strchr(attrib, ':'); + val = (val) ? 
(val + 1) : attrib; + const char *val2 = val; + if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) + val2+=2; + if (!suspendTextPassThru) { + buf.appendFormatted("\\swordmorph{%s}", + tag.getAttribute("morph") + ); + } + } while (++i < count); + //} + } +} + + +} // end anonymous namespace + +BasicFilterUserData *OSISLaTeX::createUserData(const SWModule *module, const SWKey *key) { + return new MyUserData(module, key); +} + + +OSISLaTeX::OSISLaTeX() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + setPassThruNumericEscapeString(true); + + addAllowedEscapeString("quot"); + addAllowedEscapeString("apos"); + addAllowedEscapeString("amp"); + addAllowedEscapeString("lt"); + addAllowedEscapeString("gt"); + + setTokenCaseSensitive(true); + + // addTokenSubstitute("lg", "<br />"); + // addTokenSubstitute("/lg", "<br />"); + + morphFirst = false; + renderNoteNumbers = false; +} + +class OSISLaTeX::TagStack : public std::stack<SWBuf> { +}; + +OSISLaTeX::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key), quoteStack(new TagStack()), hiStack(new TagStack()), titleStack(new TagStack()), lineStack(new TagStack()) { + inXRefNote = false; + suspendLevel = 0; + divLevel = "module"; + wordsOfChristStart = "\\swordwoj{"; + wordsOfChristEnd = "}"; + consecutiveNewlines = 0; + firstCell = false; +} + +OSISLaTeX::MyUserData::~MyUserData() { + delete quoteStack; + delete hiStack; + delete titleStack; + delete lineStack; +} + +void OSISLaTeX::MyUserData::outputNewline(SWBuf &buf) { + if (++consecutiveNewlines <= 2) { + outText("//\n", buf, this); + supressAdjacentWhitespace = true; + } +} +bool OSISLaTeX::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + MyUserData *u = (MyUserData *)userData; + SWBuf scratch; + + bool sub = (u->suspendTextPassThru) ? 
substituteToken(scratch, token) : substituteToken(buf, token); + if (!sub) { + // manually process if it wasn't a simple substitution + XMLTag tag(token); + + // <w> tag + if (!strcmp(tag.getName(), "w")) { + + // start <w> tag + if ((!tag.isEmpty()) && (!tag.isEndTag())) { + u->w = token; + } + + // end or empty <w> tag + else { + bool endTag = tag.isEndTag(); + SWBuf lastText; + //bool show = true; // to handle unplaced article in kjv2003-- temporary till combined + + if (endTag) { + tag = u->w.c_str(); + lastText = u->lastTextNode.c_str(); + } + else lastText = "stuff"; + + const char *attrib; + const char *val; + if ((attrib = tag.getAttribute("xlit"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + outText(" ", buf, u); + outText(val, buf, u); + } + if ((attrib = tag.getAttribute("gloss"))) { + // I'm sure this is not the cleanest way to do it, but it gets the job done + // for rendering ruby chars properly ^_^ + buf -= lastText.length(); + + outText("\\ruby{", buf, u); + outText(lastText, buf, u); + outText("}{", buf, u); + outText(attrib, buf, u); + outText("}", buf, u); + } + if (!morphFirst) { + processLemma(u->suspendTextPassThru, tag, buf); + processMorph(u->suspendTextPassThru, tag, buf); + } + else { + processMorph(u->suspendTextPassThru, tag, buf); + processLemma(u->suspendTextPassThru, tag, buf); + } + if ((attrib = tag.getAttribute("POS"))) { + val = strchr(attrib, ':'); + val = (val) ? (val + 1) : attrib; + outText(" ", buf, u); + outText(val, buf, u); + } + + + } + } + + // <note> tag + + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + SWBuf type = tag.getAttribute("type"); + bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated + if (strongsMarkup) { + tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... 
/> + } + + if (!tag.isEmpty()) { + + if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + SWBuf footnoteBody = ""; + if (u->module){ + footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"]; + } + SWBuf noteName = tag.getAttribute("n"); + + u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes +// u->inXRefNote = (ch == 'x'); + + if (u->vkey) { + //printf("URL = %s\n",URL::encode(u->vkey->getText()).c_str()); + buf.appendFormatted("\\swordfootnote{%s}{%s}{%s}{%s}{%s}{", + + footnoteNumber.c_str(), + u->version.c_str(), + u->vkey->getText(), + tag.getAttribute("type"), + (renderNoteNumbers ? noteName.c_str() : "")); + if (u->module) { + outText( u->module->renderText(footnoteBody).c_str(), buf, u); + } + } + else { + buf.appendFormatted("\\swordfootnote{%s}{%s}{%s}{%s}{%s}{", + footnoteNumber.c_str(), + u->version.c_str(), + u->key->getText(), + tag.getAttribute("type"), + (renderNoteNumbers ? 
noteName.c_str() : "")); + if (u->module) { + outText( u->module->renderText(footnoteBody).c_str(), buf, u); + } + } + } + } + u->suspendTextPassThru = (++u->suspendLevel); + } + if (tag.isEndTag()) { + u->suspendTextPassThru = (--u->suspendLevel); + u->inXRefNote = false; + u->lastSuspendSegment = ""; // fix/work-around for nasb divineName in note bug + outText("}", buf, u); + } + } + + // <p> paragraph and <lg> linegroup tags + else if (!strcmp(tag.getName(), "p") || !strcmp(tag.getName(), "lg")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + u->outputNewline(buf); + } + else if (tag.isEndTag()) { // end tag + u->outputNewline(buf); + } + else { // empty paragraph break marker + u->outputNewline(buf); + } + } + + // Milestoned paragraphs, created by osis2mod + // <div type="paragraph" sID.../> + // <div type="paragraph" eID.../> + else if (tag.isEmpty() && !strcmp(tag.getName(), "div") && tag.getAttribute("type") && (!strcmp(tag.getAttribute("type"), "x-p") || !strcmp(tag.getAttribute("type"), "paragraph"))) { + // <div type="paragraph" sID... /> + if (tag.getAttribute("sID")) { // non-empty start tag + u->outputNewline(buf); + } + // <div type="paragraph" eID... 
/> + else if (tag.getAttribute("eID")) { + u->outputNewline(buf); + } + } + + // <reference> tag + else if (!strcmp(tag.getName(), "reference")) { + if (!u->inXRefNote) { // only show these if we're not in an xref note + if (!tag.isEndTag()) { + SWBuf target; + SWBuf work; + SWBuf ref; + bool is_scripRef = false; + + target = tag.getAttribute("osisRef"); + const char* the_ref = strchr(target, ':'); + + if(!the_ref) { + // No work + ref = target; + is_scripRef = true; + } + else { + // Compensate for starting : + ref = the_ref + 1; + + int size = (int)(target.size() - ref.size() - 1); + work.setSize(size); + strncpy(work.getRawData(), target, size); + + // For Bible:Gen.3.15 or Bible.vulgate:Gen.3.15 + if(!strncmp(work, "Bible", 5)) + is_scripRef = true; + } + + if(is_scripRef) + { + buf.appendFormatted("\\swordxref{%s}{", + ref.c_str() +// (work.size()) ? URL::encode(work.c_str()).c_str() : "") + ); + } + else + { + // Dictionary link, or something + buf.appendFormatted("\\sworddiclink{%s}{%s}{", // work, entry + work.c_str(), + ref.c_str() + ); + } + } + else { + outText("}", buf, u); + } + } + } + + // <l> poetry, etc + else if (!strcmp(tag.getName(), "l")) { + // start line marker + if (tag.getAttribute("sID") || (!tag.isEndTag() && !tag.isEmpty())) { + // nested lines plus if the line itself has an x-indent type attribute value + outText("\\swordpoetryline{", buf, u); + u->lineStack->push(tag.toString()); + } + // end line marker + else if (tag.getAttribute("eID") || tag.isEndTag()) { + outText("}", buf, u); + u->outputNewline(buf); + if (u->lineStack->size()) u->lineStack->pop(); + } + // <l/> without eID or sID + // Note: this is improper osis. 
This should be <lb/> + else if (tag.isEmpty() && !tag.getAttribute("sID")) { + u->outputNewline(buf); + } + } + + // <lb.../> + else if (!strcmp(tag.getName(), "lb") && (!tag.getAttribute("type") || strcmp(tag.getAttribute("type"), "x-optional"))) { + u->outputNewline(buf); + } + // <milestone type="line"/> + // <milestone type="x-p"/> + // <milestone type="cQuote" marker="x"/> + else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) { + if (!strcmp(tag.getAttribute("type"), "line")) { + u->outputNewline(buf); + if (tag.getAttribute("subType") && !strcmp(tag.getAttribute("subType"), "x-PM")) { + u->outputNewline(buf); + } + } + else if (!strcmp(tag.getAttribute("type"),"x-p")) { + if (tag.getAttribute("marker")) + outText(tag.getAttribute("marker"), buf, u); + else outText("<!p>", buf, u); + } + else if (!strcmp(tag.getAttribute("type"), "cQuote")) { + const char *tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? 
'\"' : '\'', buf, u); + } + } + + // <title> + + else if (!strcmp(tag.getName(), "title")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + const char *tmp = tag.getAttribute("type"); + bool hasType = tmp; + SWBuf type = tmp; + + outText("\n\\swordtitle{", buf, u); + outText(u->divLevel, buf, u); + outText("}{", buf, u); + + if (hasType) outText(type, buf, u); + else outText("", buf, u); + + outText("}{", buf, u); + } + else if (tag.isEndTag()) { + outText( "}", buf, u); + ++u->consecutiveNewlines; + u->supressAdjacentWhitespace = true; + } + } + + // <list> + else if (!strcmp(tag.getName(), "list")) { + if((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("\n\\begin{itemize}", buf, u); + } + else if (tag.isEndTag()) { + outText("\n\\end{itemize}", buf, u); + ++u->consecutiveNewlines; + u->supressAdjacentWhitespace = true; + } + } + + // <item> + else if (!strcmp(tag.getName(), "item")) { + if((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("\n\\item ", buf, u); + } + else if (tag.isEndTag()) { + ++u->consecutiveNewlines; + u->supressAdjacentWhitespace = true; + } + } + // <catchWord> & <rdg> tags (italicize) + else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText("\\emph{", buf, u); + } + else if (tag.isEndTag()) { + outText("}", buf, u); + } + } + + // divineName + else if (!strcmp(tag.getName(), "divineName")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText( "\\sworddivinename{", buf, u); + u->suspendTextPassThru = (++u->suspendLevel); + } + else if (tag.isEndTag()) { + SWBuf lastText = u->lastSuspendSegment.c_str(); + u->suspendTextPassThru = (--u->suspendLevel); + if (lastText.size()) { + scratch.setFormatted("%s}", lastText.c_str()); + outText(scratch.c_str(), buf, u); + } + } + } + + // <hi> text highlighting + else if (!strcmp(tag.getName(), "hi")) { + SWBuf type = tag.getAttribute("type"); + + // handle tei rend attribute if type doesn't exist + if 
(!type.length()) type = tag.getAttribute("rend"); + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (type == "bold" || type == "b" || type == "x-b") { + outText("\\textbold{", buf, u); + } + + // there is no officially supported OSIS overline attribute, + // thus either TEI overline or OSIS x-overline would be best, + // but we have used "ol" in the past, as well. Once a valid + // OSIS overline attribute is made available, these should all + // eventually be deprecated and never documented that they are supported. + else if (type == "ol" || type == "overline" || type == "x-overline") { + outText("\\textoverline{", buf, u); + } + + else if (type == "super") { + outText("\\textsuperscript{", buf, u); + } + else if (type == "sub") { + outText("\\textsubscript{", buf, u); + } + else { // all other types + outText("\\emph {", buf, u); + } + u->hiStack->push(tag.toString()); + } + else if (tag.isEndTag()) { + outText("}", buf, u); + } + } + + // <q> quote + // Rules for a quote element: + // If the tag is empty with an sID or an eID then use whatever it specifies for quoting. + // Note: empty elements without sID or eID are ignored. + // If the tag is <q> then use it's specifications and push it onto a stack for </q> + // If the tag is </q> then use the pushed <q> for specification + // If there is a marker attribute, possibly empty, this overrides osisQToTick. + // If osisQToTick, then output the marker, using level to determine the type of mark. + else if (!strcmp(tag.getName(), "q")) { + SWBuf type = tag.getAttribute("type"); + SWBuf who = tag.getAttribute("who"); + const char *tmp = tag.getAttribute("level"); + int level = (tmp) ? atoi(tmp) : 1; + tmp = tag.getAttribute("marker"); + bool hasMark = tmp; + SWBuf mark = tmp; + + // open <q> or <q sID... 
/> + if ((!tag.isEmpty() && !tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("sID"))) { + // if <q> then remember it for the </q> + if (!tag.isEmpty()) { + u->quoteStack->push(tag.toString()); + } + + // Do this first so quote marks are included as WoC + if (who == "Jesus") + outText(u->wordsOfChristStart, buf, u); + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + //alternate " and ' + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + } + // close </q> or <q eID... /> + else if ((tag.isEndTag()) || (tag.isEmpty() && tag.getAttribute("eID"))) { + // if it is </q> then pop the stack for the attributes + if (tag.isEndTag() && !u->quoteStack->empty()) { + XMLTag qTag(u->quoteStack->top()); + if (u->quoteStack->size()) u->quoteStack->pop(); + + type = qTag.getAttribute("type"); + who = qTag.getAttribute("who"); + tmp = qTag.getAttribute("level"); + level = (tmp) ? atoi(tmp) : 1; + tmp = qTag.getAttribute("marker"); + hasMark = tmp; + mark = tmp; + } + + // first check to see if we've been given an explicit mark + if (hasMark) + outText(mark, buf, u); + // finally, alternate " and ', if config says we should supply a mark + else if (u->osisQToTick) + outText((level % 2) ? '\"' : '\'', buf, u); + + // Do this last so quote marks are included as WoC + if (who == "Jesus") + outText(u->wordsOfChristEnd, buf, u); + } + } + + // <transChange> + else if (!strcmp(tag.getName(), "transChange")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf type = tag.getAttribute("type"); + u->lastTransChange = type; + + // just do all transChange tags this way for now + if ((type == "added") || (type == "supplied")) + outText("\\swordtranschange{supplied}{", buf, u); + else if (type == "tenseChange") + outText( "\\swordtranschange{tense}{", buf, u); + } + else if (tag.isEndTag()) { + outText("}", buf, u); + } + else { // empty transChange marker? 
+ } + } + + // image + else if (!strcmp(tag.getName(), "figure")) { + const char *src = tag.getAttribute("src"); + if (src) { // assert we have a src attribute + SWBuf filepath; + if (userData->module) { + filepath = userData->module->getConfigEntry("AbsoluteDataPath"); + if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (src[0] != '/')) + filepath += '/'; + } + filepath += src; + + outText("\\figure{", buf, u); + outText("\\includegraphics{", buf, u); + outText(filepath.c_str(), buf, u); + outText("}}", buf, u); + + } + } + + // ok to leave these in + else if (!strcmp(tag.getName(), "div")) { + SWBuf type = tag.getAttribute("type"); + if (type == "module") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "testament") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "bookGroup") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "book") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "majorSection") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "section") { + u->divLevel = type; + outText("\n", buf, u); + } + else if (type == "paragraph") { + u->divLevel = type; + outText("\n", buf, u); + } + } + else if (!strcmp(tag.getName(), "span")) { + outText( "", buf, u); + } + else if (!strcmp(tag.getName(), "br")) { + outText( "\\", buf, u); + } + else if (!strcmp(tag.getName(), "table")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText( "\n\\begin{tabular}", buf, u); + } + else if (tag.isEndTag()) { + outText( "\n\\end{tabular}", buf, u); + ++u->consecutiveNewlines; + u->supressAdjacentWhitespace = true; + } + + } + else if (!strcmp(tag.getName(), "row")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + outText( "\n", buf, u); + u->firstCell = true; + } + else if (tag.isEndTag()) { + outText( "//", buf, u); + u->firstCell = false; + } + + } + else if (!strcmp(tag.getName(), "cell")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { 
+ if (u->firstCell == false) { + outText( " & ", buf, u); + } + else { + u->firstCell = false; + } + } + else if (tag.isEndTag()) { + outText( "", buf, u); + } + } + else { + if (!u->supressAdjacentWhitespace) u->consecutiveNewlines = 0; + return false; // we still didn't handle token + } + } + if (!u->supressAdjacentWhitespace) u->consecutiveNewlines = 0; + return true; +} + + +SWORD_NAMESPACE_END diff --git a/src/modules/filters/osislemma.cpp b/src/modules/filters/osislemma.cpp index cf5d8f4..12881ff 100644 --- a/src/modules/filters/osislemma.cpp +++ b/src/modules/filters/osislemma.cpp @@ -3,7 +3,7 @@ * osislemma.cpp - SWFilter descendant to hide or show lemmata * in a OSIS module * - * $Id: osislemma.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: osislemma.cpp 3483 2017-06-25 15:19:34Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -67,6 +67,15 @@ char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *modul intoken = false; if (token.startsWith("w ")) { // Word XMLTag wtag(token); + + // always save off lemma if we haven't yet + if (!wtag.getAttribute("savlm")) { + const char *l = wtag.getAttribute("lemma"); + if (l) { + wtag.setAttribute("savlm", l); + } + } + int count = wtag.getAttributePartCount("lemma", ' '); for (int i = 0; i < count; i++) { SWBuf a = wtag.getAttribute("lemma", i, ' '); @@ -78,6 +87,7 @@ char OSISLemma::processText(SWBuf &text, const SWKey *key, const SWModule *modul count--; } } + token = wtag; token.trim(); // drop <> diff --git a/src/modules/filters/osismorph.cpp b/src/modules/filters/osismorph.cpp index 71de8e5..4ccf15b 100644 --- a/src/modules/filters/osismorph.cpp +++ b/src/modules/filters/osismorph.cpp @@ -3,7 +3,7 @@ * osismorph.cpp - SWFilter descendant to hide or show morph tags * in a OSIS module * - * $Id: osismorph.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: osismorph.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 2003-2013 
CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -90,11 +90,12 @@ char OSISMorph::processText(SWBuf &text, const SWKey *key, const SWModule *modul continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos] = 0; + } } - else { + else { text.append(*from); } } diff --git a/src/modules/filters/osismorphsegmentation.cpp b/src/modules/filters/osismorphsegmentation.cpp index 1d91428..0e5c1cc 100644 --- a/src/modules/filters/osismorphsegmentation.cpp +++ b/src/modules/filters/osismorphsegmentation.cpp @@ -4,7 +4,7 @@ * morphemes (for morpheme segmented Hebrew in * the WLC) * - * $Id: osismorphsegmentation.cpp 3186 2014-04-17 04:33:19Z greg.hellings $ + * $Id: osismorphsegmentation.cpp 3153 2014-04-14 16:22:11Z scribe $ * * Copyright 2006-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/osisosis.cpp b/src/modules/filters/osisosis.cpp index 2bc5cbb..f3dca3a 100644 --- a/src/modules/filters/osisosis.cpp +++ b/src/modules/filters/osisosis.cpp @@ -2,7 +2,7 @@ * * osisosis.cpp - internal OSIS to public OSIS filter * - * $Id: osisosis.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: osisosis.cpp 3338 2015-03-12 20:04:06Z refdoc $ * * Copyright 2004-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -58,39 +58,28 @@ char OSISOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module char status = SWBasicFilter::processText(text, key, module); VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, key); if (vkey) { - SWBuf ref = ""; if (vkey->getVerse()) { - ref.appendFormatted("\t\t<verse osisID=\"%s\">", vkey->getOSISRef()); - } - - if (ref.length() > 0) { - - text = ref + text; - - if (vkey->getVerse()) { - VerseKey *tmp = (VerseKey *)vkey->clone(); - *tmp = *vkey; - tmp->setAutoNormalize(false); - tmp->setIntros(true); - - text += "</verse>"; - + VerseKey *tmp = (VerseKey *)vkey->clone(); + *tmp = 
*vkey; + tmp->setAutoNormalize(false); + tmp->setIntros(true); + + *tmp = MAXVERSE; + if (*vkey == *tmp) { + tmp->setVerse(0); +// sprintf(ref, "\t</div>"); +// pushString(&to, ref); + *tmp = MAXCHAPTER; *tmp = MAXVERSE; if (*vkey == *tmp) { + tmp->setChapter(0); tmp->setVerse(0); // sprintf(ref, "\t</div>"); // pushString(&to, ref); - *tmp = MAXCHAPTER; - *tmp = MAXVERSE; - if (*vkey == *tmp) { - tmp->setChapter(0); - tmp->setVerse(0); -// sprintf(ref, "\t</div>"); -// pushString(&to, ref); - } } - delete tmp; } + delete tmp; + } // // else if (vkey->Chapter()) { @@ -98,7 +87,7 @@ char OSISOSIS::processText(SWBuf &text, const SWKey *key, const SWModule *module // } // else sprintf(ref, "\t<div type=\"book\" osisID=\"%s\">", vkey->getOSISRef()); // - } + } return status; } @@ -149,11 +138,12 @@ bool OSISOSIS::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u else if (!strcmp(tag.getName(), "note")) { if (!tag.isEndTag()) { SWBuf type = tag.getAttribute("type"); + bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated if (strongsMarkup) { tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... 
/> } - + if (!tag.isEmpty()) { tag.setAttribute("swordFootnote", 0); @@ -162,6 +152,13 @@ bool OSISOSIS::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u } else u->suspendTextPassThru = true; } + + if (u->module) { + XMLTag tag = token; + SWBuf swordFootnote = tag.getAttribute("swordFootnote"); + SWBuf footnoteBody = u->module->getEntryAttributes()["Footnote"][swordFootnote]["body"]; + buf.append(u->module->renderText(footnoteBody)); + } } if (tag.isEndTag()) { if (u->suspendTextPassThru == false) diff --git a/src/modules/filters/osisplain.cpp b/src/modules/filters/osisplain.cpp index d594028..436d0c7 100644 --- a/src/modules/filters/osisplain.cpp +++ b/src/modules/filters/osisplain.cpp @@ -2,7 +2,7 @@ * * osisplain.cpp - An SWFilter that provides stripping of OSIS tags * - * $Id: osisplain.cpp 3290 2014-12-04 04:54:52Z greg.hellings $ + * $Id: osisplain.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -25,6 +25,8 @@ #include <ctype.h> #include <versekey.h> #include <stringmgr.h> +#include <utilxml.h> +#include <swmodule.h> SWORD_NAMESPACE_START @@ -35,7 +37,6 @@ namespace { public: SWBuf w; XMLTag tag; - VerseKey *vk; char testament; SWBuf hiType; MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {} @@ -58,22 +59,35 @@ OSISPlain::OSISPlain() { addEscapeStringSubstitute("gt", ">"); addEscapeStringSubstitute("quot", "\""); - setTokenCaseSensitive(true); - addTokenSubstitute("title", "\n"); - addTokenSubstitute("/title", "\n"); - addTokenSubstitute("/l", "\n"); - addTokenSubstitute("lg", "\n"); - addTokenSubstitute("/lg", "\n"); + setTokenCaseSensitive(true); + addTokenSubstitute("title", "\n"); + addTokenSubstitute("/title", "\n"); + addTokenSubstitute("/l", "\n"); + addTokenSubstitute("lg", "\n"); + addTokenSubstitute("/lg", "\n"); + + setStageProcessing(PRECHAR); } + BasicFilterUserData 
*OSISPlain::createUserData(const SWModule *module, const SWKey *key) { MyUserData *u = new MyUserData(module, key); - u->vk = SWDYNAMIC_CAST(VerseKey, u->key); - u->testament = (u->vk) ? u->vk->getTestament() : 2; // default to NT + u->testament = (u->vkey) ? u->vkey->getTestament() : 2; // default to NT return u; } +bool OSISPlain::processStage(char stage, SWBuf &text, char *&from, BasicFilterUserData *userData) { + // this is a strip filter so we want to do this as optimized as possible. Avoid calling + // getUniCharFromUTF8 for slight speed improvement + + if (stage == PRECHAR) { + if (from[0] == 0xC2 && from[1] == 0xAD) return true; // skip soft hyphens + } + return false; +} + + bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { // manually process if it wasn't a simple substitution if (!substituteToken(buf, token)) { @@ -105,10 +119,8 @@ bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * buf.append('>'); } if ((attrib = u->tag.getAttribute("gloss"))) { - val = strchr(attrib, ':'); - val = (val) ? 
(val + 1) : attrib; buf.append(" <"); - buf.append(val); + buf.append(attrib); buf.append('>'); } if ((attrib = u->tag.getAttribute("lemma"))) { @@ -168,6 +180,12 @@ bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * buf.append(" ["); } else u->suspendTextPassThru = true; + if (u->module) { + XMLTag tag = token; + SWBuf swordFootnote = tag.getAttribute("swordFootnote"); + SWBuf footnoteBody = u->module->getEntryAttributes()["Footnote"][swordFootnote]["body"]; + buf.append(u->module->renderText(footnoteBody)); + } } else if (!strncmp(token, "/note", 5)) { if (!u->suspendTextPassThru) @@ -234,12 +252,17 @@ bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } } else { - buf.append("*"); + buf.append("* "); buf.append(u->lastSuspendSegment); - buf.append("*"); + buf.append(" *"); } u->suspendTextPassThru = false; } + + else if ((!strncmp(token, "q", 1) && (u->tag.getAttribute("marker")))) { + buf.append(u->tag.getAttribute("marker")); + } + // <milestone type="line"/> else if (!strncmp(token, "milestone", 9)) { @@ -248,6 +271,9 @@ bool OSISPlain::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * userData->supressAdjacentWhitespace = true; buf.append('\n'); } + if (u->tag.getAttribute("marker")) { + buf.append(u->tag.getAttribute("marker")); + } } else { diff --git a/src/modules/filters/osisreferencelinks.cpp b/src/modules/filters/osisreferencelinks.cpp index 83308e4..bb30517 100644 --- a/src/modules/filters/osisreferencelinks.cpp +++ b/src/modules/filters/osisreferencelinks.cpp @@ -3,7 +3,7 @@ * osisreferencelinks.cpp - SWFilter descendant to toggle OSIS reference * links with specified type/subType * - * $Id: osisreferencelinks.cpp 2995 2013-12-28 06:32:26Z greg.hellings $ + * $Id: osisreferencelinks.cpp 2989 2013-10-09 08:22:21Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/osisrtf.cpp 
b/src/modules/filters/osisrtf.cpp index 7bc5639..522a0a8 100644 --- a/src/modules/filters/osisrtf.cpp +++ b/src/modules/filters/osisrtf.cpp @@ -2,7 +2,7 @@ * * osisrtf.cpp - OSIS to RTF filter * - * $Id: osisrtf.cpp 3156 2014-04-17 03:50:37Z greg.hellings $ * + * $Id: osisrtf.cpp 3548 2017-12-10 05:11:38Z scribe $ * * * Copyright 2003-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -36,7 +36,7 @@ namespace { class MyUserData : public BasicFilterUserData { public: bool osisQToTick; - bool BiblicalText; + bool isBiblicalText; bool inXRefNote; int suspendLevel; std::stack<char *> quoteStack; @@ -49,13 +49,14 @@ namespace { MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { inXRefNote = false; - BiblicalText = false; + isBiblicalText = false; suspendLevel = 0; + osisQToTick = true; // default if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); } - osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); } @@ -244,15 +245,9 @@ bool OSISRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *us && (type != "strongsMarkup") // deprecated ) { SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { char ch = ((!strcmp(type.c_str(), "crossReference")) || (!strcmp(type.c_str(), "x-cross-ref"))) ? 
'x':'n'; - scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->getVerse(), footnoteNumber.c_str()); + scratch.setFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, u->vkey->getVerse(), footnoteNumber.c_str()); outText(scratch.c_str(), buf, u); u->inXRefNote = (ch == 'x'); } diff --git a/src/modules/filters/osisscripref.cpp b/src/modules/filters/osisscripref.cpp index 99d6709..ec188d9 100644 --- a/src/modules/filters/osisscripref.cpp +++ b/src/modules/filters/osisscripref.cpp @@ -3,7 +3,7 @@ * osisscripref.cpp - SWFilter descendant to hide or show scripture * references in an OSIS module * - * $Id: osisscripref.cpp 3156 2014-04-17 03:50:37Z greg.hellings $ + * $Id: osisscripref.cpp 3045 2014-03-02 07:53:52Z chrislit $ * * Copyright 2003-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/osisstrongs.cpp b/src/modules/filters/osisstrongs.cpp index b937883..0c17446 100644 --- a/src/modules/filters/osisstrongs.cpp +++ b/src/modules/filters/osisstrongs.cpp @@ -3,7 +3,7 @@ * osisstrongs.cpp - SWFilter descendant to hide or show Strong's number * in a OSIS module * - * $Id: osisstrongs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: osisstrongs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -57,7 +57,7 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod SWBuf token; bool intoken = false; int wordNum = 1; - char wordstr[5]; + char wordstr[11]; const char *wordStart = 0; SWBuf page = ""; // some modules include <seg> page info, so we add these to the words @@ -88,6 +88,15 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod if (token.startsWith("w ")) { // Word XMLTag wtag(token); + + // always save off lemma if we haven't yet + if (!wtag.getAttribute("savlm")) { + const char *l = wtag.getAttribute("lemma"); + if (l) { + 
wtag.setAttribute("savlm", l); + } + } + if (module->isProcessEntryAttributes()) { wordStart = from+1; char gh = 0; @@ -117,7 +126,7 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod const char *m = strchr(attrib, ':'); if (m) { - int len = m-attrib; + int len = (int)(m-attrib); mClass.append(attrib, len); attrib += (len+1); } @@ -138,19 +147,19 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod } while (++i < count); } - if ((attrib = wtag.getAttribute("lemma"))) { - int count = wtag.getAttributePartCount("lemma", ' '); + if ((attrib = wtag.getAttribute("savlm"))) { + int count = wtag.getAttributePartCount("savlm", ' '); int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { gh = 0; SWBuf lClass = ""; SWBuf l = ""; - attrib = wtag.getAttribute("lemma", i, ' '); + attrib = wtag.getAttribute("savlm", i, ' '); if (i < 0) i = 0; // to handle our -1 condition const char *m = strchr(attrib, ':'); if (m) { - int len = m-attrib; + int len = (int)(m-attrib); lClass.append(attrib, len); attrib += (len+1); } @@ -217,7 +226,7 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod if (wtag.isEmpty()) { int j; - for (j = token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); + for (j = (int)token.length()-1; ((j>0) && (strchr(" /", token[j]))); j--); token.size(j+1); } @@ -232,35 +241,27 @@ char OSISStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod wordNum++; } + // if we won't want strongs, then lets get them out of lemma if (!option) { -/* - * Code which handles multiple lemma types. Kindof works but breaks at least WEBIF filters for strongs. 
- * int count = wtag.getAttributePartCount("lemma", ' '); - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; ++i) { SWBuf a = wtag.getAttribute("lemma", i, ' '); const char *prefix = a.stripPrefix(':'); if ((prefix) && (!strcmp(prefix, "x-Strongs") || !strcmp(prefix, "strong") || !strcmp(prefix, "Strong"))) { // remove attribute part wtag.setAttribute("lemma", 0, i, ' '); - i--; - count--; + --i; + --count; } } -* Instead the codee below just removes the lemma attribute -*****/ - const char *l = wtag.getAttribute("lemma"); - if (l) { - SWBuf savlm = l; - wtag.setAttribute("lemma", 0); - wtag.setAttribute("savlm", savlm); - token = wtag; - token.trim(); - // drop <> - token << 1; - token--; - } + + } + token = wtag; + token.trim(); + // drop <> + token << 1; + token--; } if (token.startsWith("/w")) { // Word End if (module->isProcessEntryAttributes()) { diff --git a/src/modules/filters/osiswebif.cpp b/src/modules/filters/osiswebif.cpp index 4894777..2aa80a7 100644 --- a/src/modules/filters/osiswebif.cpp +++ b/src/modules/filters/osiswebif.cpp @@ -3,7 +3,7 @@ * osiswebif.cpp - OSIS to HTML filter with hrefs for strongs and * morph tags * - * $Id: osiswebif.cpp 3285 2014-12-03 06:14:50Z greg.hellings $ + * $Id: osiswebif.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2003-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -83,7 +83,7 @@ bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if ((attrib = tag.getAttribute("gloss"))) { val = strchr(attrib, ':'); val = (val) ? 
(val + 1) : attrib; - buf.appendFormatted(" %s", val); +// buf.appendFormatted(" %s", val); } if ((attrib = tag.getAttribute("lemma"))) { int count = tag.getAttributePartCount("lemma", ' '); @@ -144,15 +144,9 @@ bool OSISWEBIF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf modName = (u->module) ? u->module->getName() : ""; - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); -// buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", vkey->getText(), ch, footnoteNumber.c_str(), ch); +// buf.appendFormatted("<a href=\"noteID=%s.%c.%s\"><small><sup>*%c</sup></small></a> ", u->vkey->getText(), ch, footnoteNumber.c_str(), ch); buf.appendFormatted("<span class=\"fn\" onclick=\"f(\'%s\',\'%s\',\'%s\');\" >%c</span>", modName.c_str(), u->key->getText(), footnoteNumber.c_str(), ch); } } diff --git a/src/modules/filters/osiswordjs.cpp b/src/modules/filters/osiswordjs.cpp index 75946a8..2da57c9 100644 --- a/src/modules/filters/osiswordjs.cpp +++ b/src/modules/filters/osiswordjs.cpp @@ -2,7 +2,7 @@ * * osiswordjs.cpp - SWFilter descendant for ??? 
* - * $Id: osiswordjs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: osiswordjs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2005-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -66,7 +66,7 @@ char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu int tokpos = 0; bool intoken = false; int wordNum = 1; - char wordstr[5]; + char wordstr[11]; SWBuf modName = (module)?module->getName():""; // add TR to w src in KJV then remove this next line SWBuf wordSrcPrefix = (modName == "KJV")?SWBuf("TR"):modName; @@ -171,6 +171,10 @@ char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu // 'p' = 'fillpop' to save bandwidth text.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','%s','%s');\" >", lexName.c_str(), lemma.c_str(), wordID.c_str(), morph.c_str(), page.c_str(), modName.c_str()); wordNum++; + + if (wtag.isEmpty()) { + text += "</w></span>"; + } } if ((*token == '/') && (token[1] == 'w') && option) { // Word text += "</w></span>"; @@ -185,9 +189,10 @@ char OSISWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; token[tokpos+2] = 0; + } } else { text.append(*from); diff --git a/src/modules/filters/osisxhtml.cpp b/src/modules/filters/osisxhtml.cpp index fb743e2..bfdb123 100644 --- a/src/modules/filters/osisxhtml.cpp +++ b/src/modules/filters/osisxhtml.cpp @@ -2,7 +2,7 @@ * * osisxhtml.cpp - Render filter for classed XHTML of an OSIS module * - * $Id: osisxhtml.cpp 3290 2014-12-04 04:54:52Z greg.hellings $ + * $Id: osisxhtml.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2011-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -37,15 +37,25 @@ const char *OSISXHTML::getHeader() const { const static char *header = "\ .divineName { font-variant: small-caps; }\n\ .wordsOfJesus { color: red; }\n\ - 
.transChangeSupplied { font-style: italic; }\n\ - .overline { text-decoration: overline; }\n\ - .indent1 { margin-left: 10px }\n\ - .indent2 { margin-left: 20px }\n\ - .indent3 { margin-left: 30px }\n\ - .indent4 { margin-left: 40px }\n\ + .transChange { font-style: italic; }\n\ + .transChange.transChange-supplied { font-style: italic; }\n\ + .transChange.transChange-added { font-style: italic; }\n\ + .transChange.transChange-tenseChange::before { content: '*'; }\n\ + .transChange.transChange-tenseChange { font-style: normal; }\n\ + .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }\n\ + .overline { text-decoration: overline; }\n\ + .indent1 { margin-left: 1em; }\n\ + .indent2 { margin-left: 2em; }\n\ + .indent3 { margin-left: 3em; }\n\ + .indent4 { margin-left: 4em; }\n\ + abbr { &:hover{ &:before{ content: attr(title); } } }\n\ .small-caps { font-variant: small-caps; }\n\ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }\n\ .acrostic { text-align: center; }\n\ + .colophon {font-style: italic; font-size: small; display: block; }\n\ + .rdg { font-style: italic; }\n\ + .catchWord {font-style: bold; }\n\ + .x-p-indent {text-indent: 1em; }\n\ "; // Acrostic for things like the titles in Psalm 119 return header; @@ -67,14 +77,19 @@ void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { int i = (count > 1) ? 0 : -1; // -1 for whole value cuz it's faster, but does the same thing as 0 do { attrib = tag.getAttribute("lemma", i, ' '); + SWBuf at = attrib; + const char *prefix = at.stripPrefix(':'); if (i < 0) i = 0; // to handle our -1 condition val = strchr(attrib, ':'); val = (val) ? 
(val + 1) : attrib; SWBuf gh; - if(*val == 'G') + if (*val == 'G') { gh = "Greek"; - if(*val == 'H') + } + else if (*val == 'H') { gh = "Hebrew"; + } + else if (prefix) gh = prefix; const char *val2 = val; if ((strchr("GH", *val)) && (isdigit(val[1]))) val2++; @@ -82,7 +97,7 @@ void processLemma(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { // show = false; //else { if (!suspendTextPassThru) { - buf.appendFormatted("<small><em class=\"strongs\"><<a href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\" class=\"strongs\">%s</a>></em></small>", + buf.appendFormatted("<small><em class=\"strongs\"><<a class=\"strongs\" href=\"passagestudy.jsp?action=showStrongs&type=%s&value=%s\" class=\"strongs\">%s</a>></em></small>", (gh.length()) ? gh.c_str() : "", URL::encode(val2).c_str(), val2); @@ -114,7 +129,7 @@ void processMorph(bool suspendTextPassThru, XMLTag &tag, SWBuf &buf) { if ((*val == 'T') && (strchr("GH", val[1])) && (isdigit(val[2]))) val2+=2; if (!suspendTextPassThru) { - buf.appendFormatted("<small><em class=\"morph\">(<a href=\"passagestudy.jsp?action=showMorph&type=%s&value=%s\" class=\"morph\">%s</a>)</em></small>", + buf.appendFormatted("<small><em class=\"morph\">(<a class=\"morph\" href=\"passagestudy.jsp?action=showMorph&type=%s&value=%s\" class=\"morph\">%s</a>)</em></small>", URL::encode(tag.getAttribute("morph")).c_str(), URL::encode(val).c_str(), val2); @@ -165,18 +180,16 @@ OSISXHTML::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : Ba suspendLevel = 0; wordsOfChristStart = "<span class=\"wordsOfJesus\"> "; wordsOfChristEnd = "</span> "; - interModuleLinkStart = "<a href=\"sword://%s/%s\">"; + interModuleLinkStart = "<a class=\"%s\" href=\"sword://%s/%s\">"; interModuleLinkEnd = "</a>"; + isBiblicalText = false; + osisQToTick = true; // default + consecutiveNewlines = 0; if (module) { osisQToTick = ((!module->getConfigEntry("OSISqToTick")) || (strcmp(module->getConfigEntry("OSISqToTick"), "false"))); version = 
module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } - else { - osisQToTick = true; // default - version = ""; - } - consecutiveNewlines = 0; } OSISXHTML::MyUserData::~MyUserData() { @@ -186,9 +199,17 @@ OSISXHTML::MyUserData::~MyUserData() { delete lineStack; } + void OSISXHTML::MyUserData::outputNewline(SWBuf &buf) { if (++consecutiveNewlines <= 2) { - outText("<br />\n", buf, this); + // any newlines at the start of a verse should get appended to a preverse heading + // since preverse cause a newline, simply be sure we have a preverse + if (!buf.size() && vkey && vkey->getVerse() && module && module->isProcessEntryAttributes()) { + module->getEntryAttributes()["Heading"]["Preverse"]["0"] += "<div></div>"; + } + else { + outText("<br />\n", buf, this); + } supressAdjacentWhitespace = true; } } @@ -235,9 +256,9 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * outText("<ruby><rb>", buf, u); outText(lastText, buf, u); + outText("</rb><rp>(</rp><rt>", buf, u); val = strchr(attrib, ':'); val = (val) ? 
(val + 1) : attrib; - outText("</rb><rp>(</rp><rt>", buf, u); outText(val, buf, u); outText("</rt><rp>)</rp></ruby>", buf, u); } @@ -257,7 +278,7 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } /*if (endTag) - buf += "}";*/ + outText( "}", buf, u);*/ } } @@ -265,6 +286,16 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * else if (!strcmp(tag.getName(), "note")) { if (!tag.isEndTag()) { SWBuf type = tag.getAttribute("type"); + SWBuf subType = tag.getAttribute("subType"); + SWBuf classExtras = ""; + + if (type.size()) { + classExtras.append(" ").append(type); + } + if (subType.size()) { + classExtras.append(" ").append(subType); + } + bool strongsMarkup = (type == "x-strongsMarkup" || type == "strongsMarkup"); // the latter is deprecated if (strongsMarkup) { tag.setEmpty(false); // handle bug in KJV2003 module where some note open tags were <note ... /> @@ -275,30 +306,26 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if (!strongsMarkup) { // leave strong's markup notes out, in the future we'll probably have different option filters to turn different note types on or off SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 'x':'n'); u->inXRefNote = true; // Why this change? Ben Morgan: Any note can have references in, so we need to set this to true for all notes // u->inXRefNote = (ch == 'x'); - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... 
) { } - if (vkey) { - //printf("URL = %s\n",URL::encode(vkey->getText()).c_str()); - buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", + if (u->vkey) { + //printf("URL = %s\n",URL::encode(u->vkey->getText()).c_str()); + buf.appendFormatted("<a class=\"%s\" href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", + classExtras.c_str(), ch, URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), ch, ch, (renderNoteNumbers ? noteName.c_str() : "")); } else { - buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", + buf.appendFormatted("<a class=\"%s\" href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", + classExtras.c_str(), ch, URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), @@ -314,11 +341,11 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if (tag.isEndTag()) { u->suspendTextPassThru = (--u->suspendLevel); u->inXRefNote = false; - u->lastSuspendSegment = ""; // fix/work-around for nasb devineName in note bug + u->lastSuspendSegment = ""; // fix/work-around for nasb divineName in note bug } } - // <p> paragraph and <lg> linegroup tags + // <p> paragraph and <lg> linegroup tags except newline at start of verse (immediately after verse number) else if (!strcmp(tag.getName(), "p") || !strcmp(tag.getName(), "lg")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag u->outputNewline(buf); @@ -334,14 +361,24 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * // Milestoned paragraphs, 
created by osis2mod // <div type="paragraph" sID.../> // <div type="paragraph" eID.../> - else if (tag.isEmpty() && !strcmp(tag.getName(), "div") && tag.getAttribute("type") && (!strcmp(tag.getAttribute("type"), "x-p") || !strcmp(tag.getAttribute("type"), "paragraph"))) { + else if (tag.isEmpty() && !strcmp(tag.getName(), "div") && tag.getAttribute("type") && (!strcmp(tag.getAttribute("type"), "x-p") || !strcmp(tag.getAttribute("type"), "paragraph") || !strcmp(tag.getAttribute("type"), "colophon"))) { // <div type="paragraph" sID... /> if (tag.getAttribute("sID")) { // non-empty start tag u->outputNewline(buf); + // safe because we've verified type is present from if statement above + if (!strcmp(tag.getAttribute("type"), "colophon")) { + outText("<div class=\"colophon\">", buf, u); + } + } // <div type="paragraph" eID... /> else if (tag.getAttribute("eID")) { u->outputNewline(buf); + // safe because we've verified type is present from if statement above + if (!strcmp(tag.getAttribute("type"), "colophon")) { + outText("</div>", buf, u); + } + } } @@ -352,10 +389,18 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * SWBuf target; SWBuf work; SWBuf ref; + SWBuf type; + SWBuf classes = ""; + bool is_scripRef = false; target = tag.getAttribute("osisRef"); const char* the_ref = strchr(target, ':'); + type = tag.getAttribute("type"); + + if (type.size()) { + classes.append(type); + } if(!the_ref) { // No work @@ -377,7 +422,8 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if(is_scripRef) { - buf.appendFormatted("<a href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=\">", + buf.appendFormatted("<a class=\"%s\" href=\"passagestudy.jsp?action=showRef&type=scripRef&value=%s&module=\">", + classes.c_str(), URL::encode(ref.c_str()).c_str() // (work.size()) ? 
URL::encode(work.c_str()).c_str() : "") ); @@ -385,7 +431,8 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * else { // Dictionary link, or something - buf.appendFormatted(u->interModuleLinkStart, + buf.appendFormatted(u->interModuleLinkStart, + classes.c_str(), URL::encode(work.c_str()).c_str(), URL::encode(ref.c_str()).c_str() ); @@ -440,18 +487,20 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * // <milestone type="x-p"/> // <milestone type="cQuote" marker="x"/> else if ((!strcmp(tag.getName(), "milestone")) && (tag.getAttribute("type"))) { - if (!strcmp(tag.getAttribute("type"), "line")) { + // safe because we've verified type is present from if statement above + const char *type = tag.getAttribute("type"); + if (!strcmp(type, "line")) { u->outputNewline(buf); if (tag.getAttribute("subType") && !strcmp(tag.getAttribute("subType"), "x-PM")) { u->outputNewline(buf); } } - else if (!strcmp(tag.getAttribute("type"),"x-p")) { + else if (!strcmp(type,"x-p")) { if (tag.getAttribute("marker")) outText(tag.getAttribute("marker"), buf, u); else outText("<!p>", buf, u); } - else if (!strcmp(tag.getAttribute("type"), "cQuote")) { + else if (!strcmp(type, "cQuote")) { const char *tmp = tag.getAttribute("marker"); bool hasMark = tmp; SWBuf mark = tmp; @@ -465,41 +514,55 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * else if (u->osisQToTick) outText((level % 2) ? 
'\"' : '\'', buf, u); } + else if (!strcmp(type, "x-importer")) { + //drop tag as not relevant + } + + + else { + SWBuf type = tag.getAttribute("type"); + outText(SWBuf("<span class=\"") + type + "\"/>", buf,u); + } } // <title> else if (!strcmp(tag.getName(), "title")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf type = tag.getAttribute("type"); - bool keepType = false; + SWBuf canonical = tag.getAttribute("canonical"); + + SWBuf classExtras = ""; + if (type.size()) { - keepType = true; - } - VerseKey *vkey = SWDYNAMIC_CAST(VerseKey, u->key); - if (vkey && !vkey->getVerse()) { - if (!vkey->getChapter()) { - if (!vkey->getBook()) { - if (!vkey->getTestament()) { - buf += SWBuf("<h1 class=\"moduleHeader ") + (keepType ? type : "") + "\">"; + classExtras.append(" ").append(type); + } + if (canonical.size() && !strcmp(canonical,"true")) { + classExtras.append(" canonical"); + } + if (u->vkey && !u->vkey->getVerse()) { + if (!u->vkey->getChapter()) { + if (!u->vkey->getBook()) { + if (!u->vkey->getTestament()) { + outText(SWBuf("<h1 class=\"moduleHeader") + classExtras + "\">", buf, u); tag.setAttribute("pushed", "h1"); } else { - buf += SWBuf("<h1 class=\"testamentHeader ") + (keepType ? type : "") + "\">"; + outText(SWBuf("<h1 class=\"testamentHeader") + classExtras + "\">", buf, u); tag.setAttribute("pushed", "h1"); } } else { - buf += SWBuf("<h1 class=\"bookHeader ") + (keepType ? type : "") + "\">"; + outText(SWBuf("<h1 class=\"bookHeader") + classExtras + "\">", buf, u); tag.setAttribute("pushed", "h1"); } } else { - buf += SWBuf("<h2 class=\"chapterHeader ") + (keepType ? type : "") + "\">"; + outText(SWBuf("<h2 class=\"chapterHeader") + classExtras + "\">", buf, u); tag.setAttribute("pushed", "h2"); } } else { - buf += SWBuf("<h3 class=\"") + (keepType ? 
type : "") + "\">"; + outText(SWBuf("<h3 class=\"title") + classExtras + "\">", buf, u); tag.setAttribute("pushed", "h3"); } u->titleStack->push(tag.toString()); @@ -510,10 +573,10 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if (u->titleStack->size()) u->titleStack->pop(); SWBuf pushed = tag.getAttribute("pushed"); if (pushed.size()) { - buf += (SWBuf)"</" + pushed + ">\n\n"; + outText((SWBuf)"</" + pushed + ">\n\n", buf, u); } else { - buf += "</h3>\n\n"; + outText( "</h3>\n\n", buf, u); } ++u->consecutiveNewlines; u->supressAdjacentWhitespace = true; @@ -547,10 +610,12 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * // <catchWord> & <rdg> tags (italicize) else if (!strcmp(tag.getName(), "rdg") || !strcmp(tag.getName(), "catchWord")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { - outText("<i>", buf, u); + outText("<span class=\"", buf, u); + outText(tag.getName(), buf, u); + outText("\">", buf, u); } else if (tag.isEndTag()) { - outText("</i>", buf, u); + outText("</span>", buf, u); } } @@ -617,7 +682,7 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * else if (type == "ol") { outText("</span>", buf, u); } - else if (type == "sup") { + else if (type == "super") { outText("</sup>", buf, u); } else if (type == "sub") { @@ -700,17 +765,16 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf type = tag.getAttribute("type"); u->lastTransChange = type; - - // just do all transChange tags this way for now - if ((type == "added") || (type == "supplied")) - outText("<span class=\"transChangeSupplied\">", buf, u); - else if (type == "tenseChange") - buf += "*"; + + outText("<span class=\"transChange", buf, u); + if (type.length()) { + outText(" transChange-", buf, u); + outText(type, buf, u); + } + outText("\">", buf, u); } else if (tag.isEndTag()) { - SWBuf type = 
u->lastTransChange; - if ((type == "added") || (type == "supplied")) - outText("</span>", buf, u); + outText("</span>", buf, u); } else { // empty transChange marker? } @@ -754,22 +818,42 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } else if (type == "majorSection") { } - else { - buf += tag; + else if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf type = tag.getAttribute("type"); + outText("<div class=\"", buf, u); + outText(type, buf, u); + outText("\">", buf, u); + } + else if (tag.isEndTag()) { + outText("</div>", buf, u); } + else if (!(type == "colophon")) outText(tag, buf, u); + } else if (!strcmp(tag.getName(), "span")) { - buf += tag; + outText(tag, buf, u); + } + else if (!strcmp(tag.getName(), "abbr")) { + if (!tag.isEndTag()) { + SWBuf title = tag.getAttribute("expansion"); + outText("<abbr title=\"", buf, u); + outText(title, buf, u); + outText("\">", buf, u); + } + else if (tag.isEndTag()) { + outText("</abbr>", buf, u); + } + } else if (!strcmp(tag.getName(), "br")) { - buf += tag; + outText( tag, buf, u); } else if (!strcmp(tag.getName(), "table")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "<table><tbody>\n"; + outText( "<table><tbody>\n", buf, u); } else if (tag.isEndTag()) { - buf += "</tbody></table>\n"; + outText( "</tbody></table>\n", buf, u); ++u->consecutiveNewlines; u->supressAdjacentWhitespace = true; } @@ -777,19 +861,19 @@ bool OSISXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } else if (!strcmp(tag.getName(), "row")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "\t<tr>"; + outText( "\t<tr>", buf, u); } else if (tag.isEndTag()) { - buf += "</tr>\n"; + outText( "</tr>\n", buf, u); } } else if (!strcmp(tag.getName(), "cell")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "<td>"; + outText( "<td>", buf, u); } else if (tag.isEndTag()) { - buf += "</td>"; + outText( "</td>", buf, u); } } else { diff --git a/src/modules/filters/rtfhtml.cpp 
b/src/modules/filters/rtfhtml.cpp index df02f38..81a38ea 100644 --- a/src/modules/filters/rtfhtml.cpp +++ b/src/modules/filters/rtfhtml.cpp @@ -2,7 +2,7 @@ * * rtfhtml.cpp - filter to convert RTF to HTML * - * $Id: rtfhtml.cpp 2981 2013-09-15 00:05:26Z scribe $ + * $Id: rtfhtml.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 1999 The team of Bibletime (info@bibletime.de) * Copyright 2000-2013 CrossWire Bible Society (http://www.crosswire.org) @@ -55,7 +55,7 @@ char RTFHTML::processText(SWBuf &text, const SWKey *key, const SWModule *module) num.append(from, end-from); __s16 n = atoi(num.c_str()); __u32 u = (__u16)n; - text.append(getUTF8FromUniChar(u)); + getUTF8FromUniChar(u, &text); from += (end-from); continue; } diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp index c68ea1f..73e4a3f 100644 --- a/src/modules/filters/scsuutf8.cpp +++ b/src/modules/filters/scsuutf8.cpp @@ -3,9 +3,9 @@ * scsuutf8.cpp - SWFilter descendant to convert a SCSU character to * UTF-8 * - * $Id: scsuutf8.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: scsuutf8.cpp 3472 2017-05-22 04:19:02Z scribe $ * - * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) + * Copyright 2001-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society * P. O. 
Box 2528 * Tempe, AZ 85280-2528 @@ -23,223 +23,240 @@ /* This class is based on: * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl - * on Andrea's balcony in North Amsterdam on 1998-08-04 - * Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion - * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 - * + * * This is a deflator to UTF-8 output for input compressed in SCSU, * the (Reuters) Standard Compression Scheme for Unicode as described * in http://www.unicode.org/unicode/reports/tr6.html */ -#include <stdlib.h> -#include <stdio.h> -#include <swmodule.h> - #include <scsuutf8.h> +#include <swbuf.h> +#ifdef _ICU_ +#include <unicode/unistr.h> +#endif + SWORD_NAMESPACE_START SCSUUTF8::SCSUUTF8() { +#ifdef _ICU_ + // initialize SCSU converter + scsuConv = ucnv_open("SCSU", &err); + // initialize UTF-8 converter + utf8Conv = ucnv_open("UTF-8", &err); +#else + active = 0; + mode = 0; +#endif } - -unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) -{ - /* join UTF-16 surrogates without any pairing sanity checks */ - - static int d; - - if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } - if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } - - /* output one character as UTF-8 multibyte sequence */ - - if (uchar < 0x80) { - *text++ = c; - } - else if (uchar < 0x800) { - *text++ = 0xc0 | uchar >> 6; - *text++ = 0x80 | (uchar & 0x3f); - } - else if (uchar < 0x10000) { - *text++ = 0xe0 | uchar >> 12; - *text++ = 0x80 | (uchar >> 6 & 0x3f); - *text++ = 0x80 | (uchar & 0x3f); - } - else if (uchar < 0x200000) { - *text++ = 0xf0 | uchar >> 18; - *text++ = 0x80 | (uchar >> 12 & 0x3f); - *text++ = 0x80 | (uchar >> 6 & 0x3f); - *text++ = 0x80 | (uchar & 0x3f); - } - - return text; +SCSUUTF8::~SCSUUTF8() { +#ifdef _ICU_ + ucnv_close(scsuConv); + ucnv_close(utf8Conv); +#endif } - -char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) 
{ -/* - unsigned char *to, *from; - unsigned long buflen = len * FILTERPAD; - char active = 0, mode = 0; - if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering - return -1; - - static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; - static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; - static unsigned short win[256] = { - 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, - 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, - 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, - 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, - 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, - 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, - 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, - 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, - 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, - 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, - 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, - 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, - 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, - 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, - 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, - 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, - 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, - 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, - 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, - 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, - 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 - }; - - if (!len) - return 0; - - memmove(&text[buflen - len], text, len); - from = (unsigned char*)&text[buflen - len]; - to = (unsigned char *)text; - - // ------------------------------- - - for (int i = 0; i < len;) { - - - if (i >= len) break; - c = from[i++]; - - if (c >= 0x80) - { - to = UTF8Output (c - 0x80 + slide[active], to); +#ifndef _ICU_ +unsigned short SCSUUTF8::start[] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; +unsigned short SCSUUTF8::slide[] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; +unsigned short SCSUUTF8::win[] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 
0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60, +}; + +int SCSUUTF8::UTF8Output(unsigned long uchar, SWBuf* utf8Buf) +{ + // join UTF-16 surrogates without any pairing sanity checks + if (uchar >= 0xd800 && uchar <= 0xdbff) { + d = uchar & 0x3ff; + return 0; } - else if (c >= 0x20 && c <= 0x7F) - { - to = UTF8Output (c, to); + if (uchar >= 0xdc00 && uchar <= 0xdfff) { + uchar = uchar + 0x2400 + d * 0x400; } - else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) - { - to = UTF8Output (c, to); + + // output one character as UTF-8 multibyte sequence + + if (uchar < 0x80) { + utf8Buf += uchar; } - else if (c >= 0x1 && c <= 0x8) // SQn - { - if (i >= len) break; - d = from[i++]; // single quote - - to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : - d - 0x80 + slide [c - 0x1], to); + else if (uchar < 0x800) { + utf8Buf += (0xc0 | (uchar>>6)); + utf8Buf += (0x80 | (uchar & 0x3f)); } - else if (c >= 0x10 && c <= 0x17) // SCn - { - active = c - 0x10; // change window + else if (uchar < 0x10000) { + utf8Buf += (0xe0 | (uchar>>12)); + utf8Buf += (0x80 | (uchar>>6 & 0x3f)); + utf8Buf += (0x80 | (uchar & 0x3f)); } - else if (c >= 0x18 && c <= 0x1F) // SDn - { - active = c - 0x18; // define window - if (i >= len) break; - slide [active] = win [from[i++]]; + else if (uchar < 0x200000) { + utf8Buf += (0xf0 | (uchar>>18)); + utf8Buf += (0x80 | (uchar>>12 & 0x3f)); + utf8Buf += (0x80 | (uchar>>6 & 0x3f)); + utf8Buf += (0x80 | (uchar & 0x3f)); } - else if (c == 0xB) // SDX - { - if (i >= len) break; - c = from[i++]; - - if (i >= len) break; - d = from[i++]; + + return 0; +} +#endif - slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); +char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + +#ifdef _ICU_ + // Try decoding with ICU if possible + err = U_ZERO_ERROR; + UnicodeString utf16Text(text.getRawData(), text.length(), scsuConv, err); + err = U_ZERO_ERROR; + int32_t len = utf16Text.extract(text.getRawData(), text.size(), utf8Conv, err); + if (len > (int32_t)text.size()+1) { + text.setSize(len+1); + utf16Text.extract(text.getRawData(), text.size(), utf8Conv, err); } - else if (c == 0xE) // SQU - { +#else + // If ICU is unavailable, decode using Czyborra's decoder + SWBuf utf8Buf = ""; + int len = text.length(); + const char* scsuString = text.c_str(); + + for (int i = 0; i < len;) { + if (i >= len) break; - c = from[i++]; // SQU - - if (i >= len) break; - to = UTF8Output (c << 8 | from[i++], to); + c = scsuString[i++]; + + if (c >= 0x80) + { + UTF8Output(c - 0x80 + slide[active], &utf8Buf); } - else if (c == 0xF) // SCU - { - mode = 1; // change to Unicode mode - - while (mode) 
- { - if (i >= len) break; - c = from[i++]; - - if (c <= 0xDF || c >= 0xF3) + else if (c >= 0x20 && c <= 0x7F) { - if (i >= len) break; - to = UTF8Output (c << 8 | from[i++], to); + UTF8Output(c, &utf8Buf); } - else if (c == 0xF0) // UQU + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) { - if (i >= len) break; - c = from[i++]; - - if (i >= len) break; - to = UTF8Output (c << 8 | from[i++], to); + UTF8Output(c, &utf8Buf); } - else if (c >= 0xE0 && c <= 0xE7) // UCn + else if (c >= 0x1 && c <= 0x8) // SQn { - active = c - 0xE0; mode = 0; + if (i >= len) break; + d = scsuString[i++]; // single quote + + UTF8Output(d < 0x80 ? d + start[c - 0x1] : + d - 0x80 + slide[c - 0x1], &utf8Buf); } - else if (c >= 0xE8 && c <= 0xEF) // UDn + else if (c >= 0x10 && c <= 0x17) // SCn { - if (i >= len) break; - slide [active=c-0xE8] = win [from[i++]]; mode = 0; + active = c - 0x10; // change window } - else if (c == 0xF1) // UDX + else if (c >= 0x18 && c <= 0x1F) // SDn { - if (i >= len) break; - c = from[i++]; - - if (i >= len) break; - d = from[i++]; - - slide [active = c>>5] = - 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + active = c - 0x18; // define window + if (i >= len) break; + slide[active] = win[(unsigned char)scsuString[i++]]; + } + else if (c == 0xB) // SDX + { + if (i >= len) break; + c = scsuString[i++]; + + if (i >= len) break; + d = scsuString[i++]; + + slide[active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) // SQU + { + if (i >= len) break; + c = scsuString[i++]; // SQU + + if (i >= len) break; + UTF8Output(c << 8 | scsuString[i++], &utf8Buf); + } + else if (c == 0xF) // SCU + { + mode = 1; // change to Unicode mode + + while (mode) + { + if (i >= len) break; + c = scsuString[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + UTF8Output(c << 8 | scsuString[i++], &utf8Buf); + } + else if (c == 0xF0) // UQU + { + if (i >= len) break; + c = scsuString[i++]; + + if (i >= len) break; + UTF8Output(c 
<< 8 | scsuString[i++], &utf8Buf); + } + else if (c >= 0xE0 && c <= 0xE7) // UCn + { + active = c - 0xE0; + mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) // UDn + { + if (i >= len) break; + slide[active=c-0xE8] = win[(unsigned char)scsuString[i++]]; + mode = 0; + } + else if (c == 0xF1) // UDX + { + if (i >= len) break; + c = scsuString[i++]; + + if (i >= len) break; + d = scsuString[i++]; + + slide[active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); + mode = 0; + } + } } - } } - - - } - - *to++ = 0; - *to = 0; -*/ - return 0; +#endif + + return 0; } diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp index e66017e..8e3ce0a 100644 --- a/src/modules/filters/swbasicfilter.cpp +++ b/src/modules/filters/swbasicfilter.cpp @@ -5,7 +5,7 @@ * many filters will need and can use as a starting * point. * - * $Id: swbasicfilter.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: swbasicfilter.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -29,6 +29,7 @@ #include <stdarg.h> #include <utilstr.h> #include <stringmgr.h> +#include <versekey.h> #include <map> #include <set> @@ -54,6 +55,19 @@ const char SWBasicFilter::POSTCHAR = 4; const char SWBasicFilter::FINALIZE = 8; +BasicFilterUserData::BasicFilterUserData(const SWModule *module, const SWKey *key) { + this->module = module; + this->key = key; + suspendTextPassThru = false; + supressAdjacentWhitespace = false; + vkey = 0; + SWTRY { + vkey = SWDYNAMIC_CAST(VerseKey, key); + } + SWCATCH ( ... 
) { } +} + + SWBasicFilter::SWBasicFilter() { p = new Private; @@ -383,6 +397,9 @@ char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *m } escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; lastTextNode = ""; + if (!userData->suspendTextPassThru) { + userData->lastSuspendSegment.size(0); + } continue; } } @@ -398,7 +415,6 @@ char SWBasicFilter::processText(SWBuf &text, const SWKey *key, const SWModule *m if ((!userData->supressAdjacentWhitespace) || (*from != ' ')) { if (!userData->suspendTextPassThru) { text.append(*from); - userData->lastSuspendSegment.size(0); } else userData->lastSuspendSegment.append(*from); lastTextNode.append(*from); diff --git a/src/modules/filters/teihtmlhref.cpp b/src/modules/filters/teihtmlhref.cpp index f08f994..443fd9c 100644 --- a/src/modules/filters/teihtmlhref.cpp +++ b/src/modules/filters/teihtmlhref.cpp @@ -2,7 +2,7 @@ * * teihtmlhref.cpp - TEI to HTML with hrefs filter * - * $Id: teihtmlhref.cpp 3160 2014-04-17 04:02:17Z greg.hellings $ + * $Id: teihtmlhref.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2008-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -33,10 +33,10 @@ SWORD_NAMESPACE_START TEIHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { - BiblicalText = false; + isBiblicalText = false; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -132,7 +132,7 @@ bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData if (n != "") { buf += "<br /><b>"; buf += n; - buf += "</b>"; + buf += "</b> "; } } } @@ -221,7 +221,7 @@ bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData // Compensate for starting : ref = the_ref + 1; - int size = target.size() - ref.size() - 1; + int size = (int)(target.size() - ref.size() - 
1); work.setSize(size); strncpy(work.getRawData(), target, size); } @@ -275,6 +275,52 @@ bool TEIHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData u->suspendTextPassThru = false; } } + // <graphic> image tag + else if (!strcmp(tag.getName(), "graphic")) { + const char *url = tag.getAttribute("url"); + if (url) { // assert we have a url attribute + SWBuf filepath; + if (userData->module) { + filepath = userData->module->getConfigEntry("AbsoluteDataPath"); + if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (url[0] != '/')) + filepath += '/'; + } + filepath += url; + // images become clickable, if the UI supports showImage. + buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><img src=\"file:%s\" border=\"0\" /></a>", + URL::encode(filepath.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + filepath.c_str()); + u->suspendTextPassThru = true; + } + } + // <table> <row> <cell> + else if (!strcmp(tag.getName(), "table")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<table><tbody>\n"; + } + else if (tag.isEndTag()) { + buf += "</tbody></table>\n"; + u->supressAdjacentWhitespace = true; + } + + } + else if (!strcmp(tag.getName(), "row")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "\t<tr>"; + } + else if (tag.isEndTag()) { + buf += "</tr>\n"; + } + } + else if (!strcmp(tag.getName(), "cell")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<td>"; + } + else if (tag.isEndTag()) { + buf += "</td>"; + } + } else { return false; // we still didn't handle token diff --git a/src/modules/filters/teilatex.cpp b/src/modules/filters/teilatex.cpp new file mode 100644 index 0000000..8b05107 --- /dev/null +++ b/src/modules/filters/teilatex.cpp @@ -0,0 +1,326 @@ +/*************************************************************************** + * + * teilatex.cpp - TEI to LATEX filter + * + * $Id: teilatex.cpp 3548 2017-12-10 05:11:38Z scribe $ + * + * Copyright 
2012-2014 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <ctype.h> +#include <teilatex.h> +#include <utilxml.h> +#include <swmodule.h> +#include <url.h> +#include <iostream> + + +SWORD_NAMESPACE_START + + +TEILaTeX::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + isBiblicalText = false; + if (module) { + version = module->getName(); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + } +} + + +TEILaTeX::TEILaTeX() { + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addAllowedEscapeString("quot"); + addAllowedEscapeString("apos"); + addAllowedEscapeString("amp"); + addAllowedEscapeString("lt"); + addAllowedEscapeString("gt"); + + setTokenCaseSensitive(true); + + renderNoteNumbers = false; +} + +bool TEILaTeX::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) { + // manually process if it wasn't a simple substitution + if (!substituteToken(buf, token)) { + MyUserData *u = (MyUserData *)userData; + XMLTag tag(token); + + if (!strcmp(tag.getName(), "p")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { // non-empty start tag + buf += ""; + } + else if (tag.isEndTag()) { // end tag + buf += "//\n"; + //userData->supressAdjacentWhitespace = true; + } + else { // empty paragraph break marker + buf += "//\n"; + 
//userData->supressAdjacentWhitespace = true; + } + } + + // <hi> + else if (!strcmp(tag.getName(), "hi")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf rend = tag.getAttribute("rend"); + + u->lastHi = rend; + if (rend == "italic" || rend == "ital") + buf += "\\it{"; + else if (rend == "bold") + buf += "\\bd{"; + else if (rend == "super" || rend == "sup") + buf += "^{"; + else if (rend == "sub") + buf += "_{"; + else if (rend == "overline") + buf += "\\overline{"; + + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + // <entryFree> + else if (!strcmp(tag.getName(), "entryFree")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf n = tag.getAttribute("n"); + if (n != "") { + buf += "\\teiEntryFree{"; + buf += n; + buf += "}"; + } + } + } + + // <sense> + else if (!strcmp(tag.getName(), "sense")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + SWBuf n = tag.getAttribute("n"); + if (n != "") { + buf += "\n\\teiSense{"; + buf += n; + buf += "}"; + } + } + } + + // <div> + else if (!strcmp(tag.getName(), "div")) { + + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += ""; + } + else if (tag.isEndTag()) { + } + } + + // <lb.../> + else if (!strcmp(tag.getName(), "lb")) { + buf += "//\n"; + } + + // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> + else if (!strcmp(tag.getName(), "pos") || + !strcmp(tag.getName(), "gen") || + !strcmp(tag.getName(), "case") || + !strcmp(tag.getName(), "gram") || + !strcmp(tag.getName(), "number") || + !strcmp(tag.getName(), "pron") || + !strcmp(tag.getName(), "tr") || + !strcmp(tag.getName(), "orth") || + !strcmp(tag.getName(), "etym") || + !strcmp(tag.getName(), "usg") || + + + !strcmp(tag.getName(), "def")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "\\tei"; + buf += tag.getName(); + buf += "{"; + } + else if (tag.isEndTag()) { + buf += "}"; + } + } + + else if (!strcmp(tag.getName(), "ref")) { + if (!tag.isEndTag()) { + u->suspendTextPassThru = true; + SWBuf target; + SWBuf 
work; + SWBuf ref; + + int was_osisref = false; + if(tag.getAttribute("osisRef")) + { + target += tag.getAttribute("osisRef"); + was_osisref=true; + } + else if(tag.getAttribute("target")) + target += tag.getAttribute("target"); + + if(target.size()) + { + const char* the_ref = strchr(target, ':'); + + if(!the_ref) { + // No work + ref = target; + } + else { + // Compensate for starting : + ref = the_ref + 1; + + int size = target.size() - ref.size() - 1; + work.setSize(size); + strncpy(work.getRawData(), target, size); + } + + if(was_osisref) + { + buf.appendFormatted("\\swordref{%s}{%s}{", + (ref) ? ref.c_str() : "", + (work.size()) ? work.c_str() : "" ); + } + else + { + // Dictionary link, or something + buf.appendFormatted("\\sworddictref{%s}{%s}{", + (work.size()) ? work.c_str() : u->version.c_str(), + (ref) ? ref.c_str() : "" + ); + } + } + else + { + //std::cout << "TARGET WASN'T\n"; + } + + } + else { + buf += u->lastTextNode.c_str(); + buf += "}"; + + u->suspendTextPassThru = false; + } + } + + // <note> tag + else if (!strcmp(tag.getName(), "note")) { + if (!tag.isEndTag()) { + if (!tag.isEmpty()) { + u->suspendTextPassThru = true; + } + } + if (tag.isEndTag()) { + SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); + SWBuf noteName = tag.getAttribute("n"); + SWBuf footnoteBody = ""; + if (u->module){ + footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"]; + } + + buf.appendFormatted("\\swordfootnote{%s}{%s}{%s}{%s}{", + footnoteNumber.c_str(), + u->version.c_str(), + u->key->getText(), + renderNoteNumbers ? 
noteName.c_str() : ""); + if (u->module) { + buf += u->module->renderText(footnoteBody).c_str(); + } + u->suspendTextPassThru = false; + } + } + + // <graphic> image tag + else if (!strcmp(tag.getName(), "graphic")) { + const char *url = tag.getAttribute("url"); + if (url) { // assert we have a url attribute + SWBuf filepath; + if (userData->module) { + filepath = userData->module->getConfigEntry("AbsoluteDataPath"); + if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (url[0] != '/')) + filepath += '/'; + } + filepath += url; + + buf.appendFormatted("\\figure{\\includegraphics{%s}}", + filepath.c_str()); + u->suspendTextPassThru = false; + + } + } + + // <table> <row> <cell> + else if (!strcmp(tag.getName(), "table")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "\n\\begin{tabular}"; + } + else if (tag.isEndTag()) { + buf += "\n\\end{tabular}"; + ++u->consecutiveNewlines; + u->supressAdjacentWhitespace = true; + } + + } + else if (!strcmp(tag.getName(), "row")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "\n"; + u->firstCell = true; + } + else if (tag.isEndTag()) { + buf += "//"; + u->firstCell = false; + } + + } + else if (!strcmp(tag.getName(), "cell")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + if (u->firstCell == false) { + buf += " & "; + } + else { + u->firstCell = false; + } + } + else if (tag.isEndTag()) { + buf += ""; + } + } + + + else { + return false; // we still didn't handle token + } + + } + return true; +} + + +SWORD_NAMESPACE_END + diff --git a/src/modules/filters/teirtf.cpp b/src/modules/filters/teirtf.cpp index 1213221..0871270 100644 --- a/src/modules/filters/teirtf.cpp +++ b/src/modules/filters/teirtf.cpp @@ -2,7 +2,7 @@ * * teirtf.cpp - TEI to RTF filter * - * $Id: teirtf.cpp 3160 2014-04-17 04:02:17Z greg.hellings $ + * $Id: teirtf.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2006-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -31,11 +31,11 @@ 
SWORD_NAMESPACE_START TEIRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { - BiblicalText = false; + isBiblicalText = false; inOsisRef = false; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -170,13 +170,7 @@ bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *use SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); - VerseKey *vkey = 0; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { buf.appendFormatted("{\\super <a href=\"\">*%s</a>} ", footnoteNumber.c_str()); } u->suspendTextPassThru = true; @@ -205,6 +199,22 @@ bool TEIRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *use } } + else if (!strcmp(tag.getName(), "graphic")) { + const char *src = tag.getAttribute("url"); + if (!src) // assert we have a src attribute + return false; + + char* filepath = new char[strlen(u->module->getConfigEntry("AbsoluteDataPath")) + strlen(token)]; + *filepath = 0; + strcpy(filepath, userData->module->getConfigEntry("AbsoluteDataPath")); + strcat(filepath, src); + +// we do this because BibleCS looks for this EXACT format for an image tag + buf += "<img src=\""; + buf += filepath; + buf += "\" />"; + delete [] filepath; + } else { return false; // we still didn't handle token diff --git a/src/modules/filters/teixhtml.cpp b/src/modules/filters/teixhtml.cpp index 4ae0d89..1eab978 100644 --- a/src/modules/filters/teixhtml.cpp +++ b/src/modules/filters/teixhtml.cpp @@ -2,7 +2,7 @@ * * teixhtml.cpp - TEI to XHTML filter * - * $Id: teixhtml.cpp 3161 2014-04-17 04:04:03Z greg.hellings $ + * $Id: teixhtml.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2012-2013 CrossWire Bible Society 
(http://www.crosswire.org) * CrossWire Bible Society @@ -32,11 +32,41 @@ SWORD_NAMESPACE_START +const char *TEIXHTML::getHeader() const { + // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> <tr> <orth> <etym> <usg> + const static char *header = "\n\ + .entryFree, .form, .etym, .def, .usg, .quote {display:block;}\n\ + .pron, .pos, .oVar, .ref, {display:inline}\n\ + [type=headword] {font-weight:bold; font-variant:small-caps; text-decoration:underline;}\n\ + [type=derivative] {font-weight:bold; font-variant:small-caps;}\n\ + [rend=italic] {font-style:italic;}\n\ + [rend=bold] {font-weight:bold;}\n\ + [rend=small-caps] {font-variant:small-caps}\n\ + .pos:before {content: \"Pos.: \"; font-weight:bold;}\n\ + .pron:before {content:\" \\\\ \";}\n\ + .pron:after {content:\" \\\\ \";}\n\ + .etym:before {content:\"Etym.:\"; display:block; font-weight:bold;}\n\ + .usg:before {content:\"Usg.:\"; display:block; font-weight:bold;}\n\ + .def:before {content:\"Def.:\" display:block; font-weight:bold;}\n\ + .quote {background-color:#cfcfdf; padding:0.3em; margin:0.5em; border-width:1px; border-style:solid;}\n\ + .cit:before {content:\"quote:\" ; display:block; margin-top:0.5em; font-size:small;}\n\ + .cit {align:center;}\n\ + .persName:before {content:\" (\"; font-size:small;}\n\ + .persName:after {content:\") \"; font-size:small;}\n\ + .persName {font-size:small;}\n\ + .number {font-style:bold;}\n\ + .def {font-style:bold;}\n\ + "; + return header; +} + + + TEIXHTML::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { - BiblicalText = false; + isBiblicalText = false; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -119,9 +149,9 @@ bool TEIXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf n = tag.getAttribute("n"); 
if (n != "") { - buf += "<b>"; + buf += "<span class=\"entryFree\">"; buf += n; - buf += "</b>"; + buf += "</span>"; } } } @@ -130,11 +160,17 @@ bool TEIXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u else if (!strcmp(tag.getName(), "sense")) { if ((!tag.isEndTag()) && (!tag.isEmpty())) { SWBuf n = tag.getAttribute("n"); + buf += "<br/><span class=\"sense"; if (n != "") { - buf += "<br /><b>"; + buf += "\" n=\""; buf += n; - buf += "</b>"; + + } + buf += "\">"; + } + else if (tag.isEndTag()) { + buf += "</span> "; } } @@ -153,47 +189,41 @@ bool TEIXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u buf += "<br />"; } - // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> + // <pos>, <gen>, <case>, <gram>, <number>, <mood>, <pron>, <def> <tr> <orth> <etym> <usg> else if (!strcmp(tag.getName(), "pos") || !strcmp(tag.getName(), "gen") || !strcmp(tag.getName(), "case") || !strcmp(tag.getName(), "gram") || !strcmp(tag.getName(), "number") || - !strcmp(tag.getName(), "pron") /*|| - !strcmp(tag.getName(), "def")*/) { - if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "<i>"; - } - else if (tag.isEndTag()) { - buf += "</i>"; - } - } - - // <tr> - else if (!strcmp(tag.getName(), "tr")) { - if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "<i>"; - } - else if (tag.isEndTag()) { - buf += "</i>"; - } - } - - // orth - else if (!strcmp(tag.getName(), "orth")) { + !strcmp(tag.getName(), "pron") || + !strcmp(tag.getName(), "def") || + !strcmp(tag.getName(), "tr") || + !strcmp(tag.getName(), "orth") || + !strcmp(tag.getName(), "etym") || + !strcmp(tag.getName(), "usg") || + !strcmp(tag.getName(), "quote")|| + !strcmp(tag.getName(), "cit")|| + !strcmp(tag.getName(), "persName")|| + !strcmp(tag.getName(), "oVar")) + { if ((!tag.isEndTag()) && (!tag.isEmpty())) { - buf += "<b>"; + buf += "<span class=\""; + buf += tag.getName(); + if (tag.getAttribute("type")) { + buf += "\" type =\""; + buf += 
tag.getAttribute("type"); + } + if (tag.getAttribute("rend")) { + buf += "\" rend =\""; + buf += tag.getAttribute("rend"); + } + buf += "\">"; } else if (tag.isEndTag()) { - buf += "</b>"; + buf += "</span>"; } } - // <etym>, <usg> - else if (!strcmp(tag.getName(), "etym") || - !strcmp(tag.getName(), "usg")) { - // do nothing here - } else if (!strcmp(tag.getName(), "ref")) { if (!tag.isEndTag()) { u->suspendTextPassThru = true; @@ -276,7 +306,50 @@ bool TEIXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u u->suspendTextPassThru = false; } } - + // <graphic> image tag + else if (!strcmp(tag.getName(), "graphic")) { + const char *url = tag.getAttribute("url"); + if (url) { // assert we have a url attribute + SWBuf filepath; + if (userData->module) { + filepath = userData->module->getConfigEntry("AbsoluteDataPath"); + if ((filepath.size()) && (filepath[filepath.size()-1] != '/') && (url[0] != '/')) + filepath += '/'; + } + filepath += url; + buf.appendFormatted("<a href=\"passagestudy.jsp?action=showImage&value=%s&module=%s\"><img src=\"file:%s\" border=\"0\" /></a>", + URL::encode(filepath.c_str()).c_str(), + URL::encode(u->version.c_str()).c_str(), + filepath.c_str()); + u->suspendTextPassThru = false; + } + } + // <table> <row> <cell> + else if (!strcmp(tag.getName(), "table")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<table><tbody>\n"; + } + else if (tag.isEndTag()) { + buf += "</tbody></table>\n"; + u->supressAdjacentWhitespace = true; + } + } + else if (!strcmp(tag.getName(), "row")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "\t<tr>"; + } + else if (tag.isEndTag()) { + buf += "</tr>\n"; + } + } + else if (!strcmp(tag.getName(), "cell")) { + if ((!tag.isEndTag()) && (!tag.isEmpty())) { + buf += "<td>"; + } + else if (tag.isEndTag()) { + buf += "</td>"; + } + } else { return false; // we still didn't handle token } diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp index 
e85fdef..14e0203 100644 --- a/src/modules/filters/thmlgbf.cpp +++ b/src/modules/filters/thmlgbf.cpp @@ -2,7 +2,7 @@ * * thmlgbf.cpp - ThML to GBF filter * - * $Id: thmlgbf.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: thmlgbf.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -269,9 +269,11 @@ char ThMLGBF::processText(SWBuf &text, const SWKey *key, const SWModule *module) continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } else text += *from; } diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp index 99bd12a..8dfcb4e 100644 --- a/src/modules/filters/thmlheadings.cpp +++ b/src/modules/filters/thmlheadings.cpp @@ -3,7 +3,7 @@ * thmlheadings.cpp - SWFilter descendant to hide or show headings * in a ThML module * - * $Id: thmlheadings.cpp 3195 2014-04-24 03:03:26Z greg.hellings $ + * $Id: thmlheadings.cpp 3191 2014-04-19 17:06:38Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp index b5f624e..30213f5 100644 --- a/src/modules/filters/thmlhtml.cpp +++ b/src/modules/filters/thmlhtml.cpp @@ -2,7 +2,7 @@ * * thmlhtml.cpp - ThML to HTML filter * - * $Id: thmlhtml.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: thmlhtml.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -185,17 +185,17 @@ bool ThMLHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *u } } else if (!strcmp(tag.getName(), "div")) { - if (tag.isEndTag() && (u->SecHead)) { + if (tag.isEndTag() && (u->inSecHead)) { buf += "</i></b><br />"; - u->SecHead = false; + u->inSecHead = false; } else if (tag.getAttribute("class")) { if 
(!strcmp(tag.getAttribute("class"), "sechead")) { - u->SecHead = true; + u->inSecHead = true; buf += "<br /><b><i>"; } else if (!strcmp(tag.getAttribute("class"), "title")) { - u->SecHead = true; + u->inSecHead = true; buf += "<br /><b><i>"; } } diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp index f72a93e..6523aa3 100644 --- a/src/modules/filters/thmlhtmlhref.cpp +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -2,7 +2,7 @@ * * thmlhtmlhref.cpp - ThML to HTML filter with hrefs * - * $Id: thmlhtmlhref.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: thmlhtmlhref.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -33,10 +33,11 @@ SWORD_NAMESPACE_START ThMLHTMLHREF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + isBiblicalText = false; + inSecHead = false; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); - SecHead = false; + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -209,20 +210,14 @@ bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", ch, URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), ch, ch, (renderNoteNumbers ? URL::encode(noteName.c_str()).c_str() : "")); @@ -256,7 +251,7 @@ bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat } } if (tag.isEndTag()) { // </scripRef> - if (!u->BiblicalText) { + if (!u->isBiblicalText) { SWBuf refList = u->startTag.getAttribute("passage"); if (!refList.length()) refList = u->lastTextNode; @@ -271,19 +266,13 @@ bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat else { SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) {} - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. - //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str()); + //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str()); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup class=\"x\">*x%s</sup></small></a>", URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), (renderNoteNumbers ? 
URL::encode(noteName.c_str()).c_str() : "")); } } @@ -293,17 +282,17 @@ bool ThMLHTMLHREF::handleToken(SWBuf &buf, const char *token, BasicFilterUserDat } } else if (tag.getName() && !strcmp(tag.getName(), "div")) { - if (tag.isEndTag() && u->SecHead) { + if (tag.isEndTag() && u->inSecHead) { buf += "</i></b><br />"; - u->SecHead = false; + u->inSecHead = false; } else if (tag.getAttribute("class")) { if (!stricmp(tag.getAttribute("class"), "sechead")) { - u->SecHead = true; + u->inSecHead = true; buf += "<br /><b><i>"; } else if (!stricmp(tag.getAttribute("class"), "title")) { - u->SecHead = true; + u->inSecHead = true; buf += "<br /><b><i>"; } else { diff --git a/src/modules/filters/thmllatex.cpp b/src/modules/filters/thmllatex.cpp new file mode 100644 index 0000000..7213249 --- /dev/null +++ b/src/modules/filters/thmllatex.cpp @@ -0,0 +1,396 @@ +/****************************************************************************** + * + * thmllatex.cpp - ThML to classed LaTeX + * + * $Id: thmllatex.cpp 3548 2017-12-10 05:11:38Z scribe $ + * + * Copyright 2011-2014 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ */
+
+#include <stdlib.h>
+#include <thmllatex.h>
+#include <swmodule.h>
+#include <utilxml.h>
+#include <utilstr.h>
+#include <versekey.h>
+#include <url.h>
+
+SWORD_NAMESPACE_START
+
+
+// Returns the header text for this filter's output.  The literal is a
+// backslash-newline continuation followed by indentation only, so the
+// header carries whitespace and no LaTeX.
+const char *ThMLLaTeX::getHeader() const {
+	return "\
+		";
+}
+
+
+// Per-render state for the ThML -> LaTeX filter.
+//   module - module being rendered (may be NULL)
+//   key    - key being rendered, forwarded to BasicFilterUserData
+// Records the module name in 'version' and whether the module type is
+// "Biblical Texts"; both keep their defaults when no module is given.
+ThMLLaTeX::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) {
+	isBiblicalText = false;
+	inSecHead = false;
+	if (module) {
+		version = module->getName();
+		// guard the type string before handing it to strcmp()
+		const char *modType = module->getType();
+		isBiblicalText = (modType && !strcmp(modType, "Biblical Texts"));
+	}
+}
+
+
+// Configures the token ("<" ... ">") and escape ("&" ... ";") delimiters,
+// registers the entity names that are allowed through as escapes, and
+// installs the simple substitutions for <scripture> / </scripture>.
+ThMLLaTeX::ThMLLaTeX() {
+	setTokenStart("<");
+	setTokenEnd(">");
+
+	setEscapeStart("&");
+	setEscapeEnd(";");
+
+	// must be set before the names are registered
+	setEscapeStringCaseSensitive(true);
+	setPassThruNumericEscapeString(true);
+
+	// Entity names accepted as escape strings.  ("nbsp" used to be
+	// registered twice; the redundant second registration is dropped.)
+	static const char *const allowedEscapes[] = {
+		"quot", "amp", "lt", "gt",
+
+		"nbsp",
+		"brvbar",	// "¦"
+		"sect",		// "§"
+		"copy",		// "©"
+		"laquo",	// "«"
+		"reg",		// "®"
+		"acute",	// "´"
+		"para",		// "¶"
+		"raquo",	// "»"
+
+		"Aacute", "Agrave", "Acirc", "Auml", "Atilde", "Aring",	// Á À Â Ä Ã Å
+		"aacute", "agrave", "acirc", "auml", "atilde", "aring",	// á à â ä ã å
+		"Eacute", "Egrave", "Ecirc", "Euml",			// É È Ê Ë
+		"eacute", "egrave", "ecirc", "euml",			// é è ê ë
+		"Iacute", "Igrave", "Icirc", "Iuml",			// Í Ì Î Ï
+		"iacute", "igrave", "icirc", "iuml",			// í ì î ï
+		"Oacute", "Ograve", "Ocirc", "Ouml", "Otilde",		// Ó Ò Ô Ö Õ
+		"oacute", "ograve", "ocirc", "ouml", "otilde",		// ó ò ô ö õ
+		"Uacute", "Ugrave", "Ucirc", "Uuml",			// Ú Ù Û Ü
+		"uacute", "ugrave", "ucirc", "uuml",			// ú ù û ü
+		"Yacute", "yacute", "yuml",				// Ý ý ÿ
+
+		"deg",		// "°"
+		"plusmn",	// "±"
+		"sup2",		// "²"
+		"sup3",		// "³"
+		"sup1",		// "¹"
+		"pound",	// "£"
+		"cent",		// "¢"
+		"frac14",	// "¼"
+		"frac12",	// "½"
+		"frac34",	// "¾"
+		"iquest",	// "¿"
+		"iexcl",	// "¡"
+		"ETH",		// "Ð"
+		"eth",		// "ð"
+		"THORN",	// "Þ"
+		"thorn",	// "þ"
+		"AElig",	// "Æ"
+		"aelig",	// "æ"
+		"Oslash",	// "Ø"
+		"curren",	// "¤"
+		"Ccedil",	// "Ç"
+		"ccedil",	// "ç"
+		"szlig",	// "ß"
+		"Ntilde",	// "Ñ"
+		"ntilde",	// "ñ"
+		"yen",		// "¥"
+		"not",		// "¬"
+		"ordf",		// "ª"
+		"uml",		// "¨"
+		"shy",		// soft hyphen
+		"macr",		// "¯"
+
+		"micro",	// "µ"
+		"middot",	// "·"
+		"cedil",	// "¸"
+		"ordm",		// "º"
+		"times",	// "×"
+		"divide",	// "÷"
+		"oslash"	// "ø"
+	};
+	for (unsigned int i = 0; i < sizeof(allowedEscapes) / sizeof(allowedEscapes[0]); ++i)
+		addAllowedEscapeString(allowedEscapes[i]);
+
+	setTokenCaseSensitive(true);
+	addTokenSubstitute("scripture", " \\swordquote{ ");
+	addTokenSubstitute("/scripture", "}");
+
+	renderNoteNumbers = false;
+}
+
+
+// Converts one ThML token (the text between '<' and '>') into LaTeX.
+//   buf      - output buffer the LaTeX is appended to
+//   token    - tag text without the surrounding delimiters
+//   userData - per-render state; really a MyUserData
+// Returns true when the token was handled (unknown tags are echoed back
+// verbatim), false only when an <img>/<image> tag has no usable src.
+//
+// Every branch emits balanced braces: a group opened on an opening tag is
+// closed on the matching closing tag, and self-closing tags close their
+// own group immediately.
+bool ThMLLaTeX::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *userData) {
+	if (!substituteToken(buf, token)) { // manually process if it wasn't a simple substitution
+		MyUserData *u = (MyUserData *)userData;
+
+		XMLTag tag(token);
+		if ((!tag.isEndTag()) && (!tag.isEmpty()))
+			u->startTag = tag;
+
+		// Some branches used to strcmp() the tag name unguarded; normalise a
+		// missing name to "" once so a nameless tag falls through to the
+		// pass-through branch instead of crashing.
+		const char *name = tag.getName();
+		if (!name)
+			name = "";
+
+		if (!strcmp(name, "sync")) {
+			SWBuf value = tag.getAttribute("value");
+			SWBuf syncType = tag.getAttribute("type");
+			if (!strcmp(syncType.c_str(), "morph")) { //>
+				if (value.length())
+					buf.appendFormatted("\\swordmorph[Greek]{%s}",
+						value.c_str());
+			}
+			else if (!strcmp(syncType.c_str(), "lemma")) { //>
+				if (value.length())
+					buf.appendFormatted("\\swordmorph[lemma]{%s}",
+						value.c_str());
+			}
+			else if (!strcmp(syncType.c_str(), "Strongs")) {
+				if (!tag.isEndTag()) {
+					// leading letter selects the language; the rest is the number
+					char ch = *value;
+					value<<1;
+					buf.appendFormatted("\\swordstrong[%s]{%s}{",
+						((ch == 'H') ? "Hebrew" : "Greek"),
+						value.c_str());
+					// a self-closing <sync/> never gets an end tag, so close now
+					if (tag.isEmpty())
+						buf += "}";
+				}
+				else { buf += "}"; }
+			}
+			else if (!strcmp(syncType.c_str(), "Dict")) {
+				if (!tag.isEndTag()) {
+					buf.appendFormatted("\\sworddict{%s}{",
+						value.c_str());
+					// a self-closing <sync/> never gets an end tag, so close now
+					if (tag.isEmpty())
+						buf += "}";
+				}
+				else { buf += "}"; }
+			}
+		}
+		// <note> tag
+		else if (!strcmp(name, "note")) {
+			if (!tag.isEndTag()) {
+				if (!tag.isEmpty()) {
+					SWBuf noteType = tag.getAttribute("type");
+					SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+					SWBuf noteName = tag.getAttribute("n");
+					SWBuf footnoteBody = "";
+					if (u->module){
+						footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"];
+					}
+					// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+					char ch = ((!strcmp(noteType.c_str(), "crossReference") || !strcmp(noteType.c_str(), "x-cross-ref")) ? 'x':'n');
+					// prefer the verse key; fall back to the plain key, then to nothing
+					const char *keyText = "";
+					if (u->vkey)
+						keyText = u->vkey->getText();
+					else if (u->key)
+						keyText = u->key->getText();
+					buf.appendFormatted("\\swordfootnote[%c]{%s}{%s}{%s}{%s}{",
+						ch,
+						footnoteNumber.c_str(),
+						u->version.c_str(),
+						keyText,
+						noteName.c_str());
+					// the note text comes from the entry attributes, so hide the inline copy
+					u->suspendTextPassThru = true;
+					if (u->module) {
+						buf += u->module->renderText(footnoteBody).c_str();
+					}
+				}
+			}
+			if (tag.isEndTag()) {
+				buf += "}";
+				u->suspendTextPassThru = false;
+			}
+		}
+		// <scripture ...> with attributes (the bare forms are token substitutes);
+		// mirrors the substitutes: opening tag opens the quote, closing tag ends it
+		else if (!strcmp(name, "scripture")) {
+			buf += (tag.isEndTag() ? "}" : " \\swordquote{ ");
+		}
+		// <scripRef> tag
+		else if (!strcmp(name, "scripRef")) {
+			if (!tag.isEndTag()) {	// <scripRef ...> or <scripRef ... />
+				if (u->isBiblicalText) {
+					if (u->vkey) {
+						SWBuf footnoteNumber = tag.getAttribute("swordFootnote");
+						SWBuf noteName = tag.getAttribute("n");
+						SWBuf footnoteBody = "";
+						if (u->module){
+							footnoteBody += u->module->getEntryAttributes()["Footnote"][footnoteNumber]["body"];
+						}
+						// leave this special osis type in for crossReference notes types?  Might thml use this some day? Doesn't hurt.
+						char ch = 'x';
+						buf.appendFormatted("\\swordfootnote[%c]{%s}{%s}{%s}{%s}{",
+							ch,
+							footnoteNumber.c_str(),
+							u->version.c_str(),
+							u->vkey->getText(),
+							(renderNoteNumbers ? noteName.c_str() : ""));
+						if (u->module) {
+							buf += u->module->renderText(footnoteBody).c_str();
+						}
+						// no end tag will follow a self-closing element
+						if (tag.isEmpty())
+							buf += "}";
+					}
+				}
+				else if (tag.isEmpty()) {
+					// self-closing reference: there is no enclosed text to show
+					SWBuf refList = tag.getAttribute("passage");
+					SWBuf version = tag.getAttribute("version");
+					buf.appendFormatted("\\swordxref{%s}{%s}{}",
+						refList.c_str(),
+						version.c_str());
+				}
+				if (!tag.isEmpty()) {
+					// collect the enclosed text instead of echoing it
+					u->suspendTextPassThru = true;
+				}
+			}
+			else {	// </scripRef>
+				if (!u->isBiblicalText) {
+					// lastTextNode now holds the text enclosed by the element;
+					// attributes live on the remembered opening tag
+					SWBuf refList = u->startTag.getAttribute("passage");
+					if (!refList.length())
+						refList = u->lastTextNode;
+					SWBuf version = u->startTag.getAttribute("version");
+
+					buf.appendFormatted("\\swordxref{%s}{%s}{",
+						(refList.length()) ? refList.c_str() : "",
+						(version.length()) ? version.c_str() : "");
+					buf += u->lastTextNode.c_str();
+					buf += "}";
+				}
+				else if (u->vkey) {
+					// closes the \swordfootnote group opened on the start tag
+					buf += "}";
+				}
+				// let's let text resume to output again
+				u->suspendTextPassThru = false;
+			}
+		}
+		else if (!strcmp(name, "div")) {
+
+			//if (!tag.isEndTag() && u->vkey && !u->vkey->getChapter())
+			//	buf += "\\swordsection{book}{";
+			//}
+
+
+			if (!tag.isEndTag() && u->inSecHead) {
+				buf += "\\swordsection{sechead}{";
+				u->inSecHead = false;
+			}
+
+			else if (!tag.isEndTag() && tag.getAttribute("class")) {
+				buf += "\\swordsection{";
+				buf += tag.getAttribute("class");
+				buf += "}{";
+
+			}
+			else if (!tag.isEndTag()) {
+				buf += "\\swordsection{}{";
+			}
+
+			else if (tag.isEndTag()) {
+				buf += "}";
+			}
+		}
+		else if (!strcmp(name, "img") || !strcmp(name, "image")) {
+			const char *src = strstr(token, "src");
+			if (!src)		// assert we have a src attribute
+				return false;
+
+			const char *c, *d;
+			if (((c = strchr(src+3, '"')) == NULL) ||
+			    ((d = strchr( ++c , '"')) == NULL))	// identify endpoints.
+				return false;			// abandon hope.
+
+
+			// images become clickable, if the UI supports showImage.
+			buf +="\\figure{";
+			// remembers whether \includegraphics{ was opened so the closing
+			// braces emitted below match what was actually opened
+			bool graphicsOpen = false;
+
+			for (c = token; *c; c++) {
+				if ((*c == '/') && (*(c+1) == '\0'))
+					continue;
+				if (c == src) {
+					for (;((*c) && (*c != '"')); c++)
+						buf += *c;
+
+					if (!*c) { c--; continue; }
+
+					buf += '"';
+					if (*(c+1) == '/') {
+						buf += "\\includegraphics{";
+						graphicsOpen = true;
+						// the module (and its data path) may be absent
+						if (userData->module) {
+							const char *dataPath = userData->module->getConfigEntry("AbsoluteDataPath");
+							if (dataPath)
+								buf += dataPath;
+						}
+						if (buf[buf.length()-2] == '/')
+							c++;		// skip '/'
+					}
+					continue;
+				}
+				buf += *c;
+			}
+			buf += (graphicsOpen ? "}}" : "}");
+		}
+		else if (!strcmp(name, "i")) {
+			if (!tag.isEndTag()) {
+				buf += "\\emph{";
+			}
+			else { buf += "}"; }
+		}
+		else if (!strcmp(name, "br")) {
+			buf += "\\\\";
+
+		}
+		else {
+			// unknown tag: echo it back unchanged
+			buf += '<';
+			/*for (const char *tok = token; *tok; tok++)
+				buf += *tok;*/
+			buf += token;
+			buf += '>';
+			//return false;  // we still didn't handle token
+		}
+	}
+	return true;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp index b9b9839..b30173b 100644 --- a/src/modules/filters/thmlplain.cpp +++ b/src/modules/filters/thmlplain.cpp @@ -3,7 +3,7 @@ * thmlplain.cpp - SWFilter descendant to strip out all ThML tags or * convert to ASCII rendered symbols * - * $Id: thmlplain.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: thmlplain.cpp 3427 2016-07-03 14:30:33Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -206,9 +206,15 @@ char ThMLPlain::processText(SWBuf &text, const SWKey *key, const SWModule *modul continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + //TODO: why is this + 2? Are we trying to keep 2 or 3 nulls after the last valid char? + // tokpos has been incremented past the last valid token. it should be pointing to null + // +1 should give us 2 nulls, but we're +2 here, which actually keeps 3 nulls after the + // last valid char. Why are we doing any of this?
These were written before SWBuf and should + // probably be switched to SWBuf, but perf tests before and after the switch should be run token[tokpos+2] = 0; + } } else text += *from; } diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp index dc50ae9..7852fd7 100644 --- a/src/modules/filters/thmlrtf.cpp +++ b/src/modules/filters/thmlrtf.cpp @@ -2,7 +2,7 @@ * * thmlrtf.cpp - ThML to RTF filter * - * $Id: thmlrtf.cpp 2833 2013-06-29 06:40:28Z chrislit $ + * $Id: thmlrtf.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -211,11 +211,12 @@ char ThMLRTF::processText(SWBuf &text, const SWKey *key, const SWModule *module) ThMLRTF::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { - this->SecHead = false; + isBiblicalText = false; + inSecHead = false; XMLTag startTag = ""; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -253,16 +254,10 @@ bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *us if (!tag.isEmpty()) { SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); - buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, vkey->getVerse(), footnoteNumber.c_str()); + buf.appendFormatted("{\\super <a href=\"\">*%c%i.%s</a>} ", ch, u->vkey->getVerse(), footnoteNumber.c_str()); } u->suspendTextPassThru = true; } @@ -280,7 +275,7 @@ bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *us } } if (tag.isEndTag()) { // </scripRef> - if (!u->BiblicalText) { + if (!u->isBiblicalText) { SWBuf refList = u->startTag.getAttribute("passage"); if (!refList.length()) refList = u->lastTextNode; @@ -292,15 +287,9 @@ bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *us } else { SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) {} - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. 
- buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", vkey->getVerse(), footnoteNumber.c_str()); + buf.appendFormatted("{\\super <a href=\"\">*x%i.%s</a>} ", u->vkey->getVerse(), footnoteNumber.c_str()); } } @@ -310,17 +299,17 @@ bool ThMLRTF::handleToken(SWBuf &buf, const char *token, BasicFilterUserData *us } else if (tag.getName() && !strcmp(tag.getName(), "div")) { - if (tag.isEndTag() && u->SecHead) { + if (tag.isEndTag() && u->inSecHead) { buf += "\\par}"; - u->SecHead = false; + u->inSecHead = false; } else if (tag.getAttribute("class")) { if (!stricmp(tag.getAttribute("class"), "sechead")) { - u->SecHead = true; + u->inSecHead = true; buf += "{\\par\\i1\\b1 "; } else if (!stricmp(tag.getAttribute("class"), "title")) { - u->SecHead = true; + u->inSecHead = true; buf += "{\\par\\i1\\b1 "; } } diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp index 3a8b12a..7bc84ef 100644 --- a/src/modules/filters/thmlscripref.cpp +++ b/src/modules/filters/thmlscripref.cpp @@ -3,7 +3,7 @@ * thmlscripref.cpp - SWFilter descendant to hide or show scripture * referebces in a ThML module * - * $Id: thmlscripref.cpp 3156 2014-04-17 03:50:37Z greg.hellings $ + * $Id: thmlscripref.cpp 3045 2014-03-02 07:53:52Z chrislit $ * * Copyright 2001-2014 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp index 09658a9..a43c7aa 100644 --- a/src/modules/filters/thmlstrongs.cpp +++ b/src/modules/filters/thmlstrongs.cpp @@ -3,7 +3,7 @@ * thmlstrongs.cpp - SWFilter descendant to hide or show Strong's number * in a ThML module * - * $Id: thmlstrongs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: thmlstrongs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -60,7 +60,7 @@ char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod 
bool lastspace = false; int word = 1; char val[128]; - char wordstr[5]; + char wordstr[11]; char *valto; char *ch; unsigned int textStart = 0, textEnd = 0; @@ -77,7 +77,7 @@ char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod token[0] = 0; token[1] = 0; token[2] = 0; - textEnd = text.length(); + textEnd = (unsigned int)text.length(); continue; } if (*from == '>') { // process tokens @@ -116,7 +116,7 @@ char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod if (lastspace) text--; } - if (newText) {textStart = text.length(); newText = false; } + if (newText) {textStart = (unsigned int)text.length(); newText = false; } continue; } } @@ -150,15 +150,17 @@ char ThMLStrongs::processText(SWBuf &text, const SWKey *key, const SWModule *mod text += '<'; text += token; text += '>'; - if (newText) {textStart = text.length(); newText = false; } + if (newText) {textStart = (unsigned int)text.length(); newText = false; } continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? token[tokpos+2] = 0; + } } - else { + else { text += *from; lastspace = (*from == ' '); } diff --git a/src/modules/filters/thmlwordjs.cpp b/src/modules/filters/thmlwordjs.cpp index 7d80a00..2042453 100644 --- a/src/modules/filters/thmlwordjs.cpp +++ b/src/modules/filters/thmlwordjs.cpp @@ -2,7 +2,7 @@ * * thmlwordjs.cpp - SWFilter descendant to ??? 
* - * $Id: thmlwordjs.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: thmlwordjs.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2005-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -68,7 +68,7 @@ char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu char val[128]; char *valto; char *ch; - char wordstr[5]; + char wordstr[11]; unsigned int textStart = 0, lastAppendLen = 0, textEnd = 0; SWBuf tmp; bool newText = false; @@ -91,7 +91,7 @@ char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu token[0] = 0; token[1] = 0; token[2] = 0; - textEnd = text.length(); + textEnd = (unsigned int)text.length(); continue; } if (*from == '>') { // process tokens @@ -151,7 +151,7 @@ char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu text += token; text += '>'; if (needWordOut) { - char wstr[10]; + char wstr[11]; sprintf(wstr, "%03d", word-2); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; @@ -226,27 +226,29 @@ char ThMLWordJS::processText(SWBuf &text, const SWKey *key, const SWModule *modu else m = morph.c_str(); spanStart.appendFormatted("<span class=\"clk\" onclick=\"p('%s','%s','%s','%s','','%s');\" >", lexName.c_str(), strong.c_str(), wordID.c_str(), m, modName.c_str()); text.insert(textStr, spanStart); - lastAppendLen = spanStart.length(); + lastAppendLen = (unsigned int)spanStart.length(); } } } if (newText) { - textStart = text.length(); newText = false; + textStart = (unsigned int)text.length(); newText = false; } continue; } if (intoken) { - if (tokpos < 2045) + if (tokpos < 2045) { token[tokpos++] = *from; + // TODO: why is this + 2 ? 
token[tokpos+2] = 0; + } } else { text += *from; } } - char wstr[10]; + char wstr[11]; sprintf(wstr, "%03d", word-1); AttributeValue *wAttrs = &(module->getEntryAttributes()["Word"][wstr]); needWordOut = false; diff --git a/src/modules/filters/thmlxhtml.cpp b/src/modules/filters/thmlxhtml.cpp index eb9c40a..af47c72 100644 --- a/src/modules/filters/thmlxhtml.cpp +++ b/src/modules/filters/thmlxhtml.cpp @@ -2,7 +2,7 @@ * * thmlxhtml.cpp - ThML to classed XHTML * - * $Id: thmlxhtml.cpp 3205 2014-05-01 02:31:28Z greg.hellings $ + * $Id: thmlxhtml.cpp 3548 2017-12-10 05:11:38Z scribe $ * * Copyright 2011-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -38,10 +38,11 @@ const char *ThMLXHTML::getHeader() const { ThMLXHTML::MyUserData::MyUserData(const SWModule *module, const SWKey *key) : BasicFilterUserData(module, key) { + isBiblicalText = false; + secHeadLevel = 0; if (module) { version = module->getName(); - BiblicalText = (!strcmp(module->getType(), "Biblical Texts")); - SecHead = false; + isBiblicalText = (!strcmp(module->getType(), "Biblical Texts")); } } @@ -214,20 +215,14 @@ bool ThMLXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * SWBuf type = tag.getAttribute("type"); SWBuf footnoteNumber = tag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) { } - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. char ch = ((tag.getAttribute("type") && ((!strcmp(tag.getAttribute("type"), "crossReference")) || (!strcmp(tag.getAttribute("type"), "x-cross-ref")))) ? 
'x':'n'); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=%c&value=%s&module=%s&passage=%s\"><small><sup class=\"%c\">*%c%s</sup></small></a>", ch, URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), ch, ch, (renderNoteNumbers ? noteName.c_str() : "")); @@ -261,7 +256,7 @@ bool ThMLXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } } if (tag.isEndTag()) { // </scripRef> - if (!u->BiblicalText) { + if (!u->isBiblicalText) { SWBuf refList = u->startTag.getAttribute("passage"); if (!refList.length()) refList = u->lastTextNode; @@ -276,19 +271,13 @@ bool ThMLXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * else { SWBuf footnoteNumber = u->startTag.getAttribute("swordFootnote"); SWBuf noteName = tag.getAttribute("n"); - VerseKey *vkey = NULL; - // see if we have a VerseKey * or descendant - SWTRY { - vkey = SWDYNAMIC_CAST(VerseKey, u->key); - } - SWCATCH ( ... ) {} - if (vkey) { + if (u->vkey) { // leave this special osis type in for crossReference notes types? Might thml use this some day? Doesn't hurt. - //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", vkey->getText(), footnoteNumber.c_str()); + //buf.appendFormatted("<a href=\"noteID=%s.x.%s\"><small><sup>*x</sup></small></a> ", u->vkey->getText(), footnoteNumber.c_str()); buf.appendFormatted("<a href=\"passagestudy.jsp?action=showNote&type=x&value=%s&module=%s&passage=%s\"><small><sup class=\"x\">*x%s</sup></small></a>", URL::encode(footnoteNumber.c_str()).c_str(), URL::encode(u->version.c_str()).c_str(), - URL::encode(vkey->getText()).c_str(), + URL::encode(u->vkey->getText()).c_str(), (renderNoteNumbers ? 
noteName.c_str() : "")); } } @@ -298,19 +287,19 @@ bool ThMLXHTML::handleToken(SWBuf &buf, const char *token, BasicFilterUserData * } } else if (tag.getName() && !strcmp(tag.getName(), "div")) { - if (tag.isEndTag() && u->SecHead) { + if (tag.isEndTag() && u->secHeadLevel) { buf += "</h"; - buf += u->SecHead; + buf += u->secHeadLevel; buf += ">"; - u->SecHead = false; + u->secHeadLevel = 0; } else if (tag.getAttribute("class")) { if (!stricmp(tag.getAttribute("class"), "sechead")) { - u->SecHead = '3'; + u->secHeadLevel = '3'; buf += "<h3>"; } else if (!stricmp(tag.getAttribute("class"), "title")) { - u->SecHead = '2'; + u->secHeadLevel = '2'; buf += "<h2>"; } else { diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp index 1cde086..bdd2ae1 100644 --- a/src/modules/filters/unicodertf.cpp +++ b/src/modules/filters/unicodertf.cpp @@ -2,7 +2,7 @@ * * unicodertf.cpp - SWFilter descendant to convert UTF-8 to RTF tags * - * $Id: unicodertf.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: unicodertf.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp index d224db2..3929471 100644 --- a/src/modules/filters/utf16utf8.cpp +++ b/src/modules/filters/utf16utf8.cpp @@ -2,7 +2,7 @@ * * utf16utf8.cpp - SWFilter descendant to convert UTF-16 to UTF-8 * - * $Id: utf16utf8.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf16utf8.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf8arabicpoints.cpp b/src/modules/filters/utf8arabicpoints.cpp index 42bfaa5..932fc64 100644 --- a/src/modules/filters/utf8arabicpoints.cpp +++ b/src/modules/filters/utf8arabicpoints.cpp @@ -3,7 +3,7 @@ * utf8arabicpoints.cpp - SWFilter descendant to remove UTF-8 * Arabic 
vowel points * - * $Id: utf8arabicpoints.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: utf8arabicpoints.cpp 3439 2016-10-23 08:32:02Z scribe $ * * Copyright 2009-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -163,7 +163,7 @@ char UTF8ArabicPoints::processText(SWBuf &text, const SWKey *, const SWModule *) // "start_of_input" is either mark_pos or any text between the // end of any previous mark and the current mark_pos. // This text is now ready to be moved into the output. - int ready_size = mark_pos - start_of_input; + int ready_size = (int)(mark_pos - start_of_input); if (ready_size > 0) { // Append the input text before the current mark to the // output. diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp index 039f9a4..cd98f96 100644 --- a/src/modules/filters/utf8greekaccents.cpp +++ b/src/modules/filters/utf8greekaccents.cpp @@ -3,7 +3,7 @@ * utf8greekaccents.cpp - SWFilter descendant to remove UTF-8 Greek * accents * - * $Id: utf8greekaccents.cpp 2980 2013-09-14 21:51:47Z scribe $ + * $Id: utf8greekaccents.cpp 3515 2017-11-01 11:38:09Z scribe $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society @@ -22,8 +22,10 @@ */ #include <stdlib.h> +#include <map> #include <stdio.h> #include <utf8greekaccents.h> +#include <utilstr.h> #ifdef _ICU_ @@ -31,6 +33,7 @@ sword::UTF8NFKD decompose; #endif +using std::map; SWORD_NAMESPACE_START @@ -44,6 +47,297 @@ namespace { static const StringList oVals(&choices[0], &choices[2]); return &oVals; } + + std::map<__u32, SWBuf> converters; + class converters_init { + public: + converters_init() { + SWBuf myBuf = ""; + //first just remove combining characters + converters[0x2019] = ""; // RIGHT SINGLE QUOTATION MARK + converters[0x1FBF] = ""; // GREEK PSILI + converters[0x2CFF] = ""; // COPTIC MORPHOLOGICAL DIVIDER + converters[0xFE24] = ""; // COMBINING MACRON LEFT HALF + converters[0xFE25] = ""; // 
COMBINING MACRON RIGHT HALF + converters[0xFE26] = ""; // COMBINING CONJOINING MACRON + converters[0x0300] = ""; // COMBINING GRAVE ACCENT + converters[0x0301] = ""; // COMBINING ACUTE ACCENT + converters[0x0302] = ""; // COMBINING CIRCUMFLEX ACCENT + converters[0x0308] = ""; // COMBINING DIAERESIS + converters[0x0313] = ""; // COMBINING COMMA ABOVE + converters[0x0314] = ""; // COMBINING REVERSED COMMA ABOVE + converters[0x037A] = ""; // GREEK YPOGEGRAMMENI + converters[0x0342] = ""; // COMBINING GREEK PERISPOMENI + // Now converted pre-composed characters to their alphabetic bases, discarding the accents + // Greek + // UPPER case + converters[0x0386] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH TONOS + converters[0x0388] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH TONOS + converters[0x0389] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH TONOS + converters[0x038A] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH TONOS + converters[0x03AA] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA + converters[0x038C] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH TONOS + converters[0x038E] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH TONOS + converters[0x03AB] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA + converters[0x038F] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH TONOS + + // lower case + converters[0x03AC] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH TONOS + converters[0x03AD] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH TONOS + converters[0x03AE] 
= *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH TONOS + converters[0x03AF] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH TONOS + converters[0x03CA] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DIALYTIKA + converters[0x03CC] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH TONOS + converters[0x03CD] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH TONOS + converters[0x03CB] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA + converters[0x03CE] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH TONOS + + // Extended Greek + // UPPER case + converters[0x1F08] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI + converters[0x1F09] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA + converters[0x1F0A] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA + converters[0x1F0B] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA + converters[0x1F0C] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA + converters[0x1F0D] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA + converters[0x1F0E] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI + converters[0x1F0F] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI + converters[0x1F88] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND 
PROSGEGRAMMENI + converters[0x1F89] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI + converters[0x1F8A] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI + converters[0x1F8B] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI + converters[0x1F8C] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI + converters[0x1F8D] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI + converters[0x1F8E] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1F8F] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1FB8] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH VRACHY + converters[0x1FB9] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH MACRON + converters[0x1FBA] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH VARIA + converters[0x1FBB] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH OXIA + converters[0x1FBC] = *getUTF8FromUniChar(0x0391, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI + + converters[0x1F18] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH PSILI + converters[0x1F19] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH DASIA + converters[0x1F1A] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL 
LETTER EPSILON WITH PSILI AND VARIA + converters[0x1F1B] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA + converters[0x1F1C] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA + converters[0x1F1D] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA + converters[0x1FC8] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH VARIA + converters[0x1FC9] = *getUTF8FromUniChar(0x0395, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER EPSILON WITH OXIA + + converters[0x1F28] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI + converters[0x1F29] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA + converters[0x1F2A] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA + converters[0x1F2B] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA + converters[0x1F2C] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA + converters[0x1F2D] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA + converters[0x1F2E] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI + converters[0x1F2F] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI + converters[0x1F98] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI + converters[0x1F99] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI + converters[0x1F9A] = *getUTF8FromUniChar(0x0397, 
&myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI + converters[0x1F9B] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI + converters[0x1F9C] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI + converters[0x1F9D] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI + converters[0x1F9E] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1F9F] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1FCA] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH VARIA + converters[0x1FCB] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH OXIA + converters[0x1FCC] = *getUTF8FromUniChar(0x0397, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI + + converters[0x1F38] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH PSILI + converters[0x1F39] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH DASIA + converters[0x1F3A] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA + converters[0x1F3B] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA + converters[0x1F3C] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA + converters[0x1F3D] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA + converters[0x1F3E] = *getUTF8FromUniChar(0x0399, 
&myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI + converters[0x1F3F] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI + converters[0x1FD8] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH VRACHY + converters[0x1FD9] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH MACRON + converters[0x1FDA] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH VARIA + converters[0x1FDB] = *getUTF8FromUniChar(0x0399, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER IOTA WITH OXIA + + converters[0x1F48] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH PSILI + converters[0x1F49] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH DASIA + converters[0x1F4A] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA + converters[0x1F4B] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA + converters[0x1F4C] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA + converters[0x1F4D] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA + converters[0x1FF8] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH VARIA + converters[0x1FF9] = *getUTF8FromUniChar(0x039F, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMICRON WITH OXIA + + converters[0x1F59] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH DASIA + converters[0x1F5A] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH PSILI AND VARIA + converters[0x1F5B] = 
*getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA + converters[0x1F5C] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH PSILI AND OXIA + converters[0x1F5D] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA + converters[0x1F5E] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH PSILI AND PERISPOMENI + converters[0x1F5F] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI + converters[0x1FE8] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH VRACHY + converters[0x1FE9] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH MACRON + converters[0x1FEA] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH VARIA + converters[0x1FEB] = *getUTF8FromUniChar(0x03A5, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER UPSILON WITH OXIA + + converters[0x1F68] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI + converters[0x1F69] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA + converters[0x1F6A] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA + converters[0x1F6B] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA + converters[0x1F6C] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA + converters[0x1F6D] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA + converters[0x1F6E] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI 
AND PERISPOMENI + converters[0x1F6F] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI + converters[0x1FA8] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI + converters[0x1FA9] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI + converters[0x1FAA] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI + converters[0x1FAB] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI + converters[0x1FAC] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI + converters[0x1FAD] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI + converters[0x1FAE] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1FAF] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI + converters[0x1FFA] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH VARIA + converters[0x1FFB] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH OXIA + converters[0x1FFC] = *getUTF8FromUniChar(0x03A9, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI + + converters[0x1FEC] = *getUTF8FromUniChar(0x03A1, &myBuf); myBuf.setSize(0); // GREEK CAPITAL LETTER RHO WITH DASIA + + // lower case + //alpha + converters[0x1F00] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI + converters[0x1F01] = 
*getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA + converters[0x1F02] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA + converters[0x1F03] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA + converters[0x1F04] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA + converters[0x1F05] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA + converters[0x1F06] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI + converters[0x1F07] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI + converters[0x1F80] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI + converters[0x1F81] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI + converters[0x1F82] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI + converters[0x1F83] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI + converters[0x1F84] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI + converters[0x1F85] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI + converters[0x1F86] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + converters[0x1F87] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI 
AND YPOGEGRAMMENI + converters[0x1F70] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH VARIA + converters[0x1F71] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH OXIA + converters[0x1FB0] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH VRACHY + converters[0x1FB1] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH MACRON + converters[0x1FB2] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI + converters[0x1FB3] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI + converters[0x1FB4] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI + converters[0x1FB5] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // unused? + converters[0x1FB6] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PERISPOMENI + converters[0x1FB7] = *getUTF8FromUniChar(0x03B1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI + + converters[0x1F10] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH PSILI + converters[0x1F11] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH DASIA + converters[0x1F12] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH PSILI AND VARIA + converters[0x1F13] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH DASIA AND VARIA + converters[0x1F14] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA + converters[0x1F15] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA + 
converters[0x1F72] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH VARIA + converters[0x1F73] = *getUTF8FromUniChar(0x03B5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER EPSILON WITH OXIA + + converters[0x1F90] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI + converters[0x1F91] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI + converters[0x1F92] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI + converters[0x1F93] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI + converters[0x1F94] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI + converters[0x1F95] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI + converters[0x1F96] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + converters[0x1F97] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + converters[0x1F20] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI + converters[0x1F21] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA + converters[0x1F22] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND VARIA + converters[0x1F23] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND VARIA + converters[0x1F24] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND OXIA + converters[0x1F25] = 
*getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND OXIA + converters[0x1F26] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI + converters[0x1F27] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI + converters[0x1FC2] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI + converters[0x1FC3] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI + converters[0x1FC4] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI + converters[0x1FC5] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // unused? + converters[0x1FC6] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PERISPOMENI + converters[0x1FC7] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI + converters[0x1F74] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH VARIA + converters[0x1F75] = *getUTF8FromUniChar(0x03B7, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER ETA WITH OXIA + + converters[0x1F30] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH PSILI + converters[0x1F31] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DASIA + converters[0x1F32] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH PSILI AND VARIA + converters[0x1F33] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DASIA AND VARIA + converters[0x1F34] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH PSILI AND OXIA + converters[0x1F35] = *getUTF8FromUniChar(0x03B9, &myBuf); 
myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DASIA AND OXIA + converters[0x1F36] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH PSILI AND PERISPOMENI + converters[0x1F37] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI + converters[0x1F76] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH VARIA + converters[0x1F77] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH OXIA + converters[0x1FD0] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH VRACHY + converters[0x1FD1] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH MACRON + converters[0x1FD2] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA + converters[0x1FD3] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA + converters[0x1FD4] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // unused? + converters[0x1FD5] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // unused? 
+ converters[0x1FD6] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH PERISPOMENI + converters[0x1FD7] = *getUTF8FromUniChar(0x03B9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI + + converters[0x1F40] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH PSILI + converters[0x1F41] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH DASIA + converters[0x1F42] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA + converters[0x1F43] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA + converters[0x1F44] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH PSILI AND OXIA + converters[0x1F45] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA + converters[0x1F78] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH VARIA + converters[0x1F79] = *getUTF8FromUniChar(0x03BF, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMICRON WITH OXIA + + converters[0x1F50] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH PSILI + converters[0x1F51] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DASIA + converters[0x1F52] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA + converters[0x1F53] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DASIA AND VARIA + converters[0x1F54] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA + converters[0x1F55] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DASIA AND 
OXIA + converters[0x1F56] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI + converters[0x1F57] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI + converters[0x1F7A] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH VARIA + converters[0x1F7B] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH OXIA + converters[0x1FE0] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH VRACHY + converters[0x1FE1] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH MACRON + converters[0x1FE2] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA + converters[0x1FE3] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA + converters[0x1FE6] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH PERISPOMENI + converters[0x1FE7] = *getUTF8FromUniChar(0x03C5, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI + + converters[0x1F60] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI + converters[0x1F61] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA + converters[0x1F62] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA + converters[0x1F63] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA + converters[0x1F64] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA + converters[0x1F65] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER 
OMEGA WITH DASIA AND OXIA + converters[0x1F66] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI + converters[0x1F67] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI + converters[0x1F7C] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH VARIA + converters[0x1F7D] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH OXIA + converters[0x1FA0] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI + converters[0x1FA1] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI + converters[0x1FA2] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI + converters[0x1FA3] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI + converters[0x1FA4] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI + converters[0x1FA5] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI + converters[0x1FA6] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI + converters[0x1FA7] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI + converters[0x1FF2] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI + converters[0x1FF3] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI + converters[0x1FF4] = *getUTF8FromUniChar(0x03C9, &myBuf); 
myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI + converters[0x1FF5] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // unused? + converters[0x1FF6] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PERISPOMENI + converters[0x1FF7] = *getUTF8FromUniChar(0x03C9, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI + + converters[0x1FE4] = *getUTF8FromUniChar(0x03C1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER RHO WITH PSILI + converters[0x1FE5] = *getUTF8FromUniChar(0x03C1, &myBuf); myBuf.setSize(0); // GREEK SMALL LETTER RHO WITH DASIA + } + } __converters_init; } @@ -57,229 +351,24 @@ UTF8GreekAccents::~UTF8GreekAccents() {}; char UTF8GreekAccents::processText(SWBuf &text, const SWKey *key, const SWModule *module) { if (!option) { //we don't want greek accents - //unsigned char *to, *from; - //to = (unsigned char*)text; - //for (from = (unsigned char*)text; *from; from++) { -#ifdef _ICU_ - decompose.processText(text, (SWKey *)2); // note the hack of 2 to mimic a real key. 
TODO: remove all hacks -#endif - SWBuf orig = text; const unsigned char* from = (unsigned char*)orig.c_str(); - for (text = ""; *from; from++) { - //first just remove combining characters - if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) { - from += 2; - } - else if (*from == 0xCC && *(from + 1)) { - if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) { - from++; - } - } - else if (*from == 0xCD && (*(from + 1) == 0xBA || *(from + 1) == 0x82)) { - from++; - } - //now converted pre-composed characters to their alphabetic bases, discarding the accents - - //Greek - //capital alpha - else if ((*from == 0xCE && *(from + 1) == 0x86)) { - text += 0xCE; - text += 0x91; - from++; - } - //capital epsilon - else if ((*from == 0xCE && *(from + 1) == 0x88)) { - text += 0xCE; - text += 0x95; - from++; - } - //capital eta - else if ((*from == 0xCE && *(from + 1) == 0x89)) { - text += 0xCE; - text += 0x97; - from++; - } - //capital iota - else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { - text += 0xCE; - text += 0x99; - from++; - } - //capital omicron - else if ((*from == 0xCE && *(from + 1) == 0x8C)) { - text += 0xCE; - text += 0x9F; - from++; - } - //capital upsilon - else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { - text += 0xCE; - text += 0xA5; - from++; - } - //capital omega - else if ((*from == 0xCE && *(from + 1) == 0x8F)) { - text += 0xCE; - text += 0xA9; - from++; - } - - //alpha - else if ((*from == 0xCE && *(from + 1) == 0xAC)) { - text += 0xCE; - text += 0xB1; - from++; - } - //epsilon - else if ((*from == 0xCE && *(from + 1) == 0xAD)) { - text += 0xCE; - text += 0xB5; - from++; - } - //eta - else if ((*from == 0xCE && *(from + 1) == 0xAE)) { - text += 0xCE; - text += 0xB7; - from++; - } - //iota - else if ((*from == 0xCE && *(from + 1) == 0xAF) || (*from == 0xCF && *(from + 1) == 0x8A)) { - text += 0xCE; - 
text += 0xB9; - from++; - } - //omicron - else if ((*from == 0xCF && *(from + 1) == 0x8C)) { - text += 0xCE; - text += 0xBF; - from++; - } - //upsilon - else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { - text += 0xCF; - text += 0x85; - from++; - } - //omega - else if ((*from == 0xCF && *(from + 1) == 0x8E)) { - text += 0xCF; - text += 0x89; - from++; - } - - //Extended Greek - //capital alpha - else if (*from == 0xE1 && (((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC))) { - text += 0xCE; - text += 0x91; - from+=2; - } - //capital epsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { - text += 0xCE; - text += 0x95; - from+=2; - } - //capital eta - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { - text += 0xCE; - text += 0x97; - from+=2; - } - //capital iota - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { - text += 0xCE; - text += 0x99; - from+=2; - } - //capital omicron - else if (*from == 0xE1 && (((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D)) || ((*(from + 1) == 0xBF && (*(from + 2) == 0xB8 || *(from + 2) == 0xB9))))) { - text += 0xCE; - text += 0x9F; - from+=2; - } - //capital upsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { - text += 0xCE; - text += 0xA5; - from+=2; - } - //capital omega - 
else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { - text += 0xCE; - text += 0xA9; - from+=2; - } - //capital rho - else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { - text += 0xCE; - text += 0xA1; - from+=2; - } + text = ""; + map<__u32, SWBuf>::const_iterator it = converters.end(); + while (*from) { + __u32 ch = getUniCharFromUTF8(&from, true); + // if ch is bad, then convert to replacement char + if (!ch) ch = 0xFFFD; - //alpha - else if (*from == 0xE1 && ( - ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) - || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) - || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7))) { - text += 0xCE; - text += 0xB1; - from+=2; - } - //epsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { - text += 0xCE; - text += 0xB5; - from+=2; - } - //eta - else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { - text += 0xCE; - text += 0xB7; - from+=2; - } - //iota - else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { - text += 0xCE; - text += 0xB9; - from+=2; - } - //omicron - else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { - text 
+= 0xCE; - text += 0xBF; - from+=2; - } - //upsilon - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { - text += 0xCF; - text += 0x85; - from+=2; - } - //omega - else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { - text += 0xCF; - text += 0x89; - from+=2; - } - //rho - else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { - text += 0xCF; - text += 0x81; - from+=2; - } - else { //no characters we filter - text += *from; + it = converters.find(ch); + if (it == converters.end()) { + getUTF8FromUniChar(ch, &text); } + else text.append((const char *)it->second, it->second.size()); // save a strlen, since we know our size } } return 0; } - - - - SWORD_NAMESPACE_END diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp index d504418..0a9eeb8 100644 --- a/src/modules/filters/utf8html.cpp +++ b/src/modules/filters/utf8html.cpp @@ -3,7 +3,7 @@ * utf8html.cpp - SWFilter descendant to convert a UTF-8 stream to * HTML escapes * - * $Id: utf8html.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf8html.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp index db5237c..6408e10 100644 --- a/src/modules/filters/utf8latin1.cpp +++ b/src/modules/filters/utf8latin1.cpp @@ -2,7 +2,7 @@ * * utf8latin1.cpp - SWFilter descendant to convert UTF-8 to Latin-1 * - * $Id: utf8latin1.cpp 
3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf8latin1.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp index 06bd76f..16a5c54 100644 --- a/src/modules/filters/utf8nfc.cpp +++ b/src/modules/filters/utf8nfc.cpp @@ -3,7 +3,7 @@ * utf8nfc.cpp - SWFilter descendant to perform NFC (canonical * composition normalization) on UTF-8 text * - * $Id: utf8nfc.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf8nfc.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp index 3a782ab..047422a 100644 --- a/src/modules/filters/utf8nfkd.cpp +++ b/src/modules/filters/utf8nfkd.cpp @@ -3,7 +3,7 @@ * utf8nfkd.cpp - SWFilter descendant to perform NFKD (compatability * decomposition normalization) on UTF-8 text * - * $Id: utf8nfkd.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf8nfkd.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society diff --git a/src/modules/filters/utf8scsu.cpp b/src/modules/filters/utf8scsu.cpp new file mode 100644 index 0000000..bf5bd41 --- /dev/null +++ b/src/modules/filters/utf8scsu.cpp @@ -0,0 +1,61 @@ +/****************************************************************************** + * + * utf8scsu.cpp - SWFilter descendant to convert UTF-8 to SCSU + * + * $Id: utf8scsu.cpp 3100 2014-03-12 04:34:32Z chrislit $ + * + * Copyright 2001-2014 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. 
Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#ifdef _ICU_ + +#include <utf8scsu.h> +#include <swbuf.h> + +SWORD_NAMESPACE_START + + +UTF8SCSU::UTF8SCSU() { + // initialize SCSU converter + scsuConv = ucnv_open("SCSU", &err); + + // initialize UTF-8 converter + utf8Conv = ucnv_open("UTF-8", &err); +} + +UTF8SCSU::~UTF8SCSU() { + ucnv_close(scsuConv); + ucnv_close(utf8Conv); +} + +char UTF8SCSU::processText(SWBuf &text, const SWKey *key, const SWModule *module) { + if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering + return -1; + + err = U_ZERO_ERROR; + UnicodeString utf16Text(text.getRawData(), text.length(), utf8Conv, err); + err = U_ZERO_ERROR; + int32_t len = utf16Text.extract(text.getRawData(), text.size(), scsuConv, err); + if (len > (int32_t)text.size()+1) { + text.setSize(len+1); + utf16Text.extract(text.getRawData(), text.size(), scsuConv, err); + } + + return 0; +} + +SWORD_NAMESPACE_END +#endif diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp index 958f1d9..d969dba 100644 --- a/src/modules/filters/utf8utf16.cpp +++ b/src/modules/filters/utf8utf16.cpp @@ -2,7 +2,7 @@ * * utf8utf16.cpp - SWFilter descendant to convert UTF-8 to UTF-16 * - * $Id: utf8utf16.cpp 3157 2014-04-17 03:56:12Z greg.hellings $ + * $Id: utf8utf16.cpp 3081 2014-03-05 19:52:08Z chrislit $ * * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society |