From 8d3fc864d094eeadc721f8e93436b37a5fab173e Mon Sep 17 00:00:00 2001 From: "Roberto C. Sanchez" Date: Sat, 29 Mar 2014 10:53:33 -0400 Subject: Imported Upstream version 1.5.3 --- src/modules/Makefile | 5 + src/modules/Makefile.am | 10 + src/modules/comments/Makefile | 5 + src/modules/comments/Makefile.am | 8 + src/modules/comments/hrefcom/Makefile | 5 + src/modules/comments/hrefcom/Makefile.am | 4 + src/modules/comments/hrefcom/hrefcom.cpp | 97 ++ src/modules/comments/hrefcom/jfbgen.cpp | 242 +++++ src/modules/comments/rawcom/Makefile | 5 + src/modules/comments/rawcom/Makefile.am | 4 + src/modules/comments/rawcom/mhcidx.cpp | 292 ++++++ src/modules/comments/rawcom/rawcom.cpp | 275 ++++++ src/modules/comments/rawcom/rtfidx.cpp | 292 ++++++ src/modules/comments/rawcom/rwpidx.cpp | 266 ++++++ src/modules/comments/rawfiles/Makefile | 5 + src/modules/comments/rawfiles/Makefile.am | 3 + src/modules/comments/rawfiles/rawfiles.cpp | 291 ++++++ src/modules/comments/rawfiles/rawfilesgen.cpp | 236 +++++ src/modules/comments/swcom.cpp | 30 + src/modules/comments/zcom/Makefile | 5 + src/modules/comments/zcom/Makefile.am | 4 + src/modules/comments/zcom/makeidx.c | 146 +++ src/modules/comments/zcom/rawtxt2z.cpp | 83 ++ src/modules/comments/zcom/zcom.cpp | 290 ++++++ src/modules/common/Makefile | 4 + src/modules/common/Makefile.am | 22 + src/modules/common/compress.cpp | 767 +++++++++++++++ src/modules/common/entriesblk.cpp | 166 ++++ src/modules/common/lzsscomprs.cpp | 665 +++++++++++++ src/modules/common/rawstr.cpp | 565 +++++++++++ src/modules/common/rawstr4.cpp | 562 +++++++++++ src/modules/common/rawverse.cpp | 345 +++++++ src/modules/common/sapphire.cpp | 228 +++++ src/modules/common/swcipher.cpp | 123 +++ src/modules/common/swcomprs.cpp | 190 ++++ src/modules/common/swcomprs.doc | 802 ++++++++++++++++ src/modules/common/zipcomprs.cpp | 158 +++ src/modules/common/zstr.cpp | 705 ++++++++++++++ src/modules/common/zverse.cpp | 513 ++++++++++ src/modules/filters/Makefile | 5 + 
src/modules/filters/Makefile.am | 65 ++ src/modules/filters/cipherfil.cpp | 38 + src/modules/filters/gbffootnotes.cpp | 118 +++ src/modules/filters/gbfheadings.cpp | 107 +++ src/modules/filters/gbfhtml.cpp | 536 +++++++++++ src/modules/filters/gbfhtmlhref.cpp | 148 +++ src/modules/filters/gbfmorph.cpp | 98 ++ src/modules/filters/gbfplain.cpp | 106 +++ src/modules/filters/gbfrtf.cpp | 277 ++++++ src/modules/filters/gbfstrongs.cpp | 98 ++ src/modules/filters/gbfthml.cpp | 463 +++++++++ src/modules/filters/greeklexattribs.cpp | 58 ++ src/modules/filters/latin1utf16.cpp | 120 +++ src/modules/filters/latin1utf8.cpp | 179 ++++ src/modules/filters/plainfootnotes.cpp | 102 ++ src/modules/filters/plainhtml.cpp | 134 +++ src/modules/filters/rtfhtml.cpp | 99 ++ src/modules/filters/rwphtml.cpp | 187 ++++ src/modules/filters/rwprtf.cpp | 107 +++ src/modules/filters/scsuutf8.cpp | 220 +++++ src/modules/filters/swbasicfilter.cpp | 299 ++++++ src/modules/filters/thmlfootnotes.cpp | 103 ++ src/modules/filters/thmlgbf.cpp | 330 +++++++ src/modules/filters/thmlheadings.cpp | 107 +++ src/modules/filters/thmlhtml.cpp | 211 ++++ src/modules/filters/thmlhtmlhref.cpp | 269 ++++++ src/modules/filters/thmllemma.cpp | 97 ++ src/modules/filters/thmlmorph.cpp | 98 ++ src/modules/filters/thmlolb.cpp | 243 +++++ src/modules/filters/thmlplain.cpp | 201 ++++ src/modules/filters/thmlrtf.cpp | 219 +++++ src/modules/filters/thmlscripref.cpp | 103 ++ src/modules/filters/thmlstrongs.cpp | 138 +++ src/modules/filters/thmlvariants.cpp | 183 ++++ src/modules/filters/unicodertf.cpp | 70 ++ src/modules/filters/utf16utf8.cpp | 95 ++ src/modules/filters/utf8arshaping.cpp | 48 + src/modules/filters/utf8bidireorder.cpp | 55 ++ src/modules/filters/utf8cantillation.cpp | 64 ++ src/modules/filters/utf8greekaccents.cpp | 252 +++++ src/modules/filters/utf8hebrewpoints.cpp | 55 ++ src/modules/filters/utf8html.cpp | 66 ++ src/modules/filters/utf8latin1.cpp | 74 ++ src/modules/filters/utf8nfc.cpp | 46 + 
src/modules/filters/utf8nfkd.cpp | 46 + src/modules/filters/utf8transliterator.cpp | 479 ++++++++++ src/modules/filters/utf8utf16.cpp | 79 ++ src/modules/genbook/Makefile | 5 + src/modules/genbook/Makefile.am | 5 + src/modules/genbook/rawgenbook/Makefile | 4 + src/modules/genbook/rawgenbook/Makefile.am | 4 + src/modules/genbook/rawgenbook/rawgenbook.cpp | 242 +++++ src/modules/genbook/swgenbook.cpp | 27 + src/modules/lexdict/Makefile | 5 + src/modules/lexdict/Makefile.am | 7 + src/modules/lexdict/rawld/Makefile | 5 + src/modules/lexdict/rawld/Makefile.am | 4 + src/modules/lexdict/rawld/no13.c | 34 + src/modules/lexdict/rawld/rawld.cpp | 204 ++++ src/modules/lexdict/rawld/rawldidx.c | 96 ++ src/modules/lexdict/rawld/strongsidx.c | 90 ++ src/modules/lexdict/rawld4/Makefile | 5 + src/modules/lexdict/rawld4/Makefile.am | 4 + src/modules/lexdict/rawld4/rawld4.cpp | 204 ++++ src/modules/lexdict/swld.cpp | 55 ++ src/modules/lexdict/zld/Makefile | 5 + src/modules/lexdict/zld/Makefile.am | 4 + src/modules/lexdict/zld/zld.cpp | 205 ++++ src/modules/readme | 9 + src/modules/swmodule.cpp | 677 +++++++++++++ src/modules/tests/Makefile | 4 + src/modules/tests/echomod.cpp | 21 + src/modules/texts/Makefile | 5 + src/modules/texts/Makefile.am | 7 + src/modules/texts/rawgbf/Gbf.c | 485 ++++++++++ src/modules/texts/rawgbf/Gbf.pas | 735 ++++++++++++++ src/modules/texts/rawgbf/Gbfmain.pas | 1267 +++++++++++++++++++++++++ src/modules/texts/rawgbf/Makefile | 5 + src/modules/texts/rawgbf/Makefile.am | 4 + src/modules/texts/rawgbf/gbf.cpp | 735 ++++++++++++++ src/modules/texts/rawgbf/gbf.h | 67 ++ src/modules/texts/rawgbf/gbfidx.cpp | 294 ++++++ src/modules/texts/rawgbf/rawgbf.cpp | 84 ++ src/modules/texts/rawtext/Makefile | 5 + src/modules/texts/rawtext/Makefile.am | 4 + src/modules/texts/rawtext/kjvidx.cpp | 169 ++++ src/modules/texts/rawtext/makebnds.c | 86 ++ src/modules/texts/rawtext/nuidx.cpp | 238 +++++ src/modules/texts/rawtext/ojbtxidx.c | 166 ++++ 
src/modules/texts/rawtext/rawtext.cpp | 630 ++++++++++++ src/modules/texts/rawtext/rawtxidx.c | 146 +++ src/modules/texts/rawtext/rtfidx.cpp | 164 ++++ src/modules/texts/rawtext/svetxidx.c | 153 +++ src/modules/texts/rawtext/vntidx.cpp | 185 ++++ src/modules/texts/swtext.cpp | 39 + src/modules/texts/ztext/Makefile | 5 + src/modules/texts/ztext/Makefile.am | 4 + src/modules/texts/ztext/gbfidx.cpp | 661 +++++++++++++ src/modules/texts/ztext/makeidx.c | 146 +++ src/modules/texts/ztext/nasb.cpp | 107 +++ src/modules/texts/ztext/rawtxt2z.cpp | 457 +++++++++ src/modules/texts/ztext/ztext.cpp | 347 +++++++ 142 files changed, 25038 insertions(+) create mode 100644 src/modules/Makefile create mode 100644 src/modules/Makefile.am create mode 100644 src/modules/comments/Makefile create mode 100644 src/modules/comments/Makefile.am create mode 100644 src/modules/comments/hrefcom/Makefile create mode 100644 src/modules/comments/hrefcom/Makefile.am create mode 100644 src/modules/comments/hrefcom/hrefcom.cpp create mode 100644 src/modules/comments/hrefcom/jfbgen.cpp create mode 100644 src/modules/comments/rawcom/Makefile create mode 100644 src/modules/comments/rawcom/Makefile.am create mode 100644 src/modules/comments/rawcom/mhcidx.cpp create mode 100644 src/modules/comments/rawcom/rawcom.cpp create mode 100644 src/modules/comments/rawcom/rtfidx.cpp create mode 100644 src/modules/comments/rawcom/rwpidx.cpp create mode 100644 src/modules/comments/rawfiles/Makefile create mode 100644 src/modules/comments/rawfiles/Makefile.am create mode 100644 src/modules/comments/rawfiles/rawfiles.cpp create mode 100644 src/modules/comments/rawfiles/rawfilesgen.cpp create mode 100644 src/modules/comments/swcom.cpp create mode 100644 src/modules/comments/zcom/Makefile create mode 100644 src/modules/comments/zcom/Makefile.am create mode 100644 src/modules/comments/zcom/makeidx.c create mode 100644 src/modules/comments/zcom/rawtxt2z.cpp create mode 100644 src/modules/comments/zcom/zcom.cpp create mode 
100644 src/modules/common/Makefile create mode 100644 src/modules/common/Makefile.am create mode 100644 src/modules/common/compress.cpp create mode 100644 src/modules/common/entriesblk.cpp create mode 100644 src/modules/common/lzsscomprs.cpp create mode 100644 src/modules/common/rawstr.cpp create mode 100644 src/modules/common/rawstr4.cpp create mode 100644 src/modules/common/rawverse.cpp create mode 100644 src/modules/common/sapphire.cpp create mode 100644 src/modules/common/swcipher.cpp create mode 100644 src/modules/common/swcomprs.cpp create mode 100644 src/modules/common/swcomprs.doc create mode 100644 src/modules/common/zipcomprs.cpp create mode 100644 src/modules/common/zstr.cpp create mode 100644 src/modules/common/zverse.cpp create mode 100644 src/modules/filters/Makefile create mode 100644 src/modules/filters/Makefile.am create mode 100644 src/modules/filters/cipherfil.cpp create mode 100644 src/modules/filters/gbffootnotes.cpp create mode 100644 src/modules/filters/gbfheadings.cpp create mode 100644 src/modules/filters/gbfhtml.cpp create mode 100644 src/modules/filters/gbfhtmlhref.cpp create mode 100644 src/modules/filters/gbfmorph.cpp create mode 100644 src/modules/filters/gbfplain.cpp create mode 100644 src/modules/filters/gbfrtf.cpp create mode 100644 src/modules/filters/gbfstrongs.cpp create mode 100644 src/modules/filters/gbfthml.cpp create mode 100644 src/modules/filters/greeklexattribs.cpp create mode 100644 src/modules/filters/latin1utf16.cpp create mode 100644 src/modules/filters/latin1utf8.cpp create mode 100644 src/modules/filters/plainfootnotes.cpp create mode 100644 src/modules/filters/plainhtml.cpp create mode 100644 src/modules/filters/rtfhtml.cpp create mode 100644 src/modules/filters/rwphtml.cpp create mode 100644 src/modules/filters/rwprtf.cpp create mode 100644 src/modules/filters/scsuutf8.cpp create mode 100644 src/modules/filters/swbasicfilter.cpp create mode 100644 src/modules/filters/thmlfootnotes.cpp create mode 100644 
src/modules/filters/thmlgbf.cpp create mode 100644 src/modules/filters/thmlheadings.cpp create mode 100644 src/modules/filters/thmlhtml.cpp create mode 100644 src/modules/filters/thmlhtmlhref.cpp create mode 100644 src/modules/filters/thmllemma.cpp create mode 100644 src/modules/filters/thmlmorph.cpp create mode 100644 src/modules/filters/thmlolb.cpp create mode 100644 src/modules/filters/thmlplain.cpp create mode 100644 src/modules/filters/thmlrtf.cpp create mode 100644 src/modules/filters/thmlscripref.cpp create mode 100644 src/modules/filters/thmlstrongs.cpp create mode 100644 src/modules/filters/thmlvariants.cpp create mode 100644 src/modules/filters/unicodertf.cpp create mode 100644 src/modules/filters/utf16utf8.cpp create mode 100644 src/modules/filters/utf8arshaping.cpp create mode 100644 src/modules/filters/utf8bidireorder.cpp create mode 100644 src/modules/filters/utf8cantillation.cpp create mode 100644 src/modules/filters/utf8greekaccents.cpp create mode 100644 src/modules/filters/utf8hebrewpoints.cpp create mode 100644 src/modules/filters/utf8html.cpp create mode 100644 src/modules/filters/utf8latin1.cpp create mode 100644 src/modules/filters/utf8nfc.cpp create mode 100644 src/modules/filters/utf8nfkd.cpp create mode 100644 src/modules/filters/utf8transliterator.cpp create mode 100644 src/modules/filters/utf8utf16.cpp create mode 100644 src/modules/genbook/Makefile create mode 100644 src/modules/genbook/Makefile.am create mode 100644 src/modules/genbook/rawgenbook/Makefile create mode 100644 src/modules/genbook/rawgenbook/Makefile.am create mode 100644 src/modules/genbook/rawgenbook/rawgenbook.cpp create mode 100644 src/modules/genbook/swgenbook.cpp create mode 100644 src/modules/lexdict/Makefile create mode 100644 src/modules/lexdict/Makefile.am create mode 100644 src/modules/lexdict/rawld/Makefile create mode 100644 src/modules/lexdict/rawld/Makefile.am create mode 100644 src/modules/lexdict/rawld/no13.c create mode 100644 
src/modules/lexdict/rawld/rawld.cpp create mode 100644 src/modules/lexdict/rawld/rawldidx.c create mode 100644 src/modules/lexdict/rawld/strongsidx.c create mode 100644 src/modules/lexdict/rawld4/Makefile create mode 100644 src/modules/lexdict/rawld4/Makefile.am create mode 100644 src/modules/lexdict/rawld4/rawld4.cpp create mode 100644 src/modules/lexdict/swld.cpp create mode 100644 src/modules/lexdict/zld/Makefile create mode 100644 src/modules/lexdict/zld/Makefile.am create mode 100644 src/modules/lexdict/zld/zld.cpp create mode 100644 src/modules/readme create mode 100644 src/modules/swmodule.cpp create mode 100644 src/modules/tests/Makefile create mode 100644 src/modules/tests/echomod.cpp create mode 100644 src/modules/texts/Makefile create mode 100644 src/modules/texts/Makefile.am create mode 100644 src/modules/texts/rawgbf/Gbf.c create mode 100644 src/modules/texts/rawgbf/Gbf.pas create mode 100644 src/modules/texts/rawgbf/Gbfmain.pas create mode 100644 src/modules/texts/rawgbf/Makefile create mode 100644 src/modules/texts/rawgbf/Makefile.am create mode 100644 src/modules/texts/rawgbf/gbf.cpp create mode 100644 src/modules/texts/rawgbf/gbf.h create mode 100644 src/modules/texts/rawgbf/gbfidx.cpp create mode 100644 src/modules/texts/rawgbf/rawgbf.cpp create mode 100644 src/modules/texts/rawtext/Makefile create mode 100644 src/modules/texts/rawtext/Makefile.am create mode 100644 src/modules/texts/rawtext/kjvidx.cpp create mode 100644 src/modules/texts/rawtext/makebnds.c create mode 100644 src/modules/texts/rawtext/nuidx.cpp create mode 100644 src/modules/texts/rawtext/ojbtxidx.c create mode 100644 src/modules/texts/rawtext/rawtext.cpp create mode 100644 src/modules/texts/rawtext/rawtxidx.c create mode 100644 src/modules/texts/rawtext/rtfidx.cpp create mode 100644 src/modules/texts/rawtext/svetxidx.c create mode 100644 src/modules/texts/rawtext/vntidx.cpp create mode 100644 src/modules/texts/swtext.cpp create mode 100644 src/modules/texts/ztext/Makefile create 
mode 100644 src/modules/texts/ztext/Makefile.am create mode 100644 src/modules/texts/ztext/gbfidx.cpp create mode 100644 src/modules/texts/ztext/makeidx.c create mode 100644 src/modules/texts/ztext/nasb.cpp create mode 100644 src/modules/texts/ztext/rawtxt2z.cpp create mode 100644 src/modules/texts/ztext/ztext.cpp (limited to 'src/modules') diff --git a/src/modules/Makefile b/src/modules/Makefile new file mode 100644 index 0000000..ef8eccd --- /dev/null +++ b/src/modules/Makefile @@ -0,0 +1,5 @@ + +root := ../.. + +all: + make -C ${root} diff --git a/src/modules/Makefile.am b/src/modules/Makefile.am new file mode 100644 index 0000000..944dc18 --- /dev/null +++ b/src/modules/Makefile.am @@ -0,0 +1,10 @@ +modulesdir = $(top_srcdir)/src/modules + +libsword_la_SOURCES += $(modulesdir)/swmodule.cpp + +include ../src/modules/common/Makefile.am +include ../src/modules/filters/Makefile.am +include ../src/modules/genbook/Makefile.am +include ../src/modules/texts/Makefile.am +include ../src/modules/comments/Makefile.am +include ../src/modules/lexdict/Makefile.am diff --git a/src/modules/comments/Makefile b/src/modules/comments/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/comments/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+
+all:
+	make -C ${root}
diff --git a/src/modules/comments/Makefile.am b/src/modules/comments/Makefile.am
new file mode 100644
index 0000000..1568544
--- /dev/null
+++ b/src/modules/comments/Makefile.am
@@ -0,0 +1,8 @@
+commentsdir = $(top_srcdir)/src/modules/comments
+
+libsword_la_SOURCES += $(commentsdir)/swcom.cpp
+
+include ../src/modules/comments/rawcom/Makefile.am
+include ../src/modules/comments/rawfiles/Makefile.am
+include ../src/modules/comments/zcom/Makefile.am
+include ../src/modules/comments/hrefcom/Makefile.am
diff --git a/src/modules/comments/hrefcom/Makefile b/src/modules/comments/hrefcom/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/hrefcom/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/comments/hrefcom/Makefile.am b/src/modules/comments/hrefcom/Makefile.am
new file mode 100644
index 0000000..a6a2115
--- /dev/null
+++ b/src/modules/comments/hrefcom/Makefile.am
@@ -0,0 +1,4 @@
+hrefcomdir = $(top_srcdir)/src/modules/comments/hrefcom
+
+libsword_la_SOURCES += $(hrefcomdir)/hrefcom.cpp
+
diff --git a/src/modules/comments/hrefcom/hrefcom.cpp b/src/modules/comments/hrefcom/hrefcom.cpp
new file mode 100644
index 0000000..a80e5b6
--- /dev/null
+++ b/src/modules/comments/hrefcom/hrefcom.cpp
@@ -0,0 +1,97 @@
+/******************************************************************************
+ *  hrefcom.cpp - code for class 'HREFCom'- a module that produces HTML HREFs
+ *			pointing to actual text desired.  Uses standard
+ *			files:	ot and nt using indexes ??.bks ??.cps ??.vss
+ */
+
+
+#include
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#else
+#include
+#endif
+
+#include
+#include
+#include
+#include
+
+
+/******************************************************************************
+ * HREFCom Constructor - Initializes data for instance of HREFCom
+ *
+ * ENT:	ipath - handed to the RawVerse base class
+ *	iprefix - string to prepend to each HREF (e.g. "file://mods/com/jfb/")
+ *	iname - Internal name for module
+ *	idesc - Name to display to user for module
+ *	idisp - Display object to use for displaying
+ */
+
+HREFCom::HREFCom(const char *ipath, const char *iprefix, const char *iname, const char *idesc, SWDisplay *idisp) : RawVerse(ipath), SWCom(iname, idesc, idisp)
+{
+	prefix = 0;	// must be null before stdstr() is given its address
+	stdstr(&prefix, iprefix);
+}
+
+
+/******************************************************************************
+ * HREFCom Destructor - Releases the prefix string held by the instance
+ */
+
+HREFCom::~HREFCom()
+{
+	delete [] prefix;	// delete [] on a null pointer does nothing
+}
+
+
+/******************************************************************************
+ * HREFCom::getRawEntry	- Builds the HREF for the current key: the
+ *				prefix followed by the text gettext() fetches
+ *
+ * RET: entrybuf, holding the assembled string
+ */
+
+char *HREFCom::getRawEntry() {
+	long offset;
+	unsigned short len;
+	VerseKey *vkey = 0;
+
+#ifndef _WIN32_WCE
+	try {
+#endif
+		vkey = SWDYNAMIC_CAST(VerseKey, this->key);
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+	// cast gave nothing usable: build a temporary VerseKey from this->key
+	if (!vkey)
+		vkey = new VerseKey(this->key);
+
+	findoffset(vkey->Testament(), vkey->Index(), &offset, &len);
+	entrySize = len;        // support getEntrySize call
+
+	// grow entrybuf only when this entry needs more than is allocated
+	const unsigned long needed = ((len + 2) + strlen(prefix)) * FILTERPAD;
+	if (entrybufallocsize < needed) {
+		delete [] entrybuf;
+		entrybuf = new char [ needed ];
+		entrybufallocsize = needed;
+	}
+
+	char *raw = new char [ len + 10 ];
+	gettext(vkey->Testament(), offset, len + 2, raw);
+	sprintf(entrybuf, "%s%s", prefix, raw);
+	preptext(entrybuf);
+	delete [] raw;
+
+	// free the temporary key if one was created above
+	if (vkey != this->key)
+		delete vkey;
+
+	return entrybuf;
+}
diff --git a/src/modules/comments/hrefcom/jfbgen.cpp b/src/modules/comments/hrefcom/jfbgen.cpp
new file mode 100644
index 0000000..8b66a60
--- /dev/null
+++ b/src/modules/comments/hrefcom/jfbgen.cpp
@@ -0,0 +1,242 @@
+/*****************************************************************************
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#else
+#include
+#endif
+
+#include
+#include
+
+#ifndef O_BINARY
+	#define O_BINARY 0
+#endif
+
+void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size);
+char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size);
+void openfiles();
+void checkparams(int argc, char **argv);
+void charsetconvert(char *data);
+
+
+VerseKey key1, key2, key3;
+int fp, vfp, cfp, bfp;
+long chapoffset;
+short chapsize;
+char testmnt;
+char startflag = 0;
+
+
+int main(int argc, char **argv)
+{
+	long offset;
+	int num1, num2, rangemax;
+	short size;
+
+	checkparams(argc, argv);
+
+	key1 = key2 = key3 = "Genesis 1:1";
+
+	openfiles();
+
+	num1 = key1.Chapter();
+	num2 = key1.Verse();
+
+	// findbreak() returns nonzero once the last verse has been written
+	while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) {
+		if (!startflag) {
+			startflag = 1;
+		}
+		else {
+			if (num2 < key2.Verse()) {            // new chapter
+				if (num1 <= key2.Chapter()) {     // new book
+					key2.Verse(1);
+					key2.Chapter(1);
+					key2.Book(key2.Book()+1);
+				}
+				printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2);
+				chapoffset = offset;
+				chapsize = size;
+			}
+		}
+		// verse goes back to 1 before the chapter changes, then to num2
+		key2.Verse(1);
+		key2.Chapter(num1);
+		key2.Verse(num2);
+
+		// each HREF entry covers one verse, so the range ends where it starts
+		key3 = key2;
+
+		writeidx(key1, key2, key3, offset, size);
+	}
+	close(vfp);
+	close(cfp);
+	close(bfp);
+	close(fp);
+	return 0;
+}
+
+
+/**************************************************************************
+ * ENT:	key1	- current location of index
+ *	key2	- minimum keyval for which this offset is valid
+ *	key3	- maximum keyval for which this offset is valid
+ */
+
+void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size)
+{
+	long pos;
+	short tmp = 0;
+
+	// NOTE(review): each 4-byte write stores the first 4 bytes of a long; that equals the value only if long is 32-bit or the host is little-endian
+	if (key1.Verse() == 1) {	// new chapter
+		if (key1.Chapter() == 1) {	// new book
+			pos = lseek(cfp, 0, SEEK_CUR);
+			write(bfp, &pos, 4);
+			pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */
+			write(cfp, &pos, 4);
+			write(vfp, &chapoffset, 4);  /* Book intro (vss)  set to same as chap for now (it should be chap 1 which usually contains the book intro anyway) */
+			write(vfp, &chapsize, 2);
+		}
+		pos = lseek(vfp, 0, SEEK_CUR);
+		write(cfp, &pos, 4);
+		write(vfp, &chapoffset, 4);  /* Chapter intro */
+		write(vfp, &chapsize, 2);
+	}
+	if (key1 >= key2) {
+		write(vfp, &offset, 4);
+		write(vfp, &size, 2);
+	}
+	else {
+		pos = 0;
+		write(vfp, &pos, 4);
+		write(vfp, &tmp, 2);
+	}
+	key1++;
+}
+
+
+static VerseKey inckey = "Genesis 1:1";
+
+/**************************************************************************
+ * findbreak - writes the HREF line for the next verse to the text file and
+ *		reports its offset, size, chapter and verse; rangemax is not set
+ * RET:	0 while verses remain, 1 after verse number 31102 has been written
+ */
+
+char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size)
+{
+	static int olbvnum = 0;
+	char data[256];
+	static const char * const bookabrev[66] = {"Ge", "Ex", "Le", "Nu", "De", "Jos", "Jud", "Ru",
+		"1Sa", "2Sa", "1Ki", "2Ki", "1Ch", "2Ch", "Ezr", "Ne", "Es",
+		"Job", "Ps", "Pr", "Ec", "So", "Isa", "Jer", "La", "Eze", "Da",
+		"Ho", "Joe", "Am", "Ob", "Jon", "Mic", "Na", "Hab", "Zep",
+		"Hag", "Zec", "Mal",
+		"Mt", "Mr", "Lu", "Joh", "Ac", "Ro", "1Co", "2Co", "Ga",
+		"Eph", "Php", "Col", "1Th", "2Th", "1Ti", "2Ti", "Tit", "Phm",
+		"Heb", "Jas", "1Pe", "2Pe", "1Jo", "2Jo", "3Jo", "Jude", "Re" };
+
+	if (++olbvnum <= 31102) {
+
+		if (olbvnum == 23146) {	// "Matthew 1:1"
+			close(vfp);
+			close(cfp);
+			close(bfp);
+			close(fp);
+			key1 = key2 = key3 = inckey = "Matthew 1:1";
+			openfiles();
+			fp = ::fp;	// the parameter shadows the global that openfiles() just replaced
+			startflag = 0;
+		}
+
+
+		*offset = lseek(fp, 0, SEEK_CUR);
+
+		if ((olbvnum!=1) && (olbvnum != 23146))
+			inckey++;
+
+		*num1 = inckey.Chapter();
+		*num2 = inckey.Verse();
+
+		const int book = inckey.Book() + ((inckey.Testament()>1)?39:0);	// NT books are numbered after the 39 OT books
+		sprintf(data, "JFB%.2d.htm#%s%d_%d", book, bookabrev[book-1], inckey.Chapter(), inckey.Verse());
+		write(fp, data, strlen(data));
+
+		*size = lseek(fp, 0, SEEK_CUR) - *offset;
+		write(fp, "\n", 1);
+		return 0;
+	}
+	return 1;
+}
+
+
+void openfiles()
+{
+	char buf[255];
+	char fname[5];
+	long pos;
+	short size;
+
+	testmnt = key1.Testament();
+
+	// O_CREAT requires a mode argument; 0600 (owner read/write) is also a valid pmode for the io.h open()
+	strcpy(fname, (testmnt==2) ? "nt" : "ot");
+	unlink(fname);
+	if ((fp = open(fname, O_CREAT|O_RDWR|O_BINARY, 0600)) == -1) {
+		fprintf(stderr, "Couldn't open file: %s\n", fname);
+		exit(1);
+	}
+
+	sprintf(buf, "%s.vss", fname);
+	unlink(buf);
+	if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY, 0600)) == -1) {
+		fprintf(stderr, "Couldn't open file: %s\n", buf);
+		exit(1);
+	}
+
+	sprintf(buf, "%s.cps", fname);
+	unlink(buf);
+	if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY, 0600)) == -1) {
+		fprintf(stderr, "Couldn't open file: %s\n", buf);
+		exit(1);
+	}
+
+	sprintf(buf, "%s.bks", fname);
+	unlink(buf);
+	if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY, 0600)) == -1) {
+		fprintf(stderr, "Couldn't open file: %s\n", buf);
+		exit(1);
+	}
+
+	pos = 0;
+	write(bfp, &pos, 4);  /* Book    offset for testament intros */
+	pos = 4;
+	write(cfp, &pos, 4);  /* Chapter offset for testament intro */
+
+
+/*	Right now just zero out intros until parsing correctly */
+	pos = 0;
+	size = 0;
+	write(vfp, &pos, 4);  /* Module intro */
+	write(vfp, &size, 2);
+	write(vfp, &pos, 4);  /* Testament intro */
+	write(vfp, &size, 2);
+}
+
+
+void checkparams(int argc, char **argv)
+{
+	if (argc !=1) {
+		fprintf(stderr, "usage: %s\n", argv[0]);
+		exit(1);
+	}
+}
diff --git a/src/modules/comments/rawcom/Makefile b/src/modules/comments/rawcom/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/comments/rawcom/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+ +all: + make -C ${root} diff --git a/src/modules/comments/rawcom/Makefile.am b/src/modules/comments/rawcom/Makefile.am new file mode 100644 index 0000000..901cf6b --- /dev/null +++ b/src/modules/comments/rawcom/Makefile.am @@ -0,0 +1,4 @@ +rawcomdir = $(top_srcdir)/src/modules/comments/rawcom + +libsword_la_SOURCES += $(rawcomdir)/rawcom.cpp + diff --git a/src/modules/comments/rawcom/mhcidx.cpp b/src/modules/comments/rawcom/mhcidx.cpp new file mode 100644 index 0000000..df16f55 --- /dev/null +++ b/src/modules/comments/rawcom/mhcidx.cpp @@ -0,0 +1,292 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! + */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + 
if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != 10) + return 0; + if (buf[1] != '#') + return 0; + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } + return 1; +} + + +char findbreak(int fp, long *offset, 
int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + memset(buf, ' ', 2); + for (loop = 2; loop < 7; loop++) { + if ((buf[loop] == '-') || (buf[loop] == ',') || (buf[loop] == ' ')) { + buf[loop] = 0; + *num2 = atoi(buf); + break; + } + } + for (ch = loop + 1; ch < 7; ch++) { + if (buf[ch] == ' ') { + break; + } + } + buf[ch] = 0; + *rangemax = atoi(&buf[loop+1]); + if (!*rangemax) + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + memset(buf2, 0, 7); + loop = 0; + while ((read(fp, &buf2[loop], 1) == 1) && (loop < 7)) { + if ((buf2[loop] == 10) || (buf2[loop] == 13)) { + buf2[loop] = 0; + break; + } + loop++; + } + while (read(fp, &ch, 1) == 1) { + if (ch == '*') + break; + } + + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + for (loop = strlen(buf2) - 1; loop; loop--) { + if (buf2[loop] == ':') + break; + } + *num1 = atoi(&buf2[loop+1]); + printf("Chapter marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + 
fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawcom/rawcom.cpp b/src/modules/comments/rawcom/rawcom.cpp new file mode 100644 index 0000000..ca93c64 --- /dev/null +++ b/src/modules/comments/rawcom/rawcom.cpp @@ -0,0 +1,275 @@ +/****************************************************************************** + * rawcom.cpp - code for class 'RawCom'- a module that reads raw commentary + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + + + /****************************************************************************** + * RawCom Constructor - Initializes data for instance of RawCom + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawCom::RawCom(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding encoding, SWTextDirection dir, SWTextMarkup markup, const char* ilang) + : RawVerse(ipath), + SWCom(iname, idesc, idisp, encoding, dir, markup, ilang){ +} + + +/****************************************************************************** + * 
RawCom Destructor - Cleans up instance of RawCom + */ + +RawCom::~RawCom() +{ +} + + +/****************************************************************************** + * RawCom::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + gettext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawCom::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &RawCom::operator +=(int increment) +{ + long start; + unsigned short size; + VerseKey *tmpkey = 0; + +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (increment) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (increment > 0) ? 
(*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { + increment += (increment < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; + + return *this; +} + +SWModule &RawCom::setentry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return *this; +} + +SWModule &RawCom::operator <<(const char *inbuf) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return *this; +} + + +SWModule &RawCom::operator <<(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + destkey = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + srckey = SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; + + return *this; +} + + +/****************************************************************************** + * RawCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawCom::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} diff --git a/src/modules/comments/rawcom/rtfidx.cpp b/src/modules/comments/rawcom/rtfidx.cpp new file mode 100644 index 0000000..38b38bc --- /dev/null +++ b/src/modules/comments/rawcom/rtfidx.cpp @@ -0,0 +1,292 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! + */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: 
%d ('%s')\n", num1, (char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != 10) + return 0; + if (buf[1] != '#') + return 0; + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if 
(startentry(buf)) { + memset(buf, ' ', 2); + for (loop = 2; loop < 7; loop++) { + if ((buf[loop] == '-') || (buf[loop] == ',') || (buf[loop] == ' ')) { + buf[loop] = 0; + *num2 = atoi(buf); + break; + } + } + for (ch = loop + 1; ch < 7; ch++) { + if (buf[ch] == ' ') { + break; + } + } + buf[ch] = 0; + *rangemax = atoi(&buf[loop+1]); + if (!*rangemax) + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + memset(buf2, 0, 7); + loop = 0; + while ((read(fp, &buf2[loop], 1) == 1) && (loop < 7)) { + if ((buf2[loop] == 10) || (buf2[loop] == 13)) { + buf2[loop] = 0; + break; + } + loop++; + } + while (read(fp, &ch, 1) == 1) { + if (ch == '*') + break; + } + + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + for (loop = strlen(buf2) - 1; loop; loop--) { + if (buf2[loop] == ':') + break; + } + *num1 = atoi(&buf2[loop+1]); + printf("Chapter marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 3; + } + else { + sprintf(buf2, "$-$-$- XX:%d", ch2); + *size = (offset2 - (*offset)) - ((strlen(buf2) + 4)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: 
%s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawcom/rwpidx.cpp b/src/modules/comments/rawcom/rwpidx.cpp new file mode 100644 index 0000000..afcbd81 --- /dev/null +++ b/src/modules/comments/rawcom/rwpidx.cpp @@ -0,0 +1,266 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for RWP). Good luck! + */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1 = 0, num2 = 0, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 
2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (num2) { + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + } + else { + key2.Verse(1); + if (!startflag) { + startflag = 1; + } + else { + if (num1 <= key2.Chapter()) { // new book + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + } + key2.Chapter(num1); + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; + continue; + } + + key3 = key2; + key3 += (rangemax - key3.Verse()); + + printf("Found verse Break: ('%s')\n", (const char *)key2); + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startentry(char *buf) +{ + char colon = 0; + + if 
(buf[0] != 10) + return 0; + if (buf[1] != 10) + return 0; + if (!isdigit(buf[2])) + return 0; + if (!isdigit(buf[3])) { + if (buf[3]!= ':') + return 0; + else colon++; + } + if (!isdigit(buf[4])) { + if (buf[4]!= ':') + return 0; + else colon++; + } + if (colon != 1) + return 0; + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + buf[0] = ' '; + buf[1] = ' '; + sscanf(buf, "%d:%d", num1, num2); + *rangemax = *num2; + *offset = lseek(fp, 0, SEEK_CUR) - 5; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 2; + } + else { + *size = (offset2 - (*offset)) - 6; + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + if (!strncmp(buf, "$-$-$-", 6)) { + *offset = lseek(fp, 0, SEEK_CUR) - 1; + *num2 = 0; + (*num1)++; + printf("Book marker: %s\n", buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)) - 2; + } + else { + *size = (offset2 - (*offset)) - 6; + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + + + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", 
fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/comments/rawfiles/Makefile b/src/modules/comments/rawfiles/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/rawfiles/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/comments/rawfiles/Makefile.am b/src/modules/comments/rawfiles/Makefile.am new file mode 100644 index 0000000..53aadbe --- /dev/null +++ b/src/modules/comments/rawfiles/Makefile.am @@ -0,0 +1,3 @@ +rawfilesdir = $(top_srcdir)/src/modules/comments/rawfiles + +libsword_la_SOURCES += $(rawfilesdir)/rawfiles.cpp diff --git a/src/modules/comments/rawfiles/rawfiles.cpp b/src/modules/comments/rawfiles/rawfiles.cpp new file mode 100644 index 0000000..c8e9388 --- /dev/null +++ b/src/modules/comments/rawfiles/rawfiles.cpp @@ -0,0 +1,291 @@ +/****************************************************************************** + * rawfiles.cpp - code for class 'RawFiles'- a module that produces HTML HREFs + * pointing to actual text desired. Uses standard + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ + + /****************************************************************************** + * RawFiles Constructor - Initializes data for instance of RawFiles + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawFiles::RawFiles(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawVerse(ipath, O_RDWR), SWCom(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawFiles Destructor - Cleans up instance of RawFiles + */ + +RawFiles::~RawFiles() +{ +} + + +/****************************************************************************** + * RawFiles::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawFiles::getRawEntry() { + FileDesc *datafile; + long start = 0; + unsigned short size = 0; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + + if (size) { + tmpbuf = new char [ (size + 2) + strlen(path) + 5 ]; + sprintf(tmpbuf,"%s/",path); + gettext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_RDONLY|O_BINARY); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + size = lseek(datafile->getFd(), 0, SEEK_END); + entrybuf = new char [ size * FILTERPAD ]; + memset(entrybuf, 0, size * FILTERPAD); + lseek(datafile->getFd(), 0, SEEK_SET); + read(datafile->getFd(), entrybuf, size); + preptext(entrybuf); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + FileMgr::systemFileMgr.close(datafile); + } + else { + entrybuf = new char [2]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + if (key != this->key) + delete key; + + return entrybuf; +} + + +/****************************************************************************** + * RawFiles::operator << (char *)- Update the modules current key entry with + * provided text + * + * RET: *this + */ + +SWModule &RawFiles::operator <<(const char *inbuf) { + FileDesc *datafile; + long start; + unsigned short size; + char *tmpbuf; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ (size + 2) + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/", path); + gettext(key->Testament(), start, (size + 2), tmpbuf+strlen(tmpbuf)); + } + else { + tmpbuf = new char [ 16 + strlen(path) + 1 ]; + sprintf(tmpbuf, "%s/%s", path, getnextfilename()); + settext(key->Testament(), key->Index(), tmpbuf+strlen(path)+1); + } + datafile = FileMgr::systemFileMgr.open(tmpbuf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] tmpbuf; + if (datafile->getFd() > 0) { + write(datafile->getFd(), inbuf, strlen(inbuf)); + } + FileMgr::systemFileMgr.close(datafile); + + if (key != this->key) + delete key; + + return *this; +} + + +/****************************************************************************** + * RawFiles::operator << (SWKey *)- Link the modules current key entry with + * another module entry + * + * RET: *this + */ + +SWModule &RawFiles::operator <<(const SWKey *inkey) { + + long start; + unsigned short size; + char *tmpbuf; + const VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (size) { + tmpbuf = new char [ size + 2]; + gettext(key->Testament(), start, size + 2, tmpbuf); + + if (key != inkey) + delete key; + key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + settext(key->Testament(), key->Index(), tmpbuf); + } + + if (key != inkey) + delete key; + + return *this; +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawFiles::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * RawFiles::getnextfilename - generates a valid filename in which to store + * an entry + * + * RET: filename + */ + +char *RawFiles::getnextfilename() { + static char incfile[255]; + long number; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_RDONLY|O_BINARY); + if (read(datafile->getFd(), &number, 4) != 4) + number = 0; + number++; + FileMgr::systemFileMgr.close(datafile); + + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + write(datafile->getFd(), &number, 4); + FileMgr::systemFileMgr.close(datafile); + sprintf(incfile, "%.7ld", number-1); + return incfile; +} + + +char RawFiles::createModule (const char *path) { + char *incfile = new char [ strlen (path) + 16 ]; + static long zero = 0; + FileDesc *datafile; + + sprintf(incfile, "%s/incfile", path); + datafile = FileMgr::systemFileMgr.open(incfile, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC); + delete [] incfile; + write(datafile->getFd(), &zero, 4); + FileMgr::systemFileMgr.close(datafile); + + return RawVerse::createModule (path); +} + + + diff --git a/src/modules/comments/rawfiles/rawfilesgen.cpp b/src/modules/comments/rawfiles/rawfilesgen.cpp new file mode 100644 index 0000000..f60c9e2 
--- /dev/null +++ b/src/modules/comments/rawfiles/rawfilesgen.cpp @@ -0,0 +1,236 @@ +/***************************************************************************** + * + */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + +#ifndef O_BINARY + #define O_BINARY 0 +#endif + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(); +void checkparams(int argc, char **argv); +void charsetconvert(char *data); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; +char startflag = 0; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + short size, tmp; + extern struct zonline online; + + checkparams(argc, argv); + + key1 = key2 = key3 = "Genesis 1:1"; + + openfiles(); + + num1 = key1.Chapter(); + num2 = key1.Verse(); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + if (num1 <= key2.Chapter()) { // new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + printf("Created Empty Entry: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; +// continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, 
VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + size = 0; + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + key1++; +} + + +static VerseKey inckey = "Genesis 1:1"; + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + static int olbvnum = 0; + char data[16]; + + memset (data,0,16); + + if (++olbvnum <= 31102) { + + if (olbvnum == 23146) { // "Matthew 1:1" + close(vfp); + close(cfp); + close(bfp); + close(fp); + key1 = key2 = key3 = inckey = "Matthew 1:1"; + openfiles(); + startflag = 0; + } + + + *offset = lseek(fp, 0, SEEK_CUR); + + if ((olbvnum!=1) && (olbvnum != 23146)) + inckey++; + + *num1 = inckey.Chapter(); + *num2 = inckey.Verse(); + + + write(fp, data, 16); + + *size = lseek(fp, 0, SEEK_CUR) - *offset; + return 0; + } + return 1; +} + + +void openfiles() +{ + char buf[255]; + char fname[5]; + long pos; + short size; + + testmnt = key1.Testament(); + + strcpy(fname, (testmnt==2) ? 
"nt" : "ot"); + unlink(fname); + if ((fp = open(fname, O_CREAT|O_RDWR|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + unlink(buf); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + unlink(buf); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + unlink(buf); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + +} + + +void checkparams(int argc, char **argv) +{ + if (argc !=1) { + fprintf(stderr, "usage: %s\n", argv[0]); + exit(1); + } +} diff --git a/src/modules/comments/swcom.cpp b/src/modules/comments/swcom.cpp new file mode 100644 index 0000000..1feb0cf --- /dev/null +++ b/src/modules/comments/swcom.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * swcom.cpp - code for base class 'SWCom'- The basis for all commentary + * modules + */ + +#include + + +/****************************************************************************** + * SWCom Constructor - Initializes data for instance of SWCom + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWCom::SWCom(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, 
SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Commentaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); +} + + +/****************************************************************************** + * SWCom Destructor - Cleans up instance of SWCom + */ + +SWCom::~SWCom() +{ +} diff --git a/src/modules/comments/zcom/Makefile b/src/modules/comments/zcom/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/comments/zcom/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/comments/zcom/Makefile.am b/src/modules/comments/zcom/Makefile.am new file mode 100644 index 0000000..ad1ef6a --- /dev/null +++ b/src/modules/comments/zcom/Makefile.am @@ -0,0 +1,4 @@ +zcomdir = $(top_srcdir)/src/modules/comments/zcom + +libsword_la_SOURCES += $(zcomdir)/zcom.cpp + diff --git a/src/modules/comments/zcom/makeidx.c b/src/modules/comments/zcom/makeidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/comments/zcom/makeidx.c @@ -0,0 +1,146 @@ +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", 
buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + } + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = 
(short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/comments/zcom/rawtxt2z.cpp b/src/modules/comments/zcom/rawtxt2z.cpp new file mode 100644 index 0000000..f8c18d0 --- /dev/null +++ b/src/modules/comments/zcom/rawtxt2z.cpp @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include + + +main(int argc, char **argv) +{ + SWCompress *zobj; + int ifd, ofd, ixfd, oxfd; + long offset, loffset, lzoffset; + short size, lsize, lzsize; + char *tmpbuf; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + zobj = new SWCompress(); + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + tmpbuf = new char [ strlen(argv[1]) + 9 ]; + ifd = open(argv[1], O_RDONLY|O_BINARY); + sprintf(tmpbuf, "%s.vss", argv[1]); + ixfd = open(tmpbuf, O_RDONLY|O_BINARY); + sprintf(tmpbuf, "%s.zzz", argv[1]); + ofd = open(tmpbuf, O_WRONLY|O_BINARY|O_CREAT); + sprintf(tmpbuf, "%s.zzz.vss", argv[1]); + oxfd = open(tmpbuf, O_WRONLY|O_BINARY|O_CREAT); + + delete [] tmpbuf; + + printf("\n"); + + while (1) { + if (read(ixfd, &offset, 4) != 4) + break; + if (read(ixfd, &size, 2) != 2) + break; + + if ((offset == loffset) && (size == lsize)) { + printf("using previous offset,size\n", size); + write(oxfd, &lzoffset, 4); + write(oxfd, &lzsize, 2); + } + else { + printf("%d -> ", size); + lsize = size; + loffset = offset; + + if (size) { + tmpbuf = (char *) calloc(size + 1, 1); + lseek(ifd, offset, SEEK_SET); + read(ifd, tmpbuf, size); + zobj->Buf(tmpbuf); + zobj->zBuf(&size); + free(tmpbuf); + } + offset = lseek(ofd, 0, SEEK_END); + write(oxfd, &offset, 4); + if (size) + write(ofd, zobj->zBuf(&size), size); + lzoffset = offset; + write(oxfd, &size, 2); + 
lzsize = size; + printf("%d \n", size); + } + } + delete zobj; +} diff --git a/src/modules/comments/zcom/zcom.cpp b/src/modules/comments/zcom/zcom.cpp new file mode 100644 index 0000000..c53fd1e --- /dev/null +++ b/src/modules/comments/zcom/zcom.cpp @@ -0,0 +1,290 @@ +/****************************************************************************** + * rawcom.cpp - code for class 'zCom'- a module that reads raw commentary + * files: ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + + +/****************************************************************************** + * zCom Constructor - Initializes data for instance of zCom + * + * ENT: ipath - path to data files + * iname - Internal name for module + * idesc - Name to display to user for module + * iblockType - verse, chapter, book, etc. of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zCom::zCom(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWCom(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + +/****************************************************************************** + * zCom Destructor - Cleans up instance of zCom + */ + +zCom::~zCom() { + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + +/****************************************************************************** + * zCom::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ +char *zCom::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, 
this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + swgettext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +bool zCom::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + +SWModule &zCom::setentry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + + // see if we've jumped across blocks since last write + if (lastWriteKey) { + if (!sameBlock(lastWriteKey, key)) { + flushCache(); + } + delete lastWriteKey; + } + + settext(key->Testament(), key->Index(), inbuf, len); + + lastWriteKey = (VerseKey *)key->clone(); // must delete + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return *this; +} + +SWModule &zCom::operator <<(const char *inbuf) { + return setentry(inbuf, 0); +} + + +SWModule &zCom::operator <<(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + destkey = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... ) { + } +#endif + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; + + return *this; +} + +/****************************************************************************** + * zCom::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zCom::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * zCom::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &zCom::operator +=(int increment) { + long start; + unsigned short size; + VerseKey *tmpkey = 0; + +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (increment) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (increment > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { + increment += (increment < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; + + return *this; +} + diff --git a/src/modules/common/Makefile b/src/modules/common/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/modules/common/Makefile @@ -0,0 +1,4 @@ +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/common/Makefile.am b/src/modules/common/Makefile.am new file mode 100644 index 0000000..ac235d8 --- /dev/null +++ b/src/modules/common/Makefile.am @@ -0,0 +1,22 @@ +commondir = $(top_srcdir)/src/modules/common + +libsword_la_SOURCES += $(commondir)/rawstr.cpp +libsword_la_SOURCES += $(commondir)/rawstr4.cpp +libsword_la_SOURCES += $(commondir)/swcomprs.cpp +libsword_la_SOURCES += $(commondir)/lzsscomprs.cpp + +if ZLIB +SWZLIB = $(commondir)/zipcomprs.cpp +else +SWZLIB = +endif +libsword_la_SOURCES += $(SWZLIB) +libsword_la_SOURCES += $(commondir)/rawverse.cpp +libsword_la_SOURCES += $(commondir)/swcipher.cpp +libsword_la_SOURCES += $(commondir)/zverse.cpp +libsword_la_SOURCES += $(commondir)/zstr.cpp +libsword_la_SOURCES += $(commondir)/entriesblk.cpp + +DEFS += -DUNIX +libsword_la_SOURCES += $(commondir)/sapphire.cpp + diff --git a/src/modules/common/compress.cpp b/src/modules/common/compress.cpp new file mode 100644 index 0000000..5031adb --- /dev/null +++ b/src/modules/common/compress.cpp @@ -0,0 +1,767 @@ +Compression Info, 10-11-95 +Jeff Wheeler + +Source of Algorithm +------------------- + +The compression algorithms used here are based upon the algorithms developed and published by Haruhiko Okumura in a paper entitled "Data Compression Algorithms of LARC and LHarc." This paper discusses three compression algorithms, LZSS, LZARI, and LZHUF. LZSS is described as the "first" of these, and is described as providing moderate compression with good speed. LZARI is described as an improved LZSS, a combination of the LZSS algorithm with adaptive arithmetic compression. It is described as being slower than LZSS but with better compression. LZHUF (the basis of the common LHA compression program) was included in the paper, however, a free usage license was not included. + +The following are copies of the statements included at the beginning of each source code listing that was supplied in the working paper. 
+ + LZSS, dated 4/6/89, marked as "Use, distribute and + modify this program freely." + + LZARI, dated 4/7/89, marked as "Use, distribute and + modify this program freely." + + LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, + translated by Haruhiko Okumura on 4/7/89. Not + expressly marked as redistributable or modifiable. + +Since both LZSS and LZARI are marked as "use, distribute and modify freely" we have felt at liberty to base our compression algorithm on either of these. + +Selection of Algorithm +---------------------- + +Working samples of three possible compression algorithms are supplied in Okumura's paper. Which should be used? + +LZSS is the fastest at decompression, but does not generate as small a compressed file as the other methods. The other two methods provide, perhaps, a 15% improvement in compression. Or, put another way, on a 100K file, LZSS might compress it to 50K while the others might approach 40-45K. For STEP purposes, it was decided that decoding speed was of more importance than tighter compression. For these reasons, the first compression algorithm implemented is the LZSS algorithm. + +About LZSS Encoding +------------------- + +(adapted from Haruhiko Okumura's paper) + +This scheme was proposed by Ziv and Lempel [1]. A slightly modified version is described by Storer and Szymanski [2]. An implementation using a binary tree has been proposed by Bell [3]. + +The algorithm is quite simple. +1. Keep a ring buffer which initially contains all space characters. +2. Read several letters from the file to the buffer. +3. Search the buffer for the longest string that matches the letters just read, and send its length and position in the buffer. + +If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the length is represented in 4 bits, the pair is two bytes long. If the longest match is no more than two characters, then just one character is sent without encoding. The process starts again with the next character. 
An extra bit is sent each time to tell the decoder whether the next item is a character or a pair. + +[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977). +[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982). +[3] T. C. Bell, IEEE Transactions COM-34, 1176-1182 (1986). + +void InitTree( // no return value + void); // no parameters + +void InsertNode( // no return value + short int Pos); // position in the buffer + +void DeleteNode( // no return value + short int Node); // node to be removed + +void Encode( // no return value + void); // no parameters + +void Decode( // no return value + void); // no parameters + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. It is important to note that a position +// within the ring buffer requires 12 bits. +// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18, it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. 
When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int m_match_position; +short int m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int m_lson[N + 1]; +short int m_rson[N + 257]; +short int m_dad[N + 1]; + +/* + ------------------------------------------------------------------------- + cLZSS::InitTree + + This function initializes the tree nodes to "empty" states. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::InitTree( // no return value + void) // no parameters + throw() // exception list + + { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) + { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. + + for (i = N + 1; i <= (N + 256); i++) + { + m_rson[i] = NOT_USED; + } + + // Done. + } + +/* + ------------------------------------------------------------------------- + cLZSS::InsertNode + + This function inserts a string from the ring buffer into one of + the trees. It loads the match position and length member variables + for the longest match. + + The string to be inserted is identified by the parameter Pos, + A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1] + are inserted. + + If the matched length is exactly F, then an old node is removed + in favor of the new one (because the old one will be deleted + sooner). + + Note that Pos plays a dual role. It is used as both a position + in the ring buffer and also as a tree node. m_ring_buffer[Pos] + defines a character that is used to identify a tree node. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::InsertNode( // no return value + short int Pos) // position in the buffer + throw() // exception list + + { + short int i; + short int p; + int cmp; + unsigned char * key; + + ASSERT(Pos >= 0); + ASSERT(Pos < N); + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) + { + if (cmp >= 0) + { + if (m_rson[p] != NOT_USED) + { + p = m_rson[p]; + } + else + { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else + { + if (m_lson[p] != NOT_USED) + { + p = m_lson[p]; + } + else + { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? + + for (i = 1; i < F; i++) + { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) + { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) + { + m_rson[ m_dad[p] ] = Pos; + } + else + { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::DeleteNode + + This function removes the node "Node" from the tree. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::DeleteNode( // no return value + short int Node) // node to be removed + throw() // exception list + + { + short int q; + + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); + + if (m_dad[Node] == NOT_USED) + { + // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) + { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) + { + q = m_rson[Node]; + } + else + { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) + { + do + { + q = m_rson[q]; + } + while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) + { + m_rson[ m_dad[Node] ] = q; + } + else + { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::Encode + + This function "encodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Encode( // no return value + void) // no parameters + + { + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + + // code_buf[0] works as eight flags. 
A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars(&(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) + { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do + { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) + { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. 
+ + if (m_match_length < THRESHOLD) + { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else + { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (mask == 0) + { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars(code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) + { + + // Get next character... + + if (GetChars(&c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. 
+ // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) + { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) + { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) + { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } + while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) + { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars(code_buf, code_buf_pos); + } + + // Done! 
+ } + +/* + ------------------------------------------------------------------------- + cLZSS::Decode + + This function "decodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Decode( // no return value + void) // no parameters + + { + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) + { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) + { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else + { + // Next byte must be a flag. + + if (GetChars(&flags, 1) != 1) + break; + + // Set the flag counter. While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) + { + if (GetChars(c, 1) != 1) + break; + + if (SendChars(c, 1) != 1) + break; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // pair. 
The position is in 12 bits and + // the length is in 4 bits. + + else + { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars(c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) + { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" characters to the output stream. 
+
+            if (SendChars(c, len) != len)
+                break;
+            }
+        }
+    }
+
diff --git a/src/modules/common/entriesblk.cpp b/src/modules/common/entriesblk.cpp
new file mode 100644
index 0000000..d38cf53
--- /dev/null
+++ b/src/modules/common/entriesblk.cpp
@@ -0,0 +1,204 @@
+#include
+#include
+#include
+
+const int EntriesBlock::METAHEADERSIZE = 4;
+    // count(4);
+const int EntriesBlock::METAENTRYSIZE = 8;
+    // offset(4); size(4);
+
+// Layout of 'block' as read and written by the methods below:
+//   count(4) | count x { offset(4) size(4) } | entry data ...
+// Each 32-bit field passes through archtosword32()/swordtoarch32().
+// An offset of 0 in a meta entry marks the entry as deleted.
+
+
+// Creates a block holding a private copy of the 'size' bytes at iBlock.
+EntriesBlock::EntriesBlock(const char *iBlock, unsigned long size) {
+    // never allocate less than the count header, so getCount() stays
+    // inside the buffer even for an empty or truncated input
+    unsigned long allocSize = (size > (unsigned long)METAHEADERSIZE) ? size : (unsigned long)METAHEADERSIZE;
+    block = (char *)calloc(1, allocSize);
+    if (block && iBlock && size)
+        memcpy(block, iBlock, size);
+}
+
+
+// Creates an empty block: a zeroed count header and no entries.
+EntriesBlock::EntriesBlock() {
+    block = (char *)calloc(1, sizeof(__u32));
+}
+
+
+EntriesBlock::~EntriesBlock() {
+    free(block);
+}
+
+
+// Stores the number of meta entries in the header.
+void EntriesBlock::setCount(int count) {
+    __u32 rawCount = archtosword32(count);
+    memcpy(block, &rawCount, sizeof(__u32));
+}
+
+
+// Returns the number of meta entries recorded in the header.
+int EntriesBlock::getCount() {
+    __u32 count = 0;
+    memcpy(&count, block, sizeof(__u32));
+    count = swordtoarch32(count);
+    return count;
+}
+
+
+// Reads meta entry 'index' into *offset and *size.
+// Both are set to 0 when index is outside [0, count).
+void EntriesBlock::getMetaEntry(int index, unsigned long *offset, unsigned long *size) {
+    __u32 rawOffset = 0;
+    __u32 rawSize = 0;
+    *offset = 0;
+    *size = 0;
+    if ((index < 0) || (index >= getCount()))    // assert 0 <= index < count
+        return;
+
+    // first 4 bytes is count, each 8 bytes after is one meta entry
+    const char *entry = block + METAHEADERSIZE + (index * METAENTRYSIZE);
+    memcpy(&rawOffset, entry, sizeof(rawOffset));
+    memcpy(&rawSize, entry + sizeof(rawOffset), sizeof(rawSize));
+
+    *offset = (unsigned long)swordtoarch32(rawOffset);
+    *size = (unsigned long)swordtoarch32(rawSize);
+}
+
+
+// Overwrites meta entry 'index'; ignored when index is outside [0, count).
+void EntriesBlock::setMetaEntry(int index, unsigned long offset, unsigned long size) {
+    __u32 rawOffset = archtosword32(offset);
+    __u32 rawSize = archtosword32(size);
+
+    if ((index < 0) || (index >= getCount()))    // assert 0 <= index < count
+        return;
+
+    // first 4 bytes is count, each 8 bytes after is one meta entry
+    char *entry = block + METAHEADERSIZE + (index * METAENTRYSIZE);
+    memcpy(entry, &rawOffset, sizeof(rawOffset));
+    memcpy(entry + sizeof(rawOffset), &rawSize, sizeof(rawSize));
+}
+
+
+// Returns the block and stores in *retSize the number of bytes in use:
+// the header and meta table, or the end of the furthest entry if larger.
+const char *EntriesBlock::getRawData(unsigned long *retSize) {
+    int count = getCount();
+    // The meta table is always part of the data, even when every entry
+    // has been removed; starting from the bare header here used to make
+    // addEntry() compute a wrapped (huge) move length in that case.
+    unsigned long max = METAHEADERSIZE + (count * METAENTRYSIZE);
+    unsigned long offset;
+    unsigned long size;
+    for (int loop = 0; loop < count; loop++) {
+        getMetaEntry(loop, &offset, &size);
+        max = ((offset + size) > max) ? (offset + size) : max;
+    }
+    *retSize = max;
+    return block;
+}
+
+
+// Appends a copy of the NUL-terminated string 'entry'.
+// Returns the index of the new entry, or -1 if memory could not be grown
+// (the block is left unchanged in that case).
+int EntriesBlock::addEntry(const char *entry) {
+    unsigned long dataSize;
+    getRawData(&dataSize);
+    unsigned long len = strlen(entry);
+    unsigned long offset;
+    unsigned long size;
+    int count = getCount();
+    unsigned long dataStart = METAHEADERSIZE + (count * METAENTRYSIZE);
+    // new meta entry + new data size + 1 because null
+    char *grown = (char *)realloc(block, dataSize + METAENTRYSIZE + len + 1);
+    if (!grown)    // realloc leaves the old block valid on failure
+        return -1;
+    block = grown;
+    // shift right to make room for new meta entry
+    memmove(block + dataStart + METAENTRYSIZE, block + dataStart, dataSize - dataStart);
+
+    for (int loop = 0; loop < count; loop++) {
+        getMetaEntry(loop, &offset, &size);
+        if (offset) {    // if not a deleted entry
+            offset += METAENTRYSIZE;
+            setMetaEntry(loop, offset, size);
+        }
+    }
+
+    offset = dataSize;    // original dataSize before realloc
+    size = len + 1;
+    // add our text to the end
+    memcpy(block + offset + METAENTRYSIZE, entry, size);
+    // increment count
+    setCount(count + 1);
+    // add our meta entry
+    setMetaEntry(count, offset + METAENTRYSIZE, size);
+    // return index of our new entry
+    return count;
+}
+
+
+// Returns the text of entry 'entryIndex', or "" for a deleted or
+// out-of-range entry.  The pointer refers into the block, which
+// addEntry() may reallocate and removeEntry() shifts in place.
+const char *EntriesBlock::getEntry(int entryIndex) {
+    unsigned long offset;
+    unsigned long size;
+    static const char *empty = "";
+
+    getMetaEntry(entryIndex, &offset, &size);
+    return (offset) ? block+offset : empty;
+}
+
+
+// Returns the size recorded for entry 'entryIndex' (addEntry() stores the
+// text length + 1), or 0 for a deleted or out-of-range entry.
+unsigned long EntriesBlock::getEntrySize(int entryIndex) {
+    unsigned long offset;
+    unsigned long size;
+
+    getMetaEntry(entryIndex, &offset, &size);
+    return (offset) ? size : 0;
+}
+
+
+// Removes entry 'entryIndex': closes the gap in the data area and zeroes
+// the meta entry.  The meta slot stays in place and count is unchanged.
+void EntriesBlock::removeEntry(int entryIndex) {
+    unsigned long offset;
+    unsigned long size;
+    unsigned long dataSize;
+    getRawData(&dataSize);
+    getMetaEntry(entryIndex, &offset, &size);
+    int count = getCount();
+
+    if (!offset)    // already deleted
+        return;
+
+    // shift left to retrieve space used for old entry
+    memmove(block + offset, block + offset + size, dataSize - (offset + size));
+
+    // fix offset for all entries that were shifted left, i.e. those
+    // stored behind the removed one
+    for (int loop = 0; loop < count; loop++) {
+        unsigned long otherOffset;
+        unsigned long otherSize;
+        getMetaEntry(loop, &otherOffset, &otherSize);
+        if (otherOffset > offset) {    // live (non-zero) and behind the gap
+            otherOffset -= size;
+            setMetaEntry(loop, otherOffset, otherSize);
+        }
+    }
+
+    // zero out our meta entry
+    setMetaEntry(entryIndex, 0L, 0);
+}
+
+
diff --git a/src/modules/common/lzsscomprs.cpp b/src/modules/common/lzsscomprs.cpp
new file mode 100644
index 0000000..3606fbc
--- /dev/null
+++ b/src/modules/common/lzsscomprs.cpp
@@ -0,0 +1,665 @@
+/******************************************************************************
+ *  lzsscomprs.cpp   - code for class 'LZSSCompress'- a driver class that
+ *                     provides LZSS compression
+ */
+
+#include
+#include
+#include
+
+
+/******************************************************************************
+ * LZSSCompress Statics
+ */
+
+// m_ring_buffer is a text buffer.  It contains "nodes" of
+// uncompressed text that can be indexed by position.  That is,
+// a substring of the ring buffer can be indexed by a position
+// and a length.  When decoding, the compressed text may contain
+// a position in the ring buffer and a count of the number of
+// bytes from the ring buffer that are to be moved into the
+// uncompressed buffer.
+//
+// This ring buffer is not maintained as part of the compressed
+// text.  Instead, it is reconstructed dynamically.
That is,
+// it starts out empty and gets built as the text is decompressed.
+//
+// The ring buffer contains N bytes, with an additional F - 1 bytes
+// to facilitate string comparison.
+
+unsigned char LZSSCompress::m_ring_buffer[N + F - 1];
+
+// m_match_position and m_match_length are set by InsertNode().
+//
+// These variables indicate the position in the ring buffer
+// and the number of characters at that position that match
+// a given string.
+
+short int LZSSCompress::m_match_position;
+short int LZSSCompress::m_match_length;
+
+// m_lson, m_rson, and m_dad describe binary trees as three
+// parallel arrays.  The dad is the parent and it has a right and
+// left son (child).
+//
+// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right
+// and left children of node i.
+//
+// For i = 0 to N-1, m_dad[i] is the parent of node i.
+//
+// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree for
+// strings that begin with the character i.  Note that this requires
+// one byte characters.
+//
+// These nodes store values of 0...(N-1).  Memory requirements
+// can be reduced by using 2-byte integers instead of full 4-byte
+// integers (for 32-bit applications).  Therefore, these are
+// defined as "short ints."
+
+short int LZSSCompress::m_lson[N + 1];
+short int LZSSCompress::m_rson[N + 257];
+short int LZSSCompress::m_dad[N + 1];
+
+
+/******************************************************************************
+ * LZSSCompress Constructor - Runs the SWCompress base constructor only; the
+ *                            LZSS working state lives in the statics above.
+ */
+
+LZSSCompress::LZSSCompress() : SWCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress Destructor - Empty; this class adds nothing to release.
+ */
+
+LZSSCompress::~LZSSCompress() {
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InitTree - This function initializes the tree nodes to
+ *                          "empty" states.
+ */
+
+void LZSSCompress::InitTree(void) {
+    int node;       // index of a tree node, 0 .. N - 1
+    int root;       // number of a per-character root, 0 .. 255
+    // Every node starts out detached.  Clearing m_dad is what really
+    // matters, because a parent of NOT_USED is how a node is
+    // recognised as "not in any tree".  The two child arrays would
+    // work without being cleared, but a known starting value makes
+    // debugging easier, and this routine only runs when compressing,
+    // so the extra time spent here is of no concern.
+    //
+    // The three stores are independent of one another, so the order
+    // in which the nodes are visited makes no difference.
+
+    for (node = N - 1; node >= 0; node--) {
+        m_dad[node] = NOT_USED;
+        m_rson[node] = NOT_USED;
+        m_lson[node] = NOT_USED;
+    }
+
+    // m_rson has 256 additional slots, N + 1 .. N + 256, one for
+    // each possible value of a one-byte character.  Slot
+    // N + 1 + c holds the root of the tree of strings whose first
+    // character is c, which is the reason m_rson is larger than
+    // m_lson.
+    //
+    // All of these roots begin as "not used" as well.
+
+    for (root = 0; root < 256; root++) {
+        m_rson[N + 1 + root] = NOT_USED;
+    }
+}
+
+
+/******************************************************************************
+ * LZSSCompress::InsertNode - This function inserts a string from the ring
+ *                            buffer into one of the trees.  It loads the
+ *                            match position and length member variables
+ *                            for the longest match.
+ *
+ *                            The string to be inserted is identified by
+ *                            the parameter Pos, A full F bytes are
+ *                            inserted.  So,
+ *                            m_ring_buffer[Pos ... Pos+F-1]
+ *                            are inserted.
+ *
+ *                            If the matched length is exactly F, then an
+ *                            old node is removed in favor of the new one
+ *                            (because the old one will be deleted
+ *                            sooner).
+ *
+ *                            Note that Pos plays a dual role.  It is
+ *                            used as both a position in the ring buffer
+ *                            and also as a tree node.
+ *                            m_ring_buffer[Pos] defines a character that
+ *                            is used to identify a tree node.
+ *
+ * ENT:    Pos - ring-buffer index of the string to insert; doubles as its node number
+ */
+
+void LZSSCompress::InsertNode(short int Pos)
+{
+    short int i;            // number of leading bytes that matched at node p
+    short int p;            // node the key is currently compared against
+    int cmp;                // sign of the first differing byte (key minus node)
+    unsigned char * key;    // start of the string being inserted
+
+/*
+    ASSERT(Pos >= 0);
+    ASSERT(Pos < N);
+*/
+
+    cmp = 1;                // makes the first step follow m_rson[p] (the root slot)
+    key = &(m_ring_buffer[Pos]);
+
+    // Slots N + 1 .. N + 256 of m_rson hold the root of one tree
+    // per possible first byte.  Start the search at the root slot
+    // selected by the first byte of this string.
+
+    p = (short int) (N + 1 + key[0]);
+
+    // Set the left and right tree nodes for this position to "not
+    // used."
+
+    m_lson[Pos] = NOT_USED;
+    m_rson[Pos] = NOT_USED;
+
+    // Haven't matched anything yet.
+
+    m_match_length = 0;
+
+    for ( ; ; ) {
+        if (cmp >= 0) {
+            if (m_rson[p] != NOT_USED) {
+                p = m_rson[p];
+            }
+            else {
+                m_rson[p] = Pos;        // free slot: attach Pos here as a leaf
+                m_dad[Pos] = p;
+                return;
+            }
+        }
+        else {
+            if (m_lson[p] != NOT_USED) {
+                p = m_lson[p];
+            }
+            else {
+                m_lson[p] = Pos;        // free slot: attach Pos here as a leaf
+                m_dad[Pos] = p;
+                return;
+            }
+        }
+
+        // Compare the key with the string stored at node p.  Byte 0
+        // is skipped: it already matches, since it selected the tree.
+
+        for (i = 1; i < F; i++) {
+            cmp = key[i] - m_ring_buffer[p + i];
+            if (cmp != 0)
+                break;
+        }
+
+        if (i > m_match_length) {       // longest match so far: record it
+            m_match_position = p;
+            m_match_length = i;
+
+            if (i >= F)                 // all F bytes equal: stop searching
+                break;
+        }
+    }
+
+    m_dad[Pos] = m_dad[p];              // full-length match: Pos takes over p's links
+    m_lson[Pos] = m_lson[p];
+    m_rson[Pos] = m_rson[p];
+
+    m_dad[ m_lson[p] ] = Pos;           // NOTE(review): index is NOT_USED when p has no such child
+    m_dad[ m_rson[p] ] = Pos;           // presumably absorbed by the extra m_dad slot - confirm
+
+    if (m_rson[ m_dad[p] ] == p) {
+        m_rson[ m_dad[p] ] = Pos;
+    }
+    else {
+        m_lson[ m_dad[p] ] = Pos;
+    }
+
+    // Mark "p" as no longer being in any tree.
+
+    m_dad[p] = NOT_USED;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::DeleteNode - This function removes the node "Node" from the
+ *                            tree.
+ *
+ * ENT:    Node - node to be removed; a no-op when its m_dad entry is NOT_USED
+ */
+
+void LZSSCompress::DeleteNode(short int Node)
+{
+    short int  q;           // node that takes over Node's place in the tree
+
+/*
+    ASSERT(Node >= 0);
+    ASSERT(Node < (N+1));
+*/
+
+    if (m_dad[Node] == NOT_USED) { // not in tree, nothing to do
+        return;
+    }
+
+    if (m_rson[Node] == NOT_USED) {         // no right child: left child (if any) moves up
+        q = m_lson[Node];
+    }
+    else if (m_lson[Node] == NOT_USED) {    // no left child: right child moves up
+        q = m_rson[Node];
+    }
+    else {                                  // two children
+        q = m_lson[Node];
+        if (m_rson[q] != NOT_USED) {
+            do {                            // walk to the rightmost node of the left subtree
+                q = m_rson[q];
+            } while (m_rson[q] != NOT_USED);
+
+            m_rson[ m_dad[q] ] = m_lson[q]; // unlink q; its left child takes q's old slot
+            m_dad[ m_lson[q] ] = m_dad[q];
+            m_lson[q] = m_lson[Node];       // q adopts Node's left subtree
+            m_dad[ m_lson[Node] ] = q;
+        }
+
+        m_rson[q] = m_rson[Node];           // q adopts Node's right subtree
+        m_dad[ m_rson[Node] ] = q;
+    }
+
+    m_dad[q] = m_dad[Node];                 // hang q under Node's former parent
+
+    if (m_rson[ m_dad[Node] ] == Node) {
+        m_rson[ m_dad[Node] ] = q;
+    }
+    else {
+        m_lson[ m_dad[Node] ] = q;
+    }
+
+    m_dad[Node] = NOT_USED;                 // Node is now recognisable as "not in tree"
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Encode - This function "encodes" the input stream into the
+ *                        output stream.
+ *                        The GetChars() and SendChars() functions are
+ *                        used to separate this method from the actual
+ *                        i/o.
+ *         NOTE:          must set zlen for parent class to know length of
+ *                        compressed buffer.
+ */
+
+void LZSSCompress::Encode(void)
+{
+    short int i;                        // an iterator
+    short int r;                        // node number in the binary tree
+    short int s;                        // position in the ring buffer
+    unsigned short int len;             // len of initial string
+    short int last_match_length;        // length of last match
+    short int code_buf_pos;             // position in the output buffer
+    unsigned char code_buf[17];         // the output buffer
+    unsigned char mask;                 // bit mask for byte 0 of out buf
+    unsigned char c;                    // character read from string
+
+    // Start with a clean tree.
+
+    InitTree();
+    direct = 0;    // set direction needed by parent [Get|Send]Chars()
+
+    // code_buf[0] works as eight flags.
A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars((char *) &(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. + + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. 
+ + if (m_match_length < THRESHOLD) { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (!mask) { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars((char *) code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... + + for (i = 0; i < last_match_length; i++) { + // Get next character... + + if (GetChars((char *) &c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. 
+ // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). + + if (--len) { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. 
+
+        SendChars((char *) code_buf, code_buf_pos);
+    }
+
+
+    // must set zlen for parent class to know length of compressed buffer
+    zlen = zpos;
+}
+
+
+/******************************************************************************
+ * LZSSCompress::Decode - Expands the LZSS stream read through GetChars()
+ *                        and writes the plain bytes through SendChars().
+ *                        Stops at the first short read or short write.
+ *                        On exit slen holds the number of bytes that
+ *                        were successfully written.
+ */
+
+void LZSSCompress::Decode(void)
+{
+    int k;
+    int r;                      // node number
+    unsigned char c[F];         // an array of chars
+    unsigned char flags;        // 8 bits of flags
+    int flag_count;             // which flag we're on
+    short int pos;              // position in the ring buffer
+    short int len;              // number of chars in ring buffer
+    unsigned long totalLen = 0; // bytes handed to SendChars() successfully
+
+    direct = 1;    // set direction needed by parent [Get|Send]Chars()
+
+    // Initialize the ring buffer with a common string.
+    //
+    // Note that the last F bytes of the ring buffer are not filled.
+
+    memset(m_ring_buffer, ' ', N - F);
+
+    r = N - F;
+
+    flags = (char) 0;
+    flag_count = 0;
+
+    for ( ; ; ) {
+
+        // If there are more bits of interest in this flag, then
+        // shift that next interesting bit into the 1's position.
+        //
+        // If this flag has been exhausted, the next byte must
+        // be a flag.
+
+        if (flag_count > 0) {
+            flags = (unsigned char) (flags >> 1);
+            flag_count--;
+        }
+        else {
+            // Next byte must be a flag.
+
+            if (GetChars((char *) &flags, 1) != 1)
+                break;
+
+            // Set the flag counter.  While at first it might appear
+            // that this should be an 8 since there are 8 bits in the
+            // flag, it should really be a 7 because the shift must
+            // be performed 7 times in order to see all 8 bits.
+
+            flag_count = 7;
+        }
+
+        // If the low order bit of the flag is now set, then we know
+        // that the next byte is a single, unencoded character.
+
+        if (flags & 1) {
+            if (GetChars((char *) c, 1) != 1)
+                break;
+
+            if (SendChars((char *) c, 1) != 1) {
+                break;
+            }
+            totalLen++;     // count only what was actually written
+
+            // Add to buffer, and increment to next spot. Wrap at end.
+
+            m_ring_buffer[r] = c[0];
+            r = (short int) ( (r + 1) & (N - 1) );
+        }
+
+        // Otherwise, we know that the next two bytes are a
+        // (position, length) pair.  The position is in 12 bits and
+        // the length is in 4 bits.
+
+        else {
+            // Original code:
+            //  if ((i = getc(infile)) == EOF)
+            //      break;
+            //  if ((j = getc(infile)) == EOF)
+            //      break;
+            //  i |= ((j & 0xf0) << 4);
+            //  j = (j & 0x0f) + THRESHOLD;
+            //
+            // I've modified this to only make one input call, and
+            // have changed the variable names to something more
+            // obvious.
+
+            if (GetChars((char *) c, 2) != 2)
+                break;
+
+            // Convert these two characters into the position and
+            // length.  Note that the length is always at least
+            // THRESHOLD, which is why we're able to get a length
+            // of 18 out of only 4 bits.
+
+            pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) );
+
+            len = (short int) ( (c[1] & 0x0f) + THRESHOLD );
+
+            // There are now "len" characters at position "pos" in
+            // the ring buffer that can be pulled out.  Note that
+            // len is never more than F.
+
+            for (k = 0; k < len; k++) {
+                c[k] = m_ring_buffer[(pos + k) & (N - 1)];
+
+                // Add to buffer, and increment to next spot. Wrap at end.
+
+                m_ring_buffer[r] = c[k];
+                r = (short int) ( (r + 1) & (N - 1) );
+            }
+
+            // Add the "len" characters to the output stream.
+
+            if (SendChars((char *) c, len) != (unsigned int)len) {
+                break;
+            }
+            totalLen += len;    // count only what was actually written
+        }
+    }
+    slen = totalLen;
+}
diff --git a/src/modules/common/rawstr.cpp b/src/modules/common/rawstr.cpp
new file mode 100644
index 0000000..787946c
--- /dev/null
+++ b/src/modules/common/rawstr.cpp
@@ -0,0 +1,565 @@
+/******************************************************************************
+ *  rawstr.cpp   - code for class 'RawStr'- a module that reads raw text
+ *                 files:  ot and nt using indexs ??.bks ??.cps ??.vss
+ *                 and provides lookup and parsing functions based on
+ *                 class StrKey
+ */
+
+
+#include
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#else
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+/******************************************************************************
+ * RawStr Statics
+ */
+
+int RawStr::instance = 0;
+char RawStr::nl = '\n';
+
+
+/******************************************************************************
+ * RawStr Constructor - Initializes data for instance of RawStr
+ *
+ * ENT:    ipath - path of the directory where data and index files are located.
+ *         be sure to include the trailing separator (e.g. '/' or '\')
+ *         (e.g. 'modules/texts/rawtext/webster/')
+ */
+
+RawStr::RawStr(const char *ipath, int fileMode)
+{
+    char buf[127];
+
+    lastoff = -1;
+    path = 0;
+    stdstr(&path, ipath);
+
+#ifndef O_BINARY                // O_BINARY is needed in Borland C++ 4.53
+#define O_BINARY 0              // If it hasn't been defined than we probably
+#endif                          // don't need it.
+ + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd < 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + instance++; +} + + +/****************************************************************************** + * RawStr Destructor - Cleans up instance of RawStr + */ + +RawStr::~RawStr() +{ + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); +} + + +/****************************************************************************** + * RawStr::getidxbufdat - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbufdat(long ioffset, char **buf) +{ + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + for (size--; size > 0; size--) + (*buf)[size] = SW_toupper((*buf)[size]); + } + else { + *buf = (*buf) ? 
(char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * RawStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is allocated and must be freed by + * calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void RawStr::getidxbuf(long ioffset, char **buf) +{ + char *trybuf, *targetbuf; + long offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, 4); + + offset = swordtoarch32(offset); + + getidxbufdat(offset, buf); + for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) { +/* + if (*trybuf == '-') { // ignore '-' because alphabetized silly in file + targetbuf--; + continue; + } +*/ + *targetbuf = SW_toupper(*trybuf); + } + *targetbuf = 0; + trybuf = 0; + } +} + + +/****************************************************************************** + * RawStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * start - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char RawStr::findoffset(const char *ikey, long *start, unsigned short *size, long away, long *idxoff) +{ + char *trybuf, *targetbuf, *key, quitflag = 0; + signed char retval = 0; + long headoff, tailoff, tryoff = 0, maxoff = 0; + + if (idxfd->getFd() >=0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 6; + if (*ikey) { + headoff = 0; + + key = new char [ strlen(ikey) + 1 ]; + strcpy(key, ikey); + + for (trybuf = targetbuf = key; *trybuf; trybuf++, targetbuf++) { + /* + if (*trybuf == '-') { // ignore '-' because alphabetized silly in file + targetbuf--; + continue; + } + */ + *targetbuf = SW_toupper(*trybuf); + } + *targetbuf = 0; 
+ trybuf = 0; + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + ((((tailoff / 6) - (headoff / 6))) / 2) * 6 : lastoff; + lastoff = -1; + getidxbuf(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-6:6; + retval = -1; + break; + } + + if (!strcmp(key, trybuf)) + break; + + int diff = strcmp(key, trybuf); + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + 6) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else tryoff = 0; + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + *start = *size = 0; + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + while (away) { + long laststart = *start; + unsigned short lastsize = *size; + long lasttry = tryoff; + tryoff += (away > 0) ? 6 : -6; + + bool bad = false; + if (((tryoff + (away*6)) < -6) || (tryoff + (away*6) > (maxoff+6))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + *start = laststart; + *size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), start, 4); + read(idxfd->getFd(), size, 2); + if (idxoff) + *idxoff = tryoff; + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size)) + away += (away < 0) ? 
1 : -1; + } + + lastoff = tryoff; + } + else { + *start = 0; + *size = 0; + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * RawStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawStr::preptext(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawStr::gettext - gets text at a given offset + * + * ENT: + * start - starting offset where the text is located in the file + * size - size of text entry + * buf - buffer to store text + * + */ + +void RawStr::gettext(long istart, unsigned short isize, char *idxbuf, char *buf) +{ + char *ch; + char *idxbuflocal = 0; + getidxbufdat(istart, &idxbuflocal); + long start = istart; + unsigned short size = isize; + + do { + memset(buf, 0, size); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), buf, (int)(size - 2)); + + for (ch = buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + size -= (unsigned short)(ch-buf); + memmove(buf, ch, size); + buf[size] = 0; + buf[size+1] = 0; + + // resolve link + if (!strncmp(buf, "@LINK", 5)) { + for (ch = 
buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(buf + 6, &start, &size); + // TODO: FIX! THIS IS WRONG!!! buf is not reallocated for the appropriate size! + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + int localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(idxbuf, idxbuflocal, localsize); + idxbuf[localsize] = 0; + free(idxbuflocal); + } +} + + +/****************************************************************************** + * RawLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawStr::settext(const char *ikey, const char *buf, long len) +{ + + long start, outstart; + long idxoff; + long endoff; + long shiftSize; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + findoffset(ikey, &start, &size, 0, &idxoff); + stdstr(&key, ikey); + for (ch = key; *ch; ch++) + *ch = SW_toupper(*ch); + ch = 0; + + getidxbufdat(start, &dbKey); + + if (strcmp(key, dbKey) < 0) { + } + else if (strcmp(key, dbKey) > 0) { + idxoff += 6; + } else if ((!strcmp(key, dbKey)) && (len || strlen(buf) /*we're not deleting*/)) { // got absolute entry + do { + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, (int)(size - 1)); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned short)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, "@LINK", 5) && (len ? 
len : strlen(buf))) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findoffset(tmpbuf + 6, &start, &size, 0, &idxoff); + } + else break; + } + while (true); // while we're resolving links + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ (len ? len : strlen(buf)) + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + memcpy (outbuf + size, buf, len ? len : strlen(buf)); + size = outsize = size + (len ? len : strlen(buf)); + + start = outstart = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len ? len : strlen(buf)) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, (int)size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, 4); + write(idxfd->getFd(), &outsize, 2); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + delete [] idxBytes; + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+6, shiftSize-6); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + delete [] idxBytes; + } + } + + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * RawLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawStr::linkentry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; 
+ sprintf(text, "@LINK %s", destkey); + settext(srckey, text); + delete [] text; +} + + +/****************************************************************************** + * RawLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char RawStr::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/rawstr4.cpp b/src/modules/common/rawstr4.cpp new file mode 100644 index 0000000..d5926ff --- /dev/null +++ b/src/modules/common/rawstr4.cpp @@ -0,0 +1,562 @@ +/****************************************************************************** + * rawstr.cpp - code for class 'RawStr'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class StrKey + */ + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +/****************************************************************************** + * RawStr Statics + */ + +int RawStr4::instance = 0; + + +/****************************************************************************** + * RawStr Constructor - Initializes data for instance of RawStr + * + * ENT: ipath - path of the directory where data and index files are located. 
+ *		be sure to include the trailing separator (e.g. '/' or '\')
+ *		(e.g. 'modules/texts/rawtext/webster/')
+ *	fileMode - open() flags used for both files; -1 selects O_RDWR
+ *
+ * NOTE: ".idx" and ".dat" are appended directly to ipath, so ipath is used
+ *	as the common file-name prefix of the index and data files.
+ */
+
+RawStr4::RawStr4(const char *ipath, int fileMode)
+{
+	nl = '\n';
+	lastoff = -1;
+	path = 0;
+	stdstr(&path, ipath);
+
+#ifndef O_BINARY		// O_BINARY is needed in Borland C++ 4.53
+#define O_BINARY 0		// If it hasn't been defined then we probably
+#endif				// don't need it.
+
+	if (fileMode == -1) { // try read/write if possible
+		fileMode = O_RDWR;
+	}
+
+	// Sized from the path: the former fixed char[127] overflowed on long
+	// paths.  The 20 spare bytes cover ".idx"/".dat" and "Error: <errno>".
+	char *buf = new char [ strlen(path) + 20 ];
+
+	sprintf(buf, "%s.idx", path);
+	idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
+
+	sprintf(buf, "%s.dat", path);
+	datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
+
+	// datfd is used as a pointer (datfd->getFd()), so the former test
+	// "datfd < 0" could never report anything.
+	// NOTE(review): whether FileMgr::open() can return null is not visible
+	// here -- confirm, or test datfd->getFd() < 0 if opening eagerly is ok.
+	if (!datfd) {
+		sprintf(buf, "Error: %d", errno);
+		perror(buf);
+	}
+
+	delete [] buf;
+	instance++;
+}
+
+
+/******************************************************************************
+ * RawStr4 Destructor - Cleans up instance of RawStr4
+ *
+ * Frees the stored path, decrements the instance counter and hands both
+ * file descriptors back to the FileMgr.
+ */
+
+RawStr4::~RawStr4()
+{
+	if (path)
+		delete [] path;
+
+	--instance;
+
+	FileMgr::systemFileMgr.close(idxfd);
+	FileMgr::systemFileMgr.close(datfd);
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbufdat	- Gets the index string at the given dat offset
+ *		NOTE: buf is allocated with malloc/realloc and must be
+ *			released with free() by the calling function
+ *
+ * ENT:	ioffset	- offset in dat file to lookup
+ *		buf		- address of pointer to allocate for storage of string;
+ *				  *buf must be 0 or a previous malloc/realloc result
+ *
+ * The string ends at the first backslash, LF or CR (or at end of file) and
+ * is returned upper-cased.  If there is no data file an empty string is
+ * returned.
+ */
+
+void RawStr4::getidxbufdat(long ioffset, char **buf)
+{
+	int size;
+	char ch;
+	if (datfd) {	// was "datfd > 0", an ordered pointer-vs-zero comparison
+		lseek(datfd->getFd(), ioffset, SEEK_SET);
+		// first pass: measure the key one byte at a time
+		for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) {
+			if ((ch == '\\') || (ch == 10) || (ch == 13))
+				break;
+		}
+		*buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1);
+		if (size) {
+			// second pass: fetch the key in one read
+			lseek(datfd->getFd(), ioffset, SEEK_SET);
+			long got = read(datfd->getFd(), *buf, size);
+			if (got < size)		// short or failed read: keep only what arrived
+				size = (got > 0) ? (int)got : 0;
+		}
+		(*buf)[size] = 0;
+		// Upper-case the whole key.  The loop used to stop at "size > 0"
+		// and therefore left the first character untouched.
+		for (size--; size >= 0; size--)
+			(*buf)[size] = SW_toupper((*buf)[size]);
+	}
+	else {
+		*buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1);
+		**buf = 0;
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::getidxbuf	- Gets the index string at the given idx offset
+ *		NOTE: buf is allocated with malloc/realloc and must be
+ *			released with free() by the calling function
+ *
+ * ENT:	ioffset	- offset in idx file to lookup
+ *		buf		- address of pointer to allocate for storage of string
+ *
+ * Reads the 4-byte dat offset stored at ioffset in the idx file and returns
+ * the upper-cased key found there (see getidxbufdat).  *buf is left
+ * untouched when there is no idx file.
+ */
+
+void RawStr4::getidxbuf(long ioffset, char **buf)
+{
+	char *trybuf, *targetbuf;
+	long offset = 0;	// only 4 bytes are read below; keep the rest defined
+
+	if (idxfd) {	// was "idxfd > 0", an ordered pointer-vs-zero comparison
+		lseek(idxfd->getFd(), ioffset, SEEK_SET);
+		read(idxfd->getFd(), &offset, 4);
+
+		offset = swordtoarch32(offset);
+
+		getidxbufdat(offset, buf);
+		for (trybuf = targetbuf = *buf; *trybuf; trybuf++, targetbuf++) {
+/*
+			if (*trybuf == '-') {		// ignore '-' because alphabetized silly in file
+				targetbuf--;
+				continue;
+			}
+*/
+			*targetbuf = SW_toupper(*trybuf);
+		}
+		*targetbuf = 0;
+		trybuf = 0;
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::findoffset	- Finds the offset of the key string from the indexes
+ *
+ * ENT:	key		- key string to lookup
+ *		start	- address to store the starting offset
+ *		size		- address to store the size of the entry
+ *		away		- number of entries before or after to jump
+ *					(default = 0)
+ *		idxoff	- if not 0, receives the idx-file offset of the record used
+ *
+ * RET: error status (0 on success, -1 if there is no index, an empty key
+ *		was hit inside the index, or 'away' ran off either end)
+ *
+ * Index records are 8 bytes: a 4-byte dat offset followed by a 4-byte size.
+ * The key is upper-cased and located with a binary search; lastoff, when
+ * set by a previous call, is used as the first probe.
+ */
+
+signed char RawStr4::findoffset(const char *ikey, long *start, unsigned long *size, long away, long *idxoff)
+{
+	char *trybuf, *targetbuf, *key, quitflag = 0;
+	signed char retval = 0;
+	long headoff, tailoff, tryoff = 0, maxoff = 0;
+
+	if (idxfd->getFd() >=0) {
+		// Offset of the last record.  Computed for empty keys too (as
+		// RawStr::findoffset does); it used to stay 0 in that case, which
+		// made the 'away' bounds check below reject every move.
+		tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 8;
+		if (*ikey) {
+			headoff = 0;
+
+			key = new char [ strlen(ikey) + 1 ];
+			strcpy(key, ikey);
+
+			for (trybuf = targetbuf = key; *trybuf; trybuf++, targetbuf++) {
+				/*
+				if (*trybuf == '-') {		// ignore '-' because alphabetized silly in file
+					targetbuf--;
+					continue;
+				}
+				*/
+				*targetbuf = SW_toupper(*trybuf);
+			}
+			*targetbuf = 0;
+			trybuf = 0;
+
+			while (headoff < tailoff) {
+				tryoff = (lastoff == -1) ? headoff + ((((tailoff / 8) - (headoff / 8))) / 2) * 8 : lastoff;
+				lastoff = -1;
+				getidxbuf(tryoff, &trybuf);
+
+				if (!*trybuf && tryoff) {		// In case of extra entry at end of idx (not first entry)
+					tryoff += (tryoff > (maxoff / 2))?-8:8;
+					retval = -1;
+					break;
+				}
+
+				int diff = strcmp(key, trybuf);
+				if (!diff)
+					break;		// exact match
+
+				if (diff < 0)
+					tailoff = (tryoff == headoff) ? headoff : tryoff;
+				else headoff = tryoff;
+				// two candidates left: allow one more probe, then stop
+				if (tailoff == headoff + 8) {
+					if (quitflag++)
+						headoff = tailoff;
+				}
+			}
+			if (headoff >= tailoff)
+				tryoff = headoff;
+			if (trybuf)
+				free(trybuf);
+			delete [] key;
+		}
+		else	tryoff = 0;
+
+		lseek(idxfd->getFd(), tryoff, SEEK_SET);
+
+		// NOTE(review): 4 bytes are read into long-sized objects; they are
+		// zeroed first, which is only sufficient where those 4 bytes are the
+		// low-order ones -- confirm for the supported targets.
+		*start = *size = 0;
+		read(idxfd->getFd(), start, 4);
+		read(idxfd->getFd(), size, 4);
+		if (idxoff)
+			*idxoff = tryoff;
+
+		*start = swordtoarch32(*start);
+		*size  = swordtoarch32(*size);
+
+		while (away) {
+			long laststart = *start;
+			unsigned long lastsize = *size;
+			long lasttry = tryoff;
+			tryoff += (away > 0) ? 8 : -8;
+
+			bool bad = false;
+			if (((tryoff + (away*8)) < -8) || (tryoff + (away*8) > (maxoff+8)))
+				bad = true;
+			else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0)
+				bad = true;
+			if (bad) {
+				// ran off the index: report the last good record
+				retval = -1;
+				*start = laststart;
+				*size = lastsize;
+				tryoff = lasttry;
+				if (idxoff)
+					*idxoff = tryoff;
+				break;
+			}
+			read(idxfd->getFd(), start, 4);
+			read(idxfd->getFd(), size, 4);
+			if (idxoff)
+				*idxoff = tryoff;
+
+			*start = swordtoarch32(*start);
+			*size  = swordtoarch32(*size);
+
+			// a record identical to the previous one, or an empty one,
+			// does not count as a step
+			if (((laststart != *start) || (lastsize != *size)) && (*start >= 0) && (*size))
+				away += (away < 0) ? 1 : -1;
+		}
+
+		lastoff = tryoff;
+	}
+	else {
+		*start = 0;
+		*size  = 0;
+		if (idxoff)
+			*idxoff = 0;
+		retval = -1;
+	}
+	return retval;
+}
+
+
+/******************************************************************************
+ * RawStr4::preptext	- Prepares the text before returning it to external
+ *					objects
+ *
+ * ENT:	buf	- buffer where text is stored and where to store the prep'd
+ *				text.
+ *
+ * Works in place and never lengthens the text:
+ *	- CR and LF seen before any other character are dropped
+ *	- a CR is written as nl; the LF of a CR/LF pair adds nothing
+ *	- a lone LF becomes a single space in front of the next character
+ *	  (unless that character already is a space)
+ *	- each further consecutive LF is written as nl
+ *	- trailing LF and space characters are removed
+ */
+
+void RawStr4::preptext(char *buf)
+{
+	char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;
+
+	for (to = from = buf; *from; from++) {
+		switch (*from) {
+		case 10:
+			if (!realdata)
+				continue;
+			space = (cr) ? 0 : 1;
+			cr = 0;
+			nlcnt++;
+			if (nlcnt > 1) {
+//				*to++ = nl;
+				*to++ = nl;
+//				nlcnt = 0;
+			}
+			continue;
+		case 13:
+			if (!realdata)
+				continue;
+			*to++ = nl;
+			space = 0;
+			cr = 1;
+			continue;
+		}
+		realdata = 1;
+		nlcnt = 0;
+		if (space) {
+			space = 0;
+			if (*from != ' ') {
+				*to++ = ' ';
+				from--;		// look at this character again, now without 'space'
+				continue;
+			}
+		}
+		*to++ = *from;
+	}
+	*to = 0;
+
+	while (to > (buf+1)) {			// remove trailing excess
+		to--;
+		if ((*to == 10) || (*to == ' '))
+			*to = 0;
+		else break;
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::gettext	- gets text at a given offset
+ *
+ * ENT:
+ *	istart	- starting offset where the text is located in the file
+ *	isize	- size of text entry
+ *	idxbuf	- receives the upper-cased key stored at istart, cut to at
+ *			  most size - 1 characters
+ *	buf	- buffer to store text (at least isize bytes)
+ *
+ * The key line (everything up to and including the first LF) is removed
+ * from the text.  An entry whose text starts with "@LINK" names another key
+ * after "@LINK "; that entry is looked up and read instead, repeatedly.
+ *
+ * NOTE(review): buf is the caller's buffer, sized for the original entry;
+ *	a link target larger than that would overrun it.
+ */
+
+void RawStr4::gettext(long istart, unsigned long isize, char *idxbuf, char *buf)
+{
+	char *ch;
+	char *idxbuflocal = 0;
+	getidxbufdat(istart, &idxbuflocal);
+	long start = istart;
+	unsigned long size = isize;
+
+	do {
+		if (!size)		// size - 1 below would wrap to a huge read count
+			break;
+		memset(buf, 0, size);
+		lseek(datfd->getFd(), start, SEEK_SET);
+		read(datfd->getFd(), buf, (int)(size - 1));
+
+		for (ch = buf; *ch; ch++) {		// skip over index string
+			if (*ch == 10) {
+				ch++;
+				break;
+			}
+		}
+		memmove(buf, ch, size - (unsigned long)(ch-buf));
+
+		// resolve link
+		if (!strncmp(buf, "@LINK", 5)) {
+			for (ch = buf; *ch; ch++) {		// null before nl
+				if (*ch == 10) {
+					*ch = 0;
+					break;
+				}
+			}
+			// "@LINK " is 6 characters long (see linkentry); this used
+			// to skip 8 and so dropped the first two letters of the key
+			findoffset(buf + 6, &start, &size);
+		}
+		else break;
+	}
+	while (true);	// while we're resolving links
+
+	if (idxbuflocal) {
+		int localsize = strlen(idxbuflocal);
+		localsize = (localsize < (size - 1)) ? localsize : (size - 1);
+		strncpy(idxbuf, idxbuflocal, localsize);
+		idxbuf[localsize] = 0;
+		free(idxbuflocal);
+	}
+}
+
+
+/******************************************************************************
+ * RawStr4::settext	- Sets text for the given key
+ *
+ * ENT:	key	- key for this entry
+ *	buf	- buffer to store
+ *	len	- length of buffer (0 - null terminated)
+ *
+ * The entry ("KEY" CR LF text) is appended to the dat file and an 8-byte
+ * record pointing at it is inserted into the idx file at the position found
+ * for the key.  If the existing entry is a link, the link target's record
+ * position is used instead.  An empty text removes the record at that
+ * position from the idx file.
+ *
+ * NOTE(review): when the key already exists, the old index record is part
+ *	of the bytes that are written back after the new record, so it appears
+ *	to be kept rather than replaced -- confirm this is intended.
+ */
+
+void RawStr4::settext(const char *ikey, const char *buf, long len)
+{
+
+	long start, outstart;
+	long idxoff;
+	long endoff;
+	long shiftSize;
+	unsigned long size;
+	unsigned long outsize;
+	static const char nl[] = {13, 10};
+	char *tmpbuf = 0;
+	char *key = 0;
+	char *dbKey = 0;
+	char *idxBytes = 0;
+	char *outbuf = 0;
+	char *ch = 0;
+
+	// number of text bytes to store; 0 means "delete the entry"
+	const unsigned long textLen = len ? (unsigned long)len : strlen(buf);
+
+	findoffset(ikey, &start, &size, 0, &idxoff);
+	stdstr(&key, ikey);
+	for (ch = key; *ch; ch++)
+		*ch = SW_toupper(*ch);
+	ch = 0;
+
+	getidxbufdat(start, &dbKey);
+
+	const int diff = strcmp(key, dbKey);
+	if (diff > 0) {
+		idxoff += 8;	// new record goes behind the one that was found
+	}
+	else if ((diff == 0) && textLen /*we're not deleting*/) { // got absolute entry
+		do {
+			delete [] tmpbuf;	// buffer of the previous hop (used to leak)
+			tmpbuf = new char [ size + 2 ];
+			memset(tmpbuf, 0, size + 2);
+			lseek(datfd->getFd(), start, SEEK_SET);
+			if (size)		// size - 1 would wrap to a huge read count
+				read(datfd->getFd(), tmpbuf, (int)(size - 1));
+
+			for (ch = tmpbuf; *ch; ch++) {		// skip over index string
+				if (*ch == 10) {
+					ch++;
+					break;
+				}
+			}
+			memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf));
+
+			// resolve link
+			if (!strncmp(tmpbuf, "@LINK", 5) && textLen) {
+				for (ch = tmpbuf; *ch; ch++) {		// null before nl
+					if (*ch == 10) {
+						*ch = 0;
+						break;
+					}
+				}
+				// "@LINK " is 6 characters long (see linkentry); this
+				// used to skip 8
+				findoffset(tmpbuf + 6, &start, &size, 0, &idxoff);
+			}
+			else break;
+		}
+		while (true);	// while we're resolving links
+		delete [] tmpbuf;
+		tmpbuf = 0;
+	}
+
+	endoff = lseek(idxfd->getFd(), 0, SEEK_END);
+
+	shiftSize = endoff - idxoff;
+
+	if (shiftSize > 0) {
+		// save everything from the insert position to the end of the index
+		idxBytes = new char [ shiftSize ];
+		lseek(idxfd->getFd(), idxoff, SEEK_SET);
+		read(idxfd->getFd(), idxBytes, shiftSize);
+	}
+
+	outbuf = new char [ textLen + strlen(key) + 5 ];
+	sprintf(outbuf, "%s%c%c", key, 13, 10);
+	size = strlen(outbuf);
+	memcpy (outbuf + size, buf, textLen);
+	size = outsize = size + textLen;
+
+	start = outstart = lseek(datfd->getFd(), 0, SEEK_END);
+
+	outstart = archtosword32(start);
+	outsize  = archtosword32(size);
+
+	lseek(idxfd->getFd(), idxoff, SEEK_SET);
+	if (textLen) {
+		lseek(datfd->getFd(), start, SEEK_SET);
+		write(datfd->getFd(), outbuf, (long)size);
+
+		// add a new line to make data file easier to read in an editor
+		write(datfd->getFd(), &nl, 2);
+
+		write(idxfd->getFd(), &outstart, 4);
+		write(idxfd->getFd(), &outsize, 4);
+		if (idxBytes)
+			write(idxfd->getFd(), idxBytes, shiftSize);
+	}
+	else {	// delete entry
+		// needs at least one whole record; shiftSize - 8 used to go
+		// negative for a shorter tail
+		if (idxBytes && (shiftSize >= 8)) {
+			write(idxfd->getFd(), idxBytes+8, shiftSize-8);
+			lseek(idxfd->getFd(), -1, SEEK_CUR);	// last valid byte
+			FileMgr::systemFileMgr.trunc(idxfd);	// truncate index
+		}
+	}
+
+	delete [] idxBytes;
+	delete [] key;
+	delete [] outbuf;
+	free(dbKey);
+}
+
+
+/******************************************************************************
+ * RawStr4::linkentry	- links one entry to another
+ *
+ * ENT:	destkey	- key of the entry to point at
+ *	srckey	- key of the entry that becomes the link
+ *
+ * Stores the text "@LINK <destkey>" under srckey.
+ */
+
+void RawStr4::linkentry(const char *destkey, const char *srckey) {
+	char *text = new char [ strlen(destkey) + 7 ];	// "@LINK " + key + NUL
+	sprintf(text, "@LINK %s", destkey);
+	settext(srckey, text);
+	delete [] text;
+}
+
+
+/******************************************************************************
+ * RawStr4::createModule	- Creates new module files
+ *
+ * ENT:	path	- file-name prefix of the module; "<path>.dat" and
+ *			  "<path>.idx" are removed and created again.  One trailing
+ *			  '/' or '\' is stripped first.
+ * RET: error status (always 0)
+ */
+
+signed char RawStr4::createModule(const char *ipath)
+{
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];
+	FileDesc *fd, *fd2;
+
+	stdstr(&path, ipath);
+
+	// the length check keeps an empty path from indexing path[-1]
+	const size_t plen = strlen(path);
+	if (plen && ((path[plen-1] == '/') || (path[plen-1] == '\\')))
+		path[plen-1] = 0;
+
+	sprintf(buf, "%s.dat", path);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();	// presumably forces the file to be opened/created -- TODO confirm
+	FileMgr::systemFileMgr.close(fd);
+
+	sprintf(buf, "%s.idx", path);
+	unlink(buf);
+	fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd2->getFd();
+	FileMgr::systemFileMgr.close(fd2);
+
+	delete [] path;
+	delete [] buf;		// used to leak
+
+	return 0;
+}
diff --git a/src/modules/common/rawverse.cpp b/src/modules/common/rawverse.cpp
new file mode 100644
index 0000000..3374da5
--- /dev/null
+++ b/src/modules/common/rawverse.cpp
@@ -0,0 +1,345 @@
+/******************************************************************************
+ *  rawverse.cpp   - code for class 'RawVerse'- a module that reads raw text
+ *			files:  ot and nt using indexes ??.bks ??.cps ??.vss
+ *			and provides lookup and parsing functions based on
+ *			class VerseKey
+ */
+
+
+#include
+#include
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#include
+#else
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+#ifndef O_BINARY		// O_BINARY is needed in Borland C++ 4.53
+#define O_BINARY 0		// If it hasn't been defined then we probably
+#endif				// don't need it.
+ + +/****************************************************************************** + * RawVerse Statics + */ + + int RawVerse::instance = 0; + + +/****************************************************************************** + * RawVerse Constructor - Initializes data for instance of RawVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + */ + +RawVerse::RawVerse(const char *ipath, int fileMode) +{ + char *buf; + + nl = '\n'; + path = 0; + stdstr(&path, ipath); + buf = new char [ strlen(path) + 80 ]; + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.vss", path); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.vss", path); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot", path); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt", path); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + delete [] buf; + instance++; +} + + +/****************************************************************************** + * RawVerse Destructor - Cleans up instance of RawVerse + */ + +RawVerse::~RawVerse() +{ + int loop1; + + if (path) + delete [] path; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + } +} + + +/****************************************************************************** + * RawVerse::findoffset - Finds the offset of the key verse from the indexes + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * start - address to store the starting offset + * 
size - address to store the size of the entry + */ + +void RawVerse::findoffset(char testmt, long idxoff, long *start, unsigned short *size) { + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + if (idxfp[testmt-1]->getFd() >= 0) { + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), start, 4); + long len = read(idxfp[testmt-1]->getFd(), size, 2); // read size + + *start = swordtoarch32(*start); + *size = swordtoarch16(*size); + + if (len < 2) { + *size = (unsigned short)((*start) ? (lseek(textfp[testmt-1]->getFd(), 0, SEEK_END) - (long)*start) : 0); // if for some reason we get an error reading size, make size to end of file + } + } + else { + *start = 0; + *size = 0; + } +} + + +/****************************************************************************** + * RawVerse::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void RawVerse::preptext(char *buf) +{ + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 
0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * RawVerse::gettext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 2 (null)(null) + * buf - buffer to store text + * + */ + +void RawVerse::gettext(char testmt, long start, unsigned short size, char *buf) { + memset(buf, 0, size+1); + if (size) { + if (textfp[testmt-1]->getFd() >= 0) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + read(textfp[testmt-1]->getFd(), buf, (int)size - 2); + } + } +} + + +/****************************************************************************** + * RawVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void RawVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + long start, outstart; + unsigned short size; + unsigned short outsize; + static const char nl[] = {13, 10}; + + idxoff *= 6; + if (!testmt) + testmt = ((idxfp[1]) ? 1:2); + + size = outsize = len ? 
len : strlen(buf); + + start = outstart = lseek(textfp[testmt-1]->getFd(), 0, SEEK_END); + lseek(idxfp[testmt-1]->getFd(), idxoff, SEEK_SET); + + if (size) { + lseek(textfp[testmt-1]->getFd(), start, SEEK_SET); + write(textfp[testmt-1]->getFd(), buf, (int)size); + + // add a new line to make data file easier to read in an editor + write(textfp[testmt-1]->getFd(), &nl, 2); + } + else { + start = 0; + } + + outstart = archtosword32(start); + outsize = archtosword16(size); + + write(idxfp[testmt-1]->getFd(), &outstart, 4); + write(idxfp[testmt-1]->getFd(), &outsize, 2); + + +} + + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void RawVerse::linkentry(char testmt, long destidxoff, long srcidxoff) { + long start; + unsigned short size; + + destidxoff *= 6; + srcidxoff *= 6; + + if (!testmt) + testmt = ((idxfp[1]) ? 
1:2); + + // get source + lseek(idxfp[testmt-1]->getFd(), srcidxoff, SEEK_SET); + read(idxfp[testmt-1]->getFd(), &start, 4); + read(idxfp[testmt-1]->getFd(), &size, 2); + + // write dest + lseek(idxfp[testmt-1]->getFd(), destidxoff, SEEK_SET); + write(idxfp[testmt-1]->getFd(), &start, 4); + write(idxfp[testmt-1]->getFd(), &size, 2); +} + + +/****************************************************************************** + * RawVerse::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +char RawVerse::createModule(const char *ipath) +{ + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s/ot", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/nt", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s/ot.vss", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + + sprintf(buf, "%s/nt.vss", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + + VerseKey vk; + vk.Headings(1); + long offset = 0; + short size = 0; + for (vk = TOP; !vk.Error(); vk++) { + write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4); + write((vk.Testament() == 1) ? 
fd->getFd() : fd2->getFd(), &size, 2); + } + + FileMgr::systemFileMgr.close(fd); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; +/* + RawVerse rv(path); + VerseKey mykey("Rev 22:21"); +*/ + + return 0; +} diff --git a/src/modules/common/sapphire.cpp b/src/modules/common/sapphire.cpp new file mode 100644 index 0000000..686bccb --- /dev/null +++ b/src/modules/common/sapphire.cpp @@ -0,0 +1,228 @@ +/* sapphire.cpp -- the Saphire II stream cipher class. + Dedicated to the Public Domain the author and inventor: + (Michael Paul Johnson). This code comes with no warranty. + Use it at your own risk. + Ported from the Pascal implementation of the Sapphire Stream + Cipher 9 December 1994. + Added hash pre- and post-processing 27 December 1994. + Modified initialization to make index variables key dependent, + made the output function more resistant to cryptanalysis, + and renamed to Sapphire II 2 January 1995 +*/ + + +#ifdef WIN32 +#include +#endif + +#ifdef UNIX +#include +#include +#else +#ifndef _MSC_VER +#include +#endif +#endif + +#ifdef _WIN32_WCE +#include +#endif + +#include "sapphire.h" + +unsigned char sapphire::keyrand(int limit, + unsigned char *user_key, + unsigned char keysize, + unsigned char *rsum, + unsigned *keypos) + { + unsigned u, // Value from 0 to limit to return. + retry_limiter, // No infinite loops allowed. + mask; // Select just enough bits. + + if (!limit) return 0; // Avoid divide by zero error. + retry_limiter = 0; + mask = 1; // Fill mask with enough bits to cover + while (mask < (unsigned)limit) // the desired range. + mask = (mask << 1) + 1; + do + { + *rsum = cards[*rsum] + user_key[(*keypos)++]; + if (*keypos >= keysize) + { + *keypos = 0; // Recycle the user key. + *rsum += keysize; // key "aaaa" != key "aaaaaaaa" + } + u = mask & *rsum; + if (++retry_limiter > 11) + u %= limit; // Prevent very rare long loops. 
+ } + while (u > (unsigned)limit); + return u; + } + +void sapphire::initialize(unsigned char *key, unsigned char keysize) + { + // Key size may be up to 256 bytes. + // Pass phrases may be used directly, with longer length + // compensating for the low entropy expected in such keys. + // Alternatively, shorter keys hashed from a pass phrase or + // generated randomly may be used. For random keys, lengths + // of from 4 to 16 bytes are recommended, depending on how + // secure you want this to be. + + int i; + unsigned char toswap, swaptemp, rsum; + unsigned keypos; + + // If we have been given no key, assume the default hash setup. + + if (keysize < 1) + { + hash_init(); + return; + } + + // Start with cards all in order, one of each. + + for (i=0;i<256;i++) + cards[i] = i; + + // Swap the card at each position with some other card. + + toswap = 0; + keypos = 0; // Start with first byte of user key. + rsum = 0; + for (i=255;i>=0;i--) + { + toswap = keyrand(i, key, keysize, &rsum, &keypos); + swaptemp = cards[i]; + cards[i] = cards[toswap]; + cards[toswap] = swaptemp; + } + + // Initialize the indices and data dependencies. + // Indices are set to different values instead of all 0 + // to reduce what is known about the state of the cards + // when the first byte is emitted. + + rotor = cards[1]; + ratchet = cards[3]; + avalanche = cards[5]; + last_plain = cards[7]; + last_cipher = cards[rsum]; + + toswap = swaptemp = rsum = 0; + keypos = 0; + } + +void sapphire::hash_init(void) + { + // This function is used to initialize non-keyed hash + // computation. + + int i, j; + + // Initialize the indices and data dependencies. + + rotor = 1; + ratchet = 3; + avalanche = 5; + last_plain = 7; + last_cipher = 11; + + // Start with cards all in inverse order. 
+ + for (i=0, j=255;i<256;i++,j--) + cards[i] = (unsigned char) j; + } + +sapphire::sapphire(unsigned char *key, unsigned char keysize) + { + if (key && keysize) + initialize(key, keysize); + } + +void sapphire::burn(void) + { + // Destroy the key and state information in RAM. + memset(cards, 0, 256); + rotor = ratchet = avalanche = last_plain = last_cipher = 0; + } + +sapphire::~sapphire() + { + burn(); + } + +unsigned char sapphire::encrypt(unsigned char b) + { +#ifdef USBINARY + // Picture a single enigma rotor with 256 positions, rewired + // on the fly by card-shuffling. + + // This cipher is a variant of one invented and written + // by Michael Paul Johnson in November, 1993. + + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. + + last_cipher = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_plain = b; + return last_cipher; +#else + return b; +#endif + } + +unsigned char sapphire::decrypt(unsigned char b) + { + unsigned char swaptemp; + + // Shuffle the deck a little more. + + ratchet += cards[rotor++]; + swaptemp = cards[last_cipher]; + cards[last_cipher] = cards[ratchet]; + cards[ratchet] = cards[last_plain]; + cards[last_plain] = cards[rotor]; + cards[rotor] = swaptemp; + avalanche += cards[swaptemp]; + + // Output one byte from the state in such a way as to make it + // very hard to figure out which one you are looking at. 
+ + last_plain = b^cards[(cards[ratchet] + cards[rotor]) & 0xFF] ^ + cards[cards[(cards[last_plain] + + cards[last_cipher] + + cards[avalanche])&0xFF]]; + last_cipher = b; + return last_plain; + } + +void sapphire::hash_final(unsigned char *hash, // Destination + unsigned char hashlength) // Size of hash. + { + int i; + + for (i=255;i>=0;i--) + encrypt((unsigned char) i); + for (i=0;i +#include +#include + + +/****************************************************************************** + * SWCipher Constructor - Initializes data for instance of SWCipher + * + */ + +SWCipher::SWCipher(unsigned char *key) { + master.initialize(key, strlen((char *)key)); + buf = 0; +} + + +/****************************************************************************** + * SWCipher Destructor - Cleans up instance of SWCipher + */ + +SWCipher::~SWCipher() +{ + if (buf) + free(buf); +} + + +/****************************************************************************** + * SWCipher::Buf - Stores a copy of ibuf as plain text (when ibuf is given) + * and returns the internal buffer after Decode() + * + * ENT: ibuf - plain text to copy in; 0 leaves the stored buffer as is + * ilen - number of bytes to copy; 0 means ibuf is NUL-terminated + * and its length (plus the terminator) is used + * + * RET: internal buffer + */ + +char *SWCipher::Buf(const char *ibuf, unsigned int ilen) +{ + if (ibuf) { + + if (buf) + free(buf); + + if (!ilen) { + // measure the caller's text: our own buf was just freed (or was never set) + len = strlen(ibuf); + ilen = len + 1; + } + else len = ilen; + + buf = (char *) malloc(ilen); + memcpy(buf, ibuf, ilen); + cipher = false; + } + + Decode(); + + return buf; +} + + +char *SWCipher::cipherBuf(unsigned int *ilen, const char *ibuf) +{ + if (ibuf) { + + if (buf) + free(buf); + + buf = (char *) malloc(*ilen); + memcpy(buf, ibuf, *ilen); + len = *ilen; + cipher = true; + } + + Encode(); + + *ilen = (short)len; + return buf; +} + + +/****************************************************************************** + * SWCipher::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o.
+ */ + +void SWCipher::Encode(void) +{ + if (!cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.encrypt(buf[i]); + cipher = true; + } +} + + +/****************************************************************************** + * SWCipher::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCipher::Decode(void) +{ + if (cipher) { + work = master; + for (int i = 0; i < len; i++) + buf[i] = work.decrypt(buf[i]); + cipher = false; + } +} + + +/****************************************************************************** + * SWCipher::setCipherKey - setter for a new CipherKey + * + */ + +void SWCipher::setCipherKey(const char *ikey) { + unsigned char *key = (unsigned char *)ikey; + master.initialize(key, strlen((char *)key)); +} diff --git a/src/modules/common/swcomprs.cpp b/src/modules/common/swcomprs.cpp new file mode 100644 index 0000000..4bd2e5e --- /dev/null +++ b/src/modules/common/swcomprs.cpp @@ -0,0 +1,190 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'SWCompress'- a driver class that provides + * compression utilities. 
+ */ + +#include +#include +#include + + +/****************************************************************************** + * SWCompress Constructor - Initializes data for instance of SWCompress + * + */ + +SWCompress::SWCompress() +{ + buf = zbuf = 0; + Init(); +} + + +/****************************************************************************** + * SWCompress Destructor - Cleans up instance of SWCompress + */ + +SWCompress::~SWCompress() +{ + if (zbuf) + free(zbuf); + + if (buf) + free(buf); +} + + +void SWCompress::Init() +{ + if (buf) + free(buf); + + if (zbuf) + free(zbuf); + + buf = 0; + zbuf = 0; + direct = 0; + zlen = 0; + slen = 0; + zpos = 0; + pos = 0; +} + + +char *SWCompress::Buf(const char *ibuf, unsigned long *len) { + // setting an uncompressed buffer + if (ibuf) { + Init(); + slen = (len) ? *len : strlen(ibuf); + buf = (char *) calloc(slen + 1, 1); + memcpy(buf, ibuf, slen); + } + + // getting an uncompressed buffer + if (!buf) { + buf = (char *)calloc(1,1); // be sure we at least allocate an empty buf for return; + direct = 1; + Decode(); +// slen = strlen(buf); + if (len) + *len = slen; + } + return buf; +} + + +char *SWCompress::zBuf(unsigned long *len, char *ibuf) +{ + // setting a compressed buffer + if (ibuf) { + Init(); + zbuf = (char *) malloc(*len); + memcpy(zbuf, ibuf, *len); + zlen = *len; + } + + // getting a compressed buffer + if (!zbuf) { + direct = 0; + Encode(); + } + + *len = zlen; + return zbuf; +} + + +unsigned long SWCompress::GetChars(char *ibuf, unsigned long len) +{ + if (direct) { + len = (((zlen - zpos) > (unsigned)len) ? len : zlen - zpos); + if (len > 0) { + memmove(ibuf, &zbuf[zpos], len); + zpos += len; + } + } + else { +// slen = strlen(buf); + len = (((slen - pos) > (unsigned)len) ? 
len : slen - pos); + if (len > 0) { + memmove(ibuf, &buf[pos], len); + pos += len; + } + } + return len; +} + + +unsigned long SWCompress::SendChars(char *ibuf, unsigned long len) +{ + if (direct) { + if (buf) { +// slen = strlen(buf); + if ((pos + len) > (unsigned)slen) { + buf = (char *) realloc(buf, pos + len + 1024); + memset(&buf[pos], 0, len + 1024); + } + } + else buf = (char *)calloc(1, len + 1024); + memmove(&buf[pos], ibuf, len); + pos += len; + } + else { + if (zbuf) { + if ((zpos + len) > zlen) { + zbuf = (char *) realloc(zbuf, zpos + len + 1024); + zlen = zpos + len + 1024; + } + } + else { + zbuf = (char *)calloc(1, len + 1024); + zlen = len + 1024; + } + memmove(&zbuf[zpos], ibuf, len); + zpos += len; + } + return len; +} + + +/****************************************************************************** + * SWCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Encode(void) +{ + cycleStream(); +} + + +/****************************************************************************** + * SWCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void SWCompress::Decode(void) +{ + cycleStream(); +} + + +void SWCompress::cycleStream() { + char buf[1024]; + unsigned long len, totlen = 0; + + do { + len = GetChars(buf, 1024); + if (len) + totlen += SendChars(buf, len); + } while (len == 1024); + + zlen = slen = totlen; +} diff --git a/src/modules/common/swcomprs.doc b/src/modules/common/swcomprs.doc new file mode 100644 index 0000000..b6817f2 --- /dev/null +++ b/src/modules/common/swcomprs.doc @@ -0,0 +1,802 @@ +The following is the original information send from Parson's Technologies via +Craig Rairden. 
+_______________________________________________________________________________ +Compression Info, 10-11-95 +Jeff Wheeler + +Source of Algorithm +------------------- + +The compression algorithms used here are based upon the algorithms developed +and published by Haruhiko Okumura in a paper entitled "Data Compression +Algorithms of LARC and LHarc." This paper discusses three compression +algorithms, LZSS, LZARI, and LZHUF. LZSS is described as the "first" of +these, and is described as providing moderate compression with good speed. +LZARI is described as an improved LZSS, a combination of the LZSS algorithm +with adaptive arithmetic compression. It is described as being slower than +LZSS but with better compression. LZHUF (the basis of the common LHA +compression program) was included in the paper; however, a free usage license +was not included. + +The following are copies of the statements included at the beginning of each +source code listing that was supplied in the working paper. + + LZSS, dated 4/6/89, marked as "Use, distribute and + modify this program freely." + + LZARI, dated 4/7/89, marked as "Use, distribute and + modify this program freely." + + LZHUF, dated 11/20/88, written by Haruyasu Yoshizaki, + translated by Haruhiko Okumura on 4/7/89. Not + expressly marked as redistributable or modifiable. + +Since both LZSS and LZARI are marked as "use, distribute and modify freely" we +have felt at liberty basing our compression algorithm on either of these. + +Selection of Algorithm +---------------------- + +Working samples of three possible compression algorithms are supplied in +Okumura's paper. Which should be used? + +LZSS is the fastest at decompression, but does not generate as small a +compressed file as the other methods. The other two methods provide, perhaps, +a 15% improvement in compression. Or, put another way, on a 100K file, LZSS +might compress it to 50K while the others might approach 40-45K.
For STEP +purposes, it was decided that decoding speed was of more importance than +tighter compression. For these reasons, the first compression algorithm +implemented is the LZSS algorithm. + +About LZSS Encoding +------------------- + +(adapted from Haruhiko Okumura's paper) + +This scheme was proposed by Ziv and Lempel [1]. A slightly modified version +is described by Storer and Szymanski [2]. An implementation using a binary +tree has been proposed by Bell [3]. + +The algorithm is quite simple. +1. Keep a ring buffer which initially contains all space characters. +2. Read several letters from the file to the buffer. +3. Search the buffer for the longest string that matches the letters just + read, and send its length and position in the buffer. + +If the ring buffer is 4096 bytes, the position can be stored in 12 bits. If the +length is represented in 4 bits, the <position, length> pair is two bytes +long. If the longest match is no more than two characters, then just one +character is sent without encoding. The process starts again with the next +character. An extra bit is sent each time to tell the decoder whether the +next item is a character or a <position, length> pair. + +[1] J. Ziv and A. Lempel, IEEE Transactions IT-23, 337-343 (1977). +[2] J. A. Storer and T. G. Szymanski, J. ACM, 29, 928-951 (1982). +[3] T. C. Bell, IEEE Transactions COM-34, 1176-1182 (1986). + +class SWCompress { +public: +void InitTree( // no return value + void); // no parameters + +void InsertNode( // no return value + short int Pos); // position in the buffer + +void DeleteNode( // no return value + short int Node); // node to be removed + +void Encode( // no return value + void); // no parameters + +void Decode( // no return value + void); // no parameters +}; + +// The following are constant sizes used by the compression algorithm. +// +// N - This is the size of the ring buffer. It is set +// to 4K. It is important to note that a position +// within the ring buffer requires 12 bits.
+// +// F - This is the maximum length of a character sequence +// that can be taken from the ring buffer. It is set +// to 18. Note that a length must be 3 before it is +// worthwhile to store a position/length pair, so the +// length can be encoded in only 4 bits. Or, put yet +// another way, it is not necessary to encode a length +// of 0-18, it is necessary to encode a length of +// 3-18, which requires 4 bits. +// +// THRESHOLD - It takes 2 bytes to store an offset and +// a length. If a character sequence only +// requires 1 or 2 characters to store +// uncompressed, then it is better to store +// it uncompressed than as an offset into +// the ring buffer. +// +// Note that the 12 bits used to store the position and the 4 bits +// used to store the length equal a total of 16 bits, or 2 bytes. + +#define N 4096 +#define F 18 +#define THRESHOLD 3 +#define NOT_USED N + +// m_ring_buffer is a text buffer. It contains "nodes" of +// uncompressed text that can be indexed by position. That is, +// a substring of the ring buffer can be indexed by a position +// and a length. When decoding, the compressed text may contain +// a position in the ring buffer and a count of the number of +// bytes from the ring buffer that are to be moved into the +// uncompressed buffer. +// +// This ring buffer is not maintained as part of the compressed +// text. Instead, it is reconstructed dynamically. That is, +// it starts out empty and gets built as the text is decompressed. +// +// The ring buffer contain N bytes, with an additional F - 1 bytes +// to facilitate string comparison. + +unsigned char m_ring_buffer[N + F - 1]; + +// m_match_position and m_match_length are set by InsertNode(). +// +// These variables indicate the position in the ring buffer +// and the number of characters at that position that match +// a given string. + +short int m_match_position; +short int m_match_length; + +// m_lson, m_rson, and m_dad are the Japanese way of referring to +// a tree structure. 
The dad is the parent and it has a right and +// left son (child). +// +// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right +// and left children of node i. +// +// For i = 0 to N-1, m_dad[i] is the parent of node i. +// +// For i = 0 to 255, rson[N + i + 1] is the root of the tree for +// strings that begin with the character i. Note that this requires +// one byte characters. +// +// These nodes store values of 0...(N-1). Memory requirements +// can be reduces by using 2-byte integers instead of full 4-byte +// integers (for 32-bit applications). Therefore, these are +// defined as "short ints." + +short int m_lson[N + 1]; +short int m_rson[N + 257]; +short int m_dad[N + 1]; + + + + +/* + ------------------------------------------------------------------------- + cLZSS::InitTree + + This function initializes the tree nodes to "empty" states. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InitTree( // no return value + void) // no parameters + throw() // exception list + + { + int i; + + // For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right + // and left children of node i. These nodes need not be + // initialized. However, for debugging purposes, it is nice to + // have them initialized. Since this is only used for compression + // (not decompression), I don't mind spending the time to do it. + // + // For the same range of i, m_dad[i] is the parent of node i. + // These are initialized to a known value that can represent + // a "not used" state. + + for (i = 0; i < N; i++) + { + m_lson[i] = NOT_USED; + m_rson[i] = NOT_USED; + m_dad[i] = NOT_USED; + } + + // For i = 0 to 255, m_rson[N + i + 1] is the root of the tree + // for strings that begin with the character i. This is why + // the right child array is larger than the left child array. + // These are also initialzied to a "not used" state. + // + // Note that there are 256 of these, one for each of the possible + // 256 characters. 
+ + for (i = N + 1; i <= (N + 256); i++) + { + m_rson[i] = NOT_USED; + } + + // Done. + } + +/* + ------------------------------------------------------------------------- + cLZSS::InsertNode + + This function inserts a string from the ring buffer into one of + the trees. It loads the match position and length member variables + for the longest match. + + The string to be inserted is identified by the parameter Pos, + A full F bytes are inserted. So, m_ring_buffer[Pos ... Pos+F-1] + are inserted. + + If the matched length is exactly F, then an old node is removed + in favor of the new one (because the old one will be deleted + sooner). + + Note that Pos plays a dual role. It is used as both a position + in the ring buffer and also as a tree node. m_ring_buffer[Pos] + defines a character that is used to identify a tree node. + ------------------------------------------------------------------------- +*/ + +void cLZSS::InsertNode( // no return value + short int Pos) // position in the buffer + throw() // exception list + + { + short int i; + short int p; + int cmp; + unsigned char * key; + + ASSERT(Pos >= 0); + ASSERT(Pos < N); + + cmp = 1; + key = &(m_ring_buffer[Pos]); + + // The last 256 entries in m_rson contain the root nodes for + // strings that begin with a letter. Get an index for the + // first letter in this string. + + p = (short int) (N + 1 + key[0]); + + // Set the left and right tree nodes for this position to "not + // used." + + m_lson[Pos] = NOT_USED; + m_rson[Pos] = NOT_USED; + + // Haven't matched anything yet. + + m_match_length = 0; + + for ( ; ; ) + { + if (cmp >= 0) + { + if (m_rson[p] != NOT_USED) + { + p = m_rson[p]; + } + else + { + m_rson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + else + { + if (m_lson[p] != NOT_USED) + { + p = m_lson[p]; + } + else + { + m_lson[p] = Pos; + m_dad[Pos] = p; + return; + } + } + + // Should we go to the right or the left to look for the + // next match? 
+ + for (i = 1; i < F; i++) + { + cmp = key[i] - m_ring_buffer[p + i]; + if (cmp != 0) + break; + } + + if (i > m_match_length) + { + m_match_position = p; + m_match_length = i; + + if (i >= F) + break; + } + } + + m_dad[Pos] = m_dad[p]; + m_lson[Pos] = m_lson[p]; + m_rson[Pos] = m_rson[p]; + + m_dad[ m_lson[p] ] = Pos; + m_dad[ m_rson[p] ] = Pos; + + if (m_rson[ m_dad[p] ] == p) + { + m_rson[ m_dad[p] ] = Pos; + } + else + { + m_lson[ m_dad[p] ] = Pos; + } + + // Remove "p" + + m_dad[p] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::DeleteNode + + This function removes the node "Node" from the tree. + ------------------------------------------------------------------------- +*/ + +void cLZSS::DeleteNode( // no return value + short int Node) // node to be removed + throw() // exception list + + { + short int q; + + ASSERT(Node >= 0); + ASSERT(Node < (N+1)); + + if (m_dad[Node] == NOT_USED) + { + // not in tree, nothing to do + return; + } + + if (m_rson[Node] == NOT_USED) + { + q = m_lson[Node]; + } + else if (m_lson[Node] == NOT_USED) + { + q = m_rson[Node]; + } + else + { + q = m_lson[Node]; + if (m_rson[q] != NOT_USED) + { + do + { + q = m_rson[q]; + } + while (m_rson[q] != NOT_USED); + + m_rson[ m_dad[q] ] = m_lson[q]; + m_dad[ m_lson[q] ] = m_dad[q]; + m_lson[q] = m_lson[Node]; + m_dad[ m_lson[Node] ] = q; + } + + m_rson[q] = m_rson[Node]; + m_dad[ m_rson[Node] ] = q; + } + + m_dad[q] = m_dad[Node]; + + if (m_rson[ m_dad[Node] ] == Node) + { + m_rson[ m_dad[Node] ] = q; + } + else + { + m_lson[ m_dad[Node] ] = q; + } + + m_dad[Node] = NOT_USED; + } + +/* + ------------------------------------------------------------------------- + cLZSS::Encode + + This function "encodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. 
+ ------------------------------------------------------------------------- +*/ + +void cLZSS::Encode( // no return value + void) // no parameters + + { + short int i; // an iterator + short int r; // node number in the binary tree + short int s; // position in the ring buffer + unsigned short int len; // len of initial string + short int last_match_length; // length of last match + short int code_buf_pos; // position in the output buffer + unsigned char code_buf[17]; // the output buffer + unsigned char mask; // bit mask for byte 0 of out buf + unsigned char c; // character read from string + + // Start with a clean tree. + + InitTree(); + + // code_buf[0] works as eight flags. A "1" represents that the + // unit is an unencoded letter (1 byte), and a "0" represents + // that the next unit is a pair (2 bytes). + // + // code_buf[1..16] stores eight units of code. Since the best + // we can do is store eight pairs, at most 16 + // bytes are needed to store this. + // + // This is why the maximum size of the code buffer is 17 bytes. + + code_buf[0] = 0; + code_buf_pos = 1; + + // Mask iterates over the 8 bits in the code buffer. The first + // character ends up being stored in the low bit. + // + // bit 8 7 6 5 4 3 2 1 + // | | + // | first sequence in code buffer + // | + // last sequence in code buffer + + mask = 1; + + s = 0; + r = (short int) N - (short int) F; + + // Initialize the ring buffer with spaces... + + // Note that the last F bytes of the ring buffer are not filled. + // This is because those F bytes will be filled in immediately + // with bytes from the input stream. + + memset(m_ring_buffer, ' ', N - F); + + // Read F bytes into the last F bytes of the ring buffer. + // + // This function loads the buffer with X characters and returns + // the actual amount loaded. + + len = GetChars(&(m_ring_buffer[r]), F); + + // Make sure there is something to be compressed. 
+ + if (len == 0) + return; + + // Insert the F strings, each of which begins with one or more + // 'space' characters. Note the order in which these strings + // are inserted. This way, degenerate trees will be less likely + // to occur. + + for (i = 1; i <= F; i++) + { + InsertNode((short int) (r - i)); + } + + // Finally, insert the whole string just read. The + // member variables match_length and match_position are set. + + InsertNode(r); + + // Now that we're preloaded, continue till done. + + do + { + + // m_match_length may be spuriously long near the end of + // text. + + if (m_match_length > len) + { + m_match_length = len; + } + + // Is it cheaper to store this as a single character? If so, + // make it so. + + if (m_match_length < THRESHOLD) + { + // Send one character. Remember that code_buf[0] is the + // set of flags for the next eight items. + + m_match_length = 1; + code_buf[0] |= mask; + code_buf[code_buf_pos++] = m_ring_buffer[r]; + } + + // Otherwise, we do indeed have a string that can be stored + // compressed to save space. + + else + { + // The next 16 bits need to contain the position (12 bits) + // and the length (4 bits). + + code_buf[code_buf_pos++] = (unsigned char) m_match_position; + code_buf[code_buf_pos++] = (unsigned char) ( + ((m_match_position >> 4) & 0xf0) | + (m_match_length - THRESHOLD) ); + } + + // Shift the mask one bit to the left so that it will be ready + // to store the new bit. + + mask = (unsigned char) (mask << 1); + + // If the mask is now 0, then we know that we have a full set + // of flags and items in the code buffer. These need to be + // output. + + if (mask == 0) + { + // code_buf is the buffer of characters to be output. + // code_buf_pos is the number of characters it contains. + + SendChars(code_buf, code_buf_pos); + + // Reset for next buffer... + + code_buf[0] = 0; + code_buf_pos = 1; + mask = 1; + } + + last_match_length = m_match_length; + + // Delete old strings and read new bytes... 
+ + for (i = 0; i < last_match_length; i++) + { + + // Get next character... + + if (GetChars(&c, 1) != 1) + break; + + // Delete "old strings" + + DeleteNode(s); + + // Put this character into the ring buffer. + // + // The original comment here says "If the position is near + // the end of the buffer, extend the buffer to make + // string comparison easier." + // + // That's a little misleading, because the "end" of the + // buffer is really what we consider to be the "beginning" + // of the buffer, that is, positions 0 through F. + // + // The idea is that the front end of the buffer is duplicated + // into the back end so that when you're looking at characters + // at the back end of the buffer, you can index ahead (beyond + // the normal end of the buffer) and see the characters + // that are at the front end of the buffer wihtout having + // to adjust the index. + // + // That is... + // + // 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234 + // | | | + // position 0 end of buffer | + // | + // duplicate of front of buffer + + m_ring_buffer[s] = c; + + if (s < F - 1) + { + m_ring_buffer[s + N] = c; + } + + // Increment the position, and wrap around when we're at + // the end. Note that this relies on N being a power of 2. + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Register the string that is found in + // m_ring_buffer[r..r+F-1]. + + InsertNode(r); + } + + // If we didn't quit because we hit the last_match_length, + // then we must have quit because we ran out of characters + // to process. + + while (i++ < last_match_length) + { + DeleteNode(s); + + s = (short int) ( (s + 1) & (N - 1) ); + r = (short int) ( (r + 1) & (N - 1) ); + + // Note that len hitting 0 is the key that causes the + // do...while() to terminate. This is the only place + // within the loop that len is modified. + // + // Its original value is F (or a number less than F for + // short strings). 
+ + if (--len) + { + InsertNode(r); /* buffer may not be empty. */ + } + } + + // End of do...while() loop. Continue processing until there + // are no more characters to be compressed. The variable + // "len" is used to signal this condition. + } + while (len > 0); + + // There could still be something in the output buffer. Send it + // now. + + if (code_buf_pos > 1) + { + // code_buf is the encoded string to send. + // code_buf_ptr is the number of characters. + + SendChars(code_buf, code_buf_pos); + } + + // Done! + } + +/* + ------------------------------------------------------------------------- + cLZSS::Decode + + This function "decodes" the input stream into the output stream. + The GetChars() and SendChars() functions are used to separate + this method from the actual i/o. + ------------------------------------------------------------------------- +*/ + +void cLZSS::Decode( // no return value + void) // no parameters + + { + int k; + int r; // node number + unsigned char c[F]; // an array of chars + unsigned char flags; // 8 bits of flags + int flag_count; // which flag we're on + short int pos; // position in the ring buffer + short int len; // number of chars in ring buffer + + // Initialize the ring buffer with a common string. + // + // Note that the last F bytes of the ring buffer are not filled. + + memset(m_ring_buffer, ' ', N - F); + + r = N - F; + + flags = (char) 0; + flag_count = 0; + + for ( ; ; ) + { + + // If there are more bits of interest in this flag, then + // shift that next interesting bit into the 1's position. + // + // If this flag has been exhausted, the next byte must + // be a flag. + + if (flag_count > 0) + { + flags = (unsigned char) (flags >> 1); + flag_count--; + } + else + { + // Next byte must be a flag. + + if (GetChars(&flags, 1) != 1) + break; + + // Set the flag counter. 
While at first it might appear + // that this should be an 8 since there are 8 bits in the + // flag, it should really be a 7 because the shift must + // be performed 7 times in order to see all 8 bits. + + flag_count = 7; + } + + // If the low order bit of the flag is now set, then we know + // that the next byte is a single, unencoded character. + + if (flags & 1) + { + if (GetChars(c, 1) != 1) + break; + + if (SendChars(c, 1) != 1) + break; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[0]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Otherwise, we know that the next two bytes are a + // pair. The position is in 12 bits and + // the length is in 4 bits. + + else + { + // Original code: + // if ((i = getc(infile)) == EOF) + // break; + // if ((j = getc(infile)) == EOF) + // break; + // i |= ((j & 0xf0) << 4); + // j = (j & 0x0f) + THRESHOLD; + // + // I've modified this to only make one input call, and + // have changed the variable names to something more + // obvious. + + if (GetChars(c, 2) != 2) + break; + + // Convert these two characters into the position and + // length. Note that the length is always at least + // THRESHOLD, which is why we're able to get a length + // of 18 out of only 4 bits. + + pos = (short int) ( c[0] | ((c[1] & 0xf0) << 4) ); + + len = (short int) ( (c[1] & 0x0f) + THRESHOLD ); + + // There are now "len" characters at position "pos" in + // the ring buffer that can be pulled out. Note that + // len is never more than F. + + for (k = 0; k < len; k++) + { + c[k] = m_ring_buffer[(pos + k) & (N - 1)]; + + // Add to buffer, and increment to next spot. Wrap at end. + + m_ring_buffer[r] = c[k]; + r = (short int) ( (r + 1) & (N - 1) ); + } + + // Add the "len" characters to the output stream. 
+ + if (SendChars(c, len) != len) + break; + } + } + } + diff --git a/src/modules/common/zipcomprs.cpp b/src/modules/common/zipcomprs.cpp new file mode 100644 index 0000000..01ba430 --- /dev/null +++ b/src/modules/common/zipcomprs.cpp @@ -0,0 +1,158 @@ +/****************************************************************************** + * swcomprs.cpp - code for class 'ZipCompress'- a driver class that provides + * compression utilities. - using zlib + */ + +#include +#include +#include +#include +#include +#include + +/****************************************************************************** + * ZipCompress Constructor - Initializes data for instance of ZipCompress + * + */ + +ZipCompress::ZipCompress() : SWCompress() +{ +// fprintf(stderr, "init compress\n"); +} + + +/****************************************************************************** + * ZipCompress Destructor - Cleans up instance of ZipCompress + */ + +ZipCompress::~ZipCompress() { +} + + +/****************************************************************************** + * ZipCompress::Encode - This function "encodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + * NOTE: must set zlen for parent class to know length of + * compressed buffer. + */ + +void ZipCompress::Encode(void) +{ +/* +ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be at least 0.1% larger than + sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the + compressed buffer. + This function can be used to compress a whole file at once if the + input file is mmap'ed. 
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + direct = 0; // set direction needed by parent [Get|Send]Chars() + + // get buffer + char chunk[1024]; + char *buf = (char *)calloc(1, 1024); + char *chunkbuf = buf; + unsigned long chunklen; + unsigned long len = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + len += chunklen; + if (chunklen < 1023) + break; + else buf = (char *)realloc(buf, len + 1024); + chunkbuf = buf+len; + } + + + zlen = (long) (len*1.001)+15; + char *zbuf = new char[zlen+1]; + if (len) + { + //printf("Doing compress\n"); + if (compress((Bytef*)zbuf, &zlen, (const Bytef*)buf, len)!=Z_OK) + { + printf("ERROR in compression\n"); + } + else { + SendChars(zbuf, zlen); + } + } + else + { + fprintf(stderr, "No buffer to compress\n"); + } + delete [] zbuf; + free (buf); +} + + +/****************************************************************************** + * ZipCompress::Decode - This function "decodes" the input stream into the + * output stream. + * The GetChars() and SendChars() functions are + * used to separate this method from the actual + * i/o. + */ + +void ZipCompress::Decode(void) +{ +/* +ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen)); + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total + size of the destination buffer, which must be large enough to hold the + entire uncompressed data. (The size of the uncompressed data must have + been saved previously by the compressor and transmitted to the decompressor + by some mechanism outside the scope of this compression library.) + Upon exit, destLen is the actual size of the compressed buffer. + This function can be used to decompress a whole file at once if the + input file is mmap'ed. 
+ + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted. +*/ + + // get buffer + char chunk[1024]; + char *zbuf = (char *)calloc(1, 1024); + char *chunkbuf = zbuf; + int chunklen; + unsigned long zlen = 0; + while((chunklen = GetChars(chunk, 1023))) { + memcpy(chunkbuf, chunk, chunklen); + zlen += chunklen; + if (chunklen < 1023) + break; + else zbuf = (char *)realloc(zbuf, zlen + 1024); + chunkbuf = zbuf + zlen; + } + + //printf("Decoding complength{%ld} uncomp{%ld}\n", zlen, blen); + if (zlen) { + unsigned long blen = zlen*20; // trust compression is less than 1000% + char *buf = new char[blen]; + //printf("Doing decompress {%s}\n", zbuf); + if (uncompress((Bytef*)buf, &blen, (Bytef*)zbuf, zlen) != Z_OK) { + fprintf(stderr, "no room in outbuffer to during decompression. see zipcomp.cpp\n"); + } + SendChars(buf, blen); + delete [] buf; + slen = blen; + } + else { + fprintf(stderr, "No buffer to decompress!\n"); + } + //printf("Finished decoding\n"); + free (zbuf); +} diff --git a/src/modules/common/zstr.cpp b/src/modules/common/zstr.cpp new file mode 100644 index 0000000..fc02572 --- /dev/null +++ b/src/modules/common/zstr.cpp @@ -0,0 +1,705 @@ +/****************************************************************************** + * zstr.cpp - code for class 'zStr'- a module that reads compressed text + * files and provides lookup and parsing functions based on + * class StrKey + */ + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include + +/****************************************************************************** + * zStr Statics + */ + +int zStr::instance = 0; +const int zStr::IDXENTRYSIZE = 8; +const int zStr::ZDXENTRYSIZE = 8; + + +/****************************************************************************** + * 
zStr Constructor - Initializes data for instance of zStr + * + * ENT: ipath - path of the directory where data and index files are located. + */ + +zStr::zStr(const char *ipath, int fileMode, long blockCount, SWCompress *icomp) { + char buf[127]; + + nl = '\n'; + lastoff = -1; + path = 0; + stdstr(&path, ipath); + + compressor = (icomp) ? icomp : new SWCompress(); + this->blockCount = blockCount; +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s.idx", path); + idxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.dat", path); + datfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdx", path); + zdxfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s.zdt", path); + zdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + if (datfd <= 0) { + sprintf(buf, "Error: %d", errno); + perror(buf); + } + + cacheBlock = 0; + cacheBlockIndex = -1; + cacheDirty = false; + + instance++; +} + + +/****************************************************************************** + * zStr Destructor - Cleans up instance of zStr + */ + +zStr::~zStr() { + + flushCache(); + + if (path) + delete [] path; + + --instance; + + FileMgr::systemFileMgr.close(idxfd); + FileMgr::systemFileMgr.close(datfd); + FileMgr::systemFileMgr.close(zdxfd); + FileMgr::systemFileMgr.close(zdtfd); + + + if (compressor) + delete compressor; + +} + + +/****************************************************************************** + * zStr::getidxbufdat - Gets the index string at the given dat offset + * NOTE: buf is calloc'd, or if not null, realloc'd and must + * be free'd by calling function + * + * ENT: ioffset - offset in dat file to lookup + * buf - address of pointer to allocate for storage of 
string + */ + +void zStr::getKeyFromDatOffset(long ioffset, char **buf) { + int size; + char ch; + if (datfd > 0) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + for (size = 0; read(datfd->getFd(), &ch, 1) == 1; size++) { + if ((ch == '\\') || (ch == 10) || (ch == 13)) + break; + } + *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1); + if (size) { + lseek(datfd->getFd(), ioffset, SEEK_SET); + read(datfd->getFd(), *buf, size); + } + (*buf)[size] = 0; + for (size--; size > 0; size--) + (*buf)[size] = SW_toupper((*buf)[size]); + } + else { + *buf = (*buf) ? (char *)realloc(*buf, 1) : (char *)malloc(1); + **buf = 0; + } +} + + +/****************************************************************************** + * zStr::getidxbuf - Gets the index string at the given idx offset + * NOTE: buf is calloc'd, or if not null, realloc'd + * and must be freed by calling function + * + * ENT: ioffset - offset in idx file to lookup + * buf - address of pointer to allocate for storage of string + */ + +void zStr::getKeyFromIdxOffset(long ioffset, char **buf) { + __u32 offset; + + if (idxfd > 0) { + lseek(idxfd->getFd(), ioffset, SEEK_SET); + read(idxfd->getFd(), &offset, sizeof(__u32)); + offset = swordtoarch32(offset); + getKeyFromDatOffset(offset, buf); + } +} + + +/****************************************************************************** + * zStr::findoffset - Finds the offset of the key string from the indexes + * + * ENT: key - key string to lookup + * offset - address to store the starting offset + * size - address to store the size of the entry + * away - number of entries before of after to jump + * (default = 0) + * + * RET: error status + */ + +signed char zStr::findKeyIndex(const char *ikey, long *idxoff, long away) { + char *trybuf = 0, *key = 0, quitflag = 0; + signed char retval = 0; + __s32 headoff, tailoff, tryoff = 0, maxoff = 0; + __u32 start, size; + + if (idxfd->getFd() >= 0) { + tailoff = maxoff = lseek(idxfd->getFd(), 0, SEEK_END) - 
IDXENTRYSIZE; + if (*ikey) { + headoff = 0; + stdstr(&key, ikey); + toupperstr(key); + + while (headoff < tailoff) { + tryoff = (lastoff == -1) ? headoff + (((((tailoff / IDXENTRYSIZE) - (headoff / IDXENTRYSIZE))) / 2) * IDXENTRYSIZE) : lastoff; + lastoff = -1; + + getKeyFromIdxOffset(tryoff, &trybuf); + + if (!*trybuf && tryoff) { // In case of extra entry at end of idx (not first entry) + tryoff += (tryoff > (maxoff / 2))?-IDXENTRYSIZE:IDXENTRYSIZE; + retval = -1; + break; + } + + int diff = strcmp(key, trybuf); + if (!diff) + break; + + if (diff < 0) + tailoff = (tryoff == headoff) ? headoff : tryoff; + else headoff = tryoff; + if (tailoff == headoff + IDXENTRYSIZE) { + if (quitflag++) + headoff = tailoff; + } + } + if (headoff >= tailoff) + tryoff = headoff; + if (trybuf) + free(trybuf); + delete [] key; + } + else { tryoff = 0; } + + lseek(idxfd->getFd(), tryoff, SEEK_SET); + + start = size = 0; + retval = (read(idxfd->getFd(), &start, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + retval = (read(idxfd->getFd(), &size, sizeof(__u32))==sizeof(__u32)) ? retval : -1; + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + while (away) { + __u32 laststart = start; + __u32 lastsize = size; + __s32 lasttry = tryoff; + tryoff += (away > 0) ? IDXENTRYSIZE : -IDXENTRYSIZE; + + bool bad = false; + if (((long)(tryoff + (away*IDXENTRYSIZE)) < -IDXENTRYSIZE) || (tryoff + (away*IDXENTRYSIZE) > (maxoff+IDXENTRYSIZE))) + bad = true; + else if (lseek(idxfd->getFd(), tryoff, SEEK_SET) < 0) + bad = true; + if (bad) { + retval = -1; + start = laststart; + size = lastsize; + tryoff = lasttry; + if (idxoff) + *idxoff = tryoff; + break; + } + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + if (idxoff) + *idxoff = tryoff; + + + if (((laststart != start) || (lastsize != size)) && (start >= 0) && (size)) + away += (away < 0) ? 
1 : -1; + } + + lastoff = tryoff; + } + else { + if (idxoff) + *idxoff = 0; + retval = -1; + } + return retval; +} + + +/****************************************************************************** + * zStr::preptext - Prepares the text before returning it to external + * objects + * + * ENT: buf - buffer where text is stored and where to store the prep'd + * text. + */ + +void zStr::prepText(char *buf) { + char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0; + + for (to = from = buf; *from; from++) { + switch (*from) { + case 10: + if (!realdata) + continue; + space = (cr) ? 0 : 1; + cr = 0; + nlcnt++; + if (nlcnt > 1) { +// *to++ = nl; + *to++ = nl; +// nlcnt = 0; + } + continue; + case 13: + if (!realdata) + continue; + *to++ = nl; + space = 0; + cr = 1; + continue; + } + realdata = 1; + nlcnt = 0; + if (space) { + space = 0; + if (*from != ' ') { + *to++ = ' '; + from--; + continue; + } + } + *to++ = *from; + } + *to = 0; + + while (to > (buf+1)) { // remove trailing excess + to--; + if ((*to == 10) || (*to == ' ')) + *to = 0; + else break; + } +} + + +/****************************************************************************** + * zStr::gettext - gets text at a given offset + * + * ENT: + * offset - idxoffset where the key is located. + * buf - buffer to store text + * idxbuf - buffer to store index key + * NOTE: buffer will be alloc'd / realloc'd and + * should be free'd by the client + * + */ + +void zStr::getText(long offset, char **idxbuf, char **buf) { + char *ch; + char *idxbuflocal = 0; + getKeyFromIdxOffset(offset, &idxbuflocal); + __u32 start; + __u32 size; + + do { + lseek(idxfd->getFd(), offset, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1); + *idxbuf = (*idxbuf) ? 
(char *)realloc(*idxbuf, size + 1) : (char *)malloc(size + 1); + memset(*buf, 0, size + 1); + memset(*idxbuf, 0, size + 1); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), *buf, (int)(size)); + + for (ch = *buf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(*buf, ch, size - (unsigned long)(ch-*buf)); + + // resolve link + if (!strncmp(*buf, "@LINK", 5)) { + for (ch = *buf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(*buf + IDXENTRYSIZE, &offset); + } + else break; + } + while (true); // while we're resolving links + + if (idxbuflocal) { + __u32 localsize = strlen(idxbuflocal); + localsize = (localsize < (size - 1)) ? localsize : (size - 1); + strncpy(*idxbuf, idxbuflocal, localsize); + (*idxbuf)[localsize] = 0; + free(idxbuflocal); + } + __u32 block = 0; + __u32 entry = 0; + memmove(&block, *buf, sizeof(__u32)); + memmove(&entry, *buf + sizeof(__u32), sizeof(__u32)); + block = swordtoarch32(block); + entry = swordtoarch32(entry); + getCompressedText(block, entry, buf); +} + + +/****************************************************************************** + * zStr::getCompressedText - Get text entry from a compressed index / zdata + * file. + */ + +void zStr::getCompressedText(long block, long entry, char **buf) { + + __u32 size = 0; + + if (cacheBlockIndex != block) { + __u32 start = 0; + + lseek(zdxfd->getFd(), block * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + *buf = (*buf) ? 
(char *)realloc(*buf, size + 1) : (char *)malloc(size + 1); + + lseek(zdtfd->getFd(), start, SEEK_SET); + read(zdtfd->getFd(), *buf, size); + + flushCache(); + + unsigned long len = size; + compressor->zBuf(&len, *buf); + char * rawBuf = compressor->Buf(0, &len); + cacheBlock = new EntriesBlock(rawBuf, len); + cacheBlockIndex = block; + } + size = cacheBlock->getEntrySize(entry); + *buf = (*buf) ? (char *)realloc(*buf, size + 1) : (char *)malloc(size + 1); + strcpy(*buf, cacheBlock->getEntry(entry)); +} + + +/****************************************************************************** + * zLD::settext - Sets text for current offset + * + * ENT: key - key for this entry + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zStr::setText(const char *ikey, const char *buf, long len) { + + __u32 start, outstart; + __u32 size, outsize; + __s32 endoff; + long idxoff = 0; + __s32 shiftSize; + static const char nl[] = {13, 10}; + char *tmpbuf = 0; + char *key = 0; + char *dbKey = 0; + char *idxBytes = 0; + char *outbuf = 0; + char *ch = 0; + + stdstr(&key, ikey); + toupperstr(key); + + char notFound = findKeyIndex(ikey, &idxoff, 0); + if (!notFound) { + getKeyFromIdxOffset(idxoff, &dbKey); + int diff = strcmp(key, dbKey); + if (diff < 0) { + } + else if (diff > 0) { + idxoff += IDXENTRYSIZE; + } + else if ((!diff) && (len || strlen(buf) /*we're not deleting*/)) { // got absolute entry + do { + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), &start, sizeof(__u32)); + read(idxfd->getFd(), &size, sizeof(__u32)); + start = swordtoarch32(start); + size = swordtoarch32(size); + + tmpbuf = new char [ size + 2 ]; + memset(tmpbuf, 0, size + 2); + lseek(datfd->getFd(), start, SEEK_SET); + read(datfd->getFd(), tmpbuf, size); + + for (ch = tmpbuf; *ch; ch++) { // skip over index string + if (*ch == 10) { + ch++; + break; + } + } + memmove(tmpbuf, ch, size - (unsigned long)(ch-tmpbuf)); + + // resolve link + if (!strncmp(tmpbuf, 
"@LINK", 5) && (len ? len : strlen(buf))) { + for (ch = tmpbuf; *ch; ch++) { // null before nl + if (*ch == 10) { + *ch = 0; + break; + } + } + findKeyIndex(tmpbuf + IDXENTRYSIZE, &idxoff); + delete [] tmpbuf; + } + else break; + } + while (true); // while we're resolving links + } + } + + endoff = lseek(idxfd->getFd(), 0, SEEK_END); + + shiftSize = endoff - idxoff; + + if (shiftSize > 0) { + idxBytes = new char [ shiftSize ]; + lseek(idxfd->getFd(), idxoff, SEEK_SET); + read(idxfd->getFd(), idxBytes, shiftSize); + } + + outbuf = new char [ (len ? len : strlen(buf)) + strlen(key) + 5 ]; + sprintf(outbuf, "%s%c%c", key, 13, 10); + size = strlen(outbuf); + if (len ? len : strlen(buf)) { // NOT a link + if (!cacheBlock) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + else if (cacheBlock->getCount() >= blockCount) { + flushCache(); + cacheBlock = new EntriesBlock(); + cacheBlockIndex = (lseek(zdxfd->getFd(), 0, SEEK_END) / ZDXENTRYSIZE); + } + __u32 entry = cacheBlock->addEntry(buf); + cacheDirty = true; + outstart = archtosword32(cacheBlockIndex); + outsize = archtosword32(entry); + memcpy (outbuf + size, &outstart, sizeof(__u32)); + memcpy (outbuf + size + sizeof(__u32), &outsize, sizeof(__u32)); + size += (sizeof(__u32) * 2); + } + else { // link + memcpy(outbuf + size, buf, len ? len : strlen(buf)); + size += (len ? len : strlen(buf)); + } + + start = lseek(datfd->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + + lseek(idxfd->getFd(), idxoff, SEEK_SET); + if (len ? 
len : strlen(buf)) { + lseek(datfd->getFd(), start, SEEK_SET); + write(datfd->getFd(), outbuf, size); + + // add a new line to make data file easier to read in an editor + write(datfd->getFd(), &nl, 2); + + write(idxfd->getFd(), &outstart, sizeof(__u32)); + write(idxfd->getFd(), &outsize, sizeof(__u32)); + if (idxBytes) { + write(idxfd->getFd(), idxBytes, shiftSize); + } + } + else { // delete entry + if (idxBytes) { + write(idxfd->getFd(), idxBytes+IDXENTRYSIZE, shiftSize-IDXENTRYSIZE); + lseek(idxfd->getFd(), -1, SEEK_CUR); // last valid byte + FileMgr::systemFileMgr.trunc(idxfd); // truncate index + } + } + + if (idxBytes) + delete [] idxBytes; + delete [] key; + delete [] outbuf; + free(dbKey); +} + + +/****************************************************************************** + * zLD::linkentry - links one entry to another + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * destidxoff - dest offset into .vss + * srcidxoff - source offset into .vss + */ + +void zStr::linkEntry(const char *destkey, const char *srckey) { + char *text = new char [ strlen(destkey) + 7 ]; + sprintf(text, "@LINK %s", destkey); + setText(srckey, text); + delete [] text; +} + + +void zStr::flushCache() { + if (cacheBlock) { + if (cacheDirty) { + __u32 start = 0; + unsigned long size = 0; + __u32 outstart = 0, outsize = 0; + + const char *rawBuf = cacheBlock->getRawData(&size); + compressor->Buf(rawBuf, &size); + compressor->zBuf(&size); + + long zdxSize = lseek(zdxfd->getFd(), 0, SEEK_END); + long zdtSize = lseek(zdtfd->getFd(), 0, SEEK_END); + + if ((cacheBlockIndex * ZDXENTRYSIZE) > (zdxSize - ZDXENTRYSIZE)) { // New Block + start = zdtSize; + } + else { + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + read(zdxfd->getFd(), &start, sizeof(__u32)); + read(zdxfd->getFd(), &outsize, sizeof(__u32)); + start = swordtoarch32(start); + outsize = swordtoarch32(outsize); + if (start + outsize >= zdtSize) { // last entry, just overwrite + // start 
is already set + } + else if (size < outsize) { // middle entry, but smaller, that's fine and let's preserve bigger size + size = outsize; + } + else { // middle and bigger-- we have serious problems, for now let's put it at the end = lots of wasted space + start = zdtSize; + } + } + + + + outstart = archtosword32(start); + outsize = archtosword32((__u32)size); + + lseek(zdxfd->getFd(), cacheBlockIndex * ZDXENTRYSIZE, SEEK_SET); + lseek(zdtfd->getFd(), start, SEEK_SET); + rawBuf = compressor->zBuf(&size); + write(zdtfd->getFd(), rawBuf, size); + + // add a new line to make data file easier to read in an editor + write(zdtfd->getFd(), &nl, 2); + + write(zdxfd->getFd(), &outstart, sizeof(__u32)); + write(zdxfd->getFd(), &outsize, sizeof(__u32)); + + delete cacheBlock; + } + } + cacheBlockIndex = -1; + cacheBlock = 0; + cacheDirty = false; +} + + +/****************************************************************************** + * zLD::CreateModule - Creates new module files + * + * ENT: path - directory to store module files + * RET: error status + */ + +signed char zStr::createModule(const char *ipath) { + char *path = 0; + char *buf = new char [ strlen (ipath) + 20 ]; + FileDesc *fd, *fd2; + + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + sprintf(buf, "%s.dat", path); + unlink(buf); + fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd->getFd(); + FileMgr::systemFileMgr.close(fd); + + sprintf(buf, "%s.idx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdt", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + sprintf(buf, "%s.zdx", path); + unlink(buf); + fd2 = FileMgr::systemFileMgr.open(buf, 
O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE); + fd2->getFd(); + FileMgr::systemFileMgr.close(fd2); + + delete [] path; + + return 0; +} diff --git a/src/modules/common/zverse.cpp b/src/modules/common/zverse.cpp new file mode 100644 index 0000000..8d30797 --- /dev/null +++ b/src/modules/common/zverse.cpp @@ -0,0 +1,513 @@ +/****************************************************************************** + * zverse.h - code for class 'zVerse'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * and provides lookup and parsing functions based on + * class VerseKey for compressed modules + */ + + +#include +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +/****************************************************************************** + * zVerse Statics + */ + +int zVerse::instance = 0; + +const char zVerse::uniqueIndexID[] = {'X', 'r', 'v', 'c', 'b'}; + +/****************************************************************************** + * zVerse Constructor - Initializes data for instance of zVerse + * + * ENT: ipath - path of the directory where data and index files are located. + * be sure to include the trailing separator (e.g. '/' or '\') + * (e.g. 'modules/texts/rawtext/webster/') + * fileMode - open mode for the files (O_RDONLY, etc.) + * blockType - verse, chapter, book, etc. + */ + +zVerse::zVerse(const char *ipath, int fileMode, int blockType, SWCompress *icomp) +{ + char buf[127]; + + nl = '\n'; + path = 0; + cacheBufIdx = -1; + cacheTestament = 0; + cacheBuf = 0; + dirtyCache = false; + stdstr(&path, ipath); + + if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\')) + path[strlen(path)-1] = 0; + + compressor = (icomp) ? 
icomp : new SWCompress(); + + if (fileMode == -1) { // try read/write if possible + fileMode = O_RDWR; + } + + sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockType]); + idxfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockType]); + idxfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockType]); + textfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockType]); + textfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockType]); + compfp[0] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockType]); + compfp[1] = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true); + + instance++; +} + + +/****************************************************************************** + * zVerse Destructor - Cleans up instance of zVerse + */ + +zVerse::~zVerse() +{ + int loop1; + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + if (path) + delete [] path; + + if (compressor) + delete compressor; + + --instance; + + for (loop1 = 0; loop1 < 2; loop1++) { + FileMgr::systemFileMgr.close(idxfp[loop1]); + FileMgr::systemFileMgr.close(textfp[loop1]); + FileMgr::systemFileMgr.close(compfp[loop1]); + } +} + + +/****************************************************************************** + * zVerse::findoffset - Finds the offset of the key verse from the indexes + * + * + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * book - book to find (0 - testament introduction) + * chapter - chapter to find (0 - book introduction) + * verse - verse to find (0 - chapter introduction) + * start - address to store the starting offset + * size - address to store the size of the entry + */ + +void zVerse::findoffset(char 
testmt, long idxoff, long *start, unsigned short *size) +{ + // set start to offset in + // set size to + // set + unsigned long ulBuffNum=0; // buffer number + unsigned long ulVerseStart=0; // verse offset within buffer + unsigned short usVerseSize=0; // verse size + unsigned long ulCompOffset=0; // compressed buffer start + unsigned long ulCompSize=0; // buffer size compressed + unsigned long ulUnCompSize=0; // buffer size uncompressed + char *pcCompText=NULL; // compressed text + + *start = *size = 0; + //printf ("Finding offset %ld\n", idxoff); + idxoff *= 10; + if (!testmt) { + testmt = ((idxfp[0]) ? 1:2); + } + + // assert we have and valid file descriptor + if (compfp[testmt-1]->getFd() < 1) + return; + + long newOffset = lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + if (newOffset == idxoff) { + if (read(compfp[testmt-1]->getFd(), &ulBuffNum, 4) != 4) { + printf ("Error reading ulBuffNum\n"); + return; + } + } + else return; + + ulBuffNum = swordtoarch32(ulBuffNum); + + if (read(compfp[testmt-1]->getFd(), &ulVerseStart, 4) < 2) + { + printf ("Error reading ulVerseStart\n"); + return; + } + if (read(compfp[testmt-1]->getFd(), &usVerseSize, 2) < 2) + { + printf ("Error reading usVerseSize\n"); + return; + } + + *start = swordtoarch32(ulVerseStart); + *size = swordtoarch16(usVerseSize); + + if (*size) { + if (((long) ulBuffNum == cacheBufIdx) && (testmt == cacheTestament) && (cacheBuf)) { + // have the text buffered + return; + } + + //printf ("Got buffer number{%ld} versestart{%ld} versesize{%d}\n", ulBuffNum, ulVerseStart, usVerseSize); + + + if (lseek(idxfp[testmt-1]->getFd(), ulBuffNum*12, SEEK_SET)!=(long) ulBuffNum*12) + { + printf ("Error seeking compressed file index\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompOffset, 4)<4) + { + printf ("Error reading ulCompOffset\n"); + return; + } + if (read(idxfp[testmt-1]->getFd(), &ulCompSize, 4)<4) + { + printf ("Error reading ulCompSize\n"); + return; + } + if 
(read(idxfp[testmt-1]->getFd(), &ulUnCompSize, 4)<4) + { + printf ("Error reading ulUnCompSize\n"); + return; + } + + ulCompOffset = swordtoarch32(ulCompOffset); + ulCompSize = swordtoarch32(ulCompSize); + ulUnCompSize = swordtoarch32(ulUnCompSize); + + if (lseek(textfp[testmt-1]->getFd(), ulCompOffset, SEEK_SET)!=(long)ulCompOffset) + { + printf ("Error: could not seek to right place in compressed text\n"); + return; + } + pcCompText = new char[ulCompSize]; + + if (read(textfp[testmt-1]->getFd(), pcCompText, ulCompSize)<(long)ulCompSize) + { + printf ("Error reading compressed text\n"); + return; + } + compressor->zBuf(&ulCompSize, pcCompText); + + if (cacheBuf) { + flushCache(); + free(cacheBuf); + } + + unsigned long len = 0; + compressor->Buf(0, &len); + cacheBuf = (char *)calloc(len + 1, 1); + memcpy(cacheBuf, compressor->Buf(), len); + + cacheTestament = testmt; + cacheBufIdx = ulBuffNum; + } +} + + +/****************************************************************************** + * zVerse::swgettext - gets text at a given offset + * + * ENT: testmt - testament file to search in (0 - Old; 1 - New) + * start - starting offset where the text is located in the file + * size - size of text entry + 1 (null) + * buf - buffer to store text + * + */ + +void zVerse::swgettext(char testmt, long start, unsigned short size, char *inbuf) +{ + memset(inbuf, 0, size); + if (size > 2) { + strncpy(inbuf, &(cacheBuf[start]), size-2); + } +} + + +/****************************************************************************** + * zVerse::settext - Sets text for current offset + * + * ENT: testmt - testament to find (0 - Bible/module introduction) + * idxoff - offset into .vss + * buf - buffer to store + * len - length of buffer (0 - null terminated) + */ + +void zVerse::settext(char testmt, long idxoff, const char *buf, long len) +{ + if ((!dirtyCache) || (cacheBufIdx < 0)) { + cacheBufIdx = lseek(idxfp[testmt-1]->getFd(), 0, SEEK_END) / 12; + cacheTestament = testmt; + if 
(cacheBuf) + free(cacheBuf); + cacheBuf = (char *)calloc(len ? len : strlen(buf)+1, 1); + } + else cacheBuf = (char *)((cacheBuf)?realloc(cacheBuf, strlen(cacheBuf)+(len ? len : strlen(buf)+1)):calloc((len ? len : strlen(buf)+1), 1)); + + dirtyCache = true; + + unsigned long start, outstart; + unsigned long outBufIdx = cacheBufIdx; + unsigned short size; + unsigned short outsize; + + idxoff *= 10; + size = outsize = len ? len : strlen(buf); + + start = strlen(cacheBuf); + + if (!size) + start = outBufIdx = 0; + + outBufIdx = archtosword32(outBufIdx); + outstart = archtosword32(start); + outsize = archtosword16(size); + + lseek(compfp[testmt-1]->getFd(), idxoff, SEEK_SET); + write(compfp[testmt-1]->getFd(), &outBufIdx, 4); + write(compfp[testmt-1]->getFd(), &outstart, 4); + write(compfp[testmt-1]->getFd(), &outsize, 2); + strcat(cacheBuf, buf); +} + + +void zVerse::flushCache() { + if (dirtyCache) { + unsigned long idxoff; + unsigned long start, outstart; + unsigned long size, outsize; + unsigned long zsize, outzsize; + + idxoff = cacheBufIdx * 12; + size = outsize = zsize = outzsize = strlen(cacheBuf); + if (size) { +// if (compressor) { +// delete compressor; +// compressor = new LZSSCompress(); +// } + compressor->Buf(cacheBuf); + compressor->zBuf(&zsize); + outzsize = zsize; + + start = outstart = lseek(textfp[cacheTestament-1]->getFd(), 0, SEEK_END); + + outstart = archtosword32(start); + outsize = archtosword32(size); + outzsize = archtosword32(zsize); + + write(textfp[cacheTestament-1]->getFd(), compressor->zBuf(&zsize), zsize); + + lseek(idxfp[cacheTestament-1]->getFd(), idxoff, SEEK_SET); + write(idxfp[cacheTestament-1]->getFd(), &outstart, 4); + write(idxfp[cacheTestament-1]->getFd(), &outzsize, 4); + write(idxfp[cacheTestament-1]->getFd(), &outsize, 4); + } + dirtyCache = false; + } +} + +/****************************************************************************** + * RawVerse::linkentry - links one entry to another + * + * ENT: testmt - testament to 
find (0 - Bible/module introduction)
+ *	destidxoff	- dest offset into .vss
+ *	srcidxoff	- source offset into .vss
+ *
+ * Both offsets are entry numbers; they are multiplied by 10 (the size of
+ * one index record: 4-byte buffer index, 4-byte start, 2-byte size) and
+ * the source record is copied over the destination record.
+ */
+
+void zVerse::linkentry(char testmt, long destidxoff, long srcidxoff) {
+	// NOTE(review): only the first 4 bytes of each long are read and
+	// written back; this presumably assumes a little-endian layout when
+	// long is wider than 4 bytes -- confirm.
+	long bufidx;
+	long start;
+	unsigned short size;
+
+	destidxoff *= 10;
+	srcidxoff  *= 10;
+
+	// testament 0 means "pick one": 1 if idxfp[1] is set, otherwise 2
+	if (!testmt)
+		testmt = ((idxfp[1]) ? 1:2);
+
+	// get source
+	lseek(compfp[testmt-1]->getFd(), srcidxoff, SEEK_SET);
+	read(compfp[testmt-1]->getFd(), &bufidx, 4);
+	read(compfp[testmt-1]->getFd(), &start, 4);
+	read(compfp[testmt-1]->getFd(), &size, 2);
+
+	// write dest
+	lseek(compfp[testmt-1]->getFd(), destidxoff, SEEK_SET);
+	write(compfp[testmt-1]->getFd(), &bufidx, 4);
+	write(compfp[testmt-1]->getFd(), &start, 4);
+	write(compfp[testmt-1]->getFd(), &size, 2);
+}
+
+
+/******************************************************************************
+ * zVerse::createModule	- Creates new module files
+ *
+ * Creates (after unlinking any existing file) ot/nt ".?zs", ".?zz" and ".?zv"
+ * files, where '?' is uniqueIndexID[blockBound].  The .?zs and .?zz files are
+ * left empty; the .?zv files receive one all-zero 10-byte record per verse
+ * key (headings included).
+ *
+ * ENT:	ipath		- directory to store module files (a single trailing
+ *			  '/' or '\\' is ignored)
+ *	blockBound	- index into uniqueIndexID selecting the extension letter
+ * RET: error status (this implementation always returns 0)
+ */
+
+char zVerse::createModule(const char *ipath, int blockBound)
+{
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];
+	FileDesc *fd, *fd2;
+
+	stdstr(&path, ipath);
+
+	// strip one trailing separator; guard against an empty path so we never
+	// index path[-1]
+	size_t plen = strlen(path);
+	if (plen && ((path[plen-1] == '/') || (path[plen-1] == '\\')))
+		path[plen-1] = 0;
+
+	// getFd() is called right after each open(); presumably this forces the
+	// file manager to really create the file -- TODO confirm
+	sprintf(buf, "%s/ot.%czs", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();
+	FileMgr::systemFileMgr.close(fd);
+
+	sprintf(buf, "%s/nt.%czs", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();
+	FileMgr::systemFileMgr.close(fd);
+
+	sprintf(buf, "%s/ot.%czz", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();
+	FileMgr::systemFileMgr.close(fd);
+
+	sprintf(buf, "%s/nt.%czz", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd2->getFd();
+	// close the descriptor just opened (fd2); the old code closed the
+	// already-closed fd here and never released fd2
+	FileMgr::systemFileMgr.close(fd2);
+
+	sprintf(buf, "%s/ot.%czv", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();
+
+	sprintf(buf, "%s/nt.%czv", path, uniqueIndexID[blockBound]);
+	unlink(buf);
+	fd2 = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd2->getFd();
+
+	// one zeroed record per key: testament 1 goes to ot.?zv, others to nt.?zv
+	VerseKey vk;
+	vk.Headings(1);
+	long offset = 0;
+	short size = 0;
+	for (vk = TOP; !vk.Error(); vk++) {
+		write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4);	//compBufIdxOffset
+		write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &offset, 4);
+		write((vk.Testament() == 1) ? fd->getFd() : fd2->getFd(), &size, 2);
+	}
+
+	FileMgr::systemFileMgr.close(fd);
+	FileMgr::systemFileMgr.close(fd2);
+
+	delete [] path;
+	delete [] buf;		// was leaked
+/*
+	RawVerse rv(path);
+	VerseKey mykey("Rev 22:21");
+*/
+
+	return 0;
+}
+
+
+/******************************************************************************
+ * zVerse::preptext	- Prepares the text before returning it to external
+ *				objects
+ *
+ * Rewrites buf in place: CR/LF before the first real character are dropped,
+ * a CR becomes nl, a lone LF becomes a pending single space, a second
+ * consecutive LF becomes nl, and trailing LF/space characters are removed.
+ *
+ * ENT:	buf	- buffer where text is stored and where to store the prep'd
+ *			text.
+ */
+
+void zVerse::preptext(char *buf)
+{
+	// nl is not declared in this function; presumably a zVerse member
+	// holding the newline character to emit -- TODO confirm
+	char *to, *from, space = 0, cr = 0, realdata = 0, nlcnt = 0;
+
+	for (to = from = buf; *from; from++) {
+		switch (*from) {
+		case 10:
+			if (!realdata)
+				continue;
+			space = (cr) ? 0 : 1;	// LF right after CR adds no space
+			cr = 0;
+			nlcnt++;
+			if (nlcnt > 1) {
+//				*to++ = nl;
+				*to++ = nl;
+//				nlcnt = 0;
+			}
+			continue;
+		case 13:
+			if (!realdata)
+				continue;
+			*to++ = nl;
+			space = 0;
+			cr = 1;
+			continue;
+		}
+		realdata = 1;
+		nlcnt = 0;
+		if (space) {
+			space = 0;
+			if (*from != ' ') {
+				// emit the pending space, then revisit this character
+				*to++ = ' ';
+				from--;
+				continue;
+			}
+		}
+		*to++ = *from;
+	}
+	*to = 0;
+
+	if (to > buf) {
+		for (to--; to > buf; to--) {			// remove trailing excess
+			if ((*to == 10) || (*to == ' '))
+				*to = 0;
+			else break;
+		}
+	}
+}
diff --git a/src/modules/filters/Makefile b/src/modules/filters/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/filters/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/filters/Makefile.am b/src/modules/filters/Makefile.am
new file mode 100644
index 0000000..c58fb5f
--- /dev/null
+++ b/src/modules/filters/Makefile.am
@@ -0,0 +1,65 @@
+filtersdir = $(top_srcdir)/src/modules/filters
+
+libsword_la_SOURCES += $(filtersdir)/swbasicfilter.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfplain.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfrtf.cpp
+libsword_la_SOURCES += $(filtersdir)/plainhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/rwphtml.cpp
+libsword_la_SOURCES += $(filtersdir)/rwprtf.cpp
+libsword_la_SOURCES += $(filtersdir)/cipherfil.cpp
+libsword_la_SOURCES += $(filtersdir)/rtfhtml.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfstrongs.cpp
+libsword_la_SOURCES += $(filtersdir)/gbffootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/gbfmorph.cpp
+libsword_la_SOURCES += $(filtersdir)/plainfootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlstrongs.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlfootnotes.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlheadings.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlmorph.cpp
+libsword_la_SOURCES
+= $(filtersdir)/thmllemma.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlscripref.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlvariants.cpp
+
+libsword_la_SOURCES += $(filtersdir)/gbfthml.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlgbf.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlrtf.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlhtml.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlhtmlhref.cpp
+libsword_la_SOURCES += $(filtersdir)/thmlplain.cpp
+
+libsword_la_SOURCES += $(filtersdir)/unicodertf.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/latin1utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8utf16.cpp
+libsword_la_SOURCES += $(filtersdir)/utf16utf8.cpp
+libsword_la_SOURCES += $(filtersdir)/scsuutf8.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8html.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8latin1.cpp
+
+libsword_la_SOURCES += $(filtersdir)/thmlolb.cpp
+
+libsword_la_SOURCES += $(filtersdir)/greeklexattribs.cpp
+
+if ICU
+ICUDEFS = -D_ICU_
+SWICUSRC = $(filtersdir)/utf8transliterator.cpp
+SWICUSRC += $(filtersdir)/utf8nfc.cpp
+SWICUSRC += $(filtersdir)/utf8nfkd.cpp
+SWICUSRC += $(filtersdir)/utf8arshaping.cpp
+SWICUSRC += $(filtersdir)/utf8bidireorder.cpp
+else
+SWICUSRC =
+ICUDEFS =
+endif
+libsword_la_SOURCES += $(SWICUSRC)
+DEFS += $(ICUDEFS)
+
+libsword_la_SOURCES += $(filtersdir)/utf8cantillation.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8hebrewpoints.cpp
+libsword_la_SOURCES += $(filtersdir)/utf8greekaccents.cpp
+
diff --git a/src/modules/filters/cipherfil.cpp b/src/modules/filters/cipherfil.cpp
new file mode 100644
index 0000000..ad55396
--- /dev/null
+++ b/src/modules/filters/cipherfil.cpp
@@ -0,0 +1,38 @@
+/******************************************************************************
+ *
+ *	cipherfil -	SWFilter descendant to decipher a module
+ */
+
+
+#include
+#include
+#include
+
+
+/* Creates the SWCipher owned by this filter, keyed with `key`
+ * (passed to SWCipher as unsigned char *). */
+CipherFilter::CipherFilter(const char *key) {
+	cipher = new SWCipher((unsigned char *)key);
+}
+
+
+/* Releases the SWCipher created by the constructor. */
+CipherFilter::~CipherFilter() {
+	delete cipher;
+}
+
+
+/* Returns the cipher object owned by this filter (ownership is not
+ * transferred; the destructor deletes it). */
+SWCipher *CipherFilter::getCipher() {
+	return cipher;
+}
+
+
+/* Runs maxlen bytes of `text` through the cipher and copies the result back
+ * into `text`.  key and module are unused.  Always returns 0.
+ *
+ * NOTE(review): this writes text[maxlen] and text[maxlen+1], so the caller's
+ * buffer must hold at least maxlen+2 bytes -- confirm against callers.
+ * NOTE(review): strncpy stops copying at the first NUL in cipher->Buf();
+ * presumably the deciphered data is text -- confirm.
+ */
+char CipherFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) {
+	unsigned int len;
+//	len = strlen(text);
+	len = maxlen;
+	if (len > 0) {
+		// cipherBuf receives &len and may presumably update it -- TODO confirm
+		cipher->cipherBuf(&len, text);
+		strncpy(text, cipher->Buf(), (len < (unsigned int)maxlen) ? len : maxlen);
+	}
+	text[maxlen] = 0;
+	text[maxlen+1] = 0;
+	return 0;
+}
diff --git a/src/modules/filters/gbffootnotes.cpp b/src/modules/filters/gbffootnotes.cpp
new file mode 100644
index 0000000..c5b7b90
--- /dev/null
+++ b/src/modules/filters/gbffootnotes.cpp
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ *	gbffootnotes -	SWFilter descendant to hide or show footnotes
+ *			in a GBF module.
+ */
+
+
+#include
+#include
+#include
+#ifndef __GNUC__
+#else
+#include
+#endif
+
+
+const char GBFFootnotes::on[] = "On";
+const char GBFFootnotes::off[] = "Off";
+const char GBFFootnotes::optName[] = "Footnotes";
+const char GBFFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist";
+
+
+/* Starts with footnotes switched off and registers the two option values. */
+GBFFootnotes::GBFFootnotes() {
+	option = false;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+
+GBFFootnotes::~GBFFootnotes() {
+}
+
+/* Enables the option when ival equals "On" (compared with stricmp);
+ * any other value disables it. */
+void GBFFootnotes::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+/* Returns "On" or "Off" according to the current setting. */
+const char *GBFFootnotes::getOptionValue()
+{
+	return (option) ? on:off;
+}
+
+/* When the option is off, strips footnotes from `text` in place:
+ *   - every token whose first character is 'R' is removed; <RF> starts
+ *     hiding text and <Rf> stops hiding it,
+ *   - <WTP...>, <WTS...> and <WTA...> tokens are removed,
+ *   - all other tokens and all text outside a footnote are kept.
+ * When the option is on the text is left untouched.  key and module are
+ * unused.  Always returns 0.
+ */
+char GBFFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {	// if we don't want footnotes
+		char *to, *from, token[4096]; // cheese.  Fix.
+		int tokpos = 0;
+		bool intoken = false;
+		int len;
+		bool hide = false;
+
+		// Move the input to the end of the buffer so output can be written
+		// from the start while the input is read from behind it.
+		len = strlen(text) + 1;	// shift string to right of buffer
+		if (len < maxlen) {
+			memmove(&text[maxlen - len], text, len);
+			from = &text[maxlen - len];
+		}
+		else	from = text;	// -------------------------------
+
+		for (to = text; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+//				memset(token, 0, 4096);
+				token[0] = 0;
+				token[1] = 0;
+				token[2] = 0;
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				switch (*token) {
+				case 'R':				// Reference
+					switch(token[1]) {
+					case 'F':               // Begin footnote
+						hide = true;
+						break;
+					case 'f':               // end footnote
+						hide = false;
+						break;
+					}
+					continue;	// skip token
+				case 'W':
+					if (token[1] == 'T') {
+						switch (token[2]) {
+						case 'P':
+						case 'S':
+						case 'A':
+							continue; // remove this token
+						default:
+							break;
+						}
+					}
+				}
+				// if not a footnote token, keep token in text
+				if (!hide) {
+					*to++ = '<';
+					for (char *tok = token; *tok; tok++)
+						*to++ = *tok;
+					*to++ = '>';
+				}
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 4090)
+					token[tokpos++] = *from;
+				// not guarded by the if above: runs for every token character
+				token[tokpos+2] = 0;	// +2 cuz we init token with 2 extra '0' because of switch statement
+			}
+			else	{
+				if (!hide) {
+					*to++ = *from;
+				}
+			}
+		}
+		*to++ = 0;
+		*to = 0;
+	}
+	return 0;
+}
diff --git a/src/modules/filters/gbfheadings.cpp b/src/modules/filters/gbfheadings.cpp
new file mode 100644
index 0000000..590e2fa
--- /dev/null
+++ b/src/modules/filters/gbfheadings.cpp
@@ -0,0 +1,107 @@
+/******************************************************************************
+ *
+ *	gbfheadings -	SWFilter descendant to hide or show headings
+ *			in a GBF module.
+ */
+
+
+#include
+#include
+#include
+#ifndef __GNUC__
+#else
+#include
+#endif
+
+
+const char GBFHeadings::on[] = "On";
+const char GBFHeadings::off[] = "Off";
+const char GBFHeadings::optName[] = "Headings";
+const char GBFHeadings::optTip[] = "Toggles Headings On and Off if they exist";
+
+
+/* Starts with headings switched off and registers the two option values. */
+GBFHeadings::GBFHeadings() {
+	option = false;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+
+GBFHeadings::~GBFHeadings() {
+}
+
+/* Enables the option when ival equals "On" (compared with stricmp);
+ * any other value disables it. */
+void GBFHeadings::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+/* Returns "On" or "Off" according to the current setting. */
+const char *GBFHeadings::getOptionValue()
+{
+	return (option) ? on:off;
+}
+
+/* When the option is off, strips headings from `text` in place: every token
+ * whose first character is 'T' is removed, <TS> starts hiding text and <Ts>
+ * stops hiding it; everything else outside a heading is kept.  When the
+ * option is on the text is left untouched.  key and module are unused.
+ * Always returns 0.
+ */
+char GBFHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {	// if we don't want headings
+		char *to, *from, token[2048]; // cheese.  Fix.
+		int tokpos = 0;
+		bool intoken = false;
+		int len;
+		bool hide = false;
+
+		// Move the input to the end of the buffer so output can be written
+		// from the start while the input is read from behind it.
+		len = strlen(text) + 1;	// shift string to right of buffer
+		if (len < maxlen) {
+			memmove(&text[maxlen - len], text, len);
+			from = &text[maxlen - len];
+		}
+		else	from = text;	// -------------------------------
+
+		for (to = text; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+//				memset(token, 0, 2048);
+				token[0] = 0;
+				token[1] = 0;
+				token[2] = 0;
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				switch (*token) {
+				case 'T':				// Title/heading tokens
+					switch(token[1]) {
+					case 'S':               // Begin heading
+						hide = true;
+						break;
+					case 's':               // end heading
+						hide = false;
+						break;
+					}
+					continue;	// skip token
+				}
+				// if not a heading token, keep token in text
+				if (!hide) {
+					*to++ = '<';
+					for (char *tok = token; *tok; tok++)
+						*to++ = *tok;
+					*to++ = '>';
+				}
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 2045)
+					token[tokpos++] = *from;
+				// not guarded by the if above: keeps zero padding behind the token
+				token[tokpos+2] = 0;
+			}
+			else	{
+				if (!hide) {
+					*to++ = *from;
+				}
+			}
+		}
+		*to++ = 0;
+		*to = 0;
+	}
+	return 0;
+}
diff --git a/src/modules/filters/gbfhtml.cpp b/src/modules/filters/gbfhtml.cpp
new file mode 100644
index 0000000..73d445a
--- /dev/null
+++ b/src/modules/filters/gbfhtml.cpp
@@ -0,0 +1,536 @@
+/***************************************************************************
+                          gbfhtml.cpp  -   description
+                             -------------------
+    begin                : Thu Jun 24 1999
+    copyright            : (C) 1999 by Torsten Uhlmann
+    email                : TUhlmann@gmx.de
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ ***************************************************************************/
+
+#include
+#include
+#include
+
+
+GBFHTML::GBFHTML()
+{
+}
+
+
+// Copies the NUL-terminated string s to the output cursor `to` (without the
+// terminator) and returns the advanced cursor.
+static char *gbfhtmlPush(char *to, const char *s)
+{
+	while (*s)
+		*to++ = *s++;
+	return to;
+}
+
+
+/* Converts GBF markup in `text` to HTML, in place.
+ *
+ * The input is first moved to the end of the maxlen-byte buffer so that the
+ * (usually longer) output can be written from the start while the input is
+ * still being read.  Newlines become spaces.  Recognised <..> tokens are
+ * replaced by HTML; unrecognised tokens are dropped.  key and module are
+ * unused.  Always returns 0.
+ *
+ * NOTE(review): nothing stops the write cursor from catching up with the read
+ * cursor; the caller presumably sizes the buffer generously -- confirm.
+ */
+char GBFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	char *to, *from, token[2048];
+	int tokpos = 0;
+	bool intoken = false;
+	bool hasFootnotePreTag = false;
+	bool isRightJustified = false;
+	bool isCentered = false;
+	int len;
+	const char *tok;
+
+	len = strlen(text) + 1;	// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = &text[maxlen - len];
+	}
+	else
+		from = text;
+
+	for (to = text; *from; from++) {
+		if (*from == '\n')
+			*from = ' ';
+
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			continue;
+		}
+
+		if (*from == '>') {
+			intoken = false;
+			// Every path below ends at the `continue` after the switch, so the
+			// token itself is never copied to the output.
+			switch (*token) {
+			case 'W':	// Strongs
+				switch (token[1]) {
+				case 'G':	// Greek
+				case 'H':	// Hebrew
+				case 'T':	// Tense
+					to = gbfhtmlPush(to, " <small><em>");
+					to = gbfhtmlPush(to, token + 2);
+					to = gbfhtmlPush(to, "</em></small> ");
+					break;
+				}
+				break;
+
+			case 'R':
+				switch (token[1]) {
+				case 'X':	// cross reference begin; target starts at token[3]
+					to = gbfhtmlPush(to, "<a href=\"");
+					for (tok = token + 3; *tok; tok++) {
+						// Stop only at a literal "<Rx".  The old test
+						// (*tok+1 != 'R' && *tok+2 != 'x') compared the
+						// current character against 'Q' and 'v' and so
+						// truncated any reference containing them.
+						if (!strncmp(tok, "<Rx", 3))
+							break;
+						*to++ = *tok;
+					}
+					to = gbfhtmlPush(to, "\">");
+					break;
+				case 'x':	// cross reference end
+					to = gbfhtmlPush(to, "</a>");
+					break;
+				case 'B':	// word(s) explained in footnote
+					to = gbfhtmlPush(to, "<i>");
+					hasFootnotePreTag = true;	// we have the RB tag
+					break;
+				case 'F':	// footnote begin
+					if (hasFootnotePreTag)
+						to = gbfhtmlPush(to, "</i> ");
+					to = gbfhtmlPush(to, "<font color=\"#800000\"> <small>(");
+					break;
+				case 'f':	// footnote end
+					to = gbfhtmlPush(to, ")</small> </font>");
+					hasFootnotePreTag = false;
+					break;
+				}
+				break;
+
+			case 'F':	// font tags
+				switch (token[1]) {
+				case 'I': to = gbfhtmlPush(to, "<i>");    break;	// italic start
+				case 'i': to = gbfhtmlPush(to, "</i>");   break;	// italic end
+				case 'B': to = gbfhtmlPush(to, "<b>");    break;	// bold start
+				case 'b': to = gbfhtmlPush(to, "</b>");   break;	// bold end
+				case 'R':	// words of Jesus begin
+					to = gbfhtmlPush(to, "<font color=#FF0000>");
+					break;
+				case 'r':	// words of Jesus end
+					to = gbfhtmlPush(to, "</font>");
+					break;
+				case 'U': to = gbfhtmlPush(to, "<u>");    break;	// underline start
+				case 'u': to = gbfhtmlPush(to, "</u>");   break;	// underline end
+				case 'O': to = gbfhtmlPush(to, "<cite>"); break;	// Old Testament quote begin
+				case 'o': to = gbfhtmlPush(to, "</cite>"); break;	// Old Testament quote end
+				case 'S': to = gbfhtmlPush(to, "<sup>");  break;	// superscript begin
+				case 's': to = gbfhtmlPush(to, "</sup>"); break;	// superscript end
+				case 'V': to = gbfhtmlPush(to, "<sub>");  break;	// subscript begin
+				case 'v': to = gbfhtmlPush(to, "</sub>"); break;	// subscript end
+				case 'N':	// font name begin; name follows the two tag letters
+					to = gbfhtmlPush(to, "<font face=\"");
+					to = gbfhtmlPush(to, token + 2);
+					to = gbfhtmlPush(to, "\">");
+					break;
+				case 'n':	// font name end
+					to = gbfhtmlPush(to, "</font>");
+					break;
+				}
+				break;
+
+			case 'C':	// special character tags
+				switch (token[1]) {
+				case 'A':	// ASCII value
+					*to++ = (char)atoi(&token[2]);
+					break;
+				case 'G':	// produces no output
+					break;
+				case 'L':	// line break
+					to = gbfhtmlPush(to, "<br /> ");
+					break;
+				case 'M':	// new paragraph
+					to = gbfhtmlPush(to, "<br />");
+					break;
+				case 'T':	// produces no output
+					break;
+				}
+				break;
+
+			case 'J':	// justification
+				switch (token[1]) {
+				case 'R':	// right
+					to = gbfhtmlPush(to, "<div align=\"right\">");
+					isRightJustified = true;
+					break;
+				case 'C':	// center
+					to = gbfhtmlPush(to, "<div align=\"center\">");
+					isCentered = true;
+					break;
+				case 'L':	// left, reset right and center
+					if (isCentered) {
+						// JC opens a <div>, so close a <div>; the old
+						// code emitted an unmatched </center> here
+						to = gbfhtmlPush(to, "</div>");
+						isCentered = false;
+					}
+					if (isRightJustified) {
+						to = gbfhtmlPush(to, "</div>");
+						isRightJustified = false;
+					}
+					break;
+				}
+				break;
+
+			case 'T':	// title formatting
+				switch (token[1]) {
+				case 'T': to = gbfhtmlPush(to, "<big>");  break;	// book title begin
+				case 't': to = gbfhtmlPush(to, "</big>"); break;	// book title end
+				// TS/Ts are not rendered (their handling was disabled in the
+				// original); they are dropped like any other unknown token.
+				}
+				break;
+
+			case 'P':	// special formatting
+				switch (token[1]) {
+				case 'P': to = gbfhtmlPush(to, "<cite>");  break;	// poetry begin
+				case 'p': to = gbfhtmlPush(to, "</cite>"); break;	// poetry end
+				}
+				break;
+			}
+			continue;
+		}
+
+		if (intoken) {
+			// token always keeps zero bytes behind its last character so the
+			// token[1]/token[2]/token+3 accesses above stay inside the string
+			if (tokpos < 2045) {
+				token[tokpos++] = *from;
+				token[tokpos+2] = 0;
+			}
+		}
+		else
+			*to++ = *from;
+	}
+	*to++ = 0;
+	*to = 0;
+	return 0;
+}
diff --git a/src/modules/filters/gbfhtmlhref.cpp b/src/modules/filters/gbfhtmlhref.cpp
new file mode 100644
index 0000000..30b27ba
--- /dev/null
+++ b/src/modules/filters/gbfhtmlhref.cpp
@@ -0,0 +1,148 @@
+/***************************************************************************
+                          gbfhtmlhref.cpp  -   GBF to HTML filter with hrefs
+                               for strongs and morph tags
+                             -------------------
+    begin                    : 2001-09-03
+    copyright            : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.
* + * * + ***************************************************************************/ + +#include +#include +#include + +GBFHTMLHREF::GBFHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("Rf", ")"); + addTokenSubstitute("Rx", ""); + addTokenSubstitute("FI", ""); // italics begin + addTokenSubstitute("Fi", ""); + addTokenSubstitute("FB", ""); // bold begin + addTokenSubstitute("Fb", ""); + addTokenSubstitute("FR", ""); // words of Jesus begin + addTokenSubstitute("Fr", ""); + addTokenSubstitute("FU", ""); // underline begin + addTokenSubstitute("Fu", ""); + addTokenSubstitute("FO", ""); // Old Testament quote begin + addTokenSubstitute("Fo", ""); + addTokenSubstitute("FS", ""); // Superscript begin// Subscript begin + addTokenSubstitute("Fs", ""); + addTokenSubstitute("FV", ""); // Subscript begin + addTokenSubstitute("Fv", ""); + addTokenSubstitute("TT", ""); // Book title begin + addTokenSubstitute("Tt", ""); + addTokenSubstitute("PP", ""); // poetry begin + addTokenSubstitute("Pp", ""); + addTokenSubstitute("Fn", ""); // font end + addTokenSubstitute("CL", "
"); // new line + addTokenSubstitute("CM", "
"); // paragraph is a non showing comment that can be changed in the front end to

if desired + addTokenSubstitute("CG", ""); // ??? + addTokenSubstitute("CT", ""); // ??? + addTokenSubstitute("JR", "

"); // right align begin + addTokenSubstitute("JC", "
"); // center align begin + addTokenSubstitute("JL", "
"); // align end + +} + + +bool GBFHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + + if (!substituteToken(buf, token)) { + if (!strncmp(token, "WG", 2) || !strncmp(token, "WH", 2)) { // strong's numbers + pushString(buf, " <>"); + } + + else if (!strncmp(token, "WTG", 3) || !strncmp(token, "WTH", 3)) { // strong's numbers tense + pushString(buf, " ()"); + } + + else if (!strncmp(token, "WT", 2) && strncmp(token, "WTH", 3) && strncmp(token, "WTG", 3)) { // morph tags + pushString(buf, " ()"); + } + + else if (!strncmp(token, "RX", 2)) { + pushString(buf, ""); + userData["hasFootnotePreTag"] = "true"; + } + + else if (!strncmp(token, "RF", 2)) { + if(userData["hasFootnotePreTag"] == "true") { + userData["hasFootnotePreTag"] = "false"; + pushString(buf, " "); + } + pushString(buf, " ("); + } + + else if (!strncmp(token, "FN", 2)) { + pushString(buf, " +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char GBFMorph::on[] = "On"; +const char GBFMorph::off[] = "Off"; +const char GBFMorph::optName[] = "Morphological Tags"; +const char GBFMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +GBFMorph::GBFMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +GBFMorph::~GBFMorph() { +} + +void GBFMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *GBFMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char GBFMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (*token == 'W' && token[1] == 'T') { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/gbfplain.cpp b/src/modules/filters/gbfplain.cpp new file mode 100644 index 0000000..65766d3 --- /dev/null +++ b/src/modules/filters/gbfplain.cpp @@ -0,0 +1,106 @@ +/****************************************************************************** + * + * gbfplain - SWFilter decendant to strip out all GBF tags or convert to + * ASCII rendered symbols. 
+ */
+
+
+#include
+#include
+#include
+
+
+GBFPlain::GBFPlain() {
+}
+
+
+/* Strips GBF markup from `text` in place, rendering a few tokens as plain
+ * characters: Strong's/tense numbers as " <n> ", footnotes in " [" ... "] ",
+ * <CA..> as the given character code, <CG> as '>', <CL>/<CN> as one newline
+ * and <CM> as two.  All other tokens are dropped.  key and module are
+ * unused.  Always returns 0.
+ */
+char GBFPlain::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	char *to, *from, token[2048];
+	int tokpos = 0;
+	bool intoken = false;
+	int len;
+
+	// Move the input to the end of the buffer so output can be written from
+	// the start while the input is read from behind it.
+	len = strlen(text) + 1;	// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = &text[maxlen - len];
+	}
+	else	from = text;	// -------------------------------
+
+	for (to = text; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			continue;
+		}
+		if (*from == '>') {
+			intoken = false;
+						// process desired tokens
+			switch (*token) {
+			case 'W':	// Strongs
+				switch(token[1]) {
+				case 'G':               // Greek
+				case 'H':               // Hebrew
+				case 'T':               // Tense
+					*to++ = ' ';
+					*to++ = '<';
+					for (char *tok = token + 2; *tok; tok++)
+						*to++ = *tok;
+					*to++ = '>';
+					*to++ = ' ';
+					continue;
+				}
+				break;
+			case 'R':
+				switch(token[1]) {
+				case 'F':               // footnote begin
+					*to++ = ' ';
+					*to++ = '[';
+					continue;
+				case 'f':               // footnote end
+					*to++ = ']';
+					*to++ = ' ';
+					continue;
+				}
+				break;
+			case 'C':
+				switch(token[1]) {
+				case 'A':               // ASCII value
+					*to++ = (char)atoi(&token[2]);
+					continue;
+				case 'G':
+					*to++ = '>';
+					continue;
+/*								Bug in WEB
+				case 'L':
+					*to++ = '<';
+					continue;
+*/
+				case 'L':	//        Bug in WEB.  Use above entry when fixed
+				case 'N':               // new line
+					*to++ = '\n';
+					continue;
+				case 'M':               // new paragraph
+					*to++ = '\n';
+					*to++ = '\n';
+					continue;
+				}
+				break;
+			}
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			// not guarded by the if above: keeps zero padding behind the token
+			token[tokpos+2] = 0;
+		}
+		else	*to++ = *from;
+	}
+	*to++ = 0;
+	*to = 0;
+	return 0;
+}
diff --git a/src/modules/filters/gbfrtf.cpp b/src/modules/filters/gbfrtf.cpp
new file mode 100644
index 0000000..5f7d064
--- /dev/null
+++ b/src/modules/filters/gbfrtf.cpp
@@ -0,0 +1,277 @@
+/******************************************************************************
+ *
+ * gbfrtf -	SWFilter descendant to convert all GBF tags to RTF tags
+ */
+
+
+#include
+#include
+#include
+#include
+
+GBFRTF::GBFRTF() {
+}
+
+
+// Copies the NUL-terminated string s to the output cursor `to` (without the
+// terminator) and returns the advanced cursor.
+static unsigned char *gbfrtfPush(unsigned char *to, const char *s)
+{
+	while (*s)
+		*to++ = (unsigned char)*s++;
+	return to;
+}
+
+
+/* Converts GBF markup in `text` to RTF control words, in place.
+ *
+ * The input is first moved to the end of the maxlen-byte buffer so output can
+ * be written from the start while the input is still being read.  Recognised
+ * <..> tokens are replaced by RTF; unrecognised tokens are dropped.  key and
+ * module are unused.  Always returns 0.
+ *
+ * NOTE(review): nothing stops the write cursor from catching up with the read
+ * cursor; the caller presumably sizes the buffer generously -- confirm.
+ */
+char GBFRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *to, *from;
+	char token[2048];
+	int tokpos = 0;
+	bool intoken = false;
+	int len;
+	const char *tok;
+
+	len = strlen(text) + 1;	// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = (unsigned char *)&text[maxlen - len];
+	}
+	else	from = (unsigned char *)text;
+
+	for (to = (unsigned char *)text; *from; from++) {
+		if (*from == '<') {
+			intoken = true;
+			tokpos = 0;
+			token[0] = 0;
+			token[1] = 0;
+			token[2] = 0;
+			continue;
+		}
+		if (*from == '>') {
+			intoken = false;
+			// Every path below ends at the `continue` after the switch, so the
+			// token itself is never copied to the output.
+			switch (*token) {
+			case 'W':	// Strongs
+				switch (token[1]) {
+				case 'G':	// Greek
+				case 'H':	// Hebrew
+					to = gbfrtfPush(to, "{\\fs17 <");
+					to = gbfrtfPush(to, token + 2);
+					to = gbfrtfPush(to, ">}");
+					break;
+
+				case 'T': {	// Tense
+					// A leading G/H is skipped and the digits after it
+					// copied; anything else is copied through to the end.
+					// "; " separates a digit run from what follows it.
+					bool separate = false;
+					to = gbfrtfPush(to, "{\\fs17 (");
+					tok = token + 2;
+					// The cursor is only advanced while it points at a
+					// real character, so it never steps past the
+					// terminator (the old nested for-loops did, and were
+					// saved only by the token's zero padding).
+					while (*tok) {
+						if (separate) {
+							to = gbfrtfPush(to, "; ");
+							separate = false;
+						}
+						if (*tok == 'G' || *tok == 'H') {
+							tok++;
+							// cast: isdigit is undefined for negative char values
+							while (isdigit((unsigned char)*tok)) {
+								*to++ = *tok++;
+								separate = true;
+							}
+						}
+						else {
+							while (*tok)
+								*to++ = *tok++;
+						}
+					}
+					to = gbfrtfPush(to, ")}");
+					break;
+				}
+				}
+				break;
+
+			case 'R':
+				switch (token[1]) {
+				case 'X': *to++ = '#'; break;	// cross reference begin
+				case 'x': *to++ = '|'; break;	// cross reference end
+				case 'F':	// footnote begin
+					to = gbfrtfPush(to, "{\\i1 \\fs17 (");
+					break;
+				case 'f':	// footnote end
+					to = gbfrtfPush(to, ") }");
+					break;
+				}
+				break;
+
+			case 'F':	// font tags
+				switch (token[1]) {
+				case 'I': to = gbfrtfPush(to, "\\i1 "); break;	// italic start
+				case 'i': to = gbfrtfPush(to, "\\i0 "); break;	// italic end
+				case 'B': to = gbfrtfPush(to, "\\b1 "); break;	// bold start
+				case 'b': to = gbfrtfPush(to, "\\b0 "); break;	// bold end
+				case 'N':	// font name begin; only "Symbol" selects a font
+					*to++ = '{';
+					if (!strnicmp(token+2, "Symbol", 6))
+						to = gbfrtfPush(to, "\\f7 ");
+					break;
+				case 'n': *to++ = '}'; break;	// font name end
+				case 'S': to = gbfrtfPush(to, "{\\super "); break;	// superscript begin
+				case 's': *to++ = '}'; break;	// superscript end
+				case 'R': to = gbfrtfPush(to, "{\\cf6 "); break;	// words of Jesus begin
+				case 'r': *to++ = '}'; break;	// words of Jesus end
+				}
+				break;
+
+			case 'C':	// special character tags
+				switch (token[1]) {
+				case 'A':	// ASCII value
+					*to++ = (char)atoi(&token[2]);
+					break;
+				case 'G': *to++ = '>'; break;
+				case 'L': to = gbfrtfPush(to, "\\line "); break;	// line break
+				case 'M': to = gbfrtfPush(to, "\\par ");  break;	// new paragraph
+				case 'T': *to++ = '<'; break;
+				}
+				break;
+
+			case 'T':	// title formatting
+				switch (token[1]) {
+				case 'T': to = gbfrtfPush(to, "{\\fs22 "); break;	// book title begin
+				case 't': *to++ = '}'; break;	// book title end
+				case 'S': to = gbfrtfPush(to, "\\par {\\i1\\b1 "); break;	// section title begin
+				case 's': to = gbfrtfPush(to, "}\\par "); break;	// section title end
+				}
+				break;
+			}
+			continue;
+		}
+		if (intoken) {
+			if (tokpos < 2045)
+				token[tokpos++] = *from;
+			// not guarded by the if above: keeps zero padding behind the token
+			token[tokpos+2] = 0;
+		}
+		else	*to++ = *from;
+	}
+	*to++ = 0;
+	*to = 0;
+	return 0;
+}
diff --git a/src/modules/filters/gbfstrongs.cpp b/src/modules/filters/gbfstrongs.cpp
new file mode 100644
index 0000000..40fc958
--- /dev/null
+++ b/src/modules/filters/gbfstrongs.cpp
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * gbfstrongs -	SWFilter descendant to hide or show strongs number
+ *			in a GBF module.
+ */
+
+
+#include
+#include
+#include
+#ifndef __GNUC__
+#else
+#include
+#endif
+
+
+const char GBFStrongs::on[] = "On";
+const char GBFStrongs::off[] = "Off";
+const char GBFStrongs::optName[] = "Strong's Numbers";
+const char GBFStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist";
+
+
+/* Starts with Strong's numbers switched off and registers the two option
+ * values. */
+GBFStrongs::GBFStrongs() {
+	option = false;
+	options.push_back(on);
+	options.push_back(off);
+}
+
+
+GBFStrongs::~GBFStrongs() {
+}
+
+/* Enables the option when ival equals "On" (compared with stricmp);
+ * any other value disables it. */
+void GBFStrongs::setOptionValue(const char *ival)
+{
+	option = (!stricmp(ival, on));
+}
+
+/* Returns "On" or "Off" according to the current setting. */
+const char *GBFStrongs::getOptionValue()
+{
+	return (option) ? on:off;
+}
+
+/* When the option is off, removes <WG...> and <WH...> tokens from `text` in
+ * place.  If the removed token is followed by a space or punctuation and the
+ * last character written was a space, that space is removed too, so no
+ * stray blank is left before the punctuation.  All other tokens are kept.
+ * key and module are unused.  Always returns 0.
+ */
+char GBFStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (!option) {	// if we don't want strongs
+		char *to, *from, token[2048]; // cheese.  Fix.
+		int tokpos = 0;
+		bool intoken = false;
+		int len;
+		bool lastspace = false;
+
+		len = strlen(text) + 1;	// shift string to right of buffer
+		if (len < maxlen) {
+			memmove(&text[maxlen - len], text, len);
+			from = &text[maxlen - len];
+		}
+		else	from = text;	// -------------------------------
+
+		for (to = text; *from; from++) {
+			if (*from == '<') {
+				intoken = true;
+				tokpos = 0;
+				token[0] = 0;
+				token[1] = 0;
+				token[2] = 0;
+				continue;
+			}
+			if (*from == '>') {	// process tokens
+				intoken = false;
+				if (*token == 'W' && (token[1] == 'G' || token[1] == 'H')) {	// Strongs
+					if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) {
+						if (lastspace)
+							to--;
+					}
+					continue;
+				}
+				// if not a strongs token, keep token in text
+				*to++ = '<';
+				for (char *tok = token; *tok; tok++)
+					*to++ = *tok;
+				*to++ = '>';
+				continue;
+			}
+			if (intoken) {
+				if (tokpos < 2045)
+					token[tokpos++] = *from;
+				// not guarded by the if above: keeps zero padding behind the token
+				token[tokpos+2] = 0;
+			}
+			else	{
+				*to++ = *from;
+				lastspace = (*from == ' ');
+			}
+		}
+		*to++ = 0;
+		*to = 0;
+	}
+	return 0;
+}
diff --git a/src/modules/filters/gbfthml.cpp b/src/modules/filters/gbfthml.cpp
new file mode 100644
index 0000000..ca03e71
--- /dev/null
+++ b/src/modules/filters/gbfthml.cpp
@@ -0,0 +1,463 @@
+/***************************************************************************
+                          gbfthml.cpp  -   GBF to ThML filter
+                             -------------------
+    begin                    : 1999-10-27
+    copyright            : 2001 by CrossWire Bible Society
+ ***************************************************************************/
+
+/***************************************************************************
+ *                                                                         *
+ *   This program is free software; you can redistribute it and/or
modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include + + +GBFThML::GBFThML() +{ +} + + +char GBFThML::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + const char *tok; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') + { + intoken = false; + // process desired tokens + switch (*token) { + case 'W': // Strongs + switch(token[1]) { + case 'G': + case 'H': + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'S'; + *to++ = 't'; + *to++ = 'r'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 'g'; + *to++ = 's'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 1; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + + case 'T': // Tense + *to++ = '<'; + *to++ = 's'; + *to++ = 'y'; + *to++ = 'n'; + *to++ = 'c'; + *to++ = ' '; + *to++ = 't'; + *to++ = 'y'; + *to++ = 'p'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'M'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = 'p'; + *to++ = 'h'; + *to++ = '"'; + *to++ = ' '; + *to++ = 'v'; + *to++ = 'a'; + *to++ = 'l'; + *to++ = 'u'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + 
for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + } + break; + case 'R': + switch(token[1]) + { + case 'X': + *to++ = '<'; + *to++ = 'a'; + *to++ = ' '; + *to++ = 'h'; + *to++ = 'r'; + *to++ = 'e'; + *to++ = 'f'; + *to++ = '='; + *to++ = '\"'; + for (tok = token + 3; *tok; tok++) { + if(*tok != '<' && *tok+1 != 'R' && *tok+2 != 'x') { + *to++ = *tok; + } + else { + break; + } + } + *to++ = '\"'; + *to++ = '>'; + continue; + case 'x': + *to++ = '<'; + *to++ = '/'; + *to++ = 'a'; + *to++ = '>'; + continue; + case 'F': // footnote begin + *to++ = '<'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = ' '; + *to++ = 'p'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'o'; + *to++ = 't'; + *to++ = '"'; + *to++ = '>'; + continue; + case 'f': // footnote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'n'; + *to++ = 'o'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + case 'F': // font tags + switch(token[1]) + { + case 'N': + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = ' '; + *to++ = 'f'; + *to++ = 'a'; + *to++ = 'c'; + *to++ = 'e'; + *to++ = '='; + *to++ = '"'; + for (tok = token + 2; *tok; tok++) + *to++ = *tok; + *to++ = '"'; + *to++ = '>'; + continue; + case 'n': + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'I': // italic start + *to++ = '<'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'i': // italic end + *to++ = '<'; + *to++ = '/'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + *to++ = '<'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'b': // bold end + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = '>'; + continue; + + case 'R': // words of Jesus begin + *to++ = '<'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + 
*to++ = 't'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'o'; + *to++ = 'l'; + *to++ = 'o'; + *to++ = 'r'; + *to++ = '='; + *to++ = '\"'; + *to++ = '#'; + *to++ = 'f'; + *to++ = 'f'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 'r': // words of Jesus end + *to++ = '<'; + *to++ = '/'; + *to++ = 'f'; + *to++ = 'o'; + *to++ = 'n'; + *to++ = 't'; + *to++ = '>'; + continue; + case 'U': // Underline start + *to++ = '<'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'u': // Underline end + *to++ = '<'; + *to++ = '/'; + *to++ = 'u'; + *to++ = '>'; + continue; + case 'O': // Old Testament quote begin + *to++ = '<'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'o': // Old Testament quote end + *to++ = '<'; + *to++ = '/'; + *to++ = 'c'; + *to++ = 'i'; + *to++ = 't'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'S': // Superscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 's': // Superscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'p'; + *to++ = '>'; + continue; + case 'V': // Subscript begin + *to++ = '<'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + case 'v': // Subscript end + *to++ = '<'; + *to++ = '/'; + *to++ = 's'; + *to++ = 'u'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + break; + case 'C': // special character tags + switch(token[1]) + { + case 'A': // ASCII value + *to++ = (char)atoi(&token[2]); + continue; + case 'G': + //*to++ = ' '; + continue; + case 'L': // line break + *to++ = '<'; + *to++ = 'b'; + *to++ = 'r'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + *to++ = ' '; + continue; + case 'M': // new paragraph + *to++ = '<'; + *to++ = 'p'; + *to++ = ' '; + *to++ = '/'; + *to++ = '>'; + continue; + case 'T': + //*to++ = ' '; + continue; + } + break; + case 'T': // title formatting + switch(token[1]) + { + case 'T': // Book 
title begin + *to++ = '<'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 't': + *to++ = '<'; + *to++ = '/'; + *to++ = 'b'; + *to++ = 'i'; + *to++ = 'g'; + *to++ = '>'; + continue; + case 'S': + *to++ = '<'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = ' '; + *to++ = 'c'; + *to++ = 'l'; + *to++ = 'a'; + *to++ = 's'; + *to++ = 's'; + *to++ = '='; + *to++ = '\"'; + *to++ = 's'; + *to++ = 'e'; + *to++ = 'c'; + *to++ = 'h'; + *to++ = 'e'; + *to++ = 'a'; + *to++ = 'd'; + *to++ = '\"'; + *to++ = '>'; + continue; + case 's': + *to++ = '<'; + *to++ = '/'; + *to++ = 'd'; + *to++ = 'i'; + *to++ = 'v'; + *to++ = '>'; + continue; + } + break; + + case 'P': // special formatting + switch(token[1]) + { + case 'P': // Poetry begin + *to++ = '<'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + case 'p': + *to++ = '<'; + *to++ = '/'; + *to++ = 'v'; + *to++ = 'e'; + *to++ = 'r'; + *to++ = 's'; + *to++ = 'e'; + *to++ = '>'; + continue; + } + break; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/greeklexattribs.cpp b/src/modules/filters/greeklexattribs.cpp new file mode 100644 index 0000000..fb166df --- /dev/null +++ b/src/modules/filters/greeklexattribs.cpp @@ -0,0 +1,58 @@ +/****************************************************************************** + * + * greeklexattribs - SWFilter decendant to set entry attributes for greek + * lexicons + */ + + +#include +#include +#include +#include + + +GreekLexAttribs::GreekLexAttribs() { +} + + +char GreekLexAttribs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + + if (module->isProcessEntryAttributes()) { + char *from; + bool inAV = false; + string phrase; + string freq; + char *currentPhrase = 0; + + + for (from = text; *from; from++) { + 
if (inAV) { + if (currentPhrase == 0) { + if (isalpha(*from)) + currentPhrase = from; + } + else { + if ((!isalpha(*from)) && (*from != ' ')) { + phrase = ""; + phrase.append(currentPhrase, (int)(from - currentPhrase)-1); + currentPhrase = from; + while (*from && isdigit(*from)) from++; + freq = ""; + freq.append(currentPhrase, (int)(from - currentPhrase)); + module->getEntryAttributes()["AVPhrase"][phrase]["Frequency"] = freq; + currentPhrase = 0; + } + } + if (*from == ';') inAV = false; + + } + else if (!strncmp(from, "AV-", 3)) { + inAV = true; + from+=2; + } + } + } + return 0; +} + + diff --git a/src/modules/filters/latin1utf16.cpp b/src/modules/filters/latin1utf16.cpp new file mode 100644 index 0000000..75ee998 --- /dev/null +++ b/src/modules/filters/latin1utf16.cpp @@ -0,0 +1,120 @@ +/****************************************************************************** + * + * Latin1UTF16 - SWFilter decendant to convert a Latin-1 character to UTF-16 + * + */ + + +#include +#include +#include + +Latin1UTF16::Latin1UTF16() { +} + + +char Latin1UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + switch (*from) { + case 0x80: // '€' + *to++ = 0x20AC; + break; + case 0x82: // '‚' + *to++ = 0x201A; + break; + case 0x83: // 'ƒ' + *to++ = 0x0192; + break; + case 0x84: // '„' + *to++ = 0x201E; + break; + case 0x85: // '…' + *to++ = 0x2026; + break; + case 0x86: // '†' + *to++ = 0x2020; + break; + case 0x87: // '‡' + *to++ = 0x2021; + break; + case 0x88: // 'ˆ' + *to++ = 0x02C6; + break; + case 0x89: // '‰' + *to++ = 0x2030; + break; + case 0x8A: // 'Š' + *to++ = 0x0160; + break; 
+ case 0x8B: // '‹' + *to++ = 0x2039; + break; + case 0x8C: // 'Œ' + *to++ = 0x0152; + break; + case 0x8E: // 'Ž' + *to++ = 0x017D; + break; + case 0x91: // '‘' + *to++ = 0x2018; + break; + case 0x92: // '’' + *to++ = 0x2019; + break; + case 0x93: // '“' + *to++ = 0x201C; + break; + case 0x94: // '”' + *to++ = 0x201D; + break; + case 0x95: // '•' + *to++ = 0x2022; + break; + case 0x96: // '–' + *to++ = 0x2013; + break; + case 0x97: // '—' + *to++ = 0x2014; + break; + case 0x98: // '˜' + *to++ = 0x02DC; + break; + case 0x99: // '™' + *to++ = 0x2122; + break; + case 0x9A: // 'š' + *to++ = 0x0161; + break; + case 0x9B: // '›' + *to++ = 0x203A; + break; + case 0x9C: // 'œ' + *to++ = 0x0153; + break; + case 0x9E: // 'ž' + *to++ = 0x017E; + break; + case 0x9F: // 'Ÿ' + *to++ = 0x0178; + break; + default: + *to++ = (unsigned short)*from; + } + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/latin1utf8.cpp b/src/modules/filters/latin1utf8.cpp new file mode 100644 index 0000000..91af8dc --- /dev/null +++ b/src/modules/filters/latin1utf8.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** + * + * Latin1UTF8 - SWFilter decendant to convert a Latin-1 character to UTF-8 + * + */ + + +#include +#include +#include +#include + +Latin1UTF8::Latin1UTF8() { +} + + +char Latin1UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + + len = strlen(text) + 1; + if (len == maxlen + 1) + maxlen = (maxlen + 1) * FILTERPAD; + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; // ------------------------------- + + + + for (to = (unsigned char*)text; *from; from++) { + if (*from < 0x80) { + *to++ = *from; + } + else if (*from < 0xc0) { + switch(*from) { + case 0x80: // '€' + *to++ = 0xe2; // 'â' + *to++ = 0x82; // '‚' + 
*to++ = 0xac; // '¬' + break; + case 0x82: // '‚' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9a; // 'š' + break; + case 0x83: // 'ƒ' + *to++ = 0xc6; // 'Æ' + *to++ = 0x92; // '’' + break; + case 0x84: // '„' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9e; // 'ž' + break; + case 0x85: // '…' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa6; // '¦' + break; + case 0x86: // '†' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa0; // ' ' + break; + case 0x87: // '‡' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa1; // '¡' + break; + case 0x88: // 'ˆ' + *to++ = 0xcb; // 'Ë' + *to++ = 0x86; // '†' + break; + case 0x89: // '‰' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb0; // '°' + break; + case 0x8A: // 'Š' + *to++ = 0xc5; // 'Å' + *to++ = 0xa0; // ' ' + break; + case 0x8B: // '‹' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xb9; // '¹' + break; + case 0x8C: // 'Œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x92; // '’' + break; + case 0x8E: // 'Ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbd; // '½' + break; + case 0x91: // '‘' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x98; // '˜' + break; + case 0x92: // '’' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x99; // '™' + break; + case 0x93: // '“' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9c; // 'œ' + break; + case 0x94: // '”' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x9d; // '' + break; + case 0x95: // '•' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xa2; // '¢' + break; + case 0x96: // '–' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x93; // '“' + break; + case 0x97: // '—' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0x94; // '”' + break; + case 0x98: // '˜' + *to++ = 0xcb; // 'Ë' + *to++ = 0x9c; // 'œ' + break; + case 0x99: // '™' + *to++ = 0xe2; // 'â' + *to++ = 0x84; // '„' + *to++ = 0xa2; // '¢' + break; + case 0x9A: // 'š' + *to++ = 0xc5; // 'Å' + *to++ 
= 0xa1; // '¡' + break; + case 0x9B: // '›' + *to++ = 0xe2; // 'â' + *to++ = 0x80; // '€' + *to++ = 0xba; // 'º' + break; + case 0x9C: // 'œ' + *to++ = 0xc5; // 'Å' + *to++ = 0x93; // '“' + break; + case 0x9E: // 'ž' + *to++ = 0xc5; // 'Å' + *to++ = 0xbe; // '¾' + break; + case 0x9F: // 'Ÿ' + *to++ = 0xc5; // 'Å' + *to++ = 0xb8; // '¸' + break; + default: + *to++ = 0xC2; + *to++ = *from; + } + } + else { + *to++ = 0xC3; + *to++ = (*from - 0x40); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/plainfootnotes.cpp b/src/modules/filters/plainfootnotes.cpp new file mode 100644 index 0000000..96fc4d8 --- /dev/null +++ b/src/modules/filters/plainfootnotes.cpp @@ -0,0 +1,102 @@ +/*************************************************************************** + plainfootnotes.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include + +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + +const char PLAINFootnotes::on[] = "On"; +const char PLAINFootnotes::off[] = "Off"; +const char PLAINFootnotes::optName[] = "Footnotes"; +const char PLAINFootnotes::optTip[] = "Toggles Footnotes On and Off In Bible Texts If They Exist"; + +PLAINFootnotes::PLAINFootnotes(){ + option = false; + options.push_back(on); + options.push_back(off); +} + +PLAINFootnotes::~PLAINFootnotes(){ +} + + +void PLAINFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *PLAINFootnotes::getOptionValue() +{ + return (option) ? on:off; +} + + +char PLAINFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char token[2048]; + int tokpos = 0; + bool intoken = false; + bool lastspace = false; + + if (!option) { // if we don't want footnotes + char *to, *from; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) + { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '{') // Footnote start + { + hide = true; + continue; + } + if (*from == '}') // Footnote end + { + hide=false; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + lastspace = (*from == ' '); + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} + diff --git a/src/modules/filters/plainhtml.cpp b/src/modules/filters/plainhtml.cpp new file mode 100644 index 0000000..fefb029 --- /dev/null +++ b/src/modules/filters/plainhtml.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu 
Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include + + +PLAINHTML::PLAINHTML() +{ +} + + +char PLAINHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + int count = 0; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if ((*from == '\n') && (from[1] == '\n')) // paragraph + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + from++; + continue; + } else { + if ((*from == '\n')) // && (from[1] != '\n')) // new line + { + *to++ = '<'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + } + + if (*from == '{') { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = ' '; + *to++ = 'C'; + *to++ = 'O'; + *to++ = 'L'; + *to++ = 'O'; + *to++ = 'R'; + *to++ = '='; + *to++ = '#'; + *to++ = '8'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '0'; + *to++ = '>'; + + *to++ = '<'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; + *to++ = ' '; + *to++ = '('; + continue; + } + + if (*from == '}') + { + *to++ = ')'; + *to++ = ' '; + *to++ = '<'; + *to++ = '/'; + *to++ = 'S'; + *to++ = 'M'; + *to++ = 'A'; + *to++ = 'L'; + *to++ = 'L'; + *to++ = '>'; 
+ + *to++ = '<'; + *to++ = '/'; + *to++ = 'F'; + *to++ = 'O'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = '>'; + continue; + } + + if ((*from == ' ') && (count > 5000)) + { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'B'; + *to++ = 'R'; + *to++ = '>'; + count = 0; + continue; + } + + *to++ = *from; + count++; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rtfhtml.cpp b/src/modules/filters/rtfhtml.cpp new file mode 100644 index 0000000..f0b842b --- /dev/null +++ b/src/modules/filters/rtfhtml.cpp @@ -0,0 +1,99 @@ +/*************************************************************************** + rtfhtml.cpp - description + ------------------- + begin : Wed Oct 13 1999 + copyright : (C) 1999 by The team of BibleTime + email : info@bibletime.de + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include +#include + + +RTFHTML::RTFHTML() { + +} + + +char RTFHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + int len; + bool center = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') // a RTF command + { + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r') && (from[4] == 'd')) + { // switch all modifier off + if (center) + { + *to++ = '<'; + *to++ = '/'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = false; + } + from += 4; + continue; + } + if ((from[1] == 'p') && (from[2] == 'a') && (from[3] == 'r')) + { + *to++ = '<'; + *to++ = 'P'; + *to++ = '>'; + *to++ = '\n'; + from += 3; + continue; + } + if (from[1] == ' ') + { + from += 1; + continue; + } + if ((from[1] == 'q') && (from[2] == 'c')) // center on + { + if (!center) + { + *to++ = '<'; + *to++ = 'C'; + *to++ = 'E'; + *to++ = 'N'; + *to++ = 'T'; + *to++ = 'E'; + *to++ = 'R'; + *to++ = '>'; + center = true; + } + from += 2; + continue; + } + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwphtml.cpp b/src/modules/filters/rwphtml.cpp new file mode 100644 index 0000000..6f8ae4f --- /dev/null +++ b/src/modules/filters/rwphtml.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + rwphtml.cpp - description + ------------------- + begin : Thu Jun 24 1999 + copyright : (C) 1999 by Torsten Uhlmann + email : TUhlmann@gmx.de + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include +#include + +RWPHTML::RWPHTML() +{ +} + + +char RWPHTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + signed char greek_str[500]; + bool inverse = false; + bool first_letter = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } else + from = text; + for (to = text; *from; from++) { + if (*from == '\\') { + ++from; + int i=0; + first_letter = true; + greek_str[0] = '\0'; + while (*from != '\\') { /* get the greek word or phrase */ + greek_str[i++] = *from; + greek_str[i + 1] = '\0'; + from++; + } /* convert to symbol font as best we can */ + strcpy(to," "); + to += strlen(to); + for (int j = 0; j < i; j++) { + if ((first_letter) + && (greek_str[j] == 'h')) { + if (greek_str[j + 1] == 'o') { + *to++ = 'o'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'a') { + *to++ = 'a'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'w') { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'u') { + *to++ = 'u'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -109) { + *to++ = 'w'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == + -120) { + *to++ = 'h'; + first_letter = false; + ++j; + continue; + } else if (greek_str[j + 1] == 'i') { + *to++ = 'i'; + first_letter = false; + ++j; + 
continue; + }else if (greek_str[j + 1] == 'e') { + *to++ = 'e'; + first_letter = false; + ++j; + continue; + } + first_letter = false; + } + if ((greek_str[j] == 't') + && (greek_str[j + 1] == 'h')) { + *to++ = 'q'; + ++j; + continue; + } + if ((greek_str[j] == 'c') + && (greek_str[j + 1] == 'h')) { + *to++ = 'c'; + ++j; + continue; + } + if ((greek_str[j] == 'p') + && (greek_str[j + 1] == 'h')) { + ++j; + *to++ = 'f'; + continue; + } + if (greek_str[j] == -120) { + *to++ = 'h'; + continue; + } + if (greek_str[j] == -125) { + *to++ = 'a'; + continue; + } + if (greek_str[j] == -109) { + if(greek_str[j+1] == 'i') ++j; + *to++ = 'w'; + continue; + } + if (greek_str[j] == ' ') + first_letter = true; + if (greek_str[j] == 's') { + if(isalpha(greek_str[j + 1])) *to++ = 's'; + else if(!isprint(greek_str[j] )) *to++ = 's'; + else *to++ = 'V'; + continue; + } + if (greek_str[j] == '\'') { + continue; + } + *to++ = greek_str[j]; + } + strcpy(to," "); + to += strlen(to); + continue; + } + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + strcpy(to,""); + to += strlen(to); + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + strcpy(to,""); + to += strlen(to); + continue; + } + if (*from == '{') { + strcpy(to,"
"); + to += strlen(to); + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + strcpy(to,"

"); + to += strlen(to); + } + continue; + } + if (*from == '}') { + strcpy(to," "); + to += strlen(to); + continue; + } + if ((*from == '\n') && (from[1] == '\n')) { + strcpy(to,"

'); + WriteLn(OutFile,'


[Index]  '); + WriteLn(OutFile,'[Home]'); + WriteLn(OutFile,''); + CloseFile(OutFile); + fHTMLisOpen := false; + end; + end; + + procedure CloseASCII; + begin + if fASCIIisOpen then + begin + WriteLn(OutFile,sLine); + sLine := ''; + WriteLn(OutFile); + if WEBDraftCheckBox.Checked then + begin + WriteLn(OutFile,'______________________________________________________________'); + WriteLn(OutFile); + WriteLn(OutFile,'The above is from the public domain World English Bible (WEB).'); + WriteLn(OutFile,'See http://www.ebible.org/bible/WEB for more about this Bible.'); + WriteLn(OutFile,'Please report typos to mpj@ebible.org.'); + end; + CloseFile(OutFile); + fASCIIisOpen := false; + end; + end; + + procedure OpenHTML; + begin + if fHTMLisOpen then CloseHTML; + sLine := ''; + OutFileName := ExtractFilePath(DestEdit.Text)+BookFileName[InFile.bBk]+'.htm'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''+InFile.sTitle+''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,''); + WriteLn(OutFile,'

'); + WriteLn(OutFile,InFile.sTitle); + WriteLn(OutFile,'

'); + fHTMLisOpen := true; + end; + + procedure OpenASCII; + begin + if fASCIIisOpen then CloseASCII; + if fProse then + sLine := ' ' + else + sLine := ''; + OutFileName := ExtractFilePath(DestEdit.Text)+BookFileName[InFile.bBk]+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle); + WriteLn(OutFile); + fASCIIisOpen := true; + end; + + procedure OpenNTChapter; + var s: string; + begin + if InFile.bBk >= 64 then + begin + if fASCIIisOpen then CloseASCII; + inc(iFileNumber); + s := IntToStr(iFileNumber); + if Length(s) < 3 then s := '0'+s; + if Length(s) < 3 then s := '0'+s; + OutFileName := ExtractFilePath(DestEdit.Text)+'n'+s+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,'Subject: '+BookFileName[InFile.bBk]+' '+InFile.sChapter+', World English Bible'); + if iFileNumber = 260 then + WriteLn(OutFile,'X-Reset: 1'); + WriteLn(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle+', Chapter '+InFile.sChapter); + WriteLn(OutFile); + fASCIIisOpen := true; + if fProse then + sLine := ' ' + else + sLine := ''; + end + else + begin + inc(bChap); + if (bLastBook <> Infile.bBk) or ((bChap mod 3) = 1) then + begin + if (bLastBook <> Infile.bBk) then + begin + bLastBook := Infile.bBk; + bChap := 1; + end; + if fASCIIisOpen then CloseASCII; + inc(iFileNumber); + s := IntToStr(iFileNumber); + if Length(s) < 3 then s := '0'+s; + if Length(s) < 3 then s := '0'+s; + OutFileName := ExtractFilePath(DestEdit.Text)+s+'.txt'; + AssignFile(OutFile,OutFileName); + Rewrite(OutFile); + WriteLn(OutFile,'Subject: '+BookFileName[InFile.bBk]+' '+InFile.sChapter+', World English Bible'); + if (Infile.bBk = 39) and (bChap = 4) then + WriteLn(OutFile,'X-Reset: 1'); + WriteLn(OutFile); + WriteLn(OutFile); + WriteLn(OutFile,InFile.sTitle+', starting at chapter '+InFile.sChapter); + WriteLn(OutFile); + fASCIIisOpen := true; + if fProse then + sLine := ' ' + else + sLine := ''; + end; + end; + end; 
+ + procedure CheckHTMLEOL; + begin + if Length(sLine) > 75 then + begin + i := 75; + while (i > 0) and (sLine[i] <> ' ') do + dec(i); + if i < 1 then + begin + if fHTMLisOpen then WriteLn(OutFile,sLine); + sLine := '' + end + else + begin + sPrint := system.copy(sLine,1,i-1); + sSave := system.copy(sLine,i+1,Length(sLine)-i); + if fHTMLisOpen then WriteLn(OutFile,sPrint); + sLine := sSave; + end + end; + end; + + procedure StartNewHTMLLine; + begin + if fInclude then + begin + CheckHTMLEOL; + if fHTMLisOpen then WriteLn(OutFile, sLine+'

'); + sLine := '

'; + end; + end; + + +begin + QuickButton.Enabled := false; + GoBitBtn.Enabled := false; + fInclude := false; + fSkip := false; + fProse := true; + fRed := false; + LastBook := ''; + ParagraphAttributes := sNormalPar; + try + InFile := TReadGBF.Create; + if InFile.Init(Trim(SourceEdit.Text)) then + begin + LinePos := 0; + case FormatRadioGroup.ItemIndex of + -1: showmessage('No destination format selected!'); + 0: begin + Label3.Caption := 'Converting to ASCII'; + AssignFile(OutFile, DestEdit.Text); + FileMode := 1; + Rewrite(OutFile); + fASCIIisOpen := true; + sLine := ''; + repeat + wd := ANSIToOEM(InFile.GetToken(tok)); + Application.ProcessMessages; + case tok of + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewLine; + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + if wd[3] = 'B' then + fProse := true; + end; + end; + tokContent: + begin + if wd = '' then + fInclude := true + else if wd = '' then + fInclude := true + else if wd = '' then + fInclude := ApocryphaCheckBox.Checked + end; + tokControl: + begin + if wd = '' then + begin + StartNewLine; + if fProse then + begin + WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '' then + fProse := true + else if wd = '' then + fProse := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) 
then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + writeln(OutFile, sLine); + CloseFile(OutFile); + fASCIIisOpen := false; + Label3.Caption := ''; + end; + 1: begin + Label3.Caption := 'Converting to ASCII (one file/book)'; + FileMode := 1; + fASCIIisOpen := false; + sLine := ''; + repeat + Application.ProcessMessages; + wd := ANSIToOEM(InFile.GetToken(tok)); + case tok of + tokEOF: + CloseASCII; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewLine; + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseASCII; + end; + end; + end; + tokContent: + begin + if wd = '' then + fInclude := true + else if wd = '' then + fInclude := true + else if wd = '' then + fInclude := ApocryphaCheckBox.Checked + else if wd = '' then + OpenASCII; + end; + tokControl: + begin + if wd = '' then + begin + StartNewLine; + if fProse then + begin + if fASCIIisOpen then WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '' then + fProse := true + else if wd = '' then + fProse := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fInclude := false + end; + tokChar: + begin 
+ if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + if fASCIIisOpen then writeln(OutFile, sLine); + CloseASCII; + Label3.Caption := ''; + end; + 2: begin + Label3.Caption := 'Converting ASCII postings'; + bLastBook := 255; + bChap := 0; + FileMode := 1; + iFileNumber := 0; + fASCIIisOpen := false; + sLine := ''; + repeat + Application.ProcessMessages; + wd := ANSIToOEM(InFile.GetToken(tok)); + case tok of + tokEOF: + CloseASCII; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + sLine := sLine + '{' + InFile.sChapter+':'+ + InFile.sVerse+'} '; + CheckEOL + end + else if (wd[3] = 'C') then + begin + OpenNTChapter; + if (InFile.bBk = 19) then + begin + StartNewLine; + if fASCIIisOpen then + begin + WriteLn(OutFile, 'Psalm '+InFile.sChapter); + WriteLn(OutFile); + end; + end; + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseASCII; + end; + end; + end; + tokContent: + begin + if wd = '' then + fInclude := true + else if wd = '' then + begin + fInclude := true; + iFileNumber := 0; + end + else if wd = '' then + fInclude := ApocryphaCheckBox.Checked + end; + tokControl: + begin + if wd = '' then + begin + StartNewLine; + if fProse then + begin + if fASCIIisOpen then WriteLn(OutFile); + sLine := ' ' + end + end + else if wd = '' then + begin + StartNewLine; + sLine := ' '; + end + else if wd = '' then + fProse := true + else if wd = '' then + fProse := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if 
wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckEOL; + end; + end; + tokFont: + begin + if wd = '' then + begin + if fInclude then + sLine := sLine + '['; + end + else if wd = '' then + begin + if fInclude then + sLine := sLine + ']'; + end + end; + end + until tok = tokEOF; + if fASCIIisOpen then writeln(OutFile, sLine); + CloseASCII; + Label3.Caption := ''; + end; + 3: begin + Label3.Caption := 'Converting to RTF'; + AssignFile(OutFile, DestEdit.Text); + FileMode := 1; + Rewrite(OutFile); + repeat + Application.ProcessMessages; + wd := InFile.GetToken(tok); + case tok of + tokWord: + begin + if fInclude then + begin + LinePos := LinePos + Length(wd); + write(OutFile,wd); + end; + end; + tokSpace: + begin + if fInclude then + begin + LinePos := LinePos + Length(wd); + if LinePos > 78 then + begin + WriteLn(OutFile,wd); + LinePos := 0; + end + else + write(OutFile,wd); + end + end; + tokSync: + begin + if length(wd) > 1 then + begin + case wd[2] of + 'B': begin + if InFile.sBook <> LastBook then + begin + LastBook := InFile.sBook; + WriteLn(OutFile,'\par '+sTitlePar+ + LastBook+'\par '+ParagraphAttributes); + LinePos := 0; + end; + end; + 'V': begin + s := '{\f5\super '+InFile.sChapter+':'+ + InFile.sVerse+'}'; + Write(OutFile,s); + LinePos := LinePos+Length(s); + end; + end; + end; + end; + tokControl: + begin + if length(wd) > 1 then + begin + case wd[2] of + 'A': fInclude := false; + 'E': begin + Write(OutFile,'{\b\cf1 '); + LinePos := LinePos + 8; + end; + 'F': fInclude := false; + 'H': begin + fInclude := true; + ParagraphAttributes := sHebrewTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'I' : begin + Write(OutFile,'{\i\cf1 '); + LinePos := LinePos + 7; + end; + 'J' : begin + Write(OutFile,'{\scaps '); + LinePos := LinePos + 8; + end; + 
'K': fInclude := false; + 'M': begin + if fInclude then + begin + writeln(OutFile); + write(OutFile,'\par '+ParagraphAttributes); + LinePos := Length(ParagraphAttributes) + 5; + end; + end; + 'N': begin + fInclude := true; + ParagraphAttributes := sNormalPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'P': begin + fInclude := true; + ParagraphAttributes := sPoetryPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'Q': begin + fInclude := true; + ParagraphAttributes := sTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'R' : begin + Write(OutFile,'\cf6 '); + LinePos := LinePos + 4; + end; + 'S': begin + fInclude := true; + ParagraphAttributes := sSelahPar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'T': begin + fInclude := true; + ParagraphAttributes := sTitlePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'U' : begin + Write(OutFile,'{\ul '); + LinePos := LinePos + 4; + end; + 'W': begin + fInclude := true; + ParagraphAttributes := sNormalQuotePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'X': fInclude := false; + 'Y': begin + fInclude := true; + ParagraphAttributes := sPoetryQuotePar; + Write(OutFile,ParagraphAttributes); + LinePos := LinePos + Length(ParagraphAttributes); + end; + 'Z': fInclude := false; + 'a': fInclude := false; + 'c': fInclude := false; + 'e': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'h': fInclude := false; + 'i': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'j': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'n': fInclude := false; + 'p': fInclude := false; + 'r': begin + Write(OutFile,'}'); + inc(LinePos); + end; + 'u': begin + Write(OutFile,'}'); + inc(LinePos); + end; + + end; + end; 
+ end; + tokChar: + begin + if fInclude then + begin + write(OutFile,wd); + LinePos := LinePos + length(wd); + end; + end; + end; + until tok = tokEOF; + writeln(OutFile,'\par }'); + CloseFile(OutFile); + Label3.Caption := ''; + end; + 4: begin // GBF + Label3.Caption := 'Converting to GBF'; + OutGBF := TWriteGBF.Create; + OutGBF.Init(Trim(DestEdit.Text)); + OutGBF.Out(''); + repeat + Application.ProcessMessages; + wd := InFile.GetToken(tok); + if tok <> tokEOF then OutGBF.Out(wd); + until tok = tokEOF; + OutGBF.Done; + OutGBF.Free; + Label3.Caption := ''; + end; + 5: begin // HTML + Label3.Caption := 'Converting to HTML'; + fHTMLisOpen := false; + repeat + Application.ProcessMessages; + wd := Infile.GetToken(tok); + case tok of + tokEOF: + CloseHTML; + tokWord: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokSpace: + begin + if fInclude and (not fSkip) then + begin + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokSync: + begin + if fInclude and (length(wd) > 3) then + begin + if wd[3] = 'V' then + begin + if fRed then + sLine := sLine + ''; + sLine := sLine + ''+ + InFile.sChapter+':'+ + InFile.sVerse+''; + if fRed then + sLine := sLine + ''; + CheckHTMLEOL + end + else if (wd[3] = 'C') and (InFile.bBk = 19) then + begin + StartNewHTMLLine; + if fHTMLisOpen then + begin + WriteLn(OutFile, '

Psalm '+ + InFile.sChapter+'

'); + WriteLn(OutFile); + end; + end; + if wd[3] = 'B' then + begin + fProse := true; + CloseHTML; + end; + end; + end; + tokContent: + begin + if wd = '' then + fInclude := true + else if wd = '' then + fInclude := true + else if wd = '' then + fInclude := ApocryphaCheckBox.Checked + else if wd = '' then + OpenHTML; + end; + tokControl: + begin + if wd = '' then + begin + StartNewHTMLLine; + if not fProse then + begin + sLine := sLine + '  '; + end + end + else if wd = '' then + begin + StartNewHTMLLine; + sLine := sLine + '       ' + end + else if wd = '' then + fProse := true + else if wd = '' then + fProse := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fSkip := true + else if wd = '' then + fSkip := false + else if wd = '' then + fInclude := false + end; + tokChar: + begin + if fInclude and (not fSkip) then + begin + if wd = '"' then + sLine := sLine + '"' + else + sLine := sLine + wd; + CheckHTMLEOL; + end; + end; + tokFont: + begin + if fInclude then + begin + if wd = '' then + sLine := sLine + '' + else if wd = '' then + sLine := sLine + '' + else if wd = '' then + begin + if not fRed then + begin + sLine := sLine + ''; + fRed := true + end + end + else if wd = '' then + begin + if fRed then + begin + sLine := sLine + ''; + fRed := false + end + end + end; + end; + end; + until tok = tokEOF; + Label3.Caption := ''; + end; + end; + InFile.Done; + end; + InFile.Free; + except + showmessage('Error!'); + end; + GoBitBtn.Enabled := true; + QuickButton.Enabled := true; +end; + +procedure TGBFConverterMainForm.GoBitBtnClick(Sender: TObject); +begin + DoConversion; +end; + +procedure TGBFConverterMainForm.Timer1Timer(Sender: TObject); +begin + If InFile <> nil then + VerseLabel.Caption := InFile.sBook+' ['+IntToStr(InFile.bBk)+'] '+ + InFile.sChapter+':'+InFile.sVerse + else + VerseLabel.Caption := ''; +end; + +procedure TGBFConverterMainForm.QuickConversion; +begin + FormatRadioGroup.ItemIndex := 1; 
+ DoConversion; + FormatRadioGroup.ItemIndex := 2; + DoConversion; + FormatRadioGroup.ItemIndex := 5; + DoConversion; +end; + +procedure TGBFConverterMainForm.FormShow(Sender: TObject); +begin + VerseLabel.Caption := ''; + WdLabel.Caption := ''; +end; + +(* +procedure TGBFConverterMainForm.TransformButtonClick(Sender: TObject); +var apoc: textfile; + last, s, sBook, sChap, sVs: string; + blankcount, i: integer; +begin + TransformButton.Enabled := false; + blankcount := 0; + assignfile(apoc, trim(sourceedit.text)); + reset(apoc); + assignfile(outfile, trim(destedit.text)); + rewrite(outfile); + last := ''; + while not eof(apoc) do + begin + readln(apoc, s); + if s = '' then + begin + inc(blankcount); + if last <> '' then + begin + writeln(outfile, last, '~M'); + last := ''; + end; + end + else + begin + if blankcount >= 2 then + writeln(outfile, '~T',s,'~N~M') // book title + else if blankcount = 1 then + begin + sBook := ''; + sChap := ''; + sVs := ''; + i := 1; + while (s[i] <> ' ') and (i <= Length(s)) do + begin + sBook := sBook + s[i]; + inc(i); + end; + while (s[i] = ' ') and (i <= Length(s)) do + inc(i); + while (s[i] <> ':') and (i <= Length(s)) do + begin + sChap := sChap + s[i]; + inc(i); + end; + inc(i); + while IsDigit(s[i]) and (i <= Length(s)) do + begin + sVs := sVs + s[i]; + inc(i); + end; + write(outfile, '~B'+sBook+';~C'+sChap+';'); + if sVs <> '' then + write(outfile, '~V'+sVs+';'); + end + else + begin + if last <> '' then + begin + writeln(outfile, last); + end; + last := s; + end; + blankcount := 0; + end; + end; + if last <> '' then + begin + writeln(outfile, last); + last := s; + end; + closefile(outfile); + closefile(apoc); + TransformButton.Enabled := true; +end; +*) + +procedure TGBFConverterMainForm.FormatRadioGroupClick(Sender: TObject); +begin + Case FormatRadioGroup.ItemIndex of + 0: // Plain ASCII (one file) + DestEdit.Text := 'pub\web.txt'; + 1: // Plain ASCII (one file per book) + DestEdit.Text := 'pub\web.htm'; + 2: // Daily posts 
+ DestEdit.Text := 'pub\queue\web.txt'; + 3: // RTF + DestEdit.Text := 'pub\web.rtf'; + 4: // GBF + DestEdit.Text := 'pub\web.gbf'; + 5: // HTML + DestEdit.Text := 'pub\htm\web.htm'; + end; +end; + +procedure TGBFConverterMainForm.QuickButtonClick(Sender: TObject); +begin + QuickConversion; +end; + +procedure TGBFConverterMainForm.FormActivate(Sender: TObject); +begin + if ParamCount > 0 then + if ParamStr(1) = 'quick' then + begin + QuickConversion; + close; + end; +end; + +end. diff --git a/src/modules/texts/rawgbf/Makefile b/src/modules/texts/rawgbf/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/rawgbf/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/texts/rawgbf/Makefile.am b/src/modules/texts/rawgbf/Makefile.am new file mode 100644 index 0000000..ab6aa2e --- /dev/null +++ b/src/modules/texts/rawgbf/Makefile.am @@ -0,0 +1,4 @@ +rawgbfdir = $(top_srcdir)/src/modules/texts/rawgbf + +libsword_la_SOURCES += $(rawgbfdir)/rawgbf.cpp + diff --git a/src/modules/texts/rawgbf/gbf.cpp b/src/modules/texts/rawgbf/gbf.cpp new file mode 100644 index 0000000..dc67a1c --- /dev/null +++ b/src/modules/texts/rawgbf/gbf.cpp @@ -0,0 +1,735 @@ +enum TToken { +tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, + tokWord, tokSpace, tokSync, tokControl, tokChar, tokFont}; + +enum TCharacterAttribute { caBold, caSmallCaps, caItalic, caOTQuote, caRed, + caSuperscript, caUnderline, caSubscript}; + +// TCharAttribs = set of TCharacterAttribute; + + +struct TBookNameRec { + string Name, Abbr; + char Num; +} + +const struct TBookNameRec TBookAbbr[116] = { + {"1 Chronicles", "1CH", 13}, //0 + {"1 Corinthians", "1CO", 70}, //1 + {"1 Esdras", "1E", 52}, //2 + {"1 John", "1J", 86}, //3 + {"1 Kings", "1K", 11}, //4 + {"1 Maccabees", "1M", 50}, //5 + {"1 Peter", "1P", 84}, //6 + {"1 Samuel", "1S", 9}, //7 + {"1 Thessalonians", "1TH", 76}, //8 + {"1 Timothy", "1TI", 78}, //9 + {"2 Chronicles", 
"2CH", 14}, //10 + {"2 Corinthians", "2CO", 71}, //11 + {"2 Esdras", "2E", 56}, //12 + {"2 John", "2J", 87}, //13 + {"2 Kings", "2K", 12}, //14 + {"2 Maccabees", "2M", 51}, //15 + {"2 Peter", "2P", 85}, //16 + {"2 Samuel", "2S", 10}, //17 + {"2 Thessalonians", "2TH", 77}, //18 + {"2 Timothy", "2TI", 79}, //19 + {"3 John", "3J", 88}, //20 + {"3 Maccabees", "3M", 55}, //21 + {"4 Maccabees", "4M", 57}, //22 + {"1 Chronicles", "1 CH", 13}, //0 + {"1 Corinthians", "1 CO", 70}, //1 + {"1 Esdras", "1 E", 52}, //2 + {"1 John", "1 J", 86}, //3 + {"1 Kings", "1 K", 11}, //4 + {"1 Maccabees", "1 M", 50}, //5 + {"1 Peter", "1 P", 84}, //6 + {"1 Samuel", "1 S", 9}, //7 + {"1 Thessalonians", "1 TH", 76}, //8 + {"1 Timothy", "1 TI", 78}, //9 + {"2 Chronicles", "2 CH", 14}, //10 + {"2 Corinthians", "2 CO", 71}, //11 + {"2 Esdras", "2 E", 56}, //12 + {"2 John", "2 J", 87}, //13 + {"2 Kings", "2 K", 12}, //14 + {"2 Maccabees", "2 M", 51}, //15 + {"2 Peter", "2 P", 85}, //16 + {"2 Samuel", "2 S", 10}, //17 + {"2 Thessalonians", "2 TH", 77}, //18 + {"2 Timothy", "2 TI", 79}, //19 + {"3 John", "3 J", 88}, //20 + {"3 Maccabees", "3 M", 55}, //21 + {"4 Maccabees", "4 M", 57}, //22 + {"Acts", "AC", 68}, //23 + {"Amos", "AM", 30}, //24 + {"Prayer of Asariah and the Song of the Three Jews", "AZ", 47}, + {"Baruch", "BA", 45}, //26 + {"Bel and the Dragon","BE", 49}, //27 + {"Colossians", "CO", 75}, //28 + {"Daniel", "DA", 27}, //29 + {"Deuteronomy", "DE", 5}, //30 + {"Deuteronomy", "DT", 5}, //31 + {"Ecclesiasties", "EC", 21}, //32 + {"Esther", "ES", 17}, //33 + {"Exodus", "EX", 2}, //34 + {"Ezekiel", "EZE", 26}, //35 + {"Ezra", "EZR", 15}, //36 + {"Galatians", "GA", 72}, //37 + {"Genesis", "GE", 1}, //38 + {"Genesis", "GN", 1}, //39 + {"Ephesians", "EP", 73}, //40 + {"Esther (Greek}", "GR", 42), //41 + {"Habakkuk", "HAB", 35}, //42 + {"Haggai", "HAG", 37}, //43 + {"Hebrews", "HE", 82}, //44 + {"Hosea", "HO", 28}, //45 + {"Isaiah", "IS", 23}, //46 + {"James", "JA", 83}, //47 + {"Jeremiah", 
"JE", 24}, //48 + {"Job", "JOB", 18}, //49 + {"Joel", "JOE", 29}, //50 + {"John", "JOH", 67}, //51 + {"Jonah", "JON", 32}, //52 + {"Joshua", "JOS", 6}, //53 + {"Jude", "JUDE", 89}, //54 + {"Judges", "JUDG", 7}, //55 + {"Judith", "JUDI", 41}, //56 + {"Lamentations", "LA", 25}, //57 + {"Letter of Jeremiah",Abbr:"LET", 46}, //58 + {"Leviticus", "LEV", 3}, //59 + {"Luke", "LK", 66}, //60 + {"Leviticus", "LV", 3}, //61 + {"Luke", "LU", 66}, //62 + {"Malachi", "MAL", 39}, //63 + {"Prayer of Manasseh",Abbr:"MAN", 53}, //64 + {"Mark", "MAR", 65}, //65 + {"Matthew", "MAT", 64}, //66 + {"Micah", "MI", 33}, //67 + {"Nahum", "NA", 34}, //68 + {"Nehemiah", "NE", 16}, //69 + {"Numbers", "NU", 4}, //70 + {"Obadiah", "OB", 31}, //71 + {"Psalm 151", "P1", 54}, //72 + {"Philemon", "PHILE", 81}, //73 + {"Philippians", "PHILI", 74}, //74 + {"Philemon", "PHM", 81}, //75 + {"Philippians", "PHP", 74}, //76 + {"Proverbs", "PR", 20}, //77 + {"Psalms", "PS", 19}, //78 + {"Revelation", "RE", 90}, //79 + {"Romans", "RM", 69}, //80 + {"Romans", "RO", 69}, //81 + {"Ruth", "RU", 8}, //82 + {"Sirach", "SI", 44}, //83 + {"Song of Solomon", "SOL", 22}, //84 + {"Song of Solomon", "SON", 22}, //85 + {"Song of Solomon", "SS", 22}, //86 + {"Susanna", "SU", 48}, //87 + {"Titus", "TI", 80}, //88 + {"Tobit", "TO", 40}, //89 + {"Wisdom", "WI", 43}, //90 + {"Zechariah", "ZEC", 38}, //91 + {"Zephaniah", "ZEP", 36} //92 + }, + +string BookFileName[91] = { + "","Genesis","Exodus","Lev","Num","Deut","Joshua","Judges", // 0 - 7 + "Ruth","1Sam","2Sam","1Kings","2Kings","1Chron","2Chron", // 8 - 14 + "Ezra","Nehemiah","Esther","Job","Psalms","Proverbs", // 15-20 + "Eccl","Song","Isaiah","Jeremiah","Lament","Ezekiel", // 21-26 + "Daniel","Hosea","Joel","Amos","Obadiah","Jonah","Micah", // 27-33 + "Nahum","Habakkuk","Zeph","Haggai","Zech","Malachi", // 34-39 + "Tobit","Judith","Esther","Wisdom","Sirach","Baruch", // 40-45 + "Let","Azar","Susanna","Bel","1Mac","2Mac","1Esdras", // 46-52 + 
"Man","P1","3Mac","2Esdras","4Mac","","","","","","", // 53-63 + "Matthew","Mark","Luke","John","Acts","Romans","1Cor", // 64-70 + "2Cor","Gal","Eph","Philip","Col","1Thes","2Thes","1Tim", // 71-78 + "2Tim","Titus","Philemon","Hebrews","James","1Peter", // 79-84 + "2Peter","1John","2John","3John","Jude","Rev"}; // 85-90 + +class TReadGBF { +private: + FILE *fp; + string FName, TokenLine; + int TokenPos; + bool fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, fInHebrewTitle, fInSectionTitle; + +public: + string sBook, sChapter, sVerse, sMode; + string sContext; // Last text type (header, body, or tail) + string sTitle; // Title of this book of the Bible + string sPsalmBookTitle; // Title of this Psalm book + string sHebrewTitle; // Psalm Hebrew title + string sSectionTitle; // Section headings + string sDate; + string sFontName; + int iTotalWords; + char chJustification, chDirection; + bool fIndent, fPoetry; + int CharAttribs; + char bBk, bChap, bVs, bWd; + + bool Init(const string sFileName); + void Done(); + string GetToken(TToken &TokenKind); + end; + +class TWriteGBF { + private: + F: TextFile; + FName, LineOut: string; + fFileIsOpen: boolean; + bBk, bChap, bVs, bWd: byte; + + public + + function Init(const sFileName: string): boolean; + function Done: boolean; + procedure Out(const s: string); + end; + +function isletter(const ch: char): boolean; +function isinword(const ch: char): boolean; +function IsDigit(const ch: char): Boolean; +function IsUpper(const ch: char): Boolean; +function ConformCase(const sPat, sSrc: string): string; +function BookNameToNumber(const sBookName: string): byte; + +implementation + +function isletter(const ch: char): boolean; +begin + case ch of + 'A'..'Z': isletter := true; + 'a'..'z': isletter := true; + else + isletter := false; + end; +end; + +function isinword(const ch: char): boolean; +begin + case ch of + '-': isinword := true; + 'A'..'Z': isinword := true; + 'a'..'z': isinword := true; + else + isinword := false; + 
end; +end; + +function IsUpper(const ch: char): Boolean; +begin + case ch of + 'A'..'Z': IsUpper := true; + else + IsUpper := false; + end; +end; + +function IsDigit(const ch: char): Boolean; +begin + case ch of + '0'..'9': IsDigit := true; + else + IsDigit := false; + end; +end; + + +function MatchAbbrev(const sName, sAbbrev: string): boolean; +var i: integer; +begin + if Length(sName) < Length(sAbbrev) then + Result := false + else + Result := true; + i := 1; + while (i <= Length(sAbbrev)) and Result do + begin + if UpCase(sName[i]) <> sAbbrev[i] then + Result := false; + inc(i); + end; +end; + +function BookNameToNumber(const sBookName: string): byte; +var i: integer; +begin + Result := 0; + try + if IsDigit(sBookName[Length(sBookName)]) and IsDigit(sBookName[1]) then + Result := StrToInt(sBookName); + except + Result := 0; + end; + i := 0; + while (Result = 0) and (i <= 115) do // Yuk! Linear search. + begin + if MatchAbbrev(sBookName,BookAbbr[i].Abbr) then + begin + Result := BookAbbr[i].Num; + end; + inc(i); + end; +end; + +function BookNumberToName(const bBookNum: byte): string; +begin + if bBookNum <= 115 then + Result := BookAbbr[bBookNum].Name + else + Result := ''; +end; + +function ConformCase(const sPat, sSrc: string): string; +var i: integer; +begin + Result := sSrc; + if (Length(sPat) > 0) and (Length(sSrc) > 0) then + begin + Result := LowerCase(sSrc); + if IsUpper(sPat[1]) then + Result[1] := UpCase(Result[1]); + if (Length(sPat) > 1) and (Length(sSrc) > 1) then + begin + if IsUpper(sPat[2]) then + begin + for i := 2 to Length(Result) do + Result[i] := UpCase(Result[i]); + end; + end; + end; +end; + +function TReadGBF.Init(const sFileName: string): boolean; +var s: string; + tok: TToken; +begin + try + fParagraphEnd := false; + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + iTotalWords := 0; + FName := sFileName; + Assign(F, FName); + reset(F); + readln(F, TokenLine); + TokenPos := 1; + fFileIsOpen := true; + repeat + s := GetToken(tok) + until 
(tok = tokEOF) or ((tok = tokHeader) and (s[3] = '0')); + Init := true; + except + Init := false; + fFileIsOpen := false; + end +end; + +procedure TReadGBF.Done; +begin + if fFileIsOpen then + begin + closefile(F); + fFileIsOpen := false; + end; +end; + +function TReadGBF.GetToken(var TokenKind: TToken): string; +var m: integer; +begin + Result := ''; + TokenKind := tokNull; + if TokenPos = 0 then + begin + if (not fFileIsOpen) or EOF(F) then + TokenKind := tokEOF + else + begin + ReadLn(F,TokenLine); + TokenPos := 1; + end; + end; + if TokenKind <> tokEOF then + begin + m := Length(TokenLine); + if TokenPos > m then + begin + TokenKind := tokSpace; + if fParagraphEnd then + fParagraphEnd := false + else + Result := ' '; + TokenPos := 0; + end + else + begin + if (TokenLine[TokenPos] = '<') then + begin + fParagraphEnd := false; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenLine[TokenPos] = '>') or (TokenPos > m); + Result := Result + '>'; + inc(TokenPos); + case result[2] of + 'B': begin // File body text type + TokenKind := tokContent; + sContext := Result; + end; + 'C': begin // Special characters + TokenKind := tokControl; + if (Result[3] = 'M') or (Result[3] = 'L') then + fParagraphEnd := true; + end; + 'D': begin // Direction + TokenKind := tokControl; + chDirection := Result[3]; + end; + 'H': begin + TokenKind := tokHeader; + sContext := Result; + end; + 'F': begin // Font attributes + TokenKind := tokFont; + case Result[3] of + 'B': CharAttribs := CharAttribs + [caBold]; + 'C': CharAttribs := CharAttribs + [caSmallCaps]; + 'I': CharAttribs := CharAttribs + [caItalic]; + 'N': sFontName := copy(Result,4,Length(Result)-4); + 'O': CharAttribs := CharAttribs + [caOTQuote]; + 'R': CharAttribs := CharAttribs + [caRed]; + 'S': CharAttribs := CharAttribs + [caSuperscript]; + 'U': CharAttribs := CharAttribs + [caUnderline]; + 'V': CharAttribs := CharAttribs + [caSubscript]; + 'b': CharAttribs := CharAttribs - [caBold]; + 'c': 
CharAttribs := CharAttribs - [caSmallCaps]; + 'i': CharAttribs := CharAttribs - [caItalic]; + 'n': sFontName := ''; + 'o': CharAttribs := CharAttribs - [caOTQuote]; + 'r': CharAttribs := CharAttribs - [caRed]; + 's': CharAttribs := CharAttribs - [caSuperscript]; + 'u': CharAttribs := CharAttribs - [caUnderline]; + 'v': CharAttribs := CharAttribs - [caSubscript]; + + end; + end; + 'J': begin // Justification + TokenKind := tokStyle; + chJustification := Result[3]; + end; + 'P': begin // Poetry/prose, indent + TokenKind := tokControl; + case Result[3] of + 'I': fIndent := true; + 'P': fPoetry := true; + 'i': fIndent := false; + 'p': fPoetry := false; + end; + end; + 'R': begin // References and footnotes + TokenKind := tokControl; + end; + 'S': begin // sync mark + TokenKind := TokSync; + case Result[3] of + 'B': begin // Book + sBook := system.copy(Result, 4, length(Result)-4); + sPsalmBookTitle := ''; + if sBook = '' then + begin + inc(bBk); + sBook := BookNumberToName(bBk); + end + else + bBk := BookNameToNumber(sBook); + sTitle := sBook; + end; + 'C': begin //chapter + sChapter := system.copy(Result, 4, length(Result)-4); + if sChapter = '' then + begin + inc(bChap); + sChapter := IntToStr(bChap); + end + else + begin + try + bChap := StrToInt(sChapter); + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + sHebrewTitle := ''; + end; + 'V': begin // Verse + bWd := 0; + sVerse := system.copy(Result, 4, length(Result)-4); + if sVerse = '' then + begin + inc(bVs); + sVerse := IntToStr(bVs); + end + else + begin + try + bVs := StrToInt(sVerse); + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + 'D': begin // Date + sDate := system.copy(Result, 3, length(Result)-4); + end; + end; + end; + 'T': begin // Titles + TokenKind := TokContent; + case Result[3] of + 'B': + begin + sPsalmBookTitle := ''; + fInPsalmBookTitle := true; + end; + 'b': fInPsalmBookTitle := true; + 'H': + begin + 
sHebrewTitle := ''; + fInHebrewTitle := true; + end; + 'h': fInHebrewTitle := false; + 'S': + begin + sSectionTitle := ''; + fInSectionTitle := true; + end; + 's': fInSectionTitle := false; + 'T': + begin + sTitle := ''; + fInTitle := true; + end; + 't': fInTitle := false; + end; + end; + 'Z': begin // File tail + TokenKind := tokTail; + sContext := Result; + if Result[3] = 'Z' then + done; + end; + else + TokenKind := TokControl; + + end; + end + else if isletter(TokenLine[TokenPos]) then + begin {Word} + fParagraphEnd := false; + TokenKind := tokWord; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenPos > m) or (not isinword(TokenLine[TokenPos])); + inc(bWd); + inc(iTotalWords); + end + else if ((TokenLine[TokenPos] = ' ') or (TokenLine[TokenPos] = #9)) then + begin + fParagraphEnd := false; + TokenKind := tokSpace; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + else + begin + fParagraphEnd := false; + TokenKind := tokChar; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + end; + end; + if ((TokenKind = tokWord) or (TokenKind = tokSpace) or + (TokenKind = tokChar)) then + begin + if fInTitle then + sTitle := sTitle + Result + else if fInPsalmBookTitle then + sPsalmBookTitle := sPsalmBookTitle + Result + else if fInHebrewTitle then + sHebrewTitle := sHebrewTitle + Result + else if fInSectionTitle then + sSectionTitle := sSectionTitle + Result; + end; +end; + +function TWriteGBF.Init(const sFileName: string): boolean; +begin + try + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + LineOut := ''; + FName := sFileName; + Assign(F, FName); + filemode := 1; + rewrite(F); + fFileIsOpen := true; + Init := true; + except + Init := false; + fFileIsOpen := false; + end +end; + +function TWriteGBF.Done: boolean; +begin + try + if fFileIsOpen then + begin + if LineOut <> '' then + begin + WriteLn(F, LineOut); + LineOut := ''; + end; + CloseFile(F); + end; + Done := true; + except + Done := false; + end; 
+end; + +procedure TWriteGBF.Out(const s: string); +var sPrint, sSave, sBook, sChapter, sVerse: string; + i: integer; + b: byte; +begin + if (Length(s) > 0) and IsLetter(s[1]) then + begin + inc(bWd); + LineOut := LineOut + s; + end + else if Length(s) > 3 then + begin + if (s[1] = '<') and (s[2] = 'S') then + begin + case s[3] of + 'B': begin // Book + sBook := system.copy(s, 4, length(s)-4); + if sBook = '' then + begin + inc(bBk); + LineOut := LineOut + s; + end + else + begin + b := bBk; + bBk := BookNameToNumber(sBook); + if b <> bBk then + LineOut := LineOut + s; + end; + end; + 'C': begin //chapter + sChapter := system.copy(s, 4, length(s)-4); + if sChapter = '' then + begin + inc(bChap); + LineOut := LineOut + s; + end + else + begin + try +// b := bChap; + bChap := StrToInt(sChapter); +// if b <> bChap then + LineOut := LineOut + s; + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + end; + 'V': begin // Verse + bWd := 0; + sVerse := system.copy(s, 4, length(s)-4); + if sVerse = '' then + begin + inc(bVs); + LineOut := LineOut + s; + end + else + begin + try +// b := bVs; + bVs := StrToInt(sVerse); +// if b <> bVs then + LineOut := LineOut + s; + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + else + LineOut := LineOut + s; + end + end + else + LineOut := LineOut + s; // Not a sync mark + end + else // other token, space, or punctuation + LineOut := LineOut + s; // Length <= 3 + if ((s = '') or (s = '')) then + begin + if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + WriteLn(F, sSave); + LineOut := ''; + end + end + else + begin + WriteLn(F, LineOut); + LineOut := ''; + end + end + else if (Length(LineOut) > 78) then + begin 
+ i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + LineOut := sSave; + end + end +end; + +end. diff --git a/src/modules/texts/rawgbf/gbf.h b/src/modules/texts/rawgbf/gbf.h new file mode 100644 index 0000000..b695759 --- /dev/null +++ b/src/modules/texts/rawgbf/gbf.h @@ -0,0 +1,67 @@ +/* Header for module GBF, generated by p2c */ +#ifndef GBF_H +#define GBF_H +/* p2c: Gbf.pas, line 5: Warning: Could not find module SYSUTILS [271] */ + + +#include "sysutils.h" +/* p2c: Gbf.pas, line 5: Warning: Could not find module DIALOGS [271] */ +#include "dialogs.h" + + +#ifdef GBF_G +# define vextern +#else +# define vextern extern +#endif + + + +typedef enum { + tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, tokWord, + tokSpace, tokSync, tokControl, tokChar, tokFont +} TToken; +typedef enum { + caBold, caSmallCaps, caItalic, caOTQuote, caRed, caSuperscript, caUnderline, + caSubscript +} TCharacterAttribute; +typedef long TCharAttribs; + + + +typedef struct TBookNameRec { + Char Name[256], Abbr[256]; + uchar Num; +} TBookNameRec; + +typedef TBookNameRec TBookAbbr[116]; +/* p2c: Gbf.pas, line 25: + * Warning: Expected an expression, found a '/' [227] */ +/* p2c: Gbf.pas, line 25: + * Warning: Expected an expression, found a '/' [227] */ +/* p2c: Gbf.pas, line 25: Warning: Division by zero [163] */ +/* p2c: Gbf.pas, line 26: Warning: Division by zero [163] */ +/* p2c: Gbf.pas, line 26: Warning: Expected a ')', found a '(' [227] */ +/* p2c: Gbf.pas, line 144: + * Warning: Expected an expression, found a '/' [227] */ +/* p2c: Gbf.pas, line 144: + * Warning: Expected an expression, found a '/' [227] */ +/* p2c: Gbf.pas, line 144: Warning: Division by zero [163] */ +/* p2c: Gbf.pas, line 144: Warning: Division by zero [163] */ +/* p2c: Gbf.pas, line 145: 
Warning: Mixing non-strings with strings [170] */ +/* p2c: Gbf.pas, line 145: + * Warning: Expected a ')', found a string literal [227] */ + + +extern TBookAbbr BookAbbr; + +extern Char BookFileName[91][256]; + +vextern Char STR1[256]; + + +#undef vextern + +#endif /*GBF_H*/ + +/* End. */ diff --git a/src/modules/texts/rawgbf/gbfidx.cpp b/src/modules/texts/rawgbf/gbfidx.cpp new file mode 100644 index 0000000..8337d62 --- /dev/null +++ b/src/modules/texts/rawgbf/gbfidx.cpp @@ -0,0 +1,294 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for WEB). Good luck! + */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + if (num1 <= key2.Chapter()) { 
// new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + printf("Found Chapter Break: %d ('%s')\n", num1, (const char *)key2); + chapoffset = offset; + chapsize = size; +// continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startchap(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char 
startentry(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'V') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + + memset(buf, ' ', 7); + + while (1) { + if (startchap(buf)) { + chapstart = lseek(fp, 0, SEEK_CUR) - 7; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + } + if (flag) + *num1 = atoi(buf); + else (*num1)++; + } + if (startentry(buf)) { + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + if (flag) + *num2 = atoi(buf); + else (*num2)++; + } + loop++; + if (size) + *offset = lseek(fp, 0, SEEK_CUR) - (7 - loop); + else *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + if (vs2) { + *size = (offset2 - (*offset)); + } + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ char buf[255]; + + if ((fp = open(fname, O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp new file mode 100644 index 0000000..0866585 --- /dev/null +++ b/src/modules/texts/rawgbf/rawgbf.cpp @@ -0,0 +1,84 @@ +/****************************************************************************** + * rawgbf.cpp - code for class 'RawGBF'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + + +/****************************************************************************** + * RawGBF Constructor - Initializes data for instance of RawGBF + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawGBF::RawGBF(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp) : SWText(iname, idesc, idisp), RawVerse(ipath) +{ +} + + +/****************************************************************************** + * RawGBF Destructor - Cleans up instance 
of RawGBF + */ + +RawGBF::~RawGBF() +{ +} + + +/****************************************************************************** + * RawGBF::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +RawGBF::operator char*() +{ + long start; + unsigned short size; + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!key) + key = new VerseKey(this->key); + + + findoffset(key->Testament(), key->Index(), &start, &size); + + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other. + + gettext(key->Testament(), start, size + 1, entrybuf); + preptext(entrybuf); + RenderText(entrybuf, size * 3); + + if (key != this->key) + delete key; + + return entrybuf; +} diff --git a/src/modules/texts/rawtext/Makefile b/src/modules/texts/rawtext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/rawtext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/texts/rawtext/Makefile.am b/src/modules/texts/rawtext/Makefile.am new file mode 100644 index 0000000..d0e1d7e --- /dev/null +++ b/src/modules/texts/rawtext/Makefile.am @@ -0,0 +1,4 @@ +rawtextdir = $(top_srcdir)/src/modules/texts/rawtext + +libsword_la_SOURCES += $(rawtextdir)/rawtext.cpp + diff --git a/src/modules/texts/rawtext/kjvidx.cpp b/src/modules/texts/rawtext/kjvidx.cpp new file mode 100644 index 0000000..708a9e6 --- /dev/null +++ b/src/modules/texts/rawtext/kjvidx.cpp @@ -0,0 +1,169 @@ +#include +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? 
"Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if ((!memcmp(buf, "\\widctlpar {\\b\\f0\\cf2 ", 16)) && (!size)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
-18:-17; + } + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (offadj > -10) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)); + } + lseek(fp, *offset+17, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/makebnds.c b/src/modules/texts/rawtext/makebnds.c new file mode 100644 index 0000000..44da447 --- /dev/null +++ b/src/modules/texts/rawtext/makebnds.c @@ -0,0 +1,86 @@ +#include +#include + + +char *bnames[] = { + "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", + "Joshua", "Judges", "Ruth", "I Samual", "II Samuel", + "I Kings", "II Kings", "I Chronicles", "II Chronicles", "Ezra", + "Nehemiah", "Esther", "Job", "Psalms", "Proverbs", + "Ecclesiastes", "Song of Solomon", "Isaiah", "Jeremiah", "Lamentations", + "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", + "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", + "Zephaniah", "Haggai", "Zechariah", "Malachi", + "Matthew", "Mark", "Luke", "John", "Acts", + "Romans", "I Corinthians", "II Corinthians", "Galatians", "Ephesians", + "Philippians", "Colossians", "I Thessalonians", "II Thessalonians", "I Timothy", + "II Timothy", "Titus", "Philemon", "Hebrews", "James", + "I Peter", "II Peter", "I John", "II John", "III John", + "Jude", "Revelation of John"}; + + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + int num1, num2, offset, offset2, chapmax, chapoff, chapoff2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc > 3) { + fprintf(stderr, "usage: %s 
[NT?]\n", argv[0]); + exit(1); + } + + if (argc > 2) + curbook = 39; + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + read(bfp, &offset2, sizeof(offset2)); + read(cfp, &chapoff2, sizeof(chapoff2)); + while (read(bfp, &offset, sizeof(offset)) == sizeof(offset)) { + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + for (curchap = 0; curchap < chapmax; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + offset2 = offset; + } + pos = lseek(cfp, 0, SEEK_CUR); + offset = (int) lseek(cfp, 0, SEEK_END); + chapmax = (offset - offset2) / sizeof(offset); + printf("\n\{\"%s\", %d}, \n// %s\n", bnames[curbook], chapmax, bnames[curbook]); + curbook++; + lseek(cfp, pos, SEEK_SET); + for (curchap = 0; curchap < chapmax - 1; curchap++) { + read(cfp, &chapoff, sizeof(chapoff)); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + chapoff2 = chapoff; + } + chapoff = (int) lseek(vfp, 0, SEEK_END); + printf("%d, ", (chapoff - chapoff2) / sizeof(chapoff)); + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} diff --git a/src/modules/texts/rawtext/nuidx.cpp b/src/modules/texts/rawtext/nuidx.cpp new file mode 100644 index 0000000..edf298d --- /dev/null +++ b/src/modules/texts/rawtext/nuidx.cpp @@ -0,0 +1,238 @@ +/***************************************************************************** + * + * This code wreaks but works (at least for MHC). Good luck! 
+ */ + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp, vfp, cfp, bfp; +long chapoffset; +short chapsize; +char testmnt; + + +main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax, curbook = 0, curchap = 0, curverse = 0; + char buf[127], startflag = 0; + short size, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + + testmnt = key1.Testament(); + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + writeidx(key1, key2, key3, offset, size); + key2++; + key3 = key2; + } + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +/************************************************************************** + * ENT: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (key1.Verse() == 1) { // new chapter + if (key1.Chapter() == 1) { // new book + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, 
SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Book intro (vss) set to same as chap for now(it should be chap 1 which usually contains the book into anyway)*/ + write(vfp, &chapsize, 2); + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + } + if (key1 >= key2) { + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + } + } +} + + +char startchap(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startentry(char *buf) +{ + char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'V') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, short *size) +{ + char buf[7]; + char buf2[20]; + char ch; + char loop; + long offset2; + int ch2, vs2, rm2; + bool flag; + long chapstart = 0; + + memset(buf, ' ', 7); + + while (1) { + if (startentry(buf)) { + if (size) + *offset = lseek(fp, 0, SEEK_CUR) - 3; + else *offset = lseek(fp, 0, SEEK_CUR) - 7; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + *size = (offset2 - (*offset)); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + 
+void openfiles(char *fname) +{ + char buf[255]; + + if ((fp = open(fname, O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (argc == 3) + key1 = key2 = key3 = "Matthew 1:1"; + else key1 = key2 = key3 = "Genesis 1:1"; +} diff --git a/src/modules/texts/rawtext/ojbtxidx.c b/src/modules/texts/rawtext/ojbtxidx.c new file mode 100644 index 0000000..f70cc01 --- /dev/null +++ b/src/modules/texts/rawtext/ojbtxidx.c @@ -0,0 +1,166 @@ +#include +#include +#ifndef __GNUC__ +#include +#else +#include +#endif + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fd, vfd, cfd, bfd; + long pos, offset; + short size, tmp; + int num1, num2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } +#ifndef O_BINARY // O_BINARY is for Borland to be happy. 
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfd, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfd, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfd, &pos, 4); /* Module intro */ + write(vfd, &size, 2); + write(vfd, &pos, 4); /* Testament intro */ + write(vfd, &size, 2); + + while (!findbreak(fd, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfd, 0, SEEK_CUR); + write(bfd, &pos, 4); + pos = lseek(vfd, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfd, &pos, 4); + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Book intro (vss) */ + write(vfd, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfd, 0, SEEK_CUR); + write(cfd, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Chapter intro */ + write(vfd, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2); + curchap = num1; +// break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of 
sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1); +// break; + tmp = 0; + curverse = num2; + write(vfd, &offset, 4); + write(vfd, &tmp, 2); + } + write(vfd, &offset, 4); + write(vfd, &size, 2); + } + + close(vfd); + close(cfd); + close(bfd); + close(fd); + return 0; +} + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size) +{ + char buf[8]; + char buf2[7]; + char loop, len, star; + + memset(buf, ' ', 7); + buf[7] = 0; + + while (1) { + + memmove(buf, &buf[1], 6); + if (read(fd, &buf[6], 1) != 1) + return 1; + + if ((buf[0] == 10) && ((buf[2] == '*') || (buf[3] == '*') || (buf[4] == '*'))) { + star = 0; + for (loop = 0; loop < 7; loop++) { + if (buf[loop] == '*') + star = 1; + if (isdigit(buf[loop])&&star) + break; + else buf[loop] = ' '; + } + if (loop < 7) { + sscanf(buf, "%d", num1); + continue; + } + } + + if ((buf[0] == '|') && (isdigit(buf[1]))) { + sscanf(&buf[1], "%d", num2); + sprintf(buf, "%d", *num2); + (*offset) = lseek(fd, 0, SEEK_CUR); + (*offset) -= (4-strlen(buf)); + + for (len = 1; len == 1; len = read(fd, &loop, 1)) { + if (loop == '|') + break; + } + + *size = (short)(lseek(fd, 0, SEEK_CUR) - *offset) - 1; + lseek(fd, -1, SEEK_CUR); + break; + } + } + return 0; +} + diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..acc1cfd --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,630 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include // GNU + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/****************************************************************************** + * RawText Constructor - Initializes data for 
instance of RawText + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); + if (!access(fastidxname.c_str(), 04)) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (!access(fastidxname.c_str(), 04)) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() +{ + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +} + + +/****************************************************************************** + * RawText::operator char * - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +char *RawText::getRawEntry() { + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) { } +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + gettext(key->Testament(), start, (size + 2), entrybuf); + + rawFilter(entrybuf, size, key); + + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return entrybuf; +} + + +signed char RawText::createSearchFramework() { + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. [0] Old Testament; [1] NT + map < string, list > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make work upper case + for (unsigned int i = 0; i < strlen(word); i++) + word[i] = SW_toupper(word[i]); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions 
+ dictionary[lkey->Testament()-1][word].push_back(index); + word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + int datfd; + int idxfd; + map < string, list >::iterator it; + list::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + string fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... + for (int loop = 0; loop < 2; loop++) { + if ((datfd = open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) + return -1; + if ((idxfd = open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), O_CREAT|O_WRONLY|O_BINARY, 00644 )) == -1) { + close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = lseek(datfd, 0, SEEK_CUR); + write(idxfd, &offset, 4); + + // write our word out to the word.dat file, delineating with a \n + write(datfd, it->first.c_str(), strlen(it->first.c_str())); + write(datfd, "\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + write(datfd, &entryoff, 4); + count++; + } + + // now see what our new 
position is in our word.dat file and + // determine the size of this database entry + size = lseek(datfd, 0, SEEK_CUR) - offset; + + // store the size of this database entry + write(idxfd, &size, 2); + printf("%d entries (size: %d)\n", count, size); + } + close(datfd); + close(idxfd); + } + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + listkey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; +#ifndef _WIN32_WCE + try { +#endif + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. 
+ if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + char *datbuf = 0; + list indexes; + list indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + for (unsigned int i = 0; i < strlen(wordBuf); i++) + wordBuf[i] = SW_toupper(wordBuf[i]); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. 
+ for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findoffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getidxbufdat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + free(idxbuf); + idxbuf = (char *)calloc(size+2, 1); + datbuf = (char *)calloc(size+2, 1); + fastSearch[j]->gettext(start, size + 2, idxbuf, datbuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datbuf; + while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + free(datbuf); + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listkey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (list ::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + } + else listkey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } + + // if we don't support this search, fall back to base class + return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + +#ifdef _MSC_VER +SWModule &RawText::operator =(SW_POSITION p) { +#else +RawText &RawText::operator =(SW_POSITION p) { +#endif + SWModule::operator =(p); + return *this; +} + +SWModule &RawText::setentry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = 
SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), inbuf, len); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return *this; +} + +SWModule &RawText::operator <<(const char *inbuf) { + return setentry(inbuf, 0); +} + + +SWModule &RawText::operator <<(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + destkey = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + srckey = SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; + + return *this; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + +/****************************************************************************** + * RawText::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &RawText::operator +=(int increment) +{ + long start; + unsigned short size; + VerseKey *tmpkey = 0; + +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (increment) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (increment > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { + increment += (increment < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? 
KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; + + return *this; +} diff --git a/src/modules/texts/rawtext/rawtxidx.c b/src/modules/texts/rawtext/rawtxidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/texts/rawtext/rawtxidx.c @@ -0,0 +1,146 @@ +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { /* O_CREAT requires an explicit mode argument */ + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); +
curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + } + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/rtfidx.cpp b/src/modules/texts/rawtext/rtfidx.cpp new file mode 100644 index 0000000..9fdb305 --- /dev/null +++ b/src/modules/texts/rawtext/rtfidx.cpp @@ -0,0 +1,164 @@ +#include +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + 
VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { /* O_CREAT requires an explicit mode argument */ + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ?
"Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + memset(buf, ' ', 17); + + while (1) { + offadj = -10; + inquotes = 0; + sizeadj = 0; + if (!memcmp(&buf[1], "\\f0\\fs16\\cf2\\up6", 15)) { + offadj = 0; + inquotes = 1; + sizeadj = (*buf == 10) ? 
-19:-17; + } + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } + if (offadj > -10) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + /* dead sprintf(buf2, "%d:%d", ch2, vs2) removed: this findbreak() never assigns ch2/vs2 and the text could overrun buf2[7] */ + *size = (offset2 - (*offset)); + } + lseek(fp, *offset+17, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/rawtext/svetxidx.c b/src/modules/texts/rawtext/svetxidx.c new file mode 100644 index 0000000..26e67fd --- /dev/null +++ b/src/modules/texts/rawtext/svetxidx.c @@ -0,0 +1,153 @@ +#include +#include +#ifndef __GNUC__ +#include +#else +#include +#endif + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fd, vfd, cfd, bfd; + long pos, offset; + short size, tmp; + int num1, num2, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } +#ifndef O_BINARY // O_BINARY is for Borland to be happy.
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + if ((fd = open(argv[1], O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfd = open(buf, O_CREAT|O_WRONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfd, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfd, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfd, &pos, 4); /* Module intro */ + write(vfd, &size, 2); + write(vfd, &pos, 4); /* Testament intro */ + write(vfd, &size, 2); + + while (!findbreak(fd, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfd, 0, SEEK_CUR); + write(bfd, &pos, 4); + pos = lseek(vfd, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfd, &pos, 4); + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Book intro (vss) */ + write(vfd, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfd, 0, SEEK_CUR); + write(cfd, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfd, &pos, 4); /* Chapter intro */ + write(vfd, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7ld\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence (%2d:%3d:%3d)\n", curbook, num1-1, num2); + curchap = num1; +// break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of 
sequence (%2d:%3d:%3d)\n", curbook, num1, num2-1); +// break; + tmp = 0; + curverse = num2; + write(vfd, &offset, 4); + write(vfd, &tmp, 2); + } + write(vfd, &offset, 4); + write(vfd, &size, 2); + } + + close(vfd); + close(cfd); + close(bfd); + close(fd); +} + + +char findbreak(int fd, long *offset, int *num1, int *num2, short *size) +{ + char buf[8]; + char buf2[7] = ""; /* initialised: sscanf may not fill it before strncmp reads it */ + char loop = 0, len; /* loop is tested before the first read() stores into it */ + + memset(buf, ' ', 7); + buf[7] = 0; + + while (1) { + + memmove(buf, &buf[1], 6); + if (read(fd, &buf[6], 1) != 1) + return 1; + + if ((buf[0] == 10) && (isdigit((unsigned char)buf[1]))) { + sscanf(buf, "%d %s", num2, buf2); + if (!strncmp(buf2, "KAP", 3)) { + *num1 = *num2; + continue; + } + sprintf(buf, "%d", *num2); + (*offset) = lseek(fd, 0, SEEK_CUR); + (*offset) -= (5-strlen(buf)); + for (len = 1; len == 1; len = read(fd, &loop, 1)) { + if ((loop == 10) || (loop == 13)) + break; + } + + *size = (short)(lseek(fd, 0, SEEK_CUR) - *offset) - 1; + lseek(fd, -1, SEEK_CUR); + break; + } + } + return 0; +} + diff --git a/src/modules/texts/rawtext/vntidx.cpp b/src/modules/texts/rawtext/vntidx.cpp new file mode 100644 index 0000000..bbb4a9e --- /dev/null +++ b/src/modules/texts/rawtext/vntidx.cpp @@ -0,0 +1,185 @@ +#include +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + VerseKey mykey; + + if ((argc < 2) || (argc > 3)) { + fprintf(stderr, "usage: %s [nt]\n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { /* O_CREAT requires an explicit mode argument */ + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY, 0644)) == -1) { + fprintf(stderr, "Couldn't open file:
%s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + mykey = (argc == 3) ? "Matthew 1:1" : "Genesis 1:1"; + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + num1 = mykey.Chapter(); + num2 = mykey.Verse(); + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n"); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n"); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + mykey++; + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[17]; + char buf2[7]; + char buf3[7]; + char loop; + char offadj, inquotes, sizeadj; + int offset2, ch2, vs2; + + strcpy (buf3, "\\par "); + buf3[5] = 10; + memset(buf, ' ', 17); + + while (1) { + offadj = -100; + inquotes = 0; 
+ sizeadj = 0; + if (!memcmp(buf, "\\par FIN DEL NUEVO TESTAMENTO", 16)) { + offadj = -11; +// inquotes = 1; + sizeadj = -7; + } + + if ((!memcmp(buf, buf3, 6)) && (!size)) { + offadj = -11; +// inquotes = 1; + sizeadj = -7; + } + if (!memcmp(buf, "\\par ", 6)) { + if (isdigit(buf[6])) { + for (loop = 7; loop < 10; loop++) { + if (!isdigit(buf[loop])) + break; + } + offadj = -(11 - (loop - 6)); + // inquotes = 1; + sizeadj = -7; + } + } +/* + if (!memcmp(buf, "\\fi200\\widctlpar", 16)) { + offadj = -1; +// inquotes = 1; + sizeadj = -18; + } +*/ + if (offadj > -100) { + *offset = lseek(fp, 0, SEEK_CUR) + offadj; + if (size) { + (*offset)++; + while (inquotes) { + while (read(fp, buf2, 1) == 1) { + if (*buf2 == '}') + break; + (*offset)++; + } + inquotes--; + } + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + *size = (offset2 - (*offset)); + } + lseek(fp, *offset-sizeadj, SEEK_SET); + } + else (*offset) += sizeadj; + return 0; + } + memmove(buf, &buf[1], 16); + if (read(fp, &buf[16], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp new file mode 100644 index 0000000..85da8a3 --- /dev/null +++ b/src/modules/texts/swtext.cpp @@ -0,0 +1,39 @@ +/****************************************************************************** + * swtext.cpp - code for base class 'SWText'- The basis for all text modules + */ + +#include +#include + + +/****************************************************************************** + * SWText Constructor - Initializes data for instance of SWText + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, "Biblical Texts", enc, dir, mark, 
ilang) +{ + delete key; + key = CreateKey(); +} + + +/****************************************************************************** + * SWText Destructor - Cleans up instance of SWText + */ + +SWText::~SWText() { +} + + +/****************************************************************************** + * SWText CreateKey - Create the correct key (VerseKey) for use with SWText + */ + +SWKey *SWText::CreateKey() +{ + return new VerseKey(); +} diff --git a/src/modules/texts/ztext/Makefile b/src/modules/texts/ztext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/ztext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/texts/ztext/Makefile.am b/src/modules/texts/ztext/Makefile.am new file mode 100644 index 0000000..2b78db6 --- /dev/null +++ b/src/modules/texts/ztext/Makefile.am @@ -0,0 +1,4 @@ +ztextdir = $(top_srcdir)/src/modules/texts/ztext + +libsword_la_SOURCES += $(ztextdir)/ztext.cpp + diff --git a/src/modules/texts/ztext/gbfidx.cpp b/src/modules/texts/ztext/gbfidx.cpp new file mode 100644 index 0000000..e7a9530 --- /dev/null +++ b/src/modules/texts/ztext/gbfidx.cpp @@ -0,0 +1,661 @@ +/***************************************************************************** + * + * This code reeks but works (sometimes). Good luck! 
+ * Modified for zText purposes + */ + +//#include +#include +#include +#include +#include +#include +#include + +//#ifndef __GNUC__ +#include +//#else +//#include +//#endif + +#include +#include + + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size); +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size); +void openfiles(char *fname); +void checkparams(int argc, char **argv); + + +VerseKey key1, key2, key3; +int fp=0, vfp=0, cfp=0, bfp=0; +long chapoffset=0; +unsigned short chapsize=0; +long bookoffset=0; +unsigned short booksize=0; +long testoffset=0; +unsigned short testsize=0; +long verseoffset=0; +unsigned short versesize=0; +long nextoffset=0; +char testmnt=0; +int deadcount = 0; +int chapmark=-4, bookmark=-1; +ofstream cfile; + + +int main(int argc, char **argv) +{ + long pos, offset; + int num1, num2, rangemax;//, curbook = 0, curchap = 0, curverse = 0; + //char buf[127], + char startflag = 0; + unsigned short size;//, tmp; + + checkparams(argc, argv); + + openfiles(argv[1]); + //key1 = "Matthew 1:1"; + //key2 = "Matthew 1:1"; + //key3 = "Matthew 1:1"; + + testmnt = key1.Testament(); + cfile << "testament" << (int) testmnt << "\n"; + num1 = key1.Chapter(); + num2 = key1.Verse(); + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + /*pos = 0; + size = 0; + write(vfp, &pos, 4); // Module intro + write(vfp, &size, 2); + cfile << "modintro pos{" << pos << "} size{" << size << "}\n"; + write(vfp, &pos, 4); // Testament intro + write(vfp, &size, 2); + cfile << "test intro pos{" << pos << "} size{" << size << "}\n"; + */ + cout << "GBFIDX Running\n"; + cout.flush(); + while(!findbreak(fp, &offset, &num1, &num2, &rangemax, &size)) { + if (!startflag) { + startflag = 1; + } + else { + if (num2 < key2.Verse()) { // new chapter + 
if (num1 <= key2.Chapter()) { // new book + key2.Verse(1); + key2.Chapter(1); + key2.Book(key2.Book()+1); + } + cfile << "Found Chapter Break: " << num1 << " ('" << (const char *)key2 << "')\n"; + //chapoffset = offset; + //chapsize = chapsize - size; +// continue; + } + } + key2.Verse(1); + key2.Chapter(num1); + key2.Verse(num2); + + key3 = key2; +// key3 += (rangemax - key3.Verse()); + + writeidx(key1, key2, key3, offset, size); + } + close(vfp); + close(cfp); + close(bfp); + close(fp); + + return 1; +} + + +/************************************************************************** + * writeidx: key1 - current location of index + * key2 - minimum keyval for which this offset is valid + * key3 - maximum keyval for which this offset is valid + */ + +void writeidx(VerseKey &key1, VerseKey &key2, VerseKey &key3, long offset, short size) +{ + long pos; + unsigned short tmp; + + for (; ((key1 <= key3) && (key1.Error() != KEYERR_OUTOFBOUNDS) && (key1.Testament() == testmnt)); key1+=1) { + if (chapmark>=2) + { + if (bookmark==2) + { + //booksize = booksize - chapsize + 7; + cfile << "versesize " << versesize << " booksize " << booksize << " chapsize " << chapsize << " size " << size << "\n"; + //cfile.flush(); + //assert(chapsize < size); + //if (chapsize > size) // At start of Psalms gets chapsize rather than chapsize+size ??? + //{ + // versesize = versesize - (booksize - (chapsize - size) + 7); + //} + //else + //{ + versesize = versesize - (booksize - (chapsize) + 7); + //} + cfile << "Last verse in book\n"; + } + //chapsize = chapsize - size; + cfile << "versesize " << versesize << " chapsize " << chapsize << " size " << size<< "\n"; + cfile.flush(); + //assert(chapsize > size); + //if (chapsize > size) // At start of Psalms gets chapsize rather than chapsize+size ??? 
+ //{ + // versesize = versesize - (chapsize - size); + //} + //else + //{ + versesize = versesize - (chapsize); + //} + cfile << "Last verse in chapter\n"; + } + if (chapmark>=2 && bookmark!=1) + { + cfile << "prev verse pos{" << verseoffset << "} size{" << versesize << "} nextoffset{" << nextoffset << "}\n"; + cfile.flush(); + assert(verseoffset==nextoffset); + write(vfp, &verseoffset, 4); + write(vfp, &versesize, 2); + nextoffset = verseoffset+versesize; + bookmark = 0; + chapmark = 0; + } + if (key1.Verse() == 1) { // new chapter + cfile << "size??? " << size << "\n"; + cfile.flush(); + //assert(chapsize > size || key1.Chapter()==1); + //assert(chapsize > size); + //if (chapsize > size) // At start of books gets chapsize rather than chapsize+size + //{ + // chapsize = chapsize - size; + //} + if (key1.Chapter() == 1) { // new book + booksize = booksize - chapsize + 7; + if (key1.Book() == 1) + { + pos = 0; + //tmp = testoffset; + tmp = 0; // better just remember that it goes up to the testament intro to avoid 64k limit + // AV exceeds that anyway! 
+ write(vfp, &pos, 4); /* Module intro */ + write(vfp, &tmp, 2); + assert(nextoffset==0); + cfile << "modintro pos{" << pos << "} size{" << tmp << "}\n"; + testsize = testsize - booksize - chapsize + 7; + if (testsize > 10000) + { + cerr << "Error: testament too big " << testsize << "\n"; + exit(-1); + } + //assert(testoffset==nextoffset); + write(vfp, &testoffset, 4); /* Testament intro (vss) */ + write(vfp, &testsize, 2); + nextoffset = testoffset+testsize; + cfile << "test intro pos{" << testoffset << "} size{" << testsize << "}\n"; + } + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + if (booksize > 10000) + { + cerr << "Error: book too big " << booksize << "\n"; + exit(-1); + } + assert(bookoffset==nextoffset); + write(vfp, &bookoffset, 4); /* Book intro (vss) */ + write(vfp, &booksize, 2); + nextoffset = bookoffset+booksize; + cfile << "book intro pos{" << bookoffset << "} size{" << booksize << "}\n"; + //offset += booksize; + //bookmark = false; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + assert(chapsize < 10000); + write(vfp, &chapoffset, 4); /* Chapter intro */ + write(vfp, &chapsize, 2); + nextoffset = chapoffset+chapsize; + cfile << "chapter intro pos{" << chapoffset << "} size{" << chapsize << "}\n"; + //offset += chapsize; + //size -= chapsize; + //chapmark = false; + } + if (key1 >= key2) { + if (size > 10000) + { + cerr << "Error: verse too big " << size << "\n"; + exit(-1); + } + if (!chapmark && !bookmark) + { + write(vfp, &offset, 4); + write(vfp, &size, 2); + cfile << "verse pos{" << offset << "} size{" << size << "}\n"; + cfile.flush(); + assert(offset==nextoffset); + nextoffset = offset+size; + //cfile << "bookmark " << bookmark << " chapmark " << chapmark << "\n"; + } + else + { + verseoffset = offset; + versesize = size; + cfile << "saving verse pos{" << offset << "} size{" << size << "}\n"; + cfile << "bookmark " << bookmark << " chapmark 
" << chapmark << "\n"; + } + } + else { + pos = 0; + tmp = 0; + write(vfp, &pos, 4); + write(vfp, &tmp, 2); + cfile << "blank pos{" << pos << "} size{" << tmp << "}\n"; + } + } +} + +char startmod(char *buf) +{ + //char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'H') + return 0; + if (buf[2] != '0') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char starttest(char *buf) +{ + //char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'B') + return 0; + if (testmnt==2) + { + if (buf[2] != 'N') + return 0; + } + else + { + if (buf[2] != 'O') + return 0; + } + //if (buf[3] != '>') + // return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startbook(char *buf) +{ + //char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'B') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startchap(char *buf) +{ + //char loop; + + if (buf[0] != '<') + return 0; + if (buf[1] != 'S') + return 0; + if (buf[2] != 'C') + return 0; +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char startentry(char *buf) +{ + //char loop; + //cfile << "{SV}"; + + if (buf[0] != '<') + { + //cfile << "{no<}"; + return 0; + } + if (buf[1] != 'S') + { + //cfile << "\n{noS}\n"; + return 0; + } + if (buf[2] != 'V') + { + //cfile 
<< "\n{noV}\n"; + return 0; + } +/* + if (!isdigit(buf[2])) + return 0; + for (loop = 3; loop < 7; loop++) { + if (buf[loop] == ' ') + break; + if ((!isdigit(buf[loop])) && (buf[loop] != ',') && (buf[loop] != '-')) + return 0; + } +*/ + return 1; +} + + +char findbreak(int fp, long *offset, int *num1, int *num2, int *rangemax, unsigned short *size) +{ + char buf[8]; + //char buf2[20]; + //char ch; + char loop=0; + long offset2; + int ch2, vs2, rm2; + bool flag; + long versestart = 0; + long chapstart = 0; + long bookstart = 0; + long teststart = 0; + + memset(buf, ' ', 8); + + while (1) { + //cfile << "#" << buf << "#"; + //if (lseek(fp, 0, SEEK_CUR) > 2000000) + //{ + // cfile << lseek(fp, 0, SEEK_CUR) << "\n"; + //} + if (starttest(buf)) { + cfile << "\n{start of testament}\n"; + //chapstart = lseek(fp, 0, SEEK_CUR) - 7; + teststart = lseek(fp, 0, SEEK_CUR) - 7; + testoffset = teststart; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (buf[loop]!='>') + flag = true; + else { + buf[loop] = 0; + break; + } + } + ch2 = *num1; + vs2 = 1; + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + testsize = (unsigned short) (lseek(fp, 0, SEEK_END) - teststart-7); + } + else { + if (vs2) { + testsize = (offset2 - teststart - 7); + } + } + lseek(fp, teststart+7, SEEK_SET); + cfile << "\nGot testsize " << testsize << "\n"; + } + } + + + if (startbook(buf)) { + cfile << "\n{start of book}\n"; + bookmark++; + //chapstart = lseek(fp, 0, SEEK_CUR) - 7; + bookstart = lseek(fp, 0, SEEK_CUR) - 7; + bookoffset = bookstart; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (buf[loop]!='>') + flag = true; + else { + buf[loop] = 0; + break; + } + } + if (size) { + ch2 = *num1; + vs2 = 1; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + booksize = (unsigned short) (lseek(fp, 0, SEEK_END) - bookstart - 7); + } + else { + if (vs2) { + booksize = (offset2 - bookstart - 7); + } + } + lseek(fp, bookstart+7, 
SEEK_SET); + cfile << "\nGot booksize " << booksize << "\n"; + } + } + + if (startchap(buf)) { + cfile << "{start of chapter}"; + chapmark++; + //chapstart = lseek(fp, 0, SEEK_CUR) - 7; + chapstart = lseek(fp, 0, SEEK_CUR) - 7; + chapoffset = chapstart; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + } + if (flag) + *num1 = atoi(buf); + else (*num1)++; + + if (size) { + ch2 = *num1; + vs2 = 1; + lseek(fp, chapstart, SEEK_SET); + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + chapsize = (unsigned short) (lseek(fp, 0, SEEK_END) - chapstart); + cfile << "getting chapsizeend{" << chapsize << "} = " << lseek(fp, 0, SEEK_END) << " - " << chapstart << "\n"; + } + else { + if (vs2) { + chapsize = (offset2 - chapstart); + cfile << "getting chapsize{" << chapsize << "} = " << offset2 << " - " << chapstart << "\n"; + } + } + lseek(fp, chapstart + 7, SEEK_SET); + cfile << "\nGot chapsize " << chapsize << " loop{" << (int) loop << "}\n"; + } + //return 0; + + } + if (startentry(buf)) { + //cfile << "{start of verse}"; + memset(buf, ' ', 3); + flag = false; + for (loop = 3; loop < 6; loop++) { + if (isdigit(buf[loop])) + flag = true; + else { + buf[loop] = 0; + break; + } + if (flag) + *num2 = atoi(buf); + else (*num2)++; + } + loop++; + /* + if (size) + { + // *offset = lseek(fp, 0, SEEK_CUR) - (7 - loop); + *offset = lseek(fp, 0, SEEK_CUR) - 7; + } + //else *offset = (chapstart) ? chapstart : lseek(fp, 0, SEEK_CUR) - 7; + else *offset = (chapstart) ? 
chapstart : lseek(fp, 0, SEEK_CUR) - 7; + */ + /*if (chapstart) + { + chapsize = *offset-chapstart; + } + else + { + chapsize = 0; + }*/ + *offset = lseek(fp, 0, SEEK_CUR) - 7; + versestart = *offset; + if (size) { + ch2 = *num1; + vs2 = *num2; + if (findbreak(fp, &offset2, &ch2, &vs2, &rm2, 0)) { + *size = (unsigned short) (lseek(fp, 0, SEEK_END) - versestart); + cfile << "getting sizeend{" << *size << "} = " << lseek(fp, 0, SEEK_END) << " - " << versestart << "\n"; + } + else { + if (vs2) { + *size = (offset2 - versestart); + cfile << "getting size{" << *size << "} = " << offset2 << " - " << versestart << "\n"; + } + } + lseek(fp, *offset+1, SEEK_SET); + } + else + { + cfile << "got offset{" << *offset << "}\n"; + } + return 0; + } + //cfile << "{ng}"; + //deadcount++; + //if (deadcount==1000) exit(-1); + //if (!size) + //{ + // cfile << "not bound offset{" << *offset << "}\n"; + //} + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + + +void openfiles(char *fname) +{ +#ifndef O_BINARY // O_BINARY is needed in Borland C++ 4.53 +#define O_BINARY 0 // If it hasn't been defined than we probably +#endif // don't need it. 
+ char buf[255]; + + if ((fp = open(fname, O_RDONLY|O_BINARY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", fname); + exit(1); + } + + sprintf(buf, "%s.vss", fname); + if ((vfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", fname); + if ((cfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", fname); + if ((bfp = open(buf, O_CREAT|O_WRONLY|O_BINARY|O_TRUNC)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + cfile.open("gbfidx.log", ios::out); + if (!cfile.is_open()) + { + cerr << "Failed to open log file\n"; + exit(-1); + } +} + + +void checkparams(int argc, char **argv) +{ + if (argc < 2) { + fprintf(stderr, "usage: %s [nt - for new testmt file]\n", argv[0]); + exit(1); + } + if (!strcmp(argv[1], "nt")) + key1 = key2 = key3 = "Matthew 1:1"; + else if (!strcmp(argv[1], "ot")) + { + key1 = key2 = key3 = "Genesis 1:1"; + } + else + { + cerr << "File must be ot or nt\n"; + exit(-1); + } +} diff --git a/src/modules/texts/ztext/makeidx.c b/src/modules/texts/ztext/makeidx.c new file mode 100644 index 0000000..311103e --- /dev/null +++ b/src/modules/texts/ztext/makeidx.c @@ -0,0 +1,146 @@ +#include +#include + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size); + + +main(int argc, char **argv) +{ + int fp, vfp, cfp, bfp; + long pos; + short size, tmp; + int num1, num2, offset, curbook = 0, curchap = 0, curverse = 0; + char buf[127]; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + if ((fp = open(argv[1], O_RDONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", argv[1]); + exit(1); + } + + sprintf(buf, "%s.vss", argv[1]); + if ((vfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.cps", 
argv[1]); + if ((cfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + sprintf(buf, "%s.bks", argv[1]); + if ((bfp = open(buf, O_CREAT|O_WRONLY)) == -1) { + fprintf(stderr, "Couldn't open file: %s\n", buf); + exit(1); + } + + pos = 0; + write(bfp, &pos, 4); /* Book offset for testament intros */ + pos = 4; + write(cfp, &pos, 4); /* Chapter offset for testament intro */ + + +/* Right now just zero out intros until parsing correctly */ + pos = 0; + size = 0; + write(vfp, &pos, 4); /* Module intro */ + write(vfp, &size, 2); + write(vfp, &pos, 4); /* Testament intro */ + write(vfp, &size, 2); + + while (!findbreak(fp, &offset, &num1, &num2, &size)) { + + if (num2 == 1) { /* if we're at a new chapter */ + if (num1 == 1) { /* if we're at a new book */ + pos = lseek(cfp, 0, SEEK_CUR); + write(bfp, &pos, 4); + pos = lseek(vfp, 0, SEEK_CUR); /* Book intro (cps) */ + write(cfp, &pos, 4); + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Book intro (vss) */ + write(vfp, &tmp, 2); + curbook++; + curchap = 0; + } + pos = lseek(vfp, 0, SEEK_CUR); + write(cfp, &pos, 4); + curverse = 1; + pos = 0; + tmp = 0; + write(vfp, &pos, 4); /* Chapter intro */ + write(vfp, &tmp, 2); + curchap++; + } + else curverse++; + + printf("%2d:%3d:%3d found at offset: %7d\n", curbook, num1, num2, offset); + + if (num1 != curchap) { + fprintf(stderr, "Error: Found chaptures out of sequence\n", buf); + break; + } + if (num2 != curverse) { + fprintf(stderr, "Error: Found verses out of sequence\n", buf); + break; + } + write(vfp, &offset, 4); + write(vfp, &size, 2); + } + + close(vfp); + close(cfp); + close(bfp); + close(fp); +} + + +char findbreak(int fp, int *offset, int *num1, int *num2, short *size) +{ + char buf[7]; + char buf2[7]; + char loop; + int offset2, ch2, vs2; + + memset(buf, ' ', 7); + + while (1) { + if (buf[3] == ':') { + memcpy(buf2, buf, 7); + for (loop = 0; loop < 7; loop++) { + if (!isdigit(buf2[loop])) + buf2[loop] = ' '; + 
} + buf2[3] = 0; + *num1 = atoi(buf2); + *num2 = atoi(&buf2[4]); + if (*num1 && *num2) { + *offset = lseek(fp, 0, SEEK_CUR); + sprintf(buf2, "%d", *num2); + *offset -= 2 - strlen(buf2); + if (size) { + if (findbreak(fp, &offset2, &ch2, &vs2, 0)) { + *size = (short) (lseek(fp, 0, SEEK_END) - (*offset)); + } + else { + sprintf(buf2, "%d:%d", ch2, vs2); + *size = (offset2 - (*offset)) - (strlen(buf2) + 2); + } + lseek(fp, *offset, SEEK_SET); + } + return 0; + } + } + memmove(buf, &buf[1], 6); + if (read(fp, &buf[6], 1) != 1) + return 1; + } +} + diff --git a/src/modules/texts/ztext/nasb.cpp b/src/modules/texts/ztext/nasb.cpp new file mode 100644 index 0000000..51e08b4 --- /dev/null +++ b/src/modules/texts/ztext/nasb.cpp @@ -0,0 +1,107 @@ + + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include + +class FileCompress: public SWCompress { + int ifd; + int ofd; + int ufd; + int zfd; +public: + FileCompress(char *); + ~FileCompress(); + int GetChars(char *, int len); + int SendChars(char *, int len); + void Encode(); + void Decode(); +}; + + +FileCompress::FileCompress(char *fname) +{ + char buf[256]; + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + ufd = open(fname, O_RDWR|O_CREAT|O_BINARY); + + sprintf(buf, "%s.zzz", fname); + zfd = open(buf, O_RDWR|O_CREAT|O_BINARY); +} + + +FileCompress::~FileCompress(char *fname) +{ + close(ufd); + close(zfd); +} + + +int FileCompress::GetChars(char *buf, int len) +{ + return read(ifd, buf, len); +} + + +int FileCompress::SendChars(char *buf, int len) +{ + return write(ofd, buf, len); +} + + +void FileCompress::Encode() +{ + ifd = ufd; + ofd = zfd; + + SWCompress::Encode(); +} + + +void FileCompress::Decode() +{ + ifd = zfd; + ofd = ufd; + + SWCompress::Decode(); +} + + +main(int argc, char **argv) +{ + int decomp = 0; + SWCompress *fobj; + + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + if (strlen(argv[1]) > 4) { + if 
(!strcmp(&argv[1][strlen(argv[1])-4], ".zzz")) { + argv[1][strlen(argv[1])-4] = 0; + decomp = 1; + } + } + + fobj = new FileCompress(argv[1]); + + if (decomp) + fobj->Decode(); + else fobj->Encode(); + + delete fobj; +} diff --git a/src/modules/texts/ztext/rawtxt2z.cpp b/src/modules/texts/ztext/rawtxt2z.cpp new file mode 100644 index 0000000..7eafe2a --- /dev/null +++ b/src/modules/texts/ztext/rawtxt2z.cpp @@ -0,0 +1,457 @@ +// Compression on variable granularity + +#include +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include + +int iBufSize, ulBuffNum; +ofstream cfile; +ofstream cfile2; + +int ofd[2], oxfd[2], ovxfd[2]; +int ifd[2], ixfd[2]; +int itestfd[2], itestxfd[2]; +unsigned long ulIOff=0, ulCOff=0, ulFOff=0, ulNone=0; +string currbuff=""; + + +int openreadfile(char *buffer, char *path, const char *filename) +{ + int filenum; + sprintf(buffer, "%s/%s", path, filename); + cfile << buffer << "\n"; + filenum = open(buffer, O_RDONLY|O_BINARY); + if (filenum > 0) + { + return filenum; + } + else + { + cerr << "failed to open file to read\n"; + exit(-1); + } +} + +int openwritefile(char *buffer, char *path, const char *filename) +{ + int filenum; + sprintf(buffer, "%s/%s", path, filename); + cfile << buffer << "\n"; + filenum = open(buffer, O_WRONLY|O_BINARY|O_CREAT|O_TRUNC); + if (filenum > 0) + { + return filenum; + } + else + { + cerr << "failed to open file to read\n"; + exit(-1); + } +} + +int bytebound(unsigned long offset, VerseKey &thekey) +{ + unsigned long bufferoff; + cfile << "byteboundtest " << thekey << "\n"; + bufferoff = iBufSize * (ulBuffNum+1); + if (offset > bufferoff) + { + return 1; + } + else + { + return 0; + } +} + +int versebound(unsigned long offset, VerseKey &thekey) +{ + cfile << "verseboundtest " << thekey << "\n"; + return 1; +} + +int chapterbound(unsigned long offset, VerseKey &thekey) +{ + VerseKey testkey; + testkey = thekey; + testkey++; + //cfile << "chapterboundtest " << 
testkey; + if (testkey.Verse()==1 || (!thekey.compare("Revelation of John 22:21"))) + { + //cfile << " 1\n"; + return 1; + } + else + { + //cfile << " 0\n"; + return 0; + } +} + +int bookbound(unsigned long offset, VerseKey &thekey) +{ + VerseKey testkey; + testkey = thekey; + cfile << "bookboundtest " << testkey << "\n"; + testkey++; + if (testkey.Chapter()==1 || (!thekey.compare("Revelation of John 22:21"))) + { + return 1; + } + else + { + return 0; + } +} + + +typedef int (*boundfunc)(unsigned long offset, VerseKey &thekey); + +int writeblock(int i) +{ + char *destbuff=NULL; + unsigned long compsize = 0, buffsize=0; + + cfile << "compressing block\n"; + // compress current buffer + buffsize = currbuff.length(); + write(itestfd[i], currbuff.c_str(), buffsize); + compsize = (unsigned long) (buffsize*1.01)+20; // at least 1% bigger than buffer + 12 bytes + //cfile << "{" << compsize << "}"; + //destbuff = (char *) calloc(compsize + 1, 1); + destbuff = new char[compsize]; + if (compress((Bytef*)destbuff, &compsize, (const Bytef*)currbuff.c_str(), buffsize)!=Z_OK) + { + cerr << "Could not compress buffer: exiting\n"; + delete[] destbuff; + exit(-1); + } + //cout << "Compressed buffer{" << compsize << "}\n" << destbuff << "\n"; + //cout.flush(); + // write to compressed file index + ulCOff = lseek(ofd[i], 0, SEEK_END); + write(oxfd[i], &ulCOff, 4); // offset in compressed file + write(oxfd[i], &compsize, 4); // compressed size + write(oxfd[i], &buffsize, 4); // uncompressed size + cfile << buffsize << " -> " << compsize << "\n"; + cfile2 << "Compressed{" << compsize << "}\n" << destbuff << "\n"; + cfile2.flush(); + + //write compressed buffer to file + write(ofd[i], destbuff, compsize); + + //free(destbuff); + delete[] destbuff; + + currbuff = ""; + ulBuffNum++; + ulIOff = 0; + return 1; +} + + + +int main(int argc, char **argv) +{ + VerseKey key1, key2, key3; + int i; + char xbuff[64]; + unsigned long offset; + unsigned short size=0; + unsigned long ulsize=0; + char 
*tmpbuf=NULL; + int iType; + boundfunc blockbound[4] = {bytebound, versebound, chapterbound, bookbound}; + bool newbook=true, newchapter=true, newtestament = true, newmodule = true, lasttodo=true; + + if ((argc < 2) || (argc > 4)) { + cerr << "usage: " << argv[0] << " datapath [compression type [buffer size]]\n"; + exit(1); + } + + if (argc>2) + { + iType = atoi(argv[2]); + if (argc==4) + { + iBufSize = atoi(argv[3]); + } + else + { + iBufSize = 1; + } + } + else + { + iType = 2; + iBufSize = 1; + } + + cfile.open("raw2z.log", ios::out); + if (!cfile.is_open()) + { + cerr << "Failed to open log file\n"; + exit(-1); + } + cfile2.open("raw2z.lg2", ios::out); + if (!cfile2.is_open()) + { + cerr << "Failed to open log file\n"; + exit(-1); + } + cfile << iType << " " << iBufSize << "\n"; + + if ((iType<=0) || (iType > 4) || !iBufSize || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "/?") || !strcmp(argv[1], "-help")) + { + cfile << argv[0] << " - a tool to create compressed Sword modules\n"; + cfile << "version 0.1\n\n"; + cfile << "usage: "<< argv[0] << " datapath [compression type [buffer size]]\n\n"; + cfile << "datapath: the directory in which to find the raw module\n"; + cfile << "compression type: (default 2)\n" << " 1 - bytes\n" << " 2 - verses\n" << " 3 - chapters\n" << " 4 - books\n"; + cfile << "buffer size (default 1): the number of the compression type in each block\n"; + exit(1); + } + + //zobj = new SWCompress(); + //rawdrv = new RawVerse(argv[1]); + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + cfile << "opening files\n"; + + tmpbuf = new char [ strlen(argv[1]) + 11 ]; + + //original files + ifd[0] = openreadfile(tmpbuf, argv[1], "ot"); + ixfd[0] = openreadfile(tmpbuf, argv[1], "ot.vss"); + ifd[1] = openreadfile(tmpbuf, argv[1], "nt"); + ixfd[1] = openreadfile(tmpbuf, argv[1], "nt.vss"); + +switch ( iType) { + case 1 : + ofd[0] = openwritefile(tmpbuf, argv[1], "ot.rzz"); + oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzs"); + 
ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.rzv"); + ofd[1] = openwritefile(tmpbuf, argv[1], "nt.rzz"); + oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzs"); + ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.rzv"); + //boundfunc = bytebound; + break; + case 2 : + ofd[0] = openwritefile(tmpbuf, argv[1], "ot.vzz"); + oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzs"); + ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.vzv"); + ofd[1] = openwritefile(tmpbuf, argv[1], "nt.vzz"); + oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzs"); + ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.vzv"); + break; + case 3 : + ofd[0] = openwritefile(tmpbuf, argv[1], "ot.czz"); + oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czs"); + ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.czv"); + ofd[1] = openwritefile(tmpbuf, argv[1], "nt.czz"); + oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czs"); + ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.czv"); + break; + case 4 : + ofd[0] = openwritefile(tmpbuf, argv[1], "ot.bzz"); + oxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzs"); + ovxfd[0] = openwritefile(tmpbuf, argv[1], "ot.bzv"); + ofd[1] = openwritefile(tmpbuf, argv[1], "nt.bzz"); + oxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzs"); + ovxfd[1] = openwritefile(tmpbuf, argv[1], "nt.bzv"); + break; + default: + cerr << "Unknown compression type\n"; + exit(-1); +} + itestfd[0] = openwritefile(tmpbuf, argv[1], "ot.tst"); + itestfd[1] = openwritefile(tmpbuf, argv[1], "nt.tst"); + itestxfd[0] = openwritefile(tmpbuf, argv[1], "ot.tdx"); + itestxfd[1] = openwritefile(tmpbuf, argv[1], "nt.tdx"); + + + delete [] tmpbuf; + + //cfile << "about to start\n"; + +for ( i=0; i<2; i++) +{ + ulIOff=0, ulBuffNum=0; + currbuff = ""; + key1 = (i == 1) ? 
"Matthew 1:1" : "Genesis 1:1"; + key2 = key3 = key1; + newtestament = true; + + cfile << "key: " << key1 << " Testament {" << key1.Testament()-1 << "}\n"; + //cfile << "Chapter {" << key.Chapter() << "}\n"; + //cfile << "Verse {" << key.Verse() << "}\n"; + //cfile << key.compare("Revelation of John 22:21") << "\n"; + //cfile << key.compare("Genesis 1:1") << "\n"; + do + { + //cfile << "ok"; + // read current verse offset + if (read(ixfd[i], &offset, 4) != 4) + { + cfile << "Failed to read input verse offsets?\n"; + break; + } + if (read(ixfd[i], &size, 2) != 2) + { + cfile << "Failed to read input verse sizes?\n"; + break; + } + cfile << "key:" << key1 << " offset:" << offset << " size:" << size << "\n"; + sprintf(xbuff, "key{%s} offset{%ld} size{%d}\n", (const char *)key1, offset, size); + write(itestxfd[i], &xbuff, strlen(xbuff)); + ulsize = size; + if (!offset && !size) + { + //Check for module header + if (read(ixfd[i], &ulIOff, 4) != 4) + { + cfile << "Failed to read input verse offsets?\n"; + break; + } + ulsize = ulIOff; + ulIOff = 0; + lseek(ixfd[i], 6, SEEK_SET); + } + + if (ulsize) + { + // read current verse and add to current buffer + tmpbuf = (char *) calloc(ulsize + 1, 1); + lseek(ifd[i], offset, SEEK_SET); + read(ifd[i], tmpbuf, ulsize); + currbuff += tmpbuf; + //cfile << currbuff << "\n"; + + // write to verse index into compressed + write(ovxfd[i], &ulBuffNum, 4); // current buffer number + write(ovxfd[i], &ulIOff, 4); // offset within the buffer + write(ovxfd[i], &size, 2); // verse size + + ulFOff = lseek(ofd[i], 0, SEEK_CUR) + size; + if (key1.compare("Revelation of John 22:21")!=-1) + { + lasttodo = false; + } + if (blockbound[iType-1](ulFOff, key1)/*at block boudary*/) + { + writeblock(i); + /* + cfile << "compressing block\n"; + // compress current buffer + buffsize = currbuff.length(); + write(itestfd[i], currbuff.c_str(), buffsize); + compsize = (unsigned long) (buffsize*1.01)+20; // at least 1% bigger than buffer + 12 bytes + //cfile << 
"{" << compsize << "}"; + //destbuff = (char *) calloc(compsize + 1, 1); + destbuff = new char[compsize]; + if (compress((Bytef*)destbuff, &compsize, (const Bytef*)currbuff.c_str(), buffsize)!=Z_OK) + { + cerr << "Could not compress buffer: exiting\n"; + delete[] destbuff; + exit(-1); + } + //cout << "Compressed buffer{" << compsize << "}\n" << destbuff << "\n"; + //cout.flush(); + // write to compressed file index + ulCOff = lseek(ofd[i], 0, SEEK_END); + write(oxfd[i], &ulCOff, 4); // offset in compressed file + write(oxfd[i], &compsize, 4); // compressed size + write(oxfd[i], &buffsize, 4); // uncompressed size + cfile << buffsize << " -> " << compsize << "\n"; + cfile2 << "Compressed{" << compsize << "}\n" << destbuff << "\n"; + cfile2.flush(); + + //write compressed buffer to file + write(ofd[i], destbuff, compsize); + + //free(destbuff); + delete[] destbuff; + + currbuff = ""; + ulBuffNum++; + ulIOff = 0; + */ + } + else + { + ulIOff += ulsize; + } + free(tmpbuf); + + if (newmodule) + { + newmodule = false; + cfile << "had a new module " << (const char *) key1 << "{" << offset << "}\n"; + writeblock(i); + } + else if (newtestament) + { + newtestament = false; + cfile << "had a new testament " << (const char *) key1 << "{" << offset << "}\n"; + } + else if (newbook) + { + newbook = false; + cfile << "had a new book " << (const char *) key1 << "{" << offset << "}\n"; + } + else if (newchapter) + { + newchapter = false; + cfile << "had a new chapter " << (const char *) key1 << "{" << offset << "}\n"; + } + else + { + key1++; + } + + if (key1.Chapter()!=key2.Chapter() || (key1.Book()!=key2.Book())) + { + newchapter = true; + cfile << "got a new chapter " << (const char *) key1 << "\n"; + } + if (key1.Book()!=key2.Book()) + { + newbook = true; + cfile << "got a new book " << (const char *) key1 << "\n"; + } + key2 = key1; + + } + else + { + cfile << "empty offset\n"; + // write to verse index into compressed + write(ovxfd[i], &ulNone, 4); // current buffer number + 
write(ovxfd[i], &size, 2); // verse size + write(ovxfd[i], &ulNone, 4); // offset within the buffer + } + } + while ( (key1.Testament()==i+1) && ((key1.compare("Revelation of John 22:21")==-1) || (lasttodo))); + + close(ifd[i]); + close(ofd[i]); + close(ixfd[i]); + close(oxfd[i]); + close(ovxfd[i]); + close(itestfd[i]); + close(itestxfd[i]); +} + return 1; +} diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp new file mode 100644 index 0000000..6e243b9 --- /dev/null +++ b/src/modules/texts/ztext/ztext.cpp @@ -0,0 +1,347 @@ +/****************************************************************************** + * ztext.cpp - code for class 'zText'- a module that reads compressed text + * files: ot and nt using indexs ??.vss + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +//#include +#include +//#include + + +/****************************************************************************** + * zText Constructor - Initializes data for instance of zText + * + * ENT: ipath - path to data files + * iname - Internal name for module + * idesc - Name to display to user for module + * iblockType - verse, chapter, book, etc. 
of index chunks + * icomp - Compressor object + * idisp - Display object to use for displaying + */ + +zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ +{ + blockType = iblockType; + lastWriteKey = 0; +} + + +/****************************************************************************** + * zText Destructor - Cleans up instance of zText + */ + +zText::~zText() +{ + flushCache(); + + if (lastWriteKey) + delete lastWriteKey; +} + + +/****************************************************************************** + * zText::getRawEntry - Returns the current verse buffer + * + * RET: buffer with verse + */ + +char *zText::getRawEntry() +{ +/* + long start; + unsigned long size; + unsigned long destsize; + char *tmpbuf; + char *dest; + VerseKey *lkey = (VerseKey *) SWModule::key; + char sizebuf[3]; + + lkey->Verse(0); + if (chapcache != lkey->Index()) { + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + gettext(lkey->Testament(), start, 3, sizebuf); + memcpy(&size, sizebuf, 2); + tmpbuf = new char [ size + 1 ]; + gettext(lkey->Testament(), start + 2, size + 1 , tmpbuf); + //zBuf(&size, tmpbuf); + dest = new char [ (size*4) + 1 ]; + uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size); + chapcache = lkey->Index(); + delete [] tmpbuf; + } + + //findoffset(key->Testament(), key->Index(), &start, &size); + findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); + + if (versebuf) + delete [] versebuf; + versebuf = new char [ size + 1 ]; + //memcpy(versebuf, Buf(), size); + memcpy(versebuf, dest, destsize); + delete [] dest; + + preptext(versebuf); + + return versebuf; +*/ + + long start = 0; + unsigned short size = 0; + VerseKey *key = 0; + + //printf 
("zText char *\n"); + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + //printf ("checking cache\n"); + //printf ("finding offset\n"); + findoffset(key->Testament(), key->Index(), &start, &size); + entrySize = size; // support getEntrySize call + + //printf ("deleting previous buffer\n"); + unsigned long newsize = (size + 2) * FILTERPAD; + if (newsize > entrybufallocsize) { + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ newsize ]; + entrybufallocsize = newsize; + } + *entrybuf = 0; + + //printf ("getting text\n"); + swgettext(key->Testament(), start, (size + 2), entrybuf); + //printf ("got text\n"); + + rawFilter(entrybuf, size, key); + + //printf ("preparing text\n"); + if (!isUnicode()) + preptext(entrybuf); + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + //printf ("returning text\n"); + return entrybuf; + +} + + +bool zText::sameBlock(VerseKey *k1, VerseKey *k2) { + if (k1->Testament() != k2->Testament()) + return false; + + switch (blockType) { + case VERSEBLOCKS: + if (k1->Verse() != k2->Verse()) + return false; + case CHAPTERBLOCKS: + if (k1->Chapter() != k2->Chapter()) + return false; + case BOOKBLOCKS: + if (k1->Book() != k2->Book()) + return false; + } + return true; +} + + +SWModule &zText::setentry(const char *inbuf, long len) { + VerseKey *key = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!key) + key = new VerseKey(this->key); + + + // see if we've jumped across blocks since last write + if (lastWriteKey) { + if (!sameBlock(lastWriteKey, key)) { + flushCache(); + } + delete lastWriteKey; + } + + settext(key->Testament(), key->Index(), inbuf, len); + + lastWriteKey = (VerseKey *)key->clone(); // must delete + + if (this->key != key) // free our key if we created a VerseKey + delete key; + + return *this; +} + +SWModule &zText::operator <<(const char *inbuf) { + return setentry(inbuf, 0); +} + + +SWModule &zText::operator <<(const SWKey *inkey) { + VerseKey *destkey = 0; + const VerseKey *srckey = 0; + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + destkey = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + // if we don't have a VerseKey * decendant, create our own + if (!destkey) + destkey = new VerseKey(this->key); + + // see if we have a VerseKey * or decendant +#ifndef _WIN32_WCE + try { +#endif + srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); +#ifndef _WIN32_WCE + } + catch ( ... ) { + } +#endif + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); + + if (this->key != destkey) // free our key if we created a VerseKey + delete destkey; + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; + + return *this; +} + + +/****************************************************************************** + * zFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void zText::deleteEntry() { + + VerseKey *key = 0; + +#ifndef _WIN32_WCE + try { +#endif + key = SWDYNAMIC_CAST(VerseKey, this->key); +#ifndef _WIN32_WCE + } + catch ( ... 
) {} +#endif + if (!key) + key = new VerseKey(this->key); + + settext(key->Testament(), key->Index(), ""); + + if (key != this->key) + delete key; +} + + +/****************************************************************************** + * zText::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &zText::operator +=(int increment) +{ + long start; + unsigned short size; + VerseKey *tmpkey = 0; + +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (increment) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (increment > 0) ? (*key)++ : (*key)--; + if (tmpkey != key) + delete tmpkey; + tmpkey = 0; +#ifndef _WIN32_WCE + try { +#endif + tmpkey = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch ( ... ) {} +#endif + if (!tmpkey) + tmpkey = new VerseKey(key); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findoffset(tmpkey->Testament(), index, &start, &size); + if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { + increment += (increment < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? KEYERR_OUTOFBOUNDS : 0; + + if (tmpkey != key) + delete tmpkey; + + return *this; +} -- cgit v1.2.3

"); + to += strlen(to); + continue; + } + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/rwprtf.cpp b/src/modules/filters/rwprtf.cpp new file mode 100644 index 0000000..8f7b074 --- /dev/null +++ b/src/modules/filters/rwprtf.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * rwprtf - SWFilter decendant to convert all GBF tags to RTF tags + */ + + +#include +#include +#include + + +RWPRTF::RWPRTF() { + +} + + +char RWPRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from; + bool ingreek = false; + bool inverse = false; + int len; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '\\') { + if(!ingreek) { + ingreek = true; + *to++ = '['; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'f'; + *to++ = '8'; + *to++ = ' '; + continue; + } + else { + ingreek = false; + *to++ = '}'; + *to++ = ']'; + continue; + } + } + + if ((ingreek) && ((*from == 'h') || (*from == 'H'))) + continue; // 'h's are mostly useless in RWP translitterations. The greek is more correct without them. + + if (*from == '#') { // verse markings (e.g. "#Mark 1:1|") + inverse = true; + *to++ = '{'; + *to++ = '\\'; + *to++ = 'c'; + *to++ = 'f'; + *to++ = '2'; + *to++ = ' '; + *to++ = '#'; + continue; + } + if ((*from == '|') && (inverse)) { + inverse = false; + *to++ = '|'; + *to++ = '}'; + continue; + } + + if (*from == '{') { + *to++ = '{'; + *to++ = '\\'; + *to++ = 'b'; + *to++ = ' '; + if ((from - &text[maxlen - len]) > 10) { // not the beginning of the entry + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + } + continue; + } + + if (*from == '}') { + // this is kinda neat... 
DO NOTHING + } + if ((*from == '\n') && (from[1] == '\n')) { + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = '\\'; + *to++ = 'p'; + *to++ = 'a'; + *to++ = 'r'; + *to++ = ' '; + continue; + } + + *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/scsuutf8.cpp b/src/modules/filters/scsuutf8.cpp new file mode 100644 index 0000000..d0d5ceb --- /dev/null +++ b/src/modules/filters/scsuutf8.cpp @@ -0,0 +1,220 @@ +/****************************************************************************** + * + * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8 + * + */ + + +/* This class is based on: + * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl + * on Andrea's balcony in North Amsterdam on 1998-08-04 + * Thanks to Richard Verhoeven for his suggestion + * to correct the haphazard "if" after UQU to "else if" on 1998-10-01 + * + * This is a deflator to UTF-8 output for input compressed in SCSU, + * the (Reuters) Standard Compression Scheme for Unicode as described + * in http://www.unicode.org/unicode/reports/tr6.html + */ + +#include +#include +#include + +#include + +SCSUUTF8::SCSUUTF8() { +} + + +unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text) +{ + /* join UTF-16 surrogates without any pairing sanity checks */ + + static int d; + + if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; } + if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; } + + /* output one character as UTF-8 multibyte sequence */ + + if (uchar < 0x80) { + *text++ = c; + } + else if (uchar < 0x800) { + *text++ = 0xc0 | uchar >> 6; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x10000) { + *text++ = 0xe0 | uchar >> 12; + *text++ = 0x80 | uchar >> 6 & 0x3f; + *text++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *text++ = 0xf0 | uchar >> 18; + *text++ = 0x80 | uchar >> 12 & 0x3f; + *text++ = 0x80 | uchar >> 6 & 0x3f; 
+ *text++ = 0x80 | uchar & 0x3f; + } + + return text; +} + +char SCSUUTF8::ProcessText(char *text, int len, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + unsigned long buflen = len * FILTERPAD; + char active = 0, mode = 0; + + static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000}; + static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00}; + static unsigned short win[256] = { + 0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380, + 0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780, + 0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, + 0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80, + 0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380, + 0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780, + 0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80, + 0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80, + 0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380, + 0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780, + 0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80, + 0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80, + 0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800, + 0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380, + 0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780, + 0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80, + 0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80, + 0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380, + 0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780, + 0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80, + 0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60 + }; + + if (!len) + return 0; + + memmove(&text[buflen - len], text, len); + from = (unsigned char*)&text[buflen - len]; + to = (unsigned char *)text; + + // ------------------------------- + + for (int i = 0; i < len;) { + + + if (i >= len) break; + c = from[i++]; + + if (c >= 0x80) + { + to = UTF8Output (c - 0x80 + slide[active], to); + } + else if (c >= 0x20 && c <= 0x7F) + { + to = UTF8Output (c, to); + } + else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD) + { + to = UTF8Output (c, to); + } + else if (c >= 0x1 && c <= 0x8) /* SQn */ + { + if (i >= len) break; + /* single quote */ d = from[i++]; + + to = UTF8Output (d < 0x80 ? 
d + start [c - 0x1] : + d - 0x80 + slide [c - 0x1], to); + } + else if (c >= 0x10 && c <= 0x17) /* SCn */ + { + /* change window */ active = c - 0x10; + } + else if (c >= 0x18 && c <= 0x1F) /* SDn */ + { + /* define window */ active = c - 0x18; + if (i >= len) break; + slide [active] = win [from[i++]]; + } + else if (c == 0xB) /* SDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7); + } + else if (c == 0xE) /* SQU */ + { + if (i >= len) break; + /* SQU */ c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF) /* SCU */ + { + /* change to Unicode mode */ mode = 1; + + while (mode) + { + if (i >= len) break; + c = from[i++]; + + if (c <= 0xDF || c >= 0xF3) + { + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c == 0xF0) /* UQU */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + to = UTF8Output (c << 8 | from[i++], to); + } + else if (c >= 0xE0 && c <= 0xE7) /* UCn */ + { + active = c - 0xE0; mode = 0; + } + else if (c >= 0xE8 && c <= 0xEF) /* UDn */ + { + if (i >= len) break; + slide [active=c-0xE8] = win [from[i++]]; mode = 0; + } + else if (c == 0xF1) /* UDX */ + { + if (i >= len) break; + c = from[i++]; + + if (i >= len) break; + d = from[i++]; + + slide [active = c>>5] = + 0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0; + } + } + } + + + } + + *to++ = 0; + *to = 0; + return 0; +} + diff --git a/src/modules/filters/swbasicfilter.cpp b/src/modules/filters/swbasicfilter.cpp new file mode 100644 index 0000000..2865085 --- /dev/null +++ b/src/modules/filters/swbasicfilter.cpp @@ -0,0 +1,299 @@ +/****************************************************************************** + * swbasicfilter.h - definition of class SWBasicFilter. An SWFilter + * impl that provides some basic methods that + * many filters will need and can use as a starting + * point. 
+ * + * $Id: swbasicfilter.cpp,v 1.17 2002/03/11 19:01:28 scribe Exp $ + * + * Copyright 2001 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include +#include +#include +#include +#include + +SWBasicFilter::SWBasicFilter() { + tokenStart = 0; + tokenEnd = 0; + escStart = 0; + escEnd = 0; + + setTokenStart("<"); + setTokenEnd(">"); + setEscapeStart("&"); + setEscapeEnd(";"); + + escStringCaseSensitive = false; + tokenCaseSensitive = false; + passThruUnknownToken = false; + passThruUnknownEsc = false; +} + + +void SWBasicFilter::setPassThruUnknownToken(bool val) { + passThruUnknownToken = val; +} + + +void SWBasicFilter::setPassThruUnknownEscapeString(bool val) { + passThruUnknownEsc = val; +} + + +void SWBasicFilter::setTokenCaseSensitive(bool val) { + tokenCaseSensitive = val; +} + + +void SWBasicFilter::setEscapeStringCaseSensitive(bool val) { + escStringCaseSensitive = val; +} + + +SWBasicFilter::~SWBasicFilter() { + if (tokenStart) + delete [] tokenStart; + + if (tokenEnd) + delete [] tokenEnd; + + if (escStart) + delete [] escStart; + + if (escEnd) + delete [] escEnd; +} + + +void SWBasicFilter::addTokenSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!tokenCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + tokenSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else tokenSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + 
+void SWBasicFilter::addEscapeStringSubstitute(const char *findString, const char *replaceString) { + char *buf = 0; + + if (!escStringCaseSensitive) { + stdstr(&buf, findString); + toupperstr(buf); + escSubMap.insert(DualStringMap::value_type(buf, replaceString)); + delete [] buf; + } + else escSubMap.insert(DualStringMap::value_type(findString, replaceString)); +} + + +void SWBasicFilter::pushString(char **buf, const char *format, ...) { + va_list argptr; + + va_start(argptr, format); + *buf += vsprintf(*buf, format, argptr); + va_end(argptr); + +// *buf += strlen(*buf); +} + + +bool SWBasicFilter::substituteToken(char **buf, const char *token) { + DualStringMap::iterator it; + + if (!tokenCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, token); + toupperstr(tmp); + it = tokenSubMap.find(tmp); + delete [] tmp; + } else + it = tokenSubMap.find(token); + + if (it != tokenSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::substituteEscapeString(char **buf, const char *escString) { + DualStringMap::iterator it; + + if (!escStringCaseSensitive) { + char *tmp = 0; + stdstr(&tmp, escString); + toupperstr(tmp); + it = escSubMap.find(tmp); + delete [] tmp; + } else + it = escSubMap.find(escString); + + if (it != escSubMap.end()) { + pushString(buf, it->second.c_str()); + return true; + } + return false; +} + + +bool SWBasicFilter::handleToken(char **buf, const char *token, DualStringMap &userData) { + return substituteToken(buf, token); +} + + +bool SWBasicFilter::handleEscapeString(char **buf, const char *escString, DualStringMap &userData) { + return substituteEscapeString(buf, escString); +} + + +void SWBasicFilter::setEscapeStart(const char *escStart) { + stdstr(&(this->escStart), escStart); +} + + +void SWBasicFilter::setEscapeEnd(const char *escEnd) { + stdstr(&(this->escEnd), escEnd); +} + + +void SWBasicFilter::setTokenStart(const char *tokenStart) { + stdstr(&(this->tokenStart), tokenStart); +} + 
+ +void SWBasicFilter::setTokenEnd(const char *tokenEnd) { + stdstr(&(this->tokenEnd), tokenEnd); +} + + +char SWBasicFilter::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) { + this->key = key; + this->module = module; + char *to, *from, token[4096]; + int tokpos = 0; + bool intoken = false; + int len; + bool inEsc = false; + char escStartLen = strlen(escStart); + char escEndLen = strlen(escEnd); + char escStartPos = 0, escEndPos = 0; + char tokenStartLen = strlen(tokenStart); + char tokenEndLen = strlen(tokenEnd); + char tokenStartPos = 0, tokenEndPos = 0; + DualStringMap userData; + string lastTextNode; + + bool suspendTextPassThru = false; + userData["suspendTextPassThru"] = "false"; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + + for (to = text; *from; from++) { + if (*from == tokenStart[tokenStartPos]) { + if (tokenStartPos == (tokenStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = false; + } + else tokenStartPos++; + continue; + } + + if (*from == escStart[escStartPos]) { + if (escStartPos == (escStartLen - 1)) { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + inEsc = true; + } + else escStartPos++; + continue; + } + + if (inEsc) { + if (*from == escEnd[escEndPos]) { + if (escEndPos == (escEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleEscapeString(&to, token, userData)) && (passThruUnknownEsc)) { + pushString(&to, escStart); + pushString(&to, token); + pushString(&to, escEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (!inEsc) { + if (*from == tokenEnd[tokenEndPos]) { + if 
(tokenEndPos == (tokenEndLen - 1)) { + intoken = false; + userData["lastTextNode"] = lastTextNode; + if ((!handleToken(&to, token, userData)) && (passThruUnknownToken)) { + pushString(&to, tokenStart); + pushString(&to, token); + pushString(&to, tokenEnd); + } + escEndPos = escStartPos = tokenEndPos = tokenStartPos = 0; + lastTextNode = ""; + suspendTextPassThru = (!userData["suspendTextPassThru"].compare("true")); + continue; + } + } + } + + if (intoken) { + if (tokpos < 4090) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!suspendTextPassThru) + *to++ = *from; + lastTextNode += *from; + } + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlfootnotes.cpp b/src/modules/filters/thmlfootnotes.cpp new file mode 100644 index 0000000..d9b1f0e --- /dev/null +++ b/src/modules/filters/thmlfootnotes.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlfootnotes - SWFilter decendant to hide or show footnotes + * in a ThML module. + */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLFootnotes::on[] = "On"; +const char ThMLFootnotes::off[] = "Off"; +const char ThMLFootnotes::optName[] = "Footnotes"; +const char ThMLFootnotes::optTip[] = "Toggles Footnotes On and Off if they exist"; + + +ThMLFootnotes::ThMLFootnotes() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLFootnotes::~ThMLFootnotes() { +} + +void ThMLFootnotes::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLFootnotes::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLFootnotes::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want footnotes + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "note", 4)) { + hide = true; + continue; + } + else if (!strncmp(token, "/note", 5)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlgbf.cpp b/src/modules/filters/thmlgbf.cpp new file mode 100644 index 0000000..66d9a20 --- /dev/null +++ b/src/modules/filters/thmlgbf.cpp @@ -0,0 +1,330 @@ +/*************************************************************************** + thmlgbf.cpp - ThML to GBF filter + ------------------- + begin : 1999-10-28 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include +#include +#include + + +ThMLGBF::ThMLGBF() +{ +} + + +char ThMLGBF::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + bool sechead = false; + bool title = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if 
(!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 
5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = '­'; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // 
process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = '<'; + *to++ = 'W'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = '<'; + *to++ = 'W'; + *to++ = 'T'; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'X'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'x'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "note", 4)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'F'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '<'; + *to++ = 'R'; + *to++ = 'f'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sup", 3)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'S'; + *to++ = '>'; + } + else if (!strncmp(token, "/sup", 4)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 's'; + *to++ = '>'; + } + else if (!strnicmp(token, "font color=#ff0000", 18)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'R'; + *to++ = '>'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '<'; + *to++ = 'F'; + *to++ = 'r'; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'S'; + *to++ = '>'; + sechead = true; + continue; + } + else if (sechead && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 's'; + *to++ = '>'; + sechead = false; + continue; + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 'T'; + *to++ = '>'; + title = true; + continue; + } + else if (title && !strncmp(token, "/div", 19)) { + *to++ = '<'; + *to++ = 'T'; + *to++ = 't'; + *to++ = '>'; + title = false; + continue; + } + else if (!strnicmp(token, "br", 2)) { 
+ *to++ = '<'; + *to++ = 'C'; + *to++ = 'L'; + *to++ = '>'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'I'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'B'; + *to++ = '>'; + continue; + case '/': + switch(token[1]) { + case 'P': + case 'p': + *to++ = '<'; + *to++ = 'C'; + *to++ = 'M'; + *to++ = '>'; + continue; + case 'I': + case 'i': // italic end + *to++ = '<'; + *to++ = 'F'; + *to++ = 'i'; + *to++ = '>'; + continue; + case 'B': // bold start + case 'b': + *to++ = '<'; + *to++ = 'F'; + *to++ = 'b'; + *to++ = '>'; + continue; + } + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlheadings.cpp b/src/modules/filters/thmlheadings.cpp new file mode 100644 index 0000000..00b8a23 --- /dev/null +++ b/src/modules/filters/thmlheadings.cpp @@ -0,0 +1,107 @@ +/****************************************************************************** + * + * thmlheadings - SWFilter decendant to hide or show headings + * in a ThML module. + */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLHeadings::on[] = "On"; +const char ThMLHeadings::off[] = "Off"; +const char ThMLHeadings::optName[] = "Headings"; +const char ThMLHeadings::optTip[] = "Toggles Headings On and Off if they exist"; + + +ThMLHeadings::ThMLHeadings() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLHeadings::~ThMLHeadings() { +} + +void ThMLHeadings::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLHeadings::getOptionValue() +{ + return (option) ? 
on:off; +} + +char ThMLHeadings::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want headings + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "div class=\"sechead\"", 19)) { + hide = true; + continue; + } + if (!strnicmp(token, "div class=\"title\"", 17)) { + hide = true; + continue; + } + else if (hide && !strnicmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a heading token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlhtml.cpp b/src/modules/filters/thmlhtml.cpp new file mode 100644 index 0000000..9cb8679 --- /dev/null +++ b/src/modules/filters/thmlhtml.cpp @@ -0,0 +1,211 @@ +/*************************************************************************** + thmlhtml.cpp - ThML to HTML filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include +#include + + +ThMLHTML::ThMLHTML() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + 
addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + 
addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", "­"); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", " "); + addTokenSubstitute("note", " ("); + addTokenSubstitute("/note", ") "); +} + + +bool ThMLHTML::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, ""); + for (const char *tok = token + 5; *tok; tok++) + if(*tok != '\"') + *(*buf)++ = *tok; + pushString(buf, ""); + } + else if (token[27] == 'T') { + pushString(buf, ""); + for (unsigned int i = 29; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ""); + } + } + else if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + pushString(buf, ""); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ""); + } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if(!strncmp(token, "note", 4)) { + pushString(buf, " ("); + } + + else { + return false; // we still didn't handle token + 
} + } + return true; +} + diff --git a/src/modules/filters/thmlhtmlhref.cpp b/src/modules/filters/thmlhtmlhref.cpp new file mode 100644 index 0000000..ce7e3fd --- /dev/null +++ b/src/modules/filters/thmlhtmlhref.cpp @@ -0,0 +1,269 @@ +/*************************************************************************** + thmlhtmlhref.cpp - ThML to HTML filter with hrefs + ------------------- + begin : 2001-09-03 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include +#include + + +ThMLHTMLHREF::ThMLHTMLHREF() { + setTokenStart("<"); + setTokenEnd(">"); +/* + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + 
addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + addEscapeStringSubstitute("Iuml", "Ï"); + addEscapeStringSubstitute("iacute", "í"); + addEscapeStringSubstitute("igrave", "ì"); + addEscapeStringSubstitute("icirc", "î"); + addEscapeStringSubstitute("iuml", "ï"); + addEscapeStringSubstitute("Oacute", "Ó"); + addEscapeStringSubstitute("Ograve", "Ò"); + addEscapeStringSubstitute("Ocirc", "Ô"); + addEscapeStringSubstitute("Ouml", "Ö"); + addEscapeStringSubstitute("Otilde", "Õ"); + addEscapeStringSubstitute("oacute", "ó"); + addEscapeStringSubstitute("ograve", "ò"); + addEscapeStringSubstitute("ocirc", "ô"); + addEscapeStringSubstitute("ouml", "ö"); + addEscapeStringSubstitute("otilde", "õ"); + addEscapeStringSubstitute("Uacute", "Ú"); + addEscapeStringSubstitute("Ugrave", "Ù"); + addEscapeStringSubstitute("Ucirc", "Û"); + addEscapeStringSubstitute("Uuml", "Ü"); + addEscapeStringSubstitute("uacute", "ú"); + addEscapeStringSubstitute("ugrave", "ù"); + addEscapeStringSubstitute("ucirc", "û"); + addEscapeStringSubstitute("uuml", "ü"); + addEscapeStringSubstitute("Yacute", "Ý"); + addEscapeStringSubstitute("yacute", "ý"); + addEscapeStringSubstitute("yuml", "ÿ"); + + addEscapeStringSubstitute("deg", "°"); + addEscapeStringSubstitute("plusmn", "±"); + addEscapeStringSubstitute("sup2", "²"); + 
addEscapeStringSubstitute("sup3", "³"); + addEscapeStringSubstitute("sup1", "¹"); + addEscapeStringSubstitute("nbsp", "º"); + addEscapeStringSubstitute("pound", "£"); + addEscapeStringSubstitute("cent", "¢"); + addEscapeStringSubstitute("frac14", "¼"); + addEscapeStringSubstitute("frac12", "½"); + addEscapeStringSubstitute("frac34", "¾"); + addEscapeStringSubstitute("iquest", "¿"); + addEscapeStringSubstitute("iexcl", "¡"); + addEscapeStringSubstitute("ETH", "Ð"); + addEscapeStringSubstitute("eth", "ð"); + addEscapeStringSubstitute("THORN", "Þ"); + addEscapeStringSubstitute("thorn", "þ"); + addEscapeStringSubstitute("AElig", "Æ"); + addEscapeStringSubstitute("aelig", "æ"); + addEscapeStringSubstitute("Oslash", "Ø"); + addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", "­"); + addEscapeStringSubstitute("macr", "¯"); +*/ + setTokenCaseSensitive(true); + + addTokenSubstitute("note", " ("); + addTokenSubstitute("/note", ") "); + addTokenSubstitute("/scripture", " "); +} + + +bool ThMLHTMLHREF::handleToken(char **buf, const char *token, DualStringMap &userData) { + const char *tok; + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync ", 5)) { + pushString(buf, ""); + } + + else if (!strncmp(token, "scripture ", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, ""); + } + + else if (!strncmp(token, "scripRef p", 10) || !strncmp(token, "scripRef v", 10)) { + userData["inscriptRef"] = "true"; + pushString(buf, "John 3:16" + else if (!strcmp(token, "scripRef")) { + userData["inscriptRef"] = 
"false"; + // let's stop text from going to output + userData["suspendTextPassThru"] = "true"; + } + + // we've ended a scripRef + else if (!strcmp(token, "/scripRef")) { + if (userData["inscriptRef"] == "true") { // like "John 3:16" + userData["inscriptRef"] = "false"; + pushString(buf, ""); + } + + else { // like "John 3:16" + pushString(buf, ""); + } + } + + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "
"); + } + else if (!strncmp(token, "div class=\"title\"", 19)) { + userData["SecHead"] = "true"; + pushString(buf, "
"); + } + else if (!strncmp(token, "/div", 4)) { + if (userData["SecHead"] == "true") { + pushString(buf, "
"); + userData["SecHead"] = "false"; + } + } + + else if (!strncmp(token, "sync type=\"Strongs\" value=\"T", 28)) { + pushString(buf, "
"); + } + else if (!strncmp(token, "img ", 4)) { + const char *src = strstr(token, "src"); + if (!src) // assert we have a src attribute + return false; + + *(*buf)++ = '<'; + for (const char *c = token; *c; c++) { + if (c == src) { + for (;((*c) && (*c != '"')); c++) + *(*buf)++ = *c; + + if (!*c) { c--; continue; } + + *(*buf)++ = '"'; + if (*(c+1) == '/') { + pushString(buf, "file:"); + pushString(buf, module->getConfigEntry("AbsoluteDataPath")); + if (*((*buf)-1) == '/') + c++; // skip '/' + } + continue; + } + *(*buf)++ = *c; + } + *(*buf)++ = '>'; + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " ("); + } + else { + *(*buf)++ = '<'; + for (const char *tok = token; *tok; tok++) + *(*buf)++ = *tok; + *(*buf)++ = '>'; + //return false; // we still didn't handle token + } + } + return true; +} + diff --git a/src/modules/filters/thmllemma.cpp b/src/modules/filters/thmllemma.cpp new file mode 100644 index 0000000..33856db --- /dev/null +++ b/src/modules/filters/thmllemma.cpp @@ -0,0 +1,97 @@ +/****************************************************************************** + * + * thmllemma - SWFilter decendant to hide or show lemmas + * in a ThML module. + */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLLemma::on[] = "On"; +const char ThMLLemma::off[] = "Off"; +const char ThMLLemma::optName[] = "Lemmas"; +const char ThMLLemma::optTip[] = "Toggles Lemmas On and Off if they exist"; + +ThMLLemma::ThMLLemma() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLLemma::~ThMLLemma() { +} + +void ThMLLemma::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLLemma::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLLemma::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want lemmas + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"lemma\" ", 18)) { // Lemma + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a lemma token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlmorph.cpp b/src/modules/filters/thmlmorph.cpp new file mode 100644 index 0000000..f95bede --- /dev/null +++ b/src/modules/filters/thmlmorph.cpp @@ -0,0 +1,98 @@ +/****************************************************************************** + * + * thmlmorph - SWFilter decendant to hide or show morph tags + * in a ThML module. 
+ */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLMorph::on[] = "On"; +const char ThMLMorph::off[] = "Off"; +const char ThMLMorph::optName[] = "Morphological Tags"; +const char ThMLMorph::optTip[] = "Toggles Morphological Tags On and Off if they exist"; + + +ThMLMorph::ThMLMorph() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLMorph::~ThMLMorph() { +} + +void ThMLMorph::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLMorph::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLMorph::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want morph tags + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"morph\" ", 18)) { // Morph + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + // if not a morph tag token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git 
a/src/modules/filters/thmlolb.cpp b/src/modules/filters/thmlolb.cpp new file mode 100644 index 0000000..2b31fab --- /dev/null +++ b/src/modules/filters/thmlolb.cpp @@ -0,0 +1,243 @@ +/*************************************************************************** + thmlolb.cpp - ThML to OLB filter + ------------------- + begin : 2001-05-10 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include + + +ThMLOLB::ThMLOLB() +{ +} + + +char ThMLOLB::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + int i; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + for (to = text; *from; from++) + { + if (*from == '<') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + memset(token, 0, 2048); + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if 
(!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if (!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) 
*to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if 
(!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if (!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = '­'; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) + { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"G", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "sync type=\"Strongs\" value=\"H", 27)) { + *to++ = '<'; + for (i = 28; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + else if (!strncmp(token, "scripRef", 8)) { + *to++ = '#'; + continue; + } + else if (!strncmp(token, "/scripRef", 9)) { + *to++ = ' '; + continue; + } + else if (!strncmp(token, "note ", 5)) { + *to++ = '{'; + continue; + } + else if (!strncmp(token, "/note", 5)) { + *to++ = '}'; + continue; + } + else if (!strnicmp(token, "font", 4)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else if (!strnicmp(token, "/font", 5)) { + *to++ = '\\'; + *to++ = '\\'; + continue; + } + else switch(*token) { + case 'I': // font tags + case 'i': + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + case '/': + switch(token[1]) { + case 'I': + case 'i': // italic end + *to++ = '\\'; + *to++ = '@'; + continue; + case 'B': // bold start + case 'b': + *to++ = '\\'; + *to++ = '$'; + continue; + } + } + 
continue; + } + if (intoken) { + if (tokpos < 2047) + token[tokpos++] = *from; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + return 0; +} + + + diff --git a/src/modules/filters/thmlplain.cpp b/src/modules/filters/thmlplain.cpp new file mode 100644 index 0000000..5609f16 --- /dev/null +++ b/src/modules/filters/thmlplain.cpp @@ -0,0 +1,201 @@ +/****************************************************************************** + * + * thmlplain - SWFilter decendant to strip out all ThML tags or convert to + * ASCII rendered symbols. + */ + + +#include +#include +#include + + +ThMLPlain::ThMLPlain() { +} + + +char ThMLPlain::ProcessText(char *text, int maxlen) +{ + char *to, *from, token[2048]; + int tokpos = 0; + bool intoken = false; + int len; + bool ampersand = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == 10 || *from == 13) + if (!*++from) break; /* drop the CR/LF; if it was the last character stop here, otherwise the loop increment would step past the terminating NUL */ + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = false; + continue; + } + else if (*from == '&') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + ampersand = true; + continue; + } + if (*from == ';' && ampersand) { + intoken = false; + + if (!strncmp("nbsp", token, 4)) *to++ = ' '; + else if (!strncmp("quot", token, 4)) *to++ = '"'; + else if (!strncmp("amp", token, 3)) *to++ = '&'; + else if (!strncmp("lt", token, 2)) *to++ = '<'; + else if (!strncmp("gt", token, 2)) *to++ = '>'; + else if (!strncmp("brvbar", token, 6)) *to++ = '|'; + else if (!strncmp("sect", token, 4)) *to++ = '§'; + else if (!strncmp("copy", token, 4)) *to++ = '©'; + else if (!strncmp("laquo", token, 5)) *to++ = '«'; + else if (!strncmp("reg", token, 3)) *to++ = '®'; + else if (!strncmp("acute", token, 5)) *to++ = '´'; + else if
(!strncmp("para", token, 4)) *to++ = '¶'; + else if (!strncmp("raquo", token, 5)) *to++ = '»'; + + else if (!strncmp("Aacute", token, 6)) *to++ = 'Á'; + else if (!strncmp("Agrave", token, 6)) *to++ = 'À'; + else if (!strncmp("Acirc", token, 5)) *to++ = 'Â'; + else if (!strncmp("Auml", token, 4)) *to++ = 'Ä'; + else if (!strncmp("Atilde", token, 6)) *to++ = 'Ã'; + else if (!strncmp("Aring", token, 5)) *to++ = 'Å'; + else if (!strncmp("aacute", token, 6)) *to++ = 'á'; + else if (!strncmp("agrave", token, 6)) *to++ = 'à'; + else if (!strncmp("acirc", token, 5)) *to++ = 'â'; + else if (!strncmp("auml", token, 4)) *to++ = 'ä'; + else if (!strncmp("atilde", token, 6)) *to++ = 'ã'; + else if (!strncmp("aring", token, 5)) *to++ = 'å'; + else if (!strncmp("Eacute", token, 6)) *to++ = 'É'; + else if (!strncmp("Egrave", token, 6)) *to++ = 'È'; + else if (!strncmp("Ecirc", token, 5)) *to++ = 'Ê'; + else if (!strncmp("Euml", token, 4)) *to++ = 'Ë'; + else if (!strncmp("eacute", token, 6)) *to++ = 'é'; + else if (!strncmp("egrave", token, 6)) *to++ = 'è'; + else if (!strncmp("ecirc", token, 5)) *to++ = 'ê'; + else if (!strncmp("euml", token, 4)) *to++ = 'ë'; + else if (!strncmp("Iacute", token, 6)) *to++ = 'Í'; + else if (!strncmp("Igrave", token, 6)) *to++ = 'Ì'; + else if (!strncmp("Icirc", token, 5)) *to++ = 'Î'; + else if (!strncmp("Iuml", token, 4)) *to++ = 'Ï'; + else if (!strncmp("iacute", token, 6)) *to++ = 'í'; + else if (!strncmp("igrave", token, 6)) *to++ = 'ì'; + else if (!strncmp("icirc", token, 5)) *to++ = 'î'; + else if (!strncmp("iuml", token, 4)) *to++ = 'ï'; + else if (!strncmp("Oacute", token, 6)) *to++ = 'Ó'; + else if (!strncmp("Ograve", token, 6)) *to++ = 'Ò'; + else if (!strncmp("Ocirc", token, 5)) *to++ = 'Ô'; + else if (!strncmp("Ouml", token, 4)) *to++ = 'Ö'; + else if (!strncmp("Otilde", token, 6)) *to++ = 'Õ'; + else if (!strncmp("oacute", token, 6)) *to++ = 'ó'; + else if (!strncmp("ograve", token, 6)) *to++ = 'ò'; + else if (!strncmp("ocirc", token, 
5)) *to++ = 'ô'; + else if (!strncmp("ouml", token, 4)) *to++ = 'ö'; + else if (!strncmp("otilde", token, 6)) *to++ = 'õ'; + else if (!strncmp("Uacute", token, 6)) *to++ = 'Ú'; + else if (!strncmp("Ugrave", token, 6)) *to++ = 'Ù'; + else if (!strncmp("Ucirc", token, 5)) *to++ = 'Û'; + else if (!strncmp("Uuml", token, 4)) *to++ = 'Ü'; + else if (!strncmp("uacute", token, 6)) *to++ = 'ú'; + else if (!strncmp("ugrave", token, 6)) *to++ = 'ù'; + else if (!strncmp("ucirc", token, 5)) *to++ = 'û'; + else if (!strncmp("uuml", token, 4)) *to++ = 'ü'; + else if (!strncmp("Yacute", token, 6)) *to++ = 'Ý'; + else if (!strncmp("yacute", token, 6)) *to++ = 'ý'; + else if (!strncmp("yuml", token, 4)) *to++ = 'ÿ'; + + else if (!strncmp("deg", token, 3)) *to++ = '°'; + else if (!strncmp("plusmn", token, 6)) *to++ = '±'; + else if (!strncmp("sup2", token, 4)) *to++ = '²'; + else if (!strncmp("sup3", token, 4)) *to++ = '³'; + else if (!strncmp("sup1", token, 4)) *to++ = '¹'; + else if (!strncmp("nbsp", token, 4)) *to++ = 'º'; + else if (!strncmp("pound", token, 5)) *to++ = '£'; + else if (!strncmp("cent", token, 4)) *to++ = '¢'; + else if (!strncmp("frac14", token, 6)) *to++ = '¼'; + else if (!strncmp("frac12", token, 6)) *to++ = '½'; + else if (!strncmp("frac34", token, 6)) *to++ = '¾'; + else if (!strncmp("iquest", token, 6)) *to++ = '¿'; + else if (!strncmp("iexcl", token, 5)) *to++ = '¡'; + else if (!strncmp("ETH", token, 3)) *to++ = 'Ð'; + else if (!strncmp("eth", token, 3)) *to++ = 'ð'; + else if (!strncmp("THORN", token, 5)) *to++ = 'Þ'; + else if (!strncmp("thorn", token, 5)) *to++ = 'þ'; + else if (!strncmp("AElig", token, 5)) *to++ = 'Æ'; + else if (!strncmp("aelig", token, 5)) *to++ = 'æ'; + else if (!strncmp("Oslash", token, 6)) *to++ = 'Ø'; + else if (!strncmp("curren", token, 6)) *to++ = '¤'; + else if (!strncmp("Ccedil", token, 6)) *to++ = 'Ç'; + else if (!strncmp("ccedil", token, 6)) *to++ = 'ç'; + else if (!strncmp("szlig", token, 5)) *to++ = 'ß'; + else if 
(!strncmp("Ntilde", token, 6)) *to++ = 'Ñ'; + else if (!strncmp("ntilde", token, 6)) *to++ = 'ñ'; + else if (!strncmp("yen", token, 3)) *to++ = '¥'; + else if (!strncmp("not", token, 3)) *to++ = '¬'; + else if (!strncmp("ordf", token, 4)) *to++ = 'ª'; + else if (!strncmp("uml", token, 3)) *to++ = '¨'; + else if (!strncmp("shy", token, 3)) *to++ = '­'; + else if (!strncmp("macr", token, 4)) *to++ = '¯'; + continue; + + } + else if (*from == '>' && !ampersand) { + intoken = false; + // process desired tokens + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { + *to++ = ' '; + *to++ = '<'; + for (unsigned int i = 27; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = '>'; + continue; + } + if (!strncmp(token, "sync type=\"morph\" value=\"", 25)) { + *to++ = ' '; + *to++ = '('; + for (unsigned int i = 25; token[i] != '\"'; i++) + *to++ = token[i]; + *to++ = ')'; + continue; + } + if (!strncmp("note", token, 4)) { + *to++ = ' '; + *to++ = '('; + } + else if (!strncmp("br", token, 2)) + *to++ = '\n'; + else if (!strncmp("/p", token, 2)) + *to++ = '\n'; + else if (!strncmp("/note", token, 5)) { + *to++ = ')'; + *to++ = ' '; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else *to++ = *from; + } + *to++ = 0; + *to = 0; + + return 0; +} + + diff --git a/src/modules/filters/thmlrtf.cpp b/src/modules/filters/thmlrtf.cpp new file mode 100644 index 0000000..76289ec --- /dev/null +++ b/src/modules/filters/thmlrtf.cpp @@ -0,0 +1,219 @@ +/*************************************************************************** + thmlrtf.cpp - ThML to RTF filter + ------------------- + begin : 1999-10-27 + copyright : 2001 by CrossWire Bible Society + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU 
General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include +#include + + +ThMLRTF::ThMLRTF() +{ + setTokenStart("<"); + setTokenEnd(">"); + + setEscapeStart("&"); + setEscapeEnd(";"); + + setEscapeStringCaseSensitive(true); + + addEscapeStringSubstitute("nbsp", " "); + addEscapeStringSubstitute("quot", "\""); + addEscapeStringSubstitute("amp", "&"); + addEscapeStringSubstitute("lt", "<"); + addEscapeStringSubstitute("gt", ">"); + addEscapeStringSubstitute("brvbar", "|"); + addEscapeStringSubstitute("sect", "§"); + addEscapeStringSubstitute("copy", "©"); + addEscapeStringSubstitute("laquo", "«"); + addEscapeStringSubstitute("reg", "®"); + addEscapeStringSubstitute("acute", "´"); + addEscapeStringSubstitute("para", "¶"); + addEscapeStringSubstitute("raquo", "»"); + + addEscapeStringSubstitute("Aacute", "Á"); + addEscapeStringSubstitute("Agrave", "À"); + addEscapeStringSubstitute("Acirc", "Â"); + addEscapeStringSubstitute("Auml", "Ä"); + addEscapeStringSubstitute("Atilde", "Ã"); + addEscapeStringSubstitute("Aring", "Å"); + addEscapeStringSubstitute("aacute", "á"); + addEscapeStringSubstitute("agrave", "à"); + addEscapeStringSubstitute("acirc", "â"); + addEscapeStringSubstitute("auml", "ä"); + addEscapeStringSubstitute("atilde", "ã"); + addEscapeStringSubstitute("aring", "å"); + addEscapeStringSubstitute("Eacute", "É"); + addEscapeStringSubstitute("Egrave", "È"); + addEscapeStringSubstitute("Ecirc", "Ê"); + addEscapeStringSubstitute("Euml", "Ë"); + addEscapeStringSubstitute("eacute", "é"); + addEscapeStringSubstitute("egrave", "è"); + addEscapeStringSubstitute("ecirc", "ê"); + addEscapeStringSubstitute("euml", "ë"); + addEscapeStringSubstitute("Iacute", "Í"); + addEscapeStringSubstitute("Igrave", "Ì"); + addEscapeStringSubstitute("Icirc", "Î"); + 
addEscapeStringSubstitute("Iuml", "Ï");
+	addEscapeStringSubstitute("iacute", "í");
+	addEscapeStringSubstitute("igrave", "ì");
+	addEscapeStringSubstitute("icirc", "î");
+	addEscapeStringSubstitute("iuml", "ï");
+	addEscapeStringSubstitute("Oacute", "Ó");
+	addEscapeStringSubstitute("Ograve", "Ò");
+	addEscapeStringSubstitute("Ocirc", "Ô");
+	addEscapeStringSubstitute("Ouml", "Ö");
+	addEscapeStringSubstitute("Otilde", "Õ");
+	addEscapeStringSubstitute("oacute", "ó");
+	addEscapeStringSubstitute("ograve", "ò");
+	addEscapeStringSubstitute("ocirc", "ô");
+	addEscapeStringSubstitute("ouml", "ö");
+	addEscapeStringSubstitute("otilde", "õ");
+	addEscapeStringSubstitute("Uacute", "Ú");
+	addEscapeStringSubstitute("Ugrave", "Ù");
+	addEscapeStringSubstitute("Ucirc", "Û");
+	addEscapeStringSubstitute("Uuml", "Ü");
+	addEscapeStringSubstitute("uacute", "ú");
+	addEscapeStringSubstitute("ugrave", "ù");
+	addEscapeStringSubstitute("ucirc", "û");
+	addEscapeStringSubstitute("uuml", "ü");
+	addEscapeStringSubstitute("Yacute", "Ý");
+	addEscapeStringSubstitute("yacute", "ý");
+	addEscapeStringSubstitute("yuml", "ÿ");
+
+	addEscapeStringSubstitute("deg", "°");
+	addEscapeStringSubstitute("plusmn", "±");
+	addEscapeStringSubstitute("sup2", "²");
+	addEscapeStringSubstitute("sup3", "³");
+	addEscapeStringSubstitute("sup1", "¹");
+	addEscapeStringSubstitute("ordm", "º");	// was registered under "nbsp", which duplicated the nbsp entry above and left &ordm; unmapped
+	addEscapeStringSubstitute("pound", "£");
+	addEscapeStringSubstitute("cent", "¢");
+	addEscapeStringSubstitute("frac14", "¼");
+	addEscapeStringSubstitute("frac12", "½");
+	addEscapeStringSubstitute("frac34", "¾");
+	addEscapeStringSubstitute("iquest", "¿");
+	addEscapeStringSubstitute("iexcl", "¡");
+	addEscapeStringSubstitute("ETH", "Ð");
+	addEscapeStringSubstitute("eth", "ð");
+	addEscapeStringSubstitute("THORN", "Þ");
+	addEscapeStringSubstitute("thorn", "þ");
+	addEscapeStringSubstitute("AElig", "Æ");
+	addEscapeStringSubstitute("aelig", "æ");
+	addEscapeStringSubstitute("Oslash", "Ø");
+	
addEscapeStringSubstitute("curren", "¤"); + addEscapeStringSubstitute("Ccedil", "Ç"); + addEscapeStringSubstitute("ccedil", "ç"); + addEscapeStringSubstitute("szlig", "ß"); + addEscapeStringSubstitute("Ntilde", "Ñ"); + addEscapeStringSubstitute("ntilde", "ñ"); + addEscapeStringSubstitute("yen", "¥"); + addEscapeStringSubstitute("not", "¬"); + addEscapeStringSubstitute("ordf", "ª"); + addEscapeStringSubstitute("uml", "¨"); + addEscapeStringSubstitute("shy", "­"); + addEscapeStringSubstitute("macr", "¯"); + + setTokenCaseSensitive(true); + + addTokenSubstitute("/scripRef", "|}"); + addTokenSubstitute("/note", ") }"); + + addTokenSubstitute("br", "\\line "); + addTokenSubstitute("br /", "\\line "); + addTokenSubstitute("i", "{\\i1 "); + addTokenSubstitute("/i", "}"); + addTokenSubstitute("b", "{\\b1 "); + addTokenSubstitute("/b", "}"); + addTokenSubstitute("p", "\\par "); + + //we need uppercase forms for the moment to support a few early ThML modules that aren't XHTML compliant + addTokenSubstitute("BR", "\\line "); + addTokenSubstitute("I", "{\\i1 "); + addTokenSubstitute("/I", "}"); + addTokenSubstitute("B", "{\\b1 "); + addTokenSubstitute("/B", "}"); + addTokenSubstitute("P", "\\par "); +} + +bool ThMLRTF::handleToken(char **buf, const char *token, DualStringMap &userData) { + if (!substituteToken(buf, token)) { + // manually process if it wasn't a simple substitution + if (!strncmp(token, "sync type=\"Strongs\" value=\"", 27)) { +/* if (token[27] == 'H' || token[27] == 'G' || token[27] == 'A') { + pushString(buf, " {\\fs15 <"); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ">}"); + } + else if (token[27] == 'T') { + pushString(buf, " {\\fs15 ("); + for (unsigned int i = 28; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + } + else if (!strncmp(token, "sync type=\"morph\" ", 18)) { + pushString(buf, " {\\fs15 ("); + for (const char *tok = token + 5; *tok; tok++) { + if (!strncmp(tok, 
"value=\"", 7)) { + tok += 7; + for (;*tok != '\"'; tok++) + *(*buf)++ = *tok; + break; + } + } + + pushString(buf, ")}"); +*/ } + else if (!strncmp(token, "sync type=\"lemma\" value=\"", 25)) { + pushString(buf, "{\\fs15 ("); + for (unsigned int i = 25; token[i] != '\"'; i++) + *(*buf)++ = token[i]; + pushString(buf, ")}"); + } + else if (!strncmp(token, "scripRef", 8)) { + pushString(buf, "{\\cf2 #"); + } + else if (!strncmp(token, "div", 3)) { + *(*buf)++ = '{'; + if (!strncmp(token, "div class=\"title\"", 17)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + else if (!strncmp(token, "div class=\"sechead\"", 19)) { + pushString(buf, "\\par\\i1\\b1 "); + userData["sechead"] = "true"; + } + } + else if (!strncmp(token, "/div", 4)) { + *(*buf)++ = '}'; + if (userData["sechead"] == "true") { + pushString(buf, "\\par "); + userData["sechead"] == "false"; + } + } + else if (!strncmp(token, "note", 4)) { + pushString(buf, " {\\i1\\fs15 ("); + } + + else { + return false; // we still didn't handle token + } + } + return true; +} + + diff --git a/src/modules/filters/thmlscripref.cpp b/src/modules/filters/thmlscripref.cpp new file mode 100644 index 0000000..23edd6d --- /dev/null +++ b/src/modules/filters/thmlscripref.cpp @@ -0,0 +1,103 @@ +/****************************************************************************** + * + * thmlscripref - SWFilter decendant to hide or show scripture references + * in a ThML module. 
+ */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLScripref::on[] = "On"; +const char ThMLScripref::off[] = "Off"; +const char ThMLScripref::optName[] = "Scripture Cross-references"; +const char ThMLScripref::optTip[] = "Toggles Scripture Cross-references On and Off if they exist"; + + +ThMLScripref::ThMLScripref() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLScripref::~ThMLScripref() { +} + +void ThMLScripref::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLScripref::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLScripref::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { // if we don't want scriprefs + char *to, *from, token[2048]; // cheese. Fix. + int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "scripRef", 8)) { + hide = true; + continue; + } + else if (!strnicmp(token, "/scripRef", 9)) { + hide = false; + continue; + } + + // if not a scripref token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/thmlstrongs.cpp b/src/modules/filters/thmlstrongs.cpp new file mode 100644 index 0000000..8d0466c 
--- /dev/null +++ b/src/modules/filters/thmlstrongs.cpp @@ -0,0 +1,138 @@ +/****************************************************************************** + * + * thmlstrongs - SWFilter decendant to hide or show strongs number + * in a ThML module. + */ + + +#include +#include +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLStrongs::on[] = "On"; +const char ThMLStrongs::off[] = "Off"; +const char ThMLStrongs::optName[] = "Strong's Numbers"; +const char ThMLStrongs::optTip[] = "Toggles Strong's Numbers On and Off if they exist"; + + +ThMLStrongs::ThMLStrongs() { + option = false; + options.push_back(on); + options.push_back(off); +} + + +ThMLStrongs::~ThMLStrongs() { +} + +void ThMLStrongs::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *ThMLStrongs::getOptionValue() +{ + return (option) ? on:off; +} + +char ThMLStrongs::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool lastspace = false; + int word = 1; + char val[128]; + char wordstr[5]; + char *valto; + char *ch; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strnicmp(token, "sync type=\"Strongs\" ", 20)) { // Strongs + if (module->isProcessEntryAttributes()) { + valto = val; + for (unsigned int i = 27; token[i] != '\"' && i < 150; i++) + *valto++ = token[i]; + *valto = 0; + sprintf(wordstr, "%03d", word++); + module->getEntryAttributes()["Word"][wordstr]["Strongs"] = val; + } + + if (!option) { // if we don't want strongs + if ((from[1] == ' ') || (from[1] == ',') || (from[1] == ';') || (from[1] == '.') || (from[1] == '?') || (from[1] == '!') || (from[1] == ')') || (from[1] == '\'') || (from[1] == '\"')) { + if (lastspace) + to--; + } + continue; + } + } + if (module->isProcessEntryAttributes()) { + if (!strncmp(token, "sync type=\"morph\"", 17)) { + for (ch = token+17; *ch; ch++) { + if (!strncmp(ch, "class=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["MorphClass"] = val; + } + if (!strncmp(ch, "value=\"", 7)) { + valto = val; + for (unsigned int i = 7; ch[i] != '\"' && i < 127; i++) + *valto++ = ch[i]; + *valto = 0; + sprintf(wordstr, "%03d", word-1); + module->getEntryAttributes()["Word"][wordstr]["Morph"] = val; + } + } + } + } + // if not a strongs token, keep token in text + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + continue; + 
} + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + *to++ = *from; + lastspace = (*from == ' '); + } + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/thmlvariants.cpp b/src/modules/filters/thmlvariants.cpp new file mode 100644 index 0000000..fda0950 --- /dev/null +++ b/src/modules/filters/thmlvariants.cpp @@ -0,0 +1,183 @@ +/****************************************************************************** + * + * thmlvariants - SWFilter decendant to hide or show textual variants + * in a ThML module. + */ + + +#include +#include +#include +#ifndef __GNUC__ +#else +#include +#endif + + +const char ThMLVariants::primary[] = "Primary Reading"; +const char ThMLVariants::secondary[] = "Secondary Reading"; +const char ThMLVariants::all[] = "All Readings"; + +const char ThMLVariants::optName[] = "Textual Variants"; +const char ThMLVariants::optTip[] = "Switch between Textual Variants modes"; + + +ThMLVariants::ThMLVariants() { + option = false; + options.push_back(primary); + options.push_back(secondary); + options.push_back(all); +} + + +ThMLVariants::~ThMLVariants() { +} + +void ThMLVariants::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, primary)); +} + +const char *ThMLVariants::getOptionValue() +{ + if (option == 0) { + return primary; + } + else if (option == 1) { + return secondary; + } + else { + return all; + } +} + +char ThMLVariants::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (option == 0) { //we want primary only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"variant\"", 19)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + else if (option == 1) { //we want variant only + char *to, *from, token[2048]; // cheese. Fix. 
+ int tokpos = 0; + bool intoken = false; + int len; + bool hide = false; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = &text[maxlen - len]; + } + else from = text; + + // ------------------------------- + + for (to = text; *from; from++) { + if (*from == '<') { + intoken = true; + tokpos = 0; + token[0] = 0; + token[1] = 0; + token[2] = 0; + continue; + } + if (*from == '>') { // process tokens + intoken = false; + if (!strncmp(token, "div type=\"primary\"", 19)) { + hide = true; + continue; + } + else if (!strncmp(token, "/div", 4)) { + hide = false; + continue; + } + + // if not a footnote token, keep token in text + if (!hide) { + *to++ = '<'; + for (char *tok = token; *tok; tok++) + *to++ = *tok; + *to++ = '>'; + } + continue; + } + if (intoken) { + if (tokpos < 2045) + token[tokpos++] = *from; + token[tokpos+2] = 0; + } + else { + if (!hide) { + *to++ = *from; + } + } + } + *to++ = 0; + *to = 0; + + } + return 0; +} + + + + + + diff --git a/src/modules/filters/unicodertf.cpp b/src/modules/filters/unicodertf.cpp new file mode 100644 index 0000000..b53a2d7 --- /dev/null +++ b/src/modules/filters/unicodertf.cpp @@ -0,0 +1,70 @@ +/****************************************************************************** + * + * unicodertf - SWFilter decendant to convert a double byte unicode file + * to RTF tags + */ + + +#include +#include +#include + +UnicodeRTF::UnicodeRTF() { +} + + +char UnicodeRTF::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from, *maxto; + int len; + char digit[10]; + short ch; // must be signed per unicode spec (negative is ok for big numbers > 32768) + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + maxto =(unsigned char*)text + maxlen; + 
+ // ------------------------------- + for (to = (unsigned char*)text; *from && (to <= maxto); from++) { + ch = 0; + if ((*from & 128) != 128) { + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ = '\\'; + *to++ = 'u'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = '?'; + } + + if (to != maxto) { + *to++ = 0; + } + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf16utf8.cpp b/src/modules/filters/utf16utf8.cpp new file mode 100644 index 0000000..5a7719f --- /dev/null +++ b/src/modules/filters/utf16utf8.cpp @@ -0,0 +1,95 @@ +/****************************************************************************** + * + * UTF16UTF8 - SWFilter decendant to convert UTF-16 to UTF-8 + * + */ + +#include +#include + +#include + +UTF16UTF8::UTF16UTF8() { +} + + +char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned short *from; + unsigned char *to; + + int len; + unsigned long uchar; + unsigned short schar; + + len = 0; + from = (unsigned short*) text; + while (*from) { + len += 2; + from++; + } + + // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned short*)&text[maxlen - len]; + } + else + from = (unsigned short*)text; + + + // ------------------------------- + + for (to = (unsigned char*)text; *from; from++) { + uchar = 0; + + if (*from < 0xD800 || *from > 0xDFFF) { + uchar = *from; + } + else if (*from >= 0xD800 && *from <= 0xDBFF) { + uchar = *from; + schar = *(from+1); + if (uchar < 
0xDC00 || uchar > 0xDFFF) { + //error, do nothing + continue; + } + uchar &= 0x03ff; + schar &= 0x03ff; + uchar <<= 10; + uchar |= schar; + uchar += 0x10000; + from++; + } + else { + //error, do nothing + continue; + } + + if (uchar < 0x80) { + *to++ = uchar; + } + else if (uchar < 0x800) { + *to++ = 0xc0 | (uchar >> 6); + *to++ = 0x80 | (uchar & 0x3f); + } + else if (uchar < 0x10000) { + *to++ = 0xe0 | (uchar >> 12); + *to++ = 0x80 | (uchar >> 6) & 0x3f; + *to++ = 0x80 | uchar & 0x3f; + } + else if (uchar < 0x200000) { + *to++ = 0xF0 | (uchar >> 18); + *to++ = 0x80 | (uchar >> 12) & 0x3F; + *to++ = 0x80 | (uchar >> 6) & 0x3F; + *to++ = 0x80 | uchar & 0x3F; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + + + + diff --git a/src/modules/filters/utf8arshaping.cpp b/src/modules/filters/utf8arshaping.cpp new file mode 100644 index 0000000..5121f48 --- /dev/null +++ b/src/modules/filters/utf8arshaping.cpp @@ -0,0 +1,48 @@ +/****************************************************************************** +* +* utf8arshaping - SWFilter decendant to perform Arabic shaping on +* UTF-8 text +*/ + +#ifdef _ICU_ + +#include +#include + +#ifdef __GNUC__ +#include +#endif + +#include + +UTF8arShaping::UTF8arShaping() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8arShaping::~UTF8arShaping() { + ucnv_close(conv); +} + +char UTF8arShaping::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; + ustr2 = new UChar[len]; + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + + len = u_shapeArabic(ustr, len, ustr2, len, U_SHAPE_LETTERS_SHAPE | U_SHAPE_DIGITS_EN2AN, &err); + + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8bidireorder.cpp b/src/modules/filters/utf8bidireorder.cpp new file mode 100644 index 
0000000..8fa7280 --- /dev/null +++ b/src/modules/filters/utf8bidireorder.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** +* +* utf8cnormalizer - SWFilter decendant to perform reordering of UTF-8 +* text to visual order according to Unicode BiDi +*/ + +#ifdef _ICU_ + +#include +#include + +#ifdef __GNUC__ +#include +#endif + +#include + +UTF8BiDiReorder::UTF8BiDiReorder() { + + conv = ucnv_open("UTF-8", &err); + +} + +UTF8BiDiReorder::~UTF8BiDiReorder() { + ucnv_close(conv); +} + +char UTF8BiDiReorder::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + UChar *ustr, *ustr2; + + int32_t len = strlen(text); + ustr = new UChar[len]; //each char could become a surrogate pair + + // Convert UTF-8 string to UTF-16 (UChars) + len = ucnv_toUChars(conv, ustr, len, text, -1, &err); + ustr2 = new UChar[len]; + + UBiDi* bidi = ubidi_openSized(len + 1, 0, &err); + ubidi_setPara(bidi, ustr, len, UBIDI_DEFAULT_RTL, NULL, &err); + len = ubidi_writeReordered(bidi, ustr2, len, + UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + ubidi_close(bidi); + +// len = ubidi_writeReverse(ustr, len, ustr2, len, +// UBIDI_DO_MIRRORING | UBIDI_REMOVE_BIDI_CONTROLS, &err); + + ucnv_fromUChars(conv, text, maxlen, ustr2, len, &err); + + delete [] ustr2; + delete [] ustr; + return 0; +} + +#endif diff --git a/src/modules/filters/utf8cantillation.cpp b/src/modules/filters/utf8cantillation.cpp new file mode 100644 index 0000000..84cb513 --- /dev/null +++ b/src/modules/filters/utf8cantillation.cpp @@ -0,0 +1,64 @@ +/****************************************************************************** + * + * UTF8Cantillation - SWFilter decendant to remove UTF-8 Hebrew cantillation + * + */ + + +#include +#include +#include + + +const char UTF8Cantillation::on[] = "On"; +const char UTF8Cantillation::off[] = "Off"; +const char UTF8Cantillation::optName[] = "Hebrew Cantillation"; +const char UTF8Cantillation::optTip[] = 
"Toggles Hebrew Cantillation Marks"; + +UTF8Cantillation::UTF8Cantillation() { + option = false; + options.push_back(on); + options.push_back(off); +} + +UTF8Cantillation::~UTF8Cantillation(){}; + +void UTF8Cantillation::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8Cantillation::getOptionValue() +{ + return (option) ? on:off; +} + +char UTF8Cantillation::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0x90 to 0xD6 0xAF and 0xD7 0x84 consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if (*from != 0xD6) { + if (*from == 0xD7 && *(from + 1) == 0x84) { + from++; + } + else { + *to++ = *from; + } + } + else if (*(from + 1) < 0x90 || *(from + 1) > 0xAF) { + *to++ = *from; + from++; + *to++ = *from; + } + else { + from++; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8greekaccents.cpp b/src/modules/filters/utf8greekaccents.cpp new file mode 100644 index 0000000..b0e5dc8 --- /dev/null +++ b/src/modules/filters/utf8greekaccents.cpp @@ -0,0 +1,252 @@ +/****************************************************************************** + * + * UTF8GreekAccents - SWFilter decendant to remove UTF-8 Greek accents + * + */ + + +#include +#include +#include + + +const char UTF8GreekAccents::on[] = "On"; +const char UTF8GreekAccents::off[] = "Off"; +const char UTF8GreekAccents::optName[] = "Greek Accents"; +const char UTF8GreekAccents::optTip[] = "Toggles Greek Accents"; + +UTF8GreekAccents::UTF8GreekAccents() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8GreekAccents::~UTF8GreekAccents(){}; + +void UTF8GreekAccents::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8GreekAccents::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8GreekAccents::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + for (from = (unsigned char*)text; *from; from++) { + //first just remove combining characters + if (*from == 0xE2 && *(from + 1) == 0x80 && *(from + 2) == 0x99) + from += 2; + else if (*from == 0xCC && *(from + 1)) { + if (*(from + 1) == 0x80 || *(from + 1) == 0x81 || *(from + 1) == 0x82 || *(from + 1) == 0x88 || *(from + 1) == 0x93 || *(from + 1) == 0x94) + from++; + } + else if (*from == 0xCD && *(from + 1) == 0xBA) + from++; + //now converted pre-composed characters to their alphabetic bases, discarding the accents + + //Greek + //capital alpha + else if ((*from == 0xCE && *(from + 1) == 0x86)) { + *to++ = 0xCE; + *to++ = 0x91; + from++; + } + //capital epsilon + else if ((*from == 0xCE && *(from + 1) == 0x88)) { + *to++ = 0xCE; + *to++ = 0x95; + from++; + } + //capital eta + else if ((*from == 0xCE && *(from + 1) == 0x89)) { + *to++ = 0xCE; + *to++ = 0x97; + from++; + } + //capital iota + else if ((*from == 0xCE && (*(from + 1) == 0x8A || *(from + 1) == 0xAA))) { + *to++ = 0xCE; + *to++ = 0x99; + from++; + } + //capital omicron + else if ((*from == 0xCE && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0x9F; + from++; + } + //capital upsilon + else if ((*from == 0xCE && (*(from + 1) == 0x8E || *(from + 1) == 0xAB))) { + *to++ = 0xCE; + *to++ = 0xA5; + from++; + } + //capital omega + else if ((*from == 0xCE && *(from + 1) == 0x8F)) { + *to++ = 0xCE; + *to++ = 0xA9; + from++; + } + + //alpha + else if ((*from == 0xCE && *(from + 1) == 0xAC)) { + *to++ = 0xCE; + *to++ = 0xB1; + from++; + } + //epsilon + else if ((*from == 0xCE && *(from + 1) == 0xAD)) { + *to++ = 0xCE; + *to++ = 0xB5; + from++; + } + //eta + else if ((*from == 0xCE && *(from + 1) == 0xAE)) { + *to++ = 0xCE; + *to++ = 0xB7; + from++; + } + //iota + else if ((*from == 0xCE && *(from + 1) == 0xAF) || 
(*from == 0xCF && *(from + 1) == 0x8A)) { + *to++ = 0xCE; + *to++ = 0xB9; + from++; + } + //omicron + else if ((*from == 0xCF && *(from + 1) == 0x8C)) { + *to++ = 0xCE; + *to++ = 0xBF; + from++; + } + //upsilon + else if ((*from == 0xCE && *(from + 1) == 0x88) || (*from == 0xCF && (*(from + 1) == 0x8B || *(from + 1) == 0x8D))) { + *to++ = 0xCF; + *to++ = 0x85; + from++; + } + //omega + else if ((*from == 0xCF && *(from + 1) == 0x8E)) { + *to++ = 0xCF; + *to++ = 0x89; + from++; + } + + //Extended Greek + //capital alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x88 && *(from + 2) <= 0x8F) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBC)) { + *to++ = 0xCE; + *to++ = 0x91; + from+=2; + } + //capital epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x98 && *(from + 2) <= 0x9D) || (*(from + 1) == 0xBF && (*(from + 2) == 0x88 || *(from + 2) == 0x89)))) { + *to++ = 0xCE; + *to++ = 0x95; + from+=2; + } + //capital eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBE && *(from + 2) >= 0x98 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0x8A && *(from + 2) <= 0x8C))) { + *to++ = 0xCE; + *to++ = 0x97; + from+=2; + } + //capital iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB8 && *(from + 2) <= 0xBF) || (*(from + 1) == 0xBF && *(from + 2) >= 0x98 && *(from + 2) <= 0x9B))) { + *to++ = 0xCE; + *to++ = 0x99; + from+=2; + } + //capital omicron + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x88 && *(from + 2) <= 0x8D) || (*(from + 1) == 0xBF && *(from + 2) == 0xB8 || *(from + 2) == 0xB9))) { + *to++ = 0xCE; + *to++ = 0x9F; + from+=2; + } + //capital upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && *(from + 2) >= 0x99 && *(from + 2) <= 0x9F) || (*(from + 1) == 0xBF && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAB))) { + *to++ = 0xCE; + 
*to++ = 0xA5; + from+=2; + } + //capital omega + else if (*from == 0xE1 && (((*(from + 1) == 0xBD || *(from + 1) == 0xBE) && *(from + 2) >= 0xA8 && *(from + 2) <= 0xAF) || (*(from + 1) == 0xBF && *(from + 2) >= 0xBA && *(from + 2) <= 0xBC))) { + *to++ = 0xCE; + *to++ = 0xA9; + from+=2; + } + //capital rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && *(from + 2) == 0xAC) { + *to++ = 0xCE; + *to++ = 0xA1; + from+=2; + } + + //alpha + else if (*from == 0xE1 && ((*(from + 1) == 0xBC || *(from + 1) == 0xBE) && *(from + 2) >= 0x80 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB0 || *(from + 2) == 0xB1)) || (*(from + 1) == 0xBE && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7)) { + *to++ = 0xCE; + *to++ = 0xB1; + from+=2; + } + //epsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0x90 && *(from + 2) <= 0x95) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB2 || *(from + 2) == 0xB3)))) { + *to++ = 0xCE; + *to++ = 0xB5; + from+=2; + } + //eta + else if (*from == 0xE1 && ((*(from + 1) == 0xBE && *(from + 2) >= 0x90 && *(from + 2) <= 0x97) || (*(from + 1) == 0xBC && *(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 1) == 0xBF && *(from + 2) >= 0x82 && *(from + 2) <= 0x87) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB4 || *(from + 2) == 0xB5)))) { + *to++ = 0xCE; + *to++ = 0xB7; + from+=2; + } + //iota + else if (*from == 0xE1 && ((*(from + 1) == 0xBC && *(from + 2) >= 0xB0 && *(from + 2) <= 0xB7) || (*(from + 1) == 0xBD && (*(from + 2) == 0xB6 || *(from + 2) == 0xB7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0x90 && *(from + 2) <= 0x97))) { + *to++ = 0xCE; + *to++ = 0xB9; + from+=2; + } + //omicron + else if (*from == 0xE1 && (*(from + 1) == 0xBD && ((*(from + 2) >= 0x80 && *(from + 2) <= 0x85) || (*(from + 2) == 0xB8 || *(from + 2) == 0xB9)))) { + *to++ = 0xCE; + *to++ = 0xBF; + from+=2; + } + //upsilon + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0x90 && *(from + 2) <= 0x97) || *(from + 2) == 
0xBA || *(from + 2) == 0xBB)) || (*(from + 1) == 0xBF && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA3) || *(from + 2) == 0xA6 || *(from + 2) == 0xA7)))) { + *to++ = 0xCF; + *to++ = 0x85; + from+=2; + } + //omega + else if (*from == 0xE1 && ((*(from + 1) == 0xBD && ((*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7) || (*(from + 2) == 0xBC || *(from + 2) == 0xBD))) || (*(from + 1) == 0xBE && (*(from + 2) >= 0xA0 && *(from + 2) <= 0xA7)) || (*(from + 1) == 0xBF && *(from + 2) >= 0xB2 && *(from + 2) <= 0xB7))) { + *to++ = 0xCF; + *to++ = 0x89; + from+=2; + } + //rho + else if (*from == 0xE1 && *(from + 1) == 0xBF && (*(from + 2) == 0xA4 && *(from + 2) == 0xA5)) { + *to++ = 0xCF; + *to++ = 0x81; + from+=2; + } + else + *to++ = *from; + } + *to++ = 0; + *to = 0; + } + return 0; +} + + + + + + diff --git a/src/modules/filters/utf8hebrewpoints.cpp b/src/modules/filters/utf8hebrewpoints.cpp new file mode 100644 index 0000000..e5b50e1 --- /dev/null +++ b/src/modules/filters/utf8hebrewpoints.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * + * UTF8HebrewPoints - SWFilter decendant to remove UTF-8 Hebrew vowel points + * + */ + + +#include +#include +#include + + +const char UTF8HebrewPoints::on[] = "On"; +const char UTF8HebrewPoints::off[] = "Off"; +const char UTF8HebrewPoints::optName[] = "Hebrew Vowel Points"; +const char UTF8HebrewPoints::optTip[] = "Toggles Hebrew Vowel Points"; + +UTF8HebrewPoints::UTF8HebrewPoints() { + option = true; + options.push_back(on); + options.push_back(off); +} + +UTF8HebrewPoints::~UTF8HebrewPoints(){}; + +void UTF8HebrewPoints::setOptionValue(const char *ival) +{ + option = (!stricmp(ival, on)); +} + +const char *UTF8HebrewPoints::getOptionValue() +{ + return (option) ? 
on:off; +} + +char UTF8HebrewPoints::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + if (!option) { + unsigned char *to, *from; + + to = (unsigned char*)text; + //The UTF-8 range 0xD6 0xB0 to 0xD6 0xBF excluding 0xD6 0x consist of Hebrew cantillation marks so block those out. + for (from = (unsigned char*)text; *from; from++) { + if ((*from == 0xD6) && (*(from + 1) >= 0xB0 && *(from + 1) <= 0xBF) && (*(from + 1) != 0xBE)) { + from++; + } + else { + *to++ = *from; + } + } + *to++ = 0; + *to = 0; + } + return 0; +} diff --git a/src/modules/filters/utf8html.cpp b/src/modules/filters/utf8html.cpp new file mode 100644 index 0000000..7487815 --- /dev/null +++ b/src/modules/filters/utf8html.cpp @@ -0,0 +1,66 @@ +/****************************************************************************** + * + * utf8html - SWFilter decendant to convert a UTF-8 stream to HTML escapes + * + */ + + +#include +#include +#include + +UTF8HTML::UTF8HTML() { +} + + +char UTF8HTML::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *to, *from; + int len; + char digit[10]; + unsigned long ch; + + len = strlenw(text) + 2; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else from = (unsigned char*)text; + // ------------------------------- + for (to = (unsigned char*)text; *from; from++) { + ch = 0; + if ((*from & 128) != 128) { +// if (*from != ' ') + *to++ = *from; + continue; + } + if ((*from & 128) && ((*from & 64) != 64)) { + // error + *from = 'x'; + continue; + } + *from <<= 1; + int subsequent; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + ch <<= 6; + ch |= from[subsequent]; + } + subsequent--; + *from <<=1; + char significantFirstBits = 8 - (2+subsequent); + + ch |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + *to++ 
= '&'; + *to++ = '#'; + sprintf(digit, "%d", ch); + for (char *dig = digit; *dig; dig++) + *to++ = *dig; + *to++ = ';'; + } + *to++ = 0; + *to = 0; + return 0; +} diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..6cc1acd --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,74 @@ +/****************************************************************************** + * + * UTF8Latin1 - SWFilter decendant to convert UTF-8 to Latin-1 + * + */ + +#include +#include + +#include + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + unsigned short *to; + + int len; + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + len = strlen(text) + 1; // shift string to right of buffer + if (len < maxlen) { + memmove(&text[maxlen - len], text, len); + from = (unsigned char*)&text[maxlen - len]; + } + else + from = (unsigned char*)text; + + + // ------------------------------- + + for (to = (unsigned short*)text; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0xff) { + *to++ = (unsigned char)uchar; + } + else { + *to++ = replacementChar; + } + } + *to++ = 0; + *to = 0; + + return 0; +} + diff --git a/src/modules/filters/utf8nfc.cpp b/src/modules/filters/utf8nfc.cpp new file mode 100644 index 0000000..df9e090 --- /dev/null +++ 
b/src/modules/filters/utf8nfc.cpp
@@ -0,0 +1,66 @@
+/******************************************************************************
+*
+* utf8nfc - SWFilter descendant to perform NFC (canonical composition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include
+#include
+
+#ifdef __GNUC__
+#include
+#endif
+
+#include
+
+UTF8NFC::UTF8NFC() {
+	err = U_ZERO_ERROR;	// ICU calls return at once if the status already holds a failure
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFC::~UTF8NFC() {
+	ucnv_close(conv);
+}
+
+/*
+ * Rewrites the UTF-8 string in text as its NFC form, in place: UTF-8 is
+ * converted to UTF-16, normalized, and converted back into text (at most
+ * maxlen bytes).  key and module are unused.  Always returns 0, including
+ * when an ICU call fails.
+ */
+char UTF8NFC::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	// err lives across calls; clear it so one failure does not disable the filter for good
+	err = U_ZERO_ERROR;
+
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1]; //each char could become a surrogate pair
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+	if (U_FAILURE(err)) {
+		delete [] source;
+		return 0;
+	}
+
+	// Preflight: ask for the normalized length first so target is always big
+	// enough and NUL-terminated (the result can be longer than the source)
+	int32_t tlen = unorm_normalize(source, len, UNORM_NFC, 0, 0, 0, &err);
+	if (err == U_BUFFER_OVERFLOW_ERROR)
+		err = U_ZERO_ERROR;	// expected outcome of a preflight call
+	target = new UChar[tlen + 1];
+
+	//canonical composition
+	tlen = unorm_normalize(source, len, UNORM_NFC, 0, target, tlen + 1, &err);
+
+	ucnv_fromUChars(conv, text, maxlen, target, tlen, &err);
+
+	delete [] source;
+	delete [] target;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8nfkd.cpp b/src/modules/filters/utf8nfkd.cpp
new file mode 100644
index 0000000..450cbbf
--- /dev/null
+++ b/src/modules/filters/utf8nfkd.cpp
@@ -0,0 +1,66 @@
+/******************************************************************************
+*
+* utf8nfkd - SWFilter descendant to perform NFKD (compatibility decomposition
+* normalization) on UTF-8 text
+*/
+
+#ifdef _ICU_
+
+#include
+#include
+
+#ifdef __GNUC__
+#include
+#endif
+
+#include
+
+UTF8NFKD::UTF8NFKD() {
+	err = U_ZERO_ERROR;	// ICU calls return at once if the status already holds a failure
+	conv = ucnv_open("UTF-8", &err);
+}
+
+UTF8NFKD::~UTF8NFKD() {
+	ucnv_close(conv);
+}
+
+/*
+ * Rewrites the UTF-8 string in text as its NFKD form, in place: UTF-8 is
+ * converted to UTF-16, normalized, and converted back into text (at most
+ * maxlen bytes).  key and module are unused.  Always returns 0, including
+ * when an ICU call fails.
+ */
+char UTF8NFKD::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	// err lives across calls; clear it so one failure does not disable the filter for good
+	err = U_ZERO_ERROR;
+
+	int32_t len = strlen(text) * 2;
+	source = new UChar[len + 1]; //each char could become a surrogate pair
+
+	// Convert UTF-8 string to UTF-16 (UChars)
+	len = ucnv_toUChars(conv, source, len, text, -1, &err);
+	if (U_FAILURE(err)) {
+		delete [] source;
+		return 0;
+	}
+
+	// Preflight: decomposition usually grows the text, so ask for the
+	// normalized length first and size target to fit (plus the NUL)
+	int32_t tlen = unorm_normalize(source, len, UNORM_NFKD, 0, 0, 0, &err);
+	if (err == U_BUFFER_OVERFLOW_ERROR)
+		err = U_ZERO_ERROR;	// expected outcome of a preflight call
+	target = new UChar[tlen + 1];
+
+	//compatibility decomposition
+	tlen = unorm_normalize(source, len, UNORM_NFKD, 0, target, tlen + 1, &err);
+
+	ucnv_fromUChars(conv, text, maxlen, target, tlen, &err);
+
+	delete [] source;
+	delete [] target;
+
+	return 0;
+}
+
+#endif
diff --git a/src/modules/filters/utf8transliterator.cpp b/src/modules/filters/utf8transliterator.cpp
new file mode 100644
index 0000000..7bc068a
--- /dev/null
+++ b/src/modules/filters/utf8transliterator.cpp
@@ -0,0 +1,491 @@
+/******************************************************************************
+*
+* utf8transliterators - SWFilter descendant to transliterate between
+* ICU-supported scripts.
+*/
+
+#ifdef _ICU_
+
+#include
+#include
+
+#ifdef __GNUC__
+#include
+#endif
+
+#include
+
+const char UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = {
+	"Off",
+	"Latin",
+	"Basic Latin",
+	"Beta",
+	"BGreek",
+/*
+	"Greek",
+	"Hebrew",
+	"Cyrillic",
+	"Arabic",
+	"Syriac",
+	"Katakana",
+	"Hiragana",
+	"Jamo",
+	"Hangul",
+	"Devanagari",
+	"Tamil",
+	"Bengali",
+	"Gurmukhi",
+	"Gujarati",
+	"Oriya",
+	"Telugu",
+	"Kannada",
+	"Malayalam",
+	"Thai",
+	"Georgian",
+	"Armenian",
+	"Ethiopic",
+	"Gothic",
+	"Ugaritic",
+	"Coptic"
+	*/
+};
+
+const char UTF8Transliterator::optName[] = "Transliteration";
+const char UTF8Transliterator::optTip[] = "Transliterates between scripts";
+
+UTF8Transliterator::UTF8Transliterator() {
+	option = 0;
+	unsigned long i;
+	for (i = 0; i < NUMTARGETSCRIPTS; i++) {
+		options.push_back(optionstring[i]);
+	}
+}
+
+void UTF8Transliterator::setOptionValue(const char *ival)
+{
+	// Search from the last valid index down to 1; index 0 ("Off") is the
+	// fallback.  The scan must not start at NUMTARGETSCRIPTS itself, which
+	// is one element past the end of optionstring.
+	option = 0;
+	for (unsigned char i = NUMTARGETSCRIPTS - 1; i > 0; i--) {
+		if (!stricmp(ival, optionstring[i])) {
+			option = i;
+			break;
+		}
+	}
+}
+
+const char *UTF8Transliterator::getOptionValue()
+{
+	return (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0;
+}
+
+/*
+ * When an option other than "Off" is selected, detects which scripts occur
+ * in text and runs one compound ICU transliterator over it, writing the
+ * result back into text (at most maxlen bytes).  Always returns 0.
+ */
+char UTF8Transliterator::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	if (option) {	// if we want transliteration
+		unsigned long i, j;
+		UErrorCode err = U_ZERO_ERROR;
+		UConverter * conv = NULL;
+		conv = ucnv_open("UTF-8", &err);
+
+		bool compat = false;
+		bool noNFC = false;
+
+		if (option == SE_JAMO) {
+			noNFC = true;
+		}
+
+		// Convert UTF-8 string to UTF-16 (UChars)
+		j = strlen(text);
+		int32_t len = (j * 2) + 1;
+		UChar *source = new UChar[len];
+		err = U_ZERO_ERROR;
+		len = ucnv_toUChars(conv, source, len, text, j, &err);
+		source[len] = 0;
+
+		// Figure out which scripts are used in the string
+		unsigned char scripts[NUMSCRIPTS];
+
+		for (i = 0; i < NUMSCRIPTS; i++) {
+			scripts[i] = false;
+		}
+
+		for (i = 0; i < len; i++) {
+			j = ublock_getCode(source[i]);
+			switch (j) {
+			case UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = true; break;
+			case UBLOCK_GREEK: scripts[SE_GREEK] = true; break;
+			case UBLOCK_HEBREW: scripts[SE_HEBREW] = true; break;
+			case UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = true; break;
+			case UBLOCK_ARABIC: scripts[SE_ARABIC] = true; break;
+			case UBLOCK_SYRIAC: scripts[SE_SYRIAC] = true; break;
+			case UBLOCK_KATAKANA: scripts[SE_KATAKANA] = true; break;
+			case UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = true; break;
+			case UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = true; break;
+			case UBLOCK_HANGUL_JAMO: scripts[SE_JAMO] = true; break;
+			case UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = true; break;
+			case UBLOCK_TAMIL: scripts[SE_TAMIL] = true; break;
+			case UBLOCK_BENGALI: scripts[SE_BENGALI] = true; break;
+			case UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = true; break;
+			case UBLOCK_GUJARATI: scripts[SE_GUJARATI] = true; break;
+			case UBLOCK_ORIYA: scripts[SE_ORIYA] = true; break;
+			case UBLOCK_TELUGU: scripts[SE_TELUGU] = true; break;
+			case UBLOCK_KANNADA: scripts[SE_KANNADA] = true; break;
+			case UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = true; break;
+			case UBLOCK_THAI: scripts[SE_THAI] = true; break;
+			case UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = true; break;
+			case UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = true; break;
+			case UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = true; break;
+			case UBLOCK_GOTHIC: scripts[SE_GOTHIC] = true; break;
+			// needs Unicode 3.2? or 4.0? support from ICU
+			//case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;
+			case UBLOCK_CJK_RADICALS_SUPPLEMENT:
+			case UBLOCK_KANGXI_RADICALS:
+			case UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS:
+			case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
+			case UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A:
+			case UBLOCK_CJK_UNIFIED_IDEOGRAPHS:
+				scripts[SE_HAN] = true;
+				break;
+			case UBLOCK_CJK_COMPATIBILITY:
+			case UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS:
+			case UBLOCK_CJK_COMPATIBILITY_FORMS:
+				scripts[SE_HAN] = true;
+				compat = true;
+				break;
+			case UBLOCK_HANGUL_COMPATIBILITY_JAMO:
+				scripts[SE_HANGUL] = true;
+				compat = true;
+				break;
+
+			default: scripts[SE_LATIN] = true;
+			}
+		}
+		scripts[option] = false; //turn off the reflexive transliteration
+
+		//return if we have no transliteration to do for this text
+		j = 0;
+		for (i = 0; !j && i < NUMSCRIPTS; i++) {
+			if (scripts[i]) j++;
+		}
+		if (!j) {
+			delete [] source;	// was leaked on this early exit
+			ucnv_close(conv);
+			return 0;
+		}
+
+		UnicodeString id;
+		if (compat) {
+			id = UnicodeString("NFKD");
+		}
+		else {
+			id = UnicodeString("NFD");
+		}
+
+		//Simple X to Latin transliterators
+		if (scripts[SE_GREEK]) {
+			if (option == SE_BETA)
+				id += UnicodeString(";Greek-Beta");
+			else if (option == SE_BGREEK)
+				id += UnicodeString(";Greek-BGreek");
+			else {
+				if (!strnicmp (((SWModule*)module)->Lang(), "cop", 3)) {
+					id += UnicodeString(";Coptic-Latin");
+				}
+				else {
+					id += UnicodeString(";Greek-Latin");
+				}
+				scripts[SE_LATIN] = true;
+			}
+		}
+		if (scripts[SE_HEBREW]) {
+			if (option == SE_BETA)
+				id += UnicodeString(";Hebrew-CCAT");
+			else if (option == SE_SYRIAC)
+				id += UnicodeString(";Hebrew-Syriac");
+			else {
+				id += UnicodeString(";Hebrew-Latin");
+				scripts[SE_LATIN] = true;
+			}
+		}
+		if (scripts[SE_CYRILLIC]) {
+			id += UnicodeString(";Cyrillic-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_ARABIC]) {
+			id += UnicodeString(";Arabic-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_SYRIAC]) {
+			if (option == SE_BETA)
+				id += UnicodeString(";Syriac-CCAT");
+			else if (option == SE_HEBREW)
+				id += UnicodeString(";Syriac-Hebrew");
+			else {
+				id += UnicodeString(";Syriac-Latin");
+				scripts[SE_LATIN] = true;
+			}
+		}
+		if (scripts[SE_THAI]) {
+			id += UnicodeString(";Thai-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_GEORGIAN]) {
+			id += UnicodeString(";Georgian-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_ARMENIAN]) {
+			id += UnicodeString(";Armenian-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_ETHIOPIC]) {
+			id += UnicodeString(";Ethiopic-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_GOTHIC]) {
+			id += UnicodeString(";Gothic-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_UGARITIC]) {
+			id += UnicodeString(";Ugaritic-Latin");
+			scripts[SE_LATIN] = true;
+		}
+		if (scripts[SE_HAN]) {
+			if (!strnicmp (((SWModule*)module)->Lang(), "ja", 2)) {
+				id += UnicodeString(";Kanji-OnRomaji");
+			}
+			else {
+				id += UnicodeString(";Han-Pinyin");
+			}
+			scripts[SE_LATIN] = true;
+		}
+
+		// Inter-Kana and Kana to Latin transliterators
+		if (option == SE_HIRAGANA && scripts[SE_KATAKANA]) {
+			id += UnicodeString(";Katakana-Hiragana");
+			scripts[SE_HIRAGANA] = true;
+		}
+		else if (option == SE_KATAKANA && scripts[SE_HIRAGANA]) {
+			id += UnicodeString(";Hiragana-Katakana");
+			scripts[SE_KATAKANA] = true;
+		}
+		else {
+			if (scripts[SE_KATAKANA]) {
+				id += UnicodeString(";Katakana-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_HIRAGANA]) {
+				id += UnicodeString(";Hiragana-Latin");
+				scripts[SE_LATIN] = true;
+			}
+		}
+
+		// Inter-Korean and Korean to Latin transliterators
+		if (option == SE_HANGUL && scripts[SE_JAMO]) {
+			noNFC = false;
+			scripts[SE_HANGUL] = true;
+		}
+		else if (option == SE_JAMO && scripts[SE_HANGUL]) {
+			noNFC = true;
+			scripts[SE_JAMO] = true;
+		}
+		else {
+			if (scripts[SE_HANGUL]) {
+				id += UnicodeString(";Hangul-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_JAMO]) {
+				id += UnicodeString(";Jamo-Latin");
+				scripts[SE_LATIN] = true;
+			}
+		}
+
+		// Indic-Latin
+		if (option < SE_DEVANAGARI || option > SE_MALAYALAM) {
+			// Indic to Latin
+			if (scripts[SE_TAMIL]) {
+				id += UnicodeString(";Tamil-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_BENGALI]) {
+				id += UnicodeString(";Bengali-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_GURMUKHI]) {
+				id += UnicodeString(";Gurmukhi-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_GUJARATI]) {
+				id += UnicodeString(";Gujarati-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_ORIYA]) {
+				id += UnicodeString(";Oriya-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_TELUGU]) {
+				id += UnicodeString(";Telugu-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_KANNADA]) {
+				id += UnicodeString(";Kannada-Latin");
+				scripts[SE_LATIN] = true;
+			}
+			if (scripts[SE_MALAYALAM]) {
+				id += UnicodeString(";Malayalam-Latin");
+				scripts[SE_LATIN] = true;
+			}
+		}
+		else {
+			if (scripts[SE_LATIN]) {
+				id += UnicodeString(";Latin-InterIndic");
+			}
+			if (scripts[SE_DEVANAGARI]) {
+				id += UnicodeString(";Devanagari-InterIndic");
+			}
+			if (scripts[SE_TAMIL]) {
+				id += UnicodeString(";Tamil-InterIndic");
+			}
+			if (scripts[SE_BENGALI]) {
+				id += UnicodeString(";Bengali-InterIndic");
+			}
+			if (scripts[SE_GURMUKHI]) {
+				id += UnicodeString(";Gurmukhi-InterIndic");	// was misspelled "Gurmurkhi"
+			}
+			if (scripts[SE_GUJARATI]) {
+				id += UnicodeString(";Gujarati-InterIndic");
+			}
+			if (scripts[SE_ORIYA]) {
+				id += UnicodeString(";Oriya-InterIndic");
+			}
+			if (scripts[SE_TELUGU]) {
+				id += UnicodeString(";Telugu-InterIndic");
+			}
+			if (scripts[SE_KANNADA]) {
+				id += UnicodeString(";Kannada-InterIndic");
+			}
+			if (scripts[SE_MALAYALAM]) {
+				id += UnicodeString(";Malayalam-InterIndic");
+			}
+
+			switch(option) {
+			case SE_DEVANAGARI:
+				id += UnicodeString(";InterIndic-Devanagari");
+				break;
+			case SE_TAMIL:
+				id += UnicodeString(";InterIndic-Tamil");
+				break;
+			case SE_BENGALI:
+				id += UnicodeString(";InterIndic-Bengali");
+				break;
+			case SE_GURMUKHI:
+				id += UnicodeString(";InterIndic-Gurmukhi");
+				break;
+			case SE_GUJARATI:
+				id += UnicodeString(";InterIndic-Gujarati");
+				break;
+			case SE_ORIYA:
+				id += UnicodeString(";InterIndic-Oriya");
+				break;
+			case SE_TELUGU:
+				id += UnicodeString(";InterIndic-Telugu");
+				break;
+			case SE_KANNADA:
+				id += UnicodeString(";InterIndic-Kannada");
+				break;
+			case SE_MALAYALAM:
+				id += UnicodeString(";InterIndic-Malayalam");
+				break;
+			default:
+				id += UnicodeString(";InterIndic-Latin");
+				scripts[SE_LATIN] = true;
+				break;
+			}
+		}
+
+		if (scripts[SE_LATIN]) {
+			switch (option) {
+			case SE_GREEK:
+				id += UnicodeString(";Latin-Greek");
+				break;
+			case SE_HEBREW:
+				id += UnicodeString(";Latin-Hebrew");
+				break;
+			case SE_CYRILLIC:
+				id += UnicodeString(";Latin-Cyrillic");
+				break;
+			case SE_ARABIC:
+				id += UnicodeString(";Latin-Arabic");
+				break;
+			case SE_SYRIAC:
+				id += UnicodeString(";Latin-Syriac");
+				break;
+			case SE_THAI:
+				id += UnicodeString(";Latin-Thai");
+				break;
+			case SE_GEORGIAN:
+				id += UnicodeString(";Latin-Georgian");
+				break;
+			case SE_ARMENIAN:
+				id += UnicodeString(";Latin-Armenian");
+				break;
+			case SE_ETHIOPIC:
+				id += UnicodeString(";Latin-Ethiopic");
+				break;
+			case SE_GOTHIC:
+				id += UnicodeString(";Latin-Gothic");
+				break;
+			case SE_UGARITIC:
+				id += UnicodeString(";Latin-Ugaritic");
+				break;
+			case SE_COPTIC:
+				id += UnicodeString(";Latin-Coptic");
+				break;
+			case SE_KATAKANA:
+				id += UnicodeString(";Latin-Katakana");
+				break;
+			case SE_HIRAGANA:
+				id += UnicodeString(";Latin-Hiragana");
+				break;
+			case SE_JAMO:
+				id += UnicodeString(";Latin-Jamo");
+				break;
+			case SE_HANGUL:
+				id += UnicodeString(";Latin-Hangul");
+				break;
+			}
+		}
+
+		if (option == SE_BASICLATIN) {
+			id += UnicodeString(";Any-Latin1");
+		}
+
+		if (noNFC) {
+			id += UnicodeString(";NFD");
+		} else {
+			id += UnicodeString(";NFC");
+		}
+
+		UParseError perr;
+
+		err = U_ZERO_ERROR;
+		Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err);
+		if (trans) {
+			UnicodeString target = UnicodeString(source);
+			trans->transliterate(target);
+			len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err);
+			if (len < maxlen) *(text + len) = 0;
+			else *(text + maxlen) = 0;	// NOTE(review): writes at index maxlen - confirm the buffer holds maxlen + 1 bytes
+			delete trans;
+		}
+		delete [] source;	// was never released
+		ucnv_close(conv);
+	}
+	return 0;
+}
+#endif
diff --git a/src/modules/filters/utf8utf16.cpp b/src/modules/filters/utf8utf16.cpp
new file mode 100644
index 0000000..9aea6fe
--- /dev/null
+++ b/src/modules/filters/utf8utf16.cpp
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * UTF8UTF16 -	SWFilter descendant to convert UTF-8 to UTF-16
+ *
+ */
+
+#include
+#include
+
+#include
+
+UTF8UTF16::UTF8UTF16() {
+}
+
+
+/*
+ * Converts the UTF-8 string in text to UTF-16 in place: the input is first
+ * moved to the end of the maxlen-byte buffer, then decoded and written from
+ * the front as 16-bit units, ending with a 16-bit zero.  Always returns 0.
+ * NOTE(review): nothing here checks that the output stays behind the read
+ * pointer or inside maxlen - confirm callers size the buffer accordingly.
+ */
+char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
+{
+	unsigned char *from;
+	unsigned short *to;
+
+	int len;
+	unsigned long uchar;
+	unsigned char significantFirstBits, subsequent;
+	unsigned short schar;
+
+	len = strlen(text) + 1;						// shift string to right of buffer
+	if (len < maxlen) {
+		memmove(&text[maxlen - len], text, len);
+		from = (unsigned char*)&text[maxlen - len];
+	}
+	else
+		from = (unsigned char*)text;
+
+
+	// -------------------------------
+
+	for (to = (unsigned short*)text; *from; from++) {
+		uchar = 0;
+		if ((*from & 128) != 128) {
+			// if (*from != ' ')
+			uchar = *from;
+		}
+		else if ((*from & 128) && ((*from & 64) != 64)) {
+			// error, do nothing
+			continue;
+		}
+		else {
+			*from <<= 1;
+			for (subsequent = 1; (*from & 128); subsequent++) {
+				*from <<= 1;
+				from[subsequent] &= 63;
+				uchar <<= 6;
+				uchar |= from[subsequent];
+			}
+			subsequent--;
+			*from <<=1;
+			significantFirstBits = 8 - (2+subsequent);
+
+			uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
+			from += subsequent;
+		}
+
+		if (uchar < 0x10000) {	// fits one 16-bit unit; anything above needs a surrogate pair
+			*to++ = (unsigned short)uchar;
+		}
+		else {
+			uchar -= 0x10000;
+			schar = 0xD800 | ((uchar >> 10) & 0x03ff);	// high surrogate: top 10 bits
+			uchar &= 0x03ff;
+			uchar |= 0xDC00;				// low surrogate: bottom 10 bits
+			*to++ = (unsigned short)schar;
+			*to++ = (unsigned short)uchar;
+		}
+	}
+	*to = (unsigned short)0;
+
+	return 0;
+}
+
diff --git a/src/modules/genbook/Makefile b/src/modules/genbook/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/genbook/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/genbook/Makefile.am b/src/modules/genbook/Makefile.am
new file mode 100644
index 0000000..02f6ab1
--- /dev/null
+++ b/src/modules/genbook/Makefile.am
@@ -0,0 +1,5 @@
+genbookdir = $(top_srcdir)/src/modules/genbook
+
+libsword_la_SOURCES += $(genbookdir)/swgenbook.cpp
+
+include ../src/modules/genbook/rawgenbook/Makefile.am
diff --git a/src/modules/genbook/rawgenbook/Makefile b/src/modules/genbook/rawgenbook/Makefile
new file mode 100644
index 0000000..aab8056
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/Makefile
@@ -0,0 +1,4 @@
+root := ../../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/genbook/rawgenbook/Makefile.am b/src/modules/genbook/rawgenbook/Makefile.am
new file mode 100644
index 0000000..a176d75
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/Makefile.am
@@ -0,0 +1,4 @@
+rawgenbookdir = $(top_srcdir)/src/modules/genbook/rawgenbook
+
+libsword_la_SOURCES += $(rawgenbookdir)/rawgenbook.cpp
+
diff --git a/src/modules/genbook/rawgenbook/rawgenbook.cpp b/src/modules/genbook/rawgenbook/rawgenbook.cpp
new file mode 100644
index 0000000..fab309f
--- /dev/null
+++ b/src/modules/genbook/rawgenbook/rawgenbook.cpp
@@ -0,0 +1,250 @@
+/******************************************************************************
+ *  rawgenbook.cpp - code for class 'RawGenBook' - a module that reads entries
+ *	from a raw .bdt data file, located through a TreeKeyIdx key
+ */
+
+
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#else
+#include
+#endif
+
+#include
+#include
+#include
+#include
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+/******************************************************************************
+ * RawGenBook Constructor - Initializes data for instance of RawGenBook
+ *
+ * ENT:	ipath - path of the module files; ".bdt" is appended for the data file
+ *	iname - Internal name for module
+ *	idesc - Name to display to user for module
+ */
+
+RawGenBook::RawGenBook(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+		: SWGenBook(iname, idesc, idisp, enc, dir, mark, ilang) {
+	int fileMode = O_RDWR;
+	char *buf = new char [ strlen (ipath) + 20 ];
+
+	entryBuf = 0;
+	path = 0;
+	stdstr(&path, ipath);
+
+
+	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+		path[strlen(path)-1] = 0;
+
+	delete key;
+	key = CreateKey();
+
+
+	sprintf(buf, "%s.bdt", path);
+	bdtfd = FileMgr::systemFileMgr.open(buf, fileMode|O_BINARY, true);
+
+	delete [] buf;
+
+}
+
+
+/******************************************************************************
+ * RawGenBook Destructor - Cleans up instance of RawGenBook
+ */
+
+RawGenBook::~RawGenBook() {
+
+	FileMgr::systemFileMgr.close(bdtfd);
+
+	if (path)
+		delete [] path;
+
+	if (entryBuf)
+		delete [] entryBuf;
+}
+
+
+/******************************************************************************
+ * RawGenBook::getRawEntry	- Reads the entry for the current key from the
+ *				  .bdt file, using the offset/size in the key's user data
+ *
+ * RET: string buffer with entry (empty string when the key has no user data)
+ */
+
+char *RawGenBook::getRawEntry() {
+
+	__u32 offset = 0;
+	__u32 size = 0;
+
+	TreeKeyIdx *key = 0;
+#ifndef _WIN32_WCE
+	try {
+#endif
+		key = SWDYNAMIC_CAST(TreeKeyIdx, (this->key));
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+
+	if (!key) {
+		key = (TreeKeyIdx *)CreateKey();
+		(*key) = *(this->key);
+	}
+
+	if (entryBuf)
+		delete [] entryBuf;
+
+	int dsize;
+	key->getUserData(&dsize);
+	if (dsize > 7) {
+		memcpy(&offset, key->getUserData(), 4);
+		offset = swordtoarch32(offset);
+
+		memcpy(&size, key->getUserData() + 4, 4);
+		size = swordtoarch32(size);
+
+		entrySize = size;        // support getEntrySize call
+
+		entryBuf = new char [ (size + 2) * FILTERPAD ];
+		memset(entryBuf, 0, (size + 2) * FILTERPAD);	// read() below does not NUL-terminate the text
+		lseek(bdtfd->getFd(), offset, SEEK_SET);
+		read(bdtfd->getFd(), entryBuf, size);
+
+		rawFilter(entryBuf, size, key);
+
+		if (!isUnicode())
+			RawStr::preptext(entryBuf);
+	}
+	else {
+		entryBuf = new char [2];
+		entryBuf[0] = 0;
+		entryBuf[1] = 0;
+		entrySize = 0;
+	}
+
+	if (key != this->key) // free our key if we created a TreeKeyIdx
+		delete key;
+
+	return entryBuf;
+}
+
+
+
+#ifdef _MSC_VER
+SWModule &RawGenBook::operator =(SW_POSITION p) {
+#else
+RawGenBook &RawGenBook::operator =(SW_POSITION p) {
+#endif
+	SWModule::operator =(p);
+	return *this;
+}
+
+SWModule &RawGenBook::setentry(const char *inbuf, long len) {
+
+	__u32 offset = archtosword32(lseek(bdtfd->getFd(), 0, SEEK_END));
+	__u32 size = 0;
+	TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+
+	char userData[8];
+
+	if (!len)
+		len = strlen(inbuf);
+
+	write(bdtfd->getFd(), inbuf, len);
+
+	size = archtosword32(len);
+	memcpy(userData, &offset, 4);
+	memcpy(userData+4, &size, 4);
+	key->setUserData(userData, 8);
+	key->save();
+
+	return *this;
+}
+
+SWModule &RawGenBook::operator <<(const char *inbuf) {
+	return setentry(inbuf, 0);
+}
+
+
+SWModule &RawGenBook::operator <<(const SWKey *inkey) {
+	TreeKeyIdx *srckey = 0;
+	TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+	// see if we have a TreeKeyIdx * or descendant
+#ifndef _WIN32_WCE
+	try {
+#endif
+		srckey = SWDYNAMIC_CAST(TreeKeyIdx, inkey);
+#ifndef _WIN32_WCE
+	}
+	catch ( ... ) {}
+#endif
+	// if we don't have a TreeKeyIdx * descendant, create our own
+	if (!srckey) {
+		srckey = (TreeKeyIdx *)CreateKey();
+		(*srckey) = *inkey;
+	}
+
+	key->setUserData(srckey->getUserData(), 8);
+	key->save();
+
+	if (inkey != srckey) // free our key if we created a TreeKeyIdx
+		delete srckey;
+
+	return *this;
+}
+
+
+/******************************************************************************
+ * RawGenBook::deleteEntry	- deletes this entry
+ *
+ * RET: nothing
+ */
+
+void RawGenBook::deleteEntry() {
+	TreeKeyIdx *key = ((TreeKeyIdx *)this->key);
+	key->remove();
+}
+
+
+/******************************************************************************
+ * RawGenBook::createModule	- removes any existing <ipath>.bdt, creates a
+ *				  new empty one and then calls TreeKeyIdx::create
+ *
+ * RET: result of TreeKeyIdx::create
+ */
+
+char RawGenBook::createModule(const char *ipath) {
+	char *path = 0;
+	char *buf = new char [ strlen (ipath) + 20 ];
+	FileDesc *fd;
+	signed char retval;
+
+	stdstr(&path, ipath);
+
+	if ((path[strlen(path)-1] == '/') || (path[strlen(path)-1] == '\\'))
+		path[strlen(path)-1] = 0;
+
+	sprintf(buf, "%s.bdt", path);
+	unlink(buf);
+	fd = FileMgr::systemFileMgr.open(buf, O_CREAT|O_WRONLY|O_BINARY, S_IREAD|S_IWRITE);
+	fd->getFd();
+	FileMgr::systemFileMgr.close(fd);
+
+	retval = TreeKeyIdx::create(path);
+	delete [] buf;	// was leaked
+	delete [] path;
+	return retval;
+}
+
+
+SWKey *RawGenBook::CreateKey() {
+	TreeKeyIdx *newKey = new TreeKeyIdx(path);
+	return newKey;
+}
diff --git a/src/modules/genbook/swgenbook.cpp b/src/modules/genbook/swgenbook.cpp
new file mode 100644
index 0000000..589b0b9
--- /dev/null
+++ b/src/modules/genbook/swgenbook.cpp
@@ -0,0 +1,27 @@
+/******************************************************************************
+ *  swgenbook.cpp - code for base class 'SWGenBook'.  SWGenBook registers
+ *		itself with SWModule under the module type "Generic Books".
+ */
+
+#include
+
+
+/******************************************************************************
+ * SWGenBook Constructor - Initializes data for instance of SWGenBook
+ *
+ * ENT:	imodname - Internal name for module
+ *	imoddesc - Name to display to user for module
+ *	idisp	 - Display object to use for displaying
+ */
+
+SWGenBook::SWGenBook(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Generic Books", enc, dir, mark, ilang) {
+}
+
+
+/******************************************************************************
+ * SWGenBook Destructor - Cleans up instance of SWGenBook
+ */
+
+SWGenBook::~SWGenBook() {
+}
+
diff --git a/src/modules/lexdict/Makefile b/src/modules/lexdict/Makefile
new file mode 100644
index 0000000..1a2d00d
--- /dev/null
+++ b/src/modules/lexdict/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/lexdict/Makefile.am b/src/modules/lexdict/Makefile.am
new file mode 100644
index 0000000..8cfe68b
--- /dev/null
+++ b/src/modules/lexdict/Makefile.am
@@ -0,0 +1,7 @@
+lexdictdir = $(top_srcdir)/src/modules/lexdict
+
+libsword_la_SOURCES += $(lexdictdir)/swld.cpp
+
+include ../src/modules/lexdict/rawld/Makefile.am
+include ../src/modules/lexdict/rawld4/Makefile.am
+include ../src/modules/lexdict/zld/Makefile.am
diff --git a/src/modules/lexdict/rawld/Makefile b/src/modules/lexdict/rawld/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/lexdict/rawld/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/lexdict/rawld/Makefile.am b/src/modules/lexdict/rawld/Makefile.am
new file mode 100644
index 0000000..2a2d996
--- /dev/null
+++ b/src/modules/lexdict/rawld/Makefile.am
@@ -0,0 +1,4 @@
+rawlddir = $(top_srcdir)/src/modules/lexdict/rawld
+
+libsword_la_SOURCES += $(rawlddir)/rawld.cpp
+
diff --git a/src/modules/lexdict/rawld/no13.c b/src/modules/lexdict/rawld/no13.c
new file mode 100644
index 0000000..1e94846
--- /dev/null
+++ b/src/modules/lexdict/rawld/no13.c
@@ -0,0 +1,46 @@
+#include
+#include
+
+/*
+ * no13 - copies the named file to stdout.  CR (0x0d) and Ctrl-Z (0x1a)
+ * bytes are not copied.  When two or more CRs have been counted, that many
+ * CRs plus one LF are written in front of the next byte that is not CR, LF
+ * or Ctrl-Z; the count restarts at every such byte.
+ */
+
+int main(int argc, char **argv)
+{
+	int fd, loop;
+	char ch;
+	char breakcnt = 0;
+
+	if (argc != 2) {
+		fprintf(stderr, "This program writes to stdout, so to be useful,\n\tit should be redirected (e.g no13 bla > bla.dat)\nusage: %s \n", argv[0]);
+		exit(1);
+	}
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {	/* previously an unreadable file silently produced empty output */
+		perror(argv[1]);
+		exit(1);
+	}
+	while (read(fd, &ch, 1) == 1) {
+		if (ch == 0x0d) {	// CR
+			breakcnt++;
+			continue;
+		}
+		if (ch == 0x1a)	// Ctrl-Z
+			continue;
+
+		if (ch != 0x0a) {	// LF
+			if (breakcnt > 1) {
+				for (loop = breakcnt; loop > 0; loop--)
+					putchar(0x0d);
+				putchar(0x0a);
+			}
+			breakcnt=0;
+		}
+		putchar(ch);
+	}
+	close(fd);
+	return 0;
+}
diff --git a/src/modules/lexdict/rawld/rawld.cpp b/src/modules/lexdict/rawld/rawld.cpp
new file mode 100644
index 0000000..b22c28b
--- /dev/null
+++ b/src/modules/lexdict/rawld/rawld.cpp
@@ -0,0 +1,207 @@
+/******************************************************************************
+ *  rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and
+ *				dictionary files: *.dat *.idx
+ */
+
+
+#include
+#include
+#include
+
+#ifndef __GNUC__
+#include
+#else
+#include
+#endif
+
+#include
+#include
+#include
+#include
+
+
+ /******************************************************************************
+ * RawLD Constructor - Initializes data for instance of RawLD
+ *
+ * ENT:	ipath	- path and filename of files (no extension)
+ *		iname	- Internal name for module
+ *		idesc	- Name to display to user for module
+ *		idisp	- Display object to use for displaying
+ */
+
+RawLD::RawLD(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang)
+{
+}
+
+
+/******************************************************************************
+ * RawLD Destructor - Cleans up instance of RawLD
+ */
+
+RawLD::~RawLD()
+{
+}
+
+
+/******************************************************************************
+ * RawLD::strongsPad	- Pads a key if it is 100% digits to 5 places
+ *
+ * ENT: buf -	buffer to check and pad; needs room for 5 digits plus the NUL
+ */
+
+void RawLD::strongsPad(char *buf)
+{
+	const char *check;
+	int size = 0;
+	int len = strlen(buf);
+	if ((len < 5) && (len > 0)) {
+		for (check = buf; *check; check++) {
+			if (!isdigit((unsigned char)*check))	// isdigit is undefined for negative char values
+				break;
+			else size++;
+		}
+
+		if ((size == len) && size)
+			sprintf(buf, "%.5d", atoi(buf));
+	}
+}
+
+
+/******************************************************************************
+ * RawLD::getEntry	- Looks up entry from data file.  'Snaps' to closest
+ *				entry and sets 'entrybuf'.
+ *
+ * ENT: away - number of entries offset from key (default = 0)
+ *
+ * RET: error status
+ */
+
+char RawLD::getEntry(long away)
+{
+	long  start = 0;
+	unsigned short size = 0;
+	char *idxbuf = 0;
+	char retval = 0;
+
+	char *buf = new char [ strlen(*key) + 6 ];
+	strcpy(buf, *key);
+
+	strongsPad(buf);
+
+	if (entrybuf)	// the delete below already allows for a null entrybuf; do the same here
+		*entrybuf = 0;
+	if (!(retval = findoffset(buf, &start, &size, away))) {
+		entrySize = size;        // support getEntrySize call
+		if (entrybuf)
+			delete [] entrybuf;
+		entrybuf = new char [ ++size * FILTERPAD ];
+		idxbuf   = new char [ size * FILTERPAD ];
+
+		gettext(start, size + 1, idxbuf, entrybuf);
+		if (!key->Persist())		// If we have our own key
+			*key = idxbuf;				// reset it to entry index buffer
+
+		stdstr(&entkeytxt, idxbuf);	// set entry key text that module 'snapped' to.
+		delete [] idxbuf;
+	}
+	else {
+		if (entrybuf)
+			delete [] entrybuf;	// the previous buffer was leaked when the lookup failed
+		entrybuf = new char [ 5 ];
+		*entrybuf = 0;
+	}
+
+	delete [] buf;
+	return retval;
+}
+
+
+/******************************************************************************
+ * RawLD::getRawEntry	- Looks up the entry for the current key and returns
+ *				its text
+ *
+ * RET: string buffer with entry
+ */
+
+char *RawLD::getRawEntry() {
+	if (!getEntry() && !isUnicode()) {
+		preptext(entrybuf);
+	}
+
+	return entrybuf;
+}
+
+
+/******************************************************************************
+ * RawLD::operator +=	- Increments module key a number of entries
+ *
+ * ENT:	increment	- Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+SWModule &RawLD::operator +=(int increment)
+{
+	char tmperror;
+
+	if (key->Traversable()) {
+		*key += increment;
+		error = key->Error();
+		increment = 0;
+	}
+
+	tmperror = (getEntry(increment)) ? KEYERR_OUTOFBOUNDS : 0;
+	error = (error)?error:tmperror;
+	*key = entkeytxt;
+	return *this;
+}
+
+
+/******************************************************************************
+ * RawLD::operator =(SW_POSITION)	- Positions this key if applicable
+ */
+
+SWModule &RawLD::operator =(SW_POSITION p)
+{
+	if (!key->Traversable()) {
+		switch (p) {
+		case POS_TOP:
+			*key = "";
+			break;
+		case POS_BOTTOM:
+			*key = "zzzzzzzzz";
+			break;
+		}
+	}
+	else	*key = p;
+	return *this;
+}
+
+
+SWModule &RawLD::setentry(const char *inbuf, long len) {
+	settext(*key, inbuf, len);
+
+	return *this;
+}
+
+SWModule &RawLD::operator <<(const char *inbuf) {
+	return setentry(inbuf, 0);
+}
+
+
+SWModule &RawLD::operator <<(const SWKey *inkey) {
+	linkentry(*key, *inkey);
+
+	return *this;
+}
+
+
+/******************************************************************************
+ * RawLD::deleteEntry	- deletes this entry by storing empty text for the key
+ *
+ * RET: nothing
+ */
+
+void RawLD::deleteEntry() {
+	settext(*key, "");
+}
diff --git a/src/modules/lexdict/rawld/rawldidx.c b/src/modules/lexdict/rawld/rawldidx.c
new file mode 100644
index 0000000..cc4709c --- /dev/null +++ b/src/modules/lexdict/rawld/rawldidx.c @@ -0,0 +1,96 @@ +/***************************************************************************** + * Bible dictionary index utility + */ + + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include + + +char findbreak(int fd, long *offset, short *size) +{ + char buf[3]; + char rc = 1; + long offset2; + + memset(buf, 0, sizeof(buf)); + + while (read(fd, &buf[sizeof(buf)-1], 1) == 1) { + if ((buf[0] == 10) && (buf[1] == '$') && (buf[2] == '$')) { + while (read(fd, buf, 1) == 1) { + if (*buf == 10) { + if (read(fd, buf, 1) == 1) { + *offset = lseek(fd, 0, SEEK_CUR); + rc = 0; + if (size) { + if (!findbreak(fd, &offset2, 0)) + *size = offset2 - *offset - 13; + else *size = lseek(fd, 0, SEEK_END) - *offset; + lseek(fd, *offset, SEEK_SET); + } + break; + } + } + } + break; + } + memmove(buf, &buf[1], sizeof(buf)-1); + } + return rc; +} + + +void main(int argc, char **argv) +{ + int fd, ifd; + long offset; + short size; + char *buf; + + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + buf = (char *) calloc(strlen(argv[1]) + 5, 1); + +#ifndef O_BINARY // O_BINARY is for Borland to be happy. 
If we're in GNU, just define it to a NULL mask +#define O_BINARY 0 +#endif + sprintf(buf, "%s.dat", argv[1]); + fd = open(buf, O_RDONLY|O_BINARY); + + sprintf(buf, "%s.idx", argv[1]); + ifd = open(buf, O_CREAT|O_WRONLY|O_BINARY); + + offset = 0; /* write offset for intro */ + write(ifd, &offset, 4); + findbreak(fd, &offset, 0); + lseek(fd, 0L, SEEK_SET); + size = offset - 12; + write(ifd, &size, 2); + + buf[3] = 0; /* delimit string for read below */ + + while(!findbreak(fd, &offset, &size)) { + write(ifd, &offset, 4); + write(ifd, &size, 2); + read(fd, buf, 3); + printf("Found: %s...(%ld:%d)\n", buf, offset, size); + } + + free(buf); + + close(ifd); + close(fd); +} diff --git a/src/modules/lexdict/rawld/strongsidx.c b/src/modules/lexdict/rawld/strongsidx.c new file mode 100644 index 0000000..61bcda3 --- /dev/null +++ b/src/modules/lexdict/rawld/strongsidx.c @@ -0,0 +1,90 @@ +/***************************************************************************** + * Bible dictionary index utility + */ + + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include + + +char findbreak(int fd, long *offset, short *size) +{ + char buf[3]; + char rc = 1; + long offset2; + + memset(buf, 0, sizeof(buf)); + + while (read(fd, &buf[sizeof(buf)-1], 1) == 1) { + if ((buf[0] == 10) && (isdigit(buf[1])) && (isdigit(buf[2]))) { + if (read(fd, buf, 1) == 1) { + *offset = lseek(fd, 0, SEEK_CUR) - 3; + rc = 0; + if (size) { + if (!findbreak(fd, &offset2, 0)) + *size = offset2 - *offset; + else *size = lseek(fd, 0, SEEK_END) - *offset; + lseek(fd, *offset, SEEK_SET); + } + break; + } + break; + } + memmove(buf, &buf[1], sizeof(buf)-1); + } + return rc; +} + + +void main(int argc, char **argv) +{ + int fd, ifd; + long offset; + short size; + char *buf; + char entbuf[6]; + + if (argc < 2) { + fprintf(stderr, "usage: %s \n", argv[0]); + exit(1); + } + + buf = (char *) calloc(strlen(argv[1]) + 5, 1); + + sprintf(buf, "%s.dat", argv[1]); + fd 
= open(buf, O_RDONLY); + + sprintf(buf, "%s.idx", argv[1]); + ifd = open(buf, O_CREAT|O_WRONLY); + + offset = 0; /* write offset for intro */ + write(ifd, &offset, 4); + findbreak(fd, &offset, 0); + lseek(fd, 0L, SEEK_SET); + size = offset - 12; + write(ifd, &size, 2); + + entbuf[5] = 0; /* delimit string for read below */ + + while(!findbreak(fd, &offset, &size)) { + write(ifd, &offset, 4); + write(ifd, &size, 2); + read(fd, entbuf, 5); + printf("Found: %s...(%ld:%d)\n", entbuf, offset, size); + } + + free(buf); + + close(ifd); + close(fd); +} diff --git a/src/modules/lexdict/rawld4/Makefile b/src/modules/lexdict/rawld4/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/lexdict/rawld4/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/lexdict/rawld4/Makefile.am b/src/modules/lexdict/rawld4/Makefile.am new file mode 100644 index 0000000..697e2e5 --- /dev/null +++ b/src/modules/lexdict/rawld4/Makefile.am @@ -0,0 +1,4 @@ +rawld4dir = $(top_srcdir)/src/modules/lexdict/rawld4 + +libsword_la_SOURCES += $(rawld4dir)/rawld4.cpp + diff --git a/src/modules/lexdict/rawld4/rawld4.cpp b/src/modules/lexdict/rawld4/rawld4.cpp new file mode 100644 index 0000000..61d2786 --- /dev/null +++ b/src/modules/lexdict/rawld4/rawld4.cpp @@ -0,0 +1,204 @@ +/****************************************************************************** + * rawld.cpp - code for class 'RawLD'- a module that reads raw lexicon and + * dictionary files: *.dat *.idx + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + + + /****************************************************************************** + * RawLD Constructor - Initializes data for instance of RawLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for 
displaying + */ + +RawLD4::RawLD4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : RawStr4(ipath), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * RawLD Destructor - Cleans up instance of RawLD + */ + +RawLD4::~RawLD4() +{ +} + + +/****************************************************************************** + * RawLD4::strongsPad - Pads a key if it is 100% digits to 5 places + * + * ENT: buf - buffer to check and pad + */ + +void RawLD4::strongsPad(char *buf) +{ + const char *check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = buf; *check; check++) { + if (!isdigit(*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * RawLD4::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status + */ + +char RawLD4::getEntry(long away) +{ + long start = 0; + unsigned long size = 0; + char *idxbuf = 0; + char retval = 0; + + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findoffset(buf, &start, &size, away))) { + entrySize = size; // support getEntrySize call + if (entrybuf) + delete [] entrybuf; + entrybuf = new char [ ++size * FILTERPAD ]; + idxbuf = new char [ size * FILTERPAD ]; + + gettext(start, size, idxbuf, entrybuf); + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ delete [] idxbuf; + } + else { + entrybuf = new char [ 5 ]; + *entrybuf = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * RawLD4::operator char * - Returns the correct entry when char * cast + * is requested + * + * RET: string buffer with entry + */ + +char *RawLD4::getRawEntry() { + if (!getEntry() && !isUnicode()) { + preptext(entrybuf); + } + + return entrybuf; +} + + +/****************************************************************************** + * RawLD4::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &RawLD4::operator +=(int increment) +{ + char tmperror; + + if (key->Traversable()) { + *key += increment; + error = key->Error(); + increment = 0; + } + + tmperror = (getEntry(increment)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; + return *this; +} + + +/****************************************************************************** + * RawLD4::operator =(SW_POSITION) - Positions this key if applicable + */ + +SWModule &RawLD4::operator =(SW_POSITION p) +{ + if (!key->Traversable()) { + switch (p) { + case POS_TOP: + *key = ""; + break; + case POS_BOTTOM: + *key = "zzzzzzzzz"; + break; + } + } + else *key = p; + return *this; +} + + +SWModule &RawLD4::setentry(const char *inbuf, long len) { + settext(*key, inbuf, len); + + return *this; +} + +SWModule &RawLD4::operator <<(const char *inbuf) { + return setentry(inbuf, 0); +} + + +SWModule &RawLD4::operator <<(const SWKey *inkey) { + linkentry(*key, *inkey); + + return *this; +} + + +/****************************************************************************** + * RawFiles::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawLD4::deleteEntry() { + settext(*key, ""); +} diff --git a/src/modules/lexdict/swld.cpp b/src/modules/lexdict/swld.cpp new file mode 100644 index 
0000000..97adc36 --- /dev/null +++ b/src/modules/lexdict/swld.cpp @@ -0,0 +1,55 @@ +/****************************************************************************** + * swld.cpp - code for base class 'SWLD'. SWLD is the basis for all + * types of Lexicon and Dictionary modules (hence the 'LD'). + */ + +#include + + +/****************************************************************************** + * SWLD Constructor - Initializes data for instance of SWLD + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying (enc, dir, mark, ilang are forwarded to SWModule) + */ + +SWLD::SWLD(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : SWModule(imodname, imoddesc, idisp, "Lexicons / Dictionaries", enc, dir, mark, ilang) +{ + delete key; + key = CreateKey(); + entkeytxt = new char [1]; + *entkeytxt = 0; +} + + +/****************************************************************************** + * SWLD Destructor - Cleans up instance of SWLD (frees entkeytxt) + */ + +SWLD::~SWLD() +{ + if (entkeytxt) + delete [] entkeytxt; +} + + +/****************************************************************************** + * SWLD::KeyText - Sets/gets module KeyText, getting from saved text if key is + * persistent + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext (entkeytxt when the key is persistent and ikeytext is 0) + */ + +const char *SWLD::KeyText(const char *ikeytext) +{ + if (key->Persist() && !ikeytext) { + getRawEntry(); // force module key to snap to entry + return entkeytxt; + } + else return SWModule::KeyText(ikeytext); +} + diff --git a/src/modules/lexdict/zld/Makefile b/src/modules/lexdict/zld/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/lexdict/zld/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/lexdict/zld/Makefile.am b/src/modules/lexdict/zld/Makefile.am new file mode 100644 index 0000000..81e4d7c --- /dev/null +++ b/src/modules/lexdict/zld/Makefile.am @@ -0,0 +1,4 @@ +zlddir = $(top_srcdir)/src/modules/lexdict/zld + +libsword_la_SOURCES += $(zlddir)/zld.cpp + diff --git a/src/modules/lexdict/zld/zld.cpp b/src/modules/lexdict/zld/zld.cpp new file mode 100644 index 0000000..371b8a2 --- /dev/null +++ b/src/modules/lexdict/zld/zld.cpp @@ -0,0 +1,207 @@ +/****************************************************************************** + * zld.cpp - code for class 'zLD'- a module that reads lexicon and + * dictionary files through zStr and an SWCompress compressor + */ + + +#include +#include +#include + +#ifndef __GNUC__ +#include +#else +#include +#endif + +#include +#include +#include +#include + + + /****************************************************************************** + * zLD Constructor - Initializes data for instance of zLD + * + * ENT: ipath - path and filename of files (no extension) + * iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying (blockCount and icomp are forwarded to zStr) + */ + +zLD::zLD(const char *ipath, const char *iname, const char *idesc, long blockCount, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zStr(ipath, -1, blockCount, icomp), SWLD(iname, idesc, idisp, enc, dir, mark, ilang) +{ +} + + +/****************************************************************************** + * zLD Destructor - Cleans up instance of zLD + */ + +zLD::~zLD() +{ +} + + +/****************************************************************************** + * zLD::strongsPad - Zero-pads a key of 1 to 4 characters to 5 places if it is 100% digits + * + * ENT: buf - buffer to check and pad in place (needs room for 5 digits + NUL) + */ + +void zLD::strongsPad(char *buf) +{ + const char *check; + long size = 0; + int len = strlen(buf); + if ((len < 5) && (len > 0)) { + for (check = 
buf; *check; check++) { + if (!isdigit((unsigned char)*check)) + break; + else size++; + } + + if ((size == len) && size) + sprintf(buf, "%.5d", atoi(buf)); + } +} + + +/****************************************************************************** + * zLD::getEntry - Looks up entry from data file. 'Snaps' to closest + * entry and sets 'entrybuf'. + * + * ENT: away - number of entries offset from key (default = 0) + * + * RET: error status from findKeyIndex (0 = entry loaded; otherwise entrybuf is an empty string) + */ + +char zLD::getEntry(long away) +{ + char *idxbuf = 0; + char *ebuf = 0; + char retval = 0; + long index; + unsigned long size; + char *buf = new char [ strlen(*key) + 6 ]; + strcpy(buf, *key); + + strongsPad(buf); + + *entrybuf = 0; + if (!(retval = findKeyIndex(buf, &index, away))) { + getText(index, &idxbuf, &ebuf); + size = strlen(ebuf) + 1; + delete [] entrybuf; // release the previous entry (was leaked on this path) + entrybuf = new char [ size * FILTERPAD ]; + strcpy(entrybuf, ebuf); + + entrySize = size; // support getEntrySize call + if (!key->Persist()) // If we have our own key + *key = idxbuf; // reset it to entry index buffer + + stdstr(&entkeytxt, idxbuf); // set entry key text that module 'snapped' to. 
+ free(idxbuf); + free(ebuf); + } + else { + delete [] entrybuf; // release the previous entry (was leaked on this path) + entrybuf = new char [ 5 ]; + entrybuf[0] = 0; + entrybuf[1] = 0; + } + + delete [] buf; + return retval; +} + + +/****************************************************************************** + * zLD::getRawEntry - Looks up the entry for the current key and returns its + * text; prepText() is applied when getEntry() returns 0 and !isUnicode() + * + * RET: string buffer with entry + */ + +char *zLD::getRawEntry() { + if (!getEntry() && !isUnicode()) { + prepText(entrybuf); + } + + return entrybuf; +} + + +/****************************************************************************** + * zLD::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &zLD::operator +=(int increment) +{ + char tmperror; + + if (key->Traversable()) { + *key += increment; + error = key->Error(); + increment = 0; + } + + tmperror = (getEntry(increment)) ? KEYERR_OUTOFBOUNDS : 0; + error = (error)?error:tmperror; + *key = entkeytxt; + return *this; +} + + +/****************************************************************************** + * zLD::operator =(SW_POSITION) - Positions this key if applicable + */ + +SWModule &zLD::operator =(SW_POSITION p) +{ + if (!key->Traversable()) { + switch (p) { + case POS_TOP: + *key = ""; + break; + case POS_BOTTOM: + *key = "zzzzzzzzz"; + break; + } + } + else *key = p; + return *this; +} + + +SWModule &zLD::setentry(const char *inbuf, long len) { + setText(*key, inbuf, len); + + return *this; +} + +SWModule &zLD::operator <<(const char *inbuf) { + return setentry(inbuf, 0); +} + + +SWModule &zLD::operator <<(const SWKey *inkey) { + linkEntry(*key, *inkey); + + return *this; +} + + +/****************************************************************************** + * zLD::deleteEntry - Deletes the entry at the current key + * + * RET: nothing (calls setText() with empty text for the key) + */ + +void zLD::deleteEntry() { + setText(*key, ""); +} diff --git a/src/modules/readme b/src/modules/readme new file mode 100644 index 0000000..92cc99e 
--- /dev/null +++ b/src/modules/readme @@ -0,0 +1,9 @@ +This directory contains all different module types that are usable by the SWORD +API. + + comments Commentaries + common common utility objects + lexdict Lexicons/Dictionaries + maps Maps + parsers Language Parsers + texts Scripture Texts diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp new file mode 100644 index 0000000..c407894 --- /dev/null +++ b/src/modules/swmodule.cpp @@ -0,0 +1,677 @@ +/****************************************************************************** + * swmodule.cpp -code for base class 'module'. Module is the basis for all + * types of modules (e.g. texts, commentaries, maps, lexicons, + * etc.) + */ + +#include +#include +#include +#include // GNU +#include +#include // KLUDGE for Search +#ifndef _MSC_VER +#include +#endif + +SWDisplay SWModule::rawdisp; +void SWModule::nullPercent(char percent, void *percentUserData) {} + +/****************************************************************************** + * SWModule Constructor - Initializes data for instance of SWModule + * + * ENT: imodname - Internal name for module + * imoddesc - Name to display to user for module + * idisp - Display object to use for displaying + * imodtype - Type of Module (All modules will be displayed with + * others of same type under their modtype heading + * unicode - if this module is unicode + */ + +SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) +{ + key = CreateKey(); + entrybuf = new char [1]; + *entrybuf = 0; + config = &ownConfig; + entrybufallocsize = 0; + modname = 0; + error = 0; + moddesc = 0; + modtype = 0; + modlang = 0; + this->encoding = encoding; + this->direction = direction; + this->markup = markup; + entrySize= -1; + disp = (idisp) ? 
idisp : &rawdisp; + stdstr(&modname, imodname); + stdstr(&moddesc, imoddesc); + stdstr(&modtype, imodtype); + stdstr(&modlang, imodlang); + stripFilters = new FilterList(); + rawFilters = new FilterList(); + renderFilters = new FilterList(); + optionFilters = new FilterList(); + encodingFilters = new FilterList(); + skipConsecutiveLinks = true; + procEntAttr = true; +} + + +/****************************************************************************** + * SWModule Destructor - Cleans up instance of SWModule + */ + +SWModule::~SWModule() +{ + if (entrybuf) + delete [] entrybuf; + if (modname) + delete [] modname; + if (moddesc) + delete [] moddesc; + if (modtype) + delete [] modtype; + if (modlang) + delete [] modlang; + + if (key) { + if (!key->Persist()) + delete key; + } + + stripFilters->clear(); + rawFilters->clear(); + renderFilters->clear(); + optionFilters->clear(); + encodingFilters->clear(); + entryAttributes.clear(); + + delete stripFilters; + delete rawFilters; + delete renderFilters; + delete optionFilters; + delete encodingFilters; +} + + +/****************************************************************************** + * SWModule::CreateKey - Allocates a key of specific type for module + * + * RET: pointer to allocated key + */ + +SWKey *SWModule::CreateKey() +{ + return new SWKey(); +} + + +/****************************************************************************** + * SWModule::Error - Gets and clears error status + * + * RET: error status + */ + +char SWModule::Error() +{ + char retval = error; + + error = 0; + return retval; +} + + +/****************************************************************************** + * SWModule::Name - Sets/gets module name + * + * ENT: imodname - value which to set modname + * [0] - only get + * + * RET: pointer to modname + */ + +char *SWModule::Name(const char *imodname) +{ + return stdstr(&modname, imodname); +} + + +/****************************************************************************** + * 
SWModule::Description - Sets/gets module description + * + * ENT: imoddesc - value which to set moddesc + * [0] - only get + * + * RET: pointer to moddesc + */ + +char *SWModule::Description(const char *imoddesc) +{ + return stdstr(&moddesc, imoddesc); +} + + +/****************************************************************************** + * SWModule::Type - Sets/gets module type + * + * ENT: imodtype - value which to set modtype + * [0] - only get + * + * RET: pointer to modtype + */ + +char *SWModule::Type(const char *imodtype) +{ + return stdstr(&modtype, imodtype); +} + +/****************************************************************************** + * SWModule::Direction - Sets/gets module direction + * + * ENT: newdir - value which to set direction + * [-1] - only get + * + * RET: char direction + */ +char SWModule::Direction(signed char newdir) { + if (newdir != -1) + direction = newdir; + return direction; +} + +/****************************************************************************** + * SWModule::Encoding - Sets/gets module encoding + * + * ENT: newenc - value which to set encoding + * [-1] - only get + * + * RET: char encoding + */ +char SWModule::Encoding(signed char newenc) { + if (newenc != -1) + encoding = newenc; + return encoding; +} + +/****************************************************************************** + * SWModule::Markup - Sets/gets module markup + * + * ENT: newmark - value which to set markup + * [-1] - only get + * + * RET: char markup + */ +char SWModule::Markup(signed char newmark) { + if (newmark != -1) + markup = newmark; + return markup; +} + + +/****************************************************************************** + * SWModule::Lang - Sets/gets module language + * + * ENT: imodlang - value which to set modlang + * [0] - only get + * + * RET: pointer to modlang + */ + +char *SWModule::Lang(const char *imodlang) +{ + return stdstr(&modlang, imodlang); +} + + 
+/****************************************************************************** + * SWModule::Disp - Sets/gets display driver + * + * ENT: idisp - value which to set disp + * [0] - only get + * + * RET: pointer to disp + */ + +SWDisplay *SWModule::Disp(SWDisplay *idisp) +{ + if (idisp) + disp = idisp; + + return disp; +} + + +/****************************************************************************** + * SWModule::Display - Calls this modules display object and passes itself + * + * RET: error status + */ + +char SWModule::Display() +{ + disp->Display(*this); + return 0; +} + + +/****************************************************************************** + * SWModule::SetKey - Sets a key to this module for position to a particular + * record or set of records + * + * ENT: ikey - key with which to set this module + * + * RET: error status + */ + +char SWModule::SetKey(const SWKey &ikey) { + return SetKey(&ikey); +} + +char SWModule::SetKey(const SWKey *ikey) +{ + SWKey *oldKey = 0; + + if (key) { + if (!key->Persist()) // if we have our own copy + oldKey = key; + } + + if (!ikey->Persist()) { // if we are to keep our own copy + key = CreateKey(); + *key = *ikey; + } + else key = (SWKey *)ikey; // if we are to just point to an external key + + if (oldKey) + delete oldKey; + + return 0; +} + + +/****************************************************************************** + * SWModule::KeyText - Sets/gets module KeyText + * + * ENT: ikeytext - value which to set keytext + * [0] - only get + * + * RET: pointer to keytext + */ + +const char *SWModule::KeyText(const char *ikeytext) +{ + if (ikeytext) + SetKey(ikeytext); + + return *key; +} + + +/****************************************************************************** + * SWModule::operator =(SW_POSITION) - Positions this modules to an entry + * + * ENT: p - position (e.g. 
TOP, BOTTOM) + * + * RET: *this + */ + +SWModule &SWModule::operator =(SW_POSITION p) +{ + *key = p; + char saveError = key->Error(); + + switch (p) { + case POS_TOP: + (*this)++; + (*this)--; + break; + + case POS_BOTTOM: + (*this)--; + (*this)++; + break; + } + + error = saveError; + return *this; +} + + +/****************************************************************************** + * SWModule::operator += - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +SWModule &SWModule::operator +=(int increment) +{ + (*key) += increment; + error = key->Error(); + + return *this; +} + + +/****************************************************************************** + * SWModule::operator -= - Decrements module key a number of entries + * + * ENT: decrement - Number of entries to jump backward + * + * RET: *this + */ + +SWModule &SWModule::operator -=(int increment) +{ + (*key) -= increment; + error = key->Error(); + + return *this; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. 
+ * + * RET: listkey set to verses that contain istr + */ + +ListKey &SWModule::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) +{ + SWKey *savekey = 0; + SWKey *searchkey = 0; + regex_t preg; + SWKey textkey; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + const char *sres; + terminateSearch = false; + char perc = 1; + bool savePEA = isProcessEntryAttributes(); + + processEntryAttributes(false); + listkey.ClearList(); + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (scope)?scope->clone():(key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + SetKey(*searchkey); + } + + (*percent)(perc, percentUserData); + // MAJOR KLUDGE: VerseKey::Index still return index within testament. + // VerseKey::NewIndex should be moved to Index and Index should be some + // VerseKey specific name + VerseKey *vkcheck = 0; +#ifndef _WIN32_WCE + try { +#endif + vkcheck = SWDYNAMIC_CAST(VerseKey, key); +#ifndef _WIN32_WCE + } + catch (...) {} +#endif + // end MAJOR KLUDGE + + *this = BOTTOM; + // fix below when we find out the bug + long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index(); + if (!highIndex) + highIndex = 1; // avoid division by zero errors. 
+ *this = TOP; + if (searchType >= 0) { + flags |=searchType|REG_NOSUB|REG_EXTENDED; + regcomp(&preg, istr, flags); + } + + (*percent)(++perc, percentUserData); + if (searchType == -2) { + wordBuf = (char *)calloc(sizeof(char), strlen(istr) + 1); + strcpy(wordBuf, istr); + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + } + + perc = 5; + (*percent)(perc, percentUserData); + + while (!Error() && !terminateSearch) { + + + long mindex = 0; + if (vkcheck) + mindex = vkcheck->NewIndex(); + else mindex = key->Index(); + float per = (float)mindex / highIndex; + per *= 93; + per += 5; + char newperc = (char)per; +// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex))); + if (newperc > perc) { + perc = newperc; + (*percent)(perc, percentUserData); + } + else if (newperc < perc) { +#ifndef _MSC_VER + cerr << "Serious error: new percentage complete is less than previous value\n"; + cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n"; + cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n"; + cerr << "highIndex: " << highIndex << "\n"; + cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; + cerr << "perc == " << (int )perc << "% \n"; +#endif + } + if (searchType >= 0) { + if (!regexec(&preg, StripText(), 0, 0, 0)) { + textkey = KeyText(); + listkey << textkey; + } + } + else { + if (searchType == -1) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(StripText(), istr) : strstr(StripText(), istr); + if (sres) { + textkey = KeyText(); + listkey << textkey; + } + } + if (searchType == -2) { + int i; + const char *stripBuf = StripText(); + for (i = 0; i < wordCount; i++) { + sres = ((flags & REG_ICASE) == REG_ICASE) ? 
stristr(stripBuf, words[i]) : strstr(stripBuf, words[i]); + if (!sres) + break; + } + if (i == wordCount) { + textkey = KeyText(); + listkey << textkey; + } + + } + } + (*this)++; + } + if (searchType >= 0) + regfree(&preg); + + if (searchType == -2) { + free(words); + free(wordBuf); + } + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + listkey = TOP; + processEntryAttributes(savePEA); + (*percent)(100, percentUserData); + + return listkey; +} + + +/****************************************************************************** + * SWModule::StripText() - calls all stripfilters on current text + * + * ENT: buf - buf to massage instead of this modules current text + * len - max len of buf + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(char *buf, int len) +{ + return RenderText(buf, len, false); +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: buf - buffer to Render instead of current module position + * + * RET: listkey set to verses that contain istr + */ + + const char *SWModule::RenderText(char *buf, int len, bool render) { + entryAttributes.clear(); + char *tmpbuf = (buf) ? buf : getRawEntry(); + SWKey *key = 0; + static char *null = ""; + + if (tmpbuf) { + unsigned long size = (len < 0) ? 
getEntrySize() * FILTERPAD : len; + if (size < 0) + size = strlen(tmpbuf); + if (size > 0) { + key = (SWKey *)*this; + + optionFilter(tmpbuf, size, key); + + if (render) { + renderFilter(tmpbuf, size, key); + encodingFilter(tmpbuf, size, key); + } + else stripFilter(tmpbuf, size, key); + } + } + else { + tmpbuf = null; + } + + return tmpbuf; +} + + +/****************************************************************************** + * SWModule::RenderText - calls all renderfilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by RenderFilers + */ + + const char *SWModule::RenderText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = RenderText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +/****************************************************************************** + * SWModule::StripText - calls all StripTextFilters on current text + * + * ENT: tmpKey - key to use to grab text + * + * RET: this module's text at specified key location massaged by Strip filters + */ + +const char *SWModule::StripText(SWKey *tmpKey) +{ + SWKey *savekey; + const char *retVal; + + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + SetKey(*tmpKey); + + retVal = StripText(); + + SetKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + return retVal; +} + + +SWModule::operator const char*() { + return RenderText(); +} + + +const char *SWModule::getConfigEntry(const char *key) const { + ConfigEntMap::iterator it = config->find(key); + return (it != config->end()) ? 
it->second.c_str() : 0; +} + + +void SWModule::setConfig(ConfigEntMap *config) { + this->config = config; +} diff --git a/src/modules/tests/Makefile b/src/modules/tests/Makefile new file mode 100644 index 0000000..81f7721 --- /dev/null +++ b/src/modules/tests/Makefile @@ -0,0 +1,4 @@ +root := ../../.. + +all: + make -C ${root} diff --git a/src/modules/tests/echomod.cpp b/src/modules/tests/echomod.cpp new file mode 100644 index 0000000..65e689b --- /dev/null +++ b/src/modules/tests/echomod.cpp @@ -0,0 +1,21 @@ +/****************************************************************************** + * echomod.cpp - code for class 'echomod'- a dummy test text module that just + * echos back the key + */ + +#include + + +EchoMod::EchoMod() : SWText("echomod", "Echos back key") +{ +} + + +EchoMod::~EchoMod() { +} + + +EchoMod::operator const char*() +{ + return *key; +} diff --git a/src/modules/texts/Makefile b/src/modules/texts/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/texts/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am new file mode 100644 index 0000000..b48d93e --- /dev/null +++ b/src/modules/texts/Makefile.am @@ -0,0 +1,7 @@ +textsdir = $(top_srcdir)/src/modules/texts + +libsword_la_SOURCES += $(textsdir)/swtext.cpp + +include ../src/modules/texts/rawtext/Makefile.am +include ../src/modules/texts/ztext/Makefile.am +include ../src/modules/texts/rawgbf/Makefile.am diff --git a/src/modules/texts/rawgbf/Gbf.c b/src/modules/texts/rawgbf/Gbf.c new file mode 100644 index 0000000..2b7f786 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbf.c @@ -0,0 +1,485 @@ +/* Output from p2c, the Pascal-to-C translator */ +/* From input file "Gbf.pas" */ + + +#include + + +typedef enum { + tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, tokWord, + tokSpace, tokSync, tokControl, tokChar, tokFont +} TToken; +typedef enum { + caBold, caSmallCaps, caItalic, caOTQuote, caRed, caSuperscript, caUnderline, + caSubscript +} TCharacterAttribute; +typedef long TCharAttribs; + + + +typedef struct TBookNameRec { + Char Name[256], Abbr[256]; + uchar Num; +} TBookNameRec; + +typedef TBookNameRec TBookAbbr[116]; + + +typedef struct TReadGBF { + /* private*/ + FILE *F; + Char FName[256], TokenLine[256]; + long TokenPos; + boolean fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, + fInHebrewTitle, fInSectionTitle; + + /* public*/ + Char sBook[256], sChapter[256], sVerse[256], sMode[256]; + Char sContext[256]; /*// Last text type (header, body, or tail)*/ + Char sTitle[256]; /*// Title of this book of the Bible*/ + Char sPsalmBookTitle[256]; /*// Title of this Psalm book*/ + Char sHebrewTitle[256]; /*// Psalm Hebrew title*/ + Char sSectionTitle[256]; /*// Section headings*/ + Char sDate[256], sFontName[256]; + long iTotalWords; + Char chJustification, chDirection; + boolean fIndent, fPoetry; + TCharAttribs CharAttribs; + uchar bBk, bChap, bVs, bWd; + /* + function Init({const*/ + /*sFileName: string): 
boolean; + procedure Done; + function GetToken(var TokenKind: TToken): string; + */ +} TReadGBF; + +typedef struct TWriteGBF { + /* private*/ + FILE *F; + Char FName[256], LineOut[256]; + boolean fFileIsOpen; + uchar bBk, bChap, bVs, bWd; + + /* public*/ + /* + function Init({const*/ + /*sFileName: string): boolean; + function Done: boolean; + procedure Out({const*/ + /*s: string); +*/ +} TWriteGBF; + +/*implementation */ + + +/*//0*/ +/*//1*/ +/*//2*/ +/*//3*/ +/*//4*/ +/*//5*/ +/*//6*/ +/*//7*/ +/*//8*/ +/*//9*/ +/*//10*/ +/*//11*/ +/*//12*/ +/*//13*/ +/*//14*/ +/*//15*/ +/*//16*/ +/*//17*/ +/*//18*/ +/*//19*/ +/*//20*/ +/*//21*/ +/*//22*/ +/*//0*/ +/*//1*/ +/*//2*/ +/*//3*/ +/*//4*/ +/*//5*/ +/*//6*/ +/*//7*/ +/*//8*/ +/*//9*/ +/*//10*/ +/*//11*/ +/*//12*/ +/*//13*/ +/*//14*/ +/*//15*/ +/*//16*/ +/*//17*/ +/*//18*/ +/*//19*/ +/*//20*/ +/*//21*/ +/*//22*/ +/*//23*/ +/*//24*/ +/*//26*/ +/*//27*/ +/*//28*/ +/*//29*/ +/*//30*/ +/*//31*/ +/*//32*/ +/*//33*/ +/*//34*/ +/*//35*/ +/*//36*/ +/*//37*/ +/*//38*/ +/*//39*/ +/*//40*/ +/*//41*/ +/*//42*/ +/*//43*/ +/*//44*/ +/*//45*/ +/*//46*/ +/*//47*/ +/*//48*/ +/*//49*/ +/*//50*/ +/*//51*/ +/*//52*/ +/*//53*/ +/*//54*/ +/*//55*/ +/*//56*/ +/*//57*/ +/*//58*/ +/*//59*/ +/*//60*/ +/*//61*/ +/*//62*/ +/*//63*/ +/*//64*/ +/*//65*/ +/*//66*/ +/*//67*/ +/*//68*/ +/*//69*/ +/*//70*/ +/*//71*/ +/*//72*/ +/*//73*/ +/*//74*/ +/*//75*/ +/*//76*/ +/*//77*/ +/*//78*/ +/*//79*/ +/*//80*/ +/*//81*/ +/*//82*/ +/*//83*/ +/*//84*/ +/*//85*/ +/*//86*/ +/*//87*/ +/*//88*/ +/*//89*/ +/*//90*/ +/*//91*/ +/*//92*/ + +Static TBookAbbr BookAbbr = { + { "1 Chronicles", "1CH", 13 }, + { "1 Corinthians", "1CO", 70 }, + { "1 Esdras", "1E", 52 }, + { "1 John", "1J", 86 }, + { "1 Kings", "1K", 11 }, + { "1 Maccabees", "1M", 50 }, + { "1 Peter", "1P", 84 }, + { "1 Samuel", "1S", 9 }, + { "1 Thessalonians", "1TH", 76 }, + { "1 Timothy", "1TI", 78 }, + { "2 Chronicles", "2CH", 14 }, + { "2 Corinthians", "2CO", 71 }, + { "2 Esdras", "2E", 56 }, + { "2 
John", "2J", 87 }, + { "2 Kings", "2K", 12 }, + { "2 Maccabees", "2M", 51 }, + { "2 Peter", "2P", 85 }, + { "2 Samuel", "2S", 10 }, + { "2 Thessalonians", "2TH", 77 }, + { "2 Timothy", "2TI", 79 }, + { "3 John", "3J", 88 }, + { "3 Maccabees", "3M", 55 }, + { "4 Maccabees", "4M", 57 }, + { "1 Chronicles", "1 CH", 13 }, + { "1 Corinthians", "1 CO", 70 }, + { "1 Esdras", "1 E", 52 }, + { "1 John", "1 J", 86 }, + { "1 Kings", "1 K", 11 }, + { "1 Maccabees", "1 M", 50 }, + { "1 Peter", "1 P", 84 }, + { "1 Samuel", "1 S", 9 }, + { "1 Thessalonians", "1 TH", 76 }, + { "1 Timothy", "1 TI", 78 }, + { "2 Chronicles", "2 CH", 14 }, + { "2 Corinthians", "2 CO", 71 }, + { "2 Esdras", "2 E", 56 }, + { "2 John", "2 J", 87 }, + { "2 Kings", "2 K", 12 }, + { "2 Maccabees", "2 M", 51 }, + { "2 Peter", "2 P", 85 }, + { "2 Samuel", "2 S", 10 }, + { "2 Thessalonians", "2 TH", 77 }, + { "2 Timothy", "2 TI", 79 }, + { "3 John", "3 J", 88 }, + { "3 Maccabees", "3 M", 55 }, + { "4 Maccabees", "4 M", 57 }, + { "Acts", "AC", 68 }, + { "Amos", "AM", 30 }, + { "Prayer of Asariah and the Song of the Three Jews", "AZ", 47 }, + { "Baruch", "BA", 45 }, + { "Bel and the Dragon", "BE", 49 }, + { "Colossians", "CO", 75 }, + { "Daniel", "DA", 27 }, + { "Deuteronomy", "DE", 5 }, + { "Deuteronomy", "DT", 5 }, + { "Ecclesiasties", "EC", 21 }, + { "Esther", "ES", 17 }, + { "Exodus", "EX", 2 }, + { "Ezekiel", "EZE", 26 }, + { "Ezra", "EZR", 15 }, + { "Galatians", "GA", 72 }, + { "Genesis", "GE", 1 }, + { "Genesis", "GN", 1 }, + { "Ephesians", "EP", 73 }, + { "Esther (Greek)", "GR", 42 }, + { "Habakkuk", "HAB", 35 }, + { "Haggai", "HAG", 37 }, + { "Hebrews", "HE", 82 }, + { "Hosea", "HO", 28 }, + { "Isaiah", "IS", 23 }, + { "James", "JA", 83 }, + { "Jeremiah", "JE", 24 }, + { "Job", "JOB", 18 }, + { "Joel", "JOE", 29 }, + { "John", "JOH", 67 }, + { "Jonah", "JON", 32 }, + { "Joshua", "JOS", 6 }, + { "Jude", "JUDE", 89 }, + { "Judges", "JUDG", 7 }, + { "Judith", "JUDI", 41 }, + { "Lamentations", "LA", 25 }, 
+ { "Letter of Jeremiah", "LET", 46 }, + { "Leviticus", "LEV", 3 }, + { "Luke", "LK", 66 }, + { "Leviticus", "LV", 3 }, + { "Luke", "LU", 66 }, + { "Malachi", "MAL", 39 }, + { "Prayer of Manasseh", "MAN", 53 }, + { "Mark", "MAR", 65 }, + { "Matthew", "MAT", 64 }, + { "Micah", "MI", 33 }, + { "Nahum", "NA", 34 }, + { "Nehemiah", "NE", 16 }, + { "Numbers", "NU", 4 }, + { "Obadiah", "OB", 31 }, + { "Psalm 151", "P1", 54 }, + { "Philemon", "PHILE", 81 }, + { "Philippians", "PHILI", 74 }, + { "Philemon", "PHM", 81 }, + { "Philippians", "PHP", 74 }, + { "Proverbs", "PR", 20 }, + { "Psalms", "PS", 19 }, + { "Revelation", "RE", 90 }, + { "Romans", "RM", 69 }, + { "Romans", "RO", 69 }, + { "Ruth", "RU", 8 }, + { "Sirach", "SI", 44 }, + { "Song of Solomon", "SOL", 22 }, + { "Song of Solomon", "SON", 22 }, + { "Song of Solomon", "SS", 22 }, + { "Susanna", "SU", 48 }, + { "Titus", "TI", 80 }, + { "Tobit", "TO", 40 }, + { "Wisdom", "WI", 43 }, + { "Zechariah", "ZEC", 38 }, + { "Zephaniah", "ZEP", 36 } +}; + +/*// 0 - 7*/ +/*// 8 - 14*/ +/*// 15-20*/ +/*// 21-26*/ +/*// 27-33*/ +/*// 34-39*/ +/*// 40-45*/ +/*// 46-52*/ +/*// 53-63*/ +/*// 64-70*/ +/*// 71-78*/ +/*// 79-84*/ + +Static Char BookFileName[91][256] = { + "", "Genesis", "Exodus", "Lev", "Num", "Deut", "Joshua", "Judges", "Ruth", + "1Sam", "2Sam", "1Kings", "2Kings", "1Chron", "2Chron", "Ezra", "Nehemiah", + "Esther", "Job", "Psalms", "Proverbs", "Eccl", "Song", "Isaiah", "Jeremiah", + "Lament", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", + "Micah", "Nahum", "Habakkuk", "Zeph", "Haggai", "Zech", "Malachi", "Tobit", + "Judith", "Esther", "Wisdom", "Sirach", "Baruch", "Let", "Azar", "Susanna", + "Bel", "1Mac", "2Mac", "1Esdras", "Man", "P1", "3Mac", "2Esdras", "4Mac", + "", "", "", "", "", "", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", + "1Cor", "2Cor", "Gal", "Eph", "Philip", "Col", "1Thes", "2Thes", "1Tim", + "2Tim", "Titus", "Philemon", "Hebrews", "James", "1Peter", "2Peter", + "1John", 
"2John", "3John", "Jude", "Rev" +/* p2c: Gbf.pas, line 200: + * Note: Line breaker spent 0.0 seconds, 5000 tries on line 336 [251] */ +}; /*// 85-90*/ + + +Static boolean isletter(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isupper(ch)) { + Result = true; + return Result; + } + if (islower(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean isinword(ch) +Char ch; +{ + /*const*/ + boolean Result; + + switch (ch) { + + case '-': + Result = true; + break; + + default: + if (isupper(ch)) + Result = true; + else if (islower(ch)) + Result = true; + else + Result = false; + break; + } + return Result; +} + + +Static boolean IsUpper(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isupper(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean IsDigit(ch) +Char ch; +{ + /*const*/ + boolean Result; + + if (isdigit(ch)) + Result = true; + else + Result = false; + return Result; +} + + +Static boolean MatchAbbrev(sName, sAbbrev) +Char *sName, *sAbbrev; +{ + /*const*/ + long i; + boolean Result; + + if (strlen(sName) < strlen(sAbbrev)) { + Result = false; +/* p2c: Gbf.pas, line 245: Warning: Symbol 'RESULT' is not defined [221] */ + } else + Result = true; + i = 1; + while (i <= strlen(sAbbrev) && Result) { + if (toupper(sName[i - 1]) != sAbbrev[i - 1]) + Result = false; + i++; + } +} + + +Static uchar BookNameToNumber(sBookName) +Char *sBookName; +{ + /*const*/ + long Result; + + Result = 0; +/* p2c: Gbf.pas, line 259: Warning: Symbol 'RESULT' is not defined [221] */ + TRY(try1); + if (IsDigit(sBookName[strlen(sBookName) - 1]) & IsDigit(sBookName[0])) { + Result = StrToInt(sBookName); +/* p2c: Gbf.pas, line 262: + * Warning: Symbol 'STRTOINT' is not defined [221] */ + } + except(); +/* p2c: Gbf.pas, line 264: Warning: Symbol 'EXCEPT' is not defined [221] */ +/* p2c: Gbf.pas, line 264: + * Warning: Expected RECOVER, found 'Result' [227] */ + RECOVER(try1); + ; + ENDTRY(try1); +} + + +main(argc, 
argv) +int argc; +Char *argv[]; +{ /*// Yuk! Linear search.*/ + Char STR1[256]; + uchar Result; + +/* p2c: Gbf.pas, line 266: Warning: Expected BEGIN, found 'i' [227] */ + PASCAL_MAIN(argc, argv); + if (MatchAbbrev(sBookName, BookAbbr[i].Abbr)) { +/* p2c: Gbf.pas, line 269: + * Warning: Symbol 'SBOOKNAME' is not defined [221] */ +/* p2c: Gbf.pas, line 269: Warning: Mixing non-strings with strings [170] */ +/* p2c: Gbf.pas, line 269: Warning: Symbol 'I' is not defined [221] */ + Result = BookAbbr[i].Num; +/* p2c: Gbf.pas, line 271: Warning: Symbol 'I' is not defined [221] */ +/* p2c: Gbf.pas, line 271: Warning: Symbol 'RESULT' is not defined [221] */ + } +/* p2c: Gbf.pas, line 273: Warning: Symbol 'I' is not defined [221] */ + i++; + exit(EXIT_SUCCESS); +} +/* p2c: Gbf.pas, line 275: + * Warning: Junk at end of input file ignored [277] */ + + + +/* End. */ diff --git a/src/modules/texts/rawgbf/Gbf.pas b/src/modules/texts/rawgbf/Gbf.pas new file mode 100644 index 0000000..13826e3 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbf.pas @@ -0,0 +1,735 @@ +type + TToken = (tokNull, tokEOF, tokHeader, tokContent, tokTail, tokStyle, + tokWord, tokSpace, tokSync, tokControl, tokChar, tokFont); + TCharacterAttribute = (caBold, caSmallCaps, caItalic, caOTQuote, caRed, + caSuperscript, caUnderline, caSubscript); + TCharAttribs = set of TCharacterAttribute; + + + TBookNameRec = record + Name, + Abbr: string; + Num: byte + end; + + TBookAbbr = array[0..115] of TBookNameRec; + +const + BookAbbr: TBookAbbr = ( + (Name: '1 Chronicles'; Abbr: '1CH'; Num: 13), {//0} + (Name: '1 Corinthians'; Abbr: '1CO'; Num: 70), {//1} + (Name: '1 Esdras'; Abbr: '1E'; Num: 52), {//2} + (Name: '1 John'; Abbr: '1J'; Num: 86), {//3} + (Name: '1 Kings'; Abbr: '1K'; Num: 11), {//4} + (Name: '1 Maccabees'; Abbr: '1M'; Num: 50), {//5} + (Name: '1 Peter'; Abbr: '1P'; Num: 84), {//6} + (Name: '1 Samuel'; Abbr: '1S'; Num: 9), {//7} + (Name: '1 Thessalonians'; Abbr: '1TH'; Num: 76), {//8} + (Name: '1 Timothy'; 
Abbr: '1TI'; Num: 78), {//9} + (Name: '2 Chronicles'; Abbr: '2CH'; Num: 14), {//10} + (Name: '2 Corinthians'; Abbr: '2CO'; Num: 71), {//11} + (Name: '2 Esdras'; Abbr: '2E'; Num: 56), {//12} + (Name: '2 John'; Abbr: '2J'; Num: 87), {//13} + (Name: '2 Kings'; Abbr: '2K'; Num: 12), {//14} + (Name: '2 Maccabees'; Abbr: '2M'; Num: 51), {//15} + (Name: '2 Peter'; Abbr: '2P'; Num: 85), {//16} + (Name: '2 Samuel'; Abbr: '2S'; Num: 10), {//17} + (Name: '2 Thessalonians'; Abbr: '2TH'; Num: 77), {//18} + (Name: '2 Timothy'; Abbr: '2TI'; Num: 79), {//19} + (Name: '3 John'; Abbr: '3J'; Num: 88), {//20} + (Name: '3 Maccabees'; Abbr: '3M'; Num: 55), {//21} + (Name: '4 Maccabees'; Abbr: '4M'; Num: 57), {//22} + (Name: '1 Chronicles'; Abbr: '1 CH'; Num: 13), {//0} + (Name: '1 Corinthians'; Abbr: '1 CO'; Num: 70), {//1} + (Name: '1 Esdras'; Abbr: '1 E'; Num: 52), {//2} + (Name: '1 John'; Abbr: '1 J'; Num: 86), {//3} + (Name: '1 Kings'; Abbr: '1 K'; Num: 11), {//4} + (Name: '1 Maccabees'; Abbr: '1 M'; Num: 50), {//5} + (Name: '1 Peter'; Abbr: '1 P'; Num: 84), {//6} + (Name: '1 Samuel'; Abbr: '1 S'; Num: 9), {//7} + (Name: '1 Thessalonians'; Abbr: '1 TH'; Num: 76), {//8} + (Name: '1 Timothy'; Abbr: '1 TI'; Num: 78), {//9} + (Name: '2 Chronicles'; Abbr: '2 CH'; Num: 14), {//10} + (Name: '2 Corinthians'; Abbr: '2 CO'; Num: 71), {//11} + (Name: '2 Esdras'; Abbr: '2 E'; Num: 56), {//12} + (Name: '2 John'; Abbr: '2 J'; Num: 87), {//13} + (Name: '2 Kings'; Abbr: '2 K'; Num: 12), {//14} + (Name: '2 Maccabees'; Abbr: '2 M'; Num: 51), {//15} + (Name: '2 Peter'; Abbr: '2 P'; Num: 85), {//16} + (Name: '2 Samuel'; Abbr: '2 S'; Num: 10), {//17} + (Name: '2 Thessalonians'; Abbr: '2 TH'; Num: 77), {//18} + (Name: '2 Timothy'; Abbr: '2 TI'; Num: 79), {//19} + (Name: '3 John'; Abbr: '3 J'; Num: 88), {//20} + (Name: '3 Maccabees'; Abbr: '3 M'; Num: 55), {//21} + (Name: '4 Maccabees'; Abbr: '4 M'; Num: 57), {//22} + (Name: 'Acts'; Abbr: 'AC'; Num: 68), {//23} + (Name: 'Amos'; Abbr: 'AM'; Num: 30), 
{//24} + (Name: 'Prayer of Asariah and the Song of the Three Jews'; Abbr: 'AZ'; Num: 47), + (Name: 'Baruch'; Abbr: 'BA'; Num: 45), {//26} + (Name: 'Bel and the Dragon';Abbr: 'BE'; Num: 49), {//27} + (Name: 'Colossians'; Abbr: 'CO'; Num: 75), {//28} + (Name: 'Daniel'; Abbr: 'DA'; Num: 27), {//29} + (Name: 'Deuteronomy'; Abbr: 'DE'; Num: 5), {//30} + (Name: 'Deuteronomy'; Abbr: 'DT'; Num: 5), {//31} + (Name: 'Ecclesiasties'; Abbr: 'EC'; Num: 21), {//32} + (Name: 'Esther'; Abbr: 'ES'; Num: 17), {//33} + (Name: 'Exodus'; Abbr: 'EX'; Num: 2), {//34} + (Name: 'Ezekiel'; Abbr: 'EZE'; Num: 26), {//35} + (Name: 'Ezra'; Abbr: 'EZR'; Num: 15), {//36} + (Name: 'Galatians'; Abbr: 'GA'; Num: 72), {//37} + (Name: 'Genesis'; Abbr: 'GE'; Num: 1), {//38} + (Name: 'Genesis'; Abbr: 'GN'; Num: 1), {//39} + (Name: 'Ephesians'; Abbr: 'EP'; Num: 73), {//40} + (Name: 'Esther (Greek)'; Abbr: 'GR'; Num: 42), {//41} + (Name: 'Habakkuk'; Abbr: 'HAB'; Num: 35), {//42} + (Name: 'Haggai'; Abbr: 'HAG'; Num: 37), {//43} + (Name: 'Hebrews'; Abbr: 'HE'; Num: 82), {//44} + (Name: 'Hosea'; Abbr: 'HO'; Num: 28), {//45} + (Name: 'Isaiah'; Abbr: 'IS'; Num: 23), {//46} + (Name: 'James'; Abbr: 'JA'; Num: 83), {//47} + (Name: 'Jeremiah'; Abbr: 'JE'; Num: 24), {//48} + (Name: 'Job'; Abbr: 'JOB'; Num: 18), {//49} + (Name: 'Joel'; Abbr: 'JOE'; Num: 29), {//50} + (Name: 'John'; Abbr: 'JOH'; Num: 67), {//51} + (Name: 'Jonah'; Abbr: 'JON'; Num: 32), {//52} + (Name: 'Joshua'; Abbr: 'JOS'; Num: 6), {//53} + (Name: 'Jude'; Abbr: 'JUDE'; Num: 89), {//54} + (Name: 'Judges'; Abbr: 'JUDG'; Num: 7), {//55} + (Name: 'Judith'; Abbr: 'JUDI'; Num: 41), {//56} + (Name: 'Lamentations'; Abbr: 'LA'; Num: 25), {//57} + (Name: 'Letter of Jeremiah';Abbr:'LET'; Num: 46), {//58} + (Name: 'Leviticus'; Abbr: 'LEV'; Num: 3), {//59} + (Name: 'Luke'; Abbr: 'LK'; Num: 66), {//60} + (Name: 'Leviticus'; Abbr: 'LV'; Num: 3), {//61} + (Name: 'Luke'; Abbr: 'LU'; Num: 66), {//62} + (Name: 'Malachi'; Abbr: 'MAL'; Num: 39), {//63} + (Name: 'Prayer 
of Manasseh';Abbr:'MAN'; Num: 53), {//64} + (Name: 'Mark'; Abbr: 'MAR'; Num: 65), {//65} + (Name: 'Matthew'; Abbr: 'MAT'; Num: 64), {//66} + (Name: 'Micah'; Abbr: 'MI'; Num: 33), {//67} + (Name: 'Nahum'; Abbr: 'NA'; Num: 34), {//68} + (Name: 'Nehemiah'; Abbr: 'NE'; Num: 16), {//69} + (Name: 'Numbers'; Abbr: 'NU'; Num: 4), {//70} + (Name: 'Obadiah'; Abbr: 'OB'; Num: 31), {//71} + (Name: 'Psalm 151'; Abbr: 'P1'; Num: 54), {//72} + (Name: 'Philemon'; Abbr: 'PHILE'; Num: 81), {//73} + (Name: 'Philippians'; Abbr: 'PHILI'; Num: 74), {//74} + (Name: 'Philemon'; Abbr: 'PHM'; Num: 81), {//75} + (Name: 'Philippians'; Abbr: 'PHP'; Num: 74), {//76} + (Name: 'Proverbs'; Abbr: 'PR'; Num: 20), {//77} + (Name: 'Psalms'; Abbr: 'PS'; Num: 19), {//78} + (Name: 'Revelation'; Abbr: 'RE'; Num: 90), {//79} + (Name: 'Romans'; Abbr: 'RM'; Num: 69), {//80} + (Name: 'Romans'; Abbr: 'RO'; Num: 69), {//81} + (Name: 'Ruth'; Abbr: 'RU'; Num: 8), {//82} + (Name: 'Sirach'; Abbr: 'SI'; Num: 44), {//83} + (Name: 'Song of Solomon'; Abbr: 'SOL'; Num: 22), {//84} + (Name: 'Song of Solomon'; Abbr: 'SON'; Num: 22), {//85} + (Name: 'Song of Solomon'; Abbr: 'SS'; Num: 22), {//86} + (Name: 'Susanna'; Abbr: 'SU'; Num: 48), {//87} + (Name: 'Titus'; Abbr: 'TI'; Num: 80), {//88} + (Name: 'Tobit'; Abbr: 'TO'; Num: 40), {//89} + (Name: 'Wisdom'; Abbr: 'WI'; Num: 43), {//90} + (Name: 'Zechariah'; Abbr: 'ZEC'; Num: 38), {//91} + (Name: 'Zephaniah'; Abbr: 'ZEP'; Num: 36) {//92} + ); + + BookFileName: array[0..90] of string = ( + '','Genesis','Exodus','Lev','Num','Deut','Joshua','Judges', {// 0 - 7} + 'Ruth','1Sam','2Sam','1Kings','2Kings','1Chron','2Chron', {// 8 - 14} + 'Ezra','Nehemiah','Esther','Job','Psalms','Proverbs', {// 15-20} + 'Eccl','Song','Isaiah','Jeremiah','Lament','Ezekiel', {// 21-26} + 'Daniel','Hosea','Joel','Amos','Obadiah','Jonah','Micah', {// 27-33} + 'Nahum','Habakkuk','Zeph','Haggai','Zech','Malachi', {// 34-39} + 'Tobit','Judith','Esther','Wisdom','Sirach','Baruch', {// 40-45} + 
'Let','Azar','Susanna','Bel','1Mac','2Mac','1Esdras', {// 46-52} + 'Man','P1','3Mac','2Esdras','4Mac','','','','','','', {// 53-63} + 'Matthew','Mark','Luke','John','Acts','Romans','1Cor', {// 64-70} + '2Cor','Gal','Eph','Philip','Col','1Thes','2Thes','1Tim', {// 71-78} + '2Tim','Titus','Philemon','Hebrews','James','1Peter', {// 79-84} + '2Peter','1John','2John','3John','Jude','Rev'); {// 85-90} + +type + TReadGBF = record +{ private} + F: File; + FName, TokenLine: string; + TokenPos: integer; + fFileIsOpen, fParagraphEnd, fInTitle, fInPsalmBookTitle, fInHebrewTitle, + fInSectionTitle: boolean; + +{ public} + sBook, sChapter, sVerse, sMode: string; + sContext, {// Last text type (header, body, or tail)} + sTitle, {// Title of this book of the Bible} + sPsalmBookTitle, {// Title of this Psalm book} + sHebrewTitle, {// Psalm Hebrew title} + sSectionTitle, {// Section headings} + sDate, + sFontName: string; + iTotalWords: integer; + chJustification, + chDirection: char; + fIndent, fPoetry: boolean; + CharAttribs: TCharAttribs; + bBk, bChap, bVs, bWd: byte; +{ + function Init({const}{sFileName: string): boolean; + procedure Done; + function GetToken(var TokenKind: TToken): string; +} + end; + + TWriteGBF = record +{ private} + F: File; + FName, LineOut: string; + fFileIsOpen: boolean; + bBk, bChap, bVs, bWd: byte; + +{ public} +{ + function Init({const}{sFileName: string): boolean; + function Done: boolean; + procedure Out({const}{s: string); +} + end; + +{implementation } + +function isletter({const}ch: char): boolean; +begin + case ch of + 'A'..'Z': isletter := true; + 'a'..'z': isletter := true; + else + isletter := false; + end; +end; + +function isinword({const}ch: char): boolean; +begin + case ch of + '-': isinword := true; + 'A'..'Z': isinword := true; + 'a'..'z': isinword := true; + else + isinword := false; + end; +end; + +function IsUpper({const}ch: char): Boolean; +begin + case ch of + 'A'..'Z': IsUpper := true; + else + IsUpper := false; + end; +end; + 
+{ Returns true when ch is one of the decimal digits '0'..'9'. }
+function IsDigit({const}ch: char): Boolean;
+begin
+  case ch of
+    '0'..'9': IsDigit := true;
+  else
+    IsDigit := false;
+  end;
+end;
+
+
+{ Returns true when sName begins with sAbbrev.  Each character of sName is
+  upper-cased before the comparison; sAbbrev is compared as given, so the
+  abbreviation must already be in upper case to match. }
+function MatchAbbrev({const}sName, sAbbrev: string): boolean;
+var i: integer;
+begin
+  { A name shorter than the abbreviation can never match. }
+  Result := Length(sName) >= Length(sAbbrev);
+  i := 1;
+  while Result and (i <= Length(sAbbrev)) do
+  begin
+    if UpCase(sName[i]) <> sAbbrev[i] then
+      Result := false;
+    inc(i);
+  end;
+end;
+
+{ Converts a book name to its book number (the Num field of BookAbbr).
+  A string that starts and ends with a digit is first tried as a plain
+  number; otherwise, or if that yields 0, the first BookAbbr entry whose
+  abbreviation is a prefix of sBookName supplies the number.
+  Returns 0 when nothing matches or sBookName is empty. }
+function BookNameToNumber({const}sBookName: string): byte;
+var i, n: integer;
+begin
+  Result := 0;
+  { Guard: indexing the first/last character of an empty string is invalid. }
+  if Length(sBookName) = 0 then
+    Exit;
+  try
+    if IsDigit(sBookName[Length(sBookName)]) and IsDigit(sBookName[1]) then
+    begin
+      n := StrToInt(sBookName);
+      { Only accept values that fit the byte result; anything else falls
+        through to the abbreviation lookup instead of being truncated. }
+      if (n >= 0) and (n <= 255) then
+        Result := n;
+    end;
+  except
+    Result := 0;
+  end;
+  i := Low(BookAbbr);
+  while (Result = 0) and (i <= High(BookAbbr)) do {// Yuk! Linear search.}
+  begin
+    if MatchAbbrev(sBookName,BookAbbr[i].Abbr) then
+    begin
+      Result := BookAbbr[i].Num;
+    end;
+    inc(i);
+  end;
+end;
+
+{ Converts a book number to the book's full name.
+  BookAbbr is ordered by abbreviation, not by book number, so the table is
+  searched for the first entry whose Num equals bBookNum rather than being
+  indexed directly.  Returns '' when no entry carries that number. }
+function BookNumberToName({const}bBookNum: byte): string;
+var i: integer;
+begin
+  Result := '';
+  i := Low(BookAbbr);
+  while (Result = '') and (i <= High(BookAbbr)) do
+  begin
+    if BookAbbr[i].Num = bBookNum then
+      Result := BookAbbr[i].Name;
+    inc(i);
+  end;
+end;
+
+{ Returns sSrc re-cased after the pattern sPat:
+    - the result is sSrc in lower case;
+    - its first character is upper-cased when sPat starts with 'A'..'Z';
+    - characters 2..end are upper-cased when the second character of sPat
+      is 'A'..'Z' (and both strings have more than one character).
+  When either string is empty, sSrc is returned unchanged. }
+function ConformCase({const}sPat, sSrc: string): string;
+var i: integer;
+begin
+  Result := sSrc;
+  if (Length(sPat) > 0) and (Length(sSrc) > 0) then
+  begin
+    Result := LowerCase(sSrc);
+    if IsUpper(sPat[1]) then
+      Result[1] := UpCase(Result[1]);
+    if (Length(sPat) > 1) and (Length(sSrc) > 1) then
+    begin
+      if IsUpper(sPat[2]) then
+      begin
+        for i := 2 to Length(Result) do
+          Result[i] := UpCase(Result[i]);
+      end;
+    end;
+  end;
+end;
+
+{ Opens sFileName for reading and resets the reader state (book, chapter,
+  verse and word counters, total word count, paragraph-end flag).
+  After reading the first line it consumes tokens until either end of file
+  or a header token whose third character is '0' is seen.
+  Returns true on success; on any exception returns false and marks the
+  file as not open. }
+function TReadGBF.Init({const}sFileName: string): boolean;
+var s: string;
+    tok: TToken;
+begin
+  try
+    fParagraphEnd := false;
+    bBk := 0;
+    bChap := 0;
+    bVs := 0;
+    bWd := 0;
+    iTotalWords := 0;
+    FName := sFileName;
+    Assign(F, FName);
+    reset(F);
+    readln(F, TokenLine);
+    TokenPos := 1;
+    fFileIsOpen := true;
+    { Skip forward to the first matching header token (or EOF). }
+    repeat
+      s := GetToken(tok)
+    until (tok = tokEOF) or ((tok = tokHeader) and (s[3] = '0'));
+    Init := true;
+  except
+    Init := false;
+    fFileIsOpen := false;
+  end
+end;
+
+procedure TReadGBF.Done; +begin + if fFileIsOpen then + begin + closefile(F); + fFileIsOpen := false; + end; +end; + +function TReadGBF.GetToken(var TokenKind: TToken): string; +var m: integer; +begin + Result := ''; + TokenKind := tokNull; + if TokenPos = 0 then + begin + if (not fFileIsOpen) or EOF(F) then + TokenKind := tokEOF + else + begin + ReadLn(F,TokenLine); + TokenPos := 1; + end; + end; + if TokenKind <> tokEOF then + begin + m := Length(TokenLine); + if TokenPos > m then + begin + TokenKind := tokSpace; + if fParagraphEnd then + fParagraphEnd := false + else + Result := ' '; + TokenPos := 0; + end + else + begin + if (TokenLine[TokenPos] = '<') then + begin + fParagraphEnd := false; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenLine[TokenPos] = '>') or (TokenPos > m); + Result := Result + '>'; + inc(TokenPos); + case result[2] of + 'B': begin {// File body text type} + TokenKind := tokContent; + sContext := Result; + end; + 'C': begin {// Special characters} + TokenKind := tokControl; + if (Result[3] = 'M') or (Result[3] = 'L') then + fParagraphEnd := true; + end; + 'D': begin {// Direction} + TokenKind := tokControl; + chDirection := Result[3]; + end; + 'H': begin + TokenKind := tokHeader; + sContext := Result; + end; + 'F': begin {// Font attributes} + TokenKind := tokFont; + case Result[3] of + 'B': CharAttribs := CharAttribs + [caBold]; + 'C': CharAttribs := CharAttribs + [caSmallCaps]; + 'I': CharAttribs := CharAttribs + [caItalic]; + 'N': sFontName := copy(Result,4,Length(Result)-4); + 'O': CharAttribs := CharAttribs + [caOTQuote]; + 'R': CharAttribs := CharAttribs + [caRed]; + 'S': CharAttribs := CharAttribs + [caSuperscript]; + 'U': CharAttribs := CharAttribs + [caUnderline]; + 'V': CharAttribs := CharAttribs + [caSubscript]; + 'b': CharAttribs := CharAttribs - [caBold]; + 'c': CharAttribs := CharAttribs - [caSmallCaps]; + 'i': CharAttribs := CharAttribs - [caItalic]; + 'n': sFontName := ''; + 'o': 
CharAttribs := CharAttribs - [caOTQuote]; + 'r': CharAttribs := CharAttribs - [caRed]; + 's': CharAttribs := CharAttribs - [caSuperscript]; + 'u': CharAttribs := CharAttribs - [caUnderline]; + 'v': CharAttribs := CharAttribs - [caSubscript]; + + end; + end; + 'J': begin {// Justification} + TokenKind := tokStyle; + chJustification := Result[3]; + end; + 'P': begin {// Poetry/prose, indent} + TokenKind := tokControl; + case Result[3] of + 'I': fIndent := true; + 'P': fPoetry := true; + 'i': fIndent := false; + 'p': fPoetry := false; + end; + end; + 'R': begin {// References and footnotes} + TokenKind := tokControl; + end; + 'S': begin {// sync mark} + TokenKind := TokSync; + case Result[3] of + 'B': begin {// Book} + sBook := system.copy(Result, 4, length(Result)-4); + sPsalmBookTitle := ''; + if sBook = '' then + begin + inc(bBk); + sBook := BookNumberToName(bBk); + end + else + bBk := BookNameToNumber(sBook); + sTitle := sBook; + end; + 'C': begin {//chapter} + sChapter := system.copy(Result, 4, length(Result)-4); + if sChapter = '' then + begin + inc(bChap); + sChapter := IntToStr(bChap); + end + else + begin + try + bChap := StrToInt(sChapter); + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + sHebrewTitle := ''; + end; + 'V': begin {// Verse} + bWd := 0; + sVerse := system.copy(Result, 4, length(Result)-4); + if sVerse = '' then + begin + inc(bVs); + sVerse := IntToStr(bVs); + end + else + begin + try + bVs := StrToInt(sVerse); + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + 'D': begin {// Date} + sDate := system.copy(Result, 3, length(Result)-4); + end; + end; + end; + 'T': begin {// Titles} + TokenKind := TokContent; + case Result[3] of + 'B': + begin + sPsalmBookTitle := ''; + fInPsalmBookTitle := true; + end; + 'b': fInPsalmBookTitle := true; + 'H': + begin + sHebrewTitle := ''; + fInHebrewTitle := true; + end; + 'h': fInHebrewTitle := false; + 'S': + begin + sSectionTitle 
:= ''; + fInSectionTitle := true; + end; + 's': fInSectionTitle := false; + 'T': + begin + sTitle := ''; + fInTitle := true; + end; + 't': fInTitle := false; + end; + end; + 'Z': begin {// File tail} + TokenKind := tokTail; + sContext := Result; + if Result[3] = 'Z' then + done; + end; + else + TokenKind := TokControl; + + end; + end + else if isletter(TokenLine[TokenPos]) then + begin {Word} + fParagraphEnd := false; + TokenKind := tokWord; + repeat + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + until (TokenPos > m) or (not isinword(TokenLine[TokenPos])); + inc(bWd); + inc(iTotalWords); + end + else if ((TokenLine[TokenPos] = ' ') or (TokenLine[TokenPos] = #9)) then + begin + fParagraphEnd := false; + TokenKind := tokSpace; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + else + begin + fParagraphEnd := false; + TokenKind := tokChar; + Result := Result + TokenLine[TokenPos]; + inc(TokenPos); + end + end; + end; + if ((TokenKind = tokWord) or (TokenKind = tokSpace) or + (TokenKind = tokChar)) then + begin + if fInTitle then + sTitle := sTitle + Result + else if fInPsalmBookTitle then + sPsalmBookTitle := sPsalmBookTitle + Result + else if fInHebrewTitle then + sHebrewTitle := sHebrewTitle + Result + else if fInSectionTitle then + sSectionTitle := sSectionTitle + Result; + end; +end; + +function TWriteGBF.Init({const}sFileName: string): boolean; +begin + try + bBk := 0; + bChap := 0; + bVs := 0; + bWd := 0; + LineOut := ''; + FName := sFileName; + Assign(F, FName); + filemode := 1; + rewrite(F); + fFileIsOpen := true; + Init := true; + except + Init := false; + fFileIsOpen := false; + end +end; + +function TWriteGBF.Done: boolean; +begin + try + if fFileIsOpen then + begin + if LineOut <> '' then + begin + WriteLn(F, LineOut); + LineOut := ''; + end; + CloseFile(F); + end; + Done := true; + except + Done := false; + end; +end; + +procedure TWriteGBF.Out({const}s: string); +var sPrint, sSave, sBook, sChapter, sVerse: string; + i: 
integer; + b: byte; +begin + if (Length(s) > 0) and IsLetter(s[1]) then + begin + inc(bWd); + LineOut := LineOut + s; + end + else if Length(s) > 3 then + begin + if (s[1] = '<') and (s[2] = 'S') then + begin + case s[3] of + 'B': begin {// Book} + sBook := system.copy(s, 4, length(s)-4); + if sBook = '' then + begin + inc(bBk); + LineOut := LineOut + s; + end + else + begin + b := bBk; + bBk := BookNameToNumber(sBook); + if b <> bBk then + LineOut := LineOut + s; + end; + end; + 'C': begin {//chapter} + sChapter := system.copy(s, 4, length(s)-4); + if sChapter = '' then + begin + inc(bChap); + LineOut := LineOut + s; + end + else + begin + try +{// b := bChap;} + bChap := StrToInt(sChapter); +{// if b <> bChap then} + LineOut := LineOut + s; + except + showmessage('Non-numeric chapter: '+sBook+' '+sChapter); + end; + end; + end; + 'V': begin {// Verse} + bWd := 0; + sVerse := system.copy(s, 4, length(s)-4); + if sVerse = '' then + begin + inc(bVs); + LineOut := LineOut + s; + end + else + begin + try +{// b := bVs;} + bVs := StrToInt(sVerse); +{// if b <> bVs then} + LineOut := LineOut + s; + except + showmessage('Non-numeric verse: '+sBook+' '+sChapter+':'+sVerse); + end; + end; + end; + else + LineOut := LineOut + s; + end + end + else + LineOut := LineOut + s; {// Not a sync mark} + end + else {// other token, space, or punctuation} + LineOut := LineOut + s; {// Length <= 3} + if ((s = '') or (s = '')) then + begin + if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin + WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + WriteLn(F, sSave); + LineOut := ''; + end + end + else + begin + WriteLn(F, LineOut); + LineOut := ''; + end + end + else if (Length(LineOut) > 78) then + begin + i := 78; + while (i > 0) and (LineOut[i] <> ' ') do + dec(i); + if i < 1 then + begin 
+ WriteLn(F,LineOut); + LineOut := ''; + end + else + begin + sPrint := system.copy(LineOut,1,i-1); + sSave := system.copy(LineOut,i+1,Length(LineOut)-i); + WriteLn(F,sPrint); + LineOut := sSave; + end + end +end; + +end. diff --git a/src/modules/texts/rawgbf/Gbfmain.pas b/src/modules/texts/rawgbf/Gbfmain.pas new file mode 100644 index 0000000..4377622 --- /dev/null +++ b/src/modules/texts/rawgbf/Gbfmain.pas @@ -0,0 +1,1267 @@ +unit GBFMain; + +interface + +uses + Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs, + Buttons, StdCtrls, ExtCtrls, GBF; + +const + sTitlePar = '\pard\plain \s1\fi432\sb240\sa60\keepn\widctlpar \b\f5\fs28\kerning28 '; + sNormalPar = '\pard\plain \fi432\widctlpar \f4 '; + sNormalQuotePar = '\pard\plain \s20\fi432\li432\widctlpar \f4 '; + sPoetryPar = '\pard\plain \s18\fi-432\li432\widctlpar \f4 '; + sPoetryQuotePar = '\pard\plain \s21\fi-432\li864\widctlpar \f4 '; + sHebrewTitlePar = '\pard\plain \s16\fi432\keep\keepn\widctlpar \f4\fs20 '; + sSelahPar = '\pard\plain \s19\qr\widctlpar \f4 '; + ANSI2OEM: array[0..255] of char = + ( #0, #1, #2, #3, #4, #5, #6, #7, + #8, #9, #10, #11, #12, #13, #14, #15, + #16, #17, #18, #19, #20, #21, #22, #23, + #24, #25, #26, #27, #28, #29, #30, #31, + #32, #33, #34, #35, #36, #37, #38, #39, + #40, #41, #42, #43, #44, #45, #46, #47, + #48, #49, #50, #51, #52, #53, #54, #55, + #56, #57, #58, #59, #60, #61, #62, #63, + #64, #65, #66, #67, #68, #69, #70, #71, + #72, #73, #74, #75, #76, #77, #78, #79, + #80, #81, #82, #83, #84, #85, #86, #87, + #88, #89, #90, #91, #92, #93, #94, #95, + #96, #97, #98, #99,#100,#101,#102,#103, + #104,#105,#106,#107,#108,#109,#110,#111, + #112,#113,#114,#115,#116,#117,#118,#119, + #120,#121,#122,#123,#124,#125,#126,#127, + #128,#129, ',', 'a', '"',#133,#197,#216, + '^', '%', 'S', '<',#140,#141,#142,#143, + #144, #96, #97, '"', '"',#249,#150,#151, + '~',#153, 's', '>',#156,#157,#158, 'Y', + ' ',#173,#155,#156,#232,#157,#124, #21, + #168,#169, 'a',#174,#170, 
'-',#174, '_', + #167,#241,#253, '3', #39,#230, #20,#254, + ',', '1', 'o',#175,#172,#171,#190,#168, + 'A', 'A', 'A', 'A',#142,#143,#198,#128, + 'E',#144, 'E',#142, 'I', 'I', 'I', 'I', + 'D',#165, 'O', 'O', 'O', 'O',#153, 'x', + '0', 'U', 'U', 'U',#154, 'Y', 'b',#225, + #133,#130,#131, 'a',#132,#134,#230,#135, + #138,#130,#136,#137,#141,#161,#140,#139, + #148,#164,#149,#162,#147, 'o',#148,#246, + 'o',#151,#163,#150,#129, 'y', 'b',#152); + +type + TGBFConverterMainForm = class(TForm) + SourceEdit: TEdit; + Label1: TLabel; + BrowseButton: TButton; + SaveDialog1: TSaveDialog; + OpenDialog1: TOpenDialog; + DestEdit: TEdit; + Label2: TLabel; + BrowseDestButton: TButton; + FormatRadioGroup: TRadioGroup; + GoBitBtn: TBitBtn; + CloseBitBtn: TBitBtn; + Timer1: TTimer; + VerseLabel: TLabel; + ApocryphaCheckBox: TCheckBox; + WdLabel: TLabel; + Label3: TLabel; + Label4: TLabel; + WEBDraftCheckBox: TCheckBox; + QuickButton: TButton; + procedure CloseBitBtnClick(Sender: TObject); + procedure GoBitBtnClick(Sender: TObject); + procedure Timer1Timer(Sender: TObject); + procedure FormShow(Sender: TObject); + procedure FormatRadioGroupClick(Sender: TObject); + procedure QuickConversion; + procedure DoConversion; + procedure QuickButtonClick(Sender: TObject); + procedure FormActivate(Sender: TObject); + private + { Private declarations } + public + { Public declarations } + end; + +var + GBFConverterMainForm: TGBFConverterMainForm; + +implementation + +{$R *.DFM} + +var InFile: TReadGBF; + OutGBF: TWriteGBF; + OutFile: TextFile; + +function ANSIToOEM(s: string): string; +var i, j: integer; +begin + Result := s; + j := 1; + for i := 1 to length(s) do + begin + case s[i] of + #133: + begin + Result[j] := '.'; + inc(j); + Insert('..', Result, j); + inc(j); + end; + #140: + begin + Result[j] := 'O'; + inc(j); + Insert('E', Result, j); + end; + #150: + begin + Result[j] := '-'; + inc(j); + Insert('-', Result, j); + end; + #151: + begin + Result[j] := '-'; + inc(j); + Insert('-', Result, j); 
+ end; + #153: + begin + Result[j] := '('; + inc(j); + Insert('TM)', Result, j); + inc(j,2); + end; + #156: + begin + Result[j] := 'o'; + inc(j); + Insert('e', Result, j); + end; + #169: + begin + Result[j] := '('; + inc(j); + Insert('C)',Result, j); + inc(j); + end; + #174: + begin + Result[j] := '('; + inc(j); + Insert('R)',Result, j); + inc(j); + end; + #198: + begin + Result[j] := 'A'; + inc(j); + Insert('E', Result, j); + end; + #230: + begin + Result[j] := 'a'; + inc(j); + Insert('e', Result, j); + end; + else + Result[j] := ANSI2OEM[ord(s[i])]; + end; + inc(j); + end; +end; + +procedure TGBFConverterMainForm.CloseBitBtnClick(Sender: TObject); +begin + Close; +end; + +procedure TGBFConverterMainForm.DoConversion; +var LastBook, wd, ParagraphAttributes, s, sLine, sPrint, sSave, + OutFileName: string; + LinePos, i, iFileNumber: integer; + tok: TToken; + fInclude, fProse, fSkip, fHTMLisOpen, fRed, fASCIIisOpen: boolean; + bLastBook, bChap: byte; + + procedure CheckEOL; + begin + if Length(sLine) > 65 then + begin + i := 65; + while (i > 0) and (sLine[i] <> ' ') do + dec(i); + if i < 1 then + begin + if fASCIIisOpen then WriteLn(OutFile,sLine); + if fProse then + sLine := '' + else + sLine := ' '; + end + else + begin + sPrint := system.copy(sLine,1,i-1); + if fProse then + sSave := system.copy(sLine,i+1,Length(sLine)-i) + else + sSave := ' '+system.copy(sLine,i+1,Length(sLine)-i); + if fASCIIisOpen then WriteLn(OutFile,sPrint); + sLine := sSave; + end + end; + end; + + procedure StartNewLine; + begin + if fInclude then + begin + CheckEol; + if fASCIIisOpen then WriteLn(OutFile, sLine); + sLine := ''; + end; + end; + + procedure CloseHTML; + begin + if fHTMLisOpen then + begin + WriteLn(OutFile,sLine); + sLine := ''; + WriteLn(OutFile,'