From 03134fa5f6f25d92724ce4c183f9bbe12a9e37dc Mon Sep 17 00:00:00 2001 From: "Roberto C. Sanchez" Date: Sat, 29 Mar 2014 10:53:59 -0400 Subject: Imported Upstream version 1.5.11 --- src/modules/texts/Makefile | 5 + src/modules/texts/Makefile.am | 7 + src/modules/texts/rawtext/Makefile | 5 + src/modules/texts/rawtext/Makefile.am | 2 + src/modules/texts/rawtext/rawtext.cpp | 548 ++++++++++++++++++++++++++++++++ src/modules/texts/rawtext4/Makefile.am | 2 + src/modules/texts/rawtext4/rawtext4.cpp | 548 ++++++++++++++++++++++++++++++++ src/modules/texts/swtext.cpp | 113 +++++++ src/modules/texts/ztext/Makefile | 5 + src/modules/texts/ztext/Makefile.am | 3 + src/modules/texts/ztext/ztext.cpp | 195 ++++++++++++ 11 files changed, 1433 insertions(+) create mode 100644 src/modules/texts/Makefile create mode 100644 src/modules/texts/Makefile.am create mode 100644 src/modules/texts/rawtext/Makefile create mode 100644 src/modules/texts/rawtext/Makefile.am create mode 100644 src/modules/texts/rawtext/rawtext.cpp create mode 100644 src/modules/texts/rawtext4/Makefile.am create mode 100644 src/modules/texts/rawtext4/rawtext4.cpp create mode 100644 src/modules/texts/swtext.cpp create mode 100644 src/modules/texts/ztext/Makefile create mode 100644 src/modules/texts/ztext/Makefile.am create mode 100644 src/modules/texts/ztext/ztext.cpp (limited to 'src/modules/texts') diff --git a/src/modules/texts/Makefile b/src/modules/texts/Makefile new file mode 100644 index 0000000..1a2d00d --- /dev/null +++ b/src/modules/texts/Makefile @@ -0,0 +1,5 @@ + +root := ../../.. 
+ +all: + make -C ${root} diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am new file mode 100644 index 0000000..f5d81b4 --- /dev/null +++ b/src/modules/texts/Makefile.am @@ -0,0 +1,7 @@ +textsdir = $(top_srcdir)/src/modules/texts + +libsword_la_SOURCES += $(textsdir)/swtext.cpp + +include ../src/modules/texts/rawtext/Makefile.am +include ../src/modules/texts/rawtext4/Makefile.am +include ../src/modules/texts/ztext/Makefile.am diff --git a/src/modules/texts/rawtext/Makefile b/src/modules/texts/rawtext/Makefile new file mode 100644 index 0000000..35d6648 --- /dev/null +++ b/src/modules/texts/rawtext/Makefile @@ -0,0 +1,5 @@ + +root := ../../../.. + +all: + make -C ${root} diff --git a/src/modules/texts/rawtext/Makefile.am b/src/modules/texts/rawtext/Makefile.am new file mode 100644 index 0000000..5d77e44 --- /dev/null +++ b/src/modules/texts/rawtext/Makefile.am @@ -0,0 +1,2 @@ +rawtextdir = $(top_srcdir)/src/modules/texts/rawtext +libsword_la_SOURCES += $(rawtextdir)/rawtext.cpp diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp new file mode 100644 index 0000000..3245da6 --- /dev/null +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -0,0 +1,548 @@ +/****************************************************************************** + * rawtext.cpp - code for class 'RawText'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include // GNU +#include +#include +#include + +#ifndef USELUCENE +using std::map; +using std::list; +using std::find; +#endif + +SWORD_NAMESPACE_START + +#ifndef USELUCENE +typedef map < SWBuf, list > strlist; +typedef list longlist; +#endif + +/****************************************************************************** + * RawText Constructor - Initializes data for instance of RawText + * + * ENT: iname - Internal name for module + * 
idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse(ipath) { + +#ifndef USELUCENE + SWBuf fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); + if (FileMgr::existsFile(fastidxname.c_str())) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (FileMgr::existsFile(fastidxname.c_str())) + fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +#endif +} + + +/****************************************************************************** + * RawText Destructor - Cleans up instance of RawText + */ + +RawText::~RawText() { +#ifndef USELUCENE + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +#endif +} + + +bool RawText::isWritable() { + return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR)); +} + + +/****************************************************************************** + * RawText::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +SWBuf &RawText::getRawEntryBuf() { + long start = 0; + unsigned short size = 0; + VerseKey &key = getVerseKey(); + + findOffset(key.Testament(), key.Index(), &start, &size); + entrySize = size; // support getEntrySize call + + entryBuf = ""; + readText(key.Testament(), start, size, entryBuf); + + rawFilter(entryBuf, 0); // hack, decipher + rawFilter(entryBuf, &key); + +// if (!isUnicode()) + prepText(entryBuf); + + return entryBuf; +} + + +signed char 
RawText::createSearchFramework(void (*percent)(char, void *), void *percentUserData) { +#ifndef USELUCENE + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. [0] Old Testament; [1] NT + map < SWBuf, list > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + setKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + setKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + FileDesc *datfd; + FileDesc *idxfd; + strlist::iterator it; + longlist::iterator it2; + unsigned long offset, entryoff; + unsigned short size; + + SWBuf fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new 
testament do... + for (int loop = 0; loop < 2; loop++) { + datfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); + if (datfd->getFd() == -1) + return -1; + idxfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); + if (idxfd->getFd() == -1) { + FileMgr::getSystemFileMgr()->close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = datfd->seek(0, SEEK_CUR); + idxfd->write(&offset, 4); + + // write our word out to the word.dat file, delineating with a \n + datfd->write(it->first.c_str(), strlen(it->first.c_str())); + datfd->write("\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + datfd->write(&entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine the size of this database entry + size = datfd->seek(0, SEEK_CUR) - offset; + + // store the size of this database entry + idxfd->write(&size, 2); + printf("%d entries (size: %d)\n", count, size); + } + FileMgr::getSystemFileMgr()->close(datfd); + FileMgr::getSystemFileMgr()->close(idxfd); + } + return 0; +#else + return SWModule::createSearchFramework(percent, percentUserData); +#endif +} + + +void RawText::deleteSearchFramework() { +#ifndef USELUCENE + SWBuf target = path; + char ch = 
target.c_str()[strlen(target.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + target += "/lucene"; + FileMgr::removeFile(target + "ntwords.dat"); + FileMgr::removeFile(target + "otwords.dat"); + FileMgr::removeFile(target + "ntwords.idx"); + FileMgr::removeFile(target + "otwords.idx"); +#else + SWModule::deleteSearchFramework(); +#endif +} + + +/****************************************************************************** + * SWModule::search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: ListKey set to verses that contain istr + */ + +ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { +#ifndef USELUCENE + listKey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + SWTRY { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + SWCATCH ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. 
+ if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listKey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned short size; + char *idxbuf = 0; + SWBuf datBuf; + list indexes; + list indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. 
+ for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findOffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getIDXBufDat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + delete [] idxbuf; + idxbuf = 0; + datBuf = ""; + fastSearch[j]->readText(start, &size, &idxbuf, datBuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datBuf.getRawData(); + while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+ // This is our AND functionality + if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) + // add to new result set + indexes2.push_back(*keyindex); + } + else indexes2.push_back(*keyindex); + keyindex++; + } + } + else error = 1; // no more matches + free(idxbuf); + } + + // make new result set final result set + indexes = indexes2; + + percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); + } + + // indexes contains our good verses, lets return them in a listKey + indexes.sort(); + + // iterate thru each good module position that meets the search + for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) { + + // set a temporary verse key to this module position + vk.Testament(j+1); + vk.Error(); + vk.Index(*it); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listKey << (const char *) vk; + } + else listKey << (const char*) vk; + } + } + (*percent)(98, percentUserData); + + free(words); + free(wordBuf); + + *testKeyType = saveKey; // set current place back to original + + listKey = TOP; + (*percent)(100, percentUserData); + return listKey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listKey; + } + +#endif + // if we don't support this search, fall back to base class + return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +void RawText::setEntry(const char *inbuf, long len) { + VerseKey &key = getVerseKey(); + doSetText(key.Testament(), key.Index(), inbuf, len); +} + + +void RawText::linkEntry(const SWKey *inkey) { + VerseKey &destkey = getVerseKey(); + const VerseKey *srckey = 0; + + // see if we have a VerseKey * or decendant + SWTRY { + srckey = SWDYNAMIC_CAST(VerseKey, 
inkey); + } + SWCATCH ( ... ) {} + // if we don't have a VerseKey * decendant, create our own + if (!srckey) + srckey = new VerseKey(inkey); + + doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); + + if (inkey != srckey) // free our key if we created a VerseKey + delete srckey; +} + + +/****************************************************************************** + * RawText::deleteEntry - deletes this entry + * + * RET: *this + */ + +void RawText::deleteEntry() { + VerseKey &key = getVerseKey(); + doSetText(key.Testament(), key.Index(), ""); +} + +/****************************************************************************** + * RawText::increment - Increments module key a number of entries + * + * ENT: increment - Number of entries to jump forward + * + * RET: *this + */ + +void RawText::increment(int steps) { + long start; + unsigned short size; + VerseKey *tmpkey = &getVerseKey(); + + findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + + SWKey lastgood = *tmpkey; + while (steps) { + long laststart = start; + unsigned short lastsize = size; + SWKey lasttry = *tmpkey; + (steps > 0) ? (*key)++ : (*key)--; + tmpkey = &getVerseKey(); + + if ((error = key->Error())) { + *key = lastgood; + break; + } + long index = tmpkey->Index(); + findOffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry +// && (start > 0) + && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; + lastgood = *tmpkey; + } + } + error = (error) ? 
KEYERR_OUTOFBOUNDS : 0; +} + +SWORD_NAMESPACE_END diff --git a/src/modules/texts/rawtext4/Makefile.am b/src/modules/texts/rawtext4/Makefile.am new file mode 100644 index 0000000..1f3791d --- /dev/null +++ b/src/modules/texts/rawtext4/Makefile.am @@ -0,0 +1,2 @@ +rawtext4dir = $(top_srcdir)/src/modules/texts/rawtext4 +libsword_la_SOURCES += $(rawtext4dir)/rawtext4.cpp diff --git a/src/modules/texts/rawtext4/rawtext4.cpp b/src/modules/texts/rawtext4/rawtext4.cpp new file mode 100644 index 0000000..a06691e --- /dev/null +++ b/src/modules/texts/rawtext4/rawtext4.cpp @@ -0,0 +1,548 @@ +/****************************************************************************** + * rawtext4.cpp - code for class 'RawText4'- a module that reads raw text files: + * ot and nt using indexs ??.bks ??.cps ??.vss + */ + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include // GNU +#include +#include +#include + +#ifndef USELUCENE +using std::map; +using std::list; +using std::find; +#endif + +SWORD_NAMESPACE_START + +#ifndef USELUCENE +typedef map < SWBuf, list > strlist; +typedef list longlist; +#endif + +/****************************************************************************** + * RawText4 Constructor - Initializes data for instance of RawText4 + * + * ENT: iname - Internal name for module + * idesc - Name to display to user for module + * idisp - Display object to use for displaying + */ + +RawText4::RawText4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang), + RawVerse4(ipath) { + +#ifndef USELUCENE + SWBuf fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + for (int loop = 0; loop < 2; loop++) { + fastSearch[loop] = 0; + SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); 
+ if (FileMgr::existsFile(fastidxname.c_str())) { + fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); + if (FileMgr::existsFile(fastidxname.c_str())) + fastSearch[loop] = new RawStr4((fname + ((loop)?"ntwords":"otwords")).c_str()); + } + } +#endif +} + + +/****************************************************************************** + * RawText4 Destructor - Cleans up instance of RawText4 + */ + +RawText4::~RawText4() { +#ifndef USELUCENE + if (fastSearch[0]) + delete fastSearch[0]; + + if (fastSearch[1]) + delete fastSearch[1]; +#endif +} + + +bool RawText4::isWritable() { + return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR)); +} + + +/****************************************************************************** + * RawText4::getRawEntry - Returns the correct verse when char * cast + * is requested + * + * RET: string buffer with verse + */ + +SWBuf &RawText4::getRawEntryBuf() { + long start = 0; + unsigned long size = 0; + VerseKey &key = getVerseKey(); + + findOffset(key.Testament(), key.Index(), &start, &size); + entrySize = size; // support getEntrySize call + + entryBuf = ""; + readText(key.Testament(), start, size, entryBuf); + + rawFilter(entryBuf, 0); // hack, decipher + rawFilter(entryBuf, &key); + +// if (!isUnicode()) + prepText(entryBuf); + + return entryBuf; +} + + +signed char RawText4::createSearchFramework(void (*percent)(char, void *), void *percentUserData) { +#ifndef USELUCENE + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + // dictionary holds words associated with a list + // containing every module position that contains + // the word. 
[0] Old Testament; [1] NT + map < SWBuf, list > dictionary[2]; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + setKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + while (!Error()) { + long index = lkey->Index(); + wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); + strcpy(wordBuf, StripText()); + + // grab each word from the text + word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); + while (word) { + + // make word upper case + toupperstr(word); + + // lookup word in dictionary (or make entry in dictionary + // for this word) and add this module position (index) to + // the word's associated list of module positions + dictionary[lkey->Testament()-1][word].push_back(index); + word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); + } + free(wordBuf); + (*this)++; + } + + // reposition module back to where it was before we were called + setKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + + // --------- Let's output an index from our dictionary ----------- + FileDesc *datfd; + FileDesc *idxfd; + strlist::iterator it; + longlist::iterator it2; + unsigned long offset, entryoff; + unsigned long size; + + SWBuf fname; + fname = path; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/"; + + // for old and new testament do... 
+ for (int loop = 0; loop < 2; loop++) { + datfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); + if (datfd->getFd() == -1) + return -1; + idxfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); + if (idxfd->getFd() == -1) { + FileMgr::getSystemFileMgr()->close(datfd); + return -1; + } + + // iterate thru each word in the dictionary + for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { + printf("%s: ", it->first.c_str()); + + // get our current offset in our word.dat file and write this as the start + // of the next entry in our database + offset = datfd->seek(0, SEEK_CUR); + idxfd->write(&offset, 4); + + // write our word out to the word.dat file, delineating with a \n + datfd->write(it->first.c_str(), strlen(it->first.c_str())); + datfd->write("\n", 1); + + // force our mod position list for this word to be unique (remove + // duplicates that may exist if the word was found more than once + // in the verse + it->second.unique(); + + // iterate thru each mod position for this word and output it to + // our database + unsigned short count = 0; + for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { + entryoff= *it2; + datfd->write(&entryoff, 4); + count++; + } + + // now see what our new position is in our word.dat file and + // determine the size of this database entry + size = datfd->seek(0, SEEK_CUR) - offset; + + // store the size of this database entry + idxfd->write(&size, 4); + printf("%d entries (size: %d)\n", count, size); + } + FileMgr::getSystemFileMgr()->close(datfd); + FileMgr::getSystemFileMgr()->close(idxfd); + } + return 0; +#else + return SWModule::createSearchFramework(percent, percentUserData); +#endif +} + + +void RawText4::deleteSearchFramework() { +#ifndef USELUCENE + SWBuf target = path; + char ch = target.c_str()[strlen(target.c_str())-1]; + if 
((ch != '/') && (ch != '\\')) + target += "/lucene"; + FileMgr::removeFile(target + "ntwords.dat"); + FileMgr::removeFile(target + "otwords.dat"); + FileMgr::removeFile(target + "ntwords.idx"); + FileMgr::removeFile(target + "otwords.idx"); +#else + SWModule::deleteSearchFramework(); +#endif +} + + +/****************************************************************************** + * SWModule::search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: ListKey set to verses that contain istr + */ + +ListKey &RawText4::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { +#ifndef USELUCENE + listKey.ClearList(); + + if ((fastSearch[0]) && (fastSearch[1])) { + + switch (searchType) { + case -2: { + + if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to + // ignore case + break; // can't handle fast case sensitive searches + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0; + SWTRY { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + SWCATCH ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. 
+ if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listKey; + } + + SWKey saveKey = *testKeyType; // save current place + + char error = 0; + char **words = 0; + char *wordBuf = 0; + int wordCount = 0; + long start; + unsigned long size; + char *idxbuf = 0; + SWBuf datBuf; + list indexes; + list indexes2; + VerseKey vk; + vk = TOP; + + (*percent)(10, percentUserData); + + // toupper our copy of search string + stdstr(&wordBuf, istr); + toupperstr(wordBuf); + + // get list of individual words + words = (char **)calloc(sizeof(char *), 10); + int allocWords = 10; + words[wordCount] = strtok(wordBuf, " "); + while (words[wordCount]) { + wordCount++; + if (wordCount == allocWords) { + allocWords+=10; + words = (char **)realloc(words, sizeof(char *)*allocWords); + } + words[wordCount] = strtok(NULL, " "); + } + + (*percent)(20, percentUserData); + + // clear our result set + indexes.erase(indexes.begin(), indexes.end()); + + // search both old and new testament indexes + for (int j = 0; j < 2; j++) { + // iterate thru each word the user passed to us. 
+ for (int i = 0; i < wordCount; i++) { + + // clear this word's result set + indexes2.erase(indexes2.begin(), indexes2.end()); + error = 0; + + // iterate thru every word in the database that starts + // with our search word + for (int away = 0; !error; away++) { + idxbuf = 0; + + // find our word in the database and jump ahead _away_ + error = fastSearch[j]->findOffset(words[i], &start, &size, away); + + // get the word from the database + fastSearch[j]->getIDXBufDat(start, &idxbuf); + + // check to see if it starts with our target word + if (strlen(idxbuf) > strlen(words[i])) + idxbuf[strlen(words[i])] = 0; +// else words[i][strlen(idxbuf)] = 0; + if (!strcmp(idxbuf, words[i])) { + + // get data for this word from database + delete [] idxbuf; + idxbuf = 0; + datBuf = ""; + fastSearch[j]->readText(start, &size, &idxbuf, datBuf); + + // we know that the data consists of sizof(long) + // records each a valid module position that constains + // this word + // + // iterate thru each of these module positions + long *keyindex = (long *)datBuf.getRawData(); + while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) { + if (i) { // if we're not on our first word + + // check to see if this word is already in the result set. 
+									// This is our AND functionality
+									if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end())
+										// add to new result set
+										indexes2.push_back(*keyindex);
+								}
+								else	indexes2.push_back(*keyindex);
+								keyindex++;
+							}
+						}
+						else error = 1;	// no more matches
+						free(idxbuf);
+					}
+
+					// make new result set final result set
+					indexes = indexes2;
+
+					percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData);
+				}
+
+				// indexes contains our good verses, lets return them in a listKey
+				indexes.sort();
+
+				// iterate thru each good module position that meets the search
+				for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) {
+
+					// set a temporary verse key to this module position
+					vk.Testament(j+1);
+					vk.Error();
+					vk.Index(*it);
+
+					// check scope
+					// Try to set our scope key to this verse key
+					if (scope) {
+						*testKeyType = vk;
+
+						// check to see if it set ok and if so, add to our return list
+						if (*testKeyType == vk)
+							listKey << (const char *) vk;
+					}
+					else listKey << (const char*) vk;
+				}
+			}
+			(*percent)(98, percentUserData);
+
+			free(words);
+			free(wordBuf);
+
+			*testKeyType = saveKey;	// set current place back to original
+
+			listKey = TOP;
+			(*percent)(100, percentUserData);
+			return listKey;
+		}
+
+		default:
+			break;
+		}
+	}
+
+	// check if we just want to see if search is supported
+	if (justCheckIfSupported) {
+		*justCheckIfSupported = false;
+		return listKey;
+	}
+
+#endif
+	// if we don't support this search, fall back to base class
+	return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
+}
+
+
+void RawText4::setEntry(const char *inbuf, long len) {	// hand inbuf/len to doSetText for the current verse key
+	VerseKey &key = getVerseKey();
+	doSetText(key.Testament(), key.Index(), inbuf, len);
+}
+
+
+void RawText4::linkEntry(const SWKey *inkey) {	// hand the current key (destination) and inkey's index (source) to doLinkEntry
+	VerseKey &destkey = getVerseKey();
+	const VerseKey *srckey = 0;
+
+	// see if we have a VerseKey * or descendant
+	SWTRY {
+		srckey = SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	SWCATCH ( ... ) {}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
+
+	if (inkey != srckey) // free our key if we created a VerseKey
+		delete srckey;
+}
+
+
+/******************************************************************************
+ * RawText4::deleteEntry	- writes an empty string for the current verse key via doSetText
+ *
+ * RET:	nothing (void)
+ */
+
+void RawText4::deleteEntry() {
+	VerseKey &key = getVerseKey();
+	doSetText(key.Testament(), key.Index(), "");
+}
+
+/******************************************************************************
+ * RawText4::increment	- Moves the module key by a number of entries
+ *
+ * ENT:	steps	- Number of entries to move: > 0 steps forward, < 0 steps backward
+ *
+ * RET:	nothing (void); error is left as 0 or KEYERR_OUTOFBOUNDS
+ */
+
+void RawText4::increment(int steps) {
+	long  start;
+	unsigned long size;
+	VerseKey *tmpkey = &getVerseKey();
+
+	findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+	SWKey lastgood = *tmpkey;	// last position that counted as a step; restored if the key runs into an error
+	while (steps) {
+		long laststart = start;
+		unsigned long lastsize = size;
+		// move the key one position in the requested direction, then re-read it as a VerseKey
+		(steps > 0) ? (*key)++ : (*key)--;
+		tmpkey = &getVerseKey();
+
+		if ((error = key->Error())) {
+			*key = lastgood;
+			break;
+		}
+		long index = tmpkey->Index();
+		findOffset(tmpkey->Testament(), index, &start, &size);
+		if (
+			(((laststart != start) || (lastsize != size))	// we're a different entry
+//				&& (start > 0)
+				&& (size))	// and we actually have a size
+				||(!skipConsecutiveLinks)) {	// or we don't want to skip consecutive links
+			steps += (steps < 0) ? 1 : -1;	// one step consumed: move steps toward zero
+			lastgood = *tmpkey;
+		}
+	}
+	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp
new file mode 100644
index 0000000..d0ff386
--- /dev/null
+++ b/src/modules/texts/swtext.cpp
@@ -0,0 +1,113 @@
+/******************************************************************************
+ *  swtext.cpp  - code for base class 'SWText'- The basis for all text modules
+ */
+
+#include
+#include
+#include
+#include
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * SWText Constructor - Initializes data for instance of SWText
+ *	(enc, dir, mark and ilang are passed straight through to SWModule)
+ * ENT:	imodname - Internal name for module
+ *	imoddesc - Name to display to user for module
+ *	idisp	 - Display object to use for displaying
+ */
+
+SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang): SWModule(imodname, imoddesc, idisp, (char *)"Biblical Texts", enc, dir, mark, ilang) {
+	tmpVK = new VerseKey();	// scratch key returned by getVerseKey() when key cannot be used as a VerseKey
+	delete key;	// drop the existing key and replace it with one from CreateKey()
+	key = CreateKey();
+	skipConsecutiveLinks = false;
+}
+
+
+/******************************************************************************
+ * SWText Destructor - Cleans up instance of SWText (frees tmpVK)
+ */
+
+SWText::~SWText() {
+	delete tmpVK;
+}
+
+
+/******************************************************************************
+ * SWText CreateKey - Create the correct key (VerseKey) for use with SWText; caller owns the result
+ */
+
+SWKey *SWText::CreateKey() {
+	return new VerseKey();
+}
+
+
+long SWText::Index() const {	// stores the current key's NewIndex() in entryIndex and returns it
+	VerseKey *key = 0;
+	SWTRY {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	SWCATCH ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);	// not a VerseKey: build a temporary one from it
+
+	entryIndex = key->NewIndex();
+
+	if (key != this->key)	// only the temporary is ours to free
+		delete key;
+
+	return entryIndex;
+}
+
+long SWText::Index(long iindex) {	// sets testament 1 and index iindex on the key, then returns Index()
+	VerseKey *key = 0;
+	SWTRY {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	SWCATCH ( ... ) {}
+	if (!key)
+		key = new VerseKey(this->key);	// not a VerseKey: work on a temporary built from it
+
+	key->Testament(1);
+	key->Index(iindex);
+
+	if (key != this->key) {
+		this->key->copyFrom(*key);	// push the temporary's position back into the real key
+		delete key;
+	}
+
+	return Index();
+}
+
+
+VerseKey &SWText::getVerseKey() const {	// returns the module key as a VerseKey reference
+	VerseKey *key = NULL;
+	// see if we have a VerseKey * or descendant
+	SWTRY {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	SWCATCH ( ... ) {	}
+	if (!key) {	// otherwise, if the key is a ListKey, try the element returned by GetElement()
+		ListKey *lkTest = 0;
+		SWTRY {
+			lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+		}
+		SWCATCH ( ... ) {	}
+		if (lkTest) {
+			SWTRY {
+				key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());
+			}
+			SWCATCH ( ... ) {	}
+		}
+	}
+	if (!key) {	// last resort: assign this->key into the reusable tmpVK and return that
+		tmpVK->setLocale(LocaleMgr::getSystemLocaleMgr()->getDefaultLocaleName());
+		(*tmpVK) = *(this->key);
+		return (*tmpVK);
+	}
+	else	return *key;
+}
+
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/ztext/Makefile b/src/modules/texts/ztext/Makefile
new file mode 100644
index 0000000..35d6648
--- /dev/null
+++ b/src/modules/texts/ztext/Makefile
@@ -0,0 +1,5 @@
+
+root := ../../../..
+
+all:
+	make -C ${root}
diff --git a/src/modules/texts/ztext/Makefile.am b/src/modules/texts/ztext/Makefile.am
new file mode 100644
index 0000000..817107c
--- /dev/null
+++ b/src/modules/texts/ztext/Makefile.am
@@ -0,0 +1,3 @@
+ztextdir = $(top_srcdir)/src/modules/texts/ztext
+
+libsword_la_SOURCES += $(ztextdir)/ztext.cpp
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp
new file mode 100644
index 0000000..b461d49
--- /dev/null
+++ b/src/modules/texts/ztext/ztext.cpp
@@ -0,0 +1,195 @@
+/******************************************************************************
+ *  ztext.cpp - code for class 'zText'- a module that reads compressed text
+ *				files: ot and nt using indexes ??.vss
+ */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include // GNU
+
+
+SWORD_NAMESPACE_START
+
+/******************************************************************************
+ * zText Constructor - Initializes data for instance of zText
+ *
+ * ENT:	ipath - path to data files
+ *		iname - Internal name for module
+ *		idesc - Name to display to user for module
+ *		iblockType - verse, chapter, book, etc. of index chunks
+ *		icomp - Compressor object
+ *		idisp - Display object to use for displaying
+ */
+
+zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+		: zVerse(ipath, FileMgr::RDWR, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
+	blockType = iblockType;
+	lastWriteKey = 0;	// assigned by setEntry() after each write
+}
+
+
+/******************************************************************************
+ * zText Destructor - Cleans up instance of zText (flushes the cache, frees lastWriteKey)
+ */
+
+zText::~zText()
+{
+	flushCache();
+
+	if (lastWriteKey)
+		delete lastWriteKey;
+
+}
+
+
+bool zText::isWritable() { return ((idxfp[0]->getFd() > 0) && ((idxfp[0]->mode & FileMgr::RDWR) == FileMgr::RDWR)); }	// positive fd on idxfp[0] and RDWR mode bits set
+
+
+/******************************************************************************
+ * zText::getRawEntryBuf	- Returns the current verse buffer
+ *
+ * RET: entryBuf, refilled with the text read for the current verse key
+ */
+
+SWBuf &zText::getRawEntryBuf() {
+	long  start = 0;
+	unsigned short size = 0;
+	VerseKey &key = getVerseKey();
+
+	findOffset(key.Testament(), key.Index(), &start, &size);	// fills in start and size for this key
+	entrySize = size;        // support getEntrySize call
+
+	entryBuf = "";
+	zReadText(key.Testament(), start, size, entryBuf);
+
+	rawFilter(entryBuf, &key);
+
+//	if (!isUnicode())
+		prepText(entryBuf);
+
+	return entryBuf;
+}
+
+
+bool zText::sameBlock(VerseKey *k1, VerseKey *k2) {	// true when no field checked for blockType differs between k1 and k2
+	if (k1->Testament() != k2->Testament())
+		return false;
+
+	switch (blockType) {	// no break between cases: each case also runs the checks of the cases below it
+	case VERSEBLOCKS:
+		if (k1->Verse() != k2->Verse())
+			return false;
+	case CHAPTERBLOCKS:	// also reached by fall-through from VERSEBLOCKS
+		if (k1->Chapter() != k2->Chapter())
+			return false;
+	case BOOKBLOCKS:	// also reached by fall-through from the cases above
+		if (k1->Book() != k2->Book())
+			return false;
+	}
+	return true;
+}
+
+
+void zText::setEntry(const char *inbuf, long len) {	// hand inbuf/len to doSetText for the current verse key
+	VerseKey &key = getVerseKey();
+
+	// see if we've jumped across blocks since last write
+	if (lastWriteKey) {
+		if (!sameBlock(lastWriteKey, &key)) {
+			flushCache();
+		}
+		delete lastWriteKey;	// replaced by a fresh clone after the write below
+	}
+
+	doSetText(key.Testament(), key.Index(), inbuf, len);
+
+	lastWriteKey = (VerseKey *)key.clone();	// must delete
+}
+
+
+void zText::linkEntry(const SWKey *inkey) {	// hand the current key (destination) and inkey's index (source) to doLinkEntry
+	VerseKey &destkey = getVerseKey();
+	const VerseKey *srckey = 0;
+
+	// see if we have a VerseKey * or descendant
+	SWTRY {
+		srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
+	}
+	SWCATCH ( ... ) {
+	}
+	// if we don't have a VerseKey * descendant, create our own
+	if (!srckey)
+		srckey = new VerseKey(inkey);
+
+	doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
+
+	if (inkey != srckey) // free our key if we created a VerseKey
+		delete srckey;
+}
+
+
+/******************************************************************************
+ * zText::deleteEntry	- writes an empty string for the current verse key via doSetText
+ *
+ */
+
+void zText::deleteEntry() {
+
+	VerseKey &key = getVerseKey();
+
+	doSetText(key.Testament(), key.Index(), "");
+}
+
+
+/******************************************************************************
+ * zText::increment	- Moves the module key by a number of entries
+ *
+ * ENT:	steps	- Number of entries to move: > 0 steps forward, < 0 steps backward
+ *		  (error is left as 0 or KEYERR_OUTOFBOUNDS on return)
+ */
+
+void zText::increment(int steps) {
+	long  start;
+	unsigned short size;
+	VerseKey *tmpkey = &getVerseKey();
+
+	findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+
+	SWKey lastgood = *tmpkey;	// last position that counted as a step; restored if the key runs into an error
+	while (steps) {
+		long laststart = start;
+		unsigned short lastsize = size;
+		// move the key one position in the requested direction, then re-read it as a VerseKey
+		(steps > 0) ? (*key)++ : (*key)--;
+		tmpkey = &getVerseKey();
+
+		if ((error = key->Error())) {
+			*key = lastgood;
+			break;
+		}
+		long index = tmpkey->Index();
+		findOffset(tmpkey->Testament(), index, &start, &size);
+
+		if (
+			(((laststart != start) || (lastsize != size))	// we're a different entry
+//				&& (start > 0)
+				&& (size))	// and we actually have a size
+				||(!skipConsecutiveLinks)) {	// or we don't want to skip consecutive links
+			steps += (steps < 0) ? 1 : -1;	// one step consumed: move steps toward zero
+			lastgood = *tmpkey;
+		}
+	}
+	error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
+
+
+
+SWORD_NAMESPACE_END
-- 
cgit v1.2.3