diff options
author | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
---|---|---|
committer | Roberto C. Sanchez <roberto@connexer.com> | 2014-03-29 10:53:49 -0400 |
commit | 8c8aa6b07e595cfac56838b5964ab3e96051f1b2 (patch) | |
tree | da38e2c1979148dbd3b0c7b87f930746f5ba7f44 /src/modules/texts | |
parent | 8d3fc864d094eeadc721f8e93436b37a5fab173e (diff) |
Imported Upstream version 1.5.7
Diffstat (limited to 'src/modules/texts')
-rw-r--r-- | src/modules/texts/Makefile.am | 1 | ||||
-rw-r--r-- | src/modules/texts/rawgbf/rawgbf.cpp | 5 | ||||
-rw-r--r-- | src/modules/texts/rawtext/rawtext.cpp | 470 | ||||
-rw-r--r-- | src/modules/texts/swtext.cpp | 42 | ||||
-rw-r--r-- | src/modules/texts/ztext/ztext.cpp | 483 |
5 files changed, 622 insertions, 379 deletions
diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am index b48d93e..2c4479e 100644 --- a/src/modules/texts/Makefile.am +++ b/src/modules/texts/Makefile.am @@ -4,4 +4,3 @@ libsword_la_SOURCES += $(textsdir)/swtext.cpp include ../src/modules/texts/rawtext/Makefile.am include ../src/modules/texts/ztext/Makefile.am -include ../src/modules/texts/rawgbf/Makefile.am diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp index 0866585..6b8516f 100644 --- a/src/modules/texts/rawgbf/rawgbf.cpp +++ b/src/modules/texts/rawgbf/rawgbf.cpp @@ -19,6 +19,7 @@ #include <rawverse.h> #include <rawgbf.h> +SWORD_NAMESPACE_START /****************************************************************************** * RawGBF Constructor - Initializes data for instance of RawGBF @@ -73,7 +74,7 @@ RawGBF::operator char*() delete [] entrybuf; entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other. - gettext(key->Testament(), start, size + 1, entrybuf); + readtext(key->Testament(), start, size + 1, entrybuf); preptext(entrybuf); RenderText(entrybuf, size * 3); @@ -82,3 +83,5 @@ RawGBF::operator char*() return entrybuf; } + +SWORD_NAMESPACE_END diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp index acc1cfd..1e1048d 100644 --- a/src/modules/texts/rawtext/rawtext.cpp +++ b/src/modules/texts/rawtext/rawtext.cpp @@ -13,20 +13,35 @@ #include <unistd.h> #endif -#include <string.h> #include <utilfuns.h> #include <rawverse.h> #include <rawtext.h> +#include <regex.h> // GNU +#ifdef USELUCENE +#include <CLucene/CLucene.h> +using namespace lucene::search; +using namespace lucene::queryParser; +#else #include <map> #include <list> #include <algorithm> -#include <regex.h> // GNU + +using std::map; +using std::list; +using std::find; + +#endif #ifndef O_BINARY #define O_BINARY 0 #endif +SWORD_NAMESPACE_START + +typedef map < SWBuf, list<long> > strlist; +typedef list<long> longlist; + /****************************************************************************** * RawText Constructor - Initializes data for instance of RawText * @@ -39,7 +54,20 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp : SWText(iname, idesc, idisp, enc, dir, mark, ilang), RawVerse(ipath) { - string fname; +#ifdef USELUCENE + SWBuf fname; + fname = path; + ir = 0; + is = 0; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/lucene"; + if (IndexReader::indexExists(fname.c_str())) { + ir = &IndexReader::open(fname); + is = new IndexSearcher(*ir); + } +#else + SWBuf fname; fname = path; char ch = fname.c_str()[strlen(fname.c_str())-1]; if ((ch != '/') && (ch != '\\')) @@ -47,13 +75,14 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp for (int loop = 0; loop < 2; loop++) { fastSearch[loop] = 0; - string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); + SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); if (!access(fastidxname.c_str(), 04)) { fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); if (!access(fastidxname.c_str(), 04)) fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str()); } } +#endif } @@ -61,68 +90,159 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp * RawText Destructor - Cleans up instance of RawText */ -RawText::~RawText() -{ +RawText::~RawText() { +#ifdef USELUCENE + if (is) + is->close(); + + if (ir) + delete ir; +#else if (fastSearch[0]) delete fastSearch[0]; if (fastSearch[1]) delete fastSearch[1]; +#endif +} + + +VerseKey &RawText::getVerseKey() { + static VerseKey tmpVK; + VerseKey *key; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) { } + if (!key) { + ListKey *lkTest = 0; + try { + lkTest = SWDYNAMIC_CAST(ListKey, this->key); + } + catch ( ... ) { } + if (lkTest) { + try { + key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement()); + } + catch ( ... ) { } + } + } + if (!key) { + tmpVK = *(this->key); + return tmpVK; + } + else return *key; } /****************************************************************************** - * RawText::operator char * - Returns the correct verse when char * cast + * RawText::getRawEntry - Returns the correct verse when char * cast * is requested * * RET: string buffer with verse */ -char *RawText::getRawEntry() { +SWBuf &RawText::getRawEntryBuf() { long start = 0; unsigned short size = 0; - VerseKey *key = 0; + VerseKey &key = getVerseKey(); - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE + findOffset(key.Testament(), key.Index(), &start, &size); + entrySize = size; // support getEntrySize call + + entryBuf = ""; + readText(key.Testament(), start, size, entryBuf); + + rawFilter(entryBuf, 0); // hack, decipher + rawFilter(entryBuf, &key); + +// if (!isUnicode()) + prepText(entryBuf); + + return entryBuf; +} + + +signed char RawText::createSearchFramework() { +#ifdef USELUCENE + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; } - catch ( ... ) { } -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); + else savekey = key; - findoffset(key->Testament(), key->Index(), &start, &size); - entrySize = size; // support getEntrySize call + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + setKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + + IndexWriter* writer = NULL; + Directory* d = NULL; + + lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer(); + SWBuf target = path; + char ch = target.c_str()[strlen(target.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + target += "/lucene"; - unsigned long newsize = (size + 2) * FILTERPAD; - if (newsize > entrybufallocsize) { - if (entrybuf) - delete [] entrybuf; - entrybuf = new char [ newsize ]; - entrybufallocsize = newsize; + if (IndexReader::indexExists(target.c_str())) { + d = &FSDirectory::getDirectory(target.c_str(), false); + if (IndexReader::isLocked(*d)) { + IndexReader::unlock(*d); + } + + writer = new IndexWriter(*d, an, false); + } else { + d = &FSDirectory::getDirectory(target.c_str(), true); + writer = new IndexWriter( *d ,an, true); } - *entrybuf = 0; - gettext(key->Testament(), start, (size + 2), entrybuf); - rawFilter(entrybuf, size, key); + + while (!Error()) { + Document &doc = *new Document(); + doc.add( Field::Text(_T("key"), (const char *)*lkey ) ); + doc.add( Field::Text(_T("content"), StripText()) ); + writer->addDocument(doc); + delete &doc; - if (!isUnicode()) - preptext(entrybuf); + (*this)++; + } - if (this->key != key) // free our key if we created a VerseKey - delete key; + writer->optimize(); + writer->close(); + delete writer; + delete &an; - return entrybuf; -} + // reposition module back to where it was before we were called + setKey(*savekey); + if (!savekey->Persist()) + delete savekey; -signed char RawText::createSearchFramework() { + if (searchkey) + delete searchkey; + + +#else SWKey *savekey = 0; SWKey *searchkey = 0; SWKey textkey; @@ -132,7 +252,7 @@ signed char RawText::createSearchFramework() { // dictionary holds words associated with a list // containing every module position that contains // the word. [0] Old Testament; [1] NT - map < string, list<long> > dictionary[2]; + map < SWBuf, list<long> > dictionary[2]; // save key information so as not to disrupt original @@ -146,7 +266,7 @@ signed char RawText::createSearchFramework() { searchkey = (key->Persist())?key->clone():0; if (searchkey) { searchkey->Persist(1); - SetKey(*searchkey); + setKey(*searchkey); } // position module at the beginning @@ -164,9 +284,8 @@ signed char RawText::createSearchFramework() { word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); while (word) { - // make work upper case - for (unsigned int i = 0; i < strlen(word); i++) - word[i] = SW_toupper(word[i]); + // make word upper case + toupperstr(word); // lookup word in dictionary (or make entry in dictionary // for this word) and add this module position (index) to @@ -179,7 +298,7 @@ signed char RawText::createSearchFramework() { } // reposition module back to where it was before we were called - SetKey(*savekey); + setKey(*savekey); if (!savekey->Persist()) delete savekey; @@ -191,12 +310,12 @@ signed char RawText::createSearchFramework() { // --------- Let's output an index from our dictionary ----------- int datfd; int idxfd; - map < string, list<long> >::iterator it; - list<long>::iterator it2; + strlist::iterator it; + longlist::iterator it2; unsigned long offset, entryoff; unsigned short size; - string fname; + SWBuf fname; fname = path; char ch = fname.c_str()[strlen(fname.c_str())-1]; if ((ch != '/') && (ch != '\\')) @@ -249,12 +368,13 @@ signed char RawText::createSearchFramework() { close(datfd); close(idxfd); } +#endif return 0; } /****************************************************************************** - * SWModule::Search - Searches a module for a string + * SWModule::search - Searches a module for a string * * ENT: istr - string for which to search * searchType - type of search to perform @@ -268,8 +388,89 @@ signed char RawText::createSearchFramework() { * RET: listkey set to verses that contain istr */ -ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) -{ +ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { +#ifdef USELUCENE + listkey.ClearList(); + + if ((is) && (ir)) { + + switch (searchType) { + case -2: { // let lucene replace multiword for now + + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0, vk; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. + if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + (*percent)(10, percentUserData); + + standard::StandardAnalyzer analyzer; + Query &q = QueryParser::Parse(istr, _T("content"), analyzer); + (*percent)(20, percentUserData); + Hits &h = is->search(q); + (*percent)(80, percentUserData); + + + // iterate thru each good module position that meets the search + for (long i = 0; i < h.Length(); i++) { + Document &doc = h.doc(i); + + // set a temporary verse key to this module position + vk = doc.get(_T("key")); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + else { + listkey << (const char*) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + } + (*percent)(98, percentUserData); + + delete &h; + delete &q; + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } +#else listkey.ClearList(); if ((fastSearch[0]) && (fastSearch[1])) { @@ -284,14 +485,10 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco // test to see if our scope for this search is bounded by a // VerseKey VerseKey *testKeyType = 0; -#ifndef _WIN32_WCE try { -#endif testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); -#ifndef _WIN32_WCE } catch ( ... ) {} -#endif // if we don't have a VerseKey * decendant we can't handle // because of scope. // In the future, add bool SWKey::isValid(const char *tryString); @@ -315,7 +512,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco long start; unsigned short size; char *idxbuf = 0; - char *datbuf = 0; + SWBuf datBuf; list <long> indexes; list <long> indexes2; VerseKey vk; @@ -325,8 +522,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco // toupper our copy of search string stdstr(&wordBuf, istr); - for (unsigned int i = 0; i < strlen(wordBuf); i++) - wordBuf[i] = SW_toupper(wordBuf[i]); + toupperstr(wordBuf); // get list of individual words words = (char **)calloc(sizeof(char *), 10); @@ -361,10 +557,10 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco idxbuf = 0; // find our word in the database and jump ahead _away_ - error = fastSearch[j]->findoffset(words[i], &start, &size, away); + error = fastSearch[j]->findOffset(words[i], &start, &size, away); // get the word from the database - fastSearch[j]->getidxbufdat(start, &idxbuf); + fastSearch[j]->getIDXBufDat(start, &idxbuf); // check to see if it starts with our target word if (strlen(idxbuf) > strlen(words[i])) @@ -373,18 +569,18 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco if (!strcmp(idxbuf, words[i])) { // get data for this word from database - free(idxbuf); - idxbuf = (char *)calloc(size+2, 1); - datbuf = (char *)calloc(size+2, 1); - fastSearch[j]->gettext(start, size + 2, idxbuf, datbuf); + delete [] idxbuf; + idxbuf = 0; + datBuf = ""; + fastSearch[j]->readText(start, &size, &idxbuf, datBuf); // we know that the data consists of sizof(long) // records each a valid module position that constains // this word // // iterate thru each of these module positions - long *keyindex = (long *)datbuf; - while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) { + long *keyindex = (long *)datBuf.getRawData(); + while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) { if (i) { // if we're not on our first word // check to see if this word is already in the result set. @@ -396,7 +592,6 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco else indexes2.push_back(*keyindex); keyindex++; } - free(datbuf); } else error = 1; // no more matches free(idxbuf); @@ -412,7 +607,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco indexes.sort(); // iterate thru each good module position that meets the search - for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) { + for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) { // set a temporary verse key to this module position vk.Testament(j+1); @@ -454,85 +649,35 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco return listkey; } - // if we don't support this search, fall back to base class - return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); -} - -#ifdef _MSC_VER -SWModule &RawText::operator =(SW_POSITION p) { -#else -RawText &RawText::operator =(SW_POSITION p) { #endif - SWModule::operator =(p); - return *this; + // if we don't support this search, fall back to base class + return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); } -SWModule &RawText::setentry(const char *inbuf, long len) { - VerseKey *key = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); - - settext(key->Testament(), key->Index(), inbuf, len); - - if (this->key != key) // free our key if we created a VerseKey - delete key; - return *this; -} - -SWModule &RawText::operator <<(const char *inbuf) { - return setentry(inbuf, 0); +void RawText::setEntry(const char *inbuf, long len) { + VerseKey &key = getVerseKey(); + doSetText(key.Testament(), key.Index(), inbuf, len); } -SWModule &RawText::operator <<(const SWKey *inkey) { - VerseKey *destkey = 0; +void RawText::linkEntry(const SWKey *inkey) { + VerseKey &destkey = getVerseKey(); const VerseKey *srckey = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - destkey = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!destkey) - destkey = new VerseKey(this->key); // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE try { -#endif srckey = SWDYNAMIC_CAST(VerseKey, inkey); -#ifndef _WIN32_WCE } catch ( ... ) {} -#endif // if we don't have a VerseKey * decendant, create our own if (!srckey) srckey = new VerseKey(inkey); - linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); - - if (this->key != destkey) // free our key if we created a VerseKey - delete destkey; + doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); if (inkey != srckey) // free our key if we created a VerseKey delete srckey; - - return *this; } @@ -543,88 +688,49 @@ SWModule &RawText::operator <<(const SWKey *inkey) { */ void RawText::deleteEntry() { - - VerseKey *key = 0; - -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!key) - key = new VerseKey(this->key); - - settext(key->Testament(), key->Index(), ""); - - if (key != this->key) - delete key; + VerseKey &key = getVerseKey(); + doSetText(key.Testament(), key.Index(), ""); } /****************************************************************************** - * RawText::operator += - Increments module key a number of entries + * RawText::increment - Increments module key a number of entries * * ENT: increment - Number of entries to jump forward * * RET: *this */ -SWModule &RawText::operator +=(int increment) -{ +void RawText::increment(int steps) { long start; unsigned short size; - VerseKey *tmpkey = 0; - -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + VerseKey *tmpkey = &getVerseKey(); - findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); SWKey lastgood = *tmpkey; - while (increment) { + while (steps) { long laststart = start; unsigned short lastsize = size; SWKey lasttry = *tmpkey; - (increment > 0) ? (*key)++ : (*key)--; - if (tmpkey != key) - delete tmpkey; - tmpkey = 0; -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + (steps > 0) ? (*key)++ : (*key)--; + tmpkey = &getVerseKey(); if ((error = key->Error())) { *key = lastgood; break; } long index = tmpkey->Index(); - findoffset(tmpkey->Testament(), index, &start, &size); - if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { - increment += (increment < 0) ? 1 : -1; + findOffset(tmpkey->Testament(), index, &start, &size); + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry +// && (start > 0) + && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; lastgood = *tmpkey; } } error = (error) ? KEYERR_OUTOFBOUNDS : 0; - - if (tmpkey != key) - delete tmpkey; - - return *this; } + +SWORD_NAMESPACE_END diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp index 85da8a3..8610dae 100644 --- a/src/modules/texts/swtext.cpp +++ b/src/modules/texts/swtext.cpp @@ -5,6 +5,7 @@ #include <swtext.h> #include <listkey.h> +SWORD_NAMESPACE_START /****************************************************************************** * SWText Constructor - Initializes data for instance of SWText @@ -18,6 +19,7 @@ SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWT { delete key; key = CreateKey(); + skipConsecutiveLinks = false; } @@ -37,3 +39,43 @@ SWKey *SWText::CreateKey() { return new VerseKey(); } + + +long SWText::Index() const { + VerseKey *key = 0; + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + entryIndex = key->NewIndex(); + + if (key != this->key) + delete key; + + return entryIndex; +} + +long SWText::Index(long iindex) { + VerseKey *key = 0; + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) {} + if (!key) + key = new VerseKey(this->key); + + key->Testament(1); + key->Index(iindex); + + if (key != this->key) { + this->key->copyFrom(*key); + delete key; + } + + return Index(); +} + +SWORD_NAMESPACE_END diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp index 6e243b9..1fe0e7a 100644 --- a/src/modules/texts/ztext/ztext.cpp +++ b/src/modules/texts/ztext/ztext.cpp @@ -14,13 +14,19 @@ #include <unistd.h> #endif -#include <iostream.h> -#include <string.h> #include <utilfuns.h> -//#include <rawverse.h> #include <ztext.h> -//#include <zlib.h> +#include <regex.h> // GNU + + +#ifdef USELUCENE +#include <CLucene/CLucene.h> +using namespace lucene::search; +using namespace lucene::queryParser; +#endif + +SWORD_NAMESPACE_START /****************************************************************************** * zText Constructor - Initializes data for instance of zText @@ -33,10 +39,23 @@ * idisp - Display object to use for displaying */ -zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ -{ +zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) { blockType = iblockType; lastWriteKey = 0; +#ifdef USELUCENE + SWBuf fname; + fname = path; + ir = 0; + is = 0; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/lucene"; + if (IndexReader::indexExists(fname.c_str())) { + ir = &IndexReader::open(fname); + is = new IndexSearcher(*ir); + } +#endif } @@ -50,6 +69,14 @@ zText::~zText() if (lastWriteKey) delete lastWriteKey; + +#ifdef USELUCENE + if (is) + is->close(); + + if (ir) + delete ir; +#endif } @@ -59,96 +86,23 @@ zText::~zText() * RET: buffer with verse */ -char *zText::getRawEntry() -{ -/* - long start; - unsigned long size; - unsigned long destsize; - char *tmpbuf; - char *dest; - VerseKey *lkey = (VerseKey *) SWModule::key; - char sizebuf[3]; - - lkey->Verse(0); - if (chapcache != lkey->Index()) { - findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); - gettext(lkey->Testament(), start, 3, sizebuf); - memcpy(&size, sizebuf, 2); - tmpbuf = new char [ size + 1 ]; - gettext(lkey->Testament(), start + 2, size + 1 , tmpbuf); - //zBuf(&size, tmpbuf); - dest = new char [ (size*4) + 1 ]; - uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size); - chapcache = lkey->Index(); - delete [] tmpbuf; - } - - //findoffset(key->Testament(), key->Index(), &start, &size); - findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); - - if (versebuf) - delete [] versebuf; - versebuf = new char [ size + 1 ]; - //memcpy(versebuf, Buf(), size); - memcpy(versebuf, dest, destsize); - delete [] dest; - - preptext(versebuf); - - return versebuf; -*/ - +SWBuf &zText::getRawEntryBuf() { long start = 0; unsigned short size = 0; - VerseKey *key = 0; - - //printf ("zText char *\n"); - - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); + VerseKey &key = getVerseKey(); - //printf ("checking cache\n"); - //printf ("finding offset\n"); - findoffset(key->Testament(), key->Index(), &start, &size); + findOffset(key.Testament(), key.Index(), &start, &size); entrySize = size; // support getEntrySize call + + entryBuf = ""; + zReadText(key.Testament(), start, size, entryBuf); - //printf ("deleting previous buffer\n"); - unsigned long newsize = (size + 2) * FILTERPAD; - if (newsize > entrybufallocsize) { - if (entrybuf) - delete [] entrybuf; - entrybuf = new char [ newsize ]; - entrybufallocsize = newsize; - } - *entrybuf = 0; - - //printf ("getting text\n"); - swgettext(key->Testament(), start, (size + 2), entrybuf); - //printf ("got text\n"); - - rawFilter(entrybuf, size, key); - - //printf ("preparing text\n"); - if (!isUnicode()) - preptext(entrybuf); - - if (this->key != key) // free our key if we created a VerseKey - delete key; + rawFilter(entryBuf, &key); - //printf ("returning text\n"); - return entrybuf; +// if (!isUnicode()) + prepText(entryBuf); + return entryBuf; } @@ -171,177 +125,316 @@ bool zText::sameBlock(VerseKey *k1, VerseKey *k2) { } -SWModule &zText::setentry(const char *inbuf, long len) { - VerseKey *key = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); - +void zText::setEntry(const char *inbuf, long len) { + VerseKey &key = getVerseKey(); // see if we've jumped across blocks since last write if (lastWriteKey) { - if (!sameBlock(lastWriteKey, key)) { + if (!sameBlock(lastWriteKey, &key)) { flushCache(); } delete lastWriteKey; } - settext(key->Testament(), key->Index(), inbuf, len); + doSetText(key.Testament(), key.Index(), inbuf, len); - lastWriteKey = (VerseKey *)key->clone(); // must delete - - if (this->key != key) // free our key if we created a VerseKey - delete key; - - return *this; -} - -SWModule &zText::operator <<(const char *inbuf) { - return setentry(inbuf, 0); + lastWriteKey = (VerseKey *)key.clone(); // must delete } -SWModule &zText::operator <<(const SWKey *inkey) { - VerseKey *destkey = 0; +void zText::linkEntry(const SWKey *inkey) { + VerseKey &destkey = getVerseKey(); const VerseKey *srckey = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - destkey = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!destkey) - destkey = new VerseKey(this->key); // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE try { -#endif srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); -#ifndef _WIN32_WCE } catch ( ... ) { } -#endif // if we don't have a VerseKey * decendant, create our own if (!srckey) srckey = new VerseKey(inkey); - linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); - - if (this->key != destkey) // free our key if we created a VerseKey - delete destkey; + doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); if (inkey != srckey) // free our key if we created a VerseKey delete srckey; - - return *this; } /****************************************************************************** * zFiles::deleteEntry - deletes this entry * - * RET: *this */ void zText::deleteEntry() { - VerseKey *key = 0; + VerseKey &key = getVerseKey(); -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!key) - key = new VerseKey(this->key); - - settext(key->Testament(), key->Index(), ""); - - if (key != this->key) - delete key; + doSetText(key.Testament(), key.Index(), ""); } /****************************************************************************** - * zText::operator += - Increments module key a number of entries + * zText::increment - Increments module key a number of entries * * ENT: increment - Number of entries to jump forward * - * RET: *this */ -SWModule &zText::operator +=(int increment) -{ +void zText::increment(int steps) { long start; unsigned short size; - VerseKey *tmpkey = 0; - -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + VerseKey *tmpkey = &getVerseKey(); - findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); SWKey lastgood = *tmpkey; - while (increment) { + while (steps) { long laststart = start; unsigned short lastsize = size; SWKey lasttry = *tmpkey; - (increment > 0) ? (*key)++ : (*key)--; - if (tmpkey != key) - delete tmpkey; - tmpkey = 0; -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + (steps > 0) ? (*key)++ : (*key)--; + tmpkey = &getVerseKey(); if ((error = key->Error())) { *key = lastgood; break; } long index = tmpkey->Index(); - findoffset(tmpkey->Testament(), index, &start, &size); - if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { - increment += (increment < 0) ? 1 : -1; + findOffset(tmpkey->Testament(), index, &start, &size); + + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry +// && (start > 0) + && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; lastgood = *tmpkey; } } error = (error) ? KEYERR_OUTOFBOUNDS : 0; +} - if (tmpkey != key) - delete tmpkey; - return *this; +VerseKey &zText::getVerseKey() { + static VerseKey tmpVK; + VerseKey *key; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) { } + if (!key) { + ListKey *lkTest = 0; + try { + lkTest = SWDYNAMIC_CAST(ListKey, this->key); + } + catch ( ... ) { } + if (lkTest) { + try { + key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement()); + } + catch ( ... ) { } + } + } + if (!key) { + tmpVK = *(this->key); + return tmpVK; + } + else return *key; } + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +signed char zText::createSearchFramework() { +#ifdef USELUCENE + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + setKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + + IndexWriter* writer = NULL; + Directory* d = NULL; + + lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer(); + SWBuf target = path; + char ch = target.c_str()[strlen(target.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + target += "/lucene"; + + if (IndexReader::indexExists(target.c_str())) { + d = &FSDirectory::getDirectory(target.c_str(), false); + if (IndexReader::isLocked(*d)) { + IndexReader::unlock(*d); + } + + writer = new IndexWriter(*d, an, false); + } else { + d = &FSDirectory::getDirectory(target.c_str(), true); + writer = new IndexWriter( *d ,an, true); + } + + + + while (!Error()) { + Document &doc = *new Document(); + doc.add( Field::Text(_T("key"), (const char *)*lkey ) ); + doc.add( Field::Text(_T("content"), StripText()) ); + writer->addDocument(doc); + delete &doc; + + (*this)++; + } + + writer->optimize(); + writer->close(); + delete writer; + delete &an; + + // reposition module back to where it was before we were called + setKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + +#endif + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { +#ifdef USELUCENE + listkey.ClearList(); + + if ((is) && (ir)) { + + switch (searchType) { + case -2: { // let lucene replace multiword for now + + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0, vk; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. + if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + (*percent)(10, percentUserData); + + standard::StandardAnalyzer analyzer; + Query &q = QueryParser::Parse(istr, _T("content"), analyzer); + (*percent)(20, percentUserData); + Hits &h = is->search(q); + (*percent)(80, percentUserData); + + + // iterate thru each good module position that meets the search + for (long i = 0; i < h.Length(); i++) { + Document &doc = h.doc(i); + + // set a temporary verse key to this module position + vk = doc.get(_T("key")); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + else { + listkey << (const char*) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + } + (*percent)(98, percentUserData); + + delete &h; + delete &q; + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } +#endif + // if we don't support this search, fall back to base class + return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +SWORD_NAMESPACE_END |