diff options
Diffstat (limited to 'src/modules/texts/ztext/ztext.cpp')
-rw-r--r-- | src/modules/texts/ztext/ztext.cpp | 483 |
1 files changed, 288 insertions, 195 deletions
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp index 6e243b9..1fe0e7a 100644 --- a/src/modules/texts/ztext/ztext.cpp +++ b/src/modules/texts/ztext/ztext.cpp @@ -14,13 +14,19 @@ #include <unistd.h> #endif -#include <iostream.h> -#include <string.h> #include <utilfuns.h> -//#include <rawverse.h> #include <ztext.h> -//#include <zlib.h> +#include <regex.h> // GNU + + +#ifdef USELUCENE +#include <CLucene/CLucene.h> +using namespace lucene::search; +using namespace lucene::queryParser; +#endif + +SWORD_NAMESPACE_START /****************************************************************************** * zText Constructor - Initializes data for instance of zText @@ -33,10 +39,23 @@ * idisp - Display object to use for displaying */ -zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/ -{ +zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) + : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) { blockType = iblockType; lastWriteKey = 0; +#ifdef USELUCENE + SWBuf fname; + fname = path; + ir = 0; + is = 0; + char ch = fname.c_str()[strlen(fname.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + fname += "/lucene"; + if (IndexReader::indexExists(fname.c_str())) { + ir = &IndexReader::open(fname); + is = new IndexSearcher(*ir); + } +#endif } @@ -50,6 +69,14 @@ zText::~zText() if (lastWriteKey) delete lastWriteKey; + +#ifdef USELUCENE + if (is) + is->close(); + + if (ir) + delete ir; +#endif } @@ -59,96 +86,23 @@ zText::~zText() * RET: buffer with verse */ -char *zText::getRawEntry() -{ -/* - long start; - unsigned long size; - unsigned long destsize; - char *tmpbuf; - char *dest; - VerseKey *lkey = (VerseKey *) SWModule::key; - char sizebuf[3]; - - lkey->Verse(0); - if (chapcache != lkey->Index()) { - findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); - gettext(lkey->Testament(), start, 3, sizebuf); - memcpy(&size, sizebuf, 2); - tmpbuf = new char [ size + 1 ]; - gettext(lkey->Testament(), start + 2, size + 1 , tmpbuf); - //zBuf(&size, tmpbuf); - dest = new char [ (size*4) + 1 ]; - uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size); - chapcache = lkey->Index(); - delete [] tmpbuf; - } - - //findoffset(key->Testament(), key->Index(), &start, &size); - findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size)); - - if (versebuf) - delete [] versebuf; - versebuf = new char [ size + 1 ]; - //memcpy(versebuf, Buf(), size); - memcpy(versebuf, dest, destsize); - delete [] dest; - - preptext(versebuf); - - return versebuf; -*/ - +SWBuf &zText::getRawEntryBuf() { long start = 0; unsigned short size = 0; - VerseKey *key = 0; - - //printf ("zText char *\n"); - - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); + VerseKey &key = getVerseKey(); - //printf ("checking cache\n"); - //printf ("finding offset\n"); - findoffset(key->Testament(), key->Index(), &start, &size); + findOffset(key.Testament(), key.Index(), &start, &size); entrySize = size; // support getEntrySize call + + entryBuf = ""; + zReadText(key.Testament(), start, size, entryBuf); - //printf ("deleting previous buffer\n"); - unsigned long newsize = (size + 2) * FILTERPAD; - if (newsize > entrybufallocsize) { - if (entrybuf) - delete [] entrybuf; - entrybuf = new char [ newsize ]; - entrybufallocsize = newsize; - } - *entrybuf = 0; - - //printf ("getting text\n"); - swgettext(key->Testament(), start, (size + 2), entrybuf); - //printf ("got text\n"); - - rawFilter(entrybuf, size, key); - - //printf ("preparing text\n"); - if (!isUnicode()) - preptext(entrybuf); - - if (this->key != key) // free our key if we created a VerseKey - delete key; + rawFilter(entryBuf, &key); - //printf ("returning text\n"); - return entrybuf; +// if (!isUnicode()) + prepText(entryBuf); + return entryBuf; } @@ -171,177 +125,316 @@ bool zText::sameBlock(VerseKey *k1, VerseKey *k2) { } -SWModule &zText::setentry(const char *inbuf, long len) { - VerseKey *key = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!key) - key = new VerseKey(this->key); - +void zText::setEntry(const char *inbuf, long len) { + VerseKey &key = getVerseKey(); // see if we've jumped across blocks since last write if (lastWriteKey) { - if (!sameBlock(lastWriteKey, key)) { + if (!sameBlock(lastWriteKey, &key)) { flushCache(); } delete lastWriteKey; } - settext(key->Testament(), key->Index(), inbuf, len); + doSetText(key.Testament(), key.Index(), inbuf, len); - lastWriteKey = (VerseKey *)key->clone(); // must delete - - if (this->key != key) // free our key if we created a VerseKey - delete key; - - return *this; -} - -SWModule &zText::operator <<(const char *inbuf) { - return setentry(inbuf, 0); + lastWriteKey = (VerseKey *)key.clone(); // must delete } -SWModule &zText::operator <<(const SWKey *inkey) { - VerseKey *destkey = 0; +void zText::linkEntry(const SWKey *inkey) { + VerseKey &destkey = getVerseKey(); const VerseKey *srckey = 0; - // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE - try { -#endif - destkey = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - // if we don't have a VerseKey * decendant, create our own - if (!destkey) - destkey = new VerseKey(this->key); // see if we have a VerseKey * or decendant -#ifndef _WIN32_WCE try { -#endif srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey); -#ifndef _WIN32_WCE } catch ( ... ) { } -#endif // if we don't have a VerseKey * decendant, create our own if (!srckey) srckey = new VerseKey(inkey); - linkentry(destkey->Testament(), destkey->Index(), srckey->Index()); - - if (this->key != destkey) // free our key if we created a VerseKey - delete destkey; + doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); if (inkey != srckey) // free our key if we created a VerseKey delete srckey; - - return *this; } /****************************************************************************** * zFiles::deleteEntry - deletes this entry * - * RET: *this */ void zText::deleteEntry() { - VerseKey *key = 0; + VerseKey &key = getVerseKey(); -#ifndef _WIN32_WCE - try { -#endif - key = SWDYNAMIC_CAST(VerseKey, this->key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!key) - key = new VerseKey(this->key); - - settext(key->Testament(), key->Index(), ""); - - if (key != this->key) - delete key; + doSetText(key.Testament(), key.Index(), ""); } /****************************************************************************** - * zText::operator += - Increments module key a number of entries + * zText::increment - Increments module key a number of entries * * ENT: increment - Number of entries to jump forward * - * RET: *this */ -SWModule &zText::operator +=(int increment) -{ +void zText::increment(int steps) { long start; unsigned short size; - VerseKey *tmpkey = 0; - -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + VerseKey *tmpkey = &getVerseKey(); - findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); SWKey lastgood = *tmpkey; - while (increment) { + while (steps) { long laststart = start; unsigned short lastsize = size; SWKey lasttry = *tmpkey; - (increment > 0) ? (*key)++ : (*key)--; - if (tmpkey != key) - delete tmpkey; - tmpkey = 0; -#ifndef _WIN32_WCE - try { -#endif - tmpkey = SWDYNAMIC_CAST(VerseKey, key); -#ifndef _WIN32_WCE - } - catch ( ... ) {} -#endif - if (!tmpkey) - tmpkey = new VerseKey(key); + (steps > 0) ? (*key)++ : (*key)--; + tmpkey = &getVerseKey(); if ((error = key->Error())) { *key = lastgood; break; } long index = tmpkey->Index(); - findoffset(tmpkey->Testament(), index, &start, &size); - if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) { - increment += (increment < 0) ? 1 : -1; + findOffset(tmpkey->Testament(), index, &start, &size); + + if ( + (((laststart != start) || (lastsize != size)) // we're a different entry +// && (start > 0) + && (size)) // and we actually have a size + ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links + steps += (steps < 0) ? 1 : -1; lastgood = *tmpkey; } } error = (error) ? KEYERR_OUTOFBOUNDS : 0; +} - if (tmpkey != key) - delete tmpkey; - return *this; +VerseKey &zText::getVerseKey() { + static VerseKey tmpVK; + VerseKey *key; + // see if we have a VerseKey * or decendant + try { + key = SWDYNAMIC_CAST(VerseKey, this->key); + } + catch ( ... ) { } + if (!key) { + ListKey *lkTest = 0; + try { + lkTest = SWDYNAMIC_CAST(ListKey, this->key); + } + catch ( ... ) { } + if (lkTest) { + try { + key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement()); + } + catch ( ... ) { } + } + } + if (!key) { + tmpVK = *(this->key); + return tmpVK; + } + else return *key; } + + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +signed char zText::createSearchFramework() { +#ifdef USELUCENE + SWKey *savekey = 0; + SWKey *searchkey = 0; + SWKey textkey; + char *word = 0; + char *wordBuf = 0; + + + // save key information so as not to disrupt original + // module position + if (!key->Persist()) { + savekey = CreateKey(); + *savekey = *key; + } + else savekey = key; + + searchkey = (key->Persist())?key->clone():0; + if (searchkey) { + searchkey->Persist(1); + setKey(*searchkey); + } + + // position module at the beginning + *this = TOP; + + VerseKey *lkey = (VerseKey *)key; + + // iterate thru each entry in module + + IndexWriter* writer = NULL; + Directory* d = NULL; + + lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer(); + SWBuf target = path; + char ch = target.c_str()[strlen(target.c_str())-1]; + if ((ch != '/') && (ch != '\\')) + target += "/lucene"; + + if (IndexReader::indexExists(target.c_str())) { + d = &FSDirectory::getDirectory(target.c_str(), false); + if (IndexReader::isLocked(*d)) { + IndexReader::unlock(*d); + } + + writer = new IndexWriter(*d, an, false); + } else { + d = &FSDirectory::getDirectory(target.c_str(), true); + writer = new IndexWriter( *d ,an, true); + } + + + + while (!Error()) { + Document &doc = *new Document(); + doc.add( Field::Text(_T("key"), (const char *)*lkey ) ); + doc.add( Field::Text(_T("content"), StripText()) ); + writer->addDocument(doc); + delete &doc; + + (*this)++; + } + + writer->optimize(); + writer->close(); + delete writer; + delete &an; + + // reposition module back to where it was before we were called + setKey(*savekey); + + if (!savekey->Persist()) + delete savekey; + + if (searchkey) + delete searchkey; + + +#endif + return 0; +} + + +/****************************************************************************** + * SWModule::Search - Searches a module for a string + * + * ENT: istr - string for which to search + * searchType - type of search to perform + * >=0 - regex + * -1 - phrase + * -2 - multiword + * flags - options flags for search + * justCheckIfSupported - if set, don't search, only tell if this + * function supports requested search. + * + * RET: listkey set to verses that contain istr + */ + +ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { +#ifdef USELUCENE + listkey.ClearList(); + + if ((is) && (ir)) { + + switch (searchType) { + case -2: { // let lucene replace multiword for now + + + // test to see if our scope for this search is bounded by a + // VerseKey + VerseKey *testKeyType = 0, vk; + try { + testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); + } + catch ( ... ) {} + // if we don't have a VerseKey * decendant we can't handle + // because of scope. + // In the future, add bool SWKey::isValid(const char *tryString); + if (!testKeyType) + break; + + + // check if we just want to see if search is supported. + // If we've gotten this far, then it is supported. + if (justCheckIfSupported) { + *justCheckIfSupported = true; + return listkey; + } + + (*percent)(10, percentUserData); + + standard::StandardAnalyzer analyzer; + Query &q = QueryParser::Parse(istr, _T("content"), analyzer); + (*percent)(20, percentUserData); + Hits &h = is->search(q); + (*percent)(80, percentUserData); + + + // iterate thru each good module position that meets the search + for (long i = 0; i < h.Length(); i++) { + Document &doc = h.doc(i); + + // set a temporary verse key to this module position + vk = doc.get(_T("key")); + + // check scope + // Try to set our scope key to this verse key + if (scope) { + *testKeyType = vk; + + // check to see if it set ok and if so, add to our return list + if (*testKeyType == vk) + listkey << (const char *) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + else { + listkey << (const char*) vk; + listkey.GetElement()->userData = (void *)(int)(h.score(i)*100); + } + } + (*percent)(98, percentUserData); + + delete &h; + delete &q; + + listkey = TOP; + (*percent)(100, percentUserData); + return listkey; + } + + default: + break; + } + } + + // check if we just want to see if search is supported + if (justCheckIfSupported) { + *justCheckIfSupported = false; + return listkey; + } +#endif + // if we don't support this search, fall back to base class + return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); +} + + +SWORD_NAMESPACE_END |