summary | refs | log | tree | commit | diff
path: root/src/modules/texts/ztext/ztext.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/texts/ztext/ztext.cpp')
-rw-r--r-- src/modules/texts/ztext/ztext.cpp 483
1 files changed, 288 insertions, 195 deletions
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp
index 6e243b9..1fe0e7a 100644
--- a/src/modules/texts/ztext/ztext.cpp
+++ b/src/modules/texts/ztext/ztext.cpp
@@ -14,13 +14,19 @@
#include <unistd.h>
#endif
-#include <iostream.h>
-#include <string.h>
#include <utilfuns.h>
-//#include <rawverse.h>
#include <ztext.h>
-//#include <zlib.h>
+#include <regex.h> // GNU
+
+
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#endif
+
+SWORD_NAMESPACE_START
/******************************************************************************
* zText Constructor - Initializes data for instance of zText
@@ -33,10 +39,23 @@
* idisp - Display object to use for displaying
*/
-zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/
-{
+zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+ : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
blockType = iblockType;
lastWriteKey = 0;
+#ifdef USELUCENE
+	// Open the module's Lucene index when one exists so search() can use
+	// it.  ir and is stay null when there is no index.
+	ir = 0;
+	is = 0;
+
+	// The index lives in the "lucene" subdirectory of the module path.
+	// Add a separator only when the path does not already end in one, and
+	// never read path[-1] when the path is empty.
+	SWBuf fname;
+	fname = path;
+	const size_t pathLen = strlen(fname.c_str());
+	const char last = pathLen ? fname.c_str()[pathLen - 1] : 0;
+	if (pathLen && (last != '/') && (last != '\\'))
+		fname += "/";
+	fname += "lucene";
+
+	if (IndexReader::indexExists(fname.c_str())) {
+		ir = &IndexReader::open(fname);
+		is = new IndexSearcher(*ir);
+	}
+#endif
}
@@ -50,6 +69,14 @@ zText::~zText()
if (lastWriteKey)
delete lastWriteKey;
+
+#ifdef USELUCENE
+ if (is)
+ is->close();
+
+ if (ir)
+ delete ir;
+#endif
}
@@ -59,96 +86,23 @@ zText::~zText()
* RET: buffer with verse
*/
-char *zText::getRawEntry()
-{
-/*
- long start;
- unsigned long size;
- unsigned long destsize;
- char *tmpbuf;
- char *dest;
- VerseKey *lkey = (VerseKey *) SWModule::key;
- char sizebuf[3];
-
- lkey->Verse(0);
- if (chapcache != lkey->Index()) {
- findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
- gettext(lkey->Testament(), start, 3, sizebuf);
- memcpy(&size, sizebuf, 2);
- tmpbuf = new char [ size + 1 ];
- gettext(lkey->Testament(), start + 2, size + 1 , tmpbuf);
- //zBuf(&size, tmpbuf);
- dest = new char [ (size*4) + 1 ];
- uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size);
- chapcache = lkey->Index();
- delete [] tmpbuf;
- }
-
- //findoffset(key->Testament(), key->Index(), &start, &size);
- findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
-
- if (versebuf)
- delete [] versebuf;
- versebuf = new char [ size + 1 ];
- //memcpy(versebuf, Buf(), size);
- memcpy(versebuf, dest, destsize);
- delete [] dest;
-
- preptext(versebuf);
-
- return versebuf;
-*/
-
+SWBuf &zText::getRawEntryBuf() { // refills entryBuf for the current verse key and returns a reference to it
long start = 0;
unsigned short size = 0;
- VerseKey *key = 0;
-
- //printf ("zText char *\n");
-
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
+ VerseKey &key = getVerseKey(); // VerseKey view of the module key; nothing to free here
- //printf ("checking cache\n");
- //printf ("finding offset\n");
- findoffset(key->Testament(), key->Index(), &start, &size);
+ findOffset(key.Testament(), key.Index(), &start, &size); // start/size are passed by address; presumably filled with the entry's location
entrySize = size; // support getEntrySize call
+
+ entryBuf = ""; // clear the text left over from the previous call
+ zReadText(key.Testament(), start, size, entryBuf); // presumably reads the entry text into entryBuf -- confirm in zVerse
- //printf ("deleting previous buffer\n");
- unsigned long newsize = (size + 2) * FILTERPAD;
- if (newsize > entrybufallocsize) {
- if (entrybuf)
- delete [] entrybuf;
- entrybuf = new char [ newsize ];
- entrybufallocsize = newsize;
- }
- *entrybuf = 0;
-
- //printf ("getting text\n");
- swgettext(key->Testament(), start, (size + 2), entrybuf);
- //printf ("got text\n");
-
- rawFilter(entrybuf, size, key);
-
- //printf ("preparing text\n");
- if (!isUnicode())
- preptext(entrybuf);
-
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
+ rawFilter(entryBuf, &key); // raw filters now take the SWBuf instead of a char buffer plus size
- //printf ("returning text\n");
- return entrybuf;
+// if (!isUnicode()) -- guard disabled: prepText below now runs for every entry
+ prepText(entryBuf);
+ return entryBuf; // reference to the member buffer; overwritten by the next call
}
@@ -171,177 +125,316 @@ bool zText::sameBlock(VerseKey *k1, VerseKey *k2) {
}
-SWModule &zText::setentry(const char *inbuf, long len) {
- VerseKey *key = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
-
+void zText::setEntry(const char *inbuf, long len) { // writes inbuf/len as the entry at the current verse key via doSetText
+ VerseKey &key = getVerseKey(); // VerseKey view of the module key; nothing to free here
// see if we've jumped across blocks since last write
if (lastWriteKey) {
- if (!sameBlock(lastWriteKey, key)) {
+ if (!sameBlock(lastWriteKey, &key)) { // key is a reference now, so pass its address
flushCache();
}
delete lastWriteKey;
}
- settext(key->Testament(), key->Index(), inbuf, len);
+ doSetText(key.Testament(), key.Index(), inbuf, len);
- lastWriteKey = (VerseKey *)key->clone(); // must delete
-
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
-
- return *this;
-}
-
-SWModule &zText::operator <<(const char *inbuf) {
- return setentry(inbuf, 0);
+ lastWriteKey = (VerseKey *)key.clone(); // owned copy of the write position; deleted on the next setEntry call
}
-SWModule &zText::operator <<(const SWKey *inkey) {
- VerseKey *destkey = 0;
+void zText::linkEntry(const SWKey *inkey) { // links the entry at the current verse key to the entry at inkey via doLinkEntry
+ VerseKey &destkey = getVerseKey(); // destination: VerseKey view of the module key; nothing to free here
const VerseKey *srckey = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- destkey = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!destkey)
- destkey = new VerseKey(this->key);
// see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
try {
-#endif
srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
-#ifndef _WIN32_WCE
}
catch ( ... ) {
}
-#endif
// if we don't have a VerseKey * decendant, create our own
if (!srckey)
srckey = new VerseKey(inkey);
- linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
-
- if (this->key != destkey) // free our key if we created a VerseKey
- delete destkey;
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); // destination testament/index, then the source index
if (inkey != srckey) // free our key if we created a VerseKey
delete srckey;
-
- return *this;
}
/******************************************************************************
* zFiles::deleteEntry - deletes this entry
*
- * RET: *this
*/
void zText::deleteEntry() {
- VerseKey *key = 0;
+ VerseKey &key = getVerseKey(); // VerseKey view of the module key; nothing to free here
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!key)
- key = new VerseKey(this->key);
-
- settext(key->Testament(), key->Index(), "");
-
- if (key != this->key)
- delete key;
+ doSetText(key.Testament(), key.Index(), ""); // deletion is done by writing an empty string at the current key
}
/******************************************************************************
- * zText::operator += - Increments module key a number of entries
+ * zText::increment - Increments module key a number of entries
*
* ENT: increment - Number of entries to jump forward
*
- * RET: *this
*/
-SWModule &zText::operator +=(int increment)
-{
+void zText::increment(int steps) { // moves the module key by steps entries: forward when positive, backward when negative
long start;
unsigned short size;
- VerseKey *tmpkey = 0;
-
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ VerseKey *tmpkey = &getVerseKey(); // VerseKey view of the module key; nothing to free here
- findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); // location of the starting entry, compared against after each move
SWKey lastgood = *tmpkey;
- while (increment) {
+ while (steps) { // loop until the requested number of entries has been counted off
long laststart = start;
unsigned short lastsize = size;
SWKey lasttry = *tmpkey;
- (increment > 0) ? (*key)++ : (*key)--;
- if (tmpkey != key)
- delete tmpkey;
- tmpkey = 0;
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ (steps > 0) ? (*key)++ : (*key)--; // move one key position in the direction of steps
+ tmpkey = &getVerseKey(); // fetch the VerseKey view again now that the key has moved
if ((error = key->Error())) {
*key = lastgood;
break;
}
long index = tmpkey->Index();
- findoffset(tmpkey->Testament(), index, &start, &size);
- if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) {
- increment += (increment < 0) ? 1 : -1;
+ findOffset(tmpkey->Testament(), index, &start, &size);
+
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1; // this position counts: bring steps one closer to zero
lastgood = *tmpkey;
}
}
error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
- if (tmpkey != key)
- delete tmpkey;
- return *this;
+/******************************************************************************
+ * zText::getVerseKey - Returns a VerseKey for the module's current key
+ *
+ * RET:	the module key itself when it is a VerseKey; else the current
+ *	element of the module key when that key is a ListKey whose
+ *	element is a VerseKey; else a function-static VerseKey that has
+ *	been assigned from the module key.
+ *
+ * NOTE: the fallback VerseKey is a single static object, so the
+ *	reference it yields is overwritten by the next call that takes
+ *	the fallback path.  Callers must not delete the result.
+ */
+VerseKey &zText::getVerseKey() {
+	static VerseKey tmpVK;
+
+	// Must start null: if the cast below throws, the catch swallows the
+	// exception and the test that follows would otherwise read an
+	// indeterminate pointer.
+	VerseKey *key = 0;
+
+	// see if we have a VerseKey * or descendant
+	try {
+		key = SWDYNAMIC_CAST(VerseKey, this->key);
+	}
+	catch ( ... ) { }
+
+	// otherwise, see if we have a ListKey positioned on a VerseKey
+	if (!key) {
+		ListKey *lkTest = 0;
+		try {
+			lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+		}
+		catch ( ... ) { }
+		if (lkTest) {
+			try {
+				key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());
+			}
+			catch ( ... ) { }
+		}
+	}
+
+	// no VerseKey available: copy the module key into the shared fallback
+	if (!key) {
+		tmpVK = *(this->key);
+		return tmpVK;
+	}
+	return *key;
}
+
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+/******************************************************************************
+ * zText::createSearchFramework - Builds the Lucene index for this module
+ *
+ * Walks the module from TOP until Error() is set and adds one document per
+ * position, with a "key" field (the key text) and a "content" field
+ * (StripText()).  The module key is restored before returning.
+ * Does nothing when built without USELUCENE.
+ *
+ * RET: always 0
+ */
+signed char zText::createSearchFramework() {
+#ifdef USELUCENE
+	SWKey *savekey = 0;
+	SWKey *searchkey = 0;
+
+	// save key information so as not to disrupt original
+	// module position
+	if (!key->Persist()) {
+		savekey = CreateKey();
+		*savekey = *key;
+	}
+	else	savekey = key;
+
+	// a persistent key belongs to the caller, so walk the module with a
+	// clone of it instead
+	searchkey = (key->Persist())?key->clone():0;
+	if (searchkey) {
+		searchkey->Persist(1);
+		setKey(*searchkey);
+	}
+
+	// position module at the beginning
+	*this = TOP;
+
+	VerseKey *lkey = (VerseKey *)key;
+
+	IndexWriter* writer = NULL;
+	Directory* d = NULL;
+
+	lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();
+
+	// The index lives in the "lucene" subdirectory of the module path.
+	// Add a separator only when the path does not already end in one, and
+	// never read path[-1] when the path is empty.
+	SWBuf target = path;
+	const size_t pathLen = strlen(target.c_str());
+	const char last = pathLen ? target.c_str()[pathLen - 1] : 0;
+	if (pathLen && (last != '/') && (last != '\\'))
+		target += "/";
+	target += "lucene";
+
+	// NOTE(review): an existing index is opened with create == false, so
+	// running this twice looks like it appends every entry a second time
+	// -- confirm whether that is intended.
+	// NOTE(review): d is never closed or released in this function;
+	// check the CLucene ownership rules for FSDirectory::getDirectory.
+	if (IndexReader::indexExists(target.c_str())) {
+		d = &FSDirectory::getDirectory(target.c_str(), false);
+		if (IndexReader::isLocked(*d)) {
+			IndexReader::unlock(*d);
+		}
+
+		writer = new IndexWriter(*d, an, false);
+	} else {
+		d = &FSDirectory::getDirectory(target.c_str(), true);
+		writer = new IndexWriter( *d ,an, true);
+	}
+
+	// iterate thru each entry in module
+	while (!Error()) {
+		Document &doc = *new Document();
+		doc.add( Field::Text(_T("key"), (const char *)*lkey ) );
+		doc.add( Field::Text(_T("content"), StripText()) );
+		writer->addDocument(doc);
+		delete &doc;
+
+		(*this)++;
+	}
+
+	writer->optimize();
+	writer->close();
+	delete writer;
+	delete &an;
+
+	// reposition module back to where it was before we were called
+	setKey(*savekey);
+
+	if (!savekey->Persist())
+		delete savekey;
+
+	if (searchkey)
+		delete searchkey;
+
+#endif
+	return 0;
+}
+
+
+/******************************************************************************
+ * zText::search - Searches the module for a string; multiword searches use
+ *	the Lucene index when one is open, everything else goes to
+ *	SWModule::search
+ *
+ * ENT:	istr		- string for which to search
+ *	searchType	- type of search to perform
+ *				>=0 - regex
+ *				-1  - phrase
+ *				-2  - multiword
+ *	flags		- options flags for search
+ *	scope		- key that bounds the search; may be null
+ *	justCheckIfSupported	- if set, don't search, only tell if this
+ *				function supports requested search.
+ *	percent		- progress callback, called with values from 10 to
+ *				100 on the Lucene path; skipped when null
+ *	percentUserData	- passed through to percent
+ *
+ * RET: listkey set to verses that contain istr; on the Lucene path each
+ *	element's userData holds the hit score multiplied by 100
+ */
+
+ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+	listkey.ClearList();
+
+	if ((is) && (ir)) {
+
+		switch (searchType) {
+		case -2: {	// let lucene replace multiword for now
+
+			// test to see if our scope for this search is bounded by a
+			// VerseKey
+			VerseKey *testKeyType = 0, vk;
+			try {
+				testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+			}
+			catch ( ... ) {}
+			// if we don't have a VerseKey * descendant we can't handle
+			// because of scope.
+			// In the future, add bool SWKey::isValid(const char *tryString);
+			if (!testKeyType)
+				break;
+
+			// check if we just want to see if search is supported.
+			// If we've gotten this far, then it is supported.
+			if (justCheckIfSupported) {
+				*justCheckIfSupported = true;
+				return listkey;
+			}
+
+			if (percent) (*percent)(10, percentUserData);
+
+			// NOTE(review): the index is written with SimpleAnalyzer in
+			// createSearchFramework but queried with StandardAnalyzer
+			// here -- confirm the two tokenize compatibly.
+			standard::StandardAnalyzer analyzer;
+			Query &q = QueryParser::Parse(istr, _T("content"), analyzer);
+			if (percent) (*percent)(20, percentUserData);
+			Hits &h = is->search(q);
+			if (percent) (*percent)(80, percentUserData);
+
+			// iterate thru each good module position that meets the search
+			for (long i = 0; i < h.Length(); i++) {
+				Document &doc = h.doc(i);
+
+				// set a temporary verse key to this module position
+				vk = doc.get(_T("key"));
+
+				// check scope
+				// Try to set our scope key to this verse key and
+				// check to see if it set ok
+				bool inScope = true;
+				if (scope) {
+					*testKeyType = vk;
+					inScope = (*testKeyType == vk);
+				}
+
+				// Record the score only for a hit that was actually
+				// added; otherwise it would land on the previous
+				// element, or on no element at all.
+				if (inScope) {
+					listkey << (const char *) vk;
+					listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);
+				}
+			}
+			if (percent) (*percent)(98, percentUserData);
+
+			delete &h;
+			delete &q;
+
+			listkey = TOP;
+			if (percent) (*percent)(100, percentUserData);
+			return listkey;
+		}
+
+		default:
+			break;
+		}
+	}
+
+	// check if we just want to see if search is supported
+	// NOTE(review): this reports "unsupported" without asking the base
+	// class, even for search types SWModule::search may handle -- confirm
+	// that is the intended answer.
+	if (justCheckIfSupported) {
+		*justCheckIfSupported = false;
+		return listkey;
+	}
+#endif
+	// if we don't support this search, fall back to base class
+	return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
+}
+
+
+SWORD_NAMESPACE_END