summary | refs | log | tree | commit | diff
path: root/src/modules/texts
diff options
context:
space:
mode:
author Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:49 -0400
committer Roberto C. Sanchez <roberto@connexer.com> 2014-03-29 10:53:49 -0400
commit 8c8aa6b07e595cfac56838b5964ab3e96051f1b2 (patch)
tree da38e2c1979148dbd3b0c7b87f930746f5ba7f44 /src/modules/texts
parent 8d3fc864d094eeadc721f8e93436b37a5fab173e (diff)
Imported Upstream version 1.5.7
Diffstat (limited to 'src/modules/texts')
-rw-r--r-- src/modules/texts/Makefile.am | 1
-rw-r--r-- src/modules/texts/rawgbf/rawgbf.cpp | 5
-rw-r--r-- src/modules/texts/rawtext/rawtext.cpp | 470
-rw-r--r-- src/modules/texts/swtext.cpp | 42
-rw-r--r-- src/modules/texts/ztext/ztext.cpp | 483
5 files changed, 622 insertions, 379 deletions
diff --git a/src/modules/texts/Makefile.am b/src/modules/texts/Makefile.am
index b48d93e..2c4479e 100644
--- a/src/modules/texts/Makefile.am
+++ b/src/modules/texts/Makefile.am
@@ -4,4 +4,3 @@ libsword_la_SOURCES += $(textsdir)/swtext.cpp
include ../src/modules/texts/rawtext/Makefile.am
include ../src/modules/texts/ztext/Makefile.am
-include ../src/modules/texts/rawgbf/Makefile.am
diff --git a/src/modules/texts/rawgbf/rawgbf.cpp b/src/modules/texts/rawgbf/rawgbf.cpp
index 0866585..6b8516f 100644
--- a/src/modules/texts/rawgbf/rawgbf.cpp
+++ b/src/modules/texts/rawgbf/rawgbf.cpp
@@ -19,6 +19,7 @@
#include <rawverse.h>
#include <rawgbf.h>
+SWORD_NAMESPACE_START
/******************************************************************************
* RawGBF Constructor - Initializes data for instance of RawGBF
@@ -73,7 +74,7 @@ RawGBF::operator char*()
delete [] entrybuf;
entrybuf = new char [ size * 3 ]; // extra for conversion to RTF or other.
- gettext(key->Testament(), start, size + 1, entrybuf);
+ readtext(key->Testament(), start, size + 1, entrybuf);
preptext(entrybuf);
RenderText(entrybuf, size * 3);
@@ -82,3 +83,5 @@ RawGBF::operator char*()
return entrybuf;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/rawtext/rawtext.cpp b/src/modules/texts/rawtext/rawtext.cpp
index acc1cfd..1e1048d 100644
--- a/src/modules/texts/rawtext/rawtext.cpp
+++ b/src/modules/texts/rawtext/rawtext.cpp
@@ -13,20 +13,35 @@
#include <unistd.h>
#endif
-#include <string.h>
#include <utilfuns.h>
#include <rawverse.h>
#include <rawtext.h>
+#include <regex.h> // GNU
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#else
#include <map>
#include <list>
#include <algorithm>
-#include <regex.h> // GNU
+
+using std::map;
+using std::list;
+using std::find;
+
+#endif
#ifndef O_BINARY
#define O_BINARY 0
#endif
+SWORD_NAMESPACE_START
+
+typedef map < SWBuf, list<long> > strlist;
+typedef list<long> longlist;
+
/******************************************************************************
* RawText Constructor - Initializes data for instance of RawText
*
@@ -39,7 +54,20 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp
: SWText(iname, idesc, idisp, enc, dir, mark, ilang),
RawVerse(ipath) {
- string fname;
+#ifdef USELUCENE
+ SWBuf fname;
+ fname = path;
+ ir = 0;
+ is = 0;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/lucene";
+ if (IndexReader::indexExists(fname.c_str())) {
+ ir = &IndexReader::open(fname);
+ is = new IndexSearcher(*ir);
+ }
+#else
+ SWBuf fname;
fname = path;
char ch = fname.c_str()[strlen(fname.c_str())-1];
if ((ch != '/') && (ch != '\\'))
@@ -47,13 +75,14 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp
for (int loop = 0; loop < 2; loop++) {
fastSearch[loop] = 0;
- string fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
+ SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat"));
if (!access(fastidxname.c_str(), 04)) {
fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx"));
if (!access(fastidxname.c_str(), 04))
fastSearch[loop] = new RawStr((fname + ((loop)?"ntwords":"otwords")).c_str());
}
}
+#endif
}
@@ -61,68 +90,159 @@ RawText::RawText(const char *ipath, const char *iname, const char *idesc, SWDisp
* RawText Destructor - Cleans up instance of RawText
*/
-RawText::~RawText()
-{
+RawText::~RawText() {
+#ifdef USELUCENE
+ if (is)
+ is->close();
+
+ if (ir)
+ delete ir;
+#else
if (fastSearch[0])
delete fastSearch[0];
if (fastSearch[1])
delete fastSearch[1];
+#endif
+}
+
+
+VerseKey &RawText::getVerseKey() {
+ static VerseKey tmpVK;
+ VerseKey *key;
+ // see if we have a VerseKey * or decendant
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ catch ( ... ) { }
+ if (!key) {
+ ListKey *lkTest = 0;
+ try {
+ lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+ }
+ catch ( ... ) { }
+ if (lkTest) {
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());
+ }
+ catch ( ... ) { }
+ }
+ }
+ if (!key) {
+ tmpVK = *(this->key);
+ return tmpVK;
+ }
+ else return *key;
}
/******************************************************************************
- * RawText::operator char * - Returns the correct verse when char * cast
+ * RawText::getRawEntry - Returns the correct verse when char * cast
* is requested
*
* RET: string buffer with verse
*/
-char *RawText::getRawEntry() {
+SWBuf &RawText::getRawEntryBuf() {
long start = 0;
unsigned short size = 0;
- VerseKey *key = 0;
+ VerseKey &key = getVerseKey();
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
+ findOffset(key.Testament(), key.Index(), &start, &size);
+ entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ readText(key.Testament(), start, size, entryBuf);
+
+ rawFilter(entryBuf, 0); // hack, decipher
+ rawFilter(entryBuf, &key);
+
+// if (!isUnicode())
+ prepText(entryBuf);
+
+ return entryBuf;
+}
+
+
+signed char RawText::createSearchFramework() {
+#ifdef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;
+ char *word = 0;
+ char *wordBuf = 0;
+
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();
+ *savekey = *key;
}
- catch ( ... ) { }
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
+ else savekey = key;
- findoffset(key->Testament(), key->Index(), &start, &size);
- entrySize = size; // support getEntrySize call
+ searchkey = (key->Persist())?key->clone():0;
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;
+
+ // iterate thru each entry in module
+
+ IndexWriter* writer = NULL;
+ Directory* d = NULL;
+
+ lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";
- unsigned long newsize = (size + 2) * FILTERPAD;
- if (newsize > entrybufallocsize) {
- if (entrybuf)
- delete [] entrybuf;
- entrybuf = new char [ newsize ];
- entrybufallocsize = newsize;
+ if (IndexReader::indexExists(target.c_str())) {
+ d = &FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(*d)) {
+ IndexReader::unlock(*d);
+ }
+
+ writer = new IndexWriter(*d, an, false);
+ } else {
+ d = &FSDirectory::getDirectory(target.c_str(), true);
+ writer = new IndexWriter( *d ,an, true);
}
- *entrybuf = 0;
- gettext(key->Testament(), start, (size + 2), entrybuf);
- rawFilter(entrybuf, size, key);
+
+ while (!Error()) {
+ Document &doc = *new Document();
+ doc.add( Field::Text(_T("key"), (const char *)*lkey ) );
+ doc.add( Field::Text(_T("content"), StripText()) );
+ writer->addDocument(doc);
+ delete &doc;
- if (!isUnicode())
- preptext(entrybuf);
+ (*this)++;
+ }
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
+ writer->optimize();
+ writer->close();
+ delete writer;
+ delete &an;
- return entrybuf;
-}
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+ if (!savekey->Persist())
+ delete savekey;
-signed char RawText::createSearchFramework() {
+ if (searchkey)
+ delete searchkey;
+
+
+#else
SWKey *savekey = 0;
SWKey *searchkey = 0;
SWKey textkey;
@@ -132,7 +252,7 @@ signed char RawText::createSearchFramework() {
// dictionary holds words associated with a list
// containing every module position that contains
// the word. [0] Old Testament; [1] NT
- map < string, list<long> > dictionary[2];
+ map < SWBuf, list<long> > dictionary[2];
// save key information so as not to disrupt original
@@ -146,7 +266,7 @@ signed char RawText::createSearchFramework() {
searchkey = (key->Persist())?key->clone():0;
if (searchkey) {
searchkey->Persist(1);
- SetKey(*searchkey);
+ setKey(*searchkey);
}
// position module at the beginning
@@ -164,9 +284,8 @@ signed char RawText::createSearchFramework() {
word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>");
while (word) {
- // make work upper case
- for (unsigned int i = 0; i < strlen(word); i++)
- word[i] = SW_toupper(word[i]);
+ // make word upper case
+ toupperstr(word);
// lookup word in dictionary (or make entry in dictionary
// for this word) and add this module position (index) to
@@ -179,7 +298,7 @@ signed char RawText::createSearchFramework() {
}
// reposition module back to where it was before we were called
- SetKey(*savekey);
+ setKey(*savekey);
if (!savekey->Persist())
delete savekey;
@@ -191,12 +310,12 @@ signed char RawText::createSearchFramework() {
// --------- Let's output an index from our dictionary -----------
int datfd;
int idxfd;
- map < string, list<long> >::iterator it;
- list<long>::iterator it2;
+ strlist::iterator it;
+ longlist::iterator it2;
unsigned long offset, entryoff;
unsigned short size;
- string fname;
+ SWBuf fname;
fname = path;
char ch = fname.c_str()[strlen(fname.c_str())-1];
if ((ch != '/') && (ch != '\\'))
@@ -249,12 +368,13 @@ signed char RawText::createSearchFramework() {
close(datfd);
close(idxfd);
}
+#endif
return 0;
}
/******************************************************************************
- * SWModule::Search - Searches a module for a string
+ * SWModule::search - Searches a module for a string
*
* ENT: istr - string for which to search
* searchType - type of search to perform
@@ -268,8 +388,89 @@ signed char RawText::createSearchFramework() {
* RET: listkey set to verses that contain istr
*/
-ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData)
-{
+ListKey &RawText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ listkey.ClearList();
+
+ if ((is) && (ir)) {
+
+ switch (searchType) {
+ case -2: { // let lucene replace multiword for now
+
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0, vk;
+ try {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ catch ( ... ) {}
+ // if we don't have a VerseKey * decendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listkey;
+ }
+
+ (*percent)(10, percentUserData);
+
+ standard::StandardAnalyzer analyzer;
+ Query &q = QueryParser::Parse(istr, _T("content"), analyzer);
+ (*percent)(20, percentUserData);
+ Hits &h = is->search(q);
+ (*percent)(80, percentUserData);
+
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h.Length(); i++) {
+ Document &doc = h.doc(i);
+
+ // set a temporary verse key to this module position
+ vk = doc.get(_T("key"));
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;
+
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listkey << (const char *) vk;
+ listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);
+ }
+ else {
+ listkey << (const char*) vk;
+ listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);
+ }
+ }
+ (*percent)(98, percentUserData);
+
+ delete &h;
+ delete &q;
+
+ listkey = TOP;
+ (*percent)(100, percentUserData);
+ return listkey;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;
+ return listkey;
+ }
+#else
listkey.ClearList();
if ((fastSearch[0]) && (fastSearch[1])) {
@@ -284,14 +485,10 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
// test to see if our scope for this search is bounded by a
// VerseKey
VerseKey *testKeyType = 0;
-#ifndef _WIN32_WCE
try {
-#endif
testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
-#ifndef _WIN32_WCE
}
catch ( ... ) {}
-#endif
// if we don't have a VerseKey * decendant we can't handle
// because of scope.
// In the future, add bool SWKey::isValid(const char *tryString);
@@ -315,7 +512,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
long start;
unsigned short size;
char *idxbuf = 0;
- char *datbuf = 0;
+ SWBuf datBuf;
list <long> indexes;
list <long> indexes2;
VerseKey vk;
@@ -325,8 +522,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
// toupper our copy of search string
stdstr(&wordBuf, istr);
- for (unsigned int i = 0; i < strlen(wordBuf); i++)
- wordBuf[i] = SW_toupper(wordBuf[i]);
+ toupperstr(wordBuf);
// get list of individual words
words = (char **)calloc(sizeof(char *), 10);
@@ -361,10 +557,10 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
idxbuf = 0;
// find our word in the database and jump ahead _away_
- error = fastSearch[j]->findoffset(words[i], &start, &size, away);
+ error = fastSearch[j]->findOffset(words[i], &start, &size, away);
// get the word from the database
- fastSearch[j]->getidxbufdat(start, &idxbuf);
+ fastSearch[j]->getIDXBufDat(start, &idxbuf);
// check to see if it starts with our target word
if (strlen(idxbuf) > strlen(words[i]))
@@ -373,18 +569,18 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
if (!strcmp(idxbuf, words[i])) {
// get data for this word from database
- free(idxbuf);
- idxbuf = (char *)calloc(size+2, 1);
- datbuf = (char *)calloc(size+2, 1);
- fastSearch[j]->gettext(start, size + 2, idxbuf, datbuf);
+ delete [] idxbuf;
+ idxbuf = 0;
+ datBuf = "";
+ fastSearch[j]->readText(start, &size, &idxbuf, datBuf);
// we know that the data consists of sizof(long)
// records each a valid module position that constains
// this word
//
// iterate thru each of these module positions
- long *keyindex = (long *)datbuf;
- while (keyindex < (long *)(datbuf + size - (strlen(idxbuf) + 1))) {
+ long *keyindex = (long *)datBuf.getRawData();
+ while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) {
if (i) { // if we're not on our first word
// check to see if this word is already in the result set.
@@ -396,7 +592,6 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
else indexes2.push_back(*keyindex);
keyindex++;
}
- free(datbuf);
}
else error = 1; // no more matches
free(idxbuf);
@@ -412,7 +607,7 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
indexes.sort();
// iterate thru each good module position that meets the search
- for (list <long>::iterator it = indexes.begin(); it != indexes.end(); it++) {
+ for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) {
// set a temporary verse key to this module position
vk.Testament(j+1);
@@ -454,85 +649,35 @@ ListKey &RawText::Search(const char *istr, int searchType, int flags, SWKey *sco
return listkey;
}
- // if we don't support this search, fall back to base class
- return SWModule::Search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
-}
-
-#ifdef _MSC_VER
-SWModule &RawText::operator =(SW_POSITION p) {
-#else
-RawText &RawText::operator =(SW_POSITION p) {
#endif
- SWModule::operator =(p);
- return *this;
+ // if we don't support this search, fall back to base class
+ return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
}
-SWModule &RawText::setentry(const char *inbuf, long len) {
- VerseKey *key = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
-
- settext(key->Testament(), key->Index(), inbuf, len);
-
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
- return *this;
-}
-
-SWModule &RawText::operator <<(const char *inbuf) {
- return setentry(inbuf, 0);
+void RawText::setEntry(const char *inbuf, long len) {
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), inbuf, len);
}
-SWModule &RawText::operator <<(const SWKey *inkey) {
- VerseKey *destkey = 0;
+void RawText::linkEntry(const SWKey *inkey) {
+ VerseKey &destkey = getVerseKey();
const VerseKey *srckey = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- destkey = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!destkey)
- destkey = new VerseKey(this->key);
// see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
try {
-#endif
srckey = SWDYNAMIC_CAST(VerseKey, inkey);
-#ifndef _WIN32_WCE
}
catch ( ... ) {}
-#endif
// if we don't have a VerseKey * decendant, create our own
if (!srckey)
srckey = new VerseKey(inkey);
- linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
-
- if (this->key != destkey) // free our key if we created a VerseKey
- delete destkey;
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
if (inkey != srckey) // free our key if we created a VerseKey
delete srckey;
-
- return *this;
}
@@ -543,88 +688,49 @@ SWModule &RawText::operator <<(const SWKey *inkey) {
*/
void RawText::deleteEntry() {
-
- VerseKey *key = 0;
-
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!key)
- key = new VerseKey(this->key);
-
- settext(key->Testament(), key->Index(), "");
-
- if (key != this->key)
- delete key;
+ VerseKey &key = getVerseKey();
+ doSetText(key.Testament(), key.Index(), "");
}
/******************************************************************************
- * RawText::operator += - Increments module key a number of entries
+ * RawText::increment - Increments module key a number of entries
*
* ENT: increment - Number of entries to jump forward
*
* RET: *this
*/
-SWModule &RawText::operator +=(int increment)
-{
+void RawText::increment(int steps) {
long start;
unsigned short size;
- VerseKey *tmpkey = 0;
-
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ VerseKey *tmpkey = &getVerseKey();
- findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
SWKey lastgood = *tmpkey;
- while (increment) {
+ while (steps) {
long laststart = start;
unsigned short lastsize = size;
SWKey lasttry = *tmpkey;
- (increment > 0) ? (*key)++ : (*key)--;
- if (tmpkey != key)
- delete tmpkey;
- tmpkey = 0;
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ (steps > 0) ? (*key)++ : (*key)--;
+ tmpkey = &getVerseKey();
if ((error = key->Error())) {
*key = lastgood;
break;
}
long index = tmpkey->Index();
- findoffset(tmpkey->Testament(), index, &start, &size);
- if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) {
- increment += (increment < 0) ? 1 : -1;
+ findOffset(tmpkey->Testament(), index, &start, &size);
+ if (
+ (((laststart != start) || (lastsize != size)) // we're a different entry
+// && (start > 0)
+ && (size)) // and we actually have a size
+ ||(!skipConsecutiveLinks)) { // or we don't want to skip consecutive links
+ steps += (steps < 0) ? 1 : -1;
lastgood = *tmpkey;
}
}
error = (error) ? KEYERR_OUTOFBOUNDS : 0;
-
- if (tmpkey != key)
- delete tmpkey;
-
- return *this;
}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/swtext.cpp b/src/modules/texts/swtext.cpp
index 85da8a3..8610dae 100644
--- a/src/modules/texts/swtext.cpp
+++ b/src/modules/texts/swtext.cpp
@@ -5,6 +5,7 @@
#include <swtext.h>
#include <listkey.h>
+SWORD_NAMESPACE_START
/******************************************************************************
* SWText Constructor - Initializes data for instance of SWText
@@ -18,6 +19,7 @@ SWText::SWText(const char *imodname, const char *imoddesc, SWDisplay *idisp, SWT
{
delete key;
key = CreateKey();
+ skipConsecutiveLinks = false;
}
@@ -37,3 +39,43 @@ SWKey *SWText::CreateKey()
{
return new VerseKey();
}
+
+
+long SWText::Index() const {
+ VerseKey *key = 0;
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ catch ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ entryIndex = key->NewIndex();
+
+ if (key != this->key)
+ delete key;
+
+ return entryIndex;
+}
+
+long SWText::Index(long iindex) {
+ VerseKey *key = 0;
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ catch ( ... ) {}
+ if (!key)
+ key = new VerseKey(this->key);
+
+ key->Testament(1);
+ key->Index(iindex);
+
+ if (key != this->key) {
+ this->key->copyFrom(*key);
+ delete key;
+ }
+
+ return Index();
+}
+
+SWORD_NAMESPACE_END
diff --git a/src/modules/texts/ztext/ztext.cpp b/src/modules/texts/ztext/ztext.cpp
index 6e243b9..1fe0e7a 100644
--- a/src/modules/texts/ztext/ztext.cpp
+++ b/src/modules/texts/ztext/ztext.cpp
@@ -14,13 +14,19 @@
#include <unistd.h>
#endif
-#include <iostream.h>
-#include <string.h>
#include <utilfuns.h>
-//#include <rawverse.h>
#include <ztext.h>
-//#include <zlib.h>
+#include <regex.h> // GNU
+
+
+#ifdef USELUCENE
+#include <CLucene/CLucene.h>
+using namespace lucene::search;
+using namespace lucene::queryParser;
+#endif
+
+SWORD_NAMESPACE_START
/******************************************************************************
* zText Constructor - Initializes data for instance of zText
@@ -33,10 +39,23 @@
* idisp - Display object to use for displaying
*/
-zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang)/*, SWCompress()*/
-{
+zText::zText(const char *ipath, const char *iname, const char *idesc, int iblockType, SWCompress *icomp, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang)
+ : zVerse(ipath, -1, iblockType, icomp), SWText(iname, idesc, idisp, enc, dir, mark, ilang) {
blockType = iblockType;
lastWriteKey = 0;
+#ifdef USELUCENE
+ SWBuf fname;
+ fname = path;
+ ir = 0;
+ is = 0;
+ char ch = fname.c_str()[strlen(fname.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ fname += "/lucene";
+ if (IndexReader::indexExists(fname.c_str())) {
+ ir = &IndexReader::open(fname);
+ is = new IndexSearcher(*ir);
+ }
+#endif
}
@@ -50,6 +69,14 @@ zText::~zText()
if (lastWriteKey)
delete lastWriteKey;
+
+#ifdef USELUCENE
+ if (is)
+ is->close();
+
+ if (ir)
+ delete ir;
+#endif
}
@@ -59,96 +86,23 @@ zText::~zText()
* RET: buffer with verse
*/
-char *zText::getRawEntry()
-{
-/*
- long start;
- unsigned long size;
- unsigned long destsize;
- char *tmpbuf;
- char *dest;
- VerseKey *lkey = (VerseKey *) SWModule::key;
- char sizebuf[3];
-
- lkey->Verse(0);
- if (chapcache != lkey->Index()) {
- findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
- gettext(lkey->Testament(), start, 3, sizebuf);
- memcpy(&size, sizebuf, 2);
- tmpbuf = new char [ size + 1 ];
- gettext(lkey->Testament(), start + 2, size + 1 , tmpbuf);
- //zBuf(&size, tmpbuf);
- dest = new char [ (size*4) + 1 ];
- uncompress((Bytef *)dest, &destsize, (Bytef *) tmpbuf, size);
- chapcache = lkey->Index();
- delete [] tmpbuf;
- }
-
- //findoffset(key->Testament(), key->Index(), &start, &size);
- findoffset(lkey->Testament(), lkey->Index(), &start, &((unsigned short) size));
-
- if (versebuf)
- delete [] versebuf;
- versebuf = new char [ size + 1 ];
- //memcpy(versebuf, Buf(), size);
- memcpy(versebuf, dest, destsize);
- delete [] dest;
-
- preptext(versebuf);
-
- return versebuf;
-*/
-
+SWBuf &zText::getRawEntryBuf() {
long start = 0;
unsigned short size = 0;
- VerseKey *key = 0;
-
- //printf ("zText char *\n");
-
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
+ VerseKey &key = getVerseKey();
- //printf ("checking cache\n");
- //printf ("finding offset\n");
- findoffset(key->Testament(), key->Index(), &start, &size);
+ findOffset(key.Testament(), key.Index(), &start, &size);
entrySize = size; // support getEntrySize call
+
+ entryBuf = "";
+ zReadText(key.Testament(), start, size, entryBuf);
- //printf ("deleting previous buffer\n");
- unsigned long newsize = (size + 2) * FILTERPAD;
- if (newsize > entrybufallocsize) {
- if (entrybuf)
- delete [] entrybuf;
- entrybuf = new char [ newsize ];
- entrybufallocsize = newsize;
- }
- *entrybuf = 0;
-
- //printf ("getting text\n");
- swgettext(key->Testament(), start, (size + 2), entrybuf);
- //printf ("got text\n");
-
- rawFilter(entrybuf, size, key);
-
- //printf ("preparing text\n");
- if (!isUnicode())
- preptext(entrybuf);
-
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
+ rawFilter(entryBuf, &key);
- //printf ("returning text\n");
- return entrybuf;
+// if (!isUnicode())
+ prepText(entryBuf);
+ return entryBuf;
}
@@ -171,177 +125,316 @@ bool zText::sameBlock(VerseKey *k1, VerseKey *k2) {
}
-SWModule &zText::setentry(const char *inbuf, long len) {
- VerseKey *key = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!key)
- key = new VerseKey(this->key);
-
+void zText::setEntry(const char *inbuf, long len) {
+ VerseKey &key = getVerseKey();
// see if we've jumped across blocks since last write
if (lastWriteKey) {
- if (!sameBlock(lastWriteKey, key)) {
+ if (!sameBlock(lastWriteKey, &key)) {
flushCache();
}
delete lastWriteKey;
}
- settext(key->Testament(), key->Index(), inbuf, len);
+ doSetText(key.Testament(), key.Index(), inbuf, len);
- lastWriteKey = (VerseKey *)key->clone(); // must delete
-
- if (this->key != key) // free our key if we created a VerseKey
- delete key;
-
- return *this;
-}
-
-SWModule &zText::operator <<(const char *inbuf) {
- return setentry(inbuf, 0);
+ lastWriteKey = (VerseKey *)key.clone(); // must delete
}
-SWModule &zText::operator <<(const SWKey *inkey) {
- VerseKey *destkey = 0;
+void zText::linkEntry(const SWKey *inkey) {
+ VerseKey &destkey = getVerseKey();
const VerseKey *srckey = 0;
- // see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
- try {
-#endif
- destkey = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- // if we don't have a VerseKey * decendant, create our own
- if (!destkey)
- destkey = new VerseKey(this->key);
// see if we have a VerseKey * or decendant
-#ifndef _WIN32_WCE
try {
-#endif
srckey = (const VerseKey *) SWDYNAMIC_CAST(VerseKey, inkey);
-#ifndef _WIN32_WCE
}
catch ( ... ) {
}
-#endif
// if we don't have a VerseKey * decendant, create our own
if (!srckey)
srckey = new VerseKey(inkey);
- linkentry(destkey->Testament(), destkey->Index(), srckey->Index());
-
- if (this->key != destkey) // free our key if we created a VerseKey
- delete destkey;
+ doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index());
if (inkey != srckey) // free our key if we created a VerseKey
delete srckey;
-
- return *this;
}
/******************************************************************************
* zFiles::deleteEntry - deletes this entry
*
- * RET: *this
*/
void zText::deleteEntry() {
- VerseKey *key = 0;
+ VerseKey &key = getVerseKey();
-#ifndef _WIN32_WCE
- try {
-#endif
- key = SWDYNAMIC_CAST(VerseKey, this->key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!key)
- key = new VerseKey(this->key);
-
- settext(key->Testament(), key->Index(), "");
-
- if (key != this->key)
- delete key;
+ doSetText(key.Testament(), key.Index(), "");
}
/******************************************************************************
- * zText::operator += - Increments module key a number of entries
+ * zText::increment - Increments module key a number of entries
*
* ENT: increment - Number of entries to jump forward
*
- * RET: *this
*/
-SWModule &zText::operator +=(int increment)
-{
+void zText::increment(int steps) {  // steps the module key forward (steps > 0) or backward (steps < 0) until |steps| positions have been counted or the key reports an error; the result is left in `error`
long start;
unsigned short size;
- VerseKey *tmpkey = 0;
-
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ VerseKey *tmpkey = &getVerseKey();  // current key viewed as a VerseKey; not owned here, so nothing is deleted later
- findoffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);
+ findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size);  // start/size for the starting position, compared against each later position below
SWKey lastgood = *tmpkey;
- while (increment) {
+ while (steps) {  // loop until every requested step has been counted (or we break on a key error)
long laststart = start;
unsigned short lastsize = size;
SWKey lasttry = *tmpkey;
- (increment > 0) ? (*key)++ : (*key)--;
- if (tmpkey != key)
- delete tmpkey;
- tmpkey = 0;
-#ifndef _WIN32_WCE
- try {
-#endif
- tmpkey = SWDYNAMIC_CAST(VerseKey, key);
-#ifndef _WIN32_WCE
- }
- catch ( ... ) {}
-#endif
- if (!tmpkey)
- tmpkey = new VerseKey(key);
+ (steps > 0) ? (*key)++ : (*key)--;  // move the underlying key one position in the requested direction
+ tmpkey = &getVerseKey();  // re-resolve after the move; the helper may hand back a refreshed copy rather than `key` itself
if ((error = key->Error())) {
*key = lastgood;
break;
}
long index = tmpkey->Index();
- findoffset(tmpkey->Testament(), index, &start, &size);
- if ((((laststart != start) || (lastsize != size))||(!skipConsecutiveLinks)) && (start >= 0) && (size)) {
- increment += (increment < 0) ? 1 : -1;
+ findOffset(tmpkey->Testament(), index, &start, &size);  // start/size for the position we just moved to
+
+ if (
+ (((laststart != start) || (lastsize != size)) // start or size differs from the previous position, i.e. a different entry
+// && (start > 0)  (disabled check, kept in the source as a comment)
+ && (size)) // and the entry has a non-zero size
+ ||(!skipConsecutiveLinks)) { // or consecutive links are not being skipped, in which case every move counts
+ steps += (steps < 0) ? 1 : -1;  // count this move: bring steps one closer to zero
+ lastgood = *tmpkey;  // remember the last counted position; it is restored if a later move errors
}
}
error = (error) ? KEYERR_OUTOFBOUNDS : 0;
+}
- if (tmpkey != key)
- delete tmpkey;
- return *this;
+VerseKey &zText::getVerseKey() {  // returns the module's current key as a VerseKey: the key itself, the VerseKey element of a ListKey, or a converted copy held in a static
+ static VerseKey tmpVK;  // fallback storage; function-local static, so a single object is shared by every call
+ VerseKey *key;  // NOTE(review): declared without an initializer; if the cast below throws, the swallowed exception leaves `!key` reading an indeterminate value -- confirm SWDYNAMIC_CAST cannot throw
+ // see if we have a VerseKey * or descendant
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, this->key);
+ }
+ catch ( ... ) { }
+ if (!key) {  // not a VerseKey: check whether the key is a ListKey holding one
+ ListKey *lkTest = 0;
+ try {
+ lkTest = SWDYNAMIC_CAST(ListKey, this->key);
+ }
+ catch ( ... ) { }
+ if (lkTest) {
+ try {
+ key = SWDYNAMIC_CAST(VerseKey, lkTest->GetElement());  // GetElement() with no argument -- presumably the list's current element; verify against ListKey
+ }
+ catch ( ... ) { }
+ }
+ }
+ if (!key) {
+ tmpVK = *(this->key);  // no VerseKey found either way: assign the module key into the static fallback
+ return tmpVK;  // NOTE(review): reference to the shared static; its contents are overwritten by the next call that takes this path
+ }
+ else return *key;
}
+
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+signed char zText::createSearchFramework() {  // when built with USELUCENE, walks the module from TOP and writes a Lucene index (fields "key" and "content"); always returns 0
+#ifdef USELUCENE
+ SWKey *savekey = 0;
+ SWKey *searchkey = 0;
+ SWKey textkey;  // NOTE(review): textkey, word and wordBuf are not referenced again in this function
+ char *word = 0;
+ char *wordBuf = 0;
+
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ savekey = CreateKey();  // non-persistent key: keep a private copy of the current position
+ *savekey = *key;
+ }
+ else savekey = key;  // persistent key: remember the key object itself
+
+ searchkey = (key->Persist())?key->clone():0;  // with a persistent key, iterate on a clone so the original object is not moved
+ if (searchkey) {
+ searchkey->Persist(1);
+ setKey(*searchkey);
+ }
+
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey *lkey = (VerseKey *)key;  // NOTE(review): unchecked C-style cast; assumes the module key really is a VerseKey
+
+ // set up the Lucene index writer; the loop over module entries follows further down
+
+ IndexWriter* writer = NULL;
+ Directory* d = NULL;
+
+ lucene::analysis::SimpleAnalyzer& an = *new lucene::analysis::SimpleAnalyzer();  // heap-allocated; released by `delete &an` below
+ SWBuf target = path;
+ char ch = target.c_str()[strlen(target.c_str())-1];  // last character of path; NOTE(review): indexes position -1 when path is empty
+ if ((ch != '/') && (ch != '\\'))
+ target += "/lucene";  // NOTE(review): appended only when path lacks a trailing slash, so a path ending in '/' or '\\' is used as the index directory unchanged -- confirm intended
+
+ if (IndexReader::indexExists(target.c_str())) {
+ d = &FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(*d)) {
+ IndexReader::unlock(*d);  // clear an existing lock before opening the writer
+ }
+
+ writer = new IndexWriter(*d, an, false);  // index already exists: open with the last argument false (presumably "do not create" -- verify against CLucene)
+ } else {
+ d = &FSDirectory::getDirectory(target.c_str(), true);
+ writer = new IndexWriter( *d ,an, true);  // no index yet: open with the last argument true (presumably "create")
+ }
+
+
+
+ while (!Error()) {  // one document per module position until the module reports an error
+ Document &doc = *new Document();
+ doc.add( Field::Text(_T("key"), (const char *)*lkey ) );  // key text for this position
+ doc.add( Field::Text(_T("content"), StripText()) );  // entry text as returned by StripText()
+ writer->addDocument(doc);
+ delete &doc;
+
+ (*this)++;  // advance the module to the next position
+ }
+
+ writer->optimize();
+ writer->close();
+ delete writer;
+ delete &an;  // NOTE(review): `d` is not closed or released anywhere in this function -- confirm ownership rules for FSDirectory::getDirectory
+
+ // reposition module back to where it was before we were called
+ setKey(*savekey);
+
+ if (!savekey->Persist())
+ delete savekey;  // only the private copy made above is deleted; a persistent key was never owned here
+
+ if (searchkey)
+ delete searchkey;
+
+
+#endif
+ return 0;  // also the only statement executed when USELUCENE is not defined
+}
+
+
+/******************************************************************************
+ * zText::search - Searches a module for a string
+ *
+ * With USELUCENE defined and both `is` and `ir` set, a multiword search
+ * (searchType -2) whose scope (or the module key, when scope is null) is a
+ * VerseKey is answered from the Lucene index. Every other case is either
+ * reported as unsupported (when justCheckIfSupported is given) or passed to
+ * SWModule::search.
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * flags - options flags for search
+ * scope - optional key limiting the search; may be null
+ * justCheckIfSupported - if set, don't search, only tell if this
+ * function supports requested search (result is written through
+ * the pointer).
+ * percent - progress callback, invoked with 10, 20, 80, 98 and 100 on
+ * the Lucene path
+ * percentUserData - passed back unchanged as the second argument of
+ * percent
+ *
+ * RET: listkey set to verses that contain istr
+ */
+
+ListKey &zText::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ listkey.ClearList();
+
+ if ((is) && (ir)) {  // presumably the Lucene searcher/reader members; both must be set -- verify where they are initialised
+
+ switch (searchType) {
+ case -2: { // let lucene replace multiword for now
+
+
+ // test to see if our scope for this search is bounded by a
+ // VerseKey
+ VerseKey *testKeyType = 0, vk;
+ try {
+ testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key));
+ }
+ catch ( ... ) {}
+ // if we don't have a VerseKey * descendant we can't handle
+ // because of scope.
+ // In the future, add bool SWKey::isValid(const char *tryString);
+ if (!testKeyType)
+ break;
+
+
+ // check if we just want to see if search is supported.
+ // If we've gotten this far, then it is supported.
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = true;
+ return listkey;
+ }
+
+ (*percent)(10, percentUserData);  // NOTE(review): percent is called without a null check on this path -- confirm callers always supply a callback
+
+ standard::StandardAnalyzer analyzer;  // NOTE(review): createSearchFramework builds the index with SimpleAnalyzer; confirm the analyzer mismatch is intended
+ Query &q = QueryParser::Parse(istr, _T("content"), analyzer);  // parse istr as a query against the "content" field
+ (*percent)(20, percentUserData);
+ Hits &h = is->search(q);
+ (*percent)(80, percentUserData);
+
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h.Length(); i++) {
+ Document &doc = h.doc(i);
+
+ // set a temporary verse key to this module position
+ vk = doc.get(_T("key"));
+
+ // check scope
+ // Try to set our scope key to this verse key
+ if (scope) {
+ *testKeyType = vk;  // scope is non-null here, so this assigns into the caller's scope key
+
+ // check to see if it set ok and if so, add to our return list
+ if (*testKeyType == vk)
+ listkey << (const char *) vk;
+ listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);  // NOTE(review): not governed by the unbraced `if` above, so it also runs for hits outside the scope -- confirm intended
+ }
+ else {
+ listkey << (const char*) vk;
+ listkey.GetElement()->userData = (void *)(int)(h.score(i)*100);  // hit score times 100, converted to int and stored in the pointer-sized userData slot
+ }
+ }
+ (*percent)(98, percentUserData);
+
+ delete &h;  // h and q are references to objects this function frees itself
+ delete &q;
+
+ listkey = TOP;  // position the result list at its first element before returning
+ (*percent)(100, percentUserData);
+ return listkey;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // check if we just want to see if search is supported
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = false;  // reached only when the Lucene path above did not handle the request; the base class is not consulted in this case
+ return listkey;
+ }
+#endif
+ // if we don't support this search, fall back to base class
+ return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData);
+}
+
+
+SWORD_NAMESPACE_END