summaryrefslogtreecommitdiff
path: root/src/modules/swmodule.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/modules/swmodule.cpp')
-rw-r--r--src/modules/swmodule.cpp1285
1 files changed, 1285 insertions, 0 deletions
diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp
new file mode 100644
index 0000000..8461953
--- /dev/null
+++ b/src/modules/swmodule.cpp
@@ -0,0 +1,1285 @@
+/******************************************************************************
+ * swmodule.cpp -code for base class 'module'. Module is the basis for all
+ * types of modules (e.g. texts, commentaries, maps, lexicons,
+ * etc.)
+ */
+
+#include <vector>
+
+#include <sysdata.h>
+#include <swmodule.h>
+#include <utilstr.h>
+#include <regex.h> // GNU
+#include <swfilter.h>
+#include <versekey.h> // KLUDGE for Search
+#include <treekeyidx.h> // KLUDGE for Search
+#include <swoptfilter.h>
+#include <filemgr.h>
+#include <stringmgr.h>
+#ifndef _MSC_VER
+#include <iostream>
+#endif
+
+#ifdef USELUCENE
+#include <CLucene.h>
+#include <CLucene/CLBackwards.h>
+
+//Lucence includes
+//#include "CLucene.h"
+//#include "CLucene/util/Reader.h"
+//#include "CLucene/util/Misc.h"
+//#include "CLucene/util/dirent.h"
+
+using namespace lucene::index;
+using namespace lucene::analysis;
+using namespace lucene::util;
+using namespace lucene::store;
+using namespace lucene::document;
+using namespace lucene::queryParser;
+using namespace lucene::search;
+#endif
+
+using std::vector;
+
+SWORD_NAMESPACE_START
+
+SWDisplay SWModule::rawdisp;
+
+typedef std::list<SWBuf> StringList;
+
+/******************************************************************************
+ * SWModule Constructor - Initializes data for instance of SWModule
+ *
+ * ENT: imodname - Internal name for module
+ * imoddesc - Name to display to user for module
+ * idisp - Display object to use for displaying
+ * imodtype - Type of Module (All modules will be displayed with
+ * others of same type under their modtype heading
+ * unicode - if this module is unicode
+ */
+
+SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char* imodlang) {
+ key = CreateKey();
+ entryBuf = "";
+ config = &ownConfig;
+ modname = 0;
+ error = 0;
+ moddesc = 0;
+ modtype = 0;
+ modlang = 0;
+ this->encoding = encoding;
+ this->direction = direction;
+ this->markup = markup;
+ entrySize= -1;
+ disp = (idisp) ? idisp : &rawdisp;
+ stdstr(&modname, imodname);
+ stdstr(&moddesc, imoddesc);
+ stdstr(&modtype, imodtype);
+ stdstr(&modlang, imodlang);
+ stripFilters = new FilterList();
+ rawFilters = new FilterList();
+ renderFilters = new FilterList();
+ optionFilters = new OptionFilterList();
+ encodingFilters = new FilterList();
+ skipConsecutiveLinks = true;
+ procEntAttr = true;
+}
+
+
+/******************************************************************************
+ * SWModule Destructor - Cleans up instance of SWModule
+ */
+
+SWModule::~SWModule()
+{
+ if (modname)
+ delete [] modname;
+ if (moddesc)
+ delete [] moddesc;
+ if (modtype)
+ delete [] modtype;
+ if (modlang)
+ delete [] modlang;
+
+ if (key) {
+ if (!key->Persist())
+ delete key;
+ }
+
+ stripFilters->clear();
+ rawFilters->clear();
+ renderFilters->clear();
+ optionFilters->clear();
+ encodingFilters->clear();
+ entryAttributes.clear();
+
+ delete stripFilters;
+ delete rawFilters;
+ delete renderFilters;
+ delete optionFilters;
+ delete encodingFilters;
+}
+
+
+/******************************************************************************
+ * SWModule::CreateKey - Allocates a key of specific type for module
+ *
+ * RET: pointer to allocated key
+ */
+
+SWKey *SWModule::CreateKey()
+{
+ return new SWKey();
+}
+
+
+/******************************************************************************
+ * SWModule::Error - Gets and clears error status
+ *
+ * RET: error status
+ */
+
+char SWModule::Error()
+{
+ char retval = error;
+
+ error = 0;
+ return retval;
+}
+
+
+/******************************************************************************
+ * SWModule::Name - Sets/gets module name
+ *
+ * ENT: imodname - value which to set modname
+ * [0] - only get
+ *
+ * RET: pointer to modname
+ */
+
+char *SWModule::Name(const char *imodname) {
+ return stdstr(&modname, imodname);
+}
+
+char *SWModule::Name() const {
+ return modname;
+}
+
+
+/******************************************************************************
+ * SWModule::Description - Sets/gets module description
+ *
+ * ENT: imoddesc - value which to set moddesc
+ * [0] - only get
+ *
+ * RET: pointer to moddesc
+ */
+
+char *SWModule::Description(const char *imoddesc) {
+ return stdstr(&moddesc, imoddesc);
+}
+
+char *SWModule::Description() const {
+ return moddesc;
+}
+
+
+/******************************************************************************
+ * SWModule::Type - Sets/gets module type
+ *
+ * ENT: imodtype - value which to set modtype
+ * [0] - only get
+ *
+ * RET: pointer to modtype
+ */
+
+char *SWModule::Type(const char *imodtype) {
+ return stdstr(&modtype, imodtype);
+}
+
+char *SWModule::Type() const {
+ return modtype;
+}
+
+/******************************************************************************
+ * SWModule::Direction - Sets/gets module direction
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char direction
+ */
+char SWModule::Direction(signed char newdir) {
+ if (newdir != -1)
+ direction = newdir;
+ return direction;
+}
+
+/******************************************************************************
+ * SWModule::Encoding - Sets/gets module encoding
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char encoding
+ */
+char SWModule::Encoding(signed char newenc) {
+ if (newenc != -1)
+ encoding = newenc;
+ return encoding;
+}
+
+/******************************************************************************
+ * SWModule::Markup - Sets/gets module markup
+ *
+ * ENT: newdir - value which to set direction
+ * [-1] - only get
+ *
+ * RET: char markup
+ */
+char SWModule::Markup(signed char newmark) {
+ if (newmark != -1)
+ markup = newmark;
+ return markup;
+}
+
+
+/******************************************************************************
+ * SWModule::Lang - Sets/gets module language
+ *
+ * ENT: imodlang - value which to set modlang
+ * [0] - only get
+ *
+ * RET: pointer to modname
+ */
+
+char *SWModule::Lang(const char *imodlang)
+{
+ return stdstr(&modlang, imodlang);
+}
+
+
+/******************************************************************************
+ * SWModule::Disp - Sets/gets display driver
+ *
+ * ENT: idisp - value which to set disp
+ * [0] - only get
+ *
+ * RET: pointer to disp
+ */
+
+SWDisplay *SWModule::getDisplay() const {
+ return disp;
+}
+
+void SWModule::setDisplay(SWDisplay *idisp) {
+ disp = idisp;
+}
+
+
+/******************************************************************************
+ * SWModule::Display - Calls this modules display object and passes itself
+ *
+ * RET: error status
+ */
+
+char SWModule::Display() {
+ disp->Display(*this);
+ return 0;
+}
+
+
+/******************************************************************************
+ * SWModule::getKey - Gets the key from this module that points to the position
+ * record
+ *
+ * RET: key object
+ */
+
+SWKey *SWModule::getKey() const {
+ return key;
+}
+
+
+/******************************************************************************
+ * SWModule::setKey - Sets a key to this module for position to a particular
+ * record
+ *
+ * ENT: ikey - key with which to set this module
+ *
+ * RET: error status
+ */
+
+char SWModule::setKey(const SWKey *ikey) {
+ SWKey *oldKey = 0;
+
+ if (key) {
+ if (!key->Persist()) // if we have our own copy
+ oldKey = key;
+ }
+
+ if (!ikey->Persist()) { // if we are to keep our own copy
+ key = CreateKey();
+ *key = *ikey;
+ }
+ else key = (SWKey *)ikey; // if we are to just point to an external key
+
+ if (oldKey)
+ delete oldKey;
+
+ return 0;
+}
+
+
+/******************************************************************************
+ * SWModule::setPosition(SW_POSITION) - Positions this modules to an entry
+ *
+ * ENT: p - position (e.g. TOP, BOTTOM)
+ *
+ * RET: *this
+ */
+
+void SWModule::setPosition(SW_POSITION p) {
+ *key = p;
+ char saveError = key->Error();
+
+ switch (p) {
+ case POS_TOP:
+ (*this)++;
+ (*this)--;
+ break;
+
+ case POS_BOTTOM:
+ (*this)--;
+ (*this)++;
+ break;
+ }
+
+ error = saveError;
+}
+
+
+/******************************************************************************
+ * SWModule::increment - Increments module key a number of entries
+ *
+ * ENT: increment - Number of entries to jump forward
+ *
+ * RET: *this
+ */
+
+void SWModule::increment(int steps) {
+ (*key) += steps;
+ error = key->Error();
+}
+
+
+/******************************************************************************
+ * SWModule::decrement - Decrements module key a number of entries
+ *
+ * ENT: decrement - Number of entries to jump backward
+ *
+ * RET: *this
+ */
+
+void SWModule::decrement(int steps) {
+ (*key) -= steps;
+ error = key->Error();
+}
+
+
+/******************************************************************************
+ * SWModule::Search - Searches a module for a string
+ *
+ * ENT: istr - string for which to search
+ * searchType - type of search to perform
+ * >=0 - regex
+ * -1 - phrase
+ * -2 - multiword
+ * -3 - entryAttrib (eg. Word//Lemma/G1234/)
+ * -4 - clucene
+ * flags - options flags for search
+ * justCheckIfSupported - if set, don't search, only tell if this
+ * function supports requested search.
+ *
+ * RET: ListKey set to verses that contain istr
+ */
+
+ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) {
+
+ listKey.ClearList();
+ SWBuf term = istr;
+
+#ifdef USELUCENE
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+#endif
+ if (justCheckIfSupported) {
+ *justCheckIfSupported = (searchType >= -3);
+#ifdef USELUCENE
+ if ((searchType == -4) && (IndexReader::indexExists(target.c_str()))) {
+ *justCheckIfSupported = true;
+ }
+#endif
+ return listKey;
+ }
+
+ SWKey *saveKey = 0;
+ SWKey *searchKey = 0;
+ SWKey *resultKey = CreateKey();
+ regex_t preg;
+ vector<SWBuf> words;
+ const char *sres;
+ terminateSearch = false;
+ char perc = 1;
+ bool savePEA = isProcessEntryAttributes();
+ // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts
+ bool specialStrips = (getConfigEntry("LocalStripFilter") || strchr(istr, '<'));
+
+ processEntryAttributes(searchType == -3);
+
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ searchKey = (scope)?scope->clone():(key->Persist())?key->clone():0;
+ if (searchKey) {
+ searchKey->Persist(1);
+ setKey(*searchKey);
+ }
+
+ (*percent)(perc, percentUserData);
+ // MAJOR KLUDGE: VerseKey::Index still return index within testament.
+ // VerseKey::NewIndex should be moved to Index and Index should be some
+ // VerseKey specific name
+ VerseKey *vkcheck = 0;
+ SWTRY {
+ vkcheck = SWDYNAMIC_CAST(VerseKey, key);
+ }
+ SWCATCH (...) {}
+ // end MAJOR KLUDGE
+
+ *this = BOTTOM;
+ // fix below when we find out the bug
+ long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index();
+ if (!highIndex)
+ highIndex = 1; // avoid division by zero errors.
+ *this = TOP;
+ if (searchType >= 0) {
+ flags |=searchType|REG_NOSUB|REG_EXTENDED;
+ regcomp(&preg, istr, flags);
+ }
+
+ (*percent)(++perc, percentUserData);
+
+
+#ifdef USELUCENE
+ if (searchType == -4) { // lucene
+ //Buffers for the wchar<->utf8 char* conversion
+ const unsigned short int MAX_CONV_SIZE = 2047;
+ wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
+ char utfBuffer[MAX_CONV_SIZE + 1];
+
+ lucene::index::IndexReader *ir = 0;
+ lucene::search::IndexSearcher *is = 0;
+ Query *q = 0;
+ Hits *h = 0;
+ SWTRY {
+ ir = IndexReader::open(target);
+ is = new IndexSearcher(ir);
+ (*percent)(10, percentUserData);
+
+ standard::StandardAnalyzer analyzer;
+ lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8?
+ q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer);
+ (*percent)(20, percentUserData);
+ h = is->search(q);
+ (*percent)(80, percentUserData);
+
+ // iterate thru each good module position that meets the search
+ for (long i = 0; i < h->length(); i++) {
+ Document &doc = h->doc(i);
+
+ // set a temporary verse key to this module position
+ lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE);
+ *resultKey = utfBuffer; //TODO Does a key always accept utf8?
+
+ // check to see if it sets ok (in our range?) and if so, add to our return list
+ *getKey() = *resultKey;
+ if (*getKey() == *resultKey) {
+ listKey << *resultKey;
+ listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100));
+ }
+ }
+ (*percent)(98, percentUserData);
+ }
+ SWCATCH (...) {
+ q = 0;
+ // invalid clucene query
+ }
+ delete h;
+ delete q;
+
+ delete is;
+ if (ir) {
+ ir->close();
+ }
+ }
+#endif
+
+ // some pre-loop processing
+ switch (searchType) {
+
+ // phrase
+ case -1:
+ // let's see if we're told to ignore case. If so, then we'll touppstr our term
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(term);
+ break;
+
+ // multi-word
+ case -2:
+ // let's break the term down into our words vector
+ while (1) {
+ const char *word = term.stripPrefix(' ');
+ if (!word) {
+ words.push_back(term);
+ break;
+ }
+ words.push_back(word);
+ }
+ if ((flags & REG_ICASE) == REG_ICASE) {
+ for (unsigned int i = 0; i < words.size(); i++) {
+ toupperstr(words[i]);
+ }
+ }
+ break;
+
+ // entry attributes
+ case -3:
+ // let's break the attribute segs down. We'll reuse our words vector for each segment
+ while (1) {
+ const char *word = term.stripPrefix('/');
+ if (!word) {
+ words.push_back(term);
+ break;
+ }
+ words.push_back(word);
+ }
+ break;
+ }
+
+
+ // our main loop to iterate the module and find the stuff
+ perc = 5;
+ (*percent)(perc, percentUserData);
+
+
+ while ((searchType > -4) && !Error() && !terminateSearch) {
+ long mindex = 0;
+ if (vkcheck)
+ mindex = vkcheck->NewIndex();
+ else mindex = key->Index();
+ float per = (float)mindex / highIndex;
+ per *= 93;
+ per += 5;
+ char newperc = (char)per;
+// char newperc = (char)(5+(93*(((float)((vkcheck)?vkcheck->NewIndex():key->Index()))/highIndex)));
+ if (newperc > perc) {
+ perc = newperc;
+ (*percent)(perc, percentUserData);
+ }
+ else if (newperc < perc) {
+#ifndef _MSC_VER
+ std::cerr << "Serious error: new percentage complete is less than previous value\n";
+ std::cerr << "using vk? " << ((vkcheck)?"yes":"no") << "\n";
+ std::cerr << "index: " << ((vkcheck)?vkcheck->NewIndex():key->Index()) << "\n";
+ std::cerr << "highIndex: " << highIndex << "\n";
+ std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n";
+ std::cerr << "perc == " << (int )perc << "% \n";
+#endif
+ }
+ if (searchType >= 0) {
+ if (!regexec(&preg, StripText(), 0, 0, 0)) {
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ }
+
+ // phrase
+ else {
+ SWBuf textBuf;
+ switch (searchType) {
+
+ // phrase
+ case -1:
+ textBuf = StripText();
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
+ sres = strstr(textBuf.c_str(), term.c_str());
+ if (sres) { //it's also in the StripText(), so we have a valid search result item now
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ break;
+
+ // multiword
+ case -2: { // enclose our allocations
+ int loopCount = 0;
+ unsigned int foundWords = 0;
+ do {
+ textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : StripText();
+ foundWords = 0;
+
+ for (unsigned int i = 0; i < words.size(); i++) {
+ if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf);
+ sres = strstr(textBuf.c_str(), words[i].c_str());
+ if (!sres) {
+ break; //for loop
+ }
+ foundWords++;
+ }
+
+ loopCount++;
+ } while ( (loopCount < 2) && (foundWords == words.size()));
+
+ if ((loopCount == 2) && (foundWords == words.size())) { //we found the right words in both raw and stripped text, which means it's a valid result item
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ }
+ } break;
+
+ // entry attributes
+ case -3:
+ RenderText(); // force parse
+ AttributeTypeList &entryAttribs = getEntryAttributes();
+ AttributeTypeList::iterator i1Start, i1End;
+ AttributeList::iterator i2Start, i2End;
+ AttributeValue::iterator i3Start, i3End;
+
+ if ((words.size()) && (words[0].length())) {
+ i1Start = entryAttribs.find(words[0]);
+ i1End = i1Start;
+ if (i1End != entryAttribs.end())
+ i1End++;
+ }
+ else {
+ i1Start = entryAttribs.begin();
+ i1End = entryAttribs.end();
+ }
+ for (;i1Start != i1End; i1Start++) {
+ if ((words.size()>1) && (words[1].length())) {
+ i2Start = i1Start->second.find(words[1]);
+ i2End = i2Start;
+ if (i2End != i1Start->second.end())
+ i2End++;
+ }
+ else {
+ i2Start = i1Start->second.begin();
+ i2End = i1Start->second.end();
+ }
+ for (;i2Start != i2End; i2Start++) {
+ if ((words.size()>2) && (words[2].length())) {
+ i3Start = i2Start->second.find(words[2]);
+ i3End = i3Start;
+ if (i3End != i2Start->second.end())
+ i3End++;
+ }
+ else {
+ i3Start = i2Start->second.begin();
+ i3End = i2Start->second.end();
+ }
+ for (;i3Start != i3End; i3Start++) {
+ if ((words.size()>3) && (words[3].length())) {
+ if (flags & SEARCHFLAG_MATCHWHOLEENTRY) {
+ bool found = !(((flags & REG_ICASE) == REG_ICASE) ? sword::stricmp(i3Start->second.c_str(), words[3]) : strcmp(i3Start->second.c_str(), words[3]));
+ sres = (found) ? i3Start->second.c_str() : 0;
+ }
+ else {
+ sres = ((flags & REG_ICASE) == REG_ICASE) ? stristr(i3Start->second.c_str(), words[3]) : strstr(i3Start->second.c_str(), words[3]);
+ }
+ if (sres) {
+ *resultKey = *getKey();
+ resultKey->clearBound();
+ listKey << *resultKey;
+ break;
+ }
+ }
+ }
+ if (i3Start != i3End)
+ break;
+ }
+ if (i2Start != i2End)
+ break;
+ }
+ break;
+ } // end switch
+ }
+ (*this)++;
+ }
+
+
+ // cleaup work
+ if (searchType >= 0)
+ regfree(&preg);
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ if (searchKey)
+ delete searchKey;
+ delete resultKey;
+
+ listKey = TOP;
+ processEntryAttributes(savePEA);
+
+
+ (*percent)(100, percentUserData);
+
+
+ return listKey;
+}
+
+
+/******************************************************************************
+ * SWModule::StripText() - calls all stripfilters on current text
+ *
+ * ENT: buf - buf to massage instead of this modules current text
+ * len - max len of buf
+ *
+ * RET: this module's text at current key location massaged by Strip filters
+ */
+
+const char *SWModule::StripText(const char *buf, int len) {
+ return RenderText(buf, len, false);
+}
+
+
+/******************************************************************************
+ * SWModule::RenderText - calls all renderfilters on current text
+ *
+ * ENT: buf - buffer to Render instead of current module position
+ *
+ * RET: this module's text at current key location massaged by RenderText filters
+ */
+
+ const char *SWModule::RenderText(const char *buf, int len, bool render) {
+ entryAttributes.clear();
+
+ static SWBuf local;
+ if (buf)
+ local = buf;
+
+ SWBuf &tmpbuf = (buf) ? local : getRawEntryBuf();
+ SWKey *key = 0;
+ static const char *null = "";
+
+ if (tmpbuf) {
+ unsigned long size = (len < 0) ? ((getEntrySize()<0) ? strlen(tmpbuf) : getEntrySize()) : len;
+ if (size > 0) {
+ key = (SWKey *)*this;
+
+ optionFilter(tmpbuf, key);
+
+ if (render) {
+ renderFilter(tmpbuf, key);
+ encodingFilter(tmpbuf, key);
+ }
+ else stripFilter(tmpbuf, key);
+ }
+ }
+ else {
+ tmpbuf = null;
+ }
+
+ return tmpbuf;
+}
+
+
+/******************************************************************************
+ * SWModule::RenderText - calls all renderfilters on current text
+ *
+ * ENT: tmpKey - key to use to grab text
+ *
+ * RET: this module's text at current key location massaged by RenderFilers
+ */
+
+ const char *SWModule::RenderText(SWKey *tmpKey) {
+ SWKey *saveKey;
+ const char *retVal;
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ setKey(*tmpKey);
+
+ retVal = RenderText();
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ return retVal;
+}
+
+
+/******************************************************************************
+ * SWModule::StripText - calls all StripTextFilters on current text
+ *
+ * ENT: tmpKey - key to use to grab text
+ *
+ * RET: this module's text at specified key location massaged by Strip filters
+ */
+
+const char *SWModule::StripText(SWKey *tmpKey) {
+ SWKey *saveKey;
+ const char *retVal;
+
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ setKey(*tmpKey);
+
+ retVal = StripText();
+
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ return retVal;
+}
+
+
+const char *SWModule::getConfigEntry(const char *key) const {
+ ConfigEntMap::iterator it = config->find(key);
+ return (it != config->end()) ? it->second.c_str() : 0;
+}
+
+
+void SWModule::setConfig(ConfigEntMap *config) {
+ this->config = config;
+}
+
+
+bool SWModule::hasSearchFramework() {
+#ifdef USELUCENE
+ return true;
+#else
+ return SWSearchable::hasSearchFramework();
+#endif
+}
+
+void SWModule::deleteSearchFramework() {
+#ifdef USELUCENE
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+
+ FileMgr::removeDir(target.c_str());
+#else
+ SWSearchable::deleteSearchFramework();
+#endif
+}
+
+
+signed char SWModule::createSearchFramework(void (*percent)(char, void *), void *percentUserData) {
+#ifdef USELUCENE
+ SWKey *saveKey = 0;
+ SWKey *searchKey = 0;
+ SWKey textkey;
+ SWBuf c;
+
+
+ // turn all filters to default values
+ StringList filterSettings;
+ for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
+ filterSettings.push_back((*filter)->getOptionValue());
+ (*filter)->setOptionValue(*((*filter)->getOptionValues().begin()));
+
+ if (!strcmp("Greek Accents", (*filter)->getOptionName())) {
+ (*filter)->setOptionValue("Off");
+ }
+ }
+
+
+ // be sure we give CLucene enough file handles
+ FileMgr::getSystemFileMgr()->flush();
+
+ // save key information so as not to disrupt original
+ // module position
+ if (!key->Persist()) {
+ saveKey = CreateKey();
+ *saveKey = *key;
+ }
+ else saveKey = key;
+
+ searchKey = (key->Persist())?key->clone():0;
+ if (searchKey) {
+ searchKey->Persist(1);
+ setKey(*searchKey);
+ }
+
+ RAMDirectory *ramDir = NULL;
+ IndexWriter *coreWriter = NULL;
+ IndexWriter *fsWriter = NULL;
+ Directory *d = NULL;
+
+ standard::StandardAnalyzer *an = new standard::StandardAnalyzer();
+ SWBuf target = getConfigEntry("AbsoluteDataPath");
+ bool includeKeyInSearch = getConfig().has("SearchOption", "IncludeKeyInSearch");
+ char ch = target.c_str()[strlen(target.c_str())-1];
+ if ((ch != '/') && (ch != '\\'))
+ target.append('/');
+ target.append("lucene");
+ FileMgr::createParent(target+"/dummy");
+
+ ramDir = new RAMDirectory();
+ coreWriter = new IndexWriter(ramDir, an, true);
+
+
+
+ char perc = 1;
+ VerseKey *vkcheck = 0;
+ vkcheck = SWDYNAMIC_CAST(VerseKey, key);
+
+ TreeKeyIdx *tkcheck = 0;
+ tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key);
+
+
+ *this = BOTTOM;
+ long highIndex = (vkcheck)?32300/*vkcheck->NewIndex()*/:key->Index();
+ if (!highIndex)
+ highIndex = 1; // avoid division by zero errors.
+
+ bool savePEA = isProcessEntryAttributes();
+ processEntryAttributes(true);
+
+ // prox chapter blocks
+ // position module at the beginning
+ *this = TOP;
+
+ VerseKey chapMax;
+ SWBuf proxBuf;
+ SWBuf proxLem;
+ SWBuf strong;
+
+ const short int MAX_CONV_SIZE = 2047;
+ wchar_t wcharBuffer[MAX_CONV_SIZE + 1];
+
+ char err = Error();
+ while (!err) {
+ long mindex = 0;
+ if (vkcheck)
+ mindex = vkcheck->NewIndex();
+ else mindex = key->Index();
+
+ proxBuf = "";
+ proxLem = "";
+
+ // computer percent complete so we can report to our progress callback
+ float per = (float)mindex / highIndex;
+ // between 5%-98%
+ per *= 93; per += 5;
+ char newperc = (char)per;
+ if (newperc > perc) {
+ perc = newperc;
+ (*percent)(perc, percentUserData);
+ }
+
+ // get "content" field
+ const char *content = StripText();
+
+ bool good = false;
+
+ // start out entry
+ Document *doc = new Document();
+ // get "key" field
+ SWBuf keyText = (vkcheck) ? vkcheck->getOSISRef() : getKeyText();
+ if (content && *content) {
+ good = true;
+
+
+ // build "strong" field
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ strong="";
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+
+ lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8
+ doc->add( *Field::Text(_T("key"), wcharBuffer ) );
+
+ if (includeKeyInSearch) {
+ c = keyText;
+ c += " ";
+ c += content;
+ content = c.c_str();
+ }
+
+ lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8
+ doc->add( *Field::UnStored(_T("content"), wcharBuffer) );
+
+ if (strong.length() > 0) {
+ lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE);
+ doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) );
+//printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str());
+ }
+
+//printf("setting fields (%s).\n", (const char *)*key);
+//fflush(stdout);
+ }
+ // don't write yet, cuz we have to see if we're the first of a prox block (5:1 or chapter5/verse1
+
+ // for VerseKeys use chapter
+ if (vkcheck) {
+ chapMax = *vkcheck;
+ // we're the first verse in a chapter
+ if (vkcheck->Verse() == 1) {
+ chapMax = MAXVERSE;
+ VerseKey saveKey = *vkcheck;
+ while ((!err) && (*vkcheck <= chapMax)) {
+//printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str());
+//printf("building proxBuf from (%s).\n", (const char *)*key);
+
+ content = StripText();
+ if (content && *content) {
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+ proxBuf += content;
+ proxBuf.append(' ');
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append("\n");
+ }
+ (*this)++;
+ err = Error();
+ }
+ err = 0;
+ *vkcheck = saveKey;
+ }
+ }
+
+ // for TreeKeys use siblings if we have no children
+ else if (tkcheck) {
+ if (!tkcheck->hasChildren()) {
+ if (!tkcheck->previousSibling()) {
+ do {
+//printf("building proxBuf from (%s).\n", (const char *)*key);
+//fflush(stdout);
+
+ content = StripText();
+ if (content && *content) {
+ // build "strong" field
+ strong = "";
+ AttributeTypeList::iterator words;
+ AttributeList::iterator word;
+ AttributeValue::iterator strongVal;
+
+ words = getEntryAttributes().find("Word");
+ if (words != getEntryAttributes().end()) {
+ for (word = words->second.begin();word != words->second.end(); word++) {
+ int partCount = atoi(word->second["PartCount"]);
+ if (!partCount) partCount = 1;
+ for (int i = 0; i < partCount; i++) {
+ SWBuf tmp = "Lemma";
+ if (partCount > 1) tmp.appendFormatted(".%d", i+1);
+ strongVal = word->second.find(tmp);
+ if (strongVal != word->second.end()) {
+ // cheeze. skip empty article tags that weren't assigned to any text
+ if (strongVal->second == "G3588") {
+ if (word->second.find("Text") == word->second.end())
+ continue; // no text? let's skip
+ }
+ strong.append(strongVal->second);
+ strong.append(' ');
+ }
+ }
+ }
+ }
+
+ proxBuf += content;
+ proxBuf.append(' ');
+ proxLem += strong;
+ if (proxLem.length())
+ proxLem.append("\n");
+ }
+ } while (tkcheck->nextSibling());
+ tkcheck->parent();
+ tkcheck->firstChild();
+ }
+ else tkcheck->nextSibling(); // reposition from our previousSibling test
+ }
+ }
+
+ if (proxBuf.length() > 0) {
+
+ lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8
+
+//printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str());
+
+ doc->add( *Field::UnStored(_T("prox"), wcharBuffer) );
+ good = true;
+ }
+ if (proxLem.length() > 0) {
+ lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8
+ doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) );
+ good = true;
+ }
+ if (good) {
+//printf("writing (%s).\n", (const char *)*key);
+//fflush(stdout);
+ coreWriter->addDocument(doc);
+ }
+ delete doc;
+
+ (*this)++;
+ err = Error();
+ }
+
+ // Optimizing automatically happens with the call to addIndexes
+ //coreWriter->optimize();
+ coreWriter->close();
+
+ if (IndexReader::indexExists(target.c_str())) {
+ d = FSDirectory::getDirectory(target.c_str(), false);
+ if (IndexReader::isLocked(d)) {
+ IndexReader::unlock(d);
+ }
+
+ fsWriter = new IndexWriter( d, an, false);
+ } else {
+ d = FSDirectory::getDirectory(target.c_str(), true);
+ fsWriter = new IndexWriter( d ,an, true);
+ }
+
+ Directory *dirs[] = { ramDir, 0 };
+ fsWriter->addIndexes(dirs);
+ fsWriter->close();
+
+ delete ramDir;
+ delete coreWriter;
+ delete fsWriter;
+ delete an;
+
+ // reposition module back to where it was before we were called
+ setKey(*saveKey);
+
+ if (!saveKey->Persist())
+ delete saveKey;
+
+ if (searchKey)
+ delete searchKey;
+
+ processEntryAttributes(savePEA);
+
+ // reset option filters back to original values
+ StringList::iterator origVal = filterSettings.begin();
+ for (OptionFilterList::iterator filter = optionFilters->begin(); filter != optionFilters->end(); filter++) {
+ (*filter)->setOptionValue(*origVal++);
+ }
+
+ return 0;
+#else
+ return SWSearchable::createSearchFramework(percent, percentUserData);
+#endif
+}
+
+/** OptionFilterBuffer a text buffer
+ * @param filters the FilterList of filters to iterate
+ * @param buf the buffer to filter
+ * @param key key location from where this buffer was extracted
+ */
+void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, SWKey *key) {
+ OptionFilterList::iterator it;
+ for (it = filters->begin(); it != filters->end(); it++) {
+ (*it)->processText(buf, key, this);
+ }
+}
+
+/** FilterBuffer a text buffer
+ * @param filters the FilterList of filters to iterate
+ * @param buf the buffer to filter
+ * @param key key location from where this buffer was extracted
+ */
+void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, SWKey *key) {
+ FilterList::iterator it;
+ for (it = filters->begin(); it != filters->end(); it++) {
+ (*it)->processText(buf, key, this);
+ }
+}
+
+signed char SWModule::createModule(const char*) {
+ return -1;
+}
+
+void SWModule::setEntry(const char*, long) {
+}
+
+void SWModule::linkEntry(const SWKey*) {
+}
+
+SWORD_NAMESPACE_END