diff options
Diffstat (limited to 'src/modules/swmodule.cpp')
-rw-r--r-- | src/modules/swmodule.cpp | 454 |
1 files changed, 257 insertions, 197 deletions
diff --git a/src/modules/swmodule.cpp b/src/modules/swmodule.cpp index 758b8d2..6944c4e 100644 --- a/src/modules/swmodule.cpp +++ b/src/modules/swmodule.cpp @@ -1,10 +1,12 @@ /****************************************************************************** - * swmodule.cpp - code for base class 'module'. Module is the basis for all - * types of modules (e.g. texts, commentaries, maps, lexicons, - * etc.) * + * swmodule.cpp - code for base class 'SWModule'. SWModule is the basis + * for all types of modules (e.g. texts, commentaries, + * maps, lexicons, etc.) * - * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org) + * $Id: swmodule.cpp 2976 2013-09-10 14:09:44Z scribe $ + * + * Copyright 1999-2013 CrossWire Bible Society (http://www.crosswire.org) * CrossWire Bible Society * P. O. Box 2528 * Tempe, AZ 85280-2528 @@ -27,7 +29,6 @@ #include <sysdata.h> #include <swmodule.h> #include <utilstr.h> -#include <regex.h> // GNU #include <swfilter.h> #include <versekey.h> // KLUDGE for Search #include <treekeyidx.h> // KLUDGE for Search @@ -38,9 +39,17 @@ #include <iostream> #endif +#ifdef USECXX11REGEX +#include <regex> +#ifndef REG_ICASE +#define REG_ICASE std::regex::icase +#endif +#else +#include <regex.h> // GNU +#endif + #ifdef USELUCENE #include <CLucene.h> -#include <CLucene/CLBackwards.h> //Lucence includes //#include "CLucene.h" @@ -61,7 +70,7 @@ using std::vector; SWORD_NAMESPACE_START -SWDisplay SWModule::rawdisp; +SWModule::StdOutDisplay SWModule::rawdisp; typedef std::list<SWBuf> StringList; @@ -77,7 +86,7 @@ typedef std::list<SWBuf> StringList; */ SWModule::SWModule(const char *imodname, const char *imoddesc, SWDisplay *idisp, const char *imodtype, SWTextEncoding encoding, SWTextDirection direction, SWTextMarkup markup, const char *imodlang) { - key = CreateKey(); + key = createKey(); entryBuf = ""; config = &ownConfig; modname = 0; @@ -120,44 +129,44 @@ SWModule::~SWModule() delete [] modlang; if (key) { - if (!key->Persist()) + if (!key->isPersist()) delete key; } stripFilters->clear(); - rawFilters->clear(); - renderFilters->clear(); - optionFilters->clear(); - encodingFilters->clear(); + rawFilters->clear(); + renderFilters->clear(); + optionFilters->clear(); + encodingFilters->clear(); entryAttributes.clear(); - delete stripFilters; - delete rawFilters; - delete renderFilters; - delete optionFilters; - delete encodingFilters; + delete stripFilters; + delete rawFilters; + delete renderFilters; + delete optionFilters; + delete encodingFilters; } /****************************************************************************** - * SWModule::CreateKey - Allocates a key of specific type for module + * SWModule::createKey - Allocates a key of specific type for module * * RET: pointer to allocated key */ -SWKey *SWModule::CreateKey() const +SWKey *SWModule::createKey() const { return new SWKey(); } /****************************************************************************** - * SWModule::Error - Gets and clears error status + * SWModule::popError - Gets and clears error status * * RET: error status */ -char SWModule::Error() +char SWModule::popError() { char retval = error; @@ -175,11 +184,7 @@ char SWModule::Error() * RET: pointer to modname */ -char *SWModule::Name(const char *imodname) { - return stdstr(&modname, imodname); -} - -char *SWModule::Name() const { +const char *SWModule::getName() const { return modname; } @@ -193,11 +198,7 @@ char *SWModule::Name() const { * RET: pointer to moddesc */ -char *SWModule::Description(const char *imoddesc) { - return stdstr(&moddesc, imoddesc); -} - -char *SWModule::Description() const { +const char *SWModule::getDescription() const { return moddesc; } @@ -211,70 +212,20 @@ char *SWModule::Description() const { * RET: pointer to modtype */ -char *SWModule::Type(const char *imodtype) { - return stdstr(&modtype, imodtype); -} - -char *SWModule::Type() const { +const char *SWModule::getType() const { return modtype; } /****************************************************************************** - * SWModule::Direction - Sets/gets module direction + * SWModule::getDirection - Sets/gets module direction * * ENT: newdir - value which to set direction * [-1] - only get * * RET: char direction */ -char SWModule::Direction(signed char newdir) { - if (newdir != -1) - direction = newdir; - return direction; -} - -/****************************************************************************** - * SWModule::Encoding - Sets/gets module encoding - * - * ENT: newdir - value which to set direction - * [-1] - only get - * - * RET: char encoding - */ -char SWModule::Encoding(signed char newenc) { - if (newenc != -1) - encoding = newenc; - return encoding; -} - -/****************************************************************************** - * SWModule::Markup - Sets/gets module markup - * - * ENT: newdir - value which to set direction - * [-1] - only get - * - * RET: char markup - */ -char SWModule::Markup(signed char newmark) { - if (newmark != -1) - markup = newmark; - return markup; -} - - -/****************************************************************************** - * SWModule::Lang - Sets/gets module language - * - * ENT: imodlang - value which to set modlang - * [0] - only get - * - * RET: pointer to modname - */ - -char *SWModule::Lang(const char *imodlang) -{ - if (imodlang) stdstr(&modlang, imodlang); - return modlang; +char SWModule::getDirection() const { + return direction; } @@ -295,19 +246,17 @@ void SWModule::setDisplay(SWDisplay *idisp) { disp = idisp; } - /****************************************************************************** - * SWModule::Display - Calls this modules display object and passes itself - * - * RET: error status - */ + * * SWModule::Display - Calls this modules display object and passes itself + * * + * * RET: error status + * */ -char SWModule::Display() { - disp->Display(*this); +char SWModule::display() { + disp->display(*this); return 0; } - /****************************************************************************** * SWModule::getKey - Gets the key from this module that points to the position * record @@ -333,12 +282,12 @@ char SWModule::setKey(const SWKey *ikey) { SWKey *oldKey = 0; if (key) { - if (!key->Persist()) // if we have our own copy + if (!key->isPersist()) // if we have our own copy oldKey = key; } - if (!ikey->Persist()) { // if we are to keep our own copy - key = CreateKey(); + if (!ikey->isPersist()) { // if we are to keep our own copy + key = createKey(); *key = *ikey; } else key = (SWKey *)ikey; // if we are to just point to an external key @@ -360,7 +309,7 @@ char SWModule::setKey(const SWKey *ikey) { void SWModule::setPosition(SW_POSITION p) { *key = p; - char saveError = key->Error(); + char saveError = key->popError(); switch (p) { case POS_TOP: @@ -388,7 +337,7 @@ void SWModule::setPosition(SW_POSITION p) { void SWModule::increment(int steps) { (*key) += steps; - error = key->Error(); + error = key->popError(); } @@ -402,7 +351,7 @@ void SWModule::increment(int steps) { void SWModule::decrement(int steps) { (*key) -= steps; - error = key->Error(); + error = key->popError(); } @@ -426,7 +375,7 @@ void SWModule::decrement(int steps) { ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { - listKey.ClearList(); + listKey.clear(); SWBuf term = istr; bool includeComponents = false; // for entryAttrib e.g., /Lemma.1/ @@ -447,10 +396,21 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc return listKey; } - SWKey *saveKey = 0; + SWKey *saveKey = 0; SWKey *searchKey = 0; - SWKey *resultKey = CreateKey(); + SWKey *resultKey = createKey(); + SWKey *lastKey = createKey(); + SWBuf lastBuf = ""; + +#ifdef USECXX11REGEX + std::locale oldLocale; + std::locale::global(std::locale("en_US.UTF-8")); + + std::regex preg; +#else regex_t preg; +#endif + vector<SWBuf> words; vector<SWBuf> window; const char *sres; @@ -460,36 +420,40 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc // determine if we might be doing special strip searches. useful for knowing if we can use shortcuts bool specialStrips = (getConfigEntry("LocalStripFilter") - || (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents")) - || (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints")) - || (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints")) - || (strchr(istr, '<'))); + || (getConfig().has("GlobalOptionFilter", "UTF8GreekAccents")) + || (getConfig().has("GlobalOptionFilter", "UTF8HebrewPoints")) + || (getConfig().has("GlobalOptionFilter", "UTF8ArabicPoints")) + || (strchr(istr, '<'))); - processEntryAttributes(searchType == -3); + setProcessEntryAttributes(searchType == -3); - if (!key->Persist()) { - saveKey = CreateKey(); + if (!key->isPersist()) { + saveKey = createKey(); *saveKey = *key; } else saveKey = key; - searchKey = (scope)?scope->clone():(key->Persist())?key->clone():0; + searchKey = (scope)?scope->clone():(key->isPersist())?key->clone():0; if (searchKey) { - searchKey->Persist(1); + searchKey->setPersist(true); setKey(*searchKey); } (*percent)(perc, percentUserData); *this = BOTTOM; - long highIndex = key->Index(); + long highIndex = key->getIndex(); if (!highIndex) highIndex = 1; // avoid division by zero errors. *this = TOP; if (searchType >= 0) { +#ifdef USECXX11REGEX + preg = std::regex((SWBuf(".*")+istr+".*").c_str(), std::regex_constants::extended & flags); +#else flags |=searchType|REG_NOSUB|REG_EXTENDED; regcomp(&preg, istr, flags); +#endif } (*percent)(++perc, percentUserData); @@ -497,10 +461,6 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc #ifdef USELUCENE if (searchType == -4) { // lucene - //Buffers for the wchar<->utf8 char* conversion - const unsigned short int MAX_CONV_SIZE = 2047; - wchar_t wcharBuffer[MAX_CONV_SIZE + 1]; - char utfBuffer[MAX_CONV_SIZE + 1]; lucene::index::IndexReader *ir = 0; lucene::search::IndexSearcher *is = 0; @@ -513,20 +473,18 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc const TCHAR *stopWords[] = { 0 }; standard::StandardAnalyzer analyzer(stopWords); - lucene_utf8towcs(wcharBuffer, istr, MAX_CONV_SIZE); //TODO Is istr always utf8? - q = QueryParser::parse(wcharBuffer, _T("content"), &analyzer); + q = QueryParser::parse((wchar_t *)utf8ToWChar(istr).getRawData(), _T("content"), &analyzer); (*percent)(20, percentUserData); h = is->search(q); (*percent)(80, percentUserData); // iterate thru each good module position that meets the search bool checkBounds = getKey()->isBoundSet(); - for (long i = 0; i < h->length(); i++) { + for (unsigned long i = 0; i < (unsigned long)h->length(); i++) { Document &doc = h->doc(i); // set a temporary verse key to this module position - lucene_wcstoutf8(utfBuffer, doc.get(_T("key")), MAX_CONV_SIZE); - *resultKey = utfBuffer; //TODO Does a key always accept utf8? + *resultKey = wcharToUTF8(doc.get(_T("key"))); //TODO Does a key always accept utf8? // check to see if it sets ok (within our bounds) and if not, skip if (checkBounds) { @@ -536,7 +494,7 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc } } listKey << *resultKey; - listKey.GetElement()->userData = (void *)((__u32)(h->score(i)*100)); + listKey.getElement()->userData = (__u64)((__u32)(h->score(i)*100)); } (*percent)(98, percentUserData); } @@ -606,8 +564,8 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc (*percent)(perc, percentUserData); - while ((searchType != -4) && !Error() && !terminateSearch) { - long mindex = key->Index(); + while ((searchType != -4) && !popError() && !terminateSearch) { + long mindex = key->getIndex(); float per = (float)mindex / highIndex; per *= 93; per += 5; @@ -619,17 +577,35 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc else if (newperc < perc) { #ifndef _MSC_VER std::cerr << "Serious error: new percentage complete is less than previous value\n"; - std::cerr << "index: " << (key->Index()) << "\n"; + std::cerr << "index: " << (key->getIndex()) << "\n"; std::cerr << "highIndex: " << highIndex << "\n"; std::cerr << "newperc ==" << (int)newperc << "%" << "is smaller than\n"; std::cerr << "perc == " << (int )perc << "% \n"; #endif } if (searchType >= 0) { - if (!regexec(&preg, StripText(), 0, 0, 0)) { + SWBuf textBuf = stripText(); +#ifdef USECXX11REGEX + if (std::regex_match(std::string(textBuf.c_str()), preg)) { +#else + if (!regexec(&preg, textBuf, 0, 0, 0)) { +#endif *resultKey = *getKey(); resultKey->clearBound(); listKey << *resultKey; + lastBuf = ""; + } +#ifdef USECXX11REGEX + else if (std::regex_match(std::string((lastBuf + ' ' + textBuf).c_str()), preg)) { +#else + else if (!regexec(&preg, lastBuf + ' ' + textBuf, 0, 0, 0)) { +#endif + lastKey->clearBound(); + listKey << *lastKey; + lastBuf = textBuf; + } + else { + lastBuf = textBuf; } } @@ -640,10 +616,10 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc // phrase case -1: - textBuf = StripText(); + textBuf = stripText(); if ((flags & REG_ICASE) == REG_ICASE) toupperstr(textBuf); sres = strstr(textBuf.c_str(), term.c_str()); - if (sres) { //it's also in the StripText(), so we have a valid search result item now + if (sres) { //it's also in the stripText(), so we have a valid search result item now *resultKey = *getKey(); resultKey->clearBound(); listKey << *resultKey; @@ -655,7 +631,7 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc int loopCount = 0; unsigned int foundWords = 0; do { - textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : StripText(); + textBuf = ((loopCount == 0)&&(!specialStrips)) ? getRawEntry() : stripText(); foundWords = 0; for (unsigned int i = 0; i < words.size(); i++) { @@ -680,7 +656,7 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc // entry attributes case -3: { - RenderText(); // force parse + renderText(); // force parse AttributeTypeList &entryAttribs = getEntryAttributes(); AttributeTypeList::iterator i1Start, i1End; AttributeList::iterator i2Start, i2End; @@ -804,25 +780,32 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc break; } // end switch } + *lastKey = *getKey(); (*this)++; } // cleaup work - if (searchType >= 0) + if (searchType >= 0) { +#ifdef USECXX11REGEX + std::locale::global(oldLocale); +#else regfree(&preg); +#endif + } setKey(*saveKey); - if (!saveKey->Persist()) + if (!saveKey->isPersist()) delete saveKey; if (searchKey) delete searchKey; delete resultKey; + delete lastKey; listKey = TOP; - processEntryAttributes(savePEA); + setProcessEntryAttributes(savePEA); (*percent)(100, percentUserData); @@ -833,7 +816,7 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc /****************************************************************************** - * SWModule::StripText() - calls all stripfilters on current text + * SWModule::stripText() - calls all stripfilters on current text * * ENT: buf - buf to massage instead of this modules current text * len - max len of buf @@ -841,29 +824,45 @@ ListKey &SWModule::search(const char *istr, int searchType, int flags, SWKey *sc * RET: this module's text at current key location massaged by Strip filters */ -const char *SWModule::StripText(const char *buf, int len) { - return RenderText(buf, len, false); +const char *SWModule::stripText(const char *buf, int len) { + static SWBuf local; + local = renderText(buf, len, false); + return local.c_str(); +} + + +/** SWModule::getRenderHeader() - Produces any header data which might be + * useful which associated with the processing done with this filter. + * A typical example is a suggested CSS style block for classed + * containers. + */ +const char *SWModule::getRenderHeader() const { + FilterList::const_iterator first = getRenderFilters().begin(); + if (first != getRenderFilters().end()) { + return (*first)->getHeader(); + } + return ""; } /****************************************************************************** - * SWModule::RenderText - calls all renderfilters on current text + * SWModule::renderText - calls all renderfilters on current text * * ENT: buf - buffer to Render instead of current module position * - * RET: this module's text at current key location massaged by RenderText filters + * RET: this module's text at current key location massaged by renderText filters */ - const char *SWModule::RenderText(const char *buf, int len, bool render) { + SWBuf SWModule::renderText(const char *buf, int len, bool render) { bool savePEA = isProcessEntryAttributes(); if (!buf) { entryAttributes.clear(); } else { - processEntryAttributes(false); + setProcessEntryAttributes(false); } - static SWBuf local; + SWBuf local; if (buf) local = buf; @@ -889,37 +888,37 @@ const char *SWModule::StripText(const char *buf, int len) { tmpbuf = null; } - processEntryAttributes(savePEA); + setProcessEntryAttributes(savePEA); return tmpbuf; } /****************************************************************************** - * SWModule::RenderText - calls all renderfilters on current text + * SWModule::renderText - calls all renderfilters on current text * * ENT: tmpKey - key to use to grab text * * RET: this module's text at current key location massaged by RenderFilers */ - const char *SWModule::RenderText(const SWKey *tmpKey) { +SWBuf SWModule::renderText(const SWKey *tmpKey) { SWKey *saveKey; const char *retVal; - if (!key->Persist()) { - saveKey = CreateKey(); + if (!key->isPersist()) { + saveKey = createKey(); *saveKey = *key; } else saveKey = key; setKey(*tmpKey); - retVal = RenderText(); + retVal = renderText(); setKey(*saveKey); - if (!saveKey->Persist()) + if (!saveKey->isPersist()) delete saveKey; return retVal; @@ -927,35 +926,53 @@ const char *SWModule::StripText(const char *buf, int len) { /****************************************************************************** - * SWModule::StripText - calls all StripTextFilters on current text + * SWModule::stripText - calls all StripTextFilters on current text * * ENT: tmpKey - key to use to grab text * * RET: this module's text at specified key location massaged by Strip filters */ -const char *SWModule::StripText(const SWKey *tmpKey) { +const char *SWModule::stripText(const SWKey *tmpKey) { SWKey *saveKey; const char *retVal; - if (!key->Persist()) { - saveKey = CreateKey(); + if (!key->isPersist()) { + saveKey = createKey(); *saveKey = *key; } else saveKey = key; setKey(*tmpKey); - retVal = StripText(); + retVal = stripText(); setKey(*saveKey); - if (!saveKey->Persist()) + if (!saveKey->isPersist()) delete saveKey; return retVal; } +/****************************************************************************** + * SWModule::getBibliography -Returns bibliographic data for a module in the + * requested format + * + * ENT: bibFormat format of the bibliographic data + * + * RET: bibliographic data in the requested format as a string (BibTeX by default) + */ + +SWBuf SWModule::getBibliography(unsigned char bibFormat) const { + SWBuf s; + switch (bibFormat) { + case BIB_BIBTEX: + s.append("@Book {").append(modname).append(", Title = \"").append(moddesc).append("\", Publisher = \"CrossWire Bible Society\"}"); + break; + } + return s; +} const char *SWModule::getConfigEntry(const char *key) const { ConfigEntMap::iterator it = config->find(key); @@ -1008,7 +1025,6 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void SWBuf c; const int MAX_CONV_SIZE = 1024 * 1024; - wchar_t *wcharBuffer = new wchar_t[MAX_CONV_SIZE + 1]; // turn all filters to default values StringList filterSettings; @@ -1030,15 +1046,15 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void // save key information so as not to disrupt original // module position - if (!key->Persist()) { - saveKey = CreateKey(); + if (!key->isPersist()) { + saveKey = createKey(); *saveKey = *key; } else saveKey = key; - searchKey = (key->Persist())?key->clone():0; + searchKey = (key->isPersist())?key->clone():0; if (searchKey) { - searchKey->Persist(1); + searchKey->setPersist(1); setKey(*searchKey); } @@ -1062,19 +1078,19 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void VerseKey *vkcheck = 0; vkcheck = SWDYNAMIC_CAST(VerseKey, key); VerseKey *chapMax = 0; - if (vkcheck) chapMax = (VerseKey *)vkcheck->clone(); + if (vkcheck) chapMax = (VerseKey *)vkcheck->clone(); TreeKeyIdx *tkcheck = 0; tkcheck = SWDYNAMIC_CAST(TreeKeyIdx, key); *this = BOTTOM; - long highIndex = key->Index(); + long highIndex = key->getIndex(); if (!highIndex) highIndex = 1; // avoid division by zero errors. bool savePEA = isProcessEntryAttributes(); - processEntryAttributes(true); + setProcessEntryAttributes(true); // prox chapter blocks // position module at the beginning @@ -1082,14 +1098,17 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void SWBuf proxBuf; SWBuf proxLem; + SWBuf proxMorph; SWBuf strong; + SWBuf morph; - char err = Error(); + char err = popError(); while (!err) { - long mindex = key->Index(); + long mindex = key->getIndex(); proxBuf = ""; proxLem = ""; + proxMorph = ""; // computer percent complete so we can report to our progress callback float per = (float)mindex / highIndex; @@ -1102,7 +1121,7 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void } // get "content" field - const char *content = StripText(); + const char *content = stripText(); bool good = false; @@ -1118,8 +1137,10 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; + AttributeValue::iterator morphVal; strong=""; + morph=""; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { for (word = words->second.begin();word != words->second.end(); word++) { @@ -1136,16 +1157,22 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void continue; // no text? let's skip } strong.append(strongVal->second); + morph.append(strongVal->second); + morph.append('@'); + SWBuf tmp = "Morph"; + if (partCount > 1) tmp.appendFormatted(".%d", i+1); + morphVal = word->second.find(tmp); + if (morphVal != word->second.end()) { + morph.append(morphVal->second); + } strong.append(' '); + morph.append(' '); } } } } - lucene_utf8towcs(wcharBuffer, keyText, MAX_CONV_SIZE); //keyText must be utf8 -// doc->add( *(new Field("key", wcharBuffer, Field::STORE_YES | Field::INDEX_TOKENIZED))); - doc->add( *Field::Text(_T("key"), wcharBuffer ) ); - + doc->add(*_CLNEW Field(_T("key"), (wchar_t *)utf8ToWChar(keyText).getRawData(), Field::STORE_YES | Field::INDEX_UNTOKENIZED)); if (includeKeyInSearch) { c = keyText; @@ -1154,12 +1181,11 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void content = c.c_str(); } - lucene_utf8towcs(wcharBuffer, content, MAX_CONV_SIZE); //content must be utf8 - doc->add( *Field::UnStored(_T("content"), wcharBuffer) ); + doc->add(*_CLNEW Field(_T("content"), (wchar_t *)utf8ToWChar(content).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED)); if (strong.length() > 0) { - lucene_utf8towcs(wcharBuffer, strong, MAX_CONV_SIZE); - doc->add( *Field::UnStored(_T("lemma"), wcharBuffer) ); + doc->add(*_CLNEW Field(_T("lemma"), (wchar_t *)utf8ToWChar(strong).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED)); + doc->add(*_CLNEW Field(_T("morph"), (wchar_t *)utf8ToWChar(morph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED)); //printf("setting fields (%s).\ncontent: %s\nlemma: %s\n", (const char *)*key, content, strong.c_str()); } @@ -1172,20 +1198,22 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void if (vkcheck) { *chapMax = *vkcheck; // we're the first verse in a chapter - if (vkcheck->Verse() == 1) { + if (vkcheck->getVerse() == 1) { *chapMax = MAXVERSE; VerseKey saveKey = *vkcheck; while ((!err) && (*vkcheck <= *chapMax)) { //printf("building proxBuf from (%s).\nproxBuf.c_str(): %s\n", (const char *)*key, proxBuf.c_str()); //printf("building proxBuf from (%s).\n", (const char *)*key); - content = StripText(); + content = stripText(); if (content && *content) { // build "strong" field strong = ""; + morph = ""; AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; + AttributeValue::iterator morphVal; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { @@ -1203,7 +1231,16 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void continue; // no text? let's skip } strong.append(strongVal->second); + morph.append(strongVal->second); + morph.append('@'); + SWBuf tmp = "Morph"; + if (partCount > 1) tmp.appendFormatted(".%d", i+1); + morphVal = word->second.find(tmp); + if (morphVal != word->second.end()) { + morph.append(morphVal->second); + } strong.append(' '); + morph.append(' '); } } } @@ -1211,11 +1248,14 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void proxBuf += content; proxBuf.append(' '); proxLem += strong; - if (proxLem.length()) + proxMorph += morph; + if (proxLem.length()) { proxLem.append("\n"); + proxMorph.append("\n"); + } } (*this)++; - err = Error(); + err = popError(); } err = 0; *vkcheck = saveKey; @@ -1230,13 +1270,15 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void //printf("building proxBuf from (%s).\n", (const char *)*key); //fflush(stdout); - content = StripText(); + content = stripText(); if (content && *content) { // build "strong" field strong = ""; + morph = ""; AttributeTypeList::iterator words; AttributeList::iterator word; AttributeValue::iterator strongVal; + AttributeValue::iterator morphVal; words = getEntryAttributes().find("Word"); if (words != getEntryAttributes().end()) { @@ -1254,7 +1296,16 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void continue; // no text? let's skip } strong.append(strongVal->second); + morph.append(strongVal->second); + morph.append('@'); + SWBuf tmp = "Morph"; + if (partCount > 1) tmp.appendFormatted(".%d", i+1); + morphVal = word->second.find(tmp); + if (morphVal != word->second.end()) { + morph.append(morphVal->second); + } strong.append(' '); + morph.append(' '); } } } @@ -1263,8 +1314,11 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void proxBuf += content; proxBuf.append(' '); proxLem += strong; - if (proxLem.length()) + proxMorph += morph; + if (proxLem.length()) { proxLem.append("\n"); + proxMorph.append("\n"); + } } } while (tkcheck->nextSibling()); tkcheck->parent(); @@ -1276,16 +1330,12 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void if (proxBuf.length() > 0) { - lucene_utf8towcs(wcharBuffer, proxBuf, MAX_CONV_SIZE); //keyText must be utf8 - -//printf("proxBuf after (%s).\nprox: %s\nproxLem: %s\n", (const char *)*key, proxBuf.c_str(), proxLem.c_str()); - - doc->add( *Field::UnStored(_T("prox"), wcharBuffer) ); + doc->add(*_CLNEW Field(_T("prox"), (wchar_t *)utf8ToWChar(proxBuf).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED)); good = true; } if (proxLem.length() > 0) { - lucene_utf8towcs(wcharBuffer, proxLem, MAX_CONV_SIZE); //keyText must be utf8 - doc->add( *Field::UnStored(_T("proxlem"), wcharBuffer) ); + doc->add(*_CLNEW Field(_T("proxlem"), (wchar_t *)utf8ToWChar(proxLem).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) ); + doc->add(*_CLNEW Field(_T("proxmorph"), (wchar_t *)utf8ToWChar(proxMorph).getRawData(), Field::STORE_NO | Field::INDEX_TOKENIZED) ); good = true; } if (good) { @@ -1296,27 +1346,39 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void delete doc; (*this)++; - err = Error(); + err = popError(); } // Optimizing automatically happens with the call to addIndexes //coreWriter->optimize(); coreWriter->close(); +#ifdef CLUCENE2 + d = FSDirectory::getDirectory(target.c_str()); +#endif if (IndexReader::indexExists(target.c_str())) { +#ifndef CLUCENE2 d = FSDirectory::getDirectory(target.c_str(), false); +#endif if (IndexReader::isLocked(d)) { IndexReader::unlock(d); } - fsWriter = new IndexWriter( d, an, false); - } else { + } + else { +#ifndef CLUCENE2 d = FSDirectory::getDirectory(target.c_str(), true); +#endif fsWriter = new IndexWriter(d, an, true); } Directory *dirs[] = { ramDir, 0 }; +#ifdef CLUCENE2 + lucene::util::ConstValueArray< lucene::store::Directory *>dirsa(dirs, 1); + fsWriter->addIndexes(dirsa); +#else fsWriter->addIndexes(dirs); +#endif fsWriter->close(); delete ramDir; @@ -1327,15 +1389,15 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void // reposition module back to where it was before we were called setKey(*saveKey); - if (!saveKey->Persist()) + if (!saveKey->isPersist()) delete saveKey; if (searchKey) delete searchKey; - delete chapMax; + delete chapMax; - processEntryAttributes(savePEA); + setProcessEntryAttributes(savePEA); // reset option filters back to original values StringList::iterator origVal = filterSettings.begin(); @@ -1343,8 +1405,6 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void (*filter)->setOptionValue(*origVal++); } - delete [] wcharBuffer; - return 0; #else return SWSearchable::createSearchFramework(percent, percentUserData); @@ -1356,7 +1416,7 @@ signed char SWModule::createSearchFramework(void (*percent)(char, void *), void * @param buf the buffer to filter * @param key key location from where this buffer was extracted */ -void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) { +void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey *key) const { OptionFilterList::iterator it; for (it = filters->begin(); it != filters->end(); it++) { (*it)->processText(buf, key, this); @@ -1368,7 +1428,7 @@ void SWModule::filterBuffer(OptionFilterList *filters, SWBuf &buf, const SWKey * * @param buf the buffer to filter * @param key key location from where this buffer was extracted */ -void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) { +void SWModule::filterBuffer(FilterList *filters, SWBuf &buf, const SWKey *key) const { FilterList::iterator it; for (it = filters->begin(); it != filters->end(); it++) { (*it)->processText(buf, key, this); |