From 71a39f4652cd51df814c930dd268f3c9ad2aee86 Mon Sep 17 00:00:00 2001 From: "Roberto C. Sanchez" Date: Sat, 29 Mar 2014 10:54:01 -0400 Subject: Imported Upstream version 1.6.0+dfsg --- src/modules/texts/rawtext4/rawtext4.cpp | 453 ++++---------------------------- 1 file changed, 49 insertions(+), 404 deletions(-) (limited to 'src/modules/texts/rawtext4/rawtext4.cpp') diff --git a/src/modules/texts/rawtext4/rawtext4.cpp b/src/modules/texts/rawtext4/rawtext4.cpp index a06691e..65f5cef 100644 --- a/src/modules/texts/rawtext4/rawtext4.cpp +++ b/src/modules/texts/rawtext4/rawtext4.cpp @@ -1,9 +1,24 @@ /****************************************************************************** - * rawtext4.cpp - code for class 'RawText4'- a module that reads raw text files: - * ot and nt using indexs ??.bks ??.cps ??.vss + * rawtext4.cpp - code for class 'RawText4'- a module that reads raw text + * files: ot and nt using indexs ??.bks ??.cps ??.vss + * + * + * Copyright 2009 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * */ - #include #include #include @@ -42,27 +57,9 @@ typedef list longlist; * idisp - Display object to use for displaying */ -RawText4::RawText4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang) - : SWText(iname, idesc, idisp, enc, dir, mark, ilang), +RawText4::RawText4(const char *ipath, const char *iname, const char *idesc, SWDisplay *idisp, SWTextEncoding enc, SWTextDirection dir, SWTextMarkup mark, const char* ilang, const char *versification) + : SWText(iname, idesc, idisp, enc, dir, mark, ilang, versification), RawVerse4(ipath) { - -#ifndef USELUCENE - SWBuf fname; - fname = path; - char ch = fname.c_str()[strlen(fname.c_str())-1]; - if ((ch != '/') && (ch != '\\')) - fname += "/"; - - for (int loop = 0; loop < 2; loop++) { - fastSearch[loop] = 0; - SWBuf fastidxname =(fname + ((loop)?"ntwords.dat":"otwords.dat")); - if (FileMgr::existsFile(fastidxname.c_str())) { - fastidxname = (fname + ((loop)?"ntwords.idx":"otwords.idx")); - if (FileMgr::existsFile(fastidxname.c_str())) - fastSearch[loop] = new RawStr4((fname + ((loop)?"ntwords":"otwords")).c_str()); - } - } -#endif } @@ -71,13 +68,6 @@ RawText4::RawText4(const char *ipath, const char *iname, const char *idesc, SWDi */ RawText4::~RawText4() { -#ifndef USELUCENE - if (fastSearch[0]) - delete fastSearch[0]; - - if (fastSearch[1]) - delete fastSearch[1]; -#endif } @@ -98,7 +88,7 @@ SWBuf &RawText4::getRawEntryBuf() { unsigned long size = 0; VerseKey &key = getVerseKey(); - findOffset(key.Testament(), key.Index(), &start, &size); + findOffset(key.Testament(), key.TestamentIndex(), &start, &size); entrySize = size; // support getEntrySize call entryBuf = ""; @@ -114,382 +104,16 @@ SWBuf &RawText4::getRawEntryBuf() { } -signed char RawText4::createSearchFramework(void (*percent)(char, void *), void *percentUserData) { -#ifndef USELUCENE - SWKey *savekey = 0; - SWKey *searchkey = 0; - SWKey textkey; - char *word = 0; - char *wordBuf = 0; - - // dictionary holds words associated with a list - // containing every module position that contains - // the word. [0] Old Testament; [1] NT - map < SWBuf, list > dictionary[2]; - - - // save key information so as not to disrupt original - // module position - if (!key->Persist()) { - savekey = CreateKey(); - *savekey = *key; - } - else savekey = key; - - searchkey = (key->Persist())?key->clone():0; - if (searchkey) { - searchkey->Persist(1); - setKey(*searchkey); - } - - // position module at the beginning - *this = TOP; - - VerseKey *lkey = (VerseKey *)key; - - // iterate thru each entry in module - while (!Error()) { - long index = lkey->Index(); - wordBuf = (char *)calloc(sizeof(char), strlen(StripText()) + 1); - strcpy(wordBuf, StripText()); - - // grab each word from the text - word = strtok(wordBuf, " !.,?;:()-=+/\\|{}[]\"<>"); - while (word) { - - // make word upper case - toupperstr(word); - - // lookup word in dictionary (or make entry in dictionary - // for this word) and add this module position (index) to - // the word's associated list of module positions - dictionary[lkey->Testament()-1][word].push_back(index); - word = strtok(NULL, " !.,?;:()-=+/\\|{}[]\"<>"); - } - free(wordBuf); - (*this)++; - } - - // reposition module back to where it was before we were called - setKey(*savekey); - - if (!savekey->Persist()) - delete savekey; - - if (searchkey) - delete searchkey; - - - // --------- Let's output an index from our dictionary ----------- - FileDesc *datfd; - FileDesc *idxfd; - strlist::iterator it; - longlist::iterator it2; - unsigned long offset, entryoff; - unsigned long size; - - SWBuf fname; - fname = path; - char ch = fname.c_str()[strlen(fname.c_str())-1]; - if ((ch != '/') && (ch != '\\')) - fname += "/"; - - // for old and new testament do... - for (int loop = 0; loop < 2; loop++) { - datfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.dat":"otwords.dat")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); - if (datfd->getFd() == -1) - return -1; - idxfd = FileMgr::getSystemFileMgr()->open((fname + ((loop)?"ntwords.idx":"otwords.idx")).c_str(), FileMgr::CREAT|FileMgr::WRONLY, 00644); - if (idxfd->getFd() == -1) { - FileMgr::getSystemFileMgr()->close(datfd); - return -1; - } - - // iterate thru each word in the dictionary - for (it = dictionary[loop].begin(); it != dictionary[loop].end(); it++) { - printf("%s: ", it->first.c_str()); - - // get our current offset in our word.dat file and write this as the start - // of the next entry in our database - offset = datfd->seek(0, SEEK_CUR); - idxfd->write(&offset, 4); - - // write our word out to the word.dat file, delineating with a \n - datfd->write(it->first.c_str(), strlen(it->first.c_str())); - datfd->write("\n", 1); - - // force our mod position list for this word to be unique (remove - // duplicates that may exist if the word was found more than once - // in the verse - it->second.unique(); - - // iterate thru each mod position for this word and output it to - // our database - unsigned short count = 0; - for (it2 = it->second.begin(); it2 != it->second.end(); it2++) { - entryoff= *it2; - datfd->write(&entryoff, 4); - count++; - } - - // now see what our new position is in our word.dat file and - // determine the size of this database entry - size = datfd->seek(0, SEEK_CUR) - offset; - - // store the size of this database entry - idxfd->write(&size, 4); - printf("%d entries (size: %d)\n", count, size); - } - FileMgr::getSystemFileMgr()->close(datfd); - FileMgr::getSystemFileMgr()->close(idxfd); - } - return 0; -#else - return SWModule::createSearchFramework(percent, percentUserData); -#endif -} - - -void RawText4::deleteSearchFramework() { -#ifndef USELUCENE - SWBuf target = path; - char ch = target.c_str()[strlen(target.c_str())-1]; - if ((ch != '/') && (ch != '\\')) - target += "/lucene"; - FileMgr::removeFile(target + "ntwords.dat"); - FileMgr::removeFile(target + "otwords.dat"); - FileMgr::removeFile(target + "ntwords.idx"); - FileMgr::removeFile(target + "otwords.idx"); -#else - SWModule::deleteSearchFramework(); -#endif -} - - -/****************************************************************************** - * SWModule::search - Searches a module for a string - * - * ENT: istr - string for which to search - * searchType - type of search to perform - * >=0 - regex - * -1 - phrase - * -2 - multiword - * flags - options flags for search - * justCheckIfSupported - if set, don't search, only tell if this - * function supports requested search. - * - * RET: ListKey set to verses that contain istr - */ - -ListKey &RawText4::search(const char *istr, int searchType, int flags, SWKey *scope, bool *justCheckIfSupported, void (*percent)(char, void *), void *percentUserData) { -#ifndef USELUCENE - listKey.ClearList(); - - if ((fastSearch[0]) && (fastSearch[1])) { - - switch (searchType) { - case -2: { - - if ((flags & REG_ICASE) != REG_ICASE) // if haven't chosen to - // ignore case - break; // can't handle fast case sensitive searches - - // test to see if our scope for this search is bounded by a - // VerseKey - VerseKey *testKeyType = 0; - SWTRY { - testKeyType = SWDYNAMIC_CAST(VerseKey, ((scope)?scope:key)); - } - SWCATCH ( ... ) {} - // if we don't have a VerseKey * decendant we can't handle - // because of scope. - // In the future, add bool SWKey::isValid(const char *tryString); - if (!testKeyType) - break; - - - // check if we just want to see if search is supported. - // If we've gotten this far, then it is supported. - if (justCheckIfSupported) { - *justCheckIfSupported = true; - return listKey; - } - - SWKey saveKey = *testKeyType; // save current place - - char error = 0; - char **words = 0; - char *wordBuf = 0; - int wordCount = 0; - long start; - unsigned long size; - char *idxbuf = 0; - SWBuf datBuf; - list indexes; - list indexes2; - VerseKey vk; - vk = TOP; - - (*percent)(10, percentUserData); - - // toupper our copy of search string - stdstr(&wordBuf, istr); - toupperstr(wordBuf); - - // get list of individual words - words = (char **)calloc(sizeof(char *), 10); - int allocWords = 10; - words[wordCount] = strtok(wordBuf, " "); - while (words[wordCount]) { - wordCount++; - if (wordCount == allocWords) { - allocWords+=10; - words = (char **)realloc(words, sizeof(char *)*allocWords); - } - words[wordCount] = strtok(NULL, " "); - } - - (*percent)(20, percentUserData); - - // clear our result set - indexes.erase(indexes.begin(), indexes.end()); - - // search both old and new testament indexes - for (int j = 0; j < 2; j++) { - // iterate thru each word the user passed to us. - for (int i = 0; i < wordCount; i++) { - - // clear this word's result set - indexes2.erase(indexes2.begin(), indexes2.end()); - error = 0; - - // iterate thru every word in the database that starts - // with our search word - for (int away = 0; !error; away++) { - idxbuf = 0; - - // find our word in the database and jump ahead _away_ - error = fastSearch[j]->findOffset(words[i], &start, &size, away); - - // get the word from the database - fastSearch[j]->getIDXBufDat(start, &idxbuf); - - // check to see if it starts with our target word - if (strlen(idxbuf) > strlen(words[i])) - idxbuf[strlen(words[i])] = 0; -// else words[i][strlen(idxbuf)] = 0; - if (!strcmp(idxbuf, words[i])) { - - // get data for this word from database - delete [] idxbuf; - idxbuf = 0; - datBuf = ""; - fastSearch[j]->readText(start, &size, &idxbuf, datBuf); - - // we know that the data consists of sizof(long) - // records each a valid module position that constains - // this word - // - // iterate thru each of these module positions - long *keyindex = (long *)datBuf.getRawData(); - while (keyindex < (long *)(datBuf.getRawData() + size - (strlen(idxbuf) + 1))) { - if (i) { // if we're not on our first word - - // check to see if this word is already in the result set. - // This is our AND functionality - if (find(indexes.begin(), indexes.end(), *keyindex) != indexes.end()) - // add to new result set - indexes2.push_back(*keyindex); - } - else indexes2.push_back(*keyindex); - keyindex++; - } - } - else error = 1; // no more matches - free(idxbuf); - } - - // make new result set final result set - indexes = indexes2; - - percent((char)(20 + (float)((j*wordCount)+i)/(wordCount * 2) * 78), percentUserData); - } - - // indexes contains our good verses, lets return them in a listKey - indexes.sort(); - - // iterate thru each good module position that meets the search - for (longlist::iterator it = indexes.begin(); it != indexes.end(); it++) { - - // set a temporary verse key to this module position - vk.Testament(j+1); - vk.Error(); - vk.Index(*it); - - // check scope - // Try to set our scope key to this verse key - if (scope) { - *testKeyType = vk; - - // check to see if it set ok and if so, add to our return list - if (*testKeyType == vk) - listKey << (const char *) vk; - } - else listKey << (const char*) vk; - } - } - (*percent)(98, percentUserData); - - free(words); - free(wordBuf); - - *testKeyType = saveKey; // set current place back to original - - listKey = TOP; - (*percent)(100, percentUserData); - return listKey; - } - - default: - break; - } - } - - // check if we just want to see if search is supported - if (justCheckIfSupported) { - *justCheckIfSupported = false; - return listKey; - } - -#endif - // if we don't support this search, fall back to base class - return SWModule::search(istr, searchType, flags, scope, justCheckIfSupported, percent, percentUserData); -} - - void RawText4::setEntry(const char *inbuf, long len) { VerseKey &key = getVerseKey(); - doSetText(key.Testament(), key.Index(), inbuf, len); + doSetText(key.Testament(), key.TestamentIndex(), inbuf, len); } void RawText4::linkEntry(const SWKey *inkey) { VerseKey &destkey = getVerseKey(); - const VerseKey *srckey = 0; - - // see if we have a VerseKey * or decendant - SWTRY { - srckey = SWDYNAMIC_CAST(VerseKey, inkey); - } - SWCATCH ( ... ) {} - // if we don't have a VerseKey * decendant, create our own - if (!srckey) - srckey = new VerseKey(inkey); - - doLinkEntry(destkey.Testament(), destkey.Index(), srckey->Index()); - - if (inkey != srckey) // free our key if we created a VerseKey - delete srckey; + const VerseKey *srckey = &getVerseKey(inkey); + doLinkEntry(destkey.Testament(), destkey.TestamentIndex(), srckey->TestamentIndex()); } @@ -501,7 +125,7 @@ void RawText4::linkEntry(const SWKey *inkey) { void RawText4::deleteEntry() { VerseKey &key = getVerseKey(); - doSetText(key.Testament(), key.Index(), ""); + doSetText(key.Testament(), key.TestamentIndex(), ""); } /****************************************************************************** @@ -517,21 +141,21 @@ void RawText4::increment(int steps) { unsigned long size; VerseKey *tmpkey = &getVerseKey(); - findOffset(tmpkey->Testament(), tmpkey->Index(), &start, &size); + findOffset(tmpkey->Testament(), tmpkey->TestamentIndex(), &start, &size); SWKey lastgood = *tmpkey; while (steps) { long laststart = start; unsigned long lastsize = size; SWKey lasttry = *tmpkey; - (steps > 0) ? (*key)++ : (*key)--; + (steps > 0) ? ++(*key) : --(*key); tmpkey = &getVerseKey(); if ((error = key->Error())) { *key = lastgood; break; } - long index = tmpkey->Index(); + long index = tmpkey->TestamentIndex(); findOffset(tmpkey->Testament(), index, &start, &size); if ( (((laststart != start) || (lastsize != size)) // we're a different entry @@ -545,4 +169,25 @@ void RawText4::increment(int steps) { error = (error) ? KEYERR_OUTOFBOUNDS : 0; } +bool RawText4::isLinked(const SWKey *k1, const SWKey *k2) const { + long start1, start2; + unsigned long size1, size2; + VerseKey *vk1 = &getVerseKey(k1); + VerseKey *vk2 = &getVerseKey(k2); + if (vk1->Testament() != vk2->Testament()) return false; + + findOffset(vk1->Testament(), vk1->TestamentIndex(), &start1, &size1); + findOffset(vk2->Testament(), vk2->TestamentIndex(), &start2, &size2); + return start1 == start2; +} + +bool RawText4::hasEntry(const SWKey *k) const { + long start; + unsigned long size; + VerseKey *vk = &getVerseKey(k); + + findOffset(vk->Testament(), vk->TestamentIndex(), &start, &size); + return size; +} + SWORD_NAMESPACE_END -- cgit v1.2.3