diff options
Diffstat (limited to 'utilities/xml2gbs.cpp')
-rw-r--r-- | utilities/xml2gbs.cpp | 484 |
1 files changed, 0 insertions, 484 deletions
diff --git a/utilities/xml2gbs.cpp b/utilities/xml2gbs.cpp deleted file mode 100644 index 2461be1..0000000 --- a/utilities/xml2gbs.cpp +++ /dev/null @@ -1,484 +0,0 @@ -#include <ctype.h> -#include <stdio.h> -#include <fcntl.h> -#include <errno.h> -#include <stdlib.h> - -#ifndef __GNUC__ -#include <io.h> -#else -#include <unistd.h> -#endif - -#include <entriesblk.h> -//#include <iostream> -#include <string> -#include <stdio.h> -#include <treekeyidx.h> -#include <rawgenbook.h> - -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -#ifndef NO_SWORD_NAMESPACE -using sword::TreeKeyIdx; -using sword::RawGenBook; -using sword::SWKey; -#endif - -#define DEBUG - -/* -void printTree(TreeKeyIdx treeKey, TreeKeyIdx *target = 0, int level = 1) { - if (!target) - target = &treeKey; - - unsigned long currentOffset = target->getOffset(); - std::cout << ((currentOffset == treeKey.getOffset()) ? "==>" : ""); - for (int i = 0; i < level; i++) std::cout << "\t"; - std::cout << treeKey.getLocalName() << "/\n"; - if (treeKey.firstChild()) { - printTree(treeKey, target, level+1); - treeKey.parent(); - } - if (treeKey.nextSibling()) - printTree(treeKey, target, level); -} -*/ - - -void setkey (TreeKeyIdx * treeKey, char* keybuffer) { - char* tok = strtok(keybuffer, "/"); - while (tok) { - bool foundkey = false; - if (treeKey->hasChildren()) { - treeKey->firstChild(); - if (!strcmp(treeKey->getLocalName(), tok)) { - foundkey = true; - } else { - while (treeKey->nextSibling()) { - if (treeKey->getLocalName()) { - if (!strcmp(treeKey->getLocalName(), tok)) { - foundkey = true; - } - } - } - } - if (!foundkey) { - treeKey->append(); - treeKey->setLocalName(tok); - treeKey->save(); - } - } - else { - treeKey->appendChild(); - treeKey->setLocalName(tok); - treeKey->save(); - } - -#ifdef DEBUG -// std::cout << treeKey->getLocalName() << " : " << tok << std::endl; -#endif - - tok = strtok(NULL, "/"); - - } -} - -int readline(FILE* infile, char* linebuffer) { - signed char c; - char* lbPtr = linebuffer; - while ((c = fgetc(infile)) != EOF) { - *lbPtr++ = c; - if (c == 10) { - *lbPtr = 0; - return (lbPtr-linebuffer); - } - } - return 0; -} - -enum XML_FORMATS { F_AUTODETECT, F_OSIS, F_THML }; - -#define HELPTEXT "xml2gbs 1.0 OSIS/ThML General Book module creation tool for the SWORD Project\n usage:\n xml2gbs [-l] [-i] [-fT|-fO] <filename> [modname]\n -l uses long div names in ThML files\n -i exports to IMP format instead of creating a module\n -fO and -fT will set the importer to expect OSIS or ThML format respectively\n (otherwise it attempts to autodetect)\n" - -unsigned char detectFormat(char* filename, char* entbuffer) { - - unsigned char format = F_AUTODETECT; - - FILE *infile; - infile = fopen(filename, "r"); - if (!infile) { - fprintf(stderr, HELPTEXT); - fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename); - } - else { - while (readline(infile, entbuffer) && format == F_AUTODETECT) { - if (strstr(entbuffer, "<osis")) { - format = F_OSIS; - } - else if (strstr(entbuffer, "<ThML")) { - format = F_THML; - } - } - fclose(infile); - } - - return format; -} - -int getTag(FILE* file, char* keybuffer) { - char c; - char* kbPtr = keybuffer; - while ((c = fgetc(file)) != '>') - *kbPtr++ = c; - *kbPtr++ = c; - *kbPtr = 0; - return (kbPtr-keybuffer); -} - -int processXML(char* filename, char* modname, bool longnames, bool exportfile, unsigned char format, char* entbuffer) { - signed long i = 0; - char* strtmp; - -#ifdef DEBUG - printf ("%s :%s :%d :%d :%d\n\n", filename, modname, longnames, exportfile, format); -#endif - - FILE *infile; - infile = fopen(filename, "r"); - if (!infile) { - fprintf(stderr, HELPTEXT); - fprintf(stderr, "\n\nCould not open file \"%s\"\n", filename); - return -1; - } - FILE *outfile; - if (exportfile) { - strcat (modname, ".imp"); - outfile = fopen(modname, "w"); - } - - TreeKeyIdx * treeKey; - RawGenBook * book; - - std::string divs[32]; - - int level = 0; - char* keybuffer = new char[2048]; - char* keybuffer2 = new char[2048]; - char* n = new char[256]; - char* type = new char[256]; - char* title= new char[512]; - unsigned long entrysize = 0; - unsigned long keysize = 0; - bool closer = false; - - if (!exportfile) { - // Do some initialization stuff - TreeKeyIdx::create(modname); - treeKey = new TreeKeyIdx(modname); - RawGenBook::createModule(modname); - delete treeKey; - book = new RawGenBook(modname); - treeKey = ((TreeKeyIdx *)((SWKey *)(*book))); - } - -#ifdef DEBUG -// TreeKeyIdx root = *((TreeKeyIdx *)((SWKey *)(*book))); -#endif - - int c; - while ((c = fgetc(infile)) != EOF) { - if (c == '<') { - if (getTag(infile, keybuffer)) { - if ((format == F_OSIS) && ((!strcmp(keybuffer, "/div>")) || (!strcmp(keybuffer, "/verse>"))) || - ((format == F_THML) && ((!strncmp(keybuffer, "/div", 4)) && (keybuffer[4] > '0' && keybuffer[4] < '7')))) { - if (!closer) { - keysize = 0; - keybuffer2[0] = 0; - for (i = 0; i < level; i++) { - keybuffer2[keysize] = '/'; - keysize++; - keybuffer2[keysize] = 0; - strcat (keybuffer2, divs[i].c_str()); - keysize += divs[i].length(); - } - - if (level) { - printf ("%s\n", keybuffer2); - if (exportfile) { - fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer); - } - else { - treeKey->root(); - setkey(treeKey, keybuffer2); - book->setEntry(entbuffer, entrysize); // save text to module at current position - } - } - } - level--; - entbuffer[0] = 0; - entrysize = 0; - - closer = true; - } - else if ((format == F_OSIS) && !((!strcmp(keybuffer, "div>") || !strncmp(keybuffer, "div ", 4)) || (!strcmp(keybuffer, "verse>") || !strncmp(keybuffer, "verse ", 6))) || - ((format == F_THML) && !((!strncmp(keybuffer, "div", 3)) && (keybuffer[3] > '0' && keybuffer[3] < '7')))) { - entbuffer[entrysize++] = '<'; - for (i = 0; i <= strlen(keybuffer); i++) { - entbuffer[entrysize++] = keybuffer[i]; - } - entrysize--; - } - else { - //we have a divN... - if (!closer) { - keysize = 0; - keybuffer2[0] = 0; - for (i = 0; i < level; i++) { - keybuffer2[keysize] = '/'; - keysize++; - keybuffer2[keysize] = 0; - strcat (keybuffer2, divs[i].c_str()); - keysize += divs[i].length(); - } - - if (level) { - printf ("%s\n", keybuffer2); - if (exportfile) { - fprintf (outfile, "$$$%s\n%s\n", keybuffer2, entbuffer); - } - else { - treeKey->root(); - setkey(treeKey, keybuffer2); - book->setEntry(entbuffer, entrysize); // save text to module at current position - } - } - } - - entbuffer[0] = 0; - entrysize = 0; - - level++; - keysize = strlen(keybuffer)-1; -/* keysize = 0; - while ((c = fgetc(infile)) != EOF) { - if (c != '>') { - keybuffer[keysize] = c; - keysize++; - } - else { - break; - } - } - keybuffer[keysize] = 0;*/ - - type[0] = 0; - n[0] = 0; - title[0] = 0; - - if (format == F_OSIS && longnames == false) { - strtmp = strstr(keybuffer, "osisID=\""); - if (strtmp) { - strtmp += 8; - i = 0; - for (;*strtmp != '\"'; strtmp++) { - if (*strtmp == 10) { - title[i] = ' '; - i++; - } - else if (*strtmp == '.') { - i = 0; - } - else if (*strtmp != 13) { - title[i] = *strtmp; - i++; - } - } - title[i] = 0; - } - strcpy (keybuffer, title); - } - else { - strtmp = strstr(keybuffer, "type=\""); - if (strtmp) { - strtmp += 6; - i = 0; - for (;*strtmp != '\"'; strtmp++) { - if (*strtmp == 10) { - type[i] = ' '; - i++; - } - else if (*strtmp != 13) { - type[i] = *strtmp; - i++; - } - } - type[i] = 0; - } - - strtmp = strstr(keybuffer, "n=\""); - if (strtmp) { - strtmp += 3; - i = 0; - for (;*strtmp != '\"'; strtmp++) { - if (*strtmp == 10) { - n[i] = ' '; - i++; - } - else if (*strtmp != 13) { - n[i] = *strtmp; - i++; - } - } - n[i] = 0; - } - - if (format == F_OSIS) { - strtmp = strstr(keybuffer, "title=\""); - if (strtmp) { - strtmp += 7; - i = 0; - for (;*strtmp != '\"'; strtmp++) { - if (*strtmp == 10) { - title[i] = ' '; - i++; - } - else if (*strtmp != 13) { - title[i] = *strtmp; - i++; - } - } - title[i] = 0; - } - } - else if (format == F_THML) { - strtmp = strstr(keybuffer, "title=\""); - if (strtmp) { - strtmp += 7; - i = 0; - for (;*strtmp != '\"'; strtmp++) { - if (*strtmp == 10) { - title[i] = ' '; - i++; - } - else if (*strtmp != 13) { - title[i] = *strtmp; - i++; - } - } - title[i] = 0; - } - } - - strcpy (keybuffer, type); - if (strlen(keybuffer) && strlen(n)) - strcat (keybuffer, " "); - strcat (keybuffer, n); - - if (longnames && strlen(keybuffer)) - strcat (keybuffer, ": "); - if (longnames || !strlen(keybuffer)) - strcat (keybuffer, title); - } - divs[level-1] = keybuffer; - - closer = false; - } - } - } - else if (c != 13) { - entbuffer[entrysize] = c; - entrysize++; - entbuffer[entrysize] = 0; - } - } - -#ifdef DEBUG -// printTree(root, treeKey); -#endif - -// delete book; //causes nasty-bad errors upon execution - delete n; - delete type; - delete title; - delete keybuffer; -} - -int main(int argc, char **argv) { - unsigned long i = 0; - - char modname[256]; - *modname = 0; - char filename[256]; - *filename = 0; - - bool longnames = false; - bool exportfile = false; - unsigned char format = F_AUTODETECT; - - if (argc > 2) { - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { - switch (argv[i][1]) { - case 'l': - longnames = true; - continue; - case 'i': - exportfile = true; - continue; - case 'f': - if (argv[i][2] == 'O') { - format = F_OSIS; - } - else if (argv[i][2] == 'T') { - format = F_OSIS; - } - else { - format = F_AUTODETECT; - } - continue; - } - } - else if (*filename == 0) { - strcpy (filename, argv[i]); - } - else if (*modname == 0) { - strcpy (modname, argv[i]); - } - } - } - else if (argc > 1) { - strcpy (filename, argv[1]); - } - - if (!*filename) { - fprintf(stderr, HELPTEXT); - return -1; - } - else { - if (!*modname) { - for (i = 0; (i < 256) && (filename[i]) && (filename[i] != '.'); i++) { - modname[i] = filename[i]; - } - modname[i] = 0; - } - - char* entbuffer = new char[1048576]; - format = (format == F_AUTODETECT) ? detectFormat(filename, entbuffer) : format; - if (format == F_AUTODETECT) { - fprintf(stderr, HELPTEXT); - fprintf(stderr, "\n\nCould not detect file format for file \"%s\", please specify.\n", filename); - return -1; - } - - int retCode = processXML (filename, modname, longnames, exportfile, format, entbuffer); - delete entbuffer; - - return retCode; - } -} - - - |