diff options
Diffstat (limited to 'src/modules/filters/utf8latin1.cpp')
-rw-r--r-- | src/modules/filters/utf8latin1.cpp | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/src/modules/filters/utf8latin1.cpp b/src/modules/filters/utf8latin1.cpp new file mode 100644 index 0000000..1bf8d3b --- /dev/null +++ b/src/modules/filters/utf8latin1.cpp @@ -0,0 +1,92 @@ +/****************************************************************************** + * + * utf8latin1.cpp - SWFilter descendant to convert UTF-8 to Latin-1 + * + * $Id: utf8latin1.cpp 2980 2013-09-14 21:51:47Z scribe $ + * + * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include <stdlib.h> +#include <stdio.h> + +#include <utf8latin1.h> +#include <swbuf.h> + + +SWORD_NAMESPACE_START + + +UTF8Latin1::UTF8Latin1(char rchar) : replacementChar(rchar) { +} + + +char UTF8Latin1::processText(SWBuf &text, const SWKey *key, const SWModule *module) +{ + unsigned char *from; + + unsigned long uchar; + unsigned char significantFirstBits, subsequent; + + if ((unsigned long)key < 2) {// hack, we're en(1)/de(0)ciphering + return (char)-1; + } + + SWBuf orig = text; + from = (unsigned char*)orig.c_str(); + + + // ------------------------------- + + for (text = ""; *from; from++) { + uchar = 0; + if ((*from & 128) != 128) { + // if (*from != ' ') + uchar = *from; + } + else if ((*from & 128) && ((*from & 64) != 64)) { + // error, do nothing + continue; + } + else { + *from <<= 1; + for (subsequent = 1; (*from & 128); subsequent++) { + *from <<= 1; + from[subsequent] &= 63; + uchar <<= 6; + uchar |= from[subsequent]; + } + subsequent--; + *from <<=1; + significantFirstBits = 8 - (2+subsequent); + + uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8)); + from += subsequent; + } + + if (uchar < 0xff) { + text += (unsigned char)uchar; + } + else { + text += replacementChar; + } + } + return 0; +} + + +SWORD_NAMESPACE_END + |