summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author Teus Benschop <teusjannette@gmail.com> 2018-10-28 11:51:26 +0100
committer Teus Benschop <teusjannette@gmail.com> 2018-10-28 11:51:26 +0100
commit 1d0ff54794b5edea7cdf1d2d66710a0fa885bcc5 (patch)
tree 8ece5f9ef437fbb151f2b22ed0c6e1a714879c7c /tests
parent c7dbdc9161a7c460526b80fe01af49d714856126 (diff)
New upstream version 1.8.1
Diffstat (limited to 'tests')
-rw-r--r--tests/CMakeLists.txt2
-rw-r--r--tests/Makefile.am9
-rw-r--r--tests/Makefile.in65
-rw-r--r--tests/configtest.cpp4
-rw-r--r--tests/cppunit/Makefile.in4
-rw-r--r--tests/ldtest.cpp32
-rw-r--r--tests/localetest.cpp2
-rw-r--r--tests/osistest.cpp5
-rw-r--r--tests/testsuite/CMakeLists.txt8
-rw-r--r--tests/testsuite/Makefile.am16
-rw-r--r--tests/testsuite/Makefile.in480
-rw-r--r--tests/testsuite/README24
-rw-r--r--tests/testsuite/UTF-8-test.txt300
-rw-r--r--tests/testsuite/gbsReference.imp42
-rw-r--r--tests/testsuite/gbs_basic.good66
-rwxr-xr-xtests/testsuite/gbs_basic.sh30
-rw-r--r--tests/testsuite/greekaccents.good7
-rwxr-xr-xtests/testsuite/greekaccents.sh8
-rw-r--r--tests/testsuite/greekaccents.txt7
-rw-r--r--tests/testsuite/ldr12n.good24
-rw-r--r--tests/testsuite/ldr12n.imp12
-rwxr-xr-xtests/testsuite/ldr12n.sh31
-rwxr-xr-xtests/testsuite/listtest.sh2
-rw-r--r--tests/testsuite/osis.good102
-rwxr-xr-xtests/testsuite/osis.sh26
-rw-r--r--tests/testsuite/osisReference.xml15
-rw-r--r--tests/testsuite/osis_basic.good186
-rwxr-xr-xtests/testsuite/osis_basic.sh35
-rw-r--r--tests/testsuite/osis_mod2zmod.good186
-rwxr-xr-xtests/testsuite/osis_mod2zmod.sh60
-rw-r--r--tests/testsuite/osis_osis2modcipher.good186
-rwxr-xr-xtests/testsuite/osis_osis2modcipher.sh37
-rwxr-xr-xtests/testsuite/runall.sh2
-rwxr-xr-xtests/testsuite/runtest.sh2
-rw-r--r--tests/testsuite/utf8basic.good300
-rwxr-xr-xtests/testsuite/utf8basic.sh10
-rw-r--r--tests/testsuite/versekeytest.good10
-rwxr-xr-xtests/testsuite/versekeytest.sh2
-rwxr-xr-xtests/testsuite/versemgrtest.sh2
-rwxr-xr-xtests/testsuite/verseparsing-utf8.sh2
-rwxr-xr-xtests/testsuite/verseparsing.sh2
-rw-r--r--tests/testsuite/vs2osisref.good2
-rwxr-xr-xtests/testsuite/vs2osisref.sh22
-rwxr-xr-xtests/testsuite/xmltag.sh2
-rw-r--r--tests/utf8norm.cpp54
-rw-r--r--tests/versekeytest.cpp16
46 files changed, 2247 insertions, 194 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 130af8a..30818a8 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -31,6 +31,8 @@ SET(test_PROGRAMS
localetest
mgrtest
modtest
+ osistest
+ ldtest
parsekey
rawldidxtest
romantest
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ad09283..1be04d4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,14 +5,14 @@ AM_CPPFLAGS += -I$(top_srcdir)/include/internal/regex
endif
LDADD = $(top_builddir)/lib/libsword.la
-SUBDIRS = cppunit
+SUBDIRS = cppunit testsuite
noinst_PROGRAMS = utf8norm ciphertest keytest mgrtest parsekey versekeytest \
vtreekeytest versemgrtest listtest casttest modtest \
compnone complzss localetest introtest indextest \
configtest keycast romantest testblocks filtertest \
rawldidxtest lextest swaptest swbuftest xmltest \
- webiftest striptest osistest bibliotest
+ webiftest striptest ldtest osistest bibliotest
if HAVE_ICU
ICUPROG = icutest translittest tlitmgrtest
@@ -68,10 +68,7 @@ swbuftest_SOURCES = swbuftest.cpp
webiftest_SOURCES = webiftest.cpp
striptest_SOURCES = striptest.cpp
xmltest_SOURCES = xmltest.cpp
+ldtest_SOURCES = ldtest.cpp
osistest_SOURCES = osistest.cpp
bibliotest_SOURCES = bibliotest.cpp
-EXTRA_DIST =
-include bcppmake/Makefile.am
-include testsuite/Makefile.am
-include tmp/Makefile.am
diff --git a/tests/Makefile.in b/tests/Makefile.in
index c228d64..884969d 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
@@ -90,13 +90,11 @@ noinst_PROGRAMS = utf8norm$(EXEEXT) ciphertest$(EXEEXT) \
testblocks$(EXEEXT) filtertest$(EXEEXT) rawldidxtest$(EXEEXT) \
lextest$(EXEEXT) swaptest$(EXEEXT) swbuftest$(EXEEXT) \
xmltest$(EXEEXT) webiftest$(EXEEXT) striptest$(EXEEXT) \
- osistest$(EXEEXT) bibliotest$(EXEEXT) $(am__EXEEXT_1) \
- $(am__EXEEXT_2)
-DIST_COMMON = $(srcdir)/bcppmake/Makefile.am \
- $(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am \
- $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
- $(top_srcdir)/depcomp
+ ldtest$(EXEEXT) osistest$(EXEEXT) bibliotest$(EXEEXT) \
+ $(am__EXEEXT_1) $(am__EXEEXT_2)
subdir = tests
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+ $(top_srcdir)/depcomp
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/acx_clucene.m4 \
$(top_srcdir)/m4/cppunit.m4 $(top_srcdir)/m4/libtool.m4 \
@@ -171,6 +169,10 @@ am_keytest_OBJECTS = keytest.$(OBJEXT)
keytest_OBJECTS = $(am_keytest_OBJECTS)
keytest_LDADD = $(LDADD)
keytest_DEPENDENCIES = $(top_builddir)/lib/libsword.la
+am_ldtest_OBJECTS = ldtest.$(OBJEXT)
+ldtest_OBJECTS = $(am_ldtest_OBJECTS)
+ldtest_LDADD = $(LDADD)
+ldtest_DEPENDENCIES = $(top_builddir)/lib/libsword.la
am_lextest_OBJECTS = lextest.$(OBJEXT)
lextest_OBJECTS = $(am_lextest_OBJECTS)
lextest_LDADD = $(LDADD)
@@ -295,24 +297,24 @@ SOURCES = $(bibliotest_SOURCES) $(casttest_SOURCES) \
$(ciphertest_SOURCES) $(complzss_SOURCES) $(compnone_SOURCES) \
$(compzip_SOURCES) $(configtest_SOURCES) $(filtertest_SOURCES) \
$(icutest_SOURCES) $(indextest_SOURCES) $(introtest_SOURCES) \
- $(keycast_SOURCES) $(keytest_SOURCES) $(lextest_SOURCES) \
- $(listtest_SOURCES) $(localetest_SOURCES) $(mgrtest_SOURCES) \
- $(modtest_SOURCES) $(osistest_SOURCES) $(parsekey_SOURCES) \
- $(rawldidxtest_SOURCES) $(romantest_SOURCES) \
- $(striptest_SOURCES) $(swaptest_SOURCES) $(swbuftest_SOURCES) \
- $(testblocks_SOURCES) $(tlitmgrtest_SOURCES) \
- $(translittest_SOURCES) $(utf8norm_SOURCES) \
- $(versekeytest_SOURCES) $(versemgrtest_SOURCES) \
- $(vtreekeytest_SOURCES) $(webiftest_SOURCES) \
- $(xmltest_SOURCES)
+ $(keycast_SOURCES) $(keytest_SOURCES) $(ldtest_SOURCES) \
+ $(lextest_SOURCES) $(listtest_SOURCES) $(localetest_SOURCES) \
+ $(mgrtest_SOURCES) $(modtest_SOURCES) $(osistest_SOURCES) \
+ $(parsekey_SOURCES) $(rawldidxtest_SOURCES) \
+ $(romantest_SOURCES) $(striptest_SOURCES) $(swaptest_SOURCES) \
+ $(swbuftest_SOURCES) $(testblocks_SOURCES) \
+ $(tlitmgrtest_SOURCES) $(translittest_SOURCES) \
+ $(utf8norm_SOURCES) $(versekeytest_SOURCES) \
+ $(versemgrtest_SOURCES) $(vtreekeytest_SOURCES) \
+ $(webiftest_SOURCES) $(xmltest_SOURCES)
DIST_SOURCES = $(bibliotest_SOURCES) $(casttest_SOURCES) \
$(ciphertest_SOURCES) $(complzss_SOURCES) $(compnone_SOURCES) \
$(am__compzip_SOURCES_DIST) $(configtest_SOURCES) \
$(filtertest_SOURCES) $(am__icutest_SOURCES_DIST) \
$(indextest_SOURCES) $(introtest_SOURCES) $(keycast_SOURCES) \
- $(keytest_SOURCES) $(lextest_SOURCES) $(listtest_SOURCES) \
- $(localetest_SOURCES) $(mgrtest_SOURCES) $(modtest_SOURCES) \
- $(osistest_SOURCES) $(parsekey_SOURCES) \
+ $(keytest_SOURCES) $(ldtest_SOURCES) $(lextest_SOURCES) \
+ $(listtest_SOURCES) $(localetest_SOURCES) $(mgrtest_SOURCES) \
+ $(modtest_SOURCES) $(osistest_SOURCES) $(parsekey_SOURCES) \
$(rawldidxtest_SOURCES) $(romantest_SOURCES) \
$(striptest_SOURCES) $(swaptest_SOURCES) $(swbuftest_SOURCES) \
$(testblocks_SOURCES) $(am__tlitmgrtest_SOURCES_DIST) \
@@ -540,14 +542,16 @@ target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
with_conf = @with_conf@
with_icu = @with_icu@
with_icusword = @with_icusword@
+with_xz = @with_xz@
with_zlib = @with_zlib@
AUTOMAKE_OPTIONS = 1.6
AM_CPPFLAGS = -I $(top_srcdir)/include $(am__append_1)
LDADD = $(top_builddir)/lib/libsword.la
-SUBDIRS = cppunit
+SUBDIRS = cppunit testsuite
@HAVE_ICU_FALSE@ICUPROG =
@HAVE_ICU_TRUE@ICUPROG = icutest translittest tlitmgrtest
@HAVE_ICU_TRUE@icutest_SOURCES = icutest.cpp
@@ -584,23 +588,14 @@ swbuftest_SOURCES = swbuftest.cpp
webiftest_SOURCES = webiftest.cpp
striptest_SOURCES = striptest.cpp
xmltest_SOURCES = xmltest.cpp
+ldtest_SOURCES = ldtest.cpp
osistest_SOURCES = osistest.cpp
bibliotest_SOURCES = bibliotest.cpp
-EXTRA_DIST = $(swbcppdir)/filtertest.bpf $(swbcppdir)/filtertest.bpr \
- $(swbcppdir)/libsword.bpf $(swbcppdir)/libsword.bpr \
- $(swbcppdir)/mgrtest.bpf $(swbcppdir)/mgrtest.bpr \
- $(swbcppdir)/parsekey.bpf $(swbcppdir)/parsekey.bpr \
- $(swbcppdir)/tests.bpg $(swtspdir)/runall.sh \
- $(swtspdir)/runtest.sh $(swtspdir)/verseparsing.good \
- $(swtspdir)/verseparsing.sh $(swtesttmpdir)/README
-swbcppdir = $(top_srcdir)/tests/bcppmake
-swtspdir = $(top_srcdir)/tests/testsuite
-swtesttmpdir = $(top_srcdir)/tests/tmp
all: all-recursive
.SUFFIXES:
.SUFFIXES: .cpp .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/bcppmake/Makefile.am $(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -621,7 +616,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
-$(srcdir)/bcppmake/Makefile.am $(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am:
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -693,6 +687,10 @@ keytest$(EXEEXT): $(keytest_OBJECTS) $(keytest_DEPENDENCIES) $(EXTRA_keytest_DEP
@rm -f keytest$(EXEEXT)
$(AM_V_CXXLD)$(CXXLINK) $(keytest_OBJECTS) $(keytest_LDADD) $(LIBS)
+ldtest$(EXEEXT): $(ldtest_OBJECTS) $(ldtest_DEPENDENCIES) $(EXTRA_ldtest_DEPENDENCIES)
+ @rm -f ldtest$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(ldtest_OBJECTS) $(ldtest_LDADD) $(LIBS)
+
lextest$(EXEEXT): $(lextest_OBJECTS) $(lextest_DEPENDENCIES) $(EXTRA_lextest_DEPENDENCIES)
@rm -f lextest$(EXEEXT)
$(AM_V_CXXLD)$(CXXLINK) $(lextest_OBJECTS) $(lextest_LDADD) $(LIBS)
@@ -796,6 +794,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/introtest.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keycast.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keytest.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ldtest.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lextest.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listtest.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/localetest.Po@am__quote@
diff --git a/tests/configtest.cpp b/tests/configtest.cpp
index d196f07..9e28355 100644
--- a/tests/configtest.cpp
+++ b/tests/configtest.cpp
@@ -2,7 +2,7 @@
*
* configtest.cpp -
*
- * $Id: configtest.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: configtest.cpp 3515 2017-11-01 11:38:09Z scribe $
*
* Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
@@ -31,7 +31,7 @@ int main(int argc, char **argv) {
config["Section1"]["Entry1"] = "Value1";
config["Section1"]["Entry2"] = "oops";
config["Section1"]["Entry2"] = "Value2";
- config.Save();
+ config.save();
SWConfig config2("./test1.conf");
std::cout << "Should be Value2: " << config2["Section1"]["Entry2"] << std::endl;
return 0;
diff --git a/tests/cppunit/Makefile.in b/tests/cppunit/Makefile.in
index f5c8bfa..e4e285d 100644
--- a/tests/cppunit/Makefile.in
+++ b/tests/cppunit/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2013 Free Software Foundation, Inc.
@@ -528,9 +528,11 @@ target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
with_conf = @with_conf@
with_icu = @with_icu@
with_icusword = @with_icusword@
+with_xz = @with_xz@
with_zlib = @with_zlib@
LDADD = $(top_builddir)/lib/libsword.la
LibSword_SOURCES = main.cpp stringmgr_test.cpp swbuf_test.cpp url_test.cpp versekey_test.cpp
diff --git a/tests/ldtest.cpp b/tests/ldtest.cpp
new file mode 100644
index 0000000..369984f
--- /dev/null
+++ b/tests/ldtest.cpp
@@ -0,0 +1,32 @@
+#include <swmodule.h>
+#include <swmgr.h>
+#include <iostream>
+#include <stdio.h>
+
+using namespace sword;
+using namespace std;
+
+int main(int argc, char **argv) { // Test driver: prints "key: stripped text" lines for the module named by argv[1].
+
+ if (argc < 2) { // the module name argument is mandatory
+ fprintf(stderr, "usage: %s <lexdict_name>\n", *argv);
+ exit(-1); // NOTE(review): exit() is declared in <stdlib.h>, which this file does not include directly
+ }
+
+ SWMgr library;
+ SWModule *module = library.getModule(argv[1]); // look the module up by the name given on the command line
+ if (!module) { // a null result is reported as "module not found"
+ cerr << "\nCouldn't find module: " << argv[1] << "\n" << endl;
+ exit(-2); // distinct exit status from the usage error above
+ }
+ int i = 0; // number of entries printed so far
+ for ((*module) = TOP; !module->popError(); module->increment()) { // NOTE(review): presumably "= TOP" positions at the first entry - confirm against SWModule; loop ends once popError() is non-zero
+ cout << module->getKeyText() << ": " << module->stripText() << "\n";
+ if (++i > 10) { // safety cap: trips right after the 11th entry has been printed
+ cout << "ERROR: more than 10 iterations. stopping.\n";
+ break;
+ }
+
+ }
+ return 0; // note: reaching the iteration cap still returns 0
+}
diff --git a/tests/localetest.cpp b/tests/localetest.cpp
index 26f6b28..8f222b8 100644
--- a/tests/localetest.cpp
+++ b/tests/localetest.cpp
@@ -2,7 +2,7 @@
*
* localetest.cpp -
*
- * $Id: localetest.cpp 3005 2014-01-09 04:06:11Z greg.hellings $
+ * $Id: localetest.cpp 3001 2014-01-03 19:23:42Z scribe $
*
* Copyright 2000-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
diff --git a/tests/osistest.cpp b/tests/osistest.cpp
index cfc09bb..77fda1a 100644
--- a/tests/osistest.cpp
+++ b/tests/osistest.cpp
@@ -2,7 +2,7 @@
*
* osistest.cpp -
*
- * $Id: osistest.cpp 3185 2014-04-17 04:32:00Z greg.hellings $
+ * $Id: osistest.cpp 3548 2017-12-10 05:11:38Z scribe $
*
* Copyright 2012-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
@@ -76,6 +76,9 @@ int main(int argc, char **argv) {
module->setKey("Ps.3.1");
outputCurrentVerse(module);
+ module->setKey("Matt.2.6");
+ outputCurrentVerse(module);
+
module->setKey("Mark.1.14");
outputCurrentVerse(module);
diff --git a/tests/testsuite/CMakeLists.txt b/tests/testsuite/CMakeLists.txt
index 48dc01a..c695236 100644
--- a/tests/testsuite/CMakeLists.txt
+++ b/tests/testsuite/CMakeLists.txt
@@ -1,12 +1,12 @@
#############################################################################
# This file will actually be responsible for running the tests
-#
+#
+
+FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/sword.conf" "[Install]\nLocalePath=${CMAKE_CURRENT_SOURCE_DIR}/../../")
ADD_CUSTOM_TARGET(
tests_configure
- COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/*.sh ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/*.good ${CMAKE_CURRENT_BINARY_DIR}
- COMMAND echo \"[Install]\\nLocalePath=${CMAKE_CURRENT_SOURCE_DIR}/../../\" > ${CMAKE_CURRENT_BINARY_DIR}/sword.conf
+ COMMAND cp "${CMAKE_CURRENT_SOURCE_DIR}/*.{sh,good,imp,txt,xml}" "${CMAKE_CURRENT_BINARY_DIR}"
DEPENDS ${test_PROGRAMS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
diff --git a/tests/testsuite/Makefile.am b/tests/testsuite/Makefile.am
index db26c79..33e0b22 100644
--- a/tests/testsuite/Makefile.am
+++ b/tests/testsuite/Makefile.am
@@ -1,6 +1,12 @@
-swtspdir = $(top_srcdir)/tests/testsuite
+all:
+ @echo
+ @echo to run tests type: ./runall.sh
+ @echo or make run
+ @echo
-EXTRA_DIST += $(swtspdir)/runall.sh
-EXTRA_DIST += $(swtspdir)/runtest.sh
-EXTRA_DIST += $(swtspdir)/verseparsing.good
-EXTRA_DIST += $(swtspdir)/verseparsing.sh
+run:
+ ./runall.sh
+
+clean-local:
+ -rm -rf tmp
+ -rm -rf *.try
diff --git a/tests/testsuite/Makefile.in b/tests/testsuite/Makefile.in
new file mode 100644
index 0000000..aa8d9a1
--- /dev/null
+++ b/tests/testsuite/Makefile.in
@@ -0,0 +1,480 @@
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = tests/testsuite
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am README
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_clucene.m4 \
+ $(top_srcdir)/m4/cppunit.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_CFLAGS = @AM_CFLAGS@
+AM_CXXFLAGS = @AM_CXXFLAGS@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLUCENE2_CFLAGS = @CLUCENE2_CFLAGS@
+CLUCENE2_LIBS = @CLUCENE2_LIBS@
+CLUCENE_CXXFLAGS = @CLUCENE_CXXFLAGS@
+CLUCENE_LIBS = @CLUCENE_LIBS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPPUNIT_CFLAGS = @CPPUNIT_CFLAGS@
+CPPUNIT_CONFIG = @CPPUNIT_CONFIG@
+CPPUNIT_LIBS = @CPPUNIT_LIBS@
+CURL_CONFIG = @CURL_CONFIG@
+CURL_LIBS = @CURL_LIBS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_IOLIBS = @ICU_IOLIBS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+SWORD_VERSION_MAJOR = @SWORD_VERSION_MAJOR@
+SWORD_VERSION_MICRO = @SWORD_VERSION_MICRO@
+SWORD_VERSION_MINOR = @SWORD_VERSION_MINOR@
+SWORD_VERSION_NANO = @SWORD_VERSION_NANO@
+SWORD_VERSION_NUM = @SWORD_VERSION_NUM@
+SWORD_VERSION_STR = @SWORD_VERSION_STR@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+dir_confdef = @dir_confdef@
+docdir = @docdir@
+dvidir = @dvidir@
+enable_debug = @enable_debug@
+enable_profile = @enable_profile@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_mingw32 = @target_mingw32@
+target_os = @target_os@
+target_system = @target_system@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
+with_conf = @with_conf@
+with_icu = @with_icu@
+with_icusword = @with_icusword@
+with_xz = @with_xz@
+with_zlib = @with_zlib@
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/testsuite/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign tests/testsuite/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-local mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ clean-local cscopelist-am ctags-am distclean distclean-generic \
+ distclean-libtool distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags-am uninstall uninstall-am
+
+all:
+ @echo
+ @echo to run tests type: ./runall.sh
+ @echo or make run
+ @echo
+
+run:
+ ./runall.sh
+
+clean-local:
+ -rm -rf tmp
+ -rm -rf *.try
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/testsuite/README b/tests/testsuite/README
new file mode 100644
index 0000000..a07a0c9
--- /dev/null
+++ b/tests/testsuite/README
@@ -0,0 +1,24 @@
+All tests are represented by a <test>.sh / <test>.good file pair.
+
+To run a test:
+
+./runtest.sh test
+
+This will run test.sh > test.try, compare test.try to test.good, and report any differences (failures).
+
+To run all tests:
+
+./runall.sh
+
+===================================
+
+To create a new test, do whatever you want in your new mytest.sh file:
+call executables, do anything you'd like, and output the results that
+matter for a good test.
+
+When all is running fine, output your .good file with:
+
+./mytest.sh > mytest.good
+
+That's it. Simple right? :) So make more unit tests!
+
diff --git a/tests/testsuite/UTF-8-test.txt b/tests/testsuite/UTF-8-test.txt
new file mode 100644
index 0000000..78c859c
--- /dev/null
+++ b/tests/testsuite/UTF-8-test.txt
@@ -0,0 +1,300 @@
+UTF-8 decoder capability and stress test
+----------------------------------------
+
+Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0
+
+This test file can help you examine, how your UTF-8 decoder handles
+various types of correct, malformed, or otherwise interesting UTF-8
+sequences. This file is not meant to be a conformance test. It does
+not prescribe any particular outcome. Therefore, there is no way to
+"pass" or "fail" this test file, even though the text does suggest a
+preferable decoder behaviour at some places. Its aim is, instead, to
+help you think about, and test, the behaviour of your UTF-8 decoder on a
+systematic collection of unusual inputs. Experience so far suggests
+that most first-time authors of UTF-8 decoders find at least one
+serious problem in their decoder using this file.
+
+The test lines below cover boundary conditions, malformed UTF-8
+sequences, as well as correctly encoded UTF-8 sequences of Unicode code
+points that should never occur in a correct UTF-8 file.
+
+According to ISO 10646-1:2000, sections D.7 and 2.3c, a device
+receiving UTF-8 shall interpret a "malformed sequence in the same way
+that it interprets a character that is outside the adopted subset" and
+"characters that are not within the adopted subset shall be indicated
+to the user" by a receiving device. One commonly used approach in
+UTF-8 decoders is to replace any malformed UTF-8 sequence by a
+replacement character (U+FFFD), which looks a bit like an inverted
+question mark, or a similar symbol. It might be a good idea to
+visually distinguish a malformed UTF-8 sequence from a correctly
+encoded Unicode character that is just not available in the current
+font but otherwise fully legal, even though ISO 10646-1 doesn't
+mandate this. In any case, just ignoring malformed sequences or
+unavailable characters does not conform to ISO 10646, will make
+debugging more difficult, and can lead to user confusion.
+
+Please check, whether a malformed UTF-8 sequence is (1) represented at
+all, (2) represented by exactly one single replacement character (or
+equivalent signal), and (3) the following quotation mark after an
+illegal UTF-8 sequence is correctly displayed, i.e. proper
+resynchronization takes place immediately after any malformed
+sequence. This file says "THE END" in the last line, so if you don't
+see that, your decoder crashed somehow before, which should always be
+cause for concern.
+
+All lines in this file are exactly 79 characters long (plus the line
+feed). In addition, all lines end with "|", except for the two test
+lines 2.1.1 and 2.2.1, which contain non-printable ASCII controls
+U+0000 and U+007F. If you display this file with a fixed-width font,
+these "|" characters should all line up in column 79 (right margin).
+This allows you to test quickly, whether your UTF-8 decoder finds the
+correct number of characters in every line, that is whether each
+malformed sequences is replaced by a single replacement character.
+
+Note that, as an alternative to the notion of malformed sequence used
+here, it is also a perfectly acceptable (and in some situations even
+preferable) solution to represent each individual byte of a malformed
+sequence with a replacement character. If you follow this strategy in
+your decoder, then please ignore the "|" column.
+
+
+Here come the tests: |
+ |
+1 Some correct UTF-8 text |
+ |
+You should see the Greek word 'kosme': "κόσμε" |
+ |
+2 Boundary condition test cases |
+ |
+2.1 First possible sequence of a certain length |
+ |
+2.1.1 1 byte (U-00000000): "^@" // SWORD: removed. we don't support null mid-string, <- that's a literal <caret at>
+2.1.2 2 bytes (U-00000080): "€" |
+2.1.3 3 bytes (U-00000800): "ࠀ" |
+2.1.4 4 bytes (U-00010000): "𐀀" |
+2.1.5 5 bytes (U-00200000): "" |
+2.1.6 6 bytes (U-04000000): "" |
+ |
+2.2 Last possible sequence of a certain length |
+ |
+2.2.1 1 byte (U-0000007F): ""
+2.2.2 2 bytes (U-000007FF): "߿" |
+2.2.3 3 bytes (U-0000FFFF): "￿" |
+2.2.4 4 bytes (U-001FFFFF): "" |
+2.2.5 5 bytes (U-03FFFFFF): "" |
+2.2.6 6 bytes (U-7FFFFFFF): "" |
+ |
+2.3 Other boundary conditions |
+ |
+2.3.1 U-0000D7FF = ed 9f bf = "퟿" |
+2.3.2 U-0000E000 = ee 80 80 = "" |
+2.3.3 U-0000FFFD = ef bf bd = "�" |
+2.3.4 U-0010FFFF = f4 8f bf bf = "􏿿" |
+2.3.5 U-00110000 = f4 90 80 80 = "" |
+ |
+3 Malformed sequences |
+ |
+3.1 Unexpected continuation bytes |
+ |
+Each unexpected continuation byte should be separately signalled as a |
+malformed sequence of its own. |
+ |
+3.1.1 First continuation byte 0x80: "" |
+3.1.2 Last continuation byte 0xbf: "" |
+ |
+3.1.3 2 continuation bytes: "" |
+3.1.4 3 continuation bytes: "" |
+3.1.5 4 continuation bytes: "" |
+3.1.6 5 continuation bytes: "" |
+3.1.7 6 continuation bytes: "" |
+3.1.8 7 continuation bytes: "" |
+ |
+3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf): |
+ |
+ " |
+ |
+ |
+ " |
+ |
+3.2 Lonely start characters |
+ |
+3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf), |
+ each followed by a space character: |
+ |
+ " |
+ " |
+ |
+3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef), |
+ each followed by a space character: |
+ |
+ " " |
+ |
+3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7), |
+ each followed by a space character: |
+ |
+ " " |
+ |
+3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb), |
+ each followed by a space character: |
+ |
+ " " |
+ |
+3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd), |
+ each followed by a space character: |
+ |
+ " " |
+ |
+3.3 Sequences with last continuation byte missing |
+ |
+All bytes of an incomplete sequence should be signalled as a single |
+malformed sequence, i.e., you should see only a single replacement |
+character in each of the next 10 tests. (Characters as in section 2) |
+ |
+3.3.1 2-byte sequence with last byte missing (U+0000): "" |
+3.3.2 3-byte sequence with last byte missing (U+0000): "" |
+3.3.3 4-byte sequence with last byte missing (U+0000): "" |
+3.3.4 5-byte sequence with last byte missing (U+0000): "" |
+3.3.5 6-byte sequence with last byte missing (U+0000): "" |
+3.3.6 2-byte sequence with last byte missing (U-000007FF): "" |
+3.3.7 3-byte sequence with last byte missing (U-0000FFFF): "" |
+3.3.8 4-byte sequence with last byte missing (U-001FFFFF): "" |
+3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): "" |
+3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): "" |
+ |
+3.4 Concatenation of incomplete sequences |
+ |
+All the 10 sequences of 3.3 concatenated, you should see 10 malformed |
+sequences being signalled: |
+ |
+ "" |
+ |
+3.5 Impossible bytes |
+ |
+The following two bytes cannot appear in a correct UTF-8 string |
+ |
+3.5.1 fe = "" |
+3.5.2 ff = "" |
+3.5.3 fe fe ff ff = "" |
+ |
+4 Overlong sequences |
+ |
+The following sequences are not malformed according to the letter of |
+the Unicode 2.0 standard. However, they are longer then necessary and |
+a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8 |
+decoder" should reject them just like malformed sequences for two |
+reasons: (1) It helps to debug applications if overlong sequences are |
+not treated as valid representations of characters, because this helps |
+to spot problems more quickly. (2) Overlong sequences provide |
+alternative representations of characters, that could maliciously be |
+used to bypass filters that check only for ASCII characters. For |
+instance, a 2-byte encoded line feed (LF) would not be caught by a |
+line counter that counts only 0x0a bytes, but it would still be |
+processed as a line feed by an unsafe UTF-8 decoder later in the |
+pipeline. From a security point of view, ASCII compatibility of UTF-8 |
+sequences means also, that ASCII characters are *only* allowed to be |
+represented by ASCII bytes in the range 0x00-0x7f. To ensure this |
+aspect of ASCII compatibility, use only "safe UTF-8 decoders" that |
+reject overlong UTF-8 sequences for which a shorter encoding exists. |
+ |
+4.1 Examples of an overlong ASCII character |
+ |
+With a safe UTF-8 decoder, all of the following five overlong |
+representations of the ASCII character slash ("/") should be rejected |
+like a malformed UTF-8 sequence, for instance by substituting it with |
+a replacement character. If you see a slash below, you do not have a |
+safe UTF-8 decoder! |
+ |
+4.1.1 U+002F = c0 af = "" |
+4.1.2 U+002F = e0 80 af = "" |
+4.1.3 U+002F = f0 80 80 af = "" |
+4.1.4 U+002F = f8 80 80 80 af = "" |
+4.1.5 U+002F = fc 80 80 80 80 af = "" |
+ |
+4.2 Maximum overlong sequences |
+ |
+Below you see the highest Unicode value that is still resulting in an |
+overlong sequence if represented with the given number of bytes. This |
+is a boundary test for safe UTF-8 decoders. All five characters should |
+be rejected like malformed UTF-8 sequences. |
+ |
+4.2.1 U-0000007F = c1 bf = "" |
+4.2.2 U-000007FF = e0 9f bf = "" |
+4.2.3 U-0000FFFF = f0 8f bf bf = "" |
+4.2.4 U-001FFFFF = f8 87 bf bf bf = "" |
+4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "" |
+ |
+4.3 Overlong representation of the NUL character |
+ |
+The following five sequences should also be rejected like malformed |
+UTF-8 sequences and should not be treated like the ASCII NUL |
+character. |
+ |
+4.3.1 U+0000 = c0 80 = "" |
+4.3.2 U+0000 = e0 80 80 = "" |
+4.3.3 U+0000 = f0 80 80 80 = "" |
+4.3.4 U+0000 = f8 80 80 80 80 = "" |
+4.3.5 U+0000 = fc 80 80 80 80 80 = "" |
+ |
+5 Illegal code positions |
+ |
+The following UTF-8 sequences should be rejected like malformed |
+sequences, because they never represent valid ISO 10646 characters and |
+a UTF-8 decoder that accepts them might introduce security problems |
+comparable to overlong UTF-8 sequences. |
+ |
+5.1 Single UTF-16 surrogates |
+ |
+5.1.1 U+D800 = ed a0 80 = "" |
+5.1.2 U+DB7F = ed ad bf = "" |
+5.1.3 U+DB80 = ed ae 80 = "" |
+5.1.4 U+DBFF = ed af bf = "" |
+5.1.5 U+DC00 = ed b0 80 = "" |
+5.1.6 U+DF80 = ed be 80 = "" |
+5.1.7 U+DFFF = ed bf bf = "" |
+ |
+5.2 Paired UTF-16 surrogates |
+ |
+5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = "" |
+5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = "" |
+5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = "" |
+5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = "" |
+5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = "" |
+5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = "" |
+5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = "" |
+5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = "" |
+ |
+5.3 Noncharacter code positions |
+ |
+The following "noncharacters" are "reserved for internal use" by |
+applications, and according to older versions of the Unicode Standard |
+"should never be interchanged". Unicode Corrigendum #9 dropped the |
+latter restriction. Nevertheless, their presence in incoming UTF-8 data |
+can remain a potential security risk, depending on what use is made of |
+these codes subsequently. Examples of such internal use: |
+ |
+ - Some file APIs with 16-bit characters may use the integer value -1 |
+ = U+FFFF to signal an end-of-file (EOF) or error condition. |
+ |
+ - In some UTF-16 receivers, code point U+FFFE might trigger a |
+ byte-swap operation (to convert between UTF-16LE and UTF-16BE). |
+ |
+With such internal use of noncharacters, it may be desirable and safer |
+to block those code points in UTF-8 decoders, as they should never |
+occur legitimately in incoming UTF-8 data, and could trigger unsafe |
+behaviour in subsequent processing. |
+ |
+Particularly problematic noncharacters in 16-bit applications: |
+ |
+5.3.1 U+FFFE = ef bf be = "￾" |
+5.3.2 U+FFFF = ef bf bf = "￿" |
+ |
+Other noncharacters: |
+ |
+5.3.3 U+FDD0 .. U+FDEF = "﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯"|
+ |
+5.3.4 U+nFFFE U+nFFFF (for n = 1..10) |
+ |
+ "🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿 |
+ 򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿" |
+ |
+THE END |
diff --git a/tests/testsuite/gbsReference.imp b/tests/testsuite/gbsReference.imp
new file mode 100644
index 0000000..ab5bca1
--- /dev/null
+++ b/tests/testsuite/gbsReference.imp
@@ -0,0 +1,42 @@
+$$$Chapter 1
+Text of chapter 1.
+$$$/Chapter 2
+Text
+of chapter 2.
+$$$/Chapter 3/
+Text
+of
+chapter
+3.
+$$$Chapter 4/
+Text of chapter 4.
+$$$/Chapter 4/Section 1
+Text of section 1 in chapter 4.
+$$$Chapter 5
+Text of chapter 5.
+$$$/Chapter 5/Section 1/
+Text of section 1 in chapter 5.
+$$$Chapter 5/Section 2/
+Text of section 2 in chapter 5.
+$$$Chapter 6
+Text of chapter 6.
+$$$Chapter 6/Section 1
+Text of section 1 in chapter 6.
+$$$Chapter 6/Section 2
+Text of section 2 in chapter 6.
+$$$Chapter 6/Section 3
+Text of section 3 in chapter 6.
+$$$Chapter 7
+Text of chapter 7.
+$$$Chapter 7/Section 1
+Text of section 1 in chapter 7.
+$$$Chapter 7/Section 1/Subsection 1
+Text of subsection 1 in section 1 of chapter 7.
+$$$Chapter 7/Section 1/Subsection 1/Paragraph 1
+Text of paragraph 1 in subsection 1 of section 1 in chapter 7.
+$$$Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Text of sentence 1 in paragraph 1 of subsection 1 in section 1 of chapter 7.
+$$$Chapter 8
+Text of chapter 8.
+
+
diff --git a/tests/testsuite/gbs_basic.good b/tests/testsuite/gbs_basic.good
new file mode 100644
index 0000000..9b77976
--- /dev/null
+++ b/tests/testsuite/gbs_basic.good
@@ -0,0 +1,66 @@
+Chapter 1
+/Chapter 2
+/Chapter 3/
+Chapter 4/
+/Chapter 4/Section 1
+Chapter 5
+/Chapter 5/Section 1/
+Chapter 5/Section 2/
+Chapter 6
+Chapter 6/Section 1
+Chapter 6/Section 2
+Chapter 6/Section 3
+Chapter 7
+Chapter 7/Section 1
+Chapter 7/Section 1/Subsection 1
+Chapter 7/Section 1/Subsection 1/Paragraph 1
+Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Chapter 8
+
+-- Plain output
+/Chapter 7: Text of chapter 7.
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}/Chapter 8: {\f1 Text of chapter 8.}\par
+}
+
+-- imp dump
+$$$
+
+$$$/Chapter 1
+Text of chapter 1.
+$$$/Chapter 2
+Text of chapter 2.
+$$$/Chapter 3
+Text of chapter 3.
+$$$/Chapter 4
+Text of chapter 4.
+$$$/Chapter 4/Section 1
+Text of section 1 in chapter 4.
+$$$/Chapter 5
+Text of chapter 5.
+$$$/Chapter 5/Section 1
+Text of section 1 in chapter 5.
+$$$/Chapter 5/Section 2
+Text of section 2 in chapter 5.
+$$$/Chapter 6
+Text of chapter 6.
+$$$/Chapter 6/Section 1
+Text of section 1 in chapter 6.
+$$$/Chapter 6/Section 2
+Text of section 2 in chapter 6.
+$$$/Chapter 6/Section 3
+Text of section 3 in chapter 6.
+$$$/Chapter 7
+Text of chapter 7.
+$$$/Chapter 7/Section 1
+Text of section 1 in chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1
+Text of subsection 1 in section 1 of chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1/Paragraph 1
+Text of paragraph 1 in subsection 1 of section 1 in chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Text of sentence 1 in paragraph 1 of subsection 1 in section 1 of chapter 7.
+$$$/Chapter 8
+Text of chapter 8.
+
diff --git a/tests/testsuite/gbs_basic.sh b/tests/testsuite/gbs_basic.sh
new file mode 100755
index 0000000..689e87d
--- /dev/null
+++ b/tests/testsuite/gbs_basic.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# Builds a RawGenBook module from gbsReference.imp, then prints diatheke and mod2imp output for it.
+rm -rf tmp/gbs_basic/
+mkdir -p tmp/gbs_basic/mods.d
+mkdir -p tmp/gbs_basic/modules
+
+cat > tmp/gbs_basic/mods.d/gbsreference.conf <<!
+[GBSReference]
+DataPath=./modules/gbsreference
+ModDrv=RawGenBook
+Encoding=UTF-8
+SourceType=OSIS
+Lang=en
+Feature=StrongsNumbers
+!
+
+../../utilities/imp2gbs gbsReference.imp -o tmp/gbs_basic/modules/gbsreference 2>&1 | grep -v \$Rev
+
+cd tmp/gbs_basic || exit 1	# the relative tool paths below are only valid from this directory
+#../../../gbstest GBSReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b GBSReference -f plain -k "Chapter 7" | grep -v GBSReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b GBSReference -f RTF -k "Chapter 8" | grep -v GBSReference
+echo
+echo "-- imp dump"
+../../../../utilities/mod2imp GBSReference
diff --git a/tests/testsuite/greekaccents.good b/tests/testsuite/greekaccents.good
new file mode 100644
index 0000000..a39dc3b
--- /dev/null
+++ b/tests/testsuite/greekaccents.good
@@ -0,0 +1,7 @@
+Και καθως Μωυσης υψωσεν τον οφιν εν τη ερημω, ουτως υψωθηναι δει τον υιον του ανθρωπου,
+ινα πας ο πιστευων ⸂εν αυτω⸃ ⸆ εχη ζωην αιωνιον.
+ουτως γαρ ηγαπησεν ο θεος τον κοσμον, ωστε τον υιον ⸆ τον μονογενη εδωκεν, ινα πας ο πιστευων εις αυτον μη αποληται αλλ εχη ζωην αιωνιον.
+ου γαρ απεστειλεν ο θεος τον υιον ⸆ εις τον κοσμον ινα κρινη τον κοσμον, αλλ ινα σωθη ο κοσμος δι αυτου.
+ο πιστευων εις αυτον ου κρινεται· ο °δε μη πιστευων ηδη κεκριται, οτι μη πεπιστευκεν εις το ονομα του μονογενους υιου του θεου.
+αυτη δε εστιν η κρισις οτι °το φως εληλυθεν εις τον κοσμον και ⸉ηγαπησαν οι ανθρωποι μαλλον το σκοτος⸊ η το φως· ην γαρ ⸉¹αυτων πονηρα⸊ τα εργα.
+
diff --git a/tests/testsuite/greekaccents.sh b/tests/testsuite/greekaccents.sh
new file mode 100755
index 0000000..f0def67
--- /dev/null
+++ b/tests/testsuite/greekaccents.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+# The last parameter to utf8norm is an iteration count, which can be used
+# for speed testing. With it set to 999999, results on a Dell Precision 5510:
+# real 0m8.952s
+# user 0m8.939s
+# sys 0m0.004s
+../utf8norm -ga 999 < greekaccents.txt
diff --git a/tests/testsuite/greekaccents.txt b/tests/testsuite/greekaccents.txt
new file mode 100644
index 0000000..e8b3de8
--- /dev/null
+++ b/tests/testsuite/greekaccents.txt
@@ -0,0 +1,7 @@
+Καὶ καθὼς Μωϋσῆς ὕψωσεν τὸν ὄφιν ἐν τῇ ἐρήμῳ, οὕτως ὑψωθῆναι δεῖ τὸν υἱὸν τοῦ ἀνθρώπου,
+ἵνα πᾶς ὁ πιστεύων ⸂ἐν αὐτῷ⸃ ⸆ ἔχῃ ζωὴν αἰώνιον.
+οὕτως γὰρ ἠγάπησεν ὁ θεὸς τὸν κόσμον, ὥστε τὸν υἱὸν ⸆ τὸν μονογενῆ ἔδωκεν, ἵνα πᾶς ὁ πιστεύων εἰς αὐτὸν μὴ ἀπόληται ἀλλ᾿ ἔχῃ ζωὴν αἰώνιον.
+οὐ γὰρ ἀπέστειλεν ὁ θεὸς τὸν υἱὸν ⸆ εἰς τὸν κόσμον ἵνα κρίνῃ τὸν κόσμον, ἀλλ᾿ ἵνα σωθῇ ὁ κόσμος δι᾿ αὐτοῦ.
+ὁ πιστεύων εἰς αὐτὸν οὐ κρίνεται· ὁ °δὲ μὴ πιστεύων ἤδη κέκριται, ὅτι μὴ πεπίστευκεν εἰς τὸ ὄνομα τοῦ μονογενοῦς υἱοῦ τοῦ θεοῦ.
+αὕτη δέ ἐστιν ἡ κρίσις ὅτι °τὸ φῶς ἐλήλυθεν εἰς τὸν κόσμον καὶ ⸉ἠγάπησαν οἱ ἄνθρωποι μᾶλλον τὸ σκότος⸊ ἢ τὸ φῶς· ἦν γὰρ ⸉¹αὐτῶν πονηρὰ⸊ τὰ ἔργα.
+
diff --git a/tests/testsuite/ldr12n.good b/tests/testsuite/ldr12n.good
new file mode 100644
index 0000000..1906020
--- /dev/null
+++ b/tests/testsuite/ldr12n.good
@@ -0,0 +1,24 @@
+0001
+0002
+0003
+4
+0005
+0006
+0001
+0002
+0003
+4
+0005
+0006
+0001: Body of 1
+0002: Body of 2
+0003: Body of 3
+0005: Body of 5
+0006: Body of 6
+4: Body of 4
+00001: Body of 1
+00002: Body of 2
+00003: Body of 3
+00004: Body of 4
+00005: Body of 5
+00006: Body of 6
diff --git a/tests/testsuite/ldr12n.imp b/tests/testsuite/ldr12n.imp
new file mode 100644
index 0000000..17cecfc
--- /dev/null
+++ b/tests/testsuite/ldr12n.imp
@@ -0,0 +1,12 @@
+$$$0001
+Body of 1
+$$$0002
+Body of 2
+$$$0003
+Body of 3
+$$$4
+Body of 4
+$$$0005
+Body of 5
+$$$0006
+Body of 6
diff --git a/tests/testsuite/ldr12n.sh b/tests/testsuite/ldr12n.sh
new file mode 100755
index 0000000..a406d0a
--- /dev/null
+++ b/tests/testsuite/ldr12n.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Lexicon / Dictionary regularization tests to make sure we pad and lookup correctly
+
+rm -rf tmp/ldr12n/
+mkdir -p tmp/ldr12n/mods.d
+mkdir -p tmp/ldr12n/modules
+
+cat > tmp/ldr12n/mods.d/ldr12n.conf <<!
+[ldr12n]
+DataPath=./modules/ldr12n
+ModDrv=RawLD
+Encoding=UTF-8
+SourceType=Plain
+Lang=en
+StrongsPadding=false
+!
+
+cat > tmp/ldr12n/mods.d/ldr12np.conf <<!
+[ldr12np]
+DataPath=./modules/ldr12np
+ModDrv=RawLD
+Encoding=UTF-8
+SourceType=Plain
+Lang=en
+StrongsPadding=true
+!
+
+../../utilities/imp2ld ldr12n.imp -P -o tmp/ldr12n/modules/ldr12n 2>&1 | grep -v \$Rev
+../../utilities/imp2ld ldr12n.imp -o tmp/ldr12n/modules/ldr12np 2>&1 | grep -v \$Rev
+
+cd tmp/ldr12n && ../../../ldtest ldr12n && ../../../ldtest ldr12np
diff --git a/tests/testsuite/listtest.sh b/tests/testsuite/listtest.sh
index 855d161..0666979 100755
--- a/tests/testsuite/listtest.sh
+++ b/tests/testsuite/listtest.sh
@@ -1,7 +1,7 @@
#!/bin/sh
#******************************************************************************
#
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: listtest.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/osis.good b/tests/testsuite/osis.good
deleted file mode 100644
index 508e148..0000000
--- a/tests/testsuite/osis.good
+++ /dev/null
@@ -1,102 +0,0 @@
-SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
-Key:
-Psalms 3:1
--------
-Preverse Header 0:
-Raw:
-<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/>
--------
-Rendered Header:
- <h3>A Psalm of David, when he fled from Absalom his son.</h3>
-
-<br />
-
--------
-CSS:
- .divineName { font-variant: small-caps; }
- .wordsOfJesus { color: red; }
- .transChangeSupplied { font-style: italic; }
- .overline { text-decoration: overline; }
- .indent1 { margin-left: 10px }
- .indent2 { margin-left: 20px }
- .indent3 { margin-left: 30px }
- .indent4 { margin-left: 40px }
-
--------
-RenderText:
- <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
-<span class="line indent0">many <span class="transChangeSupplied">are</span> they that rise up against me.</span><br />
-
--------
--------
-
-Key:
-Mark 1:14
--------
-Preverse Header 0:
-Raw:
-<div sID="gen22" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen23" type="x-p"/>
--------
-Rendered Header:
- <h3>The Beginning of the Ministry of Jesus</h3>
-
-<h3>(<a href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
-
-<br />
-
--------
-CSS:
- .divineName { font-variant: small-caps; }
- .wordsOfJesus { color: red; }
- .transChangeSupplied { font-style: italic; }
- .overline { text-decoration: overline; }
- .indent1 { margin-left: 10px }
- .indent2 { margin-left: 20px }
- .indent3 { margin-left: 30px }
- .indent4 { margin-left: 40px }
-
--------
-RenderText:
- Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God,
--------
--------
-
-
-Whitespace tests around headings:
-
-
- <h1 class="bookHeader">Old Testament</h1>
-
- <h1 class="bookHeader">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
-
- <h1 class="bookHeader">Introduction and Outline</h1>
-
-<br />
-This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
-<br />
-<ul>
- <li><sup>1</i>Creation of Heaven and Earth, 1:1-2:4a</li>
- <li><sup>2</i>Creation of Man and Woman, 2:4b-25</li>
- <li><sub>3</sub>Fall, 3:1-24</li>
- <li>...</li>
-</ul>
- <br />
-Tables work like this: <table><tbody>
- <tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
- <tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
- <tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
- </tbody></table>
-<br />
-
- <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
-
-
- <h3>Creation of the Heavens and the Earth</h3>
-
-<br />
-
-[ Genesis 1:1 ] In the beginning God created the heaven and the earth. <br />
-
-<br />
-
-[ Genesis 1:2 ] Text of verse 2.
diff --git a/tests/testsuite/osis.sh b/tests/testsuite/osis.sh
deleted file mode 100755
index 689e64d..0000000
--- a/tests/testsuite/osis.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/sh
-
-rm -rf osis/
-mkdir -p osis/mods.d
-mkdir -p osis/modules
-
-cat > osis/mods.d/osisreference.conf <<!
-[OSISReference]
-DataPath=./modules/
-ModDrv=zText
-Encoding=UTF-8
-BlockType=BOOK
-CompressType=ZIP
-SourceType=OSIS
-Lang=en
-GlobalOptionFilter=OSISStrongs
-GlobalOptionFilter=OSISMorph
-GlobalOptionFilter=OSISFootnotes
-GlobalOptionFilter=OSISHeadings
-GlobalOptionFilter=OSISRedLetterWords
-Feature=StrongsNumbers
-!
-
-../../utilities/osis2mod osis/modules/ osisReference.xml -z 2>&1 | grep -v \$Rev
-
-cd osis && ../../osistest OSISReference
diff --git a/tests/testsuite/osisReference.xml b/tests/testsuite/osisReference.xml
index d07f774..0ef86e5 100644
--- a/tests/testsuite/osisReference.xml
+++ b/tests/testsuite/osisReference.xml
@@ -141,6 +141,16 @@
</div>
<div type="bookGroup">
<title>New Testament</title>
+ <div type="book" osisID="Matt">
+ <chapter osisID="Matt.2"/>
+ <verse osisID="Matt.2.5">
+ <w lemma="strong:G3004">They said</w> to him, <q level="1" marker="“" sID="q.5787"/><note n="A" osisID="Matt.2.5.xref.A" type="crossReference"><reference osisRef="John.7.42">John 7:42</reference></note><w lemma="strong:G965">In Bethlehem</w> <w lemma="strong:G2453">of Judea</w>; <w lemma="strong:G3779">for this</w> <w lemma="strong:G3704">is what</w> <w lemma="strong:G1125">has been written</w> <note n="1" osisID="Matt.2.5.note.1" type="explanation">Or <hi type="italic">through</hi></note><w lemma="strong:G4396">by the prophet</w>:
+ </verse>
+ <verse osisID="Matt.2.6">
+ <lg sID="lg.300"/><l sID="l.20649"/><q level="2" marker="‘" sID="q.5788"/><note n="A" osisID="Matt.2.6.xref.A" type="crossReference"><reference osisRef="Mic.5.2">Mic 5:2</reference>; <reference osisRef="John.7.42">John 7:42</reference></note><hi type="small-caps"><w lemma="strong:G965">And you, Bethlehem</w>, <w lemma="strong:G1093">land</w> <w lemma="strong:G2455">of Judah</w></hi>, <l eID="l.20649"/><l sID="l.20650"/><hi type="small-caps"><w lemma="strong:G3760">Are by no</w> <w lemma="strong:G3760">means</w> <w lemma="strong:G1646">least</w> <w lemma="strong:G1722">among</w> <w lemma="strong:G2233">the leaders</w> <w lemma="strong:G2455">of Judah</w></hi>; <l eID="l.20650"/><l sID="l.20651"/><hi type="small-caps"><w lemma="strong:G1831">For out of you shall come</w> <w lemma="strong:G1831">forth</w> <w lemma="strong:G2233">a Ruler</w></hi> <l eID="l.20651"/><l sID="l.20652"/><hi type="small-caps"><w lemma="strong:G3748">Who</w> will</hi> <note n="B" osisID="Matt.2.6.xref.B" type="crossReference"><reference osisRef="John.21.16">John 21:16</reference></note><hi type="small-caps"><w lemma="strong:G4165">shepherd</w> <w lemma="strong:G2992">My people</w> <w lemma="strong:G2474">Israel</w></hi>.<q eID="q.5788" level="2" marker="’"/><q eID="q.5787" level="1" marker="”"/> <lb type="x-end-paragraph"/>
+ </verse>
+ </chapter>
+ </div>
<div type="book" osisID="Mark">
<title type="main">THE GOSPEL ACCORDING TO <abbr expansion="Saint">ST.</abbr> MARK</title>
<chapter sID="Mark.1" osisID="Mark.1"/>
@@ -178,6 +188,11 @@ And He was in the wilderness forty days being tempted by Satan; and He was with
</div>
<chapter eID="Mark.1"/>
</div>
+ <div type="book" osisID="Acts">
+ <chapter sID="Acts.2" osisID="Acts.2"/>
+ <verse sID="Acts.2.19" osisID="Acts.2.19"/><l level="1">‘<hi type="small-caps"><w lemma="strong:G1325">And I will grant</w> <w lemma="strong:G5059">wonders</w> <w lemma="strong:G3772">in the sky</w> <w lemma="strong:G0507">above</w></hi></l><l level="1"> <hi type="small-caps"><w lemma="strong:G4592">And signs</w> <w lemma="strong:G1093">on the earth</w> <w lemma="strong:G2736">below</w></hi>,</l><l level="1"> <hi type="small-caps"><w lemma="strong:G0129">Blood</w>, <w lemma="strong:G4442">and fire</w>, <w lemma="strong:G0822">and vapor</w> <w lemma="strong:G2586">of smoke</w></hi>.</l><verse eID="Acts.2.19"/>
+ <verse sID="Acts.2.20" osisID="Acts.2.20"/><l level="1">‘<hi type="small-caps"><w lemma="strong:G2246">The sun</w> <w lemma="strong:G3344">will be turned</w> <w lemma="strong:G4655">into darkness</w></hi></l><l level="1"> <hi type="small-caps"><w lemma="strong:G4582">And the moon</w> <w lemma="strong:G0129">into blood</w></hi>,</l><l level="1"> <hi type="small-caps"><w lemma="strong:G4250">Before</w> <w lemma="strong:G3173">the great</w> <w lemma="strong:G2016">and glorious</w> <w lemma="strong:G2250">day</w> <w lemma="strong:G2962">of the Lord</w> <w lemma="strong:G2064">shall come</w></hi>.</l><verse eID="Acts.2.20"/>
+ <chapter eID="Acts.2"/>
</div>
</osisText>
</osis>
diff --git a/tests/testsuite/osis_basic.good b/tests/testsuite/osis_basic.good
new file mode 100644
index 0000000..ab3123f
--- /dev/null
+++ b/tests/testsuite/osis_basic.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/>
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/>
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God,
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ <li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+ <li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+ <li><sub>3</sub>Fall, 3:1-24</li>
+ <li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ <tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ <tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ <tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+ <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ] In the beginning God created the heaven and the earth. <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par
+}
diff --git a/tests/testsuite/osis_basic.sh b/tests/testsuite/osis_basic.sh
new file mode 100755
index 0000000..2a9066b
--- /dev/null
+++ b/tests/testsuite/osis_basic.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+rm -rf tmp/osis_basic/
+mkdir -p tmp/osis_basic/mods.d
+mkdir -p tmp/osis_basic/modules
+
+cat > tmp/osis_basic/mods.d/osisreference.conf <<!
+[OSISReference]
+DataPath=./modules/
+ModDrv=zText
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_basic/modules/ osisReference.xml -z 2>&1 | grep -v \$Rev
+
+cd tmp/osis_basic
+../../../osistest OSISReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f plain -k Acts 2:19-20 | grep -v OSISReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f RTF -k Acts 2:19-20 | grep -v OSISReference
diff --git a/tests/testsuite/osis_mod2zmod.good b/tests/testsuite/osis_mod2zmod.good
new file mode 100644
index 0000000..36ec4fe
--- /dev/null
+++ b/tests/testsuite/osis_mod2zmod.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/>
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=zOSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=zOSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/>
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God,
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ <li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+ <li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+ <li><sub>3</sub>Fall, 3:1-24</li>
+ <li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ <tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ <tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ <tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+ <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ] In the beginning God created the heaven and the earth. <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par
+}
diff --git a/tests/testsuite/osis_mod2zmod.sh b/tests/testsuite/osis_mod2zmod.sh
new file mode 100755
index 0000000..52149fc
--- /dev/null
+++ b/tests/testsuite/osis_mod2zmod.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+rm -rf tmp/osis_mod2zmod/
+mkdir -p tmp/osis_mod2zmod/mods.d
+mkdir -p tmp/osis_mod2zmod/modules
+mkdir -p tmp/osis_mod2zmod/zmodules
+
+cat > tmp/osis_mod2zmod/mods.d/osisreference.conf <<!
+[OSISReference]
+DataPath=./modules/
+ModDrv=RawText
+#ModDrv=zText
+#CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+cat > tmp/osis_mod2zmod/mods.d/zosisreference.conf <<!
+[zOSISReference]
+DataPath=./zmodules/
+#ModDrv=RawText
+ModDrv=zText
+CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_mod2zmod/modules/ osisReference.xml 2>&1 | grep -v \$Rev
+
+sed 's/OSISReference/zOSISReference/' osis_basic.good > osis_mod2zmod.good
+cd tmp/osis_mod2zmod
+../../../../utilities/mod2zmod OSISReference zmodules/ 4 2 0 abc123 > /dev/null 2>&1
+../../../osistest zOSISReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b zOSISReference -f plain -k Acts 2:19-20 | grep -v zOSISReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b zOSISReference -f RTF -k Acts 2:19-20 | grep -v zOSISReference
diff --git a/tests/testsuite/osis_osis2modcipher.good b/tests/testsuite/osis_osis2modcipher.good
new file mode 100644
index 0000000..ab3123f
--- /dev/null
+++ b/tests/testsuite/osis_osis2modcipher.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/>
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/>
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+ .divineName { font-variant: small-caps; }
+ .wordsOfJesus { color: red; }
+ .transChange { font-style: italic; }
+ .transChange.transChange-supplied { font-style: italic; }
+ .transChange.transChange-added { font-style: italic; }
+ .transChange.transChange-tenseChange::before { content: '*'; }
+ .transChange.transChange-tenseChange { font-style: normal; }
+ .transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+ .overline { text-decoration: overline; }
+ .indent1 { margin-left: 1em; }
+ .indent2 { margin-left: 2em; }
+ .indent3 { margin-left: 3em; }
+ .indent4 { margin-left: 4em; }
+ abbr { &:hover{ &:before{ content: attr(title); } } }
+ .small-caps { font-variant: small-caps; }
+ .selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+ .acrostic { text-align: center; }
+ .colophon {font-style: italic; font-size: small; display: block; }
+ .rdg { font-style: italic; }
+ .catchWord {font-style: bold; }
+ .x-p-indent {text-indent: 1em; }
+
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God,
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ <li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+ <li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+ <li><sub>3</sub>Fall, 3:1-24</li>
+ <li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ <tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ <tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ <tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+ <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ] In the beginning God created the heaven and the earth. <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par
+}
diff --git a/tests/testsuite/osis_osis2modcipher.sh b/tests/testsuite/osis_osis2modcipher.sh
new file mode 100755
index 0000000..06e6310
--- /dev/null
+++ b/tests/testsuite/osis_osis2modcipher.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+rm -rf tmp/osis_osis2modcipher/
+mkdir -p tmp/osis_osis2modcipher/mods.d
+mkdir -p tmp/osis_osis2modcipher/modules
+
+cat > tmp/osis_osis2modcipher/mods.d/osisreference.conf <<!
+[OSISReference]
+DataPath=./modules/
+ModDrv=zText
+CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_osis2modcipher/modules/ osisReference.xml -z -c abc123 2>&1 | grep -v \$Rev|grep -v "with phrase"
+
+cp osis_basic.good osis_osis2modcipher.good
+cd tmp/osis_osis2modcipher
+../../../osistest OSISReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f plain -k Acts 2:19-20 | grep -v OSISReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f RTF -k Acts 2:19-20 | grep -v OSISReference
diff --git a/tests/testsuite/runall.sh b/tests/testsuite/runall.sh
index 75eb221..fb5232b 100755
--- a/tests/testsuite/runall.sh
+++ b/tests/testsuite/runall.sh
@@ -3,7 +3,7 @@
#
# Runs entire test suite
#
-# $Id: runall.sh 2327 2009-04-22 11:42:33Z scribe $
+# $Id: runall.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/runtest.sh b/tests/testsuite/runtest.sh
index 4ef57f7..305524b 100755
--- a/tests/testsuite/runtest.sh
+++ b/tests/testsuite/runtest.sh
@@ -3,7 +3,7 @@
#
# Runs a single test
#
-# $Id: runtest.sh 2364 2009-04-29 08:10:02Z scribe $
+# $Id: runtest.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/utf8basic.good b/tests/testsuite/utf8basic.good
new file mode 100644
index 0000000..4ab4ca6
--- /dev/null
+++ b/tests/testsuite/utf8basic.good
@@ -0,0 +1,300 @@
+UTF-8 decoder capability and stress test
+----------------------------------------
+
+Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0
+
+This test file can help you examine, how your UTF-8 decoder handles
+various types of correct, malformed, or otherwise interesting UTF-8
+sequences. This file is not meant to be a conformance test. It does
+not prescribe any particular outcome. Therefore, there is no way to
+"pass" or "fail" this test file, even though the text does suggest a
+preferable decoder behaviour at some places. Its aim is, instead, to
+help you think about, and test, the behaviour of your UTF-8 decoder on a
+systematic collection of unusual inputs. Experience so far suggests
+that most first-time authors of UTF-8 decoders find at least one
+serious problem in their decoder using this file.
+
+The test lines below cover boundary conditions, malformed UTF-8
+sequences, as well as correctly encoded UTF-8 sequences of Unicode code
+points that should never occur in a correct UTF-8 file.
+
+According to ISO 10646-1:2000, sections D.7 and 2.3c, a device
+receiving UTF-8 shall interpret a "malformed sequence in the same way
+that it interprets a character that is outside the adopted subset" and
+"characters that are not within the adopted subset shall be indicated
+to the user" by a receiving device. One commonly used approach in
+UTF-8 decoders is to replace any malformed UTF-8 sequence by a
+replacement character (U+FFFD), which looks a bit like an inverted
+question mark, or a similar symbol. It might be a good idea to
+visually distinguish a malformed UTF-8 sequence from a correctly
+encoded Unicode character that is just not available in the current
+font but otherwise fully legal, even though ISO 10646-1 doesn't
+mandate this. In any case, just ignoring malformed sequences or
+unavailable characters does not conform to ISO 10646, will make
+debugging more difficult, and can lead to user confusion.
+
+Please check, whether a malformed UTF-8 sequence is (1) represented at
+all, (2) represented by exactly one single replacement character (or
+equivalent signal), and (3) the following quotation mark after an
+illegal UTF-8 sequence is correctly displayed, i.e. proper
+resynchronization takes place immediately after any malformed
+sequence. This file says "THE END" in the last line, so if you don't
+see that, your decoder crashed somehow before, which should always be
+cause for concern.
+
+All lines in this file are exactly 79 characters long (plus the line
+feed). In addition, all lines end with "|", except for the two test
+lines 2.1.1 and 2.2.1, which contain non-printable ASCII controls
+U+0000 and U+007F. If you display this file with a fixed-width font,
+these "|" characters should all line up in column 79 (right margin).
+This allows you to test quickly, whether your UTF-8 decoder finds the
+correct number of characters in every line, that is whether each
+malformed sequences is replaced by a single replacement character.
+
+Note that, as an alternative to the notion of malformed sequence used
+here, it is also a perfectly acceptable (and in some situations even
+preferable) solution to represent each individual byte of a malformed
+sequence with a replacement character. If you follow this strategy in
+your decoder, then please ignore the "|" column.
+
+
+Here come the tests: |
+ |
+1 Some correct UTF-8 text |
+ |
+You should see the Greek word 'kosme': "κόσμε" |
+ |
+2 Boundary condition test cases |
+ |
+2.1 First possible sequence of a certain length |
+ |
+2.1.1 1 byte (U-00000000): "^@" // SWORD: removed. we don't support null mid-string, <- that's a literal <caret at>
+2.1.2 2 bytes (U-00000080): "€" |
+2.1.3 3 bytes (U-00000800): "ࠀ" |
+2.1.4 4 bytes (U-00010000): "𐀀" |
+2.1.5 5 bytes (U-00200000): "�" |
+2.1.6 6 bytes (U-04000000): "�" |
+ |
+2.2 Last possible sequence of a certain length |
+ |
+2.2.1 1 byte (U-0000007F): ""
+2.2.2 2 bytes (U-000007FF): "߿" |
+2.2.3 3 bytes (U-0000FFFF): "￿" |
+2.2.4 4 bytes (U-001FFFFF): "�" |
+2.2.5 5 bytes (U-03FFFFFF): "�" |
+2.2.6 6 bytes (U-7FFFFFFF): "�" |
+ |
+2.3 Other boundary conditions |
+ |
+2.3.1 U-0000D7FF = ed 9f bf = "퟿" |
+2.3.2 U-0000E000 = ee 80 80 = "" |
+2.3.3 U-0000FFFD = ef bf bd = "�" |
+2.3.4 U-0010FFFF = f4 8f bf bf = "􏿿" |
+2.3.5 U-00110000 = f4 90 80 80 = "�" |
+ |
+3 Malformed sequences |
+ |
+3.1 Unexpected continuation bytes |
+ |
+Each unexpected continuation byte should be separately signalled as a |
+malformed sequence of its own. |
+ |
+3.1.1 First continuation byte 0x80: "�" |
+3.1.2 Last continuation byte 0xbf: "�" |
+ |
+3.1.3 2 continuation bytes: "��" |
+3.1.4 3 continuation bytes: "���" |
+3.1.5 4 continuation bytes: "����" |
+3.1.6 5 continuation bytes: "�����" |
+3.1.7 6 continuation bytes: "������" |
+3.1.8 7 continuation bytes: "�������" |
+ |
+3.1.9 Sequence of all 64 possible continuation bytes (0x80-0xbf): |
+ |
+ "���������������� |
+ ���������������� |
+ ���������������� |
+ ����������������" |
+ |
+3.2 Lonely start characters |
+ |
+3.2.1 All 32 first bytes of 2-byte sequences (0xc0-0xdf), |
+ each followed by a space character: |
+ |
+ "� � � � � � � � � � � � � � � � |
+ � � � � � � � � � � � � � � � � " |
+ |
+3.2.2 All 16 first bytes of 3-byte sequences (0xe0-0xef), |
+ each followed by a space character: |
+ |
+ "� � � � � � � � � � � � � � � � " |
+ |
+3.2.3 All 8 first bytes of 4-byte sequences (0xf0-0xf7), |
+ each followed by a space character: |
+ |
+ "� � � � � � � � " |
+ |
+3.2.4 All 4 first bytes of 5-byte sequences (0xf8-0xfb), |
+ each followed by a space character: |
+ |
+ "� � � � " |
+ |
+3.2.5 All 2 first bytes of 6-byte sequences (0xfc-0xfd), |
+ each followed by a space character: |
+ |
+ "� � " |
+ |
+3.3 Sequences with last continuation byte missing |
+ |
+All bytes of an incomplete sequence should be signalled as a single |
+malformed sequence, i.e., you should see only a single replacement |
+character in each of the next 10 tests. (Characters as in section 2) |
+ |
+3.3.1 2-byte sequence with last byte missing (U+0000): "�" |
+3.3.2 3-byte sequence with last byte missing (U+0000): "�" |
+3.3.3 4-byte sequence with last byte missing (U+0000): "�" |
+3.3.4 5-byte sequence with last byte missing (U+0000): "�" |
+3.3.5 6-byte sequence with last byte missing (U+0000): "�" |
+3.3.6 2-byte sequence with last byte missing (U-000007FF): "�" |
+3.3.7 3-byte sequence with last byte missing (U-0000FFFF): "�" |
+3.3.8 4-byte sequence with last byte missing (U-001FFFFF): "�" |
+3.3.9 5-byte sequence with last byte missing (U-03FFFFFF): "�" |
+3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): "�" |
+ |
+3.4 Concatenation of incomplete sequences |
+ |
+All the 10 sequences of 3.3 concatenated, you should see 10 malformed |
+sequences being signalled: |
+ |
+ "����������" |
+ |
+3.5 Impossible bytes |
+ |
+The following two bytes cannot appear in a correct UTF-8 string |
+ |
+3.5.1 fe = "�" |
+3.5.2 ff = "�" |
+3.5.3 fe fe ff ff = "����" |
+ |
+4 Overlong sequences |
+ |
+The following sequences are not malformed according to the letter of |
+the Unicode 2.0 standard. However, they are longer then necessary and |
+a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8 |
+decoder" should reject them just like malformed sequences for two |
+reasons: (1) It helps to debug applications if overlong sequences are |
+not treated as valid representations of characters, because this helps |
+to spot problems more quickly. (2) Overlong sequences provide |
+alternative representations of characters, that could maliciously be |
+used to bypass filters that check only for ASCII characters. For |
+instance, a 2-byte encoded line feed (LF) would not be caught by a |
+line counter that counts only 0x0a bytes, but it would still be |
+processed as a line feed by an unsafe UTF-8 decoder later in the |
+pipeline. From a security point of view, ASCII compatibility of UTF-8 |
+sequences means also, that ASCII characters are *only* allowed to be |
+represented by ASCII bytes in the range 0x00-0x7f. To ensure this |
+aspect of ASCII compatibility, use only "safe UTF-8 decoders" that |
+reject overlong UTF-8 sequences for which a shorter encoding exists. |
+ |
+4.1 Examples of an overlong ASCII character |
+ |
+With a safe UTF-8 decoder, all of the following five overlong |
+representations of the ASCII character slash ("/") should be rejected |
+like a malformed UTF-8 sequence, for instance by substituting it with |
+a replacement character. If you see a slash below, you do not have a |
+safe UTF-8 decoder! |
+ |
+4.1.1 U+002F = c0 af = "�" |
+4.1.2 U+002F = e0 80 af = "�" |
+4.1.3 U+002F = f0 80 80 af = "�" |
+4.1.4 U+002F = f8 80 80 80 af = "�" |
+4.1.5 U+002F = fc 80 80 80 80 af = "�" |
+ |
+4.2 Maximum overlong sequences |
+ |
+Below you see the highest Unicode value that is still resulting in an |
+overlong sequence if represented with the given number of bytes. This |
+is a boundary test for safe UTF-8 decoders. All five characters should |
+be rejected like malformed UTF-8 sequences. |
+ |
+4.2.1 U-0000007F = c1 bf = "�" |
+4.2.2 U-000007FF = e0 9f bf = "�" |
+4.2.3 U-0000FFFF = f0 8f bf bf = "�" |
+4.2.4 U-001FFFFF = f8 87 bf bf bf = "�" |
+4.2.5 U-03FFFFFF = fc 83 bf bf bf bf = "�" |
+ |
+4.3 Overlong representation of the NUL character |
+ |
+The following five sequences should also be rejected like malformed |
+UTF-8 sequences and should not be treated like the ASCII NUL |
+character. |
+ |
+4.3.1 U+0000 = c0 80 = "�" |
+4.3.2 U+0000 = e0 80 80 = "�" |
+4.3.3 U+0000 = f0 80 80 80 = "�" |
+4.3.4 U+0000 = f8 80 80 80 80 = "�" |
+4.3.5 U+0000 = fc 80 80 80 80 80 = "�" |
+ |
+5 Illegal code positions |
+ |
+The following UTF-8 sequences should be rejected like malformed |
+sequences, because they never represent valid ISO 10646 characters and |
+a UTF-8 decoder that accepts them might introduce security problems |
+comparable to overlong UTF-8 sequences. |
+ |
+5.1 Single UTF-16 surrogates |
+ |
+5.1.1 U+D800 = ed a0 80 = "" |
+5.1.2 U+DB7F = ed ad bf = "" |
+5.1.3 U+DB80 = ed ae 80 = "" |
+5.1.4 U+DBFF = ed af bf = "" |
+5.1.5 U+DC00 = ed b0 80 = "" |
+5.1.6 U+DF80 = ed be 80 = "" |
+5.1.7 U+DFFF = ed bf bf = "" |
+ |
+5.2 Paired UTF-16 surrogates |
+ |
+5.2.1 U+D800 U+DC00 = ed a0 80 ed b0 80 = "" |
+5.2.2 U+D800 U+DFFF = ed a0 80 ed bf bf = "" |
+5.2.3 U+DB7F U+DC00 = ed ad bf ed b0 80 = "" |
+5.2.4 U+DB7F U+DFFF = ed ad bf ed bf bf = "" |
+5.2.5 U+DB80 U+DC00 = ed ae 80 ed b0 80 = "" |
+5.2.6 U+DB80 U+DFFF = ed ae 80 ed bf bf = "" |
+5.2.7 U+DBFF U+DC00 = ed af bf ed b0 80 = "" |
+5.2.8 U+DBFF U+DFFF = ed af bf ed bf bf = "" |
+ |
+5.3 Noncharacter code positions |
+ |
+The following "noncharacters" are "reserved for internal use" by |
+applications, and according to older versions of the Unicode Standard |
+"should never be interchanged". Unicode Corrigendum #9 dropped the |
+latter restriction. Nevertheless, their presence in incoming UTF-8 data |
+can remain a potential security risk, depending on what use is made of |
+these codes subsequently. Examples of such internal use: |
+ |
+ - Some file APIs with 16-bit characters may use the integer value -1 |
+ = U+FFFF to signal an end-of-file (EOF) or error condition. |
+ |
+ - In some UTF-16 receivers, code point U+FFFE might trigger a |
+ byte-swap operation (to convert between UTF-16LE and UTF-16BE). |
+ |
+With such internal use of noncharacters, it may be desirable and safer |
+to block those code points in UTF-8 decoders, as they should never |
+occur legitimately in incoming UTF-8 data, and could trigger unsafe |
+behaviour in subsequent processing. |
+ |
+Particularly problematic noncharacters in 16-bit applications: |
+ |
+5.3.1 U+FFFE = ef bf be = "￾" |
+5.3.2 U+FFFF = ef bf bf = "￿" |
+ |
+Other noncharacters: |
+ |
+5.3.3 U+FDD0 .. U+FDEF = "﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯"|
+ |
+5.3.4 U+nFFFE U+nFFFF (for n = 1..10) |
+ |
+ "🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿 |
+ 򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿" |
+ |
+THE END |
diff --git a/tests/testsuite/utf8basic.sh b/tests/testsuite/utf8basic.sh
new file mode 100755
index 0000000..625f1ff
--- /dev/null
+++ b/tests/testsuite/utf8basic.sh
@@ -0,0 +1,10 @@
+#/bin/sh
+
+# utf8basic.good originally generated with:
+# uconv --from-code UTF-8 --to-code UTF-8 --from-callback substitute UTF-8-test.txt > utf8basic.good
+# but modified to ignore UTF-16 surrogates which are apparently illegal. We return multiple replacement
+# characters there, but the spec apparently says we are only supposed to return 1 per UTF-16 surrogate
+# there are comments in the spec about "security vulnerability" but we always check if we're at the
+# end of our buffer before continuing processing each byte (shouldn't all decoders do this?), so there
+# shouldn't be a problem. Ignoring the UTF-16 non-conformance for now.
+../utf8norm < UTF-8-test.txt
diff --git a/tests/testsuite/versekeytest.good b/tests/testsuite/versekeytest.good
index 5540c6a..62f61fb 100644
--- a/tests/testsuite/versekeytest.good
+++ b/tests/testsuite/versekeytest.good
@@ -64,3 +64,13 @@ Mark.1.1-- = Matthew 28:20
Matthew.1.1-- = Malachi 4:6
++ = Matthew 1:1
.setBook(.getBook() - 1) = Malachi 1:1
+
+Chapter math
+
+Matthew.1.1 - 1 chapter
+.setChapter(.getChapter() - 1) = Malachi 4:1
+
+Verse math
+
+Matthew.1.1 - 1 verse
+.setVerse(.getVerse() - 1) = Malachi 4:6
diff --git a/tests/testsuite/versekeytest.sh b/tests/testsuite/versekeytest.sh
index c2238c1..3bdda63 100755
--- a/tests/testsuite/versekeytest.sh
+++ b/tests/testsuite/versekeytest.sh
@@ -1,7 +1,7 @@
#!/bin/sh
#******************************************************************************
#
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: versekeytest.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/versemgrtest.sh b/tests/testsuite/versemgrtest.sh
index 91b1af8..fd2c2af 100755
--- a/tests/testsuite/versemgrtest.sh
+++ b/tests/testsuite/versemgrtest.sh
@@ -1,7 +1,7 @@
#!/bin/sh
#******************************************************************************
#
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: versemgrtest.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/verseparsing-utf8.sh b/tests/testsuite/verseparsing-utf8.sh
index 1d05007..4be346a 100755
--- a/tests/testsuite/verseparsing-utf8.sh
+++ b/tests/testsuite/verseparsing-utf8.sh
@@ -3,7 +3,7 @@
#
# This only works if --with-icu was passed to configure
#
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: verseparsing-utf8.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/verseparsing.sh b/tests/testsuite/verseparsing.sh
index aa6ae25..6e3dd42 100755
--- a/tests/testsuite/verseparsing.sh
+++ b/tests/testsuite/verseparsing.sh
@@ -1,7 +1,7 @@
#!/bin/sh
#******************************************************************************
#
-# $Id: verseparsing.sh 2796 2013-04-11 16:18:45Z scribe $
+# $Id: verseparsing.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/testsuite/vs2osisref.good b/tests/testsuite/vs2osisref.good
new file mode 100644
index 0000000..eea5961
--- /dev/null
+++ b/tests/testsuite/vs2osisref.good
@@ -0,0 +1,2 @@
+Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28
+<reference osisRef="Matt.3.1-Matt.3.12">Matt 3:1-12</reference>: {<reference osisRef="Mark.1.3-Mark.1.8">Mark 1:3-8</reference>; <reference osisRef="Luke.3.2-Luke.3.17">Luke 3:2-17</reference>;} <reference osisRef="John.1.6-John.1.8">John 1:6-8</reference>, <reference osisRef="John.1.19-John.1.28">19-28</reference>
diff --git a/tests/testsuite/vs2osisref.sh b/tests/testsuite/vs2osisref.sh
new file mode 100755
index 0000000..90c978b
--- /dev/null
+++ b/tests/testsuite/vs2osisref.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+#******************************************************************************
+#
+# $Id$
+#
+# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
+# CrossWire Bible Society
+# P. O. Box 2528
+# Tempe, AZ 85280-2528
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation version 2.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+
+echo "Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28"
+../../utilities/vs2osisref "Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28"
diff --git a/tests/testsuite/xmltag.sh b/tests/testsuite/xmltag.sh
index f223b68..4686519 100755
--- a/tests/testsuite/xmltag.sh
+++ b/tests/testsuite/xmltag.sh
@@ -1,7 +1,7 @@
#!/bin/sh
#******************************************************************************
#
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: xmltag.sh 3063 2014-03-04 13:04:11Z chrislit $
#
# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
# CrossWire Bible Society
diff --git a/tests/utf8norm.cpp b/tests/utf8norm.cpp
index a992a2e..63a9545 100644
--- a/tests/utf8norm.cpp
+++ b/tests/utf8norm.cpp
@@ -2,7 +2,7 @@
*
* utf8norm.cpp -
*
- * $Id: utf8norm.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: utf8norm.cpp 3515 2017-11-01 11:38:09Z scribe $
*
* Copyright 2009-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
@@ -23,16 +23,62 @@
#include <iostream>
#include <utilstr.h>
#include <swbuf.h>
+#if !defined(__GNUC__) && !defined(_WIN32_WCE)
+#include <io.h>
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+#include <utf8greekaccents.h>
using namespace sword;
using namespace std;
int main(int argc, char **argv) {
- const char *buf = (argc > 1) ? argv[1] : "Description=German Unrevidierte Luther Übersetzung von 1545";
+ const char *buf = (argc > 1 && argv[1][0] != '-') ? argv[1] : 0; // "Description=German Unrevidierte Luther Übersetzung von 1545";
- SWBuf fixed = assureValidUTF8(buf);
+ if (buf) {
+ SWBuf fixed = assureValidUTF8(buf);
- cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+ cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+ }
+ else {
+ SWOptionFilter *filter = 0;
+ if (argc > 1 && !strcmp(argv[1], "-ga")) filter = new UTF8GreekAccents();
+ if (filter && filter->isBoolean()) filter->setOptionValue("Off");
+ int repeat = 1;
+ if (argc > 2) repeat = atoi(argv[2]);
+ SWBuf contents = "";
+ char chunk[255];
+ int count = 254;
+ while (count > 0) {
+ count = read(STDIN_FILENO, chunk, 254);
+ if (count > 0) {
+ chunk[count] = 0;
+ contents.append(chunk);
+ }
+ }
+ SWBuf filteredContents = contents;
+ if (filter) {
+ for (int i = 0; i < repeat; ++i) {
+ filteredContents = contents;
+ filter->processText(filteredContents);
+ }
+ }
+ const unsigned char *c = (const unsigned char *)filteredContents.getRawData();
+ // UTF-32 BOM
+ __u32 ch = 0xfeff;
+// write(STDOUT_FILENO, &ch, 4);
+ while (c && *c) {
+ ch = getUniCharFromUTF8(&c);
+// ch = __swswap32(ch);
+ if (!ch) ch = 0xFFFD;
+ SWBuf c8;
+ getUTF8FromUniChar(ch, &c8);
+ write(STDOUT_FILENO, c8.getRawData(), c8.length());
+ }
+ delete filter;
+ }
return 0;
}
diff --git a/tests/versekeytest.cpp b/tests/versekeytest.cpp
index 4ae064e..d8bea6a 100644
--- a/tests/versekeytest.cpp
+++ b/tests/versekeytest.cpp
@@ -2,7 +2,7 @@
*
* versekeytest.cpp -
*
- * $Id: versekeytest.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: versekeytest.cpp 3305 2014-12-15 02:02:48Z charcoal $
*
* Copyright 2007-2013 CrossWire Bible Society (http://www.crosswire.org)
* CrossWire Bible Society
@@ -277,5 +277,19 @@ cout << currentVerse << endl;
vkey.setBook(vkey.getBook() - 1);
cout << ".setBook(.getBook() - 1) = " << vkey << "\n";
+ cout << "\nChapter math\n\n";
+
+ cout << "Matthew.1.1 - 1 chapter\n";
+ vkey = "Matthew.1.1";
+ vkey.setChapter(vkey.getChapter() - 1);
+ cout << ".setChapter(.getChapter() - 1) = " << vkey << "\n";
+
+ cout << "\nVerse math\n\n";
+
+ cout << "Matthew.1.1 - 1 verse\n";
+ vkey = "Matthew.1.1";
+ vkey.setVerse(vkey.getVerse() - 1);
+ cout << ".setVerse(.getVerse() - 1) = " << vkey << "\n";
+
return 0;
}