New upstream version 1.8.1

author: Teus Benschop <teusjannette@gmail.com> 2018-10-28 11:51:26 +0100
committer: Teus Benschop <teusjannette@gmail.com> 2018-10-28 11:51:26 +0100
commit: 1d0ff54794b5edea7cdf1d2d66710a0fa885bcc5 (patch)
tree: 8ece5f9ef437fbb151f2b22ed0c6e1a714879c7c /tests
parent: c7dbdc9161a7c460526b80fe01af49d714856126 (diff)
46 files changed, 2247 insertions, 194 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 130af8a..30818a8 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -31,6 +31,8 @@ SET(test_PROGRAMS
 	localetest
 	mgrtest
 	modtest
+	osistest
+	ldtest
 	parsekey
 	rawldidxtest
 	romantest
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ad09283..1be04d4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -5,14 +5,14 @@ AM_CPPFLAGS += -I$(top_srcdir)/include/internal/regex
 endif
 LDADD = $(top_builddir)/lib/libsword.la
 
-SUBDIRS = cppunit
+SUBDIRS = cppunit testsuite
 
 noinst_PROGRAMS = utf8norm ciphertest keytest mgrtest parsekey versekeytest \
 			vtreekeytest versemgrtest listtest casttest modtest \
 			compnone complzss localetest introtest indextest \
 			configtest keycast romantest testblocks filtertest \
 			rawldidxtest lextest swaptest swbuftest xmltest \
-			webiftest striptest osistest bibliotest
+			webiftest striptest ldtest osistest bibliotest
 
 if HAVE_ICU
 ICUPROG = icutest translittest tlitmgrtest
@@ -68,10 +68,7 @@ swbuftest_SOURCES = swbuftest.cpp
 webiftest_SOURCES = webiftest.cpp
 striptest_SOURCES = striptest.cpp
 xmltest_SOURCES = xmltest.cpp
+ldtest_SOURCES = ldtest.cpp
 osistest_SOURCES = osistest.cpp
 bibliotest_SOURCES = bibliotest.cpp
 
-EXTRA_DIST = 
-include bcppmake/Makefile.am
-include testsuite/Makefile.am
-include tmp/Makefile.am
diff --git a/tests/Makefile.in b/tests/Makefile.in
index c228d64..884969d 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2013 Free Software Foundation, Inc.
@@ -90,13 +90,11 @@ noinst_PROGRAMS = utf8norm$(EXEEXT) ciphertest$(EXEEXT) \
 	testblocks$(EXEEXT) filtertest$(EXEEXT) rawldidxtest$(EXEEXT) \
 	lextest$(EXEEXT) swaptest$(EXEEXT) swbuftest$(EXEEXT) \
 	xmltest$(EXEEXT) webiftest$(EXEEXT) striptest$(EXEEXT) \
-	osistest$(EXEEXT) bibliotest$(EXEEXT) $(am__EXEEXT_1) \
-	$(am__EXEEXT_2)
-DIST_COMMON = $(srcdir)/bcppmake/Makefile.am \
-	$(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am \
-	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
-	$(top_srcdir)/depcomp
+	ldtest$(EXEEXT) osistest$(EXEEXT) bibliotest$(EXEEXT) \
+	$(am__EXEEXT_1) $(am__EXEEXT_2)
 subdir = tests
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+	$(top_srcdir)/depcomp
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/acx_clucene.m4 \
 	$(top_srcdir)/m4/cppunit.m4 $(top_srcdir)/m4/libtool.m4 \
@@ -171,6 +169,10 @@ am_keytest_OBJECTS = keytest.$(OBJEXT)
 keytest_OBJECTS = $(am_keytest_OBJECTS)
 keytest_LDADD = $(LDADD)
 keytest_DEPENDENCIES = $(top_builddir)/lib/libsword.la
+am_ldtest_OBJECTS = ldtest.$(OBJEXT)
+ldtest_OBJECTS = $(am_ldtest_OBJECTS)
+ldtest_LDADD = $(LDADD)
+ldtest_DEPENDENCIES = $(top_builddir)/lib/libsword.la
 am_lextest_OBJECTS = lextest.$(OBJEXT)
 lextest_OBJECTS = $(am_lextest_OBJECTS)
 lextest_LDADD = $(LDADD)
@@ -295,24 +297,24 @@ SOURCES = $(bibliotest_SOURCES) $(casttest_SOURCES) \
 	$(ciphertest_SOURCES) $(complzss_SOURCES) $(compnone_SOURCES) \
 	$(compzip_SOURCES) $(configtest_SOURCES) $(filtertest_SOURCES) \
 	$(icutest_SOURCES) $(indextest_SOURCES) $(introtest_SOURCES) \
-	$(keycast_SOURCES) $(keytest_SOURCES) $(lextest_SOURCES) \
-	$(listtest_SOURCES) $(localetest_SOURCES) $(mgrtest_SOURCES) \
-	$(modtest_SOURCES) $(osistest_SOURCES) $(parsekey_SOURCES) \
-	$(rawldidxtest_SOURCES) $(romantest_SOURCES) \
-	$(striptest_SOURCES) $(swaptest_SOURCES) $(swbuftest_SOURCES) \
-	$(testblocks_SOURCES) $(tlitmgrtest_SOURCES) \
-	$(translittest_SOURCES) $(utf8norm_SOURCES) \
-	$(versekeytest_SOURCES) $(versemgrtest_SOURCES) \
-	$(vtreekeytest_SOURCES) $(webiftest_SOURCES) \
-	$(xmltest_SOURCES)
+	$(keycast_SOURCES) $(keytest_SOURCES) $(ldtest_SOURCES) \
+	$(lextest_SOURCES) $(listtest_SOURCES) $(localetest_SOURCES) \
+	$(mgrtest_SOURCES) $(modtest_SOURCES) $(osistest_SOURCES) \
+	$(parsekey_SOURCES) $(rawldidxtest_SOURCES) \
+	$(romantest_SOURCES) $(striptest_SOURCES) $(swaptest_SOURCES) \
+	$(swbuftest_SOURCES) $(testblocks_SOURCES) \
+	$(tlitmgrtest_SOURCES) $(translittest_SOURCES) \
+	$(utf8norm_SOURCES) $(versekeytest_SOURCES) \
+	$(versemgrtest_SOURCES) $(vtreekeytest_SOURCES) \
+	$(webiftest_SOURCES) $(xmltest_SOURCES)
 DIST_SOURCES = $(bibliotest_SOURCES) $(casttest_SOURCES) \
 	$(ciphertest_SOURCES) $(complzss_SOURCES) $(compnone_SOURCES) \
 	$(am__compzip_SOURCES_DIST) $(configtest_SOURCES) \
 	$(filtertest_SOURCES) $(am__icutest_SOURCES_DIST) \
 	$(indextest_SOURCES) $(introtest_SOURCES) $(keycast_SOURCES) \
-	$(keytest_SOURCES) $(lextest_SOURCES) $(listtest_SOURCES) \
-	$(localetest_SOURCES) $(mgrtest_SOURCES) $(modtest_SOURCES) \
-	$(osistest_SOURCES) $(parsekey_SOURCES) \
+	$(keytest_SOURCES) $(ldtest_SOURCES) $(lextest_SOURCES) \
+	$(listtest_SOURCES) $(localetest_SOURCES) $(mgrtest_SOURCES) \
+	$(modtest_SOURCES) $(osistest_SOURCES) $(parsekey_SOURCES) \
 	$(rawldidxtest_SOURCES) $(romantest_SOURCES) \
 	$(striptest_SOURCES) $(swaptest_SOURCES) $(swbuftest_SOURCES) \
 	$(testblocks_SOURCES) $(am__tlitmgrtest_SOURCES_DIST) \
@@ -540,14 +542,16 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
 with_conf = @with_conf@
 with_icu = @with_icu@
 with_icusword = @with_icusword@
+with_xz = @with_xz@
 with_zlib = @with_zlib@
 AUTOMAKE_OPTIONS = 1.6
 AM_CPPFLAGS = -I $(top_srcdir)/include $(am__append_1)
 LDADD = $(top_builddir)/lib/libsword.la
-SUBDIRS = cppunit
+SUBDIRS = cppunit testsuite
 @HAVE_ICU_FALSE@ICUPROG = 
 @HAVE_ICU_TRUE@ICUPROG = icutest translittest tlitmgrtest
 @HAVE_ICU_TRUE@icutest_SOURCES = icutest.cpp
@@ -584,23 +588,14 @@ swbuftest_SOURCES = swbuftest.cpp
 webiftest_SOURCES = webiftest.cpp
 striptest_SOURCES = striptest.cpp
 xmltest_SOURCES = xmltest.cpp
+ldtest_SOURCES = ldtest.cpp
 osistest_SOURCES = osistest.cpp
 bibliotest_SOURCES = bibliotest.cpp
-EXTRA_DIST = $(swbcppdir)/filtertest.bpf $(swbcppdir)/filtertest.bpr \
-	$(swbcppdir)/libsword.bpf $(swbcppdir)/libsword.bpr \
-	$(swbcppdir)/mgrtest.bpf $(swbcppdir)/mgrtest.bpr \
-	$(swbcppdir)/parsekey.bpf $(swbcppdir)/parsekey.bpr \
-	$(swbcppdir)/tests.bpg $(swtspdir)/runall.sh \
-	$(swtspdir)/runtest.sh $(swtspdir)/verseparsing.good \
-	$(swtspdir)/verseparsing.sh $(swtesttmpdir)/README
-swbcppdir = $(top_srcdir)/tests/bcppmake
-swtspdir = $(top_srcdir)/tests/testsuite
-swtesttmpdir = $(top_srcdir)/tests/tmp
 all: all-recursive
 
 .SUFFIXES:
 .SUFFIXES: .cpp .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(srcdir)/bcppmake/Makefile.am $(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
 	@for dep in $?; do \
 	  case '$(am__configure_deps)' in \
 	    *$$dep*) \
@@ -621,7 +616,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
 	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
 	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
 	esac;
-$(srcdir)/bcppmake/Makefile.am $(srcdir)/testsuite/Makefile.am $(srcdir)/tmp/Makefile.am:
 
 $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
 	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -693,6 +687,10 @@ keytest$(EXEEXT): $(keytest_OBJECTS) $(keytest_DEPENDENCIES) $(EXTRA_keytest_DEP
 	@rm -f keytest$(EXEEXT)
 	$(AM_V_CXXLD)$(CXXLINK) $(keytest_OBJECTS) $(keytest_LDADD) $(LIBS)
 
+ldtest$(EXEEXT): $(ldtest_OBJECTS) $(ldtest_DEPENDENCIES) $(EXTRA_ldtest_DEPENDENCIES) 
+	@rm -f ldtest$(EXEEXT)
+	$(AM_V_CXXLD)$(CXXLINK) $(ldtest_OBJECTS) $(ldtest_LDADD) $(LIBS)
+
 lextest$(EXEEXT): $(lextest_OBJECTS) $(lextest_DEPENDENCIES) $(EXTRA_lextest_DEPENDENCIES) 
 	@rm -f lextest$(EXEEXT)
 	$(AM_V_CXXLD)$(CXXLINK) $(lextest_OBJECTS) $(lextest_LDADD) $(LIBS)
@@ -796,6 +794,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/introtest.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keycast.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keytest.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ldtest.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lextest.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/listtest.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/localetest.Po@am__quote@
diff --git a/tests/configtest.cpp b/tests/configtest.cpp
index d196f07..9e28355 100644
--- a/tests/configtest.cpp
+++ b/tests/configtest.cpp
@@ -2,7 +2,7 @@
  *
  *  configtest.cpp -	
  *
- * $Id: configtest.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: configtest.cpp 3515 2017-11-01 11:38:09Z scribe $
  *
  * Copyright 2001-2013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
@@ -31,7 +31,7 @@ int main(int argc, char **argv) {
     config["Section1"]["Entry1"] = "Value1";
     config["Section1"]["Entry2"] = "oops";
     config["Section1"]["Entry2"] = "Value2";
-    config.Save();
+    config.save();
     SWConfig config2("./test1.conf");
     std::cout << "Should be Value2: " << config2["Section1"]["Entry2"] << std::endl;
     return 0;
diff --git a/tests/cppunit/Makefile.in b/tests/cppunit/Makefile.in
index f5c8bfa..e4e285d 100644
--- a/tests/cppunit/Makefile.in
+++ b/tests/cppunit/Makefile.in
@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994-2013 Free Software Foundation, Inc.
@@ -528,9 +528,11 @@ target_vendor = @target_vendor@
 top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
 with_conf = @with_conf@
 with_icu = @with_icu@
 with_icusword = @with_icusword@
+with_xz = @with_xz@
 with_zlib = @with_zlib@
 LDADD = $(top_builddir)/lib/libsword.la
 LibSword_SOURCES = main.cpp stringmgr_test.cpp swbuf_test.cpp url_test.cpp versekey_test.cpp 
diff --git a/tests/ldtest.cpp b/tests/ldtest.cpp
new file mode 100644
index 0000000..369984f
--- /dev/null
+++ b/tests/ldtest.cpp
@@ -0,0 +1,32 @@
+#include <swmodule.h>
+#include <swmgr.h>
+#include <iostream>
+#include <stdio.h>
+
+using namespace sword;
+using namespace std;
+
+int main(int argc, char **argv) {
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <lexdict_name>\n", *argv);
+		exit(-1);
+	}
+
+	SWMgr library;
+	SWModule *module = library.getModule(argv[1]);
+	if (!module) {
+		cerr << "\nCouldn't find module: " << argv[1] << "\n" << endl;
+		exit(-2);
+	}
+	int i = 0;
+	for ((*module) = TOP; !module->popError(); module->increment()) {
+		cout << module->getKeyText() << ": " << module->stripText() << "\n";
+		if (++i > 10) {
+			cout << "ERROR: more than 10 iterations. stopping.\n";
+			break;
+		}
+
+	}
+	return 0;
+}
diff --git a/tests/localetest.cpp b/tests/localetest.cpp
index 26f6b28..8f222b8 100644
--- a/tests/localetest.cpp
+++ b/tests/localetest.cpp
@@ -2,7 +2,7 @@
  *
  *  localetest.cpp -	
  *
- * $Id: localetest.cpp 3005 2014-01-09 04:06:11Z greg.hellings $
+ * $Id: localetest.cpp 3001 2014-01-03 19:23:42Z scribe $
  *
  * Copyright 2000-2013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
diff --git a/tests/osistest.cpp b/tests/osistest.cpp
index cfc09bb..77fda1a 100644
--- a/tests/osistest.cpp
+++ b/tests/osistest.cpp
@@ -2,7 +2,7 @@
  *
  *  osistest.cpp -	
  *
- * $Id: osistest.cpp 3185 2014-04-17 04:32:00Z greg.hellings $
+ * $Id: osistest.cpp 3548 2017-12-10 05:11:38Z scribe $
  *
  * Copyright 20122013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
@@ -76,6 +76,9 @@ int main(int argc, char **argv) {
 	module->setKey("Ps.3.1");
 	outputCurrentVerse(module);
 
+	module->setKey("Matt.2.6");
+	outputCurrentVerse(module);
+
 	module->setKey("Mark.1.14");
 	outputCurrentVerse(module);
 
diff --git a/tests/testsuite/CMakeLists.txt b/tests/testsuite/CMakeLists.txt
index 48dc01a..c695236 100644
--- a/tests/testsuite/CMakeLists.txt
+++ b/tests/testsuite/CMakeLists.txt
@@ -1,12 +1,12 @@
 #############################################################################
 # This file will actually be responsible for running the tests
-# 
+#
+
+FILE(WRITE "${CMAKE_CURRENT_BINARY_DIR}/sword.conf" "[Install]\nLocalePath=${CMAKE_CURRENT_SOURCE_DIR}/../../")
 
 ADD_CUSTOM_TARGET(
 	tests_configure
-	COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/*.sh ${CMAKE_CURRENT_BINARY_DIR}
-	COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/*.good ${CMAKE_CURRENT_BINARY_DIR}
-	COMMAND echo \"[Install]\\nLocalePath=${CMAKE_CURRENT_SOURCE_DIR}/../../\" > ${CMAKE_CURRENT_BINARY_DIR}/sword.conf
+	COMMAND cp "${CMAKE_CURRENT_SOURCE_DIR}/*.{sh,good,imp,txt,xml}" "${CMAKE_CURRENT_BINARY_DIR}"
 	DEPENDS ${test_PROGRAMS}
 	WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
 )
diff --git a/tests/testsuite/Makefile.am b/tests/testsuite/Makefile.am
index db26c79..33e0b22 100644
--- a/tests/testsuite/Makefile.am
+++ b/tests/testsuite/Makefile.am
@@ -1,6 +1,12 @@
-swtspdir = $(top_srcdir)/tests/testsuite
+all:
+	@echo
+	@echo to run tests type: ./runall.sh
+	@echo or make run
+	@echo
 
-EXTRA_DIST += $(swtspdir)/runall.sh
-EXTRA_DIST += $(swtspdir)/runtest.sh
-EXTRA_DIST += $(swtspdir)/verseparsing.good
-EXTRA_DIST += $(swtspdir)/verseparsing.sh
+run:
+	./runall.sh
+
+clean-local:
+	-rm -rf tmp
+	-rm -rf *.try
diff --git a/tests/testsuite/Makefile.in b/tests/testsuite/Makefile.in
new file mode 100644
index 0000000..aa8d9a1
--- /dev/null
+++ b/tests/testsuite/Makefile.in
@@ -0,0 +1,480 @@
+# Makefile.in generated by automake 1.13.4 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = tests/testsuite
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am README
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_clucene.m4 \
+	$(top_srcdir)/m4/cppunit.m4 $(top_srcdir)/m4/libtool.m4 \
+	$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+	$(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_CFLAGS = @AM_CFLAGS@
+AM_CXXFLAGS = @AM_CXXFLAGS@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CLUCENE2_CFLAGS = @CLUCENE2_CFLAGS@
+CLUCENE2_LIBS = @CLUCENE2_LIBS@
+CLUCENE_CXXFLAGS = @CLUCENE_CXXFLAGS@
+CLUCENE_LIBS = @CLUCENE_LIBS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPPUNIT_CFLAGS = @CPPUNIT_CFLAGS@
+CPPUNIT_CONFIG = @CPPUNIT_CONFIG@
+CPPUNIT_LIBS = @CPPUNIT_LIBS@
+CURL_CONFIG = @CURL_CONFIG@
+CURL_LIBS = @CURL_LIBS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+ICU_CONFIG = @ICU_CONFIG@
+ICU_IOLIBS = @ICU_IOLIBS@
+ICU_LIBS = @ICU_LIBS@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+SWORD_VERSION_MAJOR = @SWORD_VERSION_MAJOR@
+SWORD_VERSION_MICRO = @SWORD_VERSION_MICRO@
+SWORD_VERSION_MINOR = @SWORD_VERSION_MINOR@
+SWORD_VERSION_NANO = @SWORD_VERSION_NANO@
+SWORD_VERSION_NUM = @SWORD_VERSION_NUM@
+SWORD_VERSION_STR = @SWORD_VERSION_STR@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+dir_confdef = @dir_confdef@
+docdir = @docdir@
+dvidir = @dvidir@
+enable_debug = @enable_debug@
+enable_profile = @enable_profile@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_mingw32 = @target_mingw32@
+target_os = @target_os@
+target_system = @target_system@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+with_bzip2 = @with_bzip2@
+with_conf = @with_conf@
+with_icu = @with_icu@
+with_icusword = @with_icusword@
+with_xz = @with_xz@
+with_zlib = @with_zlib@
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign tests/testsuite/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign tests/testsuite/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-local mostlyclean-am
+
+distclean: distclean-am
+	-rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+	clean-local cscopelist-am ctags-am distclean distclean-generic \
+	distclean-libtool distdir dvi dvi-am html html-am info info-am \
+	install install-am install-data install-data-am install-dvi \
+	install-dvi-am install-exec install-exec-am install-html \
+	install-html-am install-info install-info-am install-man \
+	install-pdf install-pdf-am install-ps install-ps-am \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags-am uninstall uninstall-am
+
+all:
+	@echo
+	@echo to run tests type: ./runall.sh
+	@echo or make run
+	@echo
+
+run:
+	./runall.sh
+
+clean-local:
+	-rm -rf tmp
+	-rm -rf *.try
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/tests/testsuite/README b/tests/testsuite/README
new file mode 100644
index 0000000..a07a0c9
--- /dev/null
+++ b/tests/testsuite/README
@@ -0,0 +1,24 @@
+All tests are represented by a <test>.sh / <test>.good file pair.
+
+To run a test:
+
+./runtest.sh test
+
+This will run test.sh > test.try and compare test.try to test.good and report any differences (failures).
+
+To run all tests:
+
+./runall.sh
+
+===================================
+
+To create a new test, do whatever you want in your new mytest.sh file:
+call executables, do anything you'd like, and output the results which
+matter for a good test.
+
+When all is running fine, output your .good file with:
+
+./mytest.sh > mytest.good
+
+That's it.  Simple right?  :)  So make more unit tests!
+
diff --git a/tests/testsuite/UTF-8-test.txt b/tests/testsuite/UTF-8-test.txt
new file mode 100644
index 0000000..78c859c
--- /dev/null
+++ b/tests/testsuite/UTF-8-test.txt
@@ -0,0 +1,300 @@
+UTF-8 decoder capability and stress test
+----------------------------------------
+
+Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0
+
+This test file can help you examine, how your UTF-8 decoder handles
+various types of correct, malformed, or otherwise interesting UTF-8
+sequences. This file is not meant to be a conformance test. It does
+not prescribe any particular outcome. Therefore, there is no way to
+"pass" or "fail" this test file, even though the text does suggest a
+preferable decoder behaviour at some places. Its aim is, instead, to
+help you think about, and test, the behaviour of your UTF-8 decoder on a
+systematic collection of unusual inputs. Experience so far suggests
+that most first-time authors of UTF-8 decoders find at least one
+serious problem in their decoder using this file.
+
+The test lines below cover boundary conditions, malformed UTF-8
+sequences, as well as correctly encoded UTF-8 sequences of Unicode code
+points that should never occur in a correct UTF-8 file.
+
+According to ISO 10646-1:2000, sections D.7 and 2.3c, a device
+receiving UTF-8 shall interpret a "malformed sequence in the same way
+that it interprets a character that is outside the adopted subset" and
+"characters that are not within the adopted subset shall be indicated
+to the user" by a receiving device. One commonly used approach in
+UTF-8 decoders is to replace any malformed UTF-8 sequence by a
+replacement character (U+FFFD), which looks a bit like an inverted
+question mark, or a similar symbol. It might be a good idea to
+visually distinguish a malformed UTF-8 sequence from a correctly
+encoded Unicode character that is just not available in the current
+font but otherwise fully legal, even though ISO 10646-1 doesn't
+mandate this. In any case, just ignoring malformed sequences or
+unavailable characters does not conform to ISO 10646, will make
+debugging more difficult, and can lead to user confusion.
+
+Please check, whether a malformed UTF-8 sequence is (1) represented at
+all, (2) represented by exactly one single replacement character (or
+equivalent signal), and (3) the following quotation mark after an
+illegal UTF-8 sequence is correctly displayed, i.e. proper
+resynchronization takes place immediately after any malformed
+sequence. This file says "THE END" in the last line, so if you don't
+see that, your decoder crashed somehow before, which should always be
+cause for concern.
+
+All lines in this file are exactly 79 characters long (plus the line
+feed). In addition, all lines end with "|", except for the two test
+lines 2.1.1 and 2.2.1, which contain non-printable ASCII controls
+U+0000 and U+007F. If you display this file with a fixed-width font,
+these "|" characters should all line up in column 79 (right margin).
+This allows you to test quickly, whether your UTF-8 decoder finds the
+correct number of characters in every line, that is whether each
+malformed sequences is replaced by a single replacement character.
+
+Note that, as an alternative to the notion of malformed sequence used
+here, it is also a perfectly acceptable (and in some situations even
+preferable) solution to represent each individual byte of a malformed
+sequence with a replacement character. If you follow this strategy in
+your decoder, then please ignore the "|" column.
+
+
+Here come the tests:                                                          |
+                                                                              |
+1  Some correct UTF-8 text                                                    |
+                                                                              |
+You should see the Greek word 'kosme':       "κόσμε"                          |
+                                                                              |
+2  Boundary condition test cases                                              |
+                                                                              |
+2.1  First possible sequence of a certain length                              |
+                                                                              |
+2.1.1  1 byte  (U-00000000):        "^@" // SWORD: removed. we don't support null mid-string, <- that's a literal <caret at>
+2.1.2  2 bytes (U-00000080):        ""                                       |
+2.1.3  3 bytes (U-00000800):        "ࠀ"                                       |
+2.1.4  4 bytes (U-00010000):        "𐀀"                                       |
+2.1.5  5 bytes (U-00200000):        "�����"                                       |
+2.1.6  6 bytes (U-04000000):        "������"                                       |
+                                                                              |
+2.2  Last possible sequence of a certain length                               |
+                                                                              |
+2.2.1  1 byte  (U-0000007F):        ""                                        
+2.2.2  2 bytes (U-000007FF):        "߿"                                       |
+2.2.3  3 bytes (U-0000FFFF):        ""                                       |
+2.2.4  4 bytes (U-001FFFFF):        "����"                                       |
+2.2.5  5 bytes (U-03FFFFFF):        "�����"                                       |
+2.2.6  6 bytes (U-7FFFFFFF):        "������"                                       |
+                                                                              |
+2.3  Other boundary conditions                                                |
+                                                                              |
+2.3.1  U-0000D7FF = ed 9f bf = "퟿"                                            |
+2.3.2  U-0000E000 = ee 80 80 = ""                                            |
+2.3.3  U-0000FFFD = ef bf bd = "�"                                            |
+2.3.4  U-0010FFFF = f4 8f bf bf = "􏿿"                                         |
+2.3.5  U-00110000 = f4 90 80 80 = "����"                                         |
+                                                                              |
+3  Malformed sequences                                                        |
+                                                                              |
+3.1  Unexpected continuation bytes                                            |
+                                                                              |
+Each unexpected continuation byte should be separately signalled as a         |
+malformed sequence of its own.                                                |
+                                                                              |
+3.1.1  First continuation byte 0x80: "�"                                      |
+3.1.2  Last  continuation byte 0xbf: "�"                                      |
+                                                                              |
+3.1.3  2 continuation bytes: "��"                                             |
+3.1.4  3 continuation bytes: "���"                                            |
+3.1.5  4 continuation bytes: "����"                                           |
+3.1.6  5 continuation bytes: "�����"                                          |
+3.1.7  6 continuation bytes: "������"                                         |
+3.1.8  7 continuation bytes: "�������"                                        |
+                                                                              |
+3.1.9  Sequence of all 64 possible continuation bytes (0x80-0xbf):            |
+                                                                              |
+   "����������������                                                          |
+    ����������������                                                          |
+    ����������������                                                          |
+    ����������������"                                                         |
+                                                                              |
+3.2  Lonely start characters                                                  |
+                                                                              |
+3.2.1  All 32 first bytes of 2-byte sequences (0xc0-0xdf),                    |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � � � � � � � � �                                           |
+    � � � � � � � � � � � � � � � � "                                         |
+                                                                              |
+3.2.2  All 16 first bytes of 3-byte sequences (0xe0-0xef),                    |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � � � � � � � � � "                                         |
+                                                                              |
+3.2.3  All 8 first bytes of 4-byte sequences (0xf0-0xf7),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � "                                                         |
+                                                                              |
+3.2.4  All 4 first bytes of 5-byte sequences (0xf8-0xfb),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � "                                                                 |
+                                                                              |
+3.2.5  All 2 first bytes of 6-byte sequences (0xfc-0xfd),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � "                                                                     |
+                                                                              |
+3.3  Sequences with last continuation byte missing                            |
+                                                                              |
+All bytes of an incomplete sequence should be signalled as a single           |
+malformed sequence, i.e., you should see only a single replacement            |
+character in each of the next 10 tests. (Characters as in section 2)          |
+                                                                              |
+3.3.1  2-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.2  3-byte sequence with last byte missing (U+0000):     "��"               |
+3.3.3  4-byte sequence with last byte missing (U+0000):     "���"               |
+3.3.4  5-byte sequence with last byte missing (U+0000):     "����"               |
+3.3.5  6-byte sequence with last byte missing (U+0000):     "�����"               |
+3.3.6  2-byte sequence with last byte missing (U-000007FF): "�"               |
+3.3.7  3-byte sequence with last byte missing (U-0000FFFF): "�"               |
+3.3.8  4-byte sequence with last byte missing (U-001FFFFF): "���"               |
+3.3.9  5-byte sequence with last byte missing (U-03FFFFFF): "����"               |
+3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): "�����"               |
+                                                                              |
+3.4  Concatenation of incomplete sequences                                    |
+                                                                              |
+All the 10 sequences of 3.3 concatenated, you should see 10 malformed         |
+sequences being signalled:                                                    |
+                                                                              |
+   "�����������������������������"                                                               |
+                                                                              |
+3.5  Impossible bytes                                                         |
+                                                                              |
+The following two bytes cannot appear in a correct UTF-8 string               |
+                                                                              |
+3.5.1  fe = "�"                                                               |
+3.5.2  ff = "�"                                                               |
+3.5.3  fe fe ff ff = "����"                                                   |
+                                                                              |
+4  Overlong sequences                                                         |
+                                                                              |
+The following sequences are not malformed according to the letter of          |
+the Unicode 2.0 standard. However, they are longer then necessary and         |
+a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8         |
+decoder" should reject them just like malformed sequences for two             |
+reasons: (1) It helps to debug applications if overlong sequences are         |
+not treated as valid representations of characters, because this helps        |
+to spot problems more quickly. (2) Overlong sequences provide                 |
+alternative representations of characters, that could maliciously be          |
+used to bypass filters that check only for ASCII characters. For              |
+instance, a 2-byte encoded line feed (LF) would not be caught by a            |
+line counter that counts only 0x0a bytes, but it would still be               |
+processed as a line feed by an unsafe UTF-8 decoder later in the              |
+pipeline. From a security point of view, ASCII compatibility of UTF-8         |
+sequences means also, that ASCII characters are *only* allowed to be          |
+represented by ASCII bytes in the range 0x00-0x7f. To ensure this             |
+aspect of ASCII compatibility, use only "safe UTF-8 decoders" that            |
+reject overlong UTF-8 sequences for which a shorter encoding exists.          |
+                                                                              |
+4.1  Examples of an overlong ASCII character                                  |
+                                                                              |
+With a safe UTF-8 decoder, all of the following five overlong                 |
+representations of the ASCII character slash ("/") should be rejected         |
+like a malformed UTF-8 sequence, for instance by substituting it with         |
+a replacement character. If you see a slash below, you do not have a          |
+safe UTF-8 decoder!                                                           |
+                                                                              |
+4.1.1 U+002F = c0 af             = "��"                                        |
+4.1.2 U+002F = e0 80 af          = "���"                                        |
+4.1.3 U+002F = f0 80 80 af       = "����"                                        |
+4.1.4 U+002F = f8 80 80 80 af    = "�����"                                        |
+4.1.5 U+002F = fc 80 80 80 80 af = "������"                                        |
+                                                                              |
+4.2  Maximum overlong sequences                                               |
+                                                                              |
+Below you see the highest Unicode value that is still resulting in an         |
+overlong sequence if represented with the given number of bytes. This         |
+is a boundary test for safe UTF-8 decoders. All five characters should        |
+be rejected like malformed UTF-8 sequences.                                   |
+                                                                              |
+4.2.1  U-0000007F = c1 bf             = "��"                                   |
+4.2.2  U-000007FF = e0 9f bf          = "���"                                   |
+4.2.3  U-0000FFFF = f0 8f bf bf       = "����"                                   |
+4.2.4  U-001FFFFF = f8 87 bf bf bf    = "�����"                                   |
+4.2.5  U-03FFFFFF = fc 83 bf bf bf bf = "������"                                   |
+                                                                              |
+4.3  Overlong representation of the NUL character                             |
+                                                                              |
+The following five sequences should also be rejected like malformed           |
+UTF-8 sequences and should not be treated like the ASCII NUL                  |
+character.                                                                    |
+                                                                              |
+4.3.1  U+0000 = c0 80             = "��"                                       |
+4.3.2  U+0000 = e0 80 80          = "���"                                       |
+4.3.3  U+0000 = f0 80 80 80       = "����"                                       |
+4.3.4  U+0000 = f8 80 80 80 80    = "�����"                                       |
+4.3.5  U+0000 = fc 80 80 80 80 80 = "������"                                       |
+                                                                              |
+5  Illegal code positions                                                     |
+                                                                              |
+The following UTF-8 sequences should be rejected like malformed               |
+sequences, because they never represent valid ISO 10646 characters and        |
+a UTF-8 decoder that accepts them might introduce security problems           |
+comparable to overlong UTF-8 sequences.                                       |
+                                                                              |
+5.1 Single UTF-16 surrogates                                                  |
+                                                                              |
+5.1.1  U+D800 = ed a0 80 = "���"                                                |
+5.1.2  U+DB7F = ed ad bf = "���"                                                |
+5.1.3  U+DB80 = ed ae 80 = "���"                                                |
+5.1.4  U+DBFF = ed af bf = "���"                                                |
+5.1.5  U+DC00 = ed b0 80 = "���"                                                |
+5.1.6  U+DF80 = ed be 80 = "���"                                                |
+5.1.7  U+DFFF = ed bf bf = "���"                                                |
+                                                                              |
+5.2 Paired UTF-16 surrogates                                                  |
+                                                                              |
+5.2.1  U+D800 U+DC00 = ed a0 80 ed b0 80 = "������"                               |
+5.2.2  U+D800 U+DFFF = ed a0 80 ed bf bf = "������"                               |
+5.2.3  U+DB7F U+DC00 = ed ad bf ed b0 80 = "������"                               |
+5.2.4  U+DB7F U+DFFF = ed ad bf ed bf bf = "������"                               |
+5.2.5  U+DB80 U+DC00 = ed ae 80 ed b0 80 = "������"                               |
+5.2.6  U+DB80 U+DFFF = ed ae 80 ed bf bf = "������"                               |
+5.2.7  U+DBFF U+DC00 = ed af bf ed b0 80 = "������"                               |
+5.2.8  U+DBFF U+DFFF = ed af bf ed bf bf = "������"                               |
+                                                                              |
+5.3 Noncharacter code positions                                               |
+                                                                              |
+The following "noncharacters" are "reserved for internal use" by              |
+applications, and according to older versions of the Unicode Standard         |
+"should never be interchanged". Unicode Corrigendum #9 dropped the            |
+latter restriction. Nevertheless, their presence in incoming UTF-8 data       |
+can remain a potential security risk, depending on what use is made of        |
+these codes subsequently. Examples of such internal use:                      |
+                                                                              |
+ - Some file APIs with 16-bit characters may use the integer value -1         |
+   = U+FFFF to signal an end-of-file (EOF) or error condition.                |
+                                                                              |
+ - In some UTF-16 receivers, code point U+FFFE might trigger a                |
+   byte-swap operation (to convert between UTF-16LE and UTF-16BE).            |
+                                                                              |
+With such internal use of noncharacters, it may be desirable and safer        |
+to block those code points in UTF-8 decoders, as they should never            |
+occur legitimately in incoming UTF-8 data, and could trigger unsafe           |
+behaviour in subsequent processing.                                           |
+                                                                              |
+Particularly problematic noncharacters in 16-bit applications:                |
+                                                                              |
+5.3.1  U+FFFE = ef bf be = ""                                                |
+5.3.2  U+FFFF = ef bf bf = ""                                                |
+                                                                              |
+Other noncharacters:                                                          |
+                                                                              |
+5.3.3  U+FDD0 .. U+FDEF = "﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯"|
+                                                                              |
+5.3.4  U+nFFFE U+nFFFF (for n = 1..10)                                        |
+                                                                              |
+       "🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿                                    |
+        򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿"                                   |
+                                                                              |
+THE END                                                                       |
diff --git a/tests/testsuite/gbsReference.imp b/tests/testsuite/gbsReference.imp
new file mode 100644
index 0000000..ab5bca1
--- /dev/null
+++ b/tests/testsuite/gbsReference.imp
@@ -0,0 +1,42 @@
+$$$Chapter 1
+Text of chapter 1.
+$$$/Chapter 2
+Text
+of chapter 2.
+$$$/Chapter 3/
+Text
+of
+chapter
+3.
+$$$Chapter 4/
+Text of chapter 4.
+$$$/Chapter 4/Section 1
+Text of section 1 in chapter 4.
+$$$Chapter 5
+Text of chapter 5.
+$$$/Chapter 5/Section 1/
+Text of section 1 in chapter 5.
+$$$Chapter 5/Section 2/
+Text of section 2 in chapter 5.
+$$$Chapter 6
+Text of chapter 6.
+$$$Chapter 6/Section 1
+Text of section 1 in chapter 6.
+$$$Chapter 6/Section 2
+Text of section 2 in chapter 6.
+$$$Chapter 6/Section 3
+Text of section 3 in chapter 6.
+$$$Chapter 7
+Text of chapter 7.
+$$$Chapter 7/Section 1
+Text of section 1 in chapter 7.
+$$$Chapter 7/Section 1/Subsection 1
+Text of subsection 1 in section 1 of chapter 7.
+$$$Chapter 7/Section 1/Subsection 1/Paragraph 1
+Text of paragraph 1 in subsection 1 of section 1 in chapter 7.
+$$$Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Text of sentence 1 in paragraph 1 of subsection 1 in section 1 of chapter 7.
+$$$Chapter 8
+Text of chapter 8.
+
+
diff --git a/tests/testsuite/gbs_basic.good b/tests/testsuite/gbs_basic.good
new file mode 100644
index 0000000..9b77976
--- /dev/null
+++ b/tests/testsuite/gbs_basic.good
@@ -0,0 +1,66 @@
+Chapter 1
+/Chapter 2
+/Chapter 3/
+Chapter 4/
+/Chapter 4/Section 1
+Chapter 5
+/Chapter 5/Section 1/
+Chapter 5/Section 2/
+Chapter 6
+Chapter 6/Section 1
+Chapter 6/Section 2
+Chapter 6/Section 3
+Chapter 7
+Chapter 7/Section 1
+Chapter 7/Section 1/Subsection 1
+Chapter 7/Section 1/Subsection 1/Paragraph 1
+Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Chapter 8
+
+-- Plain output
+/Chapter 7: Text of chapter 7.
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}/Chapter 8: {\f1 Text of chapter 8.}\par 
+}
+
+-- imp dump
+$$$
+
+$$$/Chapter 1
+Text of chapter 1.
+$$$/Chapter 2
+Text of chapter 2.
+$$$/Chapter 3
+Text of chapter 3.
+$$$/Chapter 4
+Text of chapter 4.
+$$$/Chapter 4/Section 1
+Text of section 1 in chapter 4.
+$$$/Chapter 5
+Text of chapter 5.
+$$$/Chapter 5/Section 1
+Text of section 1 in chapter 5.
+$$$/Chapter 5/Section 2
+Text of section 2 in chapter 5.
+$$$/Chapter 6
+Text of chapter 6.
+$$$/Chapter 6/Section 1
+Text of section 1 in chapter 6.
+$$$/Chapter 6/Section 2
+Text of section 2 in chapter 6.
+$$$/Chapter 6/Section 3
+Text of section 3 in chapter 6.
+$$$/Chapter 7
+Text of chapter 7.
+$$$/Chapter 7/Section 1
+Text of section 1 in chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1
+Text of subsection 1 in section 1 of chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1/Paragraph 1
+Text of paragraph 1 in subsection 1 of section 1 in chapter 7.
+$$$/Chapter 7/Section 1/Subsection 1/Paragraph 1/Sentence 1
+Text of sentence 1 in paragraph 1 of subsection 1 in section 1 of chapter 7.
+$$$/Chapter 8
+Text of chapter 8.
+
diff --git a/tests/testsuite/gbs_basic.sh b/tests/testsuite/gbs_basic.sh
new file mode 100755
index 0000000..689e87d
--- /dev/null
+++ b/tests/testsuite/gbs_basic.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# GenBook test: import gbsReference.imp as module GBSReference and dump it.
+rm -rf tmp/gbs_basic/
+mkdir -p tmp/gbs_basic/mods.d
+mkdir -p tmp/gbs_basic/modules
+
+cat > tmp/gbs_basic/mods.d/gbsreference.conf <<!
+[GBSReference]
+DataPath=./modules/gbsreference
+ModDrv=RawGenBook
+Encoding=UTF-8
+SourceType=OSIS
+Lang=en
+Feature=StrongsNumbers
+!
+# Lines containing "\$Rev" are filtered out of the importer's output.
+../../utilities/imp2gbs gbsReference.imp -o tmp/gbs_basic/modules/gbsreference 2>&1 | grep -v \$Rev
+# Run the lookups from the scratch module root; stop if it cannot be entered.
+cd tmp/gbs_basic || exit 1
+#../../../gbstest GBSReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b GBSReference -f plain -k "Chapter 7" | grep -v GBSReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b GBSReference -f RTF -k "Chapter 8" | grep -v GBSReference
+echo
+echo "-- imp dump"
+../../../../utilities/mod2imp GBSReference
diff --git a/tests/testsuite/greekaccents.good b/tests/testsuite/greekaccents.good
new file mode 100644
index 0000000..a39dc3b
--- /dev/null
+++ b/tests/testsuite/greekaccents.good
@@ -0,0 +1,7 @@
+Και καθως Μωυσης υψωσεν τον οφιν εν τη ερημω, ουτως υψωθηναι δει τον υιον του ανθρωπου,
+ινα πας ο πιστευων ⸂εν αυτω⸃ ⸆ εχη ζωην αιωνιον.
+ουτως γαρ ηγαπησεν ο θεος τον κοσμον, ωστε τον υιον ⸆ τον μονογενη εδωκεν, ινα πας ο πιστευων εις αυτον μη αποληται αλλ εχη ζωην αιωνιον.
+ου γαρ απεστειλεν ο θεος τον υιον ⸆ εις τον κοσμον ινα κρινη τον κοσμον, αλλ ινα σωθη ο κοσμος δι αυτου.
+ο πιστευων εις αυτον ου κρινεται· ο °δε μη πιστευων ηδη κεκριται, οτι μη πεπιστευκεν εις το ονομα του μονογενους υιου του θεου.
+αυτη δε εστιν η κρισις οτι °το φως εληλυθεν εις τον κοσμον και ⸉ηγαπησαν οι ανθρωποι μαλλον το σκοτος⸊ η το φως· ην γαρ ⸉¹αυτων πονηρα⸊ τα εργα.
+
diff --git a/tests/testsuite/greekaccents.sh b/tests/testsuite/greekaccents.sh
new file mode 100755
index 0000000..f0def67
--- /dev/null
+++ b/tests/testsuite/greekaccents.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Feeds greekaccents.txt through utf8norm -ga (expected: greekaccents.good).
+# The last parameter is an iteration count that can be used for speed
+# testing. With it set to 999999, a Dell Precision 5510 gave these timings:
+# real	0m8.952s
+# user	0m8.939s
+# sys	0m0.004s
+../utf8norm -ga 999 < greekaccents.txt
diff --git a/tests/testsuite/greekaccents.txt b/tests/testsuite/greekaccents.txt
new file mode 100644
index 0000000..e8b3de8
--- /dev/null
+++ b/tests/testsuite/greekaccents.txt
@@ -0,0 +1,7 @@
+Καὶ καθὼς Μωϋσῆς ὕψωσεν τὸν ὄφιν ἐν τῇ ἐρήμῳ, οὕτως ὑψωθῆναι δεῖ τὸν υἱὸν τοῦ ἀνθρώπου,
+ἵνα πᾶς ὁ πιστεύων ⸂ἐν αὐτῷ⸃ ⸆ ἔχῃ ζωὴν αἰώνιον.
+οὕτως γὰρ ἠγάπησεν ὁ θεὸς τὸν κόσμον, ὥστε τὸν υἱὸν ⸆ τὸν μονογενῆ ἔδωκεν, ἵνα πᾶς ὁ πιστεύων εἰς αὐτὸν μὴ ἀπόληται ἀλλ᾿ ἔχῃ ζωὴν αἰώνιον.
+οὐ γὰρ ἀπέστειλεν ὁ θεὸς τὸν υἱὸν ⸆ εἰς τὸν κόσμον ἵνα κρίνῃ τὸν κόσμον, ἀλλ᾿ ἵνα σωθῇ ὁ κόσμος δι᾿ αὐτοῦ.
+ὁ πιστεύων εἰς αὐτὸν οὐ κρίνεται· ὁ °δὲ μὴ πιστεύων ἤδη κέκριται, ὅτι μὴ πεπίστευκεν εἰς τὸ ὄνομα τοῦ μονογενοῦς υἱοῦ τοῦ θεοῦ.
+αὕτη δέ ἐστιν ἡ κρίσις ὅτι °τὸ φῶς ἐλήλυθεν εἰς τὸν κόσμον καὶ ⸉ἠγάπησαν οἱ ἄνθρωποι μᾶλλον τὸ σκότος⸊ ἢ τὸ φῶς· ἦν γὰρ ⸉¹αὐτῶν πονηρὰ⸊ τὰ ἔργα.
+
diff --git a/tests/testsuite/ldr12n.good b/tests/testsuite/ldr12n.good
new file mode 100644
index 0000000..1906020
--- /dev/null
+++ b/tests/testsuite/ldr12n.good
@@ -0,0 +1,24 @@
+0001
+0002
+0003
+4
+0005
+0006
+0001
+0002
+0003
+4
+0005
+0006
+0001: Body of 1
+0002: Body of 2
+0003: Body of 3
+0005: Body of 5
+0006: Body of 6
+4: Body of 4
+00001: Body of 1
+00002: Body of 2
+00003: Body of 3
+00004: Body of 4
+00005: Body of 5
+00006: Body of 6
diff --git a/tests/testsuite/ldr12n.imp b/tests/testsuite/ldr12n.imp
new file mode 100644
index 0000000..17cecfc
--- /dev/null
+++ b/tests/testsuite/ldr12n.imp
@@ -0,0 +1,12 @@
+$$$0001
+Body of 1
+$$$0002
+Body of 2
+$$$0003
+Body of 3
+$$$4
+Body of 4
+$$$0005
+Body of 5
+$$$0006
+Body of 6
diff --git a/tests/testsuite/ldr12n.sh b/tests/testsuite/ldr12n.sh
new file mode 100755
index 0000000..a406d0a
--- /dev/null
+++ b/tests/testsuite/ldr12n.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Lexicon / Dictionary regularization tests to make sure we pad and lookup correctly
+
+rm -rf tmp/ldr12n/
+mkdir -p tmp/ldr12n/mods.d
+mkdir -p tmp/ldr12n/modules
+# Conf for module ldr12n: StrongsPadding is set to false.
+cat > tmp/ldr12n/mods.d/ldr12n.conf <<!
+[ldr12n]
+DataPath=./modules/ldr12n
+ModDrv=RawLD
+Encoding=UTF-8
+SourceType=Plain
+Lang=en
+StrongsPadding=false
+!
+# Conf for module ldr12np: same settings, but StrongsPadding is set to true.
+cat > tmp/ldr12n/mods.d/ldr12np.conf <<!
+[ldr12np]
+DataPath=./modules/ldr12np
+ModDrv=RawLD
+Encoding=UTF-8
+SourceType=Plain
+Lang=en
+StrongsPadding=true
+!
+# Both modules come from ldr12n.imp; only ldr12n gets -P (presumably "no key padding" - confirm).
+../../utilities/imp2ld ldr12n.imp -P -o tmp/ldr12n/modules/ldr12n 2>&1 | grep -v \$Rev
+../../utilities/imp2ld ldr12n.imp -o tmp/ldr12n/modules/ldr12np 2>&1 | grep -v \$Rev
+# Run ldtest on each module from within tmp/ldr12n; each step runs only if the previous one succeeded.
+cd tmp/ldr12n && ../../../ldtest ldr12n && ../../../ldtest ldr12np
diff --git a/tests/testsuite/listtest.sh b/tests/testsuite/listtest.sh
index 855d161..0666979 100755
--- a/tests/testsuite/listtest.sh
+++ b/tests/testsuite/listtest.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #******************************************************************************
 #
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: listtest.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/osis.good b/tests/testsuite/osis.good
deleted file mode 100644
index 508e148..0000000
--- a/tests/testsuite/osis.good
+++ /dev/null
@@ -1,102 +0,0 @@
-SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
-Key:
-Psalms 3:1
--------
-Preverse Header 0:
-Raw:
-<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/> 
--------
-Rendered Header:
- <h3>A Psalm of David, when he fled from Absalom his son.</h3>
-
-<br />
- 
--------
-CSS:
-		.divineName { font-variant: small-caps; }
-		.wordsOfJesus { color: red; }
-		.transChangeSupplied { font-style: italic; }
-		.overline        { text-decoration: overline; }
-		.indent1         { margin-left: 10px }
-		.indent2         { margin-left: 20px }
-		.indent3         { margin-left: 30px }
-		.indent4         { margin-left: 40px }
-	
--------
-RenderText:
- <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
-<span class="line indent0">many <span class="transChangeSupplied">are</span> they that rise up against me.</span><br />
-
--------
--------
-
-Key:
-Mark 1:14
--------
-Preverse Header 0:
-Raw:
-<div sID="gen22" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen23" type="x-p"/> 
--------
-Rendered Header:
- <h3>The Beginning of the Ministry of Jesus</h3>
-
-<h3>(<a href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
-
-<br />
-
--------
-CSS:
-		.divineName { font-variant: small-caps; }
-		.wordsOfJesus { color: red; }
-		.transChangeSupplied { font-style: italic; }
-		.overline        { text-decoration: overline; }
-		.indent1         { margin-left: 10px }
-		.indent2         { margin-left: 20px }
-		.indent3         { margin-left: 30px }
-		.indent4         { margin-left: 40px }
-	
--------
-RenderText:
- Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God, 
--------
--------
-
-
-Whitespace tests around headings:
-
-
- <h1 class="bookHeader">Old Testament</h1>
-
- <h1 class="bookHeader">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
-
- <h1 class="bookHeader">Introduction and Outline</h1>
-
-<br />
-This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
-<br />
-<ul>
- 	<li><sup>1</i>Creation of Heaven and Earth, 1:1-2:4a</li>
-	<li><sup>2</i>Creation of Man and Woman, 2:4b-25</li>
-	<li><sub>3</sub>Fall, 3:1-24</li>
-	<li>...</li>
-</ul>
- <br />
-Tables work like this: <table><tbody>
- 	<tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
- 	<tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
- 	<tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
- </tbody></table>
-<br />
-
-  <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
-
-
- <h3>Creation of the Heavens and the Earth</h3>
-
-<br />
-
-[ Genesis 1:1 ]  In the beginning God created the heaven and the earth.  <br />
-
-<br />
-
-[ Genesis 1:2 ] Text of verse 2.
diff --git a/tests/testsuite/osis.sh b/tests/testsuite/osis.sh
deleted file mode 100755
index 689e64d..0000000
--- a/tests/testsuite/osis.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/sh
-
-rm -rf osis/
-mkdir -p osis/mods.d
-mkdir -p osis/modules
-
-cat > osis/mods.d/osisreference.conf <<!
-[OSISReference]
-DataPath=./modules/
-ModDrv=zText
-Encoding=UTF-8
-BlockType=BOOK
-CompressType=ZIP
-SourceType=OSIS
-Lang=en
-GlobalOptionFilter=OSISStrongs
-GlobalOptionFilter=OSISMorph
-GlobalOptionFilter=OSISFootnotes
-GlobalOptionFilter=OSISHeadings
-GlobalOptionFilter=OSISRedLetterWords
-Feature=StrongsNumbers
-!
-
-../../utilities/osis2mod osis/modules/ osisReference.xml -z 2>&1 | grep -v \$Rev
-
-cd osis && ../../osistest OSISReference
diff --git a/tests/testsuite/osisReference.xml b/tests/testsuite/osisReference.xml
index d07f774..0ef86e5 100644
--- a/tests/testsuite/osisReference.xml
+++ b/tests/testsuite/osisReference.xml
@@ -141,6 +141,16 @@
 </div>
 <div type="bookGroup">
  <title>New Testament</title>
+ <div type="book" osisID="Matt">
+  <chapter osisID="Matt.2"/>
+   <verse osisID="Matt.2.5">
+    <w lemma="strong:G3004">They said</w> to him, <q level="1" marker="“" sID="q.5787"/><note n="A" osisID="Matt.2.5.xref.A" type="crossReference"><reference osisRef="John.7.42">John 7:42</reference></note><w lemma="strong:G965">In Bethlehem</w> <w lemma="strong:G2453">of Judea</w>; <w lemma="strong:G3779">for this</w> <w lemma="strong:G3704">is what</w> <w lemma="strong:G1125">has been written</w> <note n="1" osisID="Matt.2.5.note.1" type="explanation">Or <hi type="italic">through</hi></note><w lemma="strong:G4396">by the prophet</w>:
+   </verse>
+   <verse osisID="Matt.2.6">
+    <lg sID="lg.300"/><l sID="l.20649"/><q level="2" marker="‘" sID="q.5788"/><note n="A" osisID="Matt.2.6.xref.A" type="crossReference"><reference osisRef="Mic.5.2">Mic 5:2</reference>; <reference osisRef="John.7.42">John 7:42</reference></note><hi type="small-caps"><w lemma="strong:G965">And you, Bethlehem</w>, <w lemma="strong:G1093">land</w> <w lemma="strong:G2455">of Judah</w></hi>, <l eID="l.20649"/><l sID="l.20650"/><hi type="small-caps"><w lemma="strong:G3760">Are by no</w> <w lemma="strong:G3760">means</w> <w lemma="strong:G1646">least</w> <w lemma="strong:G1722">among</w> <w lemma="strong:G2233">the leaders</w> <w lemma="strong:G2455">of Judah</w></hi>; <l eID="l.20650"/><l sID="l.20651"/><hi type="small-caps"><w lemma="strong:G1831">For out of you shall come</w> <w lemma="strong:G1831">forth</w> <w lemma="strong:G2233">a Ruler</w></hi> <l eID="l.20651"/><l sID="l.20652"/><hi type="small-caps"><w lemma="strong:G3748">Who</w> will</hi> <note n="B" osisID="Matt.2.6.xref.B" type="crossReference"><reference osisRef="John.21.16">John 21:16</reference></note><hi type="small-caps"><w lemma="strong:G4165">shepherd</w> <w lemma="strong:G2992">My people</w> <w lemma="strong:G2474">Israel</w></hi>.<q eID="q.5788" level="2" marker="’"/><q eID="q.5787" level="1" marker="”"/>  <lb type="x-end-paragraph"/>
+   </verse>
+  </chapter>
+ </div>
  <div type="book" osisID="Mark">
   <title type="main">THE GOSPEL ACCORDING TO <abbr expansion="Saint">ST.</abbr> MARK</title>
   <chapter sID="Mark.1" osisID="Mark.1"/>
@@ -178,6 +188,11 @@ And He was in the wilderness forty days being tempted by Satan; and He was with
    </div>
   <chapter eID="Mark.1"/>
  </div>
+ <div type="book" osisID="Acts">
+  <chapter sID="Acts.2" osisID="Acts.2"/>
+    <verse sID="Acts.2.19" osisID="Acts.2.19"/><l level="1">‘<hi type="small-caps"><w lemma="strong:G1325">And I will grant</w> <w lemma="strong:G5059">wonders</w> <w lemma="strong:G3772">in the sky</w> <w lemma="strong:G0507">above</w></hi></l><l level="1"> <hi type="small-caps"><w lemma="strong:G4592">And signs</w> <w lemma="strong:G1093">on the earth</w> <w lemma="strong:G2736">below</w></hi>,</l><l level="1"> <hi type="small-caps"><w lemma="strong:G0129">Blood</w>, <w lemma="strong:G4442">and fire</w>, <w lemma="strong:G0822">and vapor</w> <w lemma="strong:G2586">of smoke</w></hi>.</l><verse eID="Acts.2.19"/>
+    <verse sID="Acts.2.20" osisID="Acts.2.20"/><l level="1">‘<hi type="small-caps"><w lemma="strong:G2246">The sun</w> <w lemma="strong:G3344">will be turned</w> <w lemma="strong:G4655">into darkness</w></hi></l><l level="1"> <hi type="small-caps"><w lemma="strong:G4582">And the moon</w> <w lemma="strong:G0129">into blood</w></hi>,</l><l level="1"> <hi type="small-caps"><w lemma="strong:G4250">Before</w> <w lemma="strong:G3173">the great</w> <w lemma="strong:G2016">and glorious</w> <w lemma="strong:G2250">day</w> <w lemma="strong:G2962">of the Lord</w> <w lemma="strong:G2064">shall come</w></hi>.</l><verse eID="Acts.2.20"/>
+  <chapter eID="Acts.2"/>
 </div>
 </osisText>
 </osis>
diff --git a/tests/testsuite/osis_basic.good b/tests/testsuite/osis_basic.good
new file mode 100644
index 0000000..ab3123f
--- /dev/null
+++ b/tests/testsuite/osis_basic.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/> 
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+ 
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+  
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/> 
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God, 
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ 	<li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+	<li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+	<li><sub>3</sub>Fall, 3:1-24</li>
+	<li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ 	<tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ 	<tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ 	<tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+  <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ]  In the beginning God created the heaven and the earth.  <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+ 
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par 
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par 
+}
diff --git a/tests/testsuite/osis_basic.sh b/tests/testsuite/osis_basic.sh
new file mode 100755
index 0000000..2a9066b
--- /dev/null
+++ b/tests/testsuite/osis_basic.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+rm -rf tmp/osis_basic/  # OSIS import/render test: rebuild the scratch tree from nothing on every run
+mkdir -p tmp/osis_basic/mods.d  # holds the module .conf written below
+mkdir -p tmp/osis_basic/modules  # osis2mod output directory (DataPath=./modules/)
+
+cat > tmp/osis_basic/mods.d/osisreference.conf <<!  # defines module OSISReference as zText
+[OSISReference]
+DataPath=./modules/
+ModDrv=zText
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_basic/modules/ osisReference.xml -z 2>&1 | grep -v \$Rev  # import the test OSIS file; grep -v drops output lines containing "$Rev"
+
+cd tmp/osis_basic  # every tool path below is relative to this directory
+../../../osistest OSISReference  # run osistest against the freshly built module
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f plain -k Acts 2:19-20 | grep -v OSISReference  # plain format for Acts 2:19-20; lines naming the module are filtered out
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f RTF -k Acts 2:19-20 | grep -v OSISReference  # same passage in RTF format, same filter
diff --git a/tests/testsuite/osis_mod2zmod.good b/tests/testsuite/osis_mod2zmod.good
new file mode 100644
index 0000000..36ec4fe
--- /dev/null
+++ b/tests/testsuite/osis_mod2zmod.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/> 
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+ 
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=zOSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=zOSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+  
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/> 
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God, 
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ 	<li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+	<li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+	<li><sub>3</sub>Fall, 3:1-24</li>
+	<li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ 	<tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ 	<tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ 	<tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+  <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ]  In the beginning God created the heaven and the earth.  <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+ 
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par 
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par 
+}
diff --git a/tests/testsuite/osis_mod2zmod.sh b/tests/testsuite/osis_mod2zmod.sh
new file mode 100755
index 0000000..52149fc
--- /dev/null
+++ b/tests/testsuite/osis_mod2zmod.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+rm -rf tmp/osis_mod2zmod/  # mod2zmod conversion test: rebuild the scratch tree from nothing on every run
+mkdir -p tmp/osis_mod2zmod/mods.d  # holds both module .conf files written below
+mkdir -p tmp/osis_mod2zmod/modules  # osis2mod output directory for the source module
+mkdir -p tmp/osis_mod2zmod/zmodules  # target directory handed to mod2zmod
+
+cat > tmp/osis_mod2zmod/mods.d/osisreference.conf <<!  # source module OSISReference: ModDrv=RawText, data in ./modules/
+[OSISReference]
+DataPath=./modules/
+ModDrv=RawText
+#ModDrv=zText
+#CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+cat > tmp/osis_mod2zmod/mods.d/zosisreference.conf <<!  # converted module zOSISReference: ModDrv=zText, CipherKey=abc123, data in ./zmodules/
+[zOSISReference]
+DataPath=./zmodules/
+#ModDrv=RawText
+ModDrv=zText
+CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_mod2zmod/modules/ osisReference.xml 2>&1 | grep -v \$Rev  # import without -z; grep -v drops output lines containing "$Rev"
+
+sed 's/OSISReference/zOSISReference/' osis_basic.good > osis_mod2zmod.good  # regenerate the expected output from osis_basic.good, renaming the first OSISReference on each line
+cd tmp/osis_mod2zmod  # every tool path below is relative to this directory
+../../../../utilities/mod2zmod OSISReference zmodules/ 4 2 0 abc123 > /dev/null 2>&1  # output discarded; last arg equals CipherKey above. NOTE(review): meaning of "4 2 0" is not shown here - check mod2zmod usage
+../../../osistest zOSISReference  # run osistest against the converted module
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b zOSISReference -f plain -k Acts 2:19-20 | grep -v zOSISReference  # plain format for Acts 2:19-20; lines naming the module are filtered out
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b zOSISReference -f RTF -k Acts 2:19-20 | grep -v zOSISReference  # same passage in RTF format, same filter
diff --git a/tests/testsuite/osis_osis2modcipher.good b/tests/testsuite/osis_osis2modcipher.good
new file mode 100644
index 0000000..ab3123f
--- /dev/null
+++ b/tests/testsuite/osis_osis2modcipher.good
@@ -0,0 +1,186 @@
+SUCCESS: ../../utilities/osis2mod: has finished its work and will now rest
+Key:
+Psalms 3:1
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen12" type="section"/> <title canonical="true" type="psalm">A Psalm of David, when he fled from Absalom his son.</title> <div sID="gen13" type="x-p"/> <lg sID="gen14"/> 
+-------
+Rendered Header:
+ <h3 class="title psalm canonical">A Psalm of David, when he fled from Absalom his son.</h3>
+
+<br />
+ 
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ <span class="line indent0"><span class="divineName">Lord</span>, how are they increased that trouble me!</span><br />
+<span class="line indent0">many <span class="transChange transChange-added">are</span> they that rise up against me.</span><br />
+
+-------
+-------
+
+Key:
+Matthew 2:6
+-------
+Preverse Header 0:
+Raw:
+<div></div>
+-------
+Rendered Header:
+<div class=""></div>
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+<span class="line indent0">‘<a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=1&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">And you, Bethlehem, land of Judah</span>, </span><br />
+<span class="line indent0"><span class="small-caps">Are by no means least among the leaders of Judah</span>; </span><br />
+<span class="line indent0"><span class="small-caps">For out of you shall come forth a Ruler</span> </span><br />
+<span class="line indent0"><span class="small-caps">Who will</span> <a class=" crossReference" href="passagestudy.jsp?action=showNote&type=x&value=2&module=OSISReference&passage=Matthew+2%3A6"><small><sup class="x">*x</sup></small></a><span class="small-caps">shepherd My people Israel</span>.’” <br />
+  
+-------
+-------
+
+Key:
+Mark 1:14
+-------
+Preverse Header 0:
+Raw:
+<div sID="gen25" type="section"/> <title>The Beginning of the Ministry of Jesus</title> <title type="parallel">(<reference osisRef="Matt.4.12-Matt.4.22">Matt 4:12–22</reference>; <reference osisRef="Luke.4.14">Luke 4:14</reference>, <reference osisRef="Luke.4.15">15</reference>; <reference osisRef="Luke.5.1-Luke.5.11">5:1-11</reference>) </title> <div sID="gen26" type="x-p"/> 
+-------
+Rendered Header:
+ <h3 class="title">The Beginning of the Ministry of Jesus</h3>
+
+<h3 class="title parallel">(<a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Matt.4.12-Matt.4.22&module=">Matt 4:12–22</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.14&module=">Luke 4:14</a>, <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.4.15&module=">15</a>; <a class="" href="passagestudy.jsp?action=showRef&type=scripRef&value=Luke.5.1-Luke.5.11&module=">5:1-11</a>) </h3>
+
+<br />
+
+-------
+CSS:
+		.divineName { font-variant: small-caps; }
+		.wordsOfJesus { color: red; }
+		.transChange { font-style: italic; }
+		.transChange.transChange-supplied { font-style: italic; }
+		.transChange.transChange-added { font-style: italic; }
+		.transChange.transChange-tenseChange::before { content: '*'; }
+		.transChange.transChange-tenseChange { font-style: normal; }
+		.transChange:lang(zh) { font-style: normal; text-decoration: dotted underline; }
+		.overline { text-decoration: overline; }
+		.indent1 { margin-left: 1em; }
+		.indent2 { margin-left: 2em; }
+		.indent3 { margin-left: 3em; }
+		.indent4 { margin-left: 4em; }
+		abbr { &:hover{ &:before{ content: attr(title); } } }
+		.small-caps { font-variant: small-caps; }
+		.selah { text-align: right; width: 50%; margin: 0; padding: 0; }
+		.acrostic { text-align: center; }
+		.colophon {font-style: italic; font-size: small; display: block; }
+		.rdg { font-style: italic; }
+		.catchWord {font-style: bold; }
+		.x-p-indent {text-indent: 1em; }
+	
+-------
+RenderText:
+ Now after that John was put in prison, Jesus came into Galilee, preaching the gospel of the kingdom of God, 
+-------
+-------
+
+
+Whitespace tests around headings:
+
+
+ <h1 class="bookHeader">Old Testament</h1>
+
+ <h1 class="bookHeader main">THE FIRST BOOK OF MOSES CALLED GENESIS</h1>
+
+ <h1 class="bookHeader">Introduction and Outline</h1>
+
+<br />
+This is the <b>Book of Genesis</b>, the <i>first</i> book in the Bible. It may be outlined as follows: <br />
+<br />
+<ul>
+ 	<li><sup>1</sup>Creation of Heaven and Earth, 1:1-2:4a</li>
+	<li><sup>2</sup>Creation of Man and Woman, 2:4b-25</li>
+	<li><sub>3</sub>Fall, 3:1-24</li>
+	<li>...</li>
+</ul>
+ <br />
+Tables work like this: <table><tbody>
+ 	<tr> <td><b>Column 1 Label</b></td> <td><b>Column 2 Label</b></td> </tr>
+ 	<tr> <td>Column 1, Row 1</td> <td>Column 2, Row 1</td> </tr>
+ 	<tr> <td>Column 1, Row 2</td> <td>Column 2, Row 2</td> </tr>
+ </tbody></table>
+<br />
+
+  <h2 class="chapterHeader">From Creation to Abraham (1:1–11:9)</h2>
+
+
+ <h3 class="title">Creation of the Heavens and the Earth</h3>
+
+<br />
+
+[ Genesis 1:1 ]  In the beginning God created the heaven and the earth.  <br />
+
+<br />
+
+[ Genesis 1:2 ] Text of verse 2.
+
+-- Plain output
+Acts 2:19: ‘* And I will grant wonders in the sky above *
+* And signs on the earth below *,
+* Blood, and fire, and vapor of smoke *.
+
+Acts 2:20: ‘* The sun will be turned into darkness *
+* And the moon into blood *,
+* Before the great and glorious day of the Lord shall come *.
+ 
+
+-- RTF output
+{\rtf1\ansi{\fonttbl{\f0\froman\fcharset0\fprq2 Times New Roman;}{\f1\fdecor\fprq2 Gentium;}{\f7\froman\fcharset2\fprq2 Symbol;}}Acts 2:19: {\f1 ‘{\i1 {And I will grant} {wonders} {in the sky} {above}}{\par} {\i1 {And signs} {on the earth} {below}},{\par} {\i1 {Blood}, {and fire}, {and vapor} {of smoke}}.{\par}}\par 
+Acts 2:20: {\f1 ‘{\i1 {The sun} {will be turned} {into darkness}}{\par} {\i1 {And the moon} {into blood}},{\par} {\i1 {Before} {the great} {and glorious} {day} {of the Lord} {shall come}}.{\par} }\par 
+}
diff --git a/tests/testsuite/osis_osis2modcipher.sh b/tests/testsuite/osis_osis2modcipher.sh
new file mode 100755
index 0000000..06e6310
--- /dev/null
+++ b/tests/testsuite/osis_osis2modcipher.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+rm -rf tmp/osis_osis2modcipher/
+mkdir -p tmp/osis_osis2modcipher/mods.d
+mkdir -p tmp/osis_osis2modcipher/modules
+
+cat > tmp/osis_osis2modcipher/mods.d/osisreference.conf <<!
+[OSISReference]
+DataPath=./modules/
+ModDrv=zText
+CipherKey=abc123
+Encoding=UTF-8
+BlockType=BOOK
+CompressType=ZIP
+SourceType=OSIS
+Lang=en
+GlobalOptionFilter=OSISLemma
+GlobalOptionFilter=OSISStrongs
+GlobalOptionFilter=OSISMorph
+GlobalOptionFilter=OSISFootnotes
+GlobalOptionFilter=OSISHeadings
+GlobalOptionFilter=OSISRedLetterWords
+Feature=StrongsNumbers
+!
+
+../../utilities/osis2mod tmp/osis_osis2modcipher/modules/ osisReference.xml -z -c abc123 2>&1 | grep -v \$Rev|grep -v "with phrase"
+
+cp osis_basic.good osis_osis2modcipher.good
+cd tmp/osis_osis2modcipher || exit 1  # every path below is relative to the scratch dir; stop if it is missing
+../../../osistest OSISReference
+
+echo
+echo "-- Plain output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f plain -k Acts 2:19-20 | grep -v OSISReference
+echo
+echo "-- RTF output"
+../../../../utilities/diatheke/diatheke -b OSISReference -f RTF -k Acts 2:19-20 | grep -v OSISReference
diff --git a/tests/testsuite/runall.sh b/tests/testsuite/runall.sh
index 75eb221..fb5232b 100755
--- a/tests/testsuite/runall.sh
+++ b/tests/testsuite/runall.sh
@@ -3,7 +3,7 @@
 #
 # Runs entire test suite
 #
-# $Id: runall.sh 2327 2009-04-22 11:42:33Z scribe $
+# $Id: runall.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/runtest.sh b/tests/testsuite/runtest.sh
index 4ef57f7..305524b 100755
--- a/tests/testsuite/runtest.sh
+++ b/tests/testsuite/runtest.sh
@@ -3,7 +3,7 @@
 #
 # Runs a single test
 #
-# $Id: runtest.sh 2364 2009-04-29 08:10:02Z scribe $
+# $Id: runtest.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/utf8basic.good b/tests/testsuite/utf8basic.good
new file mode 100644
index 0000000..4ab4ca6
--- /dev/null
+++ b/tests/testsuite/utf8basic.good
@@ -0,0 +1,300 @@
+UTF-8 decoder capability and stress test
+----------------------------------------
+
+Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> - 2015-08-28 - CC BY 4.0
+
+This test file can help you examine, how your UTF-8 decoder handles
+various types of correct, malformed, or otherwise interesting UTF-8
+sequences. This file is not meant to be a conformance test. It does
+not prescribe any particular outcome. Therefore, there is no way to
+"pass" or "fail" this test file, even though the text does suggest a
+preferable decoder behaviour at some places. Its aim is, instead, to
+help you think about, and test, the behaviour of your UTF-8 decoder on a
+systematic collection of unusual inputs. Experience so far suggests
+that most first-time authors of UTF-8 decoders find at least one
+serious problem in their decoder using this file.
+
+The test lines below cover boundary conditions, malformed UTF-8
+sequences, as well as correctly encoded UTF-8 sequences of Unicode code
+points that should never occur in a correct UTF-8 file.
+
+According to ISO 10646-1:2000, sections D.7 and 2.3c, a device
+receiving UTF-8 shall interpret a "malformed sequence in the same way
+that it interprets a character that is outside the adopted subset" and
+"characters that are not within the adopted subset shall be indicated
+to the user" by a receiving device. One commonly used approach in
+UTF-8 decoders is to replace any malformed UTF-8 sequence by a
+replacement character (U+FFFD), which looks a bit like an inverted
+question mark, or a similar symbol. It might be a good idea to
+visually distinguish a malformed UTF-8 sequence from a correctly
+encoded Unicode character that is just not available in the current
+font but otherwise fully legal, even though ISO 10646-1 doesn't
+mandate this. In any case, just ignoring malformed sequences or
+unavailable characters does not conform to ISO 10646, will make
+debugging more difficult, and can lead to user confusion.
+
+Please check, whether a malformed UTF-8 sequence is (1) represented at
+all, (2) represented by exactly one single replacement character (or
+equivalent signal), and (3) the following quotation mark after an
+illegal UTF-8 sequence is correctly displayed, i.e. proper
+resynchronization takes place immediately after any malformed
+sequence. This file says "THE END" in the last line, so if you don't
+see that, your decoder crashed somehow before, which should always be
+cause for concern.
+
+All lines in this file are exactly 79 characters long (plus the line
+feed). In addition, all lines end with "|", except for the two test
+lines 2.1.1 and 2.2.1, which contain non-printable ASCII controls
+U+0000 and U+007F. If you display this file with a fixed-width font,
+these "|" characters should all line up in column 79 (right margin).
+This allows you to test quickly, whether your UTF-8 decoder finds the
+correct number of characters in every line, that is whether each
+malformed sequences is replaced by a single replacement character.
+
+Note that, as an alternative to the notion of malformed sequence used
+here, it is also a perfectly acceptable (and in some situations even
+preferable) solution to represent each individual byte of a malformed
+sequence with a replacement character. If you follow this strategy in
+your decoder, then please ignore the "|" column.
+
+
+Here come the tests:                                                          |
+                                                                              |
+1  Some correct UTF-8 text                                                    |
+                                                                              |
+You should see the Greek word 'kosme':       "κόσμε"                          |
+                                                                              |
+2  Boundary condition test cases                                              |
+                                                                              |
+2.1  First possible sequence of a certain length                              |
+                                                                              |
+2.1.1  1 byte  (U-00000000):        "^@" // SWORD: removed. we don't support null mid-string, <- that's a literal <caret at>
+2.1.2  2 bytes (U-00000080):        ""                                       |
+2.1.3  3 bytes (U-00000800):        "ࠀ"                                       |
+2.1.4  4 bytes (U-00010000):        "𐀀"                                       |
+2.1.5  5 bytes (U-00200000):        "�"                                       |
+2.1.6  6 bytes (U-04000000):        "�"                                       |
+                                                                              |
+2.2  Last possible sequence of a certain length                               |
+                                                                              |
+2.2.1  1 byte  (U-0000007F):        ""                                        
+2.2.2  2 bytes (U-000007FF):        "߿"                                       |
+2.2.3  3 bytes (U-0000FFFF):        ""                                       |
+2.2.4  4 bytes (U-001FFFFF):        "�"                                       |
+2.2.5  5 bytes (U-03FFFFFF):        "�"                                       |
+2.2.6  6 bytes (U-7FFFFFFF):        "�"                                       |
+                                                                              |
+2.3  Other boundary conditions                                                |
+                                                                              |
+2.3.1  U-0000D7FF = ed 9f bf = "퟿"                                            |
+2.3.2  U-0000E000 = ee 80 80 = ""                                            |
+2.3.3  U-0000FFFD = ef bf bd = "�"                                            |
+2.3.4  U-0010FFFF = f4 8f bf bf = "􏿿"                                         |
+2.3.5  U-00110000 = f4 90 80 80 = "�"                                         |
+                                                                              |
+3  Malformed sequences                                                        |
+                                                                              |
+3.1  Unexpected continuation bytes                                            |
+                                                                              |
+Each unexpected continuation byte should be separately signalled as a         |
+malformed sequence of its own.                                                |
+                                                                              |
+3.1.1  First continuation byte 0x80: "�"                                      |
+3.1.2  Last  continuation byte 0xbf: "�"                                      |
+                                                                              |
+3.1.3  2 continuation bytes: "��"                                             |
+3.1.4  3 continuation bytes: "���"                                            |
+3.1.5  4 continuation bytes: "����"                                           |
+3.1.6  5 continuation bytes: "�����"                                          |
+3.1.7  6 continuation bytes: "������"                                         |
+3.1.8  7 continuation bytes: "�������"                                        |
+                                                                              |
+3.1.9  Sequence of all 64 possible continuation bytes (0x80-0xbf):            |
+                                                                              |
+   "����������������                                                          |
+    ����������������                                                          |
+    ����������������                                                          |
+    ����������������"                                                         |
+                                                                              |
+3.2  Lonely start characters                                                  |
+                                                                              |
+3.2.1  All 32 first bytes of 2-byte sequences (0xc0-0xdf),                    |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � � � � � � � � �                                           |
+    � � � � � � � � � � � � � � � � "                                         |
+                                                                              |
+3.2.2  All 16 first bytes of 3-byte sequences (0xe0-0xef),                    |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � � � � � � � � � "                                         |
+                                                                              |
+3.2.3  All 8 first bytes of 4-byte sequences (0xf0-0xf7),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � � � � � "                                                         |
+                                                                              |
+3.2.4  All 4 first bytes of 5-byte sequences (0xf8-0xfb),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � � � "                                                                 |
+                                                                              |
+3.2.5  All 2 first bytes of 6-byte sequences (0xfc-0xfd),                     |
+       each followed by a space character:                                    |
+                                                                              |
+   "� � "                                                                     |
+                                                                              |
+3.3  Sequences with last continuation byte missing                            |
+                                                                              |
+All bytes of an incomplete sequence should be signalled as a single           |
+malformed sequence, i.e., you should see only a single replacement            |
+character in each of the next 10 tests. (Characters as in section 2)          |
+                                                                              |
+3.3.1  2-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.2  3-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.3  4-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.4  5-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.5  6-byte sequence with last byte missing (U+0000):     "�"               |
+3.3.6  2-byte sequence with last byte missing (U-000007FF): "�"               |
+3.3.7  3-byte sequence with last byte missing (U-0000FFFF): "�"               |
+3.3.8  4-byte sequence with last byte missing (U-001FFFFF): "�"               |
+3.3.9  5-byte sequence with last byte missing (U-03FFFFFF): "�"               |
+3.3.10 6-byte sequence with last byte missing (U-7FFFFFFF): "�"               |
+                                                                              |
+3.4  Concatenation of incomplete sequences                                    |
+                                                                              |
+All the 10 sequences of 3.3 concatenated, you should see 10 malformed         |
+sequences being signalled:                                                    |
+                                                                              |
+   "����������"                                                               |
+                                                                              |
+3.5  Impossible bytes                                                         |
+                                                                              |
+The following two bytes cannot appear in a correct UTF-8 string               |
+                                                                              |
+3.5.1  fe = "�"                                                               |
+3.5.2  ff = "�"                                                               |
+3.5.3  fe fe ff ff = "����"                                                   |
+                                                                              |
+4  Overlong sequences                                                         |
+                                                                              |
+The following sequences are not malformed according to the letter of          |
+the Unicode 2.0 standard. However, they are longer then necessary and         |
+a correct UTF-8 encoder is not allowed to produce them. A "safe UTF-8         |
+decoder" should reject them just like malformed sequences for two             |
+reasons: (1) It helps to debug applications if overlong sequences are         |
+not treated as valid representations of characters, because this helps        |
+to spot problems more quickly. (2) Overlong sequences provide                 |
+alternative representations of characters, that could maliciously be          |
+used to bypass filters that check only for ASCII characters. For              |
+instance, a 2-byte encoded line feed (LF) would not be caught by a            |
+line counter that counts only 0x0a bytes, but it would still be               |
+processed as a line feed by an unsafe UTF-8 decoder later in the              |
+pipeline. From a security point of view, ASCII compatibility of UTF-8         |
+sequences means also, that ASCII characters are *only* allowed to be          |
+represented by ASCII bytes in the range 0x00-0x7f. To ensure this             |
+aspect of ASCII compatibility, use only "safe UTF-8 decoders" that            |
+reject overlong UTF-8 sequences for which a shorter encoding exists.          |
+                                                                              |
+4.1  Examples of an overlong ASCII character                                  |
+                                                                              |
+With a safe UTF-8 decoder, all of the following five overlong                 |
+representations of the ASCII character slash ("/") should be rejected         |
+like a malformed UTF-8 sequence, for instance by substituting it with         |
+a replacement character. If you see a slash below, you do not have a          |
+safe UTF-8 decoder!                                                           |
+                                                                              |
+4.1.1 U+002F = c0 af             = "�"                                        |
+4.1.2 U+002F = e0 80 af          = "�"                                        |
+4.1.3 U+002F = f0 80 80 af       = "�"                                        |
+4.1.4 U+002F = f8 80 80 80 af    = "�"                                        |
+4.1.5 U+002F = fc 80 80 80 80 af = "�"                                        |
+                                                                              |
+4.2  Maximum overlong sequences                                               |
+                                                                              |
+Below you see the highest Unicode value that is still resulting in an         |
+overlong sequence if represented with the given number of bytes. This         |
+is a boundary test for safe UTF-8 decoders. All five characters should        |
+be rejected like malformed UTF-8 sequences.                                   |
+                                                                              |
+4.2.1  U-0000007F = c1 bf             = "�"                                   |
+4.2.2  U-000007FF = e0 9f bf          = "�"                                   |
+4.2.3  U-0000FFFF = f0 8f bf bf       = "�"                                   |
+4.2.4  U-001FFFFF = f8 87 bf bf bf    = "�"                                   |
+4.2.5  U-03FFFFFF = fc 83 bf bf bf bf = "�"                                   |
+                                                                              |
+4.3  Overlong representation of the NUL character                             |
+                                                                              |
+The following five sequences should also be rejected like malformed           |
+UTF-8 sequences and should not be treated like the ASCII NUL                  |
+character.                                                                    |
+                                                                              |
+4.3.1  U+0000 = c0 80             = "�"                                       |
+4.3.2  U+0000 = e0 80 80          = "�"                                       |
+4.3.3  U+0000 = f0 80 80 80       = "�"                                       |
+4.3.4  U+0000 = f8 80 80 80 80    = "�"                                       |
+4.3.5  U+0000 = fc 80 80 80 80 80 = "�"                                       |
+                                                                              |
+5  Illegal code positions                                                     |
+                                                                              |
+The following UTF-8 sequences should be rejected like malformed               |
+sequences, because they never represent valid ISO 10646 characters and        |
+a UTF-8 decoder that accepts them might introduce security problems           |
+comparable to overlong UTF-8 sequences.                                       |
+                                                                              |
+5.1 Single UTF-16 surrogates                                                  |
+                                                                              |
+5.1.1  U+D800 = ed a0 80 = "���"                                                |
+5.1.2  U+DB7F = ed ad bf = "���"                                                |
+5.1.3  U+DB80 = ed ae 80 = "���"                                                |
+5.1.4  U+DBFF = ed af bf = "���"                                                |
+5.1.5  U+DC00 = ed b0 80 = "���"                                                |
+5.1.6  U+DF80 = ed be 80 = "���"                                                |
+5.1.7  U+DFFF = ed bf bf = "���"                                                |
+                                                                              |
+5.2 Paired UTF-16 surrogates                                                  |
+                                                                              |
+5.2.1  U+D800 U+DC00 = ed a0 80 ed b0 80 = "������"                               |
+5.2.2  U+D800 U+DFFF = ed a0 80 ed bf bf = "������"                               |
+5.2.3  U+DB7F U+DC00 = ed ad bf ed b0 80 = "������"                               |
+5.2.4  U+DB7F U+DFFF = ed ad bf ed bf bf = "������"                               |
+5.2.5  U+DB80 U+DC00 = ed ae 80 ed b0 80 = "������"                               |
+5.2.6  U+DB80 U+DFFF = ed ae 80 ed bf bf = "������"                               |
+5.2.7  U+DBFF U+DC00 = ed af bf ed b0 80 = "������"                               |
+5.2.8  U+DBFF U+DFFF = ed af bf ed bf bf = "������"                               |
+                                                                              |
+5.3 Noncharacter code positions                                               |
+                                                                              |
+The following "noncharacters" are "reserved for internal use" by              |
+applications, and according to older versions of the Unicode Standard         |
+"should never be interchanged". Unicode Corrigendum #9 dropped the            |
+latter restriction. Nevertheless, their presence in incoming UTF-8 data       |
+can remain a potential security risk, depending on what use is made of        |
+these codes subsequently. Examples of such internal use:                      |
+                                                                              |
+ - Some file APIs with 16-bit characters may use the integer value -1         |
+   = U+FFFF to signal an end-of-file (EOF) or error condition.                |
+                                                                              |
+ - In some UTF-16 receivers, code point U+FFFE might trigger a                |
+   byte-swap operation (to convert between UTF-16LE and UTF-16BE).            |
+                                                                              |
+With such internal use of noncharacters, it may be desirable and safer        |
+to block those code points in UTF-8 decoders, as they should never            |
+occur legitimately in incoming UTF-8 data, and could trigger unsafe           |
+behaviour in subsequent processing.                                           |
+                                                                              |
+Particularly problematic noncharacters in 16-bit applications:                |
+                                                                              |
+5.3.1  U+FFFE = ef bf be = ""                                                |
+5.3.2  U+FFFF = ef bf bf = ""                                                |
+                                                                              |
+Other noncharacters:                                                          |
+                                                                              |
+5.3.3  U+FDD0 .. U+FDEF = "﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯"|
+                                                                              |
+5.3.4  U+nFFFE U+nFFFF (for n = 1..10)                                        |
+                                                                              |
+       "🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿                                    |
+        򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿"                                   |
+                                                                              |
+THE END                                                                       |
diff --git a/tests/testsuite/utf8basic.sh b/tests/testsuite/utf8basic.sh
new file mode 100755
index 0000000..625f1ff
--- /dev/null
+++ b/tests/testsuite/utf8basic.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+# utf8basic.good originally generated with:
+# uconv --from-code UTF-8 --to-code UTF-8 --from-callback substitute UTF-8-test.txt > utf8basic.good
+# but modified to ignore UTF-16 surrogates which are apparently illegal.  We return multiple replacement
+# characters there, but the spec apparently says we are only supposed to return 1 per UTF-16 surrogate
+# there are comments in the spec about "security vulnerability" but we always check if we're at the
+# end of our buffer before continuing processing each byte (shouldn't all decoders do this?), so there
+# shouldn't be a problem.  Ignoring the UTF-16 non-conformance for now.
+../utf8norm < UTF-8-test.txt
diff --git a/tests/testsuite/versekeytest.good b/tests/testsuite/versekeytest.good
index 5540c6a..62f61fb 100644
--- a/tests/testsuite/versekeytest.good
+++ b/tests/testsuite/versekeytest.good
@@ -64,3 +64,13 @@ Mark.1.1-- = Matthew 28:20
 Matthew.1.1-- = Malachi 4:6
 ++ = Matthew 1:1
 .setBook(.getBook() - 1) = Malachi 1:1
+
+Chapter math
+
+Matthew.1.1 - 1 chapter
+.setChapter(.getChapter() - 1) = Malachi 4:1
+
+Verse math
+
+Matthew.1.1 - 1 verse
+.setVerse(.getVerse() - 1) = Malachi 4:6
diff --git a/tests/testsuite/versekeytest.sh b/tests/testsuite/versekeytest.sh
index c2238c1..3bdda63 100755
--- a/tests/testsuite/versekeytest.sh
+++ b/tests/testsuite/versekeytest.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #******************************************************************************
 #
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: versekeytest.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/versemgrtest.sh b/tests/testsuite/versemgrtest.sh
index 91b1af8..fd2c2af 100755
--- a/tests/testsuite/versemgrtest.sh
+++ b/tests/testsuite/versemgrtest.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #******************************************************************************
 #
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: versemgrtest.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/verseparsing-utf8.sh b/tests/testsuite/verseparsing-utf8.sh
index 1d05007..4be346a 100755
--- a/tests/testsuite/verseparsing-utf8.sh
+++ b/tests/testsuite/verseparsing-utf8.sh
@@ -3,7 +3,7 @@
 #
 # This only works if --with-icu was passed to configure 
 #
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: verseparsing-utf8.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/verseparsing.sh b/tests/testsuite/verseparsing.sh
index aa6ae25..6e3dd42 100755
--- a/tests/testsuite/verseparsing.sh
+++ b/tests/testsuite/verseparsing.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #******************************************************************************
 #
-# $Id: verseparsing.sh 2796 2013-04-11 16:18:45Z scribe $
+# $Id: verseparsing.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/testsuite/vs2osisref.good b/tests/testsuite/vs2osisref.good
new file mode 100644
index 0000000..eea5961
--- /dev/null
+++ b/tests/testsuite/vs2osisref.good
@@ -0,0 +1,2 @@
+Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28
+<reference osisRef="Matt.3.1-Matt.3.12">Matt 3:1-12</reference>: {<reference osisRef="Mark.1.3-Mark.1.8">Mark 1:3-8</reference>; <reference osisRef="Luke.3.2-Luke.3.17">Luke 3:2-17</reference>;} <reference osisRef="John.1.6-John.1.8">John 1:6-8</reference>, <reference osisRef="John.1.19-John.1.28">19-28</reference>
diff --git a/tests/testsuite/vs2osisref.sh b/tests/testsuite/vs2osisref.sh
new file mode 100755
index 0000000..90c978b
--- /dev/null
+++ b/tests/testsuite/vs2osisref.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+#******************************************************************************
+#
+# $Id$
+#
+# Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
+#	CrossWire Bible Society
+#	P. O. Box 2528
+#	Tempe, AZ  85280-2528
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation version 2.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+
+echo "Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28"
+../../utilities/vs2osisref "Matt 3:1-12: {Mark 1:3-8; Luke 3:2-17;} John 1:6-8, 19-28"
diff --git a/tests/testsuite/xmltag.sh b/tests/testsuite/xmltag.sh
index f223b68..4686519 100755
--- a/tests/testsuite/xmltag.sh
+++ b/tests/testsuite/xmltag.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 #******************************************************************************
 #
-# $Id: swmgr.h 2321 2009-04-13 01:17:00Z scribe $
+# $Id: xmltag.sh 3063 2014-03-04 13:04:11Z chrislit $
 #
 # Copyright 1998-2009 CrossWire Bible Society (http://www.crosswire.org)
 #	CrossWire Bible Society
diff --git a/tests/utf8norm.cpp b/tests/utf8norm.cpp
index a992a2e..63a9545 100644
--- a/tests/utf8norm.cpp
+++ b/tests/utf8norm.cpp
@@ -2,7 +2,7 @@
  *
  *  utf8norm.cpp -	
  *
- * $Id: utf8norm.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: utf8norm.cpp 3515 2017-11-01 11:38:09Z scribe $
  *
  * Copyright 2009-2013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
@@ -23,16 +23,62 @@
 #include <iostream>
 #include <utilstr.h>
 #include <swbuf.h>
+#if !defined(__GNUC__) && !defined(_WIN32_WCE)
+#include <io.h>
+#include <direct.h>
+#else
+#include <unistd.h>
+#endif
+#include <utf8greekaccents.h>
 
 using namespace sword;
 using namespace std;
 
 int main(int argc, char **argv) {
-	const char *buf = (argc > 1) ? argv[1] : "Description=German Unrevidierte Luther Übersetzung von 1545";
+	const char *buf = (argc > 1 && argv[1][0] != '-') ? argv[1] : 0; // "Description=German Unrevidierte Luther Übersetzung von 1545";
 
-	SWBuf fixed = assureValidUTF8(buf);
+	if (buf) {
+		SWBuf fixed = assureValidUTF8(buf);
 
-	cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+		cout << "input / processed:\n" << buf << "\n" << fixed << endl;
+	}
+	else {
+		SWOptionFilter *filter = 0;
+		if (argc > 1 && !strcmp(argv[1], "-ga")) filter = new UTF8GreekAccents();
+		if (filter && filter->isBoolean()) filter->setOptionValue("Off");
+		int repeat = 1;
+		if (argc > 2) repeat = atoi(argv[2]);
+		SWBuf contents = "";
+		char chunk[255];
+		int count = 254;
+		while (count > 0) {
+			count = read(STDIN_FILENO, chunk, 254);
+			if (count > 0) {
+				chunk[count] = 0;
+				contents.append(chunk);
+			}
+		}
+		SWBuf filteredContents = contents;
+		if (filter) {
+			for (int i = 0; i < repeat; ++i) {
+				filteredContents = contents;
+				filter->processText(filteredContents);
+			}
+		}
+		const unsigned char *c = (const unsigned char *)filteredContents.getRawData();
+		// UTF-32 BOM
+		__u32 ch = 0xfeff;
+//		write(STDOUT_FILENO, &ch, 4);
+		while (c && *c) {
+			ch = getUniCharFromUTF8(&c);
+//			ch = __swswap32(ch);
+			if (!ch) ch = 0xFFFD;
+			SWBuf c8;
+		        getUTF8FromUniChar(ch, &c8);
+			write(STDOUT_FILENO, c8.getRawData(), c8.length());
+		}
+		delete filter;
+	}
 
 	return 0;
 }
diff --git a/tests/versekeytest.cpp b/tests/versekeytest.cpp
index 4ae064e..d8bea6a 100644
--- a/tests/versekeytest.cpp
+++ b/tests/versekeytest.cpp
@@ -2,7 +2,7 @@
  *
  *  versekeytest.cpp -	
  *
- * $Id: versekeytest.cpp 2833 2013-06-29 06:40:28Z chrislit $
+ * $Id: versekeytest.cpp 3305 2014-12-15 02:02:48Z charcoal $
  *
  * Copyright 2007-2013 CrossWire Bible Society (http://www.crosswire.org)
  *	CrossWire Bible Society
@@ -277,5 +277,19 @@ cout << currentVerse << endl;
 	vkey.setBook(vkey.getBook() - 1);
 	cout << ".setBook(.getBook() - 1) = " << vkey << "\n";
 
+	cout << "\nChapter math\n\n";
+
+	cout << "Matthew.1.1 - 1 chapter\n";
+	vkey = "Matthew.1.1";
+	vkey.setChapter(vkey.getChapter() - 1);
+	cout << ".setChapter(.getChapter() - 1) = " << vkey << "\n";
+
+	cout << "\nVerse math\n\n";
+
+	cout << "Matthew.1.1 - 1 verse\n";
+	vkey = "Matthew.1.1";
+	vkey.setVerse(vkey.getVerse() - 1);
+	cout << ".setVerse(.getVerse() - 1) = " << vkey << "\n";
+
 	return 0;
 }
author	Teus Benschop <teusjannette@gmail.com>	2018-10-28 11:51:26 +0100
committer	Teus Benschop <teusjannette@gmail.com>	2018-10-28 11:51:26 +0100
commit	1d0ff54794b5edea7cdf1d2d66710a0fa885bcc5 (patch)
tree	8ece5f9ef437fbb151f2b22ed0c6e1a714879c7c /tests
parent	c7dbdc9161a7c460526b80fe01af49d714856126 (diff)