summaryrefslogtreecommitdiff
path: root/testsuite
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite')
-rw-r--r--testsuite/8bit.good9
-rw-r--r--testsuite/8bit.inp9
-rwxr-xr-xtestsuite/8bit.sh55
-rwxr-xr-xtestsuite/8to7.sh72
-rw-r--r--testsuite/BOOST.tests828
-rw-r--r--testsuite/Coreutils.pm620
-rw-r--r--testsuite/CuSkip.pm39
-rw-r--r--testsuite/CuTmpdir.pm114
-rw-r--r--testsuite/PCRE.tests2386
-rw-r--r--testsuite/SPENCER.tests542
-rwxr-xr-xtestsuite/badenc.sh43
-rw-r--r--testsuite/binary.sed189
-rwxr-xr-xtestsuite/binary.sh61
-rw-r--r--testsuite/binary2.sed226
-rw-r--r--testsuite/binary3.sed204
-rw-r--r--testsuite/bsd-wrapper.sh31
-rw-r--r--testsuite/bsd.good1755
-rwxr-xr-xtestsuite/bsd.sh435
-rwxr-xr-xtestsuite/bug32082.sh81
-rwxr-xr-xtestsuite/bug32271-1.sh45
-rwxr-xr-xtestsuite/bug32271-2.sh75
-rw-r--r--testsuite/cmd-R.sh47
-rwxr-xr-xtestsuite/cmd-l.sh75
-rwxr-xr-xtestsuite/colon-with-no-label.sh31
-rw-r--r--testsuite/command-endings.sh137
-rw-r--r--testsuite/comment-n.sh95
-rw-r--r--testsuite/compile-errors.sh290
-rw-r--r--testsuite/compile-tests.sh135
-rwxr-xr-xtestsuite/convert-number.sh178
-rw-r--r--testsuite/dc.sed322
-rwxr-xr-xtestsuite/dc.sh62
-rw-r--r--testsuite/debug.pl200
-rw-r--r--testsuite/distrib.inp28
-rw-r--r--testsuite/distrib.sh126
-rw-r--r--testsuite/envvar-check64
-rwxr-xr-xtestsuite/eval.sh141
-rw-r--r--testsuite/execute-tests.sh142
-rwxr-xr-xtestsuite/follow-symlinks-stdin.sh28
-rw-r--r--testsuite/follow-symlinks.sh72
-rw-r--r--testsuite/get-mb-cur-max.c35
-rwxr-xr-xtestsuite/help-version.sh276
-rw-r--r--testsuite/help.sh38
-rwxr-xr-xtestsuite/in-place-hyphen.sh29
-rw-r--r--testsuite/in-place-suffix-backup.sh64
-rw-r--r--testsuite/init.sh618
-rw-r--r--testsuite/inplace-hold.sh32
-rwxr-xr-xtestsuite/inplace-selinux.sh51
-rwxr-xr-xtestsuite/invalid-mb-seq-UMR.sh42
-rw-r--r--testsuite/local.mk212
-rw-r--r--testsuite/mac-mf.good200
-rw-r--r--testsuite/mac-mf.inp200
-rw-r--r--testsuite/mac-mf.sed155
-rwxr-xr-xtestsuite/mac-mf.sh30
-rw-r--r--testsuite/madding.good1
-rw-r--r--testsuite/madding.inp1
-rw-r--r--testsuite/madding.sed8
-rwxr-xr-xtestsuite/madding.sh30
-rwxr-xr-xtestsuite/mb-bad-delim.sh83
-rwxr-xr-xtestsuite/mb-charclass-non-utf8.sh130
-rwxr-xr-xtestsuite/mb-match-slash.sh46
-rwxr-xr-xtestsuite/mb-y-translate.sh134
-rw-r--r--testsuite/misc.pl1206
-rwxr-xr-xtestsuite/missing-filename.sh36
-rwxr-xr-xtestsuite/newjis.sh48
-rwxr-xr-xtestsuite/newline-dfa-bug.sh38
-rw-r--r--testsuite/normalize-text.sh81
-rwxr-xr-xtestsuite/nulldata.sh87
-rwxr-xr-xtestsuite/obinary.sh111
-rw-r--r--testsuite/panic-tests.sh101
-rw-r--r--testsuite/posix-char-class.sh65
-rw-r--r--testsuite/posix-mode-ERE.sh52
-rwxr-xr-xtestsuite/posix-mode-N.sh56
-rw-r--r--testsuite/posix-mode-addr.sh89
-rwxr-xr-xtestsuite/posix-mode-bad-ref.sh32
-rw-r--r--testsuite/posix-mode-s.sh71
-rw-r--r--testsuite/ptestcases.h326
-rwxr-xr-xtestsuite/range-overlap.sh34
-rw-r--r--testsuite/recursive-escape-c.sh71
-rw-r--r--testsuite/regex-errors.sh43
-rwxr-xr-xtestsuite/regex-max-int.sh48
-rwxr-xr-xtestsuite/sandbox.sh90
-rw-r--r--testsuite/stdin-prog.sh37
-rwxr-xr-xtestsuite/stdin.sh37
-rwxr-xr-xtestsuite/subst-mb-incomplete.sh33
-rw-r--r--testsuite/subst-options.sh123
-rw-r--r--testsuite/subst-replacement.sh86
-rwxr-xr-xtestsuite/temp-file-cleanup.sh37
-rw-r--r--testsuite/test-mbrtowc.c170
-rw-r--r--testsuite/testcases.h167
-rwxr-xr-xtestsuite/title-case.sh34
-rw-r--r--testsuite/unbuffered.sh37
-rw-r--r--testsuite/uniq.good874
-rw-r--r--testsuite/uniq.inp2058
-rw-r--r--testsuite/uniq.sed20
-rw-r--r--testsuite/uniq.sh30
-rw-r--r--testsuite/utf8-ru.sh123
-rwxr-xr-xtestsuite/word-delim.sh19
-rw-r--r--testsuite/xemacs.good66
-rw-r--r--testsuite/xemacs.inp66
-rwxr-xr-xtestsuite/xemacs.sh49
100 files changed, 19158 insertions, 0 deletions
diff --git a/testsuite/8bit.good b/testsuite/8bit.good
new file mode 100644
index 0000000..1bd5178
--- /dev/null
+++ b/testsuite/8bit.good
@@ -0,0 +1,9 @@
+籠もよ み籠持ち
+掘串もよ み掘串持ち
+この丘に 菜摘ます児
+家間かな 告らさね
+そらみつ 日本の国は
+おしゃなべて われこそ居れ
+しきなべて われこそ 座せ
+わにこそは 告らめ
+家をも名をも
diff --git a/testsuite/8bit.inp b/testsuite/8bit.inp
new file mode 100644
index 0000000..8c9c4bb
--- /dev/null
+++ b/testsuite/8bit.inp
@@ -0,0 +1,9 @@
+籠もよ み籠持ち
+掘串もよ み掘串持ち
+この丘に 菜摘ます児
+家間かな 告らさね
+そらみつ 大和の国は
+おしゃなべて われこそ居れ
+しきなべて われこそ 座せ
+わにこそは 告らめ
+家をも名をも
diff --git a/testsuite/8bit.sh b/testsuite/8bit.sh
new file mode 100755
index 0000000..35a594c
--- /dev/null
+++ b/testsuite/8bit.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+
+# Adapted from sed's old "8bit" test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Original comment from '8bit.sed':
+
+# The first poem from the Man'yoshu. I like Hitomaro's poems better
+# but I couldn't find a copy of any of them in Japanese. This version
+# of this poem is from $BNc2r8E8l<-E5(B($BBh;0HG(B)$B;0>JF2(B.
+#
+# Speaking of Hitomaro, here is the English translation of one of my
+# favorites. I just know that everyone reading these test cases wants
+# to see this.
+#
+# In the autumn mountains
+# The yellow leaves are so thick.
+# Alas, how shall I seek my love
+# Who has wandered away?
+#
+# I see the messenger come
+# As the yellow leaves are falling.
+# Oh, well I remember
+# How on such a day we used to meet--
+# My lover and I!
+# -- Kakinomoto Hitomaro
+
+# The program is:
+# s/大和/日本/
+printf "s/\302\347\317\302/\306\374\313\334/\n" > 8bit-prog.sed \
+ || framework_failure_
+
+
+sed -f 8bit-prog.sed < "$abs_top_srcdir/testsuite/8bit.inp" > out || fail=1
+remove_cr_inplace out
+compare "$abs_top_srcdir/testsuite/8bit.good" out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/8to7.sh b/testsuite/8to7.sh
new file mode 100755
index 0000000..8b0e014
--- /dev/null
+++ b/testsuite/8to7.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+
+# Runner for old '8to7' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Generate the input file, containing non-ascii 8-bit octets.
+# printf with octal escape sequences is the most portable way
+# to produce these.
+{
+ printf '\344\306\244\342\244\350 \244\337\344\306\273\375\244\301\n' ;
+ printf '\267\241\266\372\244\342\244\350 ' ;
+ printf '\244\337\267\241\266\372\273\375\244\301\n' ;
+ printf '\244\263\244\316\265\326\244\313 ' ;
+ printf '\272\332\305\246\244\336\244\271\273\371\n' ;
+ printf '\262\310\264\326\244\253\244\312 \271\360\244\351\244\265\244\315\n' ;
+ printf '\244\275\244\351\244\337\244\304 ';
+ printf '\302\347\317\302\244\316\271\361\244\317\n' ;
+ printf '\244\252\244\267\244\343\244\312\244\331\244\306 ' ;
+ printf '\244\357\244\354\244\263\244\275\265\357\244\354\n' ;
+ printf '\244\267\244\255\244\312\244\331\244\306 ' ;
+ printf '\244\357\244\354\244\263\244\275 ' ;
+ printf '\272\302\244\273\n';
+ printf '\244\357\244\313\244\263\244\275\244\317 ' ;
+ printf '\271\360\244\351\244\341\n' ;
+ printf '\262\310\244\362\244\342\314\276\244\362\244\342\n';
+} > 8to7-inp || framework_failure_
+
+
+# The expected output.
+# NOTE:
+# shell expansion is OFF when the here-doc delimiter is quoted with a
+# backslash, i.e. the first 4 octets in the output will be the characters
+# '\', '3', '4', '4'.
+cat <<\EOF > 8to7-exp || framework_failure_
+\344\306\244\342\244\350 \244\337\344\306\273\375\244\301$
+\267\241\266\372\244\342\244\350 \244\337\267\241\266\372\273\375\244\
+\301$
+\244\263\244\316\265\326\244\313 \272\332\305\246\244\336\244\271\273\
+\371$
+\262\310\264\326\244\253\244\312 \271\360\244\351\244\265\244\315$
+\244\275\244\351\244\337\244\304 \302\347\317\302\244\316\271\361\244\
+\317$
+\244\252\244\267\244\343\244\312\244\331\244\306 \244\357\244\354\244\
+\263\244\275\265\357\244\354$
+\244\267\244\255\244\312\244\331\244\306 \244\357\244\354\244\263\244\
+\275 \272\302\244\273$
+\244\357\244\313\244\263\244\275\244\317 \271\360\244\351\244\341$
+\262\310\244\362\244\342\314\276\244\362\244\342$
+EOF
+
+sed -e 'l;d' 8to7-inp > 8to7-out || fail=1
+remove_cr_inplace 8to7-out
+compare 8to7-exp 8to7-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/BOOST.tests b/testsuite/BOOST.tests
new file mode 100644
index 0000000..20846d1
--- /dev/null
+++ b/testsuite/BOOST.tests
@@ -0,0 +1,828 @@
+;
+;
+; this file contains a script of tests to run through regress.exe
+;
+; comments start with a semicolon and proceed to the end of the line
+;
+; changes to regular expression compile flags start with a "-" as the first
+; non-whitespace character and consist of a list of the printable names
+; of the flags, for example "match_default"
+;
+; Other lines contain a test to perform using the current flag status
+; the first token contains the expression to compile, the second the string
+; to match it against. If the second string is "!" then the expression should
+; not compile, that is the first string is an invalid regular expression.
+; This is then followed by a list of integers that specify what should match,
+; each pair represents the starting and ending positions of a subexpression
+; starting with the zeroth subexpression (the whole match).
+; A value of -1 indicates that the subexpression should not take part in the
+; match at all, if the first value is -1 then no part of the expression should
+; match the string.
+;
+; Tests taken from BOOST testsuite and adapted to glibc regex.
+;
+; Boost Software License - Version 1.0 - August 17th, 2003
+;
+; Permission is hereby granted, free of charge, to any person or organization
+; obtaining a copy of the software and accompanying documentation covered by
+; this license (the "Software") to use, reproduce, display, distribute,
+; execute, and transmit the Software, and to prepare derivative works of the
+; Software, and to permit third-parties to whom the Software is furnished to
+; do so, all subject to the following:
+;
+; The copyright notices in the Software and this entire statement, including
+; the above license grant, this restriction and the following disclaimer,
+; must be included in all copies of the Software, in whole or in part, and
+; all derivative works of the Software, unless such copies or derivative
+; works are solely in the form of machine-executable object code generated by
+; a source language processor.
+;
+; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+; DEALINGS IN THE SOFTWARE.
+;
+
+- match_default normal REG_EXTENDED
+
+;
+; try some really simple literals:
+a a 0 1
+Z Z 0 1
+Z aaa -1 -1
+Z xxxxZZxxx 4 5
+
+; and some simple brackets:
+(a) zzzaazz 3 4 3 4
+() zzz 0 0 0 0
+() "" 0 0 0 0
+( !
+) ) 0 1
+(aa !
+aa) baa)b 1 4
+a b -1 -1
+\(\) () 0 2
+\(a\) (a) 0 3
+\() () 0 2
+(\) !
+p(a)rameter ABCparameterXYZ 3 12 4 5
+[pq](a)rameter ABCparameterXYZ 3 12 4 5
+
+; now try escaped brackets:
+- match_default bk_parens REG_BASIC
+\(a\) zzzaazz 3 4 3 4
+\(\) zzz 0 0 0 0
+\(\) "" 0 0 0 0
+\( !
+\) !
+\(aa !
+aa\) !
+() () 0 2
+(a) (a) 0 3
+(\) !
+\() !
+
+; now move on to "." wildcards
+- match_default normal REG_EXTENDED REG_STARTEND
+. a 0 1
+. \n 0 1
+. \r 0 1
+. \0 0 1
+
+;
+; now move on to the repetition ops,
+; starting with operator *
+- match_default normal REG_EXTENDED
+a* b 0 0
+ab* a 0 1
+ab* ab 0 2
+ab* sssabbbbbbsss 3 10
+ab*c* a 0 1
+ab*c* abbb 0 4
+ab*c* accc 0 4
+ab*c* abbcc 0 5
+*a !
+\<* !
+\>* !
+\n* \n\n 0 2
+\** ** 0 2
+\* * 0 1
+
+; now try operator +
+ab+ a -1 -1
+ab+ ab 0 2
+ab+ sssabbbbbbsss 3 10
+ab+c+ a -1 -1
+ab+c+ abbb -1 -1
+ab+c+ accc -1 -1
+ab+c+ abbcc 0 5
++a !
+\<+ !
+\>+ !
+\n+ \n\n 0 2
+\+ + 0 1
+\+ ++ 0 1
+\++ ++ 0 2
+
+; now try operator ?
+- match_default normal REG_EXTENDED
+a? b 0 0
+ab? a 0 1
+ab? ab 0 2
+ab? sssabbbbbbsss 3 5
+ab?c? a 0 1
+ab?c? abbb 0 2
+ab?c? accc 0 2
+ab?c? abcc 0 3
+?a !
+\<? !
+\>? !
+\n? \n\n 0 1
+\? ? 0 1
+\? ?? 0 1
+\?? ?? 0 1
+
+; now try operator {}
+- match_default normal REG_EXTENDED
+a{2} a -1 -1
+a{2} aa 0 2
+a{2} aaa 0 2
+a{2,} a -1 -1
+a{2,} aa 0 2
+a{2,} aaaaa 0 5
+a{2,4} a -1 -1
+a{2,4} aa 0 2
+a{2,4} aaa 0 3
+a{2,4} aaaa 0 4
+a{2,4} aaaaa 0 4
+a{} !
+a{2 !
+a} a} 0 2
+\{\} {} 0 2
+
+- match_default normal REG_BASIC
+a\{2\} a -1 -1
+a\{2\} aa 0 2
+a\{2\} aaa 0 2
+a\{2,\} a -1 -1
+a\{2,\} aa 0 2
+a\{2,\} aaaaa 0 5
+a\{2,4\} a -1 -1
+a\{2,4\} aa 0 2
+a\{2,4\} aaa 0 3
+a\{2,4\} aaaa 0 4
+a\{2,4\} aaaaa 0 4
+{} {} 0 2
+
+; now test the alternation operator |
+- match_default normal REG_EXTENDED
+a|b a 0 1
+a|b b 0 1
+a(b|c) ab 0 2 1 2
+a(b|c) ac 0 2 1 2
+a(b|c) ad -1 -1 -1 -1
+a\| a| 0 2
+
+; now test the set operator []
+- match_default normal REG_EXTENDED
+; try some literals first
+[abc] a 0 1
+[abc] b 0 1
+[abc] c 0 1
+[abc] d -1 -1
+[^bcd] a 0 1
+[^bcd] b -1 -1
+[^bcd] d -1 -1
+[^bcd] e 0 1
+a[b]c abc 0 3
+a[ab]c abc 0 3
+a[^ab]c adc 0 3
+a[]b]c a]c 0 3
+a[[b]c a[c 0 3
+a[-b]c a-c 0 3
+a[^]b]c adc 0 3
+a[^-b]c adc 0 3
+a[b-]c a-c 0 3
+a[b !
+a[] !
+
+; then some ranges
+[b-e] a -1 -1
+[b-e] b 0 1
+[b-e] e 0 1
+[b-e] f -1 -1
+[^b-e] a 0 1
+[^b-e] b -1 -1
+[^b-e] e -1 -1
+[^b-e] f 0 1
+a[1-3]c a2c 0 3
+a[3-1]c !
+a[1-3-5]c !
+a[1- !
+
+; and some classes
+a[[:alpha:]]c abc 0 3
+a[[:unknown:]]c !
+a[[: !
+a[[:alpha !
+a[[:alpha:] !
+a[[:alpha,:] !
+a[[:]:]]b !
+a[[:-:]]b !
+a[[:alph:]] !
+a[[:alphabet:]] !
+[[:alnum:]]+ -%@a0X_- 3 6
+[[:alpha:]]+ -%@aX_0- 3 5
+[[:blank:]]+ "a \tb" 1 4
+[[:cntrl:]]+ a\n\tb 1 3
+[[:digit:]]+ a019b 1 4
+[[:graph:]]+ " a%b " 1 4
+[[:lower:]]+ AabC 1 3
+; This test fails with STLPort, disable for now as this is a corner case anyway...
+;[[:print:]]+ "\na b\n" 1 4
+[[:punct:]]+ " %-&\t" 1 4
+[[:space:]]+ "a \n\t\rb" 1 5
+[[:upper:]]+ aBCd 1 3
+[[:xdigit:]]+ p0f3Cx 1 5
+
+; now test flag settings:
+- escape_in_lists REG_NO_POSIX_TEST
+[\n] \n 0 1
+- REG_NO_POSIX_TEST
+
+; line anchors
+- match_default normal REG_EXTENDED
+^ab ab 0 2
+^ab xxabxx -1 -1
+ab$ ab 0 2
+ab$ abxx -1 -1
+- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
+^ab ab -1 -1
+^ab xxabxx -1 -1
+ab$ ab -1 -1
+ab$ abxx -1 -1
+
+; back references
+- match_default normal REG_PERL
+a(b)\2c !
+a(b\1)c !
+a(b*)c\1d abbcbbd 0 7 1 3
+a(b*)c\1d abbcbd -1 -1
+a(b*)c\1d abbcbbbd -1 -1
+^(.)\1 abc -1 -1
+a([bc])\1d abcdabbd 4 8 5 6
+; strictly speaking this is at best ambiguous, at worst wrong, this is what most
+; re implementations will match though.
+a(([bc])\2)*d abbccd 0 6 3 5 3 4
+
+a(([bc])\2)*d abbcbd -1 -1
+a((b)*\2)*d abbbd 0 5 1 4 2 3
+; perl only:
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+(a)\1bcd aabcd 0 5 0 1
+(a)\1bc*d aabcd 0 5 0 1
+(a)\1bc*d aabd 0 4 0 1
+(a)\1bc*d aabcccd 0 7 0 1
+(a)\1bc*[ce]d aabcccd 0 7 0 1
+^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
+
+; posix only:
+- match_default extended REG_EXTENDED
+(ab*)[ab]*\1 ababaaa 0 7 0 1
+
+;
+; word operators:
+\w a 0 1
+\w z 0 1
+\w A 0 1
+\w Z 0 1
+\w _ 0 1
+\w } -1 -1
+\w ` -1 -1
+\w [ -1 -1
+\w @ -1 -1
+; non-word:
+\W a -1 -1
+\W z -1 -1
+\W A -1 -1
+\W Z -1 -1
+\W _ -1 -1
+\W } 0 1
+\W ` 0 1
+\W [ 0 1
+\W @ 0 1
+; word start:
+\<abcd " abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+; word boundary:
+\babcd " abcd" 2 6
+\bab cab -1 -1
+\bab "\nab" 1 3
+\btag ::tag 2 5
+abc\b abc 0 3
+abc\b abcd -1 -1
+abc\b abc\n 0 3
+abc\b abc:: 0 3
+; within word:
+\B ab 1 1
+a\Bb ab 0 2
+a\B ab 0 1
+a\B a -1 -1
+a\B "a " -1 -1
+
+;
+; buffer operators:
+\`abc abc 0 3
+\`abc \nabc -1 -1
+\`abc " abc" -1 -1
+abc\' abc 0 3
+abc\' abc\n -1 -1
+abc\' "abc " -1 -1
+
+;
+; now follows various complex expressions designed to try and bust the matcher:
+a(((b)))c abc 0 3 1 2 1 2 1 2
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c)d abbd 0 4 1 3
+; just gotta have one DFA-buster, of course
+a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
+; and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
+; and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
+; one really big one
+1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
+; fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
+[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
+[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
+[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
+; and as parentheses go past 9:
+(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
+(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
+(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
+(a)d|(b)c abc 1 3 -1 -1 1 2
+_+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
+
+; subtleties of matching
+;a(b)?c\1d acd 0 3 -1 -1
+; POSIX is about the following test:
+a(b)?c\1d acd -1 -1 -1 -1
+a(b?c)+d accd 0 4 2 3
+(wee|week)(knights|night) weeknights 0 10 0 3 3 10
+.* abc 0 3
+a(b|(c))d abd 0 3 1 2 -1 -1
+a(b|(c))d acd 0 3 1 2 1 2
+a(b*|c|e)d abbd 0 4 1 3
+a(b*|c|e)d acd 0 3 1 2
+a(b*|c|e)d ad 0 2 1 1
+a(b?)c abc 0 3 1 2
+a(b?)c ac 0 2 1 1
+a(b+)c abc 0 3 1 2
+a(b+)c abbbc 0 5 1 4
+a(b*)c ac 0 2 1 1
+(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
+a([bc]?)c abc 0 3 1 2
+a([bc]?)c ac 0 2 1 1
+a([bc]+)c abc 0 3 1 2
+a([bc]+)c abcc 0 4 1 3
+a([bc]+)bc abcbc 0 5 1 3
+a(bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abb 0 3 1 2
+a(bbb+|bb+|b)b abbb 0 4 1 3
+a(bbb+|bb+|b)bb abbb 0 4 1 2
+(.*).* abcdef 0 6 0 6
+(a*)* bc 0 0 0 0
+xyx*xz xyxxxxyxxxz 5 11
+
+; do we get the right subexpression when it is used more than once?
+a(b|c)*d ad 0 2 -1 -1
+a(b|c)*d abcd 0 4 2 3
+a(b|c)+d abd 0 3 1 2
+a(b|c)+d abcd 0 4 2 3
+a(b|c?)+d ad 0 2 1 1
+a(b|c){0,0}d ad 0 2 -1 -1
+a(b|c){0,1}d ad 0 2 -1 -1
+a(b|c){0,1}d abd 0 3 1 2
+a(b|c){0,2}d ad 0 2 -1 -1
+a(b|c){0,2}d abcd 0 4 2 3
+a(b|c){0,}d ad 0 2 -1 -1
+a(b|c){0,}d abcd 0 4 2 3
+a(b|c){1,1}d abd 0 3 1 2
+a(b|c){1,2}d abd 0 3 1 2
+a(b|c){1,2}d abcd 0 4 2 3
+a(b|c){1,}d abd 0 3 1 2
+a(b|c){1,}d abcd 0 4 2 3
+a(b|c){2,2}d acbd 0 4 2 3
+a(b|c){2,2}d abcd 0 4 2 3
+a(b|c){2,4}d abcd 0 4 2 3
+a(b|c){2,4}d abcbd 0 5 3 4
+a(b|c){2,4}d abcbcd 0 6 4 5
+a(b|c){2,}d abcd 0 4 2 3
+a(b|c){2,}d abcbd 0 5 3 4
+; perl only: these conflict with the POSIX test below
+;a(b|c?)+d abcd 0 4 3 3
+;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
+;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
+
+; posix only:
+- match_default extended REG_EXTENDED REG_STARTEND
+
+a(b|c?)+d abcd 0 4 2 3
+a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
+a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
+a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
+a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1
+a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3
+a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1
+a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3
+a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1
+
+- match_default normal REG_PERL
+; try to match C++ syntax elements:
+; line comment:
+//[^\n]* "++i //here is a line comment\n" 4 28
+; block comment:
+/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
+/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
+/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
+; preprocessor directives:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
+; perl only:
+^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42
+; literals:
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1
+((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24
+; strings:
+'([^\\']|\\.)*' '\\x3A' 0 6 4 5
+'([^\\']|\\.)*' '\\'' 0 4 1 3
+'([^\\']|\\.)*' '\\n' 0 4 1 3
+
+; finally try some case insensitive matches:
+- match_default normal REG_EXTENDED REG_ICASE
+; upper and lower have no meaning here so they fail, however these
+; may compile with other libraries...
+;[[:lower:]] !
+;[[:upper:]] !
+0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
+
+; known and suspected bugs:
+- match_default normal REG_EXTENDED
+\( ( 0 1
+\) ) 0 1
+\$ $ 0 1
+\^ ^ 0 1
+\. . 0 1
+\* * 0 1
+\+ + 0 1
+\? ? 0 1
+\[ [ 0 1
+\] ] 0 1
+\| | 0 1
+\\ \\ 0 1
+# # 0 1
+\# # 0 1
+a- a- 0 2
+\- - 0 1
+\{ { 0 1
+\} } 0 1
+0 0 0 1
+1 1 0 1
+9 9 0 1
+b b 0 1
+B B 0 1
+< < 0 1
+> > 0 1
+w w 0 1
+W W 0 1
+` ` 0 1
+' ' 0 1
+\n \n 0 1
+, , 0 1
+a a 0 1
+f f 0 1
+n n 0 1
+r r 0 1
+t t 0 1
+v v 0 1
+c c 0 1
+x x 0 1
+: : 0 1
+(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
+
+- match_default normal REG_EXTENDED REG_ICASE
+a A 0 1
+A a 0 1
+[abc]+ abcABC 0 6
+[ABC]+ abcABC 0 6
+[a-z]+ abcABC 0 6
+[A-Z]+ abzANZ 0 6
+[a-Z]+ abzABZ 0 6
+[A-z]+ abzABZ 0 6
+[[:lower:]]+ abyzABYZ 0 8
+[[:upper:]]+ abzABZ 0 6
+[[:alpha:]]+ abyzABYZ 0 8
+[[:alnum:]]+ 09abyzABYZ 0 10
+
+; word start:
+\<abcd " abcd" 2 6
+\<ab cab -1 -1
+\<ab "\nab" 1 3
+\<tag ::tag 2 5
+;word end:
+abc\> abc 0 3
+abc\> abcd -1 -1
+abc\> abc\n 0 3
+abc\> abc:: 0 3
+
+; collating elements and rewritten set code:
+- match_default normal REG_EXTENDED REG_STARTEND
+;[[.zero.]] 0 0 1
+;[[.one.]] 1 0 1
+;[[.two.]] 2 0 1
+;[[.three.]] 3 0 1
+[[.a.]] baa 1 2
+;[[.right-curly-bracket.]] } 0 1
+;[[.NUL.]] \0 0 1
+[[:<:]z] !
+[a[:>:]] !
+[[=a=]] a 0 1
+;[[=right-curly-bracket=]] } 0 1
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+[[.A.]] A 0 1
+[[.A.]] a 0 1
+[[.A.]-b]+ AaBb 0 4
+[A-[.b.]]+ AaBb 0 4
+[[.a.]-B]+ AaBb 0 4
+[a-[.B.]]+ AaBb 0 4
+- match_default normal REG_EXTENDED REG_STARTEND
+[[.a.]-c]+ abcd 0 3
+[a-[.c.]]+ abcd 0 3
+[[:alpha:]-a] !
+[a-[:alpha:]] !
+
+; try multi-character ligatures:
+;[[.ae.]] ae 0 2
+;[[.ae.]] aE -1 -1
+;[[.AE.]] AE 0 2
+;[[.Ae.]] Ae 0 2
+;[[.ae.]-b] a -1 -1
+;[[.ae.]-b] b 0 1
+;[[.ae.]-b] ae 0 2
+;[a-[.ae.]] a 0 1
+;[a-[.ae.]] b -1 -1
+;[a-[.ae.]] ae 0 2
+- match_default normal REG_EXTENDED REG_STARTEND REG_ICASE
+;[[.ae.]] AE 0 2
+;[[.ae.]] Ae 0 2
+;[[.AE.]] Ae 0 2
+;[[.Ae.]] aE 0 2
+;[[.AE.]-B] a -1 -1
+;[[.Ae.]-b] b 0 1
+;[[.Ae.]-b] B 0 1
+;[[.ae.]-b] AE 0 2
+
+- match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST
+\s+ "ab ab" 2 5
+\S+ " abc " 2 5
+
+- match_default normal REG_EXTENDED REG_STARTEND
+\`abc abc 0 3
+\`abc aabc -1 -1
+abc\' abc 0 3
+abc\' abcd -1 -1
+abc\' abc\n\n -1 -1
+abc\' abc 0 3
+
+; extended repeat checking to exercise new algorithms:
+ab.*xy abxy_ 0 4
+ab.*xy ab_xy_ 0 5
+ab.*xy abxy 0 4
+ab.*xy ab_xy 0 5
+ab.* ab 0 2
+ab.* ab__ 0 4
+
+ab.{2,5}xy ab__xy_ 0 6
+ab.{2,5}xy ab____xy_ 0 8
+ab.{2,5}xy ab_____xy_ 0 9
+ab.{2,5}xy ab__xy 0 6
+ab.{2,5}xy ab_____xy 0 9
+ab.{2,5} ab__ 0 4
+ab.{2,5} ab_______ 0 7
+ab.{2,5}xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+ab.*?xy abxy_ 0 4
+ab.*?xy ab_xy_ 0 5
+ab.*?xy abxy 0 4
+ab.*?xy ab_xy 0 5
+ab.*? ab 0 2
+ab.*? ab__ 0 4
+
+ab.{2,5}?xy ab__xy_ 0 6
+ab.{2,5}?xy ab____xy_ 0 8
+ab.{2,5}?xy ab_____xy_ 0 9
+ab.{2,5}?xy ab__xy 0 6
+ab.{2,5}?xy ab_____xy 0 9
+ab.{2,5}? ab__ 0 4
+ab.{2,5}? ab_______ 0 7
+ab.{2,5}?xy ab______xy -1 -1
+ab.{2,5}xy ab_xy -1 -1
+
+; again but with slower algorithm variant:
+- match_default REG_EXTENDED
+; now again for single character repeats:
+
+ab_*xy abxy_ 0 4
+ab_*xy ab_xy_ 0 5
+ab_*xy abxy 0 4
+ab_*xy ab_xy 0 5
+ab_* ab 0 2
+ab_* ab__ 0 4
+
+ab_{2,5}xy ab__xy_ 0 6
+ab_{2,5}xy ab____xy_ 0 8
+ab_{2,5}xy ab_____xy_ 0 9
+ab_{2,5}xy ab__xy 0 6
+ab_{2,5}xy ab_____xy 0 9
+ab_{2,5} ab__ 0 4
+ab_{2,5} ab_______ 0 7
+ab_{2,5}xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+ab_*?xy abxy_ 0 4
+ab_*?xy ab_xy_ 0 5
+ab_*?xy abxy 0 4
+ab_*?xy ab_xy 0 5
+ab_*? ab 0 2
+ab_*? ab__ 0 4
+
+ab_{2,5}?xy ab__xy_ 0 6
+ab_{2,5}?xy ab____xy_ 0 8
+ab_{2,5}?xy ab_____xy_ 0 9
+ab_{2,5}?xy ab__xy 0 6
+ab_{2,5}?xy ab_____xy 0 9
+ab_{2,5}? ab__ 0 4
+ab_{2,5}? ab_______ 0 7
+ab_{2,5}?xy ab______xy -1 -1
+ab_{2,5}xy ab_xy -1 -1
+
+; and again for sets:
+ab[_,;]*xy abxy_ 0 4
+ab[_,;]*xy ab_xy_ 0 5
+ab[_,;]*xy abxy 0 4
+ab[_,;]*xy ab_xy 0 5
+ab[_,;]* ab 0 2
+ab[_,;]* ab__ 0 4
+
+ab[_,;]{2,5}xy ab__xy_ 0 6
+ab[_,;]{2,5}xy ab____xy_ 0 8
+ab[_,;]{2,5}xy ab_____xy_ 0 9
+ab[_,;]{2,5}xy ab__xy 0 6
+ab[_,;]{2,5}xy ab_____xy 0 9
+ab[_,;]{2,5} ab__ 0 4
+ab[_,;]{2,5} ab_______ 0 7
+ab[_,;]{2,5}xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+ab[_,;]*?xy abxy_ 0 4
+ab[_,;]*?xy ab_xy_ 0 5
+ab[_,;]*?xy abxy 0 4
+ab[_,;]*?xy ab_xy 0 5
+ab[_,;]*? ab 0 2
+ab[_,;]*? ab__ 0 4
+
+ab[_,;]{2,5}?xy ab__xy_ 0 6
+ab[_,;]{2,5}?xy ab____xy_ 0 8
+ab[_,;]{2,5}?xy ab_____xy_ 0 9
+ab[_,;]{2,5}?xy ab__xy 0 6
+ab[_,;]{2,5}?xy ab_____xy 0 9
+ab[_,;]{2,5}? ab__ 0 4
+ab[_,;]{2,5}? ab_______ 0 7
+ab[_,;]{2,5}?xy ab______xy -1 -1
+ab[_,;]{2,5}xy ab_xy -1 -1
+
+; and again for tricky sets with digraphs:
+;ab[_[.ae.]]*xy abxy_ 0 4
+;ab[_[.ae.]]*xy ab_xy_ 0 5
+;ab[_[.ae.]]*xy abxy 0 4
+;ab[_[.ae.]]*xy ab_xy 0 5
+;ab[_[.ae.]]* ab 0 2
+;ab[_[.ae.]]* ab__ 0 4
+
+;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5} ab__ 0 4
+;ab[_[.ae.]]{2,5} ab_______ 0 7
+;ab[_[.ae.]]{2,5}xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+;ab[_[.ae.]]*?xy abxy_ 0 4
+;ab[_[.ae.]]*?xy ab_xy_ 0 5
+;ab[_[.ae.]]*?xy abxy 0 4
+;ab[_[.ae.]]*?xy ab_xy 0 5
+;ab[_[.ae.]]*? ab 0 2
+;ab[_[.ae.]]*? ab__ 0 2
+
+;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6
+;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8
+;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9
+;ab[_[.ae.]]{2,5}?xy ab__xy 0 6
+;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9
+;ab[_[.ae.]]{2,5}? ab__ 0 4
+;ab[_[.ae.]]{2,5}? ab_______ 0 4
+;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1
+;ab[_[.ae.]]{2,5}xy ab_xy -1 -1
+
+; new bugs detected in spring 2003:
+- normal match_continuous REG_NO_POSIX_TEST
+b abc 1 2
+
+() abc 0 0 0 0
+^() abc 0 0 0 0
+^()+ abc 0 0 0 0
+^(){1} abc 0 0 0 0
+^(){2} abc 0 0 0 0
+^((){2}) abc 0 0 0 0 0 0
+() "" 0 0 0 0
+()\1 "" 0 0 0 0
+()\1 a 0 0 0 0
+a()\1b ab 0 2 1 1
+a()b\1 ab 0 2 1 1
+
+; subtleties of matching with no sub-expressions marked
+- normal match_nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4
+(wee|week)(knights|night) weeknights 0 10
+.* abc 0 3
+a(b|(c))d abd 0 3
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
+
+- normal nosubs REG_NO_POSIX_TEST
+a(b?c)+d accd 0 4
+(wee|week)(knights|night) weeknights 0 10
+.* abc 0 3
+a(b|(c))d abd 0 3
+a(b|(c))d acd 0 3
+a(b*|c|e)d abbd 0 4
+a(b*|c|e)d acd 0 3
+a(b*|c|e)d ad 0 2
+a(b?)c abc 0 3
+a(b?)c ac 0 2
+a(b+)c abc 0 3
+a(b+)c abbbc 0 5
+a(b*)c ac 0 2
+(a|ab)(bc([de]+)f|cde) abcdef 0 6
+a([bc]?)c abc 0 3
+a([bc]?)c ac 0 2
+a([bc]+)c abc 0 3
+a([bc]+)c abcc 0 4
+a([bc]+)bc abcbc 0 5
+a(bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abb 0 3
+a(bbb+|bb+|b)b abbb 0 4
+a(bbb+|bb+|b)bb abbb 0 4
+(.*).* abcdef 0 6
+(a*)* bc 0 0
diff --git a/testsuite/Coreutils.pm b/testsuite/Coreutils.pm
new file mode 100644
index 0000000..4e7a00a
--- /dev/null
+++ b/testsuite/Coreutils.pm
@@ -0,0 +1,620 @@
+package Coreutils;
+# This is a testing framework.
+
+# Copyright (C) 1998-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+use vars qw($VERSION @ISA @EXPORT);
+
+use FileHandle;
+use File::Compare qw(compare);
+
+# Exporter-based interface: the three public entry points of this module.
+@ISA = qw(Exporter);
+# Reduce the RCS keyword string to the bare revision: tr///cd deletes every
+# character that is not a digit, '.', '[' or ']'.
+($VERSION = '$Revision: 1.5 $ ') =~ tr/[0-9].//cd;
+@EXPORT = qw (run_tests triple_test getlimits);
+
+# When DEBUG is set in the environment, file creation and each command
+# that is run are traced on stderr.
+my $debug = $ENV{DEBUG};
+
+# Every key that may appear in an I/O spec hash.
+my @Types = qw (IN IN_PIPE OUT ERR AUX CMP EXIT PRE POST OUT_SUBST
+ ERR_SUBST ENV ENV_DEL);
+# Same list as a lookup table, used to reject unknown keys.
+my %Types = map {$_ => 1} @Types;
+# Keys that may be given at most once per test.
+my %Zero_one_type = map {$_ => 1}
+ qw (OUT ERR EXIT PRE POST OUT_SUBST ERR_SUBST ENV);
+# Source directory as passed in through the environment.
+my $srcdir = "$ENV{srcdir}";
+# Suffix counter for auto-named temp files ("TESTNAME.N"); reset to 1 at
+# the start of each test in run_tests.
+my $Global_count = 1;
+
+# When running in a DJGPP environment, make $ENV{SHELL} point to bash.
+# Otherwise, a bad shell might be used (e.g. command.com) and many
+# tests would fail.
+defined $ENV{DJDIR}
+ and $ENV{SHELL} = "$ENV{DJDIR}/bin/bash.exe";
+
+# A file spec: a scalar or a reference to a single-keyed hash
+# ================
+# 'contents' contents only (file name is derived from test name)
+# {filename => 'contents'} filename and contents
+# {filename => undef} filename only -- $(srcdir)/tests/filename must exist
+#
+# FIXME: If there is more than one input file, then you can't specify 'REDIR'.
+# PIPE is still ok.
+#
+# I/O spec: a hash ref with the following properties
+# ================
+# - one key/value pair
+# - the key must be one of these strings: IN, OUT, ERR, AUX, CMP, EXIT
+# - the value must be a file spec
+# {OUT => 'data'} put data in a temp file and compare it to stdout from cmd
+# {OUT => {'filename'=>undef}} compare contents of existing filename to
+# stdout from cmd
+# {OUT => {'filename'=>[$CTOR, $DTOR]}} $CTOR and $DTOR are references to
+# functions, each of which is passed the single argument 'filename'.
+# $CTOR must create 'filename'.
+# $DTOR may be omitted, in which case 'sub{unlink $_[0]}' is used.
+# FIXME: implement this
+# {ERR => ...}
+# Same as for OUT, but compare with stderr, not stdout.
+# {OUT_SUBST => 's/variable_output/expected_output/'}
+# Transform actual standard output before comparing it against expected.
+# This is useful e.g. for programs like du that produce output that
+# varies a lot from system to system. E.g., an empty file may consume zero file
+# blocks, or more, depending on the OS and on the file system type.
+# {ERR_SUBST => 's/variable_output/expected_output/'}
+# Transform actual stderr output before comparing it against expected.
+# This is useful when verifying that we get a meaningful diagnostic.
+# For example, in rm/fail-2eperm, we have to account for three different
+# diagnostics: Operation not permitted, Not owner, and Permission denied.
+# {EXIT => N} expect exit status of cmd to be N
+# {ENV => 'VAR=val ...'}
+# Prepend 'VAR=val ...' to the command that we execute via 'system'.
+# {ENV_DEL => 'VAR'}
+# Remove VAR from the environment just before running the corresponding
+# command, and restore any value just afterwards.
+#
+# There may be many input file specs. File names from the input specs
+# are concatenated in order on the command line.
+# There may be at most one of the OUT-, ERR-, and EXIT-keyed specs.
+# If the OUT-(or ERR)-keyed hash ref is omitted, then expect no output
+# on stdout (or stderr).
+# If the EXIT-keyed one is omitted, then expect the exit status to be zero.
+
+# FIXME: Make sure that no junkfile is also listed as a
+# non-junkfile (i.e., with undef for contents)
+
+# Return STRING wrapped in single quotes so that a POSIX shell sees it as
+# one literal word.  Each embedded single quote is emitted as '\'' (close
+# the quoted run, add an escaped quote, reopen the quoted run).
+sub _shell_quote ($)
+{
+  my ($string) = @_;
+  # A limit of -1 keeps trailing empty fields, so quotes at the very end
+  # of the string are not lost.
+  my $escaped = join q('\\''), split /'/, $string, -1;
+  return "'" . $escaped . "'";
+}
+
+# Write DATA to a file and return the file's name.
+#   $program_name  prefix for fatal diagnostics
+#   $test_name     used to build a name when $file_name is undefined
+#   $file_name     name of the file to create, or undef for "TESTNAME.N",
+#                  where N is the module-wide counter $Global_count
+#   $data          contents to write
+# Dies if the file cannot be opened or closed.
+sub _create_file ($$$$)
+{
+  my ($program_name, $test_name, $file_name, $data) = @_;
+
+  # No explicit name: derive one from the test name and bump the counter.
+  my $file = defined $file_name
+    ? $file_name
+    : "$test_name." . $Global_count++;
+
+  $debug
+    and warn "creating file '$file' with contents '$data'\n";
+
+  # The test spec gave a string; store it in the file.
+  my $fh = FileHandle->new ("> $file")
+    or die "$program_name: $file: $!\n";
+  print $fh $data;
+  $fh->close
+    or die "$program_name: $file: $!\n";
+
+  return $file;
+}
+
+# Compare the files ACTUAL and EXPECTED.
+#   $program_name  prefix for diagnostics
+#   $test_name     name of the test being checked
+#   $in_or_out     'out' or 'err' (reported as "stdout"/"stderr"), or undef
+#                  for a plain file comparison
+#   $actual        file produced by the command under test
+#   $expected      file holding the expected contents
+# On a mismatch (or a comparison error), warn and show a context diff on
+# stdout.  Returns the result of File::Compare::compare, which is false
+# only when the files are identical.
+sub _compare_files ($$$$$)
+{
+  my ($program_name, $test_name, $in_or_out, $actual, $expected) = @_;
+
+  my $differ = compare ($actual, $expected);
+  if ($differ)
+    {
+      my $info = (defined $in_or_out ? "std$in_or_out " : '');
+      warn "$program_name: test $test_name: ${info}mismatch, comparing "
+        . "$expected (expected) and $actual (actual)\n";
+      # Quote both names: they are derived from test names and file specs
+      # and must reach diff intact even if they contain spaces or shell
+      # metacharacters.
+      my $diff_cmd = join ' ', 'diff', '-c',
+        _shell_quote ($expected), _shell_quote ($actual);
+      # Ignore any failure, discard stderr.
+      system "$diff_cmd 2>/dev/null";
+    }
+
+  return $differ;
+}
+
+# Turn one file spec into a file name, creating the file when the spec
+# supplies contents.
+#   $program_name  prefix for diagnostics
+#   $test_name     name of the current test
+#   $file_spec     a plain string (the contents) or a one-element hash ref
+#                  {NAME => CONTENTS}; {NAME => undef} names an existing file
+#   $type          spec key this belongs to (IN, OUT, AUX, CMP, ...)
+#   $junk_files    array ref; names of files to be removed later are
+#                  appended to it
+# Returns the file name.  Dies on a malformed spec.
+sub _process_file_spec ($$$$$)
+{
+  my ($program_name, $test_name, $file_spec, $type, $junk_files) = @_;
+
+  my $spec_kind = ref $file_spec;
+  my ($name, $data);
+  if ($spec_kind eq 'HASH')
+    {
+      my $n = keys %$file_spec;
+      $n == 1
+        or die "$program_name: $test_name: $type spec has $n elements --"
+          . " expected 1\n";
+      ($name, $data) = each %$file_spec;
+
+      # A name without contents, as for the AUX hash in an io_spec like
+      #   {CMP=> ['zy123utsrqponmlkji', {'@AUX@'=> undef}]},
+      # there is nothing to create.
+      return $name
+        unless defined $data;
+    }
+  elsif ($spec_kind)
+    {
+      die "$program_name: $test_name: invalid RHS in $type-spec\n"
+    }
+  else
+    {
+      # A bare string is the contents; the name gets generated.
+      $data = $file_spec;
+    }
+
+  # Files with generated names, and IN/AUX/CMP files whose contents came
+  # from the spec, are temporary.
+  my $is_temporary = 1;
+  if (defined $name)
+    {
+      my $is_input_like = ($type eq 'IN' || $type eq 'AUX' || $type eq 'CMP');
+      $is_temporary = ($is_input_like && defined $data);
+    }
+
+  my $file = _create_file ($program_name, $test_name, $name, $data);
+
+  if (!$is_temporary)
+    {
+      # FIXME: put $srcdir in here somewhere
+      -f "$srcdir/tests/$file"
+        or warn "$program_name: $test_name: specified file '$file' does"
+          . " not exist\n";
+    }
+  else
+    {
+      push @$junk_files, $file
+    }
+
+  return $file;
+}
+
+# Return a copy of S in which every '@AUX@', '@OUT@' and '@ERR@' marker is
+# replaced by the corresponding value from the hash ref MAP.  A marker
+# whose MAP entry is missing or false is left untouched.
+sub _at_replace ($$)
+{
+  my ($map, $s) = @_;
+  for my $tag ('AUX', 'OUT', 'ERR')
+    {
+      my $replacement = $map->{$tag}
+        or next;
+      $s =~ s/\@$tag\@/$replacement/g;
+    }
+  return $s;
+}
+
+# Run the 'getlimits' helper and return a hash ref built from its output,
+# which is read as one NAME=VALUE pair per line.
+# Dies if the helper cannot be started; warns if it does not finish cleanly.
+sub getlimits()
+{
+  open my $NV, "getlimits |" or die "Error running getlimits\n";
+
+  my %limits;
+  while (defined (my $line = <$NV>))
+    {
+      chomp $line;
+      # Split on the first '=' only, and keep an empty value.  A line with
+      # no '=' is skipped rather than allowed to shift every later
+      # name/value pair by one.
+      my ($name, $value) = split /=/, $line, 2;
+      defined $value
+        or next;
+      $limits{$name} = $value;
+    }
+
+  # Closing a pipe waits for the child; a false result means that the
+  # helper failed, so the table may be incomplete.
+  close $NV
+    or warn "getlimits: command did not complete successfully\n";
+
+  return \%limits;
+}
+
+# FIXME: cleanup on interrupt
+# FIXME: extract 'do_1_test' function
+
+# FIXME: having to include $program_name here is an expedient kludge.
+# Library code doesn't 'die'.
+# Run every test described in @$t_spec and report mismatches on stderr.
+#   $program_name  prefix used in all diagnostics
+#   $prog          the command to test: a string, or a reference to a string,
+#                  in which case the command is run through "env --"
+#   $t_spec        array ref of tests; each test is an array ref whose first
+#                  element is the test name, followed by plain strings
+#                  (command-line arguments) and one-key hash refs (I/O specs)
+#   $save_temps    when true, the temporary files are not removed
+#   $verbose       when true, run "$prog --version" first and announce each test
+# For each test, a shell command line is assembled and run with stdout and
+# stderr redirected to TESTNAME.O and TESTNAME.E; the exit status, both
+# output files and any CMP file pairs are then compared with what the
+# test expects.
+# Returns 1 if a test name is duplicated or too long (no test is run then)
+# or if any test failed, 0 otherwise.  Dies on a malformed test spec.
+sub run_tests ($$$$$)
+{
+ my ($program_name, $prog, $t_spec, $save_temps, $verbose) = @_;
+
+ # To indicate that $prog is a shell built-in, you'd make it a string 'ref'.
+ # E.g., call run_tests ($prog, \$prog, \@Tests, $save_temps, $verbose);
+ # If it's a ref, invoke it via "env":
+ my @prog = ref $prog ? (qw(env --), $$prog) : $prog;
+
+ # Warn about empty t_spec.
+ # FIXME
+
+ # Remove all temp files upon interrupt.
+ # FIXME
+
+ # Verify that test names are distinct.
+ my $bad_test_name = 0;
+ my %seen;
+ my %seen_8dot3;
+ my $t;
+ foreach $t (@$t_spec)
+ {
+ my $test_name = $t->[0];
+ if ($seen{$test_name})
+ {
+ warn "$program_name: $test_name: duplicate test name\n";
+ $bad_test_name = 1;
+ }
+ $seen{$test_name} = 1;
+
+ # Disabled: this check for names that collide in their first eight
+ # characters (compared case-insensitively) never runs.
+ if (0)
+ {
+ my $t8 = lc substr $test_name, 0, 8;
+ if ($seen_8dot3{$t8})
+ {
+ warn "$program_name: 8.3 test name conflict: "
+ . "$test_name, $seen_8dot3{$t8}\n";
+ $bad_test_name = 1;
+ }
+ $seen_8dot3{$t8} = $test_name;
+ }
+
+ # The test name may be no longer than 30 bytes.
+ # Yes, this is an arbitrary limit. If it causes trouble,
+ # consider removing it.
+ my $max = 30;
+ if ($max < length $test_name)
+ {
+ warn "$program_name: $test_name: test name is too long (> $max)\n";
+ $bad_test_name = 1;
+ }
+ }
+ return 1 if $bad_test_name;
+
+ # FIXME check exit status
+ system (@prog, '--version') if $verbose;
+
+ my @junk_files;
+ my $fail = 0;
+ foreach my $tt (@$t_spec)
+ {
+ my @post_compare;
+ # Work on a shallow copy, so that shifting off the test name does not
+ # modify the caller's array.
+ my @dummy = @$tt;
+ my $t = \@dummy;
+ my $test_name = shift @$t;
+ my $expect = {};
+ my ($pre, $post);
+
+ # FIXME: maybe don't reset this.
+ $Global_count = 1;
+ my @args;
+ my $io_spec;
+ my %seen_type;
+ my @env_delete;
+ my $env_prefix = '';
+ my $input_pipe_cmd;
+ foreach $io_spec (@$t)
+ {
+ # A plain string is a command-line argument.
+ if (!ref $io_spec)
+ {
+ push @args, $io_spec;
+ next;
+ }
+
+ if (ref $io_spec ne 'HASH')
+ {
+ eval 'use Data::Dumper';
+ die "$program_name: $test_name: invalid entry in test spec; "
+ . "expected HASH-ref,\nbut got this:\n"
+ . Data::Dumper->Dump ([\$io_spec], ['$io_spec']) . "\n";
+ }
+
+ my $n = keys %$io_spec;
+ die "$program_name: $test_name: spec has $n elements --"
+ . " expected 1\n"
+ if $n != 1;
+ my ($type, $val) = each %$io_spec;
+ die "$program_name: $test_name: invalid key '$type' in test spec\n"
+ if ! $Types{$type};
+
+ # Make sure there's no more than one of OUT, ERR, EXIT, etc.
+ die "$program_name: $test_name: more than one $type spec\n"
+ if $Zero_one_type{$type} and $seen_type{$type}++;
+
+ # PRE and POST hold code references that are called before and
+ # after the command is run.
+ if ($type eq 'PRE' or $type eq 'POST')
+ {
+ $expect->{$type} = $val;
+ next;
+ }
+
+ # CMP holds a pair of file specs, (expected, actual), that are
+ # compared after the command has run.
+ if ($type eq 'CMP')
+ {
+ my $t = ref $val;
+ $t && $t eq 'ARRAY'
+ or die "$program_name: $test_name: invalid CMP spec\n";
+ @$val == 2
+ or die "$program_name: $test_name: invalid CMP list; must have"
+ . " exactly 2 elements\n";
+ my @cmp_files;
+ foreach my $e (@$val)
+ {
+ my $r = ref $e;
+ $r && $r ne 'HASH'
+ and die "$program_name: $test_name: invalid element ($r)"
+ . " in CMP list; only scalars and hash references "
+ . "are allowed\n";
+ if ($r && $r eq 'HASH')
+ {
+ my $n = keys %$e;
+ $n == 1
+ or die "$program_name: $test_name: CMP spec has $n "
+ . "elements -- expected 1\n";
+
+ # Replace any '@AUX@' in the key of %$e.
+ my ($ff, $val) = each %$e;
+ my $new_ff = _at_replace $expect, $ff;
+ if ($new_ff ne $ff)
+ {
+ $e->{$new_ff} = $val;
+ delete $e->{$ff};
+ }
+ }
+ my $cmp_file = _process_file_spec ($program_name, $test_name,
+ $e, $type, \@junk_files);
+ push @cmp_files, $cmp_file;
+ }
+ push @post_compare, [@cmp_files];
+
+ $expect->{$type} = $val;
+ next;
+ }
+
+ if ($type eq 'EXIT')
+ {
+ die "$program_name: $test_name: invalid EXIT code\n"
+ if $val !~ /^\d+$/;
+ # FIXME: make sure $data is numeric
+ $expect->{EXIT} = $val;
+ next;
+ }
+
+ # OUT_SUBST and ERR_SUBST are stored under RESULT_SUBST, keyed by
+ # OUT or ERR.
+ if ($type =~ /^(OUT|ERR)_SUBST$/)
+ {
+ $expect->{RESULT_SUBST} ||= {};
+ $expect->{RESULT_SUBST}->{$1} = $val;
+ next;
+ }
+
+ if ($type eq 'ENV')
+ {
+ $env_prefix = "$val ";
+ next;
+ }
+
+ if ($type eq 'ENV_DEL')
+ {
+ push @env_delete, $val;
+ next;
+ }
+
+ # The remaining types (IN, IN_PIPE, AUX, OUT, ERR) all name a file.
+ my $file = _process_file_spec ($program_name, $test_name, $val,
+ $type, \@junk_files);
+
+ if ($type eq 'IN' || $type eq 'IN_PIPE')
+ {
+ my $quoted_file = _shell_quote $file;
+ if ($type eq 'IN_PIPE')
+ {
+ defined $input_pipe_cmd
+ and die "$program_name: $test_name: only one input"
+ . " may be specified with IN_PIPE\n";
+ $input_pipe_cmd = "cat $quoted_file |";
+ }
+ else
+ {
+ push @args, $quoted_file;
+ }
+ }
+ elsif ($type eq 'AUX' || $type eq 'OUT' || $type eq 'ERR')
+ {
+ $expect->{$type} = $file;
+ }
+ else
+ {
+ die "$program_name: $test_name: invalid type: $type\n"
+ }
+ }
+
+ # Expect an exit status of zero if it's not specified.
+ $expect->{EXIT} ||= 0;
+
+ # Allow OUT and ERR to be omitted -- in that case, expect no output
+ # on that stream: compare against a newly created empty file.
+ foreach my $eo (qw (OUT ERR))
+ {
+ if (!exists $expect->{$eo})
+ {
+ $expect->{$eo} = _create_file ($program_name, $test_name,
+ undef, '');
+ push @junk_files, $expect->{$eo};
+ }
+ }
+
+ # FIXME: Does it ever make sense to specify a filename *and* contents
+ # in OUT or ERR spec?
+
+ # Expand the @AUX@, @OUT@ and @ERR@ markers in each argument.
+ # FIXME: this is really suboptimal...
+ my @new_args;
+ foreach my $a (@args)
+ {
+ $a = _at_replace $expect, $a;
+ push @new_args, $a;
+ }
+ @args = @new_args;
+
+ warn "$test_name...\n" if $verbose;
+ &{$expect->{PRE}} if $expect->{PRE};
+ my %actual;
+ $actual{OUT} = "$test_name.O";
+ $actual{ERR} = "$test_name.E";
+ push @junk_files, $actual{OUT}, $actual{ERR};
+ # The pieces are joined with spaces into one string that is handed to
+ # 'system', so the redirections below take effect.  The ENV prefix
+ # and then the IN_PIPE "cat FILE |" are prepended, in that order.
+ my @cmd = (@prog, @args, "> $actual{OUT}", "2> $actual{ERR}");
+ $env_prefix
+ and unshift @cmd, $env_prefix;
+ defined $input_pipe_cmd
+ and unshift @cmd, $input_pipe_cmd;
+ my $cmd_str = join (' ', @cmd);
+
+ # Delete from the environment any symbols specified by syntax
+ # like this: {ENV_DEL => 'TZ'}.
+ my %pushed_env;
+ foreach my $env_sym (@env_delete)
+ {
+ my $val = delete $ENV{$env_sym};
+ defined $val
+ and $pushed_env{$env_sym} = $val;
+ }
+
+ warn "Running command: '$cmd_str'\n" if $debug;
+ # Keep the low 16 bits of what 'system' returns; the value 0xff00 is
+ # reported below as a failure to run the command.
+ my $rc = 0xffff & system $cmd_str;
+
+ # Restore any environment setting we changed via a deletion.
+ foreach my $env_sym (keys %pushed_env)
+ {
+ $ENV{$env_sym} = $pushed_env{$env_sym};
+ }
+
+ if ($rc == 0xff00)
+ {
+ warn "$program_name: test $test_name failed: command failed:\n"
+ . " '$cmd_str': $!\n";
+ $fail = 1;
+ goto cleanup;
+ }
+ # Values above 0x80 are shifted down to the exit code held in the high
+ # byte; smaller values are compared as they are.
+ $rc >>= 8 if $rc > 0x80;
+ if ($expect->{EXIT} != $rc)
+ {
+ warn "$program_name: test $test_name failed: exit status mismatch:"
+ . " expected $expect->{EXIT}, got $rc\n";
+ $fail = 1;
+ goto cleanup;
+ }
+
+ my %actual_data;
+ # Record actual stdout and stderr contents, if POST may need them.
+ if ($expect->{POST})
+ {
+ foreach my $eo (qw (OUT ERR))
+ {
+ my $out_file = $actual{$eo};
+ open IN, $out_file
+ or (warn
+ "$program_name: cannot open $out_file for reading: $!\n"),
+ $fail = 1, next;
+ # NOTE(review): readline in scalar context stores only the first
+ # line of the file, not all of it -- confirm that the POST
+ # callbacks expect this.
+ $actual_data{$eo} = <IN>;
+ close IN
+ or (warn "$program_name: failed to read $out_file: $!\n"),
+ $fail = 1;
+ }
+ }
+
+ foreach my $eo (qw (OUT ERR))
+ {
+ my $subst_expr = $expect->{RESULT_SUBST}->{$eo};
+ if (defined $subst_expr)
+ {
+ my $out = $actual{$eo};
+ my $orig = "$out.orig";
+
+ # Move $out aside (to $orig), then recreate $out
+ # by transforming each line of $orig via $subst_expr.
+ rename $out, $orig
+ or (warn "$program_name: cannot rename $out to $orig: $!\n"),
+ $fail = 1, next;
+ open IN, $orig
+ or (warn "$program_name: cannot open $orig for reading: $!\n"),
+ $fail = 1, (unlink $orig), next;
+ # Remove the name right away; the lines are still read below
+ # through the IN handle that is already open.
+ unlink $orig
+ or (warn "$program_name: cannot unlink $orig: $!\n"),
+ $fail = 1;
+ open OUT, ">$out"
+ or (warn "$program_name: cannot open $out for writing: $!\n"),
+ $fail = 1, next;
+ while (defined (my $line = <IN>))
+ {
+ # $subst_expr is Perl code from the test spec, applied to $_.
+ # The result of the eval is not checked here.
+ eval "\$_ = \$line; $subst_expr; \$line = \$_";
+ print OUT $line;
+ }
+ close IN;
+ close OUT
+ or (warn "$program_name: failed to write $out: $!\n"),
+ $fail = 1, next;
+ }
+
+ my $eo_lower = lc $eo;
+ _compare_files ($program_name, $test_name, $eo_lower,
+ $actual{$eo}, $expect->{$eo})
+ and $fail = 1;
+ }
+
+ # Each CMP pair was recorded in the order (expected, actual).
+ foreach my $pair (@post_compare)
+ {
+ my ($expected, $actual) = @$pair;
+ _compare_files $program_name, $test_name, undef, $actual, $expected
+ and $fail = 1;
+ }
+
+ # The POST hook also runs when a test jumps here after a failure.
+ cleanup:
+ $expect->{POST}
+ and &{$expect->{POST}} ($actual_data{OUT}, $actual_data{ERR});
+
+ }
+
+ # FIXME: maybe unlink files inside the big foreach loop?
+ unlink @junk_files if ! $save_temps;
+
+ return $fail;
+}
+
+# For each test in @$TESTS, generate two additional tests,
+# one using stdin, the other using a pipe. I.e., given this one
+# ['idem-0', {IN=>''}, {OUT=>''}],
+# generate these:
+# ['idem-0.r', '<', {IN=>''}, {OUT=>''}],
+# ['idem-0.p', {IN_PIPE=>''}, {OUT=>''}],
+# Generate new tests only if there is exactly one input spec.
+# The returned list of tests contains each input test, followed
+# by zero or two derived tests.
+# Expand the list of tests in the array ref TESTS.  Every test is copied
+# to the result; a test that has exactly one IN spec is followed by two
+# variants:
+#   NAME.r  the same input, preceded by a '<' argument
+#   NAME.p  the same input, given as an IN_PIPE spec
+# In both variants the plain-string arguments come first, then the input,
+# then the remaining hash specs.  Returns the resulting list.
+sub triple_test($)
+{
+  my ($tests) = @_;
+  my @result;
+  for my $test (@$tests)
+    {
+      push @result, $test;
+
+      # Sort the entries into plain strings, IN values and other specs.
+      my (@inputs, @plain, @other_specs);
+      for my $entry (@$test)
+        {
+          if (!ref $entry)
+            {
+              push @plain, $entry;
+            }
+          elsif (ref $entry ne 'HASH')
+            {
+              warn "$0: $test->[0]: unexpected entry type\n";
+            }
+          elsif (defined $entry->{IN})
+            {
+              push @inputs, $entry->{IN};
+            }
+          else
+            {
+              push @other_specs, $entry;
+            }
+        }
+
+      # Add variants IFF there is exactly one input file.
+      next
+        unless @inputs == 1;
+
+      # The first plain string is the test name; the rest are arguments.
+      my (undef, @cmd_args) = @plain;
+      my $input = $inputs[0];
+      push @result,
+        ["$test->[0].r", @cmd_args, '<', {IN => $input}, @other_specs],
+        ["$test->[0].p", @cmd_args, {IN_PIPE => $input}, @other_specs];
+    }
+  return @result;
+}
+
+## package return
+1;
diff --git a/testsuite/CuSkip.pm b/testsuite/CuSkip.pm
new file mode 100644
index 0000000..a2aad28
--- /dev/null
+++ b/testsuite/CuSkip.pm
@@ -0,0 +1,39 @@
+package CuSkip;
+# Skip a test: emit diag to log and to stderr, and exit 77
+
+# Copyright (C) 2011-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+use warnings;
+
+# Name used as the prefix of this module's diagnostics: the value of $0,
+# or "<???>" when $0 is false.
+our $ME = $0 || "<???>";
+
+# Emit a diagnostic both to stderr and to $stderr_fileno_.
+# FIXME: don't hard-code that value (9), since it's already defined in init.cfg.
+# Report MSG on stderr and on file descriptor 9, then exit with status 77.
+#   $msg  the diagnostic to emit
+# Does not return.
+sub skip ($)
+{
+  my ($msg) = @_;
+  my $stderr_fileno_ = 9;
+  warn $msg;
+  # Write the copy only when the duplicate handle was really obtained.
+  # Printing to, and closing, a handle that failed to open would only add
+  # "unopened filehandle" warnings and a misleading close diagnostic.
+  if (open my $fh, ">&$stderr_fileno_")
+    {
+      print $fh $msg;
+      close $fh
+        or warn "$ME: failed to close FD $stderr_fileno_\n";
+    }
+  else
+    {
+      warn "$ME: failed to dup stderr\n";
+    }
+  exit 77;
+}
+
+1;
diff --git a/testsuite/CuTmpdir.pm b/testsuite/CuTmpdir.pm
new file mode 100644
index 0000000..eee8a8d
--- /dev/null
+++ b/testsuite/CuTmpdir.pm
@@ -0,0 +1,114 @@
+package CuTmpdir;
+# create, then chdir into a temporary sub-directory
+
+# Copyright (C) 2007-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+use warnings;
+
+use File::Temp;
+use File::Find;
+
+# Name used as the prefix of this module's diagnostics: the value of $0,
+# or "<???>" when $0 is false.  'import' may replace it with its prefix
+# argument when $0 is '-'.
+our $ME = $0 || "<???>";
+
+# The temporary directory created by 'import'; undefined until then.
+my $dir;
+
+# Report that the directory name given as argument is not safe to use,
+# then exit with status 77.  Does not return.
+sub skip_test($)
+{
+  my ($unsafe_name) = @_;
+  warn "$ME: skipping test: unsafe working directory name: '$unsafe_name'\n";
+  exit 77;
+}
+
+# File::Find callback: set the mode of the entry named by $_ to 0700 when
+# it is a directory and not a symbolic link.
+sub chmod_1
+{
+  my $entry = $_;
+
+  # Leave symlinks and non-directories alone.  The '_' handle reuses the
+  # file information gathered by the preceding -l test.
+  return
+    if -l $entry || !-d _;
+
+  chmod 0700, $entry;
+}
+
+# Set mode 0700 on every directory in the tree rooted at $dir (symbolic
+# links excepted).  Does nothing when $dir is undefined.
+sub chmod_tree
+{
+  # tempdir croaks on failure, and $dir then remains undefined.
+  return
+    unless defined $dir;
+
+  # Walk the tree, letting chmod_1 handle each entry; 'untaint' tells
+  # File::Find to untaint directory names.
+  find ({untaint => 1, wanted => \&chmod_1}, $dir);
+}
+
+# Invoked through 'use CuTmpdir PREFIX'.  Creates a temporary directory
+# named PREFIX.tmp-XXXX, changes into it, and installs handlers for INT,
+# TERM and HUP that clean up.  The directory is removed automatically
+# unless SAVE_TEMPS is set in the environment.
+# Exits with status 77 (via skip_test) when the absolute prefix contains
+# characters other than word characters and '-', '+', '@', '.', '/'.
+sub import {
+ # $_[0] is the package name; the first argument of the 'use' line follows.
+ my $prefix = $_[1];
+
+ $ME eq '-' && defined $prefix
+ and $ME = $prefix;
+
+ # Make a relative prefix absolute.  If Cwd cannot be loaded, '.' is used
+ # in place of the current directory.
+ if ($prefix !~ /^\//)
+ {
+ eval 'use Cwd';
+ my $cwd = $@ ? '.' : Cwd::getcwd();
+ $prefix = "$cwd/$prefix";
+ }
+
+ # Untaint for the upcoming mkdir.
+ $prefix =~ m!^([-+\@\w./]+)$!
+ or skip_test $prefix;
+ $prefix = $1;
+
+ # Remembered so that the handler below cleans up only in the process
+ # that ran this import.
+ my $original_pid = $$;
+
+ my $on_sig_remove_tmpdir = sub {
+ my ($sig) = @_;
+ if ($$ == $original_pid and defined $dir)
+ {
+ chmod_tree;
+ # Older versions of File::Temp lack this method.
+ exists &File::Temp::cleanup
+ and &File::Temp::cleanup;
+ }
+ # Restore the default action and send the signal to ourselves again.
+ $SIG{$sig} = 'DEFAULT';
+ kill $sig, $$;
+ };
+
+ foreach my $sig (qw (INT TERM HUP))
+ {
+ $SIG{$sig} = $on_sig_remove_tmpdir;
+ }
+
+ my $cleanup = $ENV{SAVE_TEMPS} ? 0 : 1;
+ $dir = File::Temp::tempdir("$prefix.tmp-XXXX", CLEANUP => $cleanup );
+ chdir $dir
+ or warn "$ME: failed to chdir to $dir: $!\n";
+
+ # When the directory is being kept, tell the user where it is.
+ warn "Temp directory: $dir\n" unless $cleanup;
+}
+
+END {
+  # Step out of the directory that is about to be removed; some systems,
+  # and some versions of File::Temp, require this.
+  unless (chdir '..')
+    {
+      warn "$ME: failed to chdir to .. from $dir: $!\n";
+    }
+
+  # Make the directories of the tree accessible, keeping the value of $?
+  # unchanged across that call.
+  my $exit_status = $?;
+  chmod_tree ();
+  $? = $exit_status;
+}
+
+1;
diff --git a/testsuite/PCRE.tests b/testsuite/PCRE.tests
new file mode 100644
index 0000000..7816dce
--- /dev/null
+++ b/testsuite/PCRE.tests
@@ -0,0 +1,2386 @@
+# PCRE version 4.4 21-August-2003
+
+# Tests taken from PCRE and modified to suit glibc regex.
+#
+# PCRE LICENCE
+# ------------
+#
+# PCRE is a library of functions to support regular expressions whose syntax
+# and semantics are as close as possible to those of the Perl 5 language.
+#
+# Written by: Philip Hazel <ph10@cam.ac.uk>
+#
+# University of Cambridge Computing Service,
+# Cambridge, England. Phone: +44 1223 334714.
+#
+# Copyright (c) 1997-2003 University of Cambridge
+#
+# Permission is granted to anyone to use this software for any purpose on any
+# computer system, and to redistribute it freely, subject to the following
+# restrictions:
+#
+# 1. This software is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# 2. The origin of this software must not be misrepresented, either by
+# explicit claim or by omission. In practice, this means that if you use
+# PCRE in software that you distribute to others, commercially or
+# otherwise, you must put a sentence like this
+#
+# Regular expression support is provided by the PCRE library package,
+# which is open source software, written by Philip Hazel, and copyright
+# by the University of Cambridge, England.
+#
+# somewhere reasonably visible in your documentation and in any relevant
+# files or online help data or similar. A reference to the ftp site for
+# the source, that is, to
+#
+# ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
+#
+# should also be given in the documentation. However, this condition is not
+# intended to apply to whole chains of software. If package A includes PCRE,
+# it must acknowledge it, but if package B is software that includes package
+# A, the condition is not imposed on package B (unless it uses PCRE
+# independently).
+#
+# 3. Altered versions must be plainly marked as such, and must not be
+# misrepresented as being the original software.
+#
+# 4. If PCRE is embedded in any software that is released under the GNU
+# General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
+# then the terms of that licence shall supersede any condition above with
+# which it is incompatible.
+#
+# The documentation for PCRE, supplied in the "doc" directory, is distributed
+# under the same terms as the software itself.
+#
+# End
+#
+
+/the quick brown fox/
+ the quick brown fox
+ 0: the quick brown fox
+ The quick brown FOX
+No match
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+ What do you know about THE QUICK BROWN FOX?
+No match
+
+/The quick brown fox/i
+ the quick brown fox
+ 0: the quick brown fox
+ The quick brown FOX
+ 0: The quick brown FOX
+ What do you know about the quick brown fox?
+ 0: the quick brown fox
+ What do you know about THE QUICK BROWN FOX?
+ 0: THE QUICK BROWN FOX
+
+/a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz/
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ abxyzpqrrrabbxyyyypqAzz
+ 0: abxyzpqrrrabbxyyyypqAzz
+ aabxyzpqrrrabbxyyyypqAzz
+ 0: aabxyzpqrrrabbxyyyypqAzz
+ aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+ aabcxyzpqrrrabbxyyyypqAzz
+ 0: aabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypAzz
+ 0: aaabcxyzpqrrrabbxyyyypAzz
+ aaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqAzz
+ aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ 0: aaabcxyzpqrrrabbxyyyypqqqqqqAzz
+ aaaabcxyzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzpqrrrabbxyyyypqAzz
+ abxyzzpqrrrabbxyyyypqAzz
+ 0: abxyzzpqrrrabbxyyyypqAzz
+ aabxyzzzpqrrrabbxyyyypqAzz
+ 0: aabxyzzzpqrrrabbxyyyypqAzz
+ aaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabxyzzzzpqrrrabbxyyyypqAzz
+ aaaabxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzzzzpqrrrabbxyyyypqAzz
+ abcxyzzpqrrrabbxyyyypqAzz
+ 0: abcxyzzpqrrrabbxyyyypqAzz
+ aabcxyzzzpqrrrabbxyyyypqAzz
+ 0: aabcxyzzzpqrrrabbxyyyypqAzz
+ aaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyypqAzz
+ aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ 0: aaaabcxyzzzzpqrrrabbbxyyyyypqAzz
+ aaabcxyzpqrrrabbxyyyypABzz
+ 0: aaabcxyzpqrrrabbxyyyypABzz
+ aaabcxyzpqrrrabbxyyyypABBzz
+ 0: aaabcxyzpqrrrabbxyyyypABBzz
+ >>>aaabxyzpqrrrabbxyyyypqAzz
+ 0: aaabxyzpqrrrabbxyyyypqAzz
+ >aaaabxyzpqrrrabbxyyyypqAzz
+ 0: aaaabxyzpqrrrabbxyyyypqAzz
+ >>>>abcxyzpqrrrabbxyyyypqAzz
+ 0: abcxyzpqrrrabbxyyyypqAzz
+ *** Failers
+No match
+ abxyzpqrrabbxyyyypqAzz
+No match
+ abxyzpqrrrrabbxyyyypqAzz
+No match
+ abxyzpqrrrabxyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyyyyypqAzz
+No match
+ aaaabcxyzzzzpqrrrabbbxyyypqAzz
+No match
+ aaabcxyzpqrrrabbxyyyypqqqqqqqAzz
+No match
+
+/^(abc){1,2}zz/
+ abczz
+ 0: abczz
+ 1: abc
+ abcabczz
+ 0: abcabczz
+ 1: abc
+ *** Failers
+No match
+ zz
+No match
+ abcabcabczz
+No match
+ >>abczz
+No match
+
+/^(b+|a){1,2}c/
+ bc
+ 0: bc
+ 1: b
+ bbc
+ 0: bbc
+ 1: bb
+ bbbc
+ 0: bbbc
+ 1: bbb
+ bac
+ 0: bac
+ 1: a
+ bbac
+ 0: bbac
+ 1: a
+ aac
+ 0: aac
+ 1: a
+ abbbbbbbbbbbc
+ 0: abbbbbbbbbbbc
+ 1: bbbbbbbbbbb
+ bbbbbbbbbbbac
+ 0: bbbbbbbbbbbac
+ 1: a
+ *** Failers
+No match
+ aaac
+No match
+ abbbbbbbbbbbac
+No match
+
+/^[]cde]/
+ ]thing
+ 0: ]
+ cthing
+ 0: c
+ dthing
+ 0: d
+ ething
+ 0: e
+ *** Failers
+No match
+ athing
+No match
+ fthing
+No match
+
+/^[^]cde]/
+ athing
+ 0: a
+ fthing
+ 0: f
+ *** Failers
+ 0: *
+ ]thing
+No match
+ cthing
+No match
+ dthing
+No match
+ ething
+No match
+
+/^[0-9]+$/
+ 0
+ 0: 0
+ 1
+ 0: 1
+ 2
+ 0: 2
+ 3
+ 0: 3
+ 4
+ 0: 4
+ 5
+ 0: 5
+ 6
+ 0: 6
+ 7
+ 0: 7
+ 8
+ 0: 8
+ 9
+ 0: 9
+ 10
+ 0: 10
+ 100
+ 0: 100
+ *** Failers
+No match
+ abc
+No match
+
+/^.*nter/
+ enter
+ 0: enter
+ inter
+ 0: inter
+ uponter
+ 0: uponter
+
+/^xxx[0-9]+$/
+ xxx0
+ 0: xxx0
+ xxx1234
+ 0: xxx1234
+ *** Failers
+No match
+ xxx
+No match
+
+/^.+[0-9][0-9][0-9]$/
+ x123
+ 0: x123
+ xx123
+ 0: xx123
+ 123456
+ 0: 123456
+ *** Failers
+No match
+ 123
+No match
+ x1234
+ 0: x1234
+
+/^([^!]+)!(.+)=apquxz\.ixr\.zzz\.ac\.uk$/
+ abc!pqr=apquxz.ixr.zzz.ac.uk
+ 0: abc!pqr=apquxz.ixr.zzz.ac.uk
+ 1: abc
+ 2: pqr
+ *** Failers
+No match
+ !pqr=apquxz.ixr.zzz.ac.uk
+No match
+ abc!=apquxz.ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz:ixr.zzz.ac.uk
+No match
+ abc!pqr=apquxz.ixr.zzz.ac.ukk
+No match
+
+/:/
+ Well, we need a colon: somewhere
+ 0: :
+ *** Fail if we don't
+No match
+
+/([0-9a-f:]+)$/i
+ 0abc
+ 0: 0abc
+ 1: 0abc
+ abc
+ 0: abc
+ 1: abc
+ fed
+ 0: fed
+ 1: fed
+ E
+ 0: E
+ 1: E
+ ::
+ 0: ::
+ 1: ::
+ 5f03:12C0::932e
+ 0: 5f03:12C0::932e
+ 1: 5f03:12C0::932e
+ fed def
+ 0: def
+ 1: def
+ Any old stuff
+ 0: ff
+ 1: ff
+ *** Failers
+No match
+ 0zzz
+No match
+ gzzz
+No match
+ Any old rubbish
+No match
+
+/^.*\.([0-9]{1,3})\.([0-9]{1,3})\.([0-9]{1,3})$/
+ .1.2.3
+ 0: .1.2.3
+ 1: 1
+ 2: 2
+ 3: 3
+ A.12.123.0
+ 0: A.12.123.0
+ 1: 12
+ 2: 123
+ 3: 0
+ *** Failers
+No match
+ .1.2.3333
+No match
+ 1.2.3
+No match
+ 1234.2.3
+No match
+
+/^([0-9]+)\s+IN\s+SOA\s+(\S+)\s+(\S+)\s*\(\s*$/
+ 1 IN SOA non-sp1 non-sp2(
+ 0: 1 IN SOA non-sp1 non-sp2(
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+ 1 IN SOA non-sp1 non-sp2 (
+ 0: 1 IN SOA non-sp1 non-sp2 (
+ 1: 1
+ 2: non-sp1
+ 3: non-sp2
+ *** Failers
+No match
+ 1IN SOA non-sp1 non-sp2(
+No match
+
+/^[a-zA-Z0-9][a-zA-Z0-9-]*(\.[a-zA-Z0-9][a-zA-z0-9-]*)*\.$/
+ a.
+ 0: a.
+ Z.
+ 0: Z.
+ 2.
+ 0: 2.
+ ab-c.pq-r.
+ 0: ab-c.pq-r.
+ 1: .pq-r
+ sxk.zzz.ac.uk.
+ 0: sxk.zzz.ac.uk.
+ 1: .uk
+ x-.y-.
+ 0: x-.y-.
+ 1: .y-
+ *** Failers
+No match
+ -abc.peq.
+No match
+
+/^\*\.[a-z]([a-z0-9-]*[a-z0-9]+)?(\.[a-z]([a-z0-9-]*[a-z0-9]+)?)*$/
+ *.a
+ 0: *.a
+ *.b0-a
+ 0: *.b0-a
+ 1: 0-a
+ *.c3-b.c
+ 0: *.c3-b.c
+ 1: 3-b
+ 2: .c
+ *.c-a.b-c
+ 0: *.c-a.b-c
+ 1: -a
+ 2: .b-c
+ 3: -c
+ *** Failers
+No match
+ *.0
+No match
+ *.a-
+No match
+ *.a-b.c-
+No match
+ *.c-a.0-c
+No match
+
+/^[0-9a-f](\.[0-9a-f])*$/i
+ a.b.c.d
+ 0: a.b.c.d
+ 1: .d
+ A.B.C.D
+ 0: A.B.C.D
+ 1: .D
+ a.b.c.1.2.3.C
+ 0: a.b.c.1.2.3.C
+ 1: .C
+
+/^".*"\s*(;.*)?$/
+ "1234"
+ 0: "1234"
+ "abcd" ;
+ 0: "abcd" ;
+ 1: ;
+ "" ; rhubarb
+ 0: "" ; rhubarb
+ 1: ; rhubarb
+ *** Failers
+No match
+ "1234" : things
+No match
+
+/^(a(b(c)))(d(e(f)))(h(i(j)))(k(l(m)))$/
+ abcdefhijklm
+ 0: abcdefhijklm
+ 1: abc
+ 2: bc
+ 3: c
+ 4: def
+ 5: ef
+ 6: f
+ 7: hij
+ 8: ij
+ 9: j
+10: klm
+11: lm
+12: m
+
+/^a*\w/
+ z
+ 0: z
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ a
+ 0: a
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ a+
+ 0: a
+ aa+
+ 0: aa
+
+/^a+\w/
+ az
+ 0: az
+ aaaz
+ 0: aaaz
+ aa
+ 0: aa
+ aaaa
+ 0: aaaa
+ aa+
+ 0: aa
+
+/^[0-9]{8}\w{2,}/
+ 1234567890
+ 0: 1234567890
+ 12345678ab
+ 0: 12345678ab
+ 12345678__
+ 0: 12345678__
+ *** Failers
+No match
+ 1234567
+No match
+
+/^[aeiou0-9]{4,5}$/
+ uoie
+ 0: uoie
+ 1234
+ 0: 1234
+ 12345
+ 0: 12345
+ aaaaa
+ 0: aaaaa
+ *** Failers
+No match
+ 123456
+No match
+
+/\`(abc|def)=(\1){2,3}\'/
+ abc=abcabc
+ 0: abc=abcabc
+ 1: abc
+ 2: abc
+ def=defdefdef
+ 0: def=defdefdef
+ 1: def
+ 2: def
+ *** Failers
+No match
+ abc=defdef
+No match
+
+/(cat(a(ract|tonic)|erpillar)) \1()2(3)/
+ cataract cataract23
+ 0: cataract cataract23
+ 1: cataract
+ 2: aract
+ 3: ract
+ 4:
+ 5: 3
+ catatonic catatonic23
+ 0: catatonic catatonic23
+ 1: catatonic
+ 2: atonic
+ 3: tonic
+ 4:
+ 5: 3
+ caterpillar caterpillar23
+ 0: caterpillar caterpillar23
+ 1: caterpillar
+ 2: erpillar
+ 3: <unset>
+ 4:
+ 5: 3
+
+
+/^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: abcd
+
+/^From\s+\S+\s+([a-zA-Z]{3}\s+){2}[0-9]{1,2}\s+[0-9][0-9]:[0-9][0-9]/
+ From abcd Mon Sep 01 12:33:02 1997
+ 0: From abcd Mon Sep 01 12:33
+ 1: Sep
+ From abcd Mon Sep 1 12:33:02 1997
+ 0: From abcd Mon Sep 1 12:33
+ 1: Sep
+ *** Failers
+No match
+ From abcd Sep 01 12:33:02 1997
+No match
+
+/^(a)\1{2,3}(.)/
+ aaab
+ 0: aaab
+ 1: a
+ 2: b
+ aaaab
+ 0: aaaab
+ 1: a
+ 2: b
+ aaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+ aaaaaab
+ 0: aaaaa
+ 1: a
+ 2: a
+
+/^[ab]{1,3}(ab*|b)/
+ aabbbbb
+ 0: aabbbbb
+ 1: abbbbb
+
+/^(cow|)\1(bell)/
+ cowcowbell
+ 0: cowcowbell
+ 1: cow
+ 2: bell
+ bell
+ 0: bell
+ 1:
+ 2: bell
+ *** Failers
+No match
+ cowbell
+No match
+
+/^(a|)\1+b/
+ aab
+ 0: aab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+
+/^(a|)\1{2}b/
+ aaab
+ 0: aaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+ aab
+No match
+ aaaab
+No match
+
+/^(a|)\1{2,3}b/
+ aaab
+ 0: aaab
+ 1: a
+ aaaab
+ 0: aaaab
+ 1: a
+ b
+ 0: b
+ 1:
+ *** Failers
+No match
+ ab
+No match
+ aab
+No match
+ aaaaab
+No match
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+ abbbc
+ 0: abbbc
+ abbc
+ 0: abbc
+ *** Failers
+No match
+ abc
+No match
+ abbbbbc
+No match
+
+/([^.]*)\.([^:]*):[T ]+(.*)/
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[T ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/([^.]*)\.([^:]*):[t ]+(.*)/i
+ track1.title:TBlah blah blah
+ 0: track1.title:TBlah blah blah
+ 1: track1
+ 2: title
+ 3: Blah blah blah
+
+/^abc$/
+ abc
+ 0: abc
+ *** Failers
+No match
+
+/[-az]+/
+ az-
+ 0: az-
+ *** Failers
+ 0: a
+ b
+No match
+
+/[az-]+/
+ za-
+ 0: za-
+ *** Failers
+ 0: a
+ b
+No match
+
+/[a-z]+/
+ abcdxyz
+ 0: abcdxyz
+
+/[0-9-]+/
+ 12-34
+ 0: 12-34
+ *** Failers
+No match
+ aaa
+No match
+
+/(abc)\1/i
+ abcabc
+ 0: abcabc
+ 1: abc
+ ABCabc
+ 0: ABCabc
+ 1: ABC
+ abcABC
+ 0: abcABC
+ 1: abc
+
+/a{0}bc/
+ bc
+ 0: bc
+
+/^([^a])([^b])([^c]*)([^d]{3,4})/
+ baNOTccccd
+ 0: baNOTcccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: cccc
+ baNOTcccd
+ 0: baNOTccc
+ 1: b
+ 2: a
+ 3: NOT
+ 4: ccc
+ baNOTccd
+ 0: baNOTcc
+ 1: b
+ 2: a
+ 3: NO
+ 4: Tcc
+ bacccd
+ 0: baccc
+ 1: b
+ 2: a
+ 3:
+ 4: ccc
+ *** Failers
+ 0: *** Failers
+ 1: *
+ 2: *
+ 3: * Fail
+ 4: ers
+ anything
+No match
+ baccd
+No match
+
+/[^a]/
+ Abc
+ 0: A
+
+/[^a]/i
+ Abc
+ 0: b
+
+/[^a]+/
+ AAAaAbc
+ 0: AAA
+
+/[^a]+/i
+ AAAaAbc
+ 0: bc
+
+/[^k]$/
+ abc
+ 0: c
+ *** Failers
+ 0: s
+ abk
+No match
+
+/[^k]{2,3}$/
+ abc
+ 0: abc
+ kbc
+ 0: bc
+ kabc
+ 0: abc
+ *** Failers
+ 0: ers
+ abk
+No match
+ akb
+No match
+ akk
+No match
+
+/^[0-9]{8,}@.+[^k]$/
+ 12345678@a.b.c.d
+ 0: 12345678@a.b.c.d
+ 123456789@x.y.z
+ 0: 123456789@x.y.z
+ *** Failers
+No match
+ 12345678@x.y.uk
+No match
+ 1234567@a.b.c.d
+No match
+
+/(a)\1{8,}/
+ aaaaaaaaa
+ 0: aaaaaaaaa
+ 1: a
+ aaaaaaaaaa
+ 0: aaaaaaaaaa
+ 1: a
+ *** Failers
+No match
+ aaaaaaa
+No match
+
+/[^a]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^a]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+/[^az]/
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: A
+
+/[^az]/i
+ aaaabcd
+ 0: b
+ aaAabcd
+ 0: b
+
+/P[^*]TAIRE[^*]{1,6}LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/P[^*]TAIRE[^*]{1,}LL/
+ xxxxxxxxxxxPSTAIREISLLxxxxxxxxx
+ 0: PSTAIREISLL
+
+/(\.[0-9][0-9][1-9]?)[0-9]+/
+ 1.230003938
+ 0: .230003938
+ 1: .23
+ 1.875000282
+ 0: .875000282
+ 1: .875
+ 1.235
+ 0: .235
+ 1: .23
+
+/\b(foo)\s+(\w+)/i
+ Food is on the foo table
+ 0: foo table
+ 1: foo
+ 2: table
+
+/foo(.*)bar/
+ The food is under the bar in the barn.
+ 0: food is under the bar in the bar
+ 1: d is under the bar in the
+
+/(.*)([0-9]*)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 53147
+ 2:
+
+/(.*)([0-9]+)/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*)([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers: 5314
+ 2: 7
+
+/(.*)\b([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/(.*[^0-9])([0-9]+)$/
+ I have 2 numbers: 53147
+ 0: I have 2 numbers: 53147
+ 1: I have 2 numbers:
+ 2: 53147
+
+/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]]\/[[:digit:]][[:digit:]][[:digit:]][[:digit:]]/
+ 01/01/2000
+ 0: 01/01/2000
+
+/^(a){0,0}/
+ bcd
+ 0:
+ abc
+ 0:
+ aab
+ 0:
+
+/^(a){0,1}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+
+/^(a){0,2}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+
+/^(a){0,3}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+
+/^(a){0,}/
+ bcd
+ 0:
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+
+/^(a){1,1}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: a
+ 1: a
+
+/^(a){1,2}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+
+/^(a){1,3}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+
+/^(a){1,}/
+ bcd
+No match
+ abc
+ 0: a
+ 1: a
+ aab
+ 0: aa
+ 1: a
+ aaa
+ 0: aaa
+ 1: a
+ aaaaaaaa
+ 0: aaaaaaaa
+ 1: a
+
+/^[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]/
+ 123456654321
+ 0: 123456654321
+
+/^[[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]][[:digit:]]/
+ 123456654321
+ 0: 123456654321
+
+/^[abc]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^[a-c]{12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+
+/^(a|b|c){12}/
+ abcabcabcabc
+ 0: abcabcabcabc
+ 1: c
+
+/^[abcdefghijklmnopqrstuvwxy0123456789]/
+ n
+ 0: n
+ *** Failers
+No match
+ z
+No match
+
+/abcde{0,0}/
+ abcd
+ 0: abcd
+ *** Failers
+No match
+ abce
+No match
+
+/ab[cd]{0,0}e/
+ abe
+ 0: abe
+ *** Failers
+No match
+ abcde
+No match
+
+/ab(c){0,0}d/
+ abd
+ 0: abd
+ *** Failers
+No match
+ abcd
+No match
+
+/a(b*)/
+ a
+ 0: a
+ 1:
+ ab
+ 0: ab
+ 1: b
+ abbbb
+ 0: abbbb
+ 1: bbbb
+ *** Failers
+ 0: a
+ 1:
+ bbbbb
+No match
+
+/ab[0-9]{0}e/
+ abe
+ 0: abe
+ *** Failers
+No match
+ ab1e
+No match
+
+/(A|B)*CD/
+ CD
+ 0: CD
+
+/(AB)*\1/
+ ABABAB
+ 0: ABABAB
+ 1: AB
+
+/([0-9]+)(\w)/
+ 12345a
+ 0: 12345a
+ 1: 12345
+ 2: a
+ 12345+
+ 0: 12345
+ 1: 1234
+ 2: 5
+
+/(abc|)+/
+ abc
+ 0: abc
+ 1: abc
+ abcabc
+ 0: abcabc
+ 1: abc
+ abcabcabc
+ 0: abcabcabc
+ 1: abc
+ xyz
+ 0:
+ 1:
+
+/([a]*)*/
+ a
+ 0: a
+ 1: a
+ aaaaa
+ 0: aaaaa
+ 1: aaaaa
+
+/([ab]*)*/
+ a
+ 0: a
+ 1: a
+ b
+ 0: b
+ 1: b
+ ababab
+ 0: ababab
+ 1: ababab
+ aaaabcde
+ 0: aaaab
+ 1: aaaab
+ bbbb
+ 0: bbbb
+ 1: bbbb
+
+/([^a]*)*/
+ b
+ 0: b
+ 1: b
+ bbbb
+ 0: bbbb
+ 1: bbbb
+ aaa
+ 0:
+
+/([^ab]*)*/
+ cccc
+ 0: cccc
+ 1: cccc
+ abab
+ 0:
+
+/abc/
+ abc
+ 0: abc
+ xabcy
+ 0: abc
+ ababc
+ 0: abc
+ *** Failers
+No match
+ xbc
+No match
+ axc
+No match
+ abx
+No match
+
+/ab*c/
+ abc
+ 0: abc
+
+/ab*bc/
+ abc
+ 0: abc
+ abbc
+ 0: abbc
+ abbbbc
+ 0: abbbbc
+
+/.{1}/
+ abbbbc
+ 0: a
+
+/.{3,4}/
+ abbbbc
+ 0: abbb
+
+/ab{0,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab+bc/
+ abbc
+ 0: abbc
+ *** Failers
+No match
+ abc
+No match
+ abq
+No match
+
+/ab+bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{1,3}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{3,4}bc/
+ abbbbc
+ 0: abbbbc
+
+/ab{4,5}bc/
+ *** Failers
+No match
+ abq
+No match
+ abbbbc
+No match
+
+/ab?bc/
+ abbc
+ 0: abbc
+ abc
+ 0: abc
+
+/ab{0,1}bc/
+ abc
+ 0: abc
+
+/ab?c/
+ abc
+ 0: abc
+
+/ab{0,1}c/
+ abc
+ 0: abc
+
+/^abc$/
+ abc
+ 0: abc
+ *** Failers
+No match
+ abbbbc
+No match
+ abcc
+No match
+
+/^abc/
+ abcc
+ 0: abc
+
+/abc$/
+ aabc
+ 0: abc
+ *** Failers
+No match
+ aabc
+ 0: abc
+ aabcd
+No match
+
+/^/
+ abc
+ 0:
+
+/$/
+ abc
+ 0:
+
+/a.c/
+ abc
+ 0: abc
+ axc
+ 0: axc
+
+/a.*c/
+ axyzc
+ 0: axyzc
+
+/a[bc]d/
+ abd
+ 0: abd
+ *** Failers
+No match
+ axyzd
+No match
+ abc
+No match
+
+/a[b-d]e/
+ ace
+ 0: ace
+
+/a[b-d]/
+ aac
+ 0: ac
+
+/a[-b]/
+ a-
+ 0: a-
+
+/a[b-]/
+ a-
+ 0: a-
+
+/a[]]b/
+ a]b
+ 0: a]b
+
+/a[^bc]d/
+ aed
+ 0: aed
+ *** Failers
+No match
+ abd
+No match
+ abd
+No match
+
+/a[^-b]c/
+ adc
+ 0: adc
+
+/a[^]b]c/
+ adc
+ 0: adc
+ *** Failers
+No match
+ a-c
+ 0: a-c
+ a]c
+No match
+
+/\ba\b/
+ a-
+ 0: a
+ -a
+ 0: a
+ -a-
+ 0: a
+
+/\by\b/
+ *** Failers
+No match
+ xy
+No match
+ yz
+No match
+ xyz
+No match
+
+/\Ba\B/
+ *** Failers
+ 0: a
+ a-
+No match
+ -a
+No match
+ -a-
+No match
+
+/\By\b/
+ xy
+ 0: y
+
+/\by\B/
+ yz
+ 0: y
+
+/\By\B/
+ xyz
+ 0: y
+
+/\w/
+ a
+ 0: a
+
+/\W/
+ -
+ 0: -
+ *** Failers
+ 0: *
+ -
+ 0: -
+ a
+No match
+
+/a\sb/
+ a b
+ 0: a b
+
+/a\Sb/
+ a-b
+ 0: a-b
+ *** Failers
+No match
+ a-b
+ 0: a-b
+ a b
+No match
+
+/[0-9]/
+ 1
+ 0: 1
+
+/[^0-9]/
+ -
+ 0: -
+ *** Failers
+ 0: *
+ -
+ 0: -
+ 1
+No match
+
+/ab|cd/
+ abc
+ 0: ab
+ abcd
+ 0: ab
+
+/()ef/
+ def
+ 0: ef
+ 1:
+
+/a\(b/
+ a(b
+ 0: a(b
+
+/a\(*b/
+ ab
+ 0: ab
+ a((b
+ 0: a((b
+
+/((a))/
+ abc
+ 0: a
+ 1: a
+ 2: a
+
+/(a)b(c)/
+ abc
+ 0: abc
+ 1: a
+ 2: c
+
+/a+b+c/
+ aabbabc
+ 0: abc
+
+/a{1,}b{1,}c/
+ aabbabc
+ 0: abc
+
+/(a+|b)*/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){0,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)+/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b){1,}/
+ ab
+ 0: ab
+ 1: b
+
+/(a+|b)?/
+ ab
+ 0: a
+ 1: a
+
+/(a+|b){0,1}/
+ ab
+ 0: a
+ 1: a
+
+/[^ab]*/
+ cde
+ 0: cde
+
+/abc/
+ *** Failers
+No match
+ b
+No match
+
+
+/a*/
+
+
+/([abc])*d/
+ abbbcd
+ 0: abbbcd
+ 1: c
+
+/([abc])*bcd/
+ abcd
+ 0: abcd
+ 1: a
+
+/a|b|c|d|e/
+ e
+ 0: e
+
+/(a|b|c|d|e)f/
+ ef
+ 0: ef
+ 1: e
+
+/abcd*efg/
+ abcdefg
+ 0: abcdefg
+
+/ab*/
+ xabyabbbz
+ 0: ab
+ xayabbbz
+ 0: a
+
+/(ab|cd)e/
+ abcde
+ 0: cde
+ 1: cd
+
+/[abhgefdc]ij/
+ hij
+ 0: hij
+
+/(abc|)ef/
+ abcdef
+ 0: ef
+ 1:
+
+/(a|b)c*d/
+ abcd
+ 0: bcd
+ 1: b
+
+/(ab|ab*)bc/
+ abc
+ 0: abc
+ 1: a
+
+/a([bc]*)c*/
+ abc
+ 0: abc
+ 1: bc
+
+/a([bc]*)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]+)(c*d)/
+ abcd
+ 0: abcd
+ 1: bc
+ 2: d
+
+/a([bc]*)(c+d)/
+ abcd
+ 0: abcd
+ 1: b
+ 2: cd
+
+/a[bcd]*dcdcde/
+ adcdcde
+ 0: adcdcde
+
+/a[bcd]+dcdcde/
+ *** Failers
+No match
+ abcde
+No match
+ adcdcde
+No match
+
+/(ab|a)b*c/
+ abc
+ 0: abc
+ 1: ab
+
+/((a)(b)c)(d)/
+ abcd
+ 0: abcd
+ 1: abc
+ 2: a
+ 3: b
+ 4: d
+
+/[a-zA-Z_][a-zA-Z0-9_]*/
+ alpha
+ 0: alpha
+
+/^a(bc+|b[eh])g|.h$/
+ abh
+ 0: bh
+
+/(bc+d$|ef*g.|h?i(j|k))/
+ effgz
+ 0: effgz
+ 1: effgz
+ ij
+ 0: ij
+ 1: ij
+ 2: j
+ reffgz
+ 0: effgz
+ 1: effgz
+ *** Failers
+No match
+ effg
+No match
+ bcdd
+No match
+
+/((((((((((a))))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/((((((((((a))))))))))\9/
+ aa
+ 0: aa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+
+/(((((((((a)))))))))/
+ a
+ 0: a
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+
+/multiple words of text/
+ *** Failers
+No match
+ aa
+No match
+ uh-uh
+No match
+
+/multiple words/
+ multiple words, yeah
+ 0: multiple words
+
+/(.*)c(.*)/
+ abcde
+ 0: abcde
+ 1: ab
+ 2: de
+
+/\((.*), (.*)\)/
+ (a, b)
+ 0: (a, b)
+ 1: a
+ 2: b
+
+/abcd/
+ abcd
+ 0: abcd
+
+/a(bc)d/
+ abcd
+ 0: abcd
+ 1: bc
+
+/a[-]?c/
+ ac
+ 0: ac
+
+/(abc)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/([a-c]*)\1/
+ abcabc
+ 0: abcabc
+ 1: abc
+
+/(a)|\1/
+ a
+ 0: a
+ 1: a
+ *** Failers
+ 0: a
+ 1: a
+ ab
+ 0: a
+ 1: a
+ x
+No match
+
+/abc/i
+ ABC
+ 0: ABC
+ XABCY
+ 0: ABC
+ ABABC
+ 0: ABC
+ *** Failers
+No match
+ aaxabxbaxbbx
+No match
+ XBC
+No match
+ AXC
+No match
+ ABX
+No match
+
+/ab*c/i
+ ABC
+ 0: ABC
+
+/ab*bc/i
+ ABC
+ 0: ABC
+ ABBC
+ 0: ABBC
+
+/ab+bc/i
+ *** Failers
+No match
+ ABC
+No match
+ ABQ
+No match
+
+/ab+bc/i
+ ABBBBC
+ 0: ABBBBC
+
+/^abc$/i
+ ABC
+ 0: ABC
+ *** Failers
+No match
+ ABBBBC
+No match
+ ABCC
+No match
+
+/^abc/i
+ ABCC
+ 0: ABC
+
+/abc$/i
+ AABC
+ 0: ABC
+
+/^/i
+ ABC
+ 0:
+
+/$/i
+ ABC
+ 0:
+
+/a.c/i
+ ABC
+ 0: ABC
+ AXC
+ 0: AXC
+
+/a.*c/i
+ *** Failers
+No match
+ AABC
+ 0: AABC
+ AXYZD
+No match
+
+/a[bc]d/i
+ ABD
+ 0: ABD
+
+/a[b-d]e/i
+ ACE
+ 0: ACE
+ *** Failers
+No match
+ ABC
+No match
+ ABD
+No match
+
+/a[b-d]/i
+ AAC
+ 0: AC
+
+/a[-b]/i
+ A-
+ 0: A-
+
+/a[b-]/i
+ A-
+ 0: A-
+
+/a[]]b/i
+ A]B
+ 0: A]B
+
+/a[^bc]d/i
+ AED
+ 0: AED
+
+/a[^-b]c/i
+ ADC
+ 0: ADC
+ *** Failers
+No match
+ ABD
+No match
+ A-C
+No match
+
+/a[^]b]c/i
+ ADC
+ 0: ADC
+
+/ab|cd/i
+ ABC
+ 0: AB
+ ABCD
+ 0: AB
+
+/()ef/i
+ DEF
+ 0: EF
+ 1:
+
+/$b/i
+ *** Failers
+No match
+ A]C
+No match
+ B
+No match
+
+/a\(b/i
+ A(B
+ 0: A(B
+
+/a\(*b/i
+ AB
+ 0: AB
+ A((B
+ 0: A((B
+
+/((a))/i
+ ABC
+ 0: A
+ 1: A
+ 2: A
+
+/(a)b(c)/i
+ ABC
+ 0: ABC
+ 1: A
+ 2: C
+
+/a+b+c/i
+ AABBABC
+ 0: ABC
+
+/a{1,}b{1,}c/i
+ AABBABC
+ 0: ABC
+
+/(a+|b)*/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){0,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)+/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b){1,}/i
+ AB
+ 0: AB
+ 1: B
+
+/(a+|b)?/i
+ AB
+ 0: A
+ 1: A
+
+/(a+|b){0,1}/i
+ AB
+ 0: A
+ 1: A
+
+/[^ab]*/i
+ CDE
+ 0: CDE
+
+/([abc])*d/i
+ ABBBCD
+ 0: ABBBCD
+ 1: C
+
+/([abc])*bcd/i
+ ABCD
+ 0: ABCD
+ 1: A
+
+/a|b|c|d|e/i
+ E
+ 0: E
+
+/(a|b|c|d|e)f/i
+ EF
+ 0: EF
+ 1: E
+
+/abcd*efg/i
+ ABCDEFG
+ 0: ABCDEFG
+
+/ab*/i
+ XABYABBBZ
+ 0: AB
+ XAYABBBZ
+ 0: A
+
+/(ab|cd)e/i
+ ABCDE
+ 0: CDE
+ 1: CD
+
+/[abhgefdc]ij/i
+ HIJ
+ 0: HIJ
+
+/^(ab|cd)e/i
+ ABCDE
+No match
+
+/(abc|)ef/i
+ ABCDEF
+ 0: EF
+ 1:
+
+/(a|b)c*d/i
+ ABCD
+ 0: BCD
+ 1: B
+
+/(ab|ab*)bc/i
+ ABC
+ 0: ABC
+ 1: A
+
+/a([bc]*)c*/i
+ ABC
+ 0: ABC
+ 1: BC
+
+/a([bc]*)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]+)(c*d)/i
+ ABCD
+ 0: ABCD
+ 1: BC
+ 2: D
+
+/a([bc]*)(c+d)/i
+ ABCD
+ 0: ABCD
+ 1: B
+ 2: CD
+
+/a[bcd]*dcdcde/i
+ ADCDCDE
+ 0: ADCDCDE
+
+/a[bcd]+dcdcde/i
+
+/(ab|a)b*c/i
+ ABC
+ 0: ABC
+ 1: AB
+
+/((a)(b)c)(d)/i
+ ABCD
+ 0: ABCD
+ 1: ABC
+ 2: A
+ 3: B
+ 4: D
+
+/[a-zA-Z_][a-zA-Z0-9_]*/i
+ ALPHA
+ 0: ALPHA
+
+/^a(bc+|b[eh])g|.h$/i
+ ABH
+ 0: BH
+
+/(bc+d$|ef*g.|h?i(j|k))/i
+ EFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+ IJ
+ 0: IJ
+ 1: IJ
+ 2: J
+ REFFGZ
+ 0: EFFGZ
+ 1: EFFGZ
+ *** Failers
+No match
+ ADCDCDE
+No match
+ EFFG
+No match
+ BCDD
+No match
+
+/((((((((((a))))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/((((((((((a))))))))))\9/i
+ AA
+ 0: AA
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+10: A
+
+/(((((((((a)))))))))/i
+ A
+ 0: A
+ 1: A
+ 2: A
+ 3: A
+ 4: A
+ 5: A
+ 6: A
+ 7: A
+ 8: A
+ 9: A
+
+/multiple words of text/i
+ *** Failers
+No match
+ AA
+No match
+ UH-UH
+No match
+
+/multiple words/i
+ MULTIPLE WORDS, YEAH
+ 0: MULTIPLE WORDS
+
+/(.*)c(.*)/i
+ ABCDE
+ 0: ABCDE
+ 1: AB
+ 2: DE
+
+/\((.*), (.*)\)/i
+ (A, B)
+ 0: (A, B)
+ 1: A
+ 2: B
+
+/abcd/i
+ ABCD
+ 0: ABCD
+
+/a(bc)d/i
+ ABCD
+ 0: ABCD
+ 1: BC
+
+/a[-]?c/i
+ AC
+ 0: AC
+
+/(abc)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/([a-c]*)\1/i
+ ABCABC
+ 0: ABCABC
+ 1: ABC
+
+/((foo)|(bar))*/
+ foobar
+ 0: foobar
+ 1: bar
+ 2: foo
+ 3: bar
+
+/^(.+)?B/
+ AB
+ 0: AB
+ 1: A
+
+/^([^a-z])|(\^)$/
+ .
+ 0: .
+ 1: .
+
+/^[<>]&/
+ <&OUT
+ 0: <&
+
+/^(){3,5}/
+ abc
+ 0:
+ 1:
+
+/^(a+)*ax/
+ aax
+ 0: aax
+ 1: a
+
+/^((a|b)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/^((a|bc)+)*ax/
+ aax
+ 0: aax
+ 1: a
+ 2: a
+
+/(a|x)*ab/
+ cab
+ 0: ab
+
+/(a)*ab/
+ cab
+ 0: ab
+
+/(ab)[0-9]\1/i
+ Ab4ab
+ 0: Ab4ab
+ 1: Ab
+ ab4Ab
+ 0: ab4Ab
+ 1: ab
+
+/foo\w*[0-9]{4}baz/
+ foobar1234baz
+ 0: foobar1234baz
+
+/(\w+:)+/
+ one:
+ 0: one:
+ 1: one:
+
+/((\w|:)+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: :
+ 3: abcd
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/(a*)b+/
+ caab
+ 0: aab
+ 1: aa
+
+/((\w|:)+::)?(\w+)$/
+ abcd
+ 0: abcd
+ 1: <unset>
+ 2: <unset>
+ 3: abcd
+ xy:z:::abcd
+ 0: xy:z:::abcd
+ 1: xy:z:::
+ 2: :
+ 3: abcd
+ *** Failers
+ 0: Failers
+ 1: <unset>
+ 2: <unset>
+ 3: Failers
+ abcd:
+No match
+ abcd:
+No match
+
+/^[^bcd]*(c+)/
+ aexycd
+ 0: aexyc
+ 1: c
+
+/((Z)+|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2: Z
+
+/(Z()|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+
+/(Z(())|A)*/
+ ZABCDEFG
+ 0: ZA
+ 1: A
+ 2:
+ 3:
+
+/(.*)[0-9]+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+
+/((.*))[0-9]+\1/
+ abc123abc
+ 0: abc123abc
+ 1: abc
+ 2: abc
+ abc123bc
+ 0: bc123bc
+ 1: bc
+ 2: bc
+
+/^a{2,5}$/
+ aa
+ 0: aa
+ aaa
+ 0: aaa
+ aaaa
+ 0: aaaa
+ aaaaa
+ 0: aaaaa
+ *** Failers
+No match
+ a
+No match
+ b
+No match
+ aaaaab
+No match
+ aaaaaa
diff --git a/testsuite/SPENCER.tests b/testsuite/SPENCER.tests
new file mode 100644
index 0000000..fc26e6b
--- /dev/null
+++ b/testsuite/SPENCER.tests
@@ -0,0 +1,542 @@
+# regular expression test set
+# Lines are at least three fields, separated by one or more tabs. "" stands
+# for an empty field. First field is an RE. Second field is flags. If
+# C flag given, regcomp() is expected to fail, and the third field is the
+# error name (minus the leading REG_).
+#
+# Otherwise it is expected to succeed, and the third field is the string to
+# try matching it against. If there is no fourth field, the match is
+# expected to fail. If there is a fourth field, it is the substring that
+# the RE is expected to match. If there is a fifth field, it is a comma-
+# separated list of what the subexpressions should match, with - indicating
+# no match for that one. In both the fourth and fifth fields, a (sub)field
+# starting with @ indicates that the (sub)expression is expected to match
+# a null string followed by the stuff after the @; this provides a way to
+# test where null strings match. The character `N' in REs and strings
+# is newline, `S' is space, `T' is tab, `Z' is NUL.
+#
+# The full list of flags:
+# - placeholder, does nothing
+# b RE is a BRE, not an ERE
+# & try it as both an ERE and a BRE
+# C regcomp() error expected, third field is error name
+# i REG_ICASE
+# m ("mundane") REG_NOSPEC
+# s REG_NOSUB (not really testable)
+# n REG_NEWLINE
+# ^ REG_NOTBOL
+# $ REG_NOTEOL
+# # REG_STARTEND (see below)
+# p REG_PEND
+#
+# For REG_STARTEND, the start/end offsets are those of the substring
+# enclosed in ().
+
+# basics
+a & a a
+abc & abc abc
+abc|de - abc abc
+a|b|c - abc a
+
+# parentheses and perversions thereof
+a(b)c - abc abc
+a\(b\)c b abc abc
+a( C EPAREN
+a( b a( a(
+a\( - a( a(
+a\( bC EPAREN
+a\(b bC EPAREN
+a(b C EPAREN
+a(b b a(b a(b
+# gag me with a right parenthesis -- 1003.2 goofed here (my fault, partly)
+a) - a) a)
+) - ) )
+# end gagging (in a just world, those *should* give EPAREN)
+a) b a) a)
+a\) bC EPAREN
+\) bC EPAREN
+a()b - ab ab
+a\(\)b b ab ab
+
+# anchoring and REG_NEWLINE
+^abc$ & abc abc
+a^b - a^b
+a^b b a^b a^b
+a$b - a$b
+a$b b a$b a$b
+^ & abc @abc
+$ & abc @
+^$ & "" @
+$^ - "" @
+\($\)\(^\) b "" @
+# stop retching, those are legitimate (although disgusting)
+^^ - "" @
+$$ - "" @
+b$ & abNc
+b$ &n abNc b
+^b$ & aNbNc
+^b$ &n aNbNc b
+^$ &n aNNb @Nb
+^$ n abc
+^$ n abcN @
+$^ n aNNb @Nb
+\($\)\(^\) bn aNNb @Nb
+^^ n^ aNNb @Nb
+$$ n aNNb @NN
+^a ^ a
+a$ $ a
+^a ^n aNb
+^b ^n aNb b
+a$ $n bNa
+b$ $n bNa b
+a*(^b$)c* - b b
+a*\(^b$\)c* b b b
+
+# certain syntax errors and non-errors
+| C EMPTY
+| b | |
+* C BADRPT
+* b * *
++ C BADRPT
+? C BADRPT
+"" &C EMPTY
+() - abc @abc
+\(\) b abc @abc
+a||b C EMPTY
+|ab C EMPTY
+ab| C EMPTY
+(|a)b C EMPTY
+(a|)b C EMPTY
+(*a) C BADRPT
+(+a) C BADRPT
+(?a) C BADRPT
+({1}a) C BADRPT
+\(\{1\}a\) bC BADRPT
+(a|*b) C BADRPT
+(a|+b) C BADRPT
+(a|?b) C BADRPT
+(a|{1}b) C BADRPT
+^* C BADRPT
+^* b * *
+^+ C BADRPT
+^? C BADRPT
+^{1} C BADRPT
+^\{1\} bC BADRPT
+
+# metacharacters, backslashes
+a.c & abc abc
+a[bc]d & abd abd
+a\*c & a*c a*c
+a\\b & a\b a\b
+a\\\*b & a\*b a\*b
+# The following test is wrong. Using \b in a BRE or ERE is undefined.
+# a\bc & abc abc
+a\ &C EESCAPE
+a\\bc & a\bc a\bc
+\{ bC BADRPT
+a\[b & a[b a[b
+a[b &C EBRACK
+# trailing $ is a peculiar special case for the BRE code
+a$ & a a
+a$ & a$
+a\$ & a
+a\$ & a$ a$
+a\\$ & a
+a\\$ & a$
+a\\$ & a\$
+a\\$ & a\ a\
+
+# back references, ugh
+a\(b\)\2c bC ESUBREG
+a\(b\1\)c bC ESUBREG
+a\(b*\)c\1d b abbcbbd abbcbbd bb
+a\(b*\)c\1d b abbcbd
+a\(b*\)c\1d b abbcbbbd
+^\(.\)\1 b abc
+a\([bc]\)\1d b abcdabbd abbd b
+a\(\([bc]\)\2\)*d b abbccd abbccd
+a\(\([bc]\)\2\)*d b abbcbd
+# actually, this next one probably ought to fail, but the spec is unclear
+a\(\(b\)*\2\)*d b abbbd abbbd
+# here is a case that no NFA implementation does right
+\(ab*\)[ab]*\1 b ababaaa ababaaa a
+# check out normal matching in the presence of back refs
+\(a\)\1bcd b aabcd aabcd
+\(a\)\1bc*d b aabcd aabcd
+\(a\)\1bc*d b aabd aabd
+\(a\)\1bc*d b aabcccd aabcccd
+\(a\)\1bc*[ce]d b aabcccd aabcccd
+^\(a\)\1b\(c\)*cd$ b aabcccd aabcccd
+
+# ordinary repetitions
+ab*c & abc abc
+ab+c - abc abc
+ab?c - abc abc
+a\(*\)b b a*b a*b
+a\(**\)b b ab ab
+a\(***\)b bC BADRPT
+*a b *a *a
+**a b a a
+***a bC BADRPT
+
+# the dreaded bounded repetitions
+# The following two tests are not correct:
+#{ & { {
+#{abc & {abc {abc
+# '{' is always a special char outside bracket expressions. So test only BRE:
+{ b { {
+{abc b {abc {abc
+{1 C BADRPT
+{1} C BADRPT
+# Same reason as for the two tests above:
+#a{b & a{b a{b
+a{b b a{b a{b
+a{1}b - ab ab
+a\{1\}b b ab ab
+a{1,}b - ab ab
+a\{1,\}b b ab ab
+a{1,2}b - aab aab
+a\{1,2\}b b aab aab
+a{1 C EBRACE
+a\{1 bC EBRACE
+a{1a C EBRACE
+a\{1a bC EBRACE
+a{1a} C BADBR
+a\{1a\} bC BADBR
+# These four tests check for undefined behavior. Our implementation does
+# something different.
+#a{,2} - a{,2} a{,2}
+#a\{,2\} bC BADBR
+#a{,} - a{,} a{,}
+#a\{,\} bC BADBR
+a{1,x} C BADBR
+a\{1,x\} bC BADBR
+a{1,x C EBRACE
+a\{1,x bC EBRACE
+# These two tests probably fail due to an arbitrary limit on the number of
+# repetitions in the other implementation.
+#a{300} C BADBR
+#a\{300\} bC BADBR
+a{1,0} C BADBR
+a\{1,0\} bC BADBR
+ab{0,0}c - abcac ac
+ab\{0,0\}c b abcac ac
+ab{0,1}c - abcac abc
+ab\{0,1\}c b abcac abc
+ab{0,3}c - abbcac abbc
+ab\{0,3\}c b abbcac abbc
+ab{1,1}c - acabc abc
+ab\{1,1\}c b acabc abc
+ab{1,3}c - acabc abc
+ab\{1,3\}c b acabc abc
+ab{2,2}c - abcabbc abbc
+ab\{2,2\}c b abcabbc abbc
+ab{2,4}c - abcabbc abbc
+ab\{2,4\}c b abcabbc abbc
+((a{1,10}){1,10}){1,10} - a a a,a
+
+# multiple repetitions
+# Wow, there is a serious disconnect here. The ERE grammar is like this:
+# ERE_expression : one_char_or_coll_elem_ERE
+# | '^'
+# | '$'
+# | '(' extended_reg_exp ')'
+# | ERE_expression ERE_dupl_symbol
+# ;
+# where ERE_dupl_symbol is any of the repetition methods. It is clear from
+# this that consecutive repetition is OK. On top of this, the one test not
+# marked as failing must fail. For BREs the situation is different, so we
+# use the four tests.
+#a** &C BADRPT
+a** bC BADRPT
+#a++ C BADRPT
+#a?? C BADRPT
+#a*+ C BADRPT
+#a*? C BADRPT
+#a+* C BADRPT
+#a+? C BADRPT
+#a?* C BADRPT
+#a?+ C BADRPT
+#a{1}{1} C BADRPT
+#a*{1} C BADRPT
+#a+{1} C BADRPT
+#a?{1} C BADRPT
+#a{1}* C BADRPT
+#a{1}+ C BADRPT
+#a{1}? C BADRPT
+#a*{b} - a{b} a{b}
+a\{1\}\{1\} bC BADRPT
+a*\{1\} bC BADRPT
+a\{1\}* bC BADRPT
+
+# brackets, and numerous perversions thereof
+a[b]c & abc abc
+a[ab]c & abc abc
+a[^ab]c & adc adc
+a[]b]c & a]c a]c
+a[[b]c & a[c a[c
+a[-b]c & a-c a-c
+a[^]b]c & adc adc
+a[^-b]c & adc adc
+a[b-]c & a-c a-c
+a[b &C EBRACK
+a[] &C EBRACK
+a[1-3]c & a2c a2c
+a[3-1]c &C ERANGE
+a[1-3-5]c &C ERANGE
+a[[.-.]--]c & a-c a-c
+# I don't think the error value should be ERANGE since a[1-] would be
+# valid, too. Expect EBRACK.
+#a[1- &C ERANGE
+a[1- &C EBRACK
+a[[. &C EBRACK
+a[[.x &C EBRACK
+a[[.x. &C EBRACK
+a[[.x.] &C EBRACK
+a[[.x.]] & ax ax
+a[[.x,.]] &C ECOLLATE
+# This test is invalid. "one" is no collating symbol in any standardized
+# locale.
+# a[[.one.]]b & a1b a1b
+a[[.notdef.]]b &C ECOLLATE
+a[[.].]]b & a]b a]b
+a[[:alpha:]]c & abc abc
+a[[:notdef:]]c &C ECTYPE
+a[[: &C EBRACK
+a[[:alpha &C EBRACK
+a[[:alpha:] &C EBRACK
+a[[:alpha,:] &C ECTYPE
+a[[:]:]]b &C ECTYPE
+a[[:-:]]b &C ECTYPE
+a[[:alph:]] &C ECTYPE
+a[[:alphabet:]] &C ECTYPE
+[[:alnum:]]+ - -%@a0X- a0X
+[[:alpha:]]+ - -%@aX0- aX
+[[:blank:]]+ - aSSTb SST
+[[:cntrl:]]+ - aNTb NT
+[[:digit:]]+ - a019b 019
+[[:graph:]]+ - Sa%bS a%b
+[[:lower:]]+ - AabC ab
+[[:print:]]+ - NaSbN aSb
+[[:punct:]]+ - S%-&T %-&
+[[:space:]]+ - aSNTb SNT
+[[:upper:]]+ - aBCd BC
+[[:xdigit:]]+ - p0f3Cq 0f3C
+a[[=b=]]c & abc abc
+a[[= &C EBRACK
+a[[=b &C EBRACK
+a[[=b= &C EBRACK
+a[[=b=] &C EBRACK
+a[[=b,=]] &C ECOLLATE
+# This test is invalid. "one" is no collating symbol in any standardized
+# locale.
+#a[[=one=]]b & a1b a1b
+
+# complexities
+a(((b)))c - abc abc
+a(b|(c))d - abd abd
+a(b*|c)d - abbd abbd
+# just gotta have one DFA-buster, of course
+a[ab]{20} - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and an inline expansion in case somebody gets tricky
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] - aaaaabaaaabaaaabaaaab aaaaabaaaabaaaabaaaab
+# and in case somebody just slips in an NFA...
+a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) - aaaaabaaaabaaaabaaaabweeknights aaaaabaaaabaaaabaaaabweeknights
+# fish for anomalies as the number of states passes 32
+12345678901234567890123456789 - a12345678901234567890123456789b 12345678901234567890123456789
+123456789012345678901234567890 - a123456789012345678901234567890b 123456789012345678901234567890
+1234567890123456789012345678901 - a1234567890123456789012345678901b 1234567890123456789012345678901
+12345678901234567890123456789012 - a12345678901234567890123456789012b 12345678901234567890123456789012
+123456789012345678901234567890123 - a123456789012345678901234567890123b 123456789012345678901234567890123
+# and one really big one, beyond any plausible word width
+1234567890123456789012345678901234567890123456789012345678901234567890 - a1234567890123456789012345678901234567890123456789012345678901234567890b 1234567890123456789012345678901234567890123456789012345678901234567890
+# fish for problems as brackets go past 8
+[ab][cd][ef][gh][ij][kl][mn] - xacegikmoq acegikm
+[ab][cd][ef][gh][ij][kl][mn][op] - xacegikmoq acegikmo
+[ab][cd][ef][gh][ij][kl][mn][op][qr] - xacegikmoqy acegikmoq
+[ab][cd][ef][gh][ij][kl][mn][op][q] - xacegikmoqy acegikmoq
+
+# subtleties of matching
+abc & xabcy abc
+a\(b\)?c\1d b acd
+aBc i Abc Abc
+a[Bc]*d i abBCcd abBCcd
+0[[:upper:]]1 &i 0a1 0a1
+0[[:lower:]]1 &i 0A1 0A1
+a[^b]c &i abc
+a[^b]c &i aBc
+a[^b]c &i adc adc
+[a]b[c] - abc abc
+[a]b[a] - aba aba
+[abc]b[abc] - abc abc
+[abc]b[abd] - abd abd
+a(b?c)+d - accd accd
+(wee|week)(knights|night) - weeknights weeknights
+(we|wee|week|frob)(knights|night|day) - weeknights weeknights
+a[bc]d - xyzaaabcaababdacd abd
+a[ab]c - aaabc abc
+abc s abc abc
+() s abc @abc
+a* & b @b
+
+# Let's have some fun -- try to match a C comment.
+# first the obvious, which looks okay at first glance...
+/\*.*\*/ - /*x*/ /*x*/
+# but...
+/\*.*\*/ - /*x*/y/*z*/ /*x*/y/*z*/
+# okay, we must not match */ inside; try to do that...
+/\*([^*]|\*[^/])*\*/ - /*x*/ /*x*/
+/\*([^*]|\*[^/])*\*/ - /*x*/y/*z*/ /*x*/
+# but...
+/\*([^*]|\*[^/])*\*/ - /*x**/y/*z*/ /*x**/y/*z*/
+# and a still fancier version, which does it right (I think)...
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x*/y/*z*/ /*x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**/y/*z*/ /*x**/
+/\*([^*]|\*+[^*/])*\*+/ - /*x****/y/*z*/ /*x****/
+/\*([^*]|\*+[^*/])*\*+/ - /*x**x*/y/*z*/ /*x**x*/
+/\*([^*]|\*+[^*/])*\*+/ - /*x***x/y/*z*/ /*x***x/y/*z*/
+
+# subexpressions
+.* - abc abc -
+a(b)(c)d - abcd abcd b,c
+a(((b)))c - abc abc b,b,b
+a(b|(c))d - abd abd b,-
+a(b*|c|e)d - abbd abbd bb
+a(b*|c|e)d - acd acd c
+a(b*|c|e)d - ad ad @d
+a(b?)c - abc abc b
+a(b?)c - ac ac @c
+a(b+)c - abc abc b
+a(b+)c - abbbc abbbc bbb
+a(b*)c - ac ac @c
+(a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de
+# the regression tester only asks for 9 subexpressions
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j
+a(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)l - abcdefghijkl abcdefghijkl b,c,d,e,f,g,h,i,j,k
+a([bc]?)c - abc abc b
+a([bc]?)c - ac ac @c
+a([bc]+)c - abc abc b
+a([bc]+)c - abcc abcc bc
+a([bc]+)bc - abcbc abcbc bc
+a(bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abb abb b
+a(bbb+|bb+|b)b - abbb abbb bb
+a(bbb+|bb+|b)bb - abbb abbb b
+(.*).* - abcdef abcdef abcdef
+(a*)* - bc @b @b
+
+# do we get the right subexpression when it is used more than once?
+a(b|c)*d - ad ad -
+a(b|c)*d - abcd abcd c
+a(b|c)+d - abd abd b
+a(b|c)+d - abcd abcd c
+a(b|c?)+d - ad ad @d
+a(b|c?)+d - abcd abcd c
+a(b|c){0,0}d - ad ad -
+a(b|c){0,1}d - ad ad -
+a(b|c){0,1}d - abd abd b
+a(b|c){0,2}d - ad ad -
+a(b|c){0,2}d - abcd abcd c
+a(b|c){0,}d - ad ad -
+a(b|c){0,}d - abcd abcd c
+a(b|c){1,1}d - abd abd b
+a(b|c){1,1}d - acd acd c
+a(b|c){1,2}d - abd abd b
+a(b|c){1,2}d - abcd abcd c
+a(b|c){1,}d - abd abd b
+a(b|c){1,}d - abcd abcd c
+a(b|c){2,2}d - acbd acbd b
+a(b|c){2,2}d - abcd abcd c
+a(b|c){2,4}d - abcd abcd c
+a(b|c){2,4}d - abcbd abcbd b
+a(b|c){2,4}d - abcbcd abcbcd c
+a(b|c){2,}d - abcd abcd c
+a(b|c){2,}d - abcbd abcbd b
+a(b+|((c)*))+d - abd abd b,-,-
+a(b+|((c)*))+d - abcd abcd c,c,c
+
+# check out the STARTEND option
+[abc] &# a(b)c b
+[abc] &# a(d)c
+[abc] &# a(bc)d b
+[abc] &# a(dc)d c
+. &# a()c
+b.*c &# b(bc)c bc
+b.* &# b(bc)c bc
+.*c &# b(bc)c bc
+
+# plain strings, with the NOSPEC flag
+abc m abc abc
+abc m xabcy abc
+abc m xyz
+a*b m aba*b a*b
+a*b m ab
+"" mC EMPTY
+
+# cases involving NULs
+aZb & a a
+aZb &p a
+aZb &p# (aZb) aZb
+aZ*b &p# (ab) ab
+a.b &# (aZb) aZb
+a.* &# (aZb)c aZb
+
+# word boundaries (ick)
+[[:<:]]a & a a
+[[:<:]]a & ba
+[[:<:]]a & -a a
+a[[:>:]] & a a
+a[[:>:]] & ab
+a[[:>:]] & a- a
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc abc
+[[:<:]]a.c[[:>:]] & axcd-dayc-dazce-abc-q abc
+[[:<:]]a.c[[:>:]] & axc-dayc-dazce-abc axc
+[[:<:]]b.c[[:>:]] & a_bxc-byc_d-bzc-q bzc
+[[:<:]].x..[[:>:]] & y_xa_-_xb_y-_xc_-axdc _xc_
+[[:<:]]a_b[[:>:]] & x_a_b
+
+# past problems, and suspected problems
+(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A]) - A1 A1
+abcdefghijklmnop i abcdefghijklmnop abcdefghijklmnop
+abcdefghijklmnopqrstuv i abcdefghijklmnopqrstuv abcdefghijklmnopqrstuv
+(ALAK)|(ALT[AB])|(CC[123]1)|(CM[123]1)|(GAMC)|(LC[23][EO ])|(SEM[1234])|(SL[ES][12])|(SLWW)|(SLF )|(SLDT)|(VWH[12])|(WH[34][EW])|(WP1[ESN]) - CC11 CC11
+CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a - CC11 CC11
+Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
+a?b - ab ab
+-\{0,1\}[0-9]*$ b -5 -5
+a*a*a*a*a*a*a* & aaaaaa aaaaaa
+(\b){0} - x @x -
+\(\b\)\{0,0\} b abc @abc -
+a(\b){0}c - ac ac -
+a(.*)b(\1){0}c - abc abc @bc,-
+a(.*)b(\1){0}c - axbc axbc x,-
+
+a\(\(b*\)\)c\1d b abbcbbd abbcbbd bb,bb
+a\(\([bc]\)\)\2d b abcdabbd abbd b,b
+a\(\(\(\([bc]\)\)\3\)\)*d b abbccd abbccd cc,cc,c,c
+a(b)(c)d - abcd abcd b,c
+a(((b)))c - abc abc b,b,b
+a(((b|(((c))))))d - abd abd b,b,b,-,-,-
+a(((b*|c|e)))d - abbd abbd bb,bb,bb
+a((b|c)){0,0}d - ad ad -,-
+a((b|c)){0,1}d - abd abd b,b
+a((b|c)){0,2}d - abcd abcd c,c
+a((b+|((c)*)))+d - abd abd b,b,-,-
+a((b+|((c)*)))+d - abcd abcd c,c,c,c
+(((\b))){0} - x @x -,-,-
+a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,-
+a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,-
+
+\b & SaT @aT
+\b & aT @aT
+a.*\b & abT ab
+\b & STSS
+\B & abc @bc
+\B & aSbTc
+\B & SaT @SaT
+\B & aSTSb @TSb
+
+o$($|.) - oN
+o$($|.) - op
+o$($|.) - o o
diff --git a/testsuite/badenc.sh b/testsuite/badenc.sh
new file mode 100755
index 0000000..2dd80ab
--- /dev/null
+++ b/testsuite/badenc.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+# Test runner for old 'badenc' test: input with an invalid UTF-8 octet must be reproduced unchanged.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# The input (which is also the expected output),
+# containing an invalid multibyte sequence in UTF-8 (octet \320 = 0xD0).
+printf "abc\nde\320f\nghi\njkl\nmno\npqr\nstu\nvwx\nyz\n" > badenc-inp \
+ || framework_failure_
+
+# Per line: H;g;s/\n// round-trips the line through the hold space, p prints it, z;x empties both spaces.
+# The program: using 'z' to clear the pattern-space even
+# if it contains invalid multibyte sequences.
+# Using 's/.*//' would not be able to clear the pattern-space.
+cat << \EOF > badenc.sed || framework_failure_
+/.*/ { H ; g ; s/\n// ; p ; z ; x }
+EOF
+
+# Run the script with -n under a UTF-8 locale; the output must be identical to the input.
+env LC_ALL=en_US.UTF-8 sed -nf badenc.sed badenc-inp > badenc-out || fail=1
+remove_cr_inplace badenc-out
+compare badenc-inp badenc-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/binary.sed b/testsuite/binary.sed
new file mode 100644
index 0000000..e8ffd3b
--- /dev/null
+++ b/testsuite/binary.sed
@@ -0,0 +1,189 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (on a single binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version does everything in pattern space (a la dc.sed).
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+
+1s/^/%%/
+
+:cmd
+s/\(.*%%\) *\([0-9][0-9]*\)/\2\
+\1/
+tcmd
+s/%% *#.*/%%/
+/%%$/ {
+ $b quit
+ N
+}
+
+/^.*%%D/ s/^[^\n]*\n/&&/
+/^.*%%P/ s/^[^\n]*\n//
+/^.*%%x/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+/^.*%%r/ s/^\([^\n]*\n\)\([^%]*\)/\2\1/
+/^.*%%R/ s/^\([^%]*\n\)\([^\n]*\n\)/\2\1/
+/^.*%%c/ s/^.*%%/%%/
+/^.*%%p/ P
+
+/^.*%%l/ {
+ h
+ s/.%%.*//
+ p
+ g
+}
+
+/^.*%%q/ {
+ :quit
+ /^%%/!P
+ d
+}
+
+/^.*%%b/ {
+ # Decimal to binary via analog form (the number is expanded to a run of 'a's)
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/^.*%%d/ {
+ # Binary to decimal via analog form (the number is expanded to a run of 'a's)
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/^.*%%&/ {
+ # Binary AND; "111 01000" is a lookup table of overlapping <bit><bit><result> triples
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/^.*%%^/ {
+ # Binary XOR; reuses the OR loop below with its own lookup table "000 01101"
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/^.*%%|/ {
+ # Binary OR; the loop is shared with XOR, only the lookup table differs
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/^.*%%~/ {
+ # Binary NOT; "-010-" is the lookup table used to flip each bit
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/^.*%%</ {
+ # Left shift: convert the count to analog, then append one 0 bit per analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/^.*%%>/ {
+ # Right shift: convert the count to analog, then drop one bit per analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+
+s/%%./%%/
+tcmd
diff --git a/testsuite/binary.sh b/testsuite/binary.sh
new file mode 100755
index 0000000..8ee7ec9
--- /dev/null
+++ b/testsuite/binary.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+# Test runner for the binary-operation version of dc.sed.
+# Adapted from old-style 'binary.sed' test.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+cat << \EOF > inp || framework_failure_
+192.168.1.2 br b8<r b16<r b24< R|R|R| D
+255.255.255.0 br b8<r b16<r b24< R|R|R| D~r
+& DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+| DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+EOF
+
+
+cat << \EOF > exp || framework_failure_
+192
+168
+1
+0
+192
+168
+1
+255
+EOF
+
+
+# Location of the external sed scripts (binary.sed, binary2.sed, binary3.sed)
+dir="$abs_top_srcdir/testsuite"
+
+
+# Run the three variations of the sed script; each must reproduce 'exp'
+sed -n -f "$dir/binary.sed" < inp > out1 || fail=1
+remove_cr_inplace out1
+compare exp out1 || fail=1
+
+sed -n -f "$dir/binary2.sed" < inp > out2 || fail=1
+remove_cr_inplace out2
+compare exp out2 || fail=1
+
+sed -n -f "$dir/binary3.sed" < inp > out3 || fail=1
+remove_cr_inplace out3
+compare exp out3 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/binary2.sed b/testsuite/binary2.sed
new file mode 100644
index 0000000..115b942
--- /dev/null
+++ b/testsuite/binary2.sed
@@ -0,0 +1,226 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (on a single binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version keeps the stack in hold space and the command in pattern
+# space; it is the fastest one (though the gap with binary3.sed is small).
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+:cmd
+s/^[\n\t ]*//
+s/^#.*//
+/^$/ {
+ $b quit
+ N
+ t cmd
+}
+/^[0-9][0-9]*/ {
+ G
+ h
+ s/^[0-9][0-9]* *\([^\n]*\).*/\1/
+ x
+ s/^\([0-9][0-9]*\)[^\n]*/\1/
+ x
+ t cmd
+}
+
+/^[^DPxrRcplqbd&|^~<>]/b bad
+
+/^D/ {
+ x
+ s/^[^\n]*\n/&&/
+}
+/^P/ {
+ x
+ s/^[^\n]*\n//
+}
+/^x/ {
+ x
+ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+}
+/^r/ {
+ x
+ s/^\([^\n]*\n\)\(.*\)/\2\1/
+}
+/^R/ {
+ x
+ s/^\(.*\n\)\([^\n]*\n\)/\2\1/
+}
+/^c/ {
+ x
+ s/.*//
+}
+/^p/ {
+ x
+ P
+}
+
+/^l/ {
+ x
+ p
+}
+
+/^q/ {
+ :quit
+ x
+ /./P
+ d
+}
+
+/^b/ {
+ # Decimal to binary via analog form (the number is expanded to a run of 'a's)
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/^d/ {
+ # Binary to decimal via analog form (the number is expanded to a run of 'a's)
+ x
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/^&/ {
+ # Binary AND; "111 01000" is a lookup table of overlapping <bit><bit><result> triples
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/^\^/ {
+ # Binary XOR; reuses the OR loop below with its own lookup table "000 01101"
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/^|/ {
+ # Binary OR; the loop is shared with XOR, only the lookup table differs
+ x
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/^~/ {
+ # Binary NOT; "-010-" is the lookup table used to flip each bit
+ x
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/^</ {
+ # Left shift: convert the count to analog, then append one 0 bit per analog digit
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/^>/ {
+ # Right shift: convert the count to analog, then drop one bit per analog digit
+ x
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+x
+:bad
+s/^.//
+tcmd
diff --git a/testsuite/binary3.sed b/testsuite/binary3.sed
new file mode 100644
index 0000000..ba6b7d1
--- /dev/null
+++ b/testsuite/binary3.sed
@@ -0,0 +1,204 @@
+# A kind of clone of dc geared towards binary operations.
+# by Paolo Bonzini
+#
+# commands available:
+# conversion commands
+# b convert decimal to binary
+# d convert binary to decimal
+#
+# arithmetic commands
+# < shift left binary by decimal number of bits (11 3< gives 11000)
+# > shift right binary by decimal number of bits (1011 2> gives 10)
+# & binary AND (between two binary operands)
+# | binary OR (between two binary operands)
+# ^ binary XOR (between two binary operands)
+# ~ binary NOT (on a single binary operand)
+#
+# stack manipulation commands
+# c clear stack
+# P pop stack top
+# D duplicate stack top
+# x exchange top two elements
+# r rotate stack counter-clockwise (second element becomes first)
+# R rotate stack clockwise (last element becomes first)
+#
+# other commands
+# l print stack (stack top is first)
+# p print stack top
+# q quit, print stack top if any (cq is quiet quit)
+#
+# The only shortcoming is that you'd better not attempt conversions of
+# values above 1000 or so.
+#
+# This version keeps the stack and the current command in hold space and
+# the commands in pattern space; it is just a bit slower than binary2.sed
+# but more size optimized for broken seds which have a 199-command limit
+# (though binary2.sed does not have that many commands).
+#
+# --------------------------------------------------------------------------
+# This was actually used in a one-disk distribution of Linux to compute
+# netmasks as follows (1 parameter => compute netmask e.g. 24 becomes
+# 255.255.255.0; 2 parameters => given host address and netmask compute
+# network and broadcast addresses):
+#
+# if [ $# = 1 ]; then
+# OUTPUT='$1.$2.$3.$4'
+# set 255.255.255.255 $1
+# else
+# OUTPUT='$1.$2.$3.$4 $5.$6.$7.$8'
+# fi
+#
+# if [ `expr $2 : ".*\\."` -gt 0 ]; then
+# MASK="$2 br b8<r b16<r b24< R|R|R|"
+# else
+# MASK="$2b 31b ^d D
+# 11111111111111111111111111111111 x>1> x<1<"
+# fi
+#
+# set `echo "$1 br b8<r b16<r b24< R|R|R| D # Load address
+# $MASK D ~r # Load mask
+#
+# & DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# | DDD 24>dpP 16>11111111& dpP 8>11111111& dpP 11111111& dpP
+# " | sed -f binary.sed`
+#
+# eval echo $OUTPUT
+# --------------------------------------------------------------------------
+
+:cmd
+s/^[\n\t ]*//
+s/^#.*//
+/^$/ {
+ $b quit
+ N
+ t cmd
+}
+/^[0-9][0-9]*/ {
+ G
+ h
+ s/^[0-9][0-9]* *\([^\n]*\).*/\1/
+ x
+ s/^\([0-9][0-9]*\)[^\n]*/\1/
+ x
+ t cmd
+}
+
+/^[^DPxrRcplqbd&|^~<>]/bbad
+
+H
+x
+s/\(\n[^\n]\)[^\n]*$/\1/
+
+/D$/ s/^[^\n]*\n/&&/
+/P$/ s/^[^\n]*\n//
+/x$/ s/^\([^\n]*\n\)\([^\n]*\n\)/\2\1/
+/r$/ s/^\([^\n]*\n\)\(.*\)\(..\)/\2\1\3/
+/R$/ s/^\(.*\n\)\([^\n]*\n\)\(..\)/\2\1\3/
+/c$/ s/.*//
+/p$/ P
+/l$/ {
+ s/...$//
+ p
+ t cmd
+}
+
+/q$/ {
+ :quit
+ /.../P
+ d
+}
+
+/b$/ {
+ # Decimal to binary via analog form (the number is expanded to a run of 'a's)
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :d2bloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t d2bloop1
+ s/-;9876543210aaaaaaaaa/;a01!/
+ :d2bloop2
+ s/\(a*\)\1\(a\{0,1\}\)\(;\2.\(.\)[^!]*!\)/\1\3\4/
+ /^a/b d2bloop2
+ s/[^!]*!//
+}
+
+/d$/ {
+ # Binary to decimal via analog form (the number is expanded to a run of 'a's)
+ s/^\([^\n]*\)/-&;10a/
+ :b2dloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\(a*\)\)/\1\1\4-\3/
+ t b2dloop1
+ s/-;10a/;aaaaaaaaa0123456789!/
+ :b2dloop2
+ s/\(a*\)\1\1\1\1\1\1\1\1\1\(a\{0,9\}\)\(;\2.\{9\}\(.\)[^!]*!\)/\1\3\4/
+ /^a/b b2dloop2
+ s/[^!]*!//
+}
+
+/&$/ {
+ # Binary AND; "111 01000" is a lookup table of overlapping <bit><bit><result> triples
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-111 01000/
+ :andloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t andloop
+ s/^0*\([^-]*\)-[^\n]*/\1/
+ s/^\n/0&/
+}
+
+/\^$/ {
+ # Binary XOR; reuses the OR loop below with its own lookup table "000 01101"
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 01101/
+ b orloop
+}
+
+/|$/ {
+ # Binary OR; the loop is shared with XOR, only the lookup table differs
+ s/\([^\n]*\)\n\([^\n]*\)/-\1-\2-000 10111/
+ :orloop
+ s/\([^-]*\)-\([^-]*\)\([^-]\)-\([^-]*\)\([^-]\)-\([01 ]*\3\5\([01]\)\)/\7\1-\2-\4-\6/
+ t orloop
+ s/\([^-]*\)-\([^-]*\)-\([^-]*\)-[^\n]*/\2\3\1/
+}
+
+/~$/ {
+ # Binary NOT; "-010-" is the lookup table used to flip each bit
+ s/^\(.\)\([^\n]*\n\)/\1-010-\2/
+ :notloop
+ s/\(.\)-0\{0,1\}\1\(.\)0\{0,1\}-\([01\n]\)/\2\3-010-/
+ t notloop
+
+ # If result is 00001..., \3 does not match (it looks for -10) and we just
+ # remove the table and leading zeros. If result is 0000...0, \3 matches
+ # (it looks for -0), \4 is a zero and we leave a lone zero as top of the
+ # stack.
+
+ s/0*\(1\{0,1\}\)\([^-]*\)-\(\1\(0\)\)\{0,1\}[^-]*-/\4\1\2/
+}
+
+/<$/ {
+ # Left shift: convert the count to analog, then append one 0 bit per analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :lshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t lshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ s/a/0/g
+}
+
+/>$/ {
+ # Right shift: convert the count to analog, then drop one bit per analog digit
+ s/^\([^\n]*\)/-&;9876543210aaaaaaaaa/
+ :rshloop1
+ s/\(a*\)-\(.\)\([^;]*;[0-9]*\2.\{9\}\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4-\3/
+ t rshloop1
+ s/^\(a*\)-;9876543210aaaaaaaaa\n\([^\n]*\)/\2\1/
+ :rshloop2
+ s/.a//
+ s/^aa*/0/
+ /a\n/b rshloop2
+}
+
+s/..$//
+x
+:bad
+s/^.//
+tcmd
diff --git a/testsuite/bsd-wrapper.sh b/testsuite/bsd-wrapper.sh
new file mode 100644
index 0000000..557deb7
--- /dev/null
+++ b/testsuite/bsd-wrapper.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+# Test runner for the old bsd tests
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# location of the external SED scripts
+dir="$abs_top_srcdir/testsuite"
+
+# The 'bsd.sh' script runs multiple tests and generates sed.out file.
+sh "$dir/bsd.sh" || fail=1
+remove_cr_inplace sed.out
+compare "$dir/bsd.good" sed.out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/bsd.good b/testsuite/bsd.good
new file mode 100644
index 0000000..5454daa
--- /dev/null
+++ b/testsuite/bsd.good
@@ -0,0 +1,1755 @@
+============
+Test 1.1:101
+============
+Testing argument parsing
+First type
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.2:102
+============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+============
+Test 1.3:103
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.4:104
+============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+Second type
+
+==============
+Test 1.4.1:105
+==============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 1.5:106
+============
+s1_l1_1
+s1_l1_1
+s1_l1_2
+s1_l1_2
+s1_l1_3
+s1_l1_3
+s1_l1_4
+s1_l1_4
+s1_l1_5
+s1_l1_5
+s1_l1_6
+s1_l1_6
+s1_l1_7
+s1_l1_7
+s1_l1_8
+s1_l1_8
+s1_l1_9
+s1_l1_9
+s1_l1_10
+s1_l1_10
+s1_l1_11
+s1_l1_11
+s1_l1_12
+s1_l1_12
+s1_l1_13
+s1_l1_13
+s1_l1_14
+s1_l1_14
+
+============
+Test 1.6:107
+============
+s1_l1_1
+s1_l1_1
+s1_l1_2
+s1_l1_2
+s1_l1_3
+s1_l1_3
+s1_l1_4
+s1_l1_4
+s1_l1_5
+s1_l1_5
+s1_l1_6
+s1_l1_6
+s1_l1_7
+s1_l1_7
+s1_l1_8
+s1_l1_8
+s1_l1_9
+s1_l1_9
+s1_l1_10
+s1_l1_10
+s1_l1_11
+s1_l1_11
+s1_l1_12
+s1_l1_12
+s1_l1_13
+s1_l1_13
+s1_l1_14
+s1_l1_14
+
+============
+Test 1.7:108
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.8:109
+============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+============
+Test 1.9:110
+============
+s1_l1_1
+s1_l1_2
+s1_l1_3
+s1_l1_4
+s1_l1_5
+s1_l1_6
+s1_l1_7
+s1_l1_8
+s1_l1_9
+s1_l1_10
+s1_l1_11
+s1_l1_12
+s1_l1_13
+s1_l1_14
+
+=============
+Test 1.10:111
+=============
+s1_l1_1
+s1_l1_2
+s1_l1_3
+s1_l1_4
+s1_l1_5
+s1_l1_6
+s1_l1_7
+s1_l1_8
+s1_l1_9
+s1_l1_10
+s1_l1_11
+s1_l1_12
+s1_l1_13
+s1_l1_14
+
+=============
+Test 1.11:112
+=============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+=============
+Test 1.12:113
+=============
+e1_l1_1
+e1_l1_2
+e1_l1_3
+e1_l1_4
+e1_l1_5
+e1_l1_6
+e1_l1_7
+e1_l1_8
+e1_l1_9
+e1_l1_10
+e1_l1_11
+e1_l1_12
+e1_l1_13
+e1_l1_14
+
+=============
+Test 1.13:114
+=============
+e1_l1_1
+e2_e1_l1_1
+e2_e1_l1_1
+e1_l1_2
+e2_e1_l1_2
+e2_e1_l1_2
+e1_l1_3
+e2_e1_l1_3
+e2_e1_l1_3
+e1_l1_4
+e2_e1_l1_4
+e2_e1_l1_4
+e1_l1_5
+e2_e1_l1_5
+e2_e1_l1_5
+e1_l1_6
+e2_e1_l1_6
+e2_e1_l1_6
+e1_l1_7
+e2_e1_l1_7
+e2_e1_l1_7
+e1_l1_8
+e2_e1_l1_8
+e2_e1_l1_8
+e1_l1_9
+e2_e1_l1_9
+e2_e1_l1_9
+e1_l1_10
+e2_e1_l1_10
+e2_e1_l1_10
+e1_l1_11
+e2_e1_l1_11
+e2_e1_l1_11
+e1_l1_12
+e2_e1_l1_12
+e2_e1_l1_12
+e1_l1_13
+e2_e1_l1_13
+e2_e1_l1_13
+e1_l1_14
+e2_e1_l1_14
+e2_e1_l1_14
+
+=============
+Test 1.14:115
+=============
+s1_l1_1
+s2_s1_l1_1
+s2_s1_l1_1
+s1_l1_2
+s2_s1_l1_2
+s2_s1_l1_2
+s1_l1_3
+s2_s1_l1_3
+s2_s1_l1_3
+s1_l1_4
+s2_s1_l1_4
+s2_s1_l1_4
+s1_l1_5
+s2_s1_l1_5
+s2_s1_l1_5
+s1_l1_6
+s2_s1_l1_6
+s2_s1_l1_6
+s1_l1_7
+s2_s1_l1_7
+s2_s1_l1_7
+s1_l1_8
+s2_s1_l1_8
+s2_s1_l1_8
+s1_l1_9
+s2_s1_l1_9
+s2_s1_l1_9
+s1_l1_10
+s2_s1_l1_10
+s2_s1_l1_10
+s1_l1_11
+s2_s1_l1_11
+s2_s1_l1_11
+s1_l1_12
+s2_s1_l1_12
+s2_s1_l1_12
+s1_l1_13
+s2_s1_l1_13
+s2_s1_l1_13
+s1_l1_14
+s2_s1_l1_14
+s2_s1_l1_14
+
+=============
+Test 1.15:116
+=============
+e1_l1_1
+s1_e1_l1_1
+s1_e1_l1_1
+e1_l1_2
+s1_e1_l1_2
+s1_e1_l1_2
+e1_l1_3
+s1_e1_l1_3
+s1_e1_l1_3
+e1_l1_4
+s1_e1_l1_4
+s1_e1_l1_4
+e1_l1_5
+s1_e1_l1_5
+s1_e1_l1_5
+e1_l1_6
+s1_e1_l1_6
+s1_e1_l1_6
+e1_l1_7
+s1_e1_l1_7
+s1_e1_l1_7
+e1_l1_8
+s1_e1_l1_8
+s1_e1_l1_8
+e1_l1_9
+s1_e1_l1_9
+s1_e1_l1_9
+e1_l1_10
+s1_e1_l1_10
+s1_e1_l1_10
+e1_l1_11
+s1_e1_l1_11
+s1_e1_l1_11
+e1_l1_12
+s1_e1_l1_12
+s1_e1_l1_12
+e1_l1_13
+s1_e1_l1_13
+s1_e1_l1_13
+e1_l1_14
+s1_e1_l1_14
+s1_e1_l1_14
+
+=============
+Test 1.16:117
+=============
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+e1_l1_1
+e1_l1_1
+e1_l1_2
+e1_l1_2
+e1_l1_3
+e1_l1_3
+e1_l1_4
+e1_l1_4
+e1_l1_5
+e1_l1_5
+e1_l1_6
+e1_l1_6
+e1_l1_7
+e1_l1_7
+e1_l1_8
+e1_l1_8
+e1_l1_9
+e1_l1_9
+e1_l1_10
+e1_l1_10
+e1_l1_11
+e1_l1_11
+e1_l1_12
+e1_l1_12
+e1_l1_13
+e1_l1_13
+e1_l1_14
+e1_l1_14
+
+=============
+Test 1.17:118
+=============
+l1_1
+l1_1
+l1_2
+l1_2
+l1_3
+l1_3
+l1_4
+l1_4
+l1_5
+l1_5
+l1_6
+l1_6
+l1_7
+l1_7
+l1_8
+l1_8
+l1_9
+l1_9
+l1_10
+l1_10
+l1_11
+l1_11
+l1_12
+l1_12
+l1_13
+l1_13
+l1_14
+l1_14
+
+=============
+Test 1.18:119
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+Testing address ranges
+
+============
+Test 2.1:120
+============
+l1_4
+
+============
+Test 2.2:121
+============
+l2_6
+
+============
+Test 2.3:122
+============
+l1_14
+
+============
+Test 2.4:123
+============
+l2_9
+
+============
+Test 2.5:124
+============
+
+============
+Test 2.6:125
+============
+l2_9
+
+============
+Test 2.7:126
+============
+
+============
+Test 2.9:127
+============
+l1_7
+
+=============
+Test 2.10:128
+=============
+l1_7
+
+=============
+Test 2.11:129
+=============
+l1_7
+
+=============
+Test 2.12:130
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+
+=============
+Test 2.13:131
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.14:132
+=============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.15:133
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.16:134
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+
+=============
+Test 2.17:135
+=============
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_14
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.18:136
+=============
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+
+=============
+Test 2.19:137
+=============
+l1_12
+
+=============
+Test 2.20:138
+=============
+l1_7
+Brace and other grouping
+
+============
+Test 3.1:139
+============
+l1_1
+l1_2
+l1_3
+^l1T4$
+^l1T5$
+^l1T6$
+^l1T7$
+^l1T8$
+^l1T9$
+^l1T10$
+^l1T11$
+^l1T12$
+l1_13
+l1_14
+
+============
+Test 3.2:140
+============
+l1_1
+l1_2
+l1_3
+^l1_4
+^l1_5
+^l1_6$
+^l1_7$
+^l1T8$
+^l1_9$
+^l1_10$
+^l1_11
+^l1_12
+l1_13
+l1_14
+
+============
+Test 3.3:141
+============
+^l1T1$
+^l1T2$
+^l1T3$
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+^l1T13$
+^l1T14$
+
+============
+Test 3.4:142
+============
+^l1_1
+^l1_2
+^l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+^l1_13
+^l1_14
+Testing a c d and i commands
+
+============
+Test 4.1:143
+============
+before_il1_1
+after_ibefore_il1_1
+before_il1_2
+after_ibefore_il1_2
+before_il1_3
+after_ibefore_il1_3
+before_il1_4
+after_ibefore_il1_4
+before_il1_5
+after_ibefore_il1_5
+before_il1_6
+after_ibefore_il1_6
+before_il1_7
+after_ibefore_il1_7
+before_il1_8
+after_ibefore_il1_8
+before_il1_9
+after_ibefore_il1_9
+before_il1_10
+after_ibefore_il1_10
+before_il1_11
+after_ibefore_il1_11
+before_il1_12
+after_ibefore_il1_12
+before_il1_13
+after_ibefore_il1_13
+before_il1_14
+after_ibefore_il1_14
+before_il2_1
+after_ibefore_il2_1
+before_il2_2
+after_ibefore_il2_2
+before_il2_3
+after_ibefore_il2_3
+before_il2_4
+after_ibefore_il2_4
+before_il2_5
+after_ibefore_il2_5
+before_il2_6
+inserted
+after_ibefore_il2_6
+before_il2_7
+after_ibefore_il2_7
+before_il2_8
+after_ibefore_il2_8
+before_il2_9
+after_ibefore_il2_9
+
+============
+Test 4.2:144
+============
+before_al1_1
+after_abefore_al1_1
+before_al1_2
+after_abefore_al1_2
+before_al1_3
+after_abefore_al1_3
+before_al1_4
+after_abefore_al1_4
+before_a5-12l1_5
+after_abefore_a5-12l1_5
+appended
+before_a5-12l1_6
+after_abefore_a5-12l1_6
+appended
+before_a5-12l1_7
+after_abefore_a5-12l1_7
+appended
+before_a5-12l1_8
+after_abefore_a5-12l1_8
+appended
+before_a5-12l1_9
+after_abefore_a5-12l1_9
+appended
+before_a5-12l1_10
+after_abefore_a5-12l1_10
+appended
+before_a5-12l1_11
+after_abefore_a5-12l1_11
+appended
+before_a5-12l1_12
+after_abefore_a5-12l1_12
+appended
+before_al1_13
+after_abefore_al1_13
+before_al1_14
+after_abefore_al1_14
+before_al2_1
+after_abefore_al2_1
+before_al2_2
+after_abefore_al2_2
+before_al2_3
+after_abefore_al2_3
+before_al2_4
+after_abefore_al2_4
+before_al2_5
+after_abefore_al2_5
+before_al2_6
+after_abefore_al2_6
+before_al2_7
+after_abefore_al2_7
+before_al2_8
+after_abefore_al2_8
+before_al2_9
+after_abefore_al2_9
+
+============
+Test 4.3:145
+============
+^l1_1
+^l1_1$
+appended
+^l1_2
+^l1_2$
+appended
+^l1_3
+^l1_3$
+appended
+^l1_4
+^l1_4$
+appended
+^l1_5
+^l1_5$
+appended
+^l1_6
+^l1_6$
+appended
+^l1_7
+^l1_7$
+appended
+^l1_8
+appended
+^l1_8
+l1_9$
+^l1_10
+appended
+^l1_10
+l1_11$
+^l1_12
+^l1_12$
+appended
+^l1_13
+^l1_13$
+appended
+^l1_14
+^l1_14$
+appended
+^l2_1
+^l2_1$
+^l2_2
+^l2_2$
+^l2_3
+^l2_3$
+^l2_4
+^l2_4$
+^l2_5
+^l2_5$
+^l2_6
+^l2_6$
+^l2_7
+^l2_7$
+^l2_8
+^l2_8$
+^l2_9
+^l2_9$
+
+============
+Test 4.4:146
+============
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+hello
+
+============
+Test 4.5:147
+============
+hello
+
+============
+Test 4.6:148
+============
+hello
+
+============
+Test 4.7:149
+============
+hello
+
+============
+Test 4.8:150
+============
+Testing labels and branching
+
+============
+Test 5.1:151
+============
+label2_l1_1
+label3_label2_l1_1
+label1_l1_2
+label1_l1_3
+label1_l1_4
+label1_l1_5
+label1_l1_6
+label1_l1_7
+label1_l1_8
+label1_l1_9
+label1_l1_10
+label1_l1_11
+label1_l1_12
+label2_l1_13
+label3_label2_l1_13
+label2_l1_14
+label3_label2_l1_14
+
+============
+Test 5.2:152
+============
+tested l2_1
+tested l2_2
+tested l2_3
+tested l2_4
+tested l2_5
+tested l2_6
+tested l2_7
+tested l2_8
+tested l2_9
+tested l2_10
+tested l2_11
+tested l2_12
+tested l2_13
+tested l2_14
+
+============
+Test 5.3:153
+============
+^l1_1
+^l1_1$
+^l1_2
+^l1_2$
+^l1_3
+^l1_3$
+^l1_4
+^l1_4$
+l1_5$
+l1_6$
+l1_7$
+l1_8$
+
+============
+Test 5.4:154
+============
+^l1_1$
+^l1_2$
+^l1_3$
+^l1_4$
+^l1_5$
+^l1_6$
+^l1_7$
+^l1_8$
+l1_9$
+l1_10$
+l1_11$
+l1_12$
+l1_13$
+l1_14$
+
+============
+Test 5.5:155
+============
+^l1_1
+^l1_2
+^l1_4
+^l1_6
+^l1_8
+
+============
+Test 5.6:156
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+
+============
+Test 5.7:157
+============
+l1_1
+l1_2
+l1_3
+l1_4
+hello
+l1_5
+
+============
+Test 5.8:158
+============
+m1_1
+m1_2
+m1_3
+m1_4
+m1_5
+m1_6
+m1_7
+m1_8
+m1_9
+m1_10
+m1_11
+m1_12
+m1_13
+m1_14
+Pattern space commands
+
+============
+Test 6.1:159
+============
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+changed
+
+============
+Test 6.2:160
+============
+l1_1
+l1_2
+l1_3
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.3:161
+============
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.4:162
+============
+l1_1
+l1_2
+l1_3
+l1_2
+l1_3
+l1_5
+l1_2
+l1_3
+l1_2
+l1_3
+l1_6
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.5:163
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 6.6:164
+============
+Testing print and file routines
+
+============
+Test 7.1:165
+============
+\001\002\003\004\005\006\a\b\t$
+\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\
+\035\036\037 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWX\
+YZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\
+\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\
+\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\
+\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\
+\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\
+\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\
+\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\
+\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\
+\375\376\377$
+$
+
+============
+Test 7.2:166
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+15
+l2_1
+16
+l2_2
+17
+l2_3
+18
+l2_4
+19
+l2_5
+20
+l2_6
+21
+l2_7
+22
+l2_8
+23
+l2_9
+
+============
+Test 7.3:167
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+w results
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+
+============
+Test 7.4:168
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l2_1
+l2_2
+l2_3
+l2_4
+l2_5
+l2_6
+l2_7
+l2_8
+l2_9
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.5:169
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.6:170
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 7.8:171
+============
+
+Testing substitution commands
+
+============
+Test 8.1:172
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.2:173
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.3:174
+============
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+XXXXX
+
+============
+Test 8.4:175
+============
+l1_1
+l1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+============
+Test 8.5:176
+============
+l1X1
+l1X2
+l1X3
+l1X4
+l1X5
+l1X6
+l1X7
+l1X8
+l1X9
+l1X10
+l1X11
+l1X12
+l1X13
+l1X14
+
+============
+Test 8.6:177
+============
+(l)(1)(_)(1)
+(l)(1)(_)(2)
+(l)(1)(_)(3)
+(l)(1)(_)(4)
+(l)(1)(_)(5)
+(l)(1)(_)(6)
+(l)(1)(_)(7)
+(l)(1)(_)(8)
+(l)(1)(_)(9)
+(l)(1)(_)(1)(0)
+(l)(1)(_)(1)(1)
+(l)(1)(_)(1)(2)
+(l)(1)(_)(1)(3)
+(l)(1)(_)(1)(4)
+
+============
+Test 8.7:178
+============
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+(&)(&)(&)(&)(&)
+
+============
+Test 8.8:179
+============
+x_x1xl1
+x_x1xl2
+x_x1xl3
+x_x1xl4
+x_x1xl5
+x_x1xl6
+x_x1xl7
+x_x1xl8
+x_x1xl9
+x_x1xl10
+x_x1xl11
+x_x1xl12
+x_x1xl13
+x_x1xl14
+
+============
+Test 8.9:180
+============
+l1u0
+u1
+u21
+l1u0
+u1
+u22
+l1u0
+u1
+u23
+l1u0
+u1
+u24
+l1u0
+u1
+u25
+l1u0
+u1
+u26
+l1u0
+u1
+u27
+l1u0
+u1
+u28
+l1u0
+u1
+u29
+l1u0
+u1
+u210
+l1u0
+u1
+u211
+l1u0
+u1
+u212
+l1u0
+u1
+u213
+l1u0
+u1
+u214
+
+=============
+Test 8.10:181
+=============
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X
+l1_X0
+l1_X1
+l1_X2
+l1_X3
+l1_X4
+
+=============
+Test 8.11:182
+=============
+lX_1
+lX_2
+lX_3
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_10
+lX_11
+lX_12
+lX_13
+lX_14
+s wfile results
+lX_1
+lX_2
+lX_3
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_10
+lX_11
+lX_12
+lX_13
+lX_14
+
+=============
+Test 8.12:183
+=============
+lX_X
+lX_X
+lX_X
+lX_4
+lX_5
+lX_6
+lX_7
+lX_8
+lX_9
+lX_X0
+lX_XX
+lX_XX
+lX_XX
+lX_X4
+
+=============
+Test 8.13:184
+=============
+l8_8
+l8_7
+l8_6
+l8_5
+l8_4
+l8_3
+l8_2
+l8_1
+l8_0
+l8_89
+l8_88
+l8_87
+l8_86
+l8_85
+
+=============
+Test 8.14:185
+=============
+l8_8
+l8_7
+l8_6
+l8_5
+l8_4
+l8_3
+l8_2
+l8_1
+l8_0
+l8_89
+l8_88
+l8_87
+l8_86
+l8_85
+
+=============
+Test 8.15:186
+=============
+l1_1Xl1_2
+l1_3
+l1_4
+l1_5
+l1_6
+l1_7
+l1_8
+l1_9
+l1_10
+l1_11
+l1_12
+l1_13
+l1_14
+
+=============
+Test 8.16:187
+=============
+eeefff
+Xeefff
+XYefff
+XYeYff
+XYeYYf
+XYeYYY
+XYeYYY
+
+=============
+Test 8.17:188
+=============
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&
+&&&&&
+&&&&&
+&&&&&
+&&&&&
+&&&&&
diff --git a/testsuite/bsd.sh b/testsuite/bsd.sh
new file mode 100755
index 0000000..a7dbcd2
--- /dev/null
+++ b/testsuite/bsd.sh
@@ -0,0 +1,435 @@
+#!/bin/sh -
+# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $
+#
+# Copyright (c) 1992 Diomidis Spinellis.
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# from: @(#)sed.test 8.1 (Berkeley) 6/6/93
+# $NetBSD: sed.test,v 1.3 1997/01/09 20:21:37 tls Exp $
+#
+
+# sed Regression Tests
+
+# Modified by Paolo Bonzini to:
+# - not warn about buggy seds
+# - run tests once instead of comparing them to the system sed
+# - remove most uses of awk
+# - cleanup at exit
+# - comment tests that broke because of extensions
+
+main()  # Entry point: build the fixture files, run every test group, then clean up.
+{
+ TEST="${1-../sed/sed}"  # sed binary under test; defaults to ../sed/sed when $1 is unset
+ TESTLOG="${2-sed.out}"  # file that collects the test output; defaults to sed.out when $2 is unset
+ # DICT="${3-/usr/share/dict/words}"  (only referenced by the commented-out test 7.7)
+
+ : > lines1  # create/truncate both fixture files before filling them
+ : > lines2
+ for i in 1 2 3 4 5 6 7 8 9; do  # l1_1..l1_9 into lines1, l2_1..l2_9 into lines2
+ echo l1_$i >> lines1
+ echo l2_$i >> lines2
+ done
+ for i in 10 11 12 13 14; do  # only lines1 continues: 14 lines versus 9 in lines2
+ echo l1_$i >> lines1
+ done
+
+ # Run the whole suite (no flags are set here any more; see "Modified by" note above)
+ tests "$TEST" "$TESTLOG"
+
+ rm -f lines[1234] script[12]  # fixtures plus the files the test groups create (lines3/4, script1/2)
+}
+
+tests()  # Run every test group using sed binary $1, writing the log to file $2.
+{
+ SED="$1"
+ LOG="$2"
+ MARK=100  # running test counter; mark() increments before printing, so the first banner shows 101
+ rm -f "$LOG"  # mark() appends, so start from a clean log
+
+ exec 3>&0 4>&1 5>&2  # save stdin/stdout/stderr on fds 3/4/5
+ exec 0</dev/null 1>/dev/null 2>/dev/null  # test_error only looks at exit statuses; silence it
+ test_error
+ exec 0>&3 1>&4 2>&5  # restore the three saved descriptors
+
+ exec 4>&1 5>&2  # save stdout/stderr again: each mark() call re-points them
+ test_args
+ test_addr
+ test_group
+ test_acid
+ test_branch
+ test_pattern
+ test_print
+ test_subst
+ exec 1>&4 2>&5  # undo the redirections made by mark()
+}
+
+mark()  # Begin test $1: redirect output and append a "Test <id>:<counter>" banner to $LOG.
+{
+ exec 2>&1 >>"$LOG"  # quoted (as in tests()) so a log path with spaces or globs still works
+ test $MARK = 100 || echo  # blank separator line before every banner except the very first
+ MARK=`expr $MARK + 1`
+ echo "Test $1:$MARK" | sed 's/./=/g'  # rule of "=" as wide as the banner; uses system sed, not $SED
+ echo "Test $1:$MARK"
+ echo "Test $1:$MARK" | sed 's/./=/g'
+}
+
+test_args()  # Tests 1.x: ways of passing the script (bare argument, -e, -f, -n) and the input (file, stdin).
+{
+ mark '1.1'
+ echo Testing argument parsing  # printed after mark, so these headings belong to test 1.1's log section
+ echo First type
+ $SED 's/^/e1_/p' lines1
+ mark '1.2' ; $SED -n 's/^/e1_/p' lines1
+ mark '1.3' ; $SED 's/^/e1_/p' <lines1
+ mark '1.4' ; $SED -n 's/^/e1_/p' <lines1
+ echo Second type
+ mark '1.4.1'
+ $SED -e '' <lines1  # empty script
+ echo 's/^/s1_/p' >script1  # script files used by the -f tests below
+ echo 's/^/s2_/p' >script2
+ mark '1.5' ; $SED -f script1 lines1
+ mark '1.6' ; $SED -f script1 <lines1
+ mark '1.7' ; $SED -e 's/^/e1_/p' lines1
+ mark '1.8' ; $SED -e 's/^/e1_/p' <lines1
+ mark '1.9' ; $SED -n -f script1 lines1
+ mark '1.10' ; $SED -n -f script1 <lines1
+ mark '1.11' ; $SED -n -e 's/^/e1_/p' lines1
+ mark '1.12' ; $SED -n -e 's/^/e1_/p' <lines1
+ mark '1.13' ; $SED -e 's/^/e1_/p' -e 's/^/e2_/p' lines1
+ mark '1.14' ; $SED -f script1 -f script2 lines1
+ mark '1.15' ; $SED -e 's/^/e1_/p' -f script1 lines1
+ mark '1.16' ; $SED -e 's/^/e1_/p' lines1 lines1
+ # POSIX D11.2:11251 (stdin is redirected AND a file operand is given)
+ mark '1.17' ; $SED p <lines1 lines1
+cat >script1 <<EOF
+#n
+# A comment
+
+p
+EOF
+ mark '1.18' ; $SED -f script1 <lines1 lines1  # script1 now starts with a "#n" first line
+}
+
+test_addr()  # Tests 2.x: line-number, "$", regex and two-address range selection.
+{
+ echo Testing address ranges  # runs before the next mark, so it goes wherever stdout currently points
+ mark '2.1' ; $SED -n -e '4p' lines1
+ mark '2.2' ; $SED -n -e '20p' lines1 lines2  # line 20 lies beyond the 14 lines of lines1
+ mark '2.3' ; $SED -n -e '$p' lines1
+ mark '2.4' ; $SED -n -e '$p' lines1 lines2
+ mark '2.5' ; $SED -n -e '$a\
+hello' /dev/null
+ mark '2.6' ; $SED -n -e '$p' lines1 /dev/null lines2  # empty file in the middle of the operand list
+ # Should not print anything
+ mark '2.7' ; $SED -n -e '20p' lines1
+ # Disabled because it is undefined behavior
+ # mark '2.8' ; $SED -n -e '0p' lines1
+ mark '2.9' ; $SED -n '/l1_7/p' lines1
+ mark '2.10' ; $SED -n ' /l1_7/ p' lines1  # same as 2.9 with blanks around the address
+ mark '2.11' ; $SED -n '\_l1\_7_p' lines1  # same regex with "_" as delimiter and an escaped "_"
+ mark '2.12' ; $SED -n '1,4p' lines1
+ mark '2.13' ; $SED -n '1,$p' lines1 lines2
+ mark '2.14' ; $SED -n '1,/l2_9/p' lines1 lines2
+ mark '2.15' ; $SED -n '/4/,$p' lines1 lines2
+ mark '2.16' ; $SED -n '/4/,20p' lines1 lines2
+ mark '2.17' ; $SED -n '/4/,/10/p' lines1 lines2
+ mark '2.18' ; $SED -n '/l2_3/,/l1_8/p' lines1 lines2  # end pattern only occurs before the start pattern
+ mark '2.19' ; $SED -n '12,3p' lines1 lines2  # second address is smaller than the first
+ mark '2.20' ; $SED -n '/l1_7/,3p' lines1 lines2  # numeric end address precedes the regex match
+}
+
+test_group()  # Tests 3.x: "{ ... }" command groups, nested groups, and "!" negation.
+{
+ echo Brace and other grouping  # runs before the next mark, so it goes wherever stdout currently points
+ mark '3.1' ; $SED -e '
+4,12 {
+ s/^/^/
+ s/$/$/
+ s/_/T/
+}' lines1  # 3.1: one group applied to lines 4-12
+ mark '3.2' ; $SED -e '
+4,12 {
+ s/^/^/
+ /6/,/10/ {
+ s/$/$/
+ /8/ s/_/T/
+ }
+}' lines1  # 3.2: a regex-range group nested inside the line-range group
+ mark '3.3' ; $SED -e '
+4,12 !{
+ s/^/^/
+ /6/,/10/ !{
+ s/$/$/
+ /8/ !s/_/T/
+ }
+}' lines1  # 3.3: same shape as 3.2 with every address negated
+ mark '3.4' ; $SED -e '4,12!s/^/^/' lines1  # negated range on a single command, no braces
+}
+
+test_acid()  # Tests 4.x: the a (append), c (change), d (delete) and i (insert) commands.
+{
+ echo Testing a c d and i commands  # runs before the next mark, so it goes wherever stdout currently points
+ mark '4.1' ; $SED -n -e '
+s/^/before_i/p
+20i\
+inserted
+s/^/after_i/p
+' lines1 lines2  # 4.1: "i" addressed to line 20 of the two files taken together
+ mark '4.2' ; $SED -n -e '
+5,12s/^/5-12/
+s/^/before_a/p
+/5-12/a\
+appended
+s/^/after_a/p
+' lines1 lines2  # 4.2: "a" selected by text that an earlier s command added
+ mark '4.3' ; $SED -n -e '
+s/^/^/p
+/l1_/a\
+appended
+8,10N
+s/$/$/p
+' lines1 lines2  # 4.3: "a" combined with N pulling in the next line
+ mark '4.4' ; $SED -n -e '
+c\
+hello
+' lines1  # 4.4: "c" with no address
+ mark '4.5' ; $SED -n -e '
+8c\
+hello
+' lines1  # 4.5: "c" on a single line
+ mark '4.6' ; $SED -n -e '
+3,14c\
+hello
+' lines1  # 4.6: "c" on a range
+ mark '4.7' ; $SED -n -e '
+8,3c\
+hello
+' lines1  # 4.7: "c" on a range whose end precedes its start
+ mark '4.8' ; $SED d <lines1  # "d" with no address
+}
+
+test_branch()  # Tests 5.x: labels, b (branch), t (branch if substituted) and q (quit).
+{
+ echo Testing labels and branching  # runs before the next mark, so it goes wherever stdout currently points
+ mark '5.1' ; $SED -n -e '
+b label4
+:label3
+s/^/label3_/p
+b end
+:label4
+2,12b label1
+b label2
+:label1
+s/^/label1_/p
+b
+:label2
+s/^/label2_/p
+b label3
+:end
+' lines1  # 5.1: forward and backward branches; a bare "b" jumps to the end of the script
+ mark '5.2' ; $SED -n -e '
+s/l1_/l2_/
+t ok
+b
+:ok
+s/^/tested /p
+' lines1 lines2  # 5.2: "t" is taken only when the preceding s made a substitution
+ mark '5.3' ; $SED -n -e '
+5,8b inside
+1,5 {
+ s/^/^/p
+ :inside
+ s/$/$/p
+}
+' lines1  # 5.3: branch to a label that sits inside a group
+# Check that t clears the substitution done flag
+ mark '5.4' ; $SED -n -e '
+1,8s/^/^/
+t l1
+:l1
+t l2
+s/$/$/p
+b
+:l2
+s/^/ERROR/
+' lines1  # 5.4: reaching l2 would mean the first "t" left the flag set
+# Check that reading a line clears the substitution done flag
+ mark '5.5' ; $SED -n -e '
+t l2
+1,8s/^/^/p
+2,7N
+b
+:l2
+s/^/ERROR/p
+' lines1  # 5.5: "ERROR" in the output would mean the flag survived into the next cycle
+ mark '5.6' ; $SED 5q lines1
+ mark '5.7' ; $SED -e '
+5i\
+hello
+5q' lines1
+# Branch across block boundary (label defined inside braces, "t" issued outside them)
+ mark '5.8' ; $SED -e '
+{
+:b
+}
+s/l/m/
+tb' lines1
+}
+
+test_pattern()  # Tests 6.x: commands acting on the pattern and hold spaces (c d N D P h H g G x n).
+{
+echo Pattern space commands  # emitted before mark '6.1', so it ends up at the tail of the previous test's log section
+# Check that the pattern space is deleted
+ mark '6.1' ; $SED -n -e '
+c\
+changed
+p
+' lines1  # 6.1: the "p" after "c" must not produce additional output
+ mark '6.2' ; $SED -n -e '
+4d
+p
+' lines1  # 6.2: line 4 is deleted before "p" is reached
+ mark '6.3' ; $SED -e '
+N
+N
+N
+D
+P
+4p
+' lines1  # 6.3: N/D/P on a multi-line pattern space
+ mark '6.4' ; $SED -e '
+2h
+3H
+4g
+5G
+6x
+6p
+6x
+6p
+' lines1  # 6.4: copy (h/H), fetch (g/G) and exchange (x) with the hold space
+ mark '6.5' ; $SED -e '4n' lines1  # "n" with autoprint on
+ mark '6.6' ; $SED -n -e '4n' lines1  # "n" with autoprint off (-n)
+}
+
+test_print()  # Tests 7.x: the l and = commands and the file commands w and r.
+{
+ echo Testing print and file routines  # emitted before mark '7.1', so it lands in the previous test's log section
+ awk 'END {for (i = 1; i < 256; i++) printf("%c", i);print "\n"}' \
+ </dev/null >lines3  # lines3: one character per code 1..255, then the output of print "\n"
+ mark '7.1' ; $SED -n l lines3
+ mark '7.2' ; $SED -e '/l2_/=' lines1 lines2  # "=" prints the line number of each line from lines2
+ rm -f lines4  # make sure the "w" target does not pre-exist
+ mark '7.3' ; $SED -e '3,12w lines4' lines1
+ echo w results
+ cat lines4  # show what "w" wrote, so it becomes part of the logged output
+ mark '7.4' ; $SED -e '4r lines2' lines1
+ mark '7.5' ; $SED -e '5r /dev/dds' lines1  # /dev/dds is presumably a path that does not exist
+ mark '7.6' ; $SED -e '6r /dev/null' lines1  # "r" of an empty file
+ # mark '7.7'  (disabled; it is the only user of $DICT)
+ # sed '200q' $DICT | sed 's$.*$s/^/&/w tmpdir/&$' >script1
+ # rm -rf tmpdir
+ # mkdir tmpdir
+ # $SED -f script1 lines1
+ # cat tmpdir/*
+ # rm -rf tmpdir
+ mark '7.8'
+ echo line1 > lines3  # lines3 is rebuilt: "line1" followed by an empty line
+ echo "" >> lines3
+ $SED -n -e '$p' lines3 /dev/null  # "$" when the final operand is empty
+}
+
+test_subst()  # Tests 8.x: the s and y commands (delimiters, "&", back-references, flags).
+{
+ echo Testing substitution commands  # emitted before mark '8.1', so it lands in the previous test's log section
+ mark '8.1' ; $SED -e 's/./X/g' lines1
+ mark '8.2' ; $SED -e 's,.,X,g' lines1  # "," as delimiter
+ mark '8.3' ; $SED -e 's.\..X.g' lines1  # "." as delimiter, escaped inside the pattern
+# POSIX does not say that this should work
+# mark '8.4' ; $SED -e 's/[/]/Q/' lines1
+ mark '8.4' ; $SED -e 's/[\/]/Q/' lines1  # escaped delimiter inside a bracket expression
+ mark '8.5' ; $SED -e 's_\__X_' lines1  # "_" as delimiter, escaped inside the pattern
+ mark '8.6' ; $SED -e 's/./(&)/g' lines1  # "&" in the replacement stands for the matched text
+ mark '8.7' ; $SED -e 's/./(\&)/g' lines1  # "\&" in the replacement is a literal ampersand
+ mark '8.8' ; $SED -e 's/\(.\)\(.\)\(.\)/x\3x\2x\1/g' lines1  # back-references \1..\3 in reverse order
+ mark '8.9' ; $SED -e 's/_/u0\
+u1\
+u2/g' lines1
+ mark '8.10' ; $SED -e 's/./X/4' lines1  # numeric flag: replace only the 4th match
+ rm -f lines4  # make sure the "w" target does not pre-exist
+ mark '8.11' ; $SED -e 's/1/X/w lines4' lines1
+ echo s wfile results
+ cat lines4  # show what the "w" flag wrote, so it becomes part of the logged output
+ mark '8.12' ; $SED -e 's/[123]/X/g' lines1
+ mark '8.13' ; $SED -e 'y/0123456789/9876543210/' lines1
+ mark '8.14' ; $SED -e 'y10\123456789198765432\101' lines1  # same mapping as 8.13 with "1" as delimiter
+ mark '8.15' ; $SED -e '1N;2y/\n/X/' lines1  # "\n" in y matches the newline joined in by N
+ mark '8.16'
+ echo 'eeefff' | $SED -e 'p' -e 's/e/X/p' -e ':x' \
+ -e 's//Y/p' -e '/f/bx'
+ mark '8.17' ; $SED -e 's&.&\&&g' lines1  # "&" as delimiter, escaped in the replacement
+}
+
+test_error()  # Invocations that are expected to fail: if one succeeds, "exit 1" aborts the whole script.
+{
+ $SED -x && exit 1
+ $SED -f && exit 1
+ $SED -e && exit 1
+ $SED -f /dev/dds && exit 1
+ $SED p /dev/dds && exit 1
+ $SED -f /bin/sh && exit 1
+ $SED '{' && exit 1
+ $SED '{' && exit 1
+ $SED '/hello/' && exit 1
+ $SED '1,/hello/' && exit 1
+ $SED -e '-5p' && exit 1
+ $SED '/jj' && exit 1
+ # $SED 'a hello' && exit 1  (disabled)
+ # $SED 'a \ hello' && exit 1  (disabled)
+ $SED 'b foo' && exit 1
+ $SED 'd hello' && exit 1
+ $SED 's/aa' && exit 1
+ $SED 's/aa/' && exit 1
+ $SED 's/a/b' && exit 1
+ $SED 's/a/b/c/d' && exit 1
+ $SED 's/a/b/ 1 2' && exit 1
+ # $SED 's/a/b/ 1 g' && exit 1  (disabled)
+ $SED 's/a/b/w' && exit 1
+ $SED 'y/aa' && exit 1
+ $SED 'y/aa/b/' && exit 1
+ $SED 'y/aa/' && exit 1
+ $SED 'y/a/b' && exit 1
+ $SED 'y/a/b/c/d' && exit 1
+ $SED '!' && exit 1
+ $SED supercalifrangolisticexprialidociussupercalifrangolisticexcius  # NOTE(review): no "&& exit 1", so this result is unchecked - confirm intent
+}
+
+main ${1+"$@"}
diff --git a/testsuite/bug32082.sh b/testsuite/bug32082.sh
new file mode 100755
index 0000000..d5d4e92
--- /dev/null
+++ b/testsuite/bug32082.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# sed would access uninitialized memory for certain invalid backreference uses.
+# Before sed 4.6 these would result in "Invalid read size of 4" reported
+# by valgrind from execute.c:992
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_valgrind_
+
+printf '1\n2\n' > in || framework_failure_
+printf '1\n2\n\n' > exp-posix || framework_failure_
+printf '1\n1\n2\n2\n' > exp-no-posix || framework_failure_
+
+#
+# Test 1: with "--posix"
+#
+# using "--posix" disables the backref safety check in
+# regexp.c:compile_regex_1(), which is reported as:
+# "invalid reference \\%d on `s' command's RHS"
+
+valgrind --quiet --error-exitcode=1 \
+ sed --posix -e '/2/p ; 2s//\9/' in > out-posix 2> err-posix || fail=1
+
+echo "valgrind report for 'posix' test:"
+echo "=================================="
+cat err-posix
+echo "=================================="
+
+
+# Work around a bug in CentOS 5.10's valgrind
+# FIXME: remove in 2018 or when CentOS 5 is no longer officially supported
+grep 'valgrind: .*Assertion.*failed' err-posix > /dev/null \
+ && skip_ 'you seem to have a buggy version of valgrind'
+
+compare exp-posix out-posix || fail=1
+compare /dev/null err-posix || fail=1  # stderr went to err-posix; a file named "err" is never created
+
+
+
+#
+# Test 2: without "--posix"
+#
+# When not using "--posix", using a backref to a non-existing group
+# would be caught in compile_regex_1.
+# As reported in bugs.gnu.org/32082 by bugs@feusi.co,
+# using the recent begline/endline optimization with a few "previous regex"
+# tricks bypasses this check.
+
+valgrind --quiet --error-exitcode=1 \
+ sed -e '/^/s///p ; 2s//\9/' in > out-no-posix 2> err-no-posix || fail=1
+
+echo "valgrind report for 'no-posix' test:"
+echo "===================================="
+cat err-no-posix
+echo "===================================="
+
+# Work around a bug in CentOS 5.10's valgrind
+# FIXME: remove in 2018 or when CentOS 5 is no longer officially supported
+grep 'valgrind: .*Assertion.*failed' err-no-posix > /dev/null \
+ && skip_ 'you seem to have a buggy version of valgrind'
+
+compare exp-no-posix out-no-posix || fail=1
+compare /dev/null err-no-posix || fail=1  # stderr went to err-no-posix; a file named "err" is never created
+
+
+Exit $fail
diff --git a/testsuite/bug32271-1.sh b/testsuite/bug32271-1.sh
new file mode 100755
index 0000000..df2e308
--- /dev/null
+++ b/testsuite/bug32271-1.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# sed would incorrectly copy internal buffers under certain s/// uses.
+# Before sed 4.6 these would result in an extraneous NUL at end of lines.
+#
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+printf '0\n' > in || framework_failure_  # input: a single line "0"
+printf '0\n' > exp || framework_failure_  # expected output: byte-identical to the input
+
+# Before sed 4.6, this would result in: 0x30 0x00 0x0a.
+sed -e 's/$/a/2' in > out 2> err || fail=1  # s with numeric flag 2 on the "$" anchor
+
+compare exp out || fail=1
+compare /dev/null err || fail=1  # stderr must be empty
+
+# To ease debugging / error reporting (the above 'compare'
+# will report "binary files differ" - not very helpful here)
+if test -n "$fail" ; then
+ echo "---- TEST FAILED"
+ echo "out:"
+ od -tx1 out
+ echo "exp:"
+ od -tx1 exp
+ echo "err:"
+ od -tx1 err
+fi
+
+
+Exit $fail
diff --git a/testsuite/bug32271-2.sh b/testsuite/bug32271-2.sh
new file mode 100755
index 0000000..d6e50ce
--- /dev/null
+++ b/testsuite/bug32271-2.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+# sed would access uninitialized memory for certain regexes.
+# Before sed 4.6 these would result in "Conditional jump or move depends on
+# uninitialised value(s)" and "Invalid read of size 1"
+# by valgrind from regexp.c:286
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_valgrind_
+
+# 40 characters ensures valgrind detects the bug
+# (with less than 25 - it does not).
+z=0000000000000000000000000000000000000000
+
+printf '%s\n' $z $z > in || framework_failure_
+printf '%s\n' $z $z > exp || framework_failure_
+
+# Before sed-4.6, this would fail with:
+# [...]
+# ==13131== Conditional jump or move depends on uninitialised value(s)
+# ==13131== at 0x4C3002B: memchr (vg_replace_strmem.c:883)
+# ==13131== by 0x1120BD: match_regex (regexp.c:286)
+# ==13131== by 0x110736: do_subst (execute.c:1101)
+# ==13131== by 0x1115D3: execute_program (execute.c:1591)
+# ==13131== by 0x111A4C: process_files (execute.c:1774)
+# ==13131== by 0x112E1C: main (sed.c:405)
+# ==13131==
+# ==13131== Invalid read of size 1
+# ==13131== at 0x4C30027: memchr (vg_replace_strmem.c:883)
+# ==13131== by 0x1120BD: match_regex (regexp.c:286)
+# ==13131== by 0x110736: do_subst (execute.c:1101)
+# ==13131== by 0x1115D3: execute_program (execute.c:1591)
+# ==13131== by 0x111A4C: process_files (execute.c:1774)
+# ==13131== by 0x112E1C: main (sed.c:405)
+# ==13131== Address 0x55ec765 is 0 bytes after a block of size 101 alloc'd
+# ==13131== at 0x4C2DDCF: realloc (vg_replace_malloc.c:785)
+# ==13131== by 0x113BA2: ck_realloc (utils.c:418)
+# ==13131== by 0x10E682: resize_line (execute.c:154)
+# ==13131== by 0x10E6F0: str_append (execute.c:165)
+# ==13131== by 0x110779: do_subst (execute.c:1106)
+# ==13131== by 0x1115D3: execute_program (execute.c:1591)
+# ==13131== by 0x111A4C: process_files (execute.c:1774)
+# ==13131== by 0x112E1C: main (sed.c:405)
+valgrind --quiet --error-exitcode=1 \
+ sed -e 'N; s/$//m2' in > out 2> err || fail=1
+
+# Work around a bug in CentOS 5.10's valgrind
+# FIXME: remove in 2018 or when CentOS 5 is no longer officially supported
+grep 'valgrind: .*Assertion.*failed' err-no-posix > /dev/null \
+ && skip_ 'you seem to have a buggy version of valgrind'
+
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+echo "valgrind report:"
+echo "=================================="
+cat err
+echo "=================================="
+
+exit $fail
diff --git a/testsuite/cmd-R.sh b/testsuite/cmd-R.sh
new file mode 100644
index 0000000..4f4eb99
--- /dev/null
+++ b/testsuite/cmd-R.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+# Test 'R' command
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+printf "%s\n" x y > a || framework_failure_
+printf "%s\n" 1 2 > b || framework_failure_
+printf "%s\n" X > c || framework_failure_
+touch d || framework_failure_
+
+# Read and interleave two lines
+printf "%s\n" x 1 y 2 > exp1 || framework_failure_
+sed -e 1Rb -e 2Rb a > out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# Read a non-existing file, silently ignored
+sed -e 1Rq a > out2 || fail=1
+compare_ a out2
+
+# Read two lines from a file, second time will be EOF
+# (implementation note: EOF from get_delim())
+printf "%s\n" x X y > exp3 || framework_failure_
+sed -e 1Rc -e 2Rc a > out3 || fail=1
+compare_ exp3 out3 || fail=1
+
+# Read two lines from an empty file, both will be EOF
+# (implementation note: EOF before get_delim())
+sed -e 1Rd -e 2Rd a > out4 || fail=1
+compare_ a out4 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/cmd-l.sh b/testsuite/cmd-l.sh
new file mode 100755
index 0000000..f5207e9
--- /dev/null
+++ b/testsuite/cmd-l.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+# Test 'l' command with different widths
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# 10 20 30 40 50 60 70 75
+cat <<\EOF >in1 || framework_failure_
+0123456789012345678901234567890123456789012345678901234567890123456789012345
+EOF
+
+# default: 70 characters (including the \n)
+cat <<\EOF >exp-def || framework_failure_
+012345678901234567890123456789012345678901234567890123456789012345678\
+9012345$
+EOF
+
+# 11 characters
+cat <<\EOF >exp-11 || framework_failure_
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+0123456789\
+012345$
+EOF
+
+# command 'l n' is a gnu extension, rejected in posix mode
+cat <<\EOF >exp-err-posix-ln || framework_failure_
+sed: -e expression #1, char 2: extra characters after command
+EOF
+
+# sed's default: 70 characters
+sed -n l in1 >out-def || fail=1
+compare_ exp-def out-def || fail=1
+
+# limit with COLS envvar, sed subtracts one to avoid ttys linewraps
+COLS=12 sed -n l in1 >out-cols12 || fail=1
+compare_ exp-11 out-cols12 || fail=1
+
+# invalid COLS envvar should be ignored (wrap at default=70)
+COLS=0 sed -n l in1 >out-cols0 || fail=1
+compare_ exp-def out-cols0 || fail=1
+COLS=foo sed -n l in1 >out-cols-foo || fail=1
+compare_ exp-def out-cols-foo || fail=1
+
+# limit with -l parameter
+sed -l 11 -n l in1 >out-l11 || fail=1
+compare_ exp-11 out-l11 || fail=1
+
+# limit with 'ln' command (gnu extension)
+sed -n l11 in1 >out-ln-11 || fail=1
+compare_ exp-11 out-ln-11 || fail=1
+
+# 'ln' command (gnu extension) must be rejected in posix mode
+returns_ 1 sed --posix -n l11 in1 2>err-posix-ln || fail=1
+compare_ exp-err-posix-ln err-posix-ln || fail=1
+
+Exit $fail
diff --git a/testsuite/colon-with-no-label.sh b/testsuite/colon-with-no-label.sh
new file mode 100755
index 0000000..915a192
--- /dev/null
+++ b/testsuite/colon-with-no-label.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Verify that a ":" command with no label is now rejected.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo 'sed: -e expression #1, char 1: ":" lacks a label' > exp-err \
+ || framework_failure_
+
+# Before sed-4.3, sed would mistakenly accept a ":" with no following
+# label name.
+echo x | returns_ 1 sed : > out 2> err || fail=1
+
+compare /dev/null out || fail=1
+compare exp-err err || fail=1
+
+Exit $fail
diff --git a/testsuite/command-endings.sh b/testsuite/command-endings.sh
new file mode 100644
index 0000000..1c257e9
--- /dev/null
+++ b/testsuite/command-endings.sh
@@ -0,0 +1,137 @@
+#!/bin/sh
+# Test command separators and endings
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Allowed endings/separators after most commands:
+# newline, comment, closing brace, semicolon, EOF
+# they are also allowed after opening and closing braces themselves.
+#
+# Not tested here:
+# r/R/w/W/e and s///[we] which use read_filename() and do not
+# accept comments or semicolons.
+
+
+
+# Test commands and braces followed by:
+# closing braces, comment, semicolons, EOF (newlines are tested later).
+#
+# sed-4.3 wrongly rejected y/// followed by '}' or '#' (bug#22460).
+#
+# Implementation notes (see compile.c):
+# Simple commands, '}', and 'y///' commands use read_end_of_cmd().
+#
+# q/Q/l/L have additional check for optional integer,
+# then call read_end_of_cmd().
+#
+# labels use 'read_label()'.
+#
+# 's///' has special handling, depending on additional flags
+# (with 's///[we]' commands and semicolons are not allowed).
+# Implemented in mark_subst_opts().
+#
+for p in \
+ 'h' \
+ 'h;' \
+ 'h ;' \
+ 'h# foo' \
+ 'h # foo' \
+ '{h}' \
+ '{h } ' \
+ '{ h } ' \
+ \
+ '{h}# foo' \
+ '{h} # foo' \
+ '{h};' \
+ '{h} ;' \
+ '{;h;} ' \
+ '{{h}}' \
+ '{;{h};}' \
+ \
+ 'y/1/a/' \
+ 'y/1/a/;d' \
+ 'y/1/a/ ;d' \
+ '{y/1/a/}' \
+ 'y/1/a/#foo'\
+ 'y/1/a/ #fo'\
+ \
+ 's/1/a/' \
+ 's/1/a/;d' \
+ 's/1/a/ ;d' \
+ '{s/1/a/}' \
+ 's/1/a/#foo'\
+ 's/1/a/ #fo'\
+ \
+ 's/1/a/i ;' \
+ 's/1/a/i #foo' \
+ '{ s/1/a/i }' \
+ \
+ 'bx; :x' \
+ 'bx; :x;' \
+ 'bx; :x ;' \
+ 'bx; :x#foo' \
+ 'bx; :x #foo' \
+ '{ bx; :x }' \
+ \
+ 'l' \
+ 'l;' \
+ 'l ;' \
+ 'l#foo' \
+ 'l #foo' \
+ '{l}' \
+ '{l }' \
+ 'l1' \
+ 'l1;' \
+ 'l1 ;' \
+ 'l1#foo' \
+ 'l1 #foo' \
+ '{l1}' \
+ '{l1 }' \
+ ;
+do
+ sed -n "$p" < /dev/null >out 2>err || fail=1
+ compare /dev/null err || fail=1
+ compare /dev/null out || fail=1
+done
+
+
+# Create files to test newlines after commands
+# (instead of having to embed newlines in shell variables in a portable way)
+printf 'd\n' > nl1 || framework_failure_
+printf '{\nd}' > nl2 || framework_failure_
+printf '{d\n}' > nl3 || framework_failure_
+printf '{d}\n' > nl4 || framework_failure_
+printf 'y/1/a/\n' > nl5 || framework_failure_
+printf 's/1/a/\n' > nl6 || framework_failure_
+printf 'bx\n:x\n' > nl7 || framework_failure_
+printf 'l\n' > nl8 || framework_failure_
+printf 'l1\n' > nl9 || framework_failure_
+# s/// has special allowance for \r in mark_subst_opts(),
+# even if not on windows.
+# TODO: should other commands allow it ?
+printf 's/1/a/\r\n' > nl10 || framework_failure_
+
+for i in 1 2 3 4 5 6 7 8 9 10 ;
+do
+ sed -n -f "nl$i" </dev/null >out 2>err || fail=1
+ compare /dev/null err || fail=1
+ compare /dev/null out || fail=1
+done
+
+
+Exit $fail
diff --git a/testsuite/comment-n.sh b/testsuite/comment-n.sh
new file mode 100644
index 0000000..ad94c63
--- /dev/null
+++ b/testsuite/comment-n.sh
@@ -0,0 +1,95 @@
+#!/bin/sh
+# Test the '#n' silent mode (activated by first line comment)
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo X > in1 || framework_failure_
+
+# expected output with 'sed -n = in1' (silent mode)
+echo 1 > exp-silent || framework_failure_
+
+# expected output with 'sed = in1' (not silent mode)
+printf "1\nX\n" > exp-norm || framework_failure_
+
+
+# A comment '#n' in the first script, in the first line
+sed -e '#n' in1 > out1 || fail=1
+compare_ /dev/null out1 || fail=1
+
+sed -e '#n' -e = in1 > out2 || fail=1
+compare_ exp-silent out2 || fail=1
+
+sed -e '#ni!' -e = in1 > out3 || fail=1
+compare_ exp-silent out3 || fail=1
+
+# not the first 2 characters, or space before n,
+# or uppercase N - do not activate silent mode
+sed -e '=#n' in1 > out4 || fail=1
+compare_ exp-norm out4 || fail=1
+
+sed -e '# n' -e = in1 > out5 || fail=1
+compare_ exp-norm out5 || fail=1
+
+sed -e '#N' -e = in1 > out6 || fail=1
+compare_ exp-norm out6 || fail=1
+
+sed -e = -e '#n' in1 > out7 || fail=1
+compare_ exp-norm out7 || fail=1
+
+
+#
+# Test the same, with a program instead of -e.
+#
+cat << \EOF > prog1 || framework_failure_
+#n
+=
+EOF
+sed -f prog1 in1 > out8 || fail=1
+compare_ exp-silent out8 || fail=1
+
+# '#n' on the second line, not the first
+cat << \EOF > prog2 || framework_failure_
+=
+#n
+EOF
+sed -f prog2 in1 > out9 || fail=1
+compare_ exp-norm out9 || fail=1
+
+# space between '#' and 'n' on the first line
+cat << \EOF > prog3 || framework_failure_
+# n
+=
+EOF
+sed -f prog3 in1 > out10 || fail=1
+compare_ exp-norm out10 || fail=1
+
+
+# -e then a program file.
+cat << \EOF > prog4 || framework_failure_
+#n
+EOF
+sed -e = -f prog4 in1 > out11 || fail=1
+compare_ exp-norm out11 || fail=1
+
+
+# If the program comes before -e , silent mode is activated.
+sed -f prog4 -e = in1 > out12 || fail=1
+compare_ exp-silent out12 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/compile-errors.sh b/testsuite/compile-errors.sh
new file mode 100644
index 0000000..7773adb
--- /dev/null
+++ b/testsuite/compile-errors.sh
@@ -0,0 +1,290 @@
+#!/bin/sh
+# Test compilation errors
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Excess P modifier to s// (EXCESS_P_OPT)
+#
+cat <<\EOF >exp-exs-p || framework_failure_
+sed: -e expression #1, char 8: multiple `p' options to `s' command
+EOF
+returns_ 1 sed 's/./x/pp' </dev/null 2>err-exs-p || fail=1
+compare_ exp-exs-p err-exs-p || fail=1
+
+#
+# Excess G modifier to s// (EXCESS_G_OPT)
+#
+cat <<\EOF >exp-exs-g || framework_failure_
+sed: -e expression #1, char 8: multiple `g' options to `s' command
+EOF
+returns_ 1 sed 's/./x/gg' </dev/null 2>err-exs-g || fail=1
+compare_ exp-exs-g err-exs-g || fail=1
+
+#
+# zero numeric modifier to s// (ZERO_N_OPT)
+#
+cat <<\EOF >exp-exs-0 || framework_failure_
+sed: -e expression #1, char 7: number option to `s' command may not be zero
+EOF
+returns_ 1 sed 's/./x/0' </dev/null 2>err-exs-0 || fail=1
+compare_ exp-exs-0 err-exs-0 || fail=1
+
+
+#
+# Multiple number modifiers to s// (EXCESS_N_OPT)
+#
+cat <<\EOF >exp-exs-n || framework_failure_
+sed: -e expression #1, char 9: multiple number options to `s' command
+EOF
+returns_ 1 sed 's/./x/2p3' </dev/null 2>err-exs-n || fail=1
+compare_ exp-exs-n err-exs-n || fail=1
+
+
+#
+# Unknown s/// modifier letter
+#
+cat << \EOF >exp-unk-s-opt || framework_failure_
+sed: -e expression #1, char 7: unknown option to `s'
+EOF
+returns_ 1 sed 's/./x/Q' </dev/null 2>err-unk-s-opt || fail=1
+compare_ exp-unk-s-opt err-unk-s-opt || fail=1
+
+#
+# Special case: s/// followed by \r alone
+#
+printf "s/./x/\r" > s-opt-r-in || framework_failure_
+cat << \EOF >exp-s-opt-r || framework_failure_
+sed: file s-opt-r-in line 1: unknown option to `s'
+EOF
+returns_ 1 sed -f s-opt-r-in </dev/null 2>err-s-opt-r || fail=1
+compare_ exp-s-opt-r err-s-opt-r || fail=1
+
+
+#
+# Step-address as first address (BAD_STEP)
+# (both +N and ~N addresses)
+cat <<\EOF >exp-step-addr || framework_failure_
+sed: -e expression #1, char 2: invalid usage of +N or ~N as first address
+EOF
+returns_ 1 sed '~1d' </dev/null 2>err-step-addr1 || fail=1
+compare_ exp-step-addr err-step-addr1 || fail=1
+returns_ 1 sed '+1d' </dev/null 2>err-step-addr2 || fail=1
+compare_ exp-step-addr err-step-addr2 || fail=1
+
+
+#
+# Multiple '!' (BAD_BANG)
+#
+cat <<\EOF >exp-bad-bang || framework_failure_
+sed: -e expression #1, char 3: multiple `!'s
+EOF
+returns_ 1 sed '1!!d' </dev/null 2>err-bad-bang || fail=1
+compare_ exp-bad-bang err-bad-bang || fail=1
+
+
+#
+# GNU extension commands, not accepted in --posix mode
+# (bad_command(OPT))
+for opt in e F v z L Q T R W ;
+do
+ cat <<EOF >exp-posix-cmd-$opt || framework_failure_
+sed: -e expression #1, char 2: unknown command: \`$opt'
+EOF
+ returns_ 1 sed --posix "1$opt" </dev/null 2>err-posix-cmd-$opt || fail=1
+ compare_ exp-posix-cmd-$opt err-posix-cmd-$opt || fail=1
+done
+
+#
+# Commands accepting only one address in --posix mode
+# (ONE_ADDR)
+cat <<\EOF >exp-one-addr || framework_failure_
+sed: -e expression #1, char 4: command only uses one address
+EOF
+for opt in a i l = r ;
+do
+ returns_ 1 sed --posix "1,2$opt" </dev/null 2>err-posix-cmd-$opt || fail=1
+ compare_ exp-one-addr err-posix-cmd-$opt || fail=1
+done
+
+# q/Q always accept one address (Q is gnu extension, can't use --posix, above)
+for opt in q Q ;
+do
+ returns_ 1 sed "1,2$opt" </dev/null 2>err-posix-cmd-$opt || fail=1
+ compare_ exp-one-addr err-posix-cmd-$opt || fail=1
+done
+
+#
+# Comment with address (NO_SHARP_ADDR)
+#
+cat <<\EOF >exp-no-sharp || framework_failure_
+sed: -e expression #1, char 2: comments don't accept any addresses
+EOF
+returns_ 1 sed '1#foo' </dev/null 2>err-no-sharp || fail=1
+compare_ exp-no-sharp err-no-sharp || fail=1
+
+
+#
+# Unexpected closing braces (EXCESS_CLOSE_BRACE)
+#
+cat <<\EOF >exp-unexp-brace || framework_failure_
+sed: -e expression #1, char 2: unexpected `}'
+EOF
+returns_ 1 sed '1}' </dev/null 2>err-unexp-brace || fail=1
+compare_ exp-unexp-brace err-unexp-brace || fail=1
+
+
+#
+# Unmatched opening braces (EXCESS_OPEN_BRACE)
+# TODO: why 'char 0' ?
+cat <<\EOF >exp-unmatched-braces || framework_failure_
+sed: -e expression #1, char 0: unmatched `{'
+EOF
+returns_ 1 sed '1{' </dev/null 2>err-unmatched-braces || fail=1
+compare_ exp-unmatched-braces err-unmatched-braces || fail=1
+
+
+#
+# '}' with address (NO_CLOSE_BRACE_ADDR)
+#
+cat <<\EOF >exp-brace-addr || framework_failure_
+sed: -e expression #1, char 3: `}' doesn't want any addresses
+EOF
+returns_ 1 sed '{1}' </dev/null 2>err-brace-addr || fail=1
+compare_ exp-brace-addr err-brace-addr || fail=1
+
+
+#
+# Too new version requested (ANCIENT_VERSION)
+# (i.e. this version of SED is too old)
+cat <<\EOF >exp-anc-ver || framework_failure_
+sed: -e expression #1, char 4: expected newer version of sed
+EOF
+returns_ 1 sed 'v9.0' </dev/null 2>err-anc-ver || fail=1
+compare_ exp-anc-ver err-anc-ver || fail=1
+
+
+#
+# Junk after command (EXCESS_JUNK)
+# notes: EOF, \n or ';' are allowed after a command.
+# Several code paths abort with EXCESS_JUNK; each is checked below with
+# a dummy address padding the offending char to 7 and its own err-* file.
+cat <<\EOF >exp-junk || framework_failure_
+sed: -e expression #1, char 7: extra characters after command
+EOF
+returns_ 1 sed '11111=d' </dev/null 2>err-junk || fail=1
+compare_ exp-junk err-junk || fail=1
+returns_ 1 sed 'y/a/b/d' </dev/null 2>err-junk-y || fail=1
+compare_ exp-junk err-junk-y || fail=1
+returns_ 1 sed '1111{}d' </dev/null 2>err-junk-braces || fail=1
+compare_ exp-junk err-junk-braces || fail=1
+returns_ 1 sed '22222ld' </dev/null 2>err-junk-l || fail=1
+compare_ exp-junk err-junk-l || fail=1
+
+
+#
+# Backslash after a/c/i (EXPECTED_SLASH)
+# note: GNU extensions are less strict than --posix.
+cat <<\EOF >exp-slash || framework_failure_
+sed: -e expression #1, char 2: expected \ after `a', `c' or `i'
+EOF
+for opt in a c i ;
+do
+ # EOF after command
+ returns_ 1 sed "1$opt" </dev/null 2>err-slash-$opt || fail=1
+ compare_ exp-slash err-slash-$opt || fail=1
+
+ # no backslash after command, in GNU extension mode - accepted.
+ sed "1${opt}foo" </dev/null >/dev/null || fail=1
+
+ # no backslash after command, in --posix mode - rejected.
+ returns_ 1 sed --posix "${opt}foo" </dev/null 2>err-slash-$opt-psx || fail=1
+ compare_ exp-slash err-slash-$opt-psx || fail=1
+done
+
+
+#
+# ':' with address (NO_COLON_ADDR)
+#
+cat <<\EOF >exp-colon-addr || framework_failure_
+sed: -e expression #1, char 2: : doesn't want any addresses
+EOF
+returns_ 1 sed '2:' </dev/null 2>err-colon-addr || fail=1
+compare_ exp-colon-addr err-colon-addr || fail=1
+
+
+
+# NOTE: this stanza was labelled "q/Q need one address (ONE_ADDR)" but it
+# repeats the ':' with address check (NO_COLON_ADDR); q/Q with two
+# addresses are exercised by the 'for opt in q Q' loop earlier on.
+cat <<\EOF >exp-colon-addr || framework_failure_
+sed: -e expression #1, char 2: : doesn't want any addresses
+EOF
+returns_ 1 sed '2:' </dev/null 2>err-colon-addr || fail=1
+compare_ exp-colon-addr err-colon-addr || fail=1
+
+
+#
+# unterminated Y commands (UNTERM_Y_CMD)
+# NOTE: the code calls bad_proc(UNTERM_Y_CMD)
+# in multiple places due to varied conditions - check them all.
+# dummy addresses ensure the offending character is always 5.
+cat <<\EOF >exp-unterm-y || framework_failure_
+sed: -e expression #1, char 5: unterminated `y' command
+EOF
+returns_ 1 sed '1111y' </dev/null 2>err-unterm-y1 || fail=1
+compare_ exp-unterm-y err-unterm-y1 || fail=1
+returns_ 1 sed '111y/' </dev/null 2>err-unterm-y2 || fail=1
+compare_ exp-unterm-y err-unterm-y2 || fail=1
+returns_ 1 sed '11y/a' </dev/null 2>err-unterm-y3 || fail=1
+compare_ exp-unterm-y err-unterm-y3 || fail=1
+returns_ 1 sed '1y/a/' </dev/null 2>err-unterm-y4 || fail=1
+compare_ exp-unterm-y err-unterm-y4 || fail=1
+returns_ 1 sed 'y/a/a' </dev/null 2>err-unterm-y5 || fail=1
+compare_ exp-unterm-y err-unterm-y5 || fail=1
+
+#
+# Y command with bad length (Y_CMD_LEN)
+# TODO: check with multibyte strings.
+cat <<\EOF >exp-bad-y-len || framework_failure_
+sed: -e expression #1, char 7: strings for `y' command are different lengths
+EOF
+returns_ 1 sed 'y/a/bb/' </dev/null 2>err-bad-y-len || fail=1
+compare_ exp-bad-y-len err-bad-y-len || fail=1
+
+
+#
+# GNU Extension: allow a/c/i to continue in next 'program'.
+# in --posix mode, reject it with "incomplete command" (INCOMPLETE_CMD)
+#
+cat <<\EOF >exp-inc-cmd || framework_failure_
+sed: -e expression #1, char 2: incomplete command
+EOF
+for cmd in a c i ; do
+ unterminated="${cmd}\\" # trailing backslash: text comes from next -e
+ # GNU extension: the following -e completes the command.
+ sed -e "$unterminated" -e foo < /dev/null || fail=1
+
+ # --posix: rejected at compile time.
+ returns_ 1 sed --posix -e "$unterminated" -e foo </dev/null \
+ 2>err-inc-cmd-$cmd || fail=1
+ compare_ exp-inc-cmd err-inc-cmd-$cmd || fail=1
+done
+
+
+Exit $fail
diff --git a/testsuite/compile-tests.sh b/testsuite/compile-tests.sh
new file mode 100644
index 0000000..75d5b6d
--- /dev/null
+++ b/testsuite/compile-tests.sh
@@ -0,0 +1,135 @@
+#!/bin/sh
+# Test compilation less-common cases
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Special file names, with gnu extensions and without (if the host
+# supports /dev/std{out,err} )
+#
+echo a > a || framework_failure_
+
+# With gnu extension enabled, /dev/stderr is emulated internally
+# regardless of the operating system.
+sed 'w/dev/stderr' a >out 2>err|| fail=1
+compare_ a out || fail=1
+compare_ a err || fail=1
+
+# In posix mode /dev/std* are not emulated internally. Skip if they
+# don't exist. 'env' is used to avoid built-in 'test' which
+# simulates /dev/stderr, e.g. bash on AIX.
+if env test -w /dev/stderr ; then
+ sed --posix 'w/dev/stderr' a >out-psx 2>err-psx || fail=1
+ compare_ a out-psx || fail=1
+ compare_ a err-psx || fail=1
+fi
+
+
+#
+# labels followed by various characters (read_label):
+# the program must compile and copy its input through unchanged.
+echo a > lbl-in-exp || framework_failure_
+cat << \EOF > lbl-prog || framework_failure_
+bZ
+:Z
+bY;
+:Y
+{bX}
+:X ;
+b W
+: W
+EOF
+sed -f lbl-prog lbl-in-exp > lbl-out || fail=1
+compare_ lbl-in-exp lbl-out || fail=1
+
+
+
+#
+# character classes (compile.c:snarf_char_class)
+#
+
+# open brackets followed by EOF
+cat <<\EOF >exp-err-op-bracket || framework_failure_
+sed: -e expression #1, char 2: unterminated address regex
+EOF
+returns_ 1 sed '/[' </dev/null 2>err-op-bracket1 || fail=1
+compare_ exp-err-op-bracket err-op-bracket1 || fail=1
+
+
+# open brackets followed by \n
+printf "/[\n" > op-bracket-prog || framework_failure_
+cat <<\EOF >exp-err-op-bracket || framework_failure_
+sed: file op-bracket-prog line 1: unterminated address regex
+EOF
+returns_ 1 sed -f op-bracket-prog </dev/null 2>err-op-bracket2 || fail=1
+compare_ exp-err-op-bracket err-op-bracket2 || fail=1
+
+
+# unterminated character class '[.'
+# (snarf_char_class terminates on char 7, then returns)
+cat <<\EOF >exp-chr-class || framework_failure_
+sed: -e expression #1, char 7: unterminated `s' command
+EOF
+returns_ 1 sed 's/[[.//' </dev/null 2>err-chr-class || fail=1
+compare_ exp-chr-class err-chr-class || fail=1
+
+
+# closing bracket immediately after char-class opening
+# sequence (e.g. '[:]' instead of '[:alpha:]' ).
+cat<< \EOF >exp-chr-class2 || framework_failure_
+sed: -e expression #1, char 9: unterminated `s' command
+EOF
+returns_ 1 sed 's/[[:]]//' </dev/null 2>err-chr-class2 || fail=1
+compare_ exp-chr-class2 err-chr-class2 || fail=1
+
+
+# EOF after backslash in a regex (compile.c:match_slash())
+cat<< \EOF >exp-backslash-eof || framework_failure_
+sed: -e expression #1, char 2: unterminated address regex
+EOF
+returns_ 1 sed '/\' </dev/null 2>err-backslash-eof || fail=1
+compare_ exp-backslash-eof err-backslash-eof || fail=1
+
+
+# Valid version requirement
+sed 'v4' < /dev/null || fail=1
+
+# Closing brace followed by another closing brace, or by '#'
+echo X > in-exp || framework_failure_
+sed -n '{{p}}' in-exp > out-braces-1 || fail=1
+compare_ in-exp out-braces-1 || fail=1
+
+sed -n '{p}#foo' in-exp > out-braces-2 || fail=1
+compare_ in-exp out-braces-2 || fail=1
+
+# 'l' followed by closing braces, and '#'
+printf 'X$\n' > exp-l || framework_failure_
+sed -n '{l}' in-exp > out-l-braces || fail=1
+compare_ exp-l out-l-braces || fail=1
+sed -n 'l#foo' in-exp > out-l-hash || fail=1
+compare_ exp-l out-l-hash || fail=1
+
+
+#
+# unterminated a/c/i as last command
+# (pending_text)
+sed -e 'a\' in-exp > out-unterm-a1 || fail=1
+compare_ in-exp out-unterm-a1 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/convert-number.sh b/testsuite/convert-number.sh
new file mode 100755
index 0000000..a485f47
--- /dev/null
+++ b/testsuite/convert-number.sh
@@ -0,0 +1,178 @@
+#!/bin/sh
+# Test number conversion from escape sequences \xNN \oNNN \dNNN
+# (compile.c:convert_number())
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Test \dNNN conversions
+#
+printf "%s\n" a a a a a a a > in-d || framework_failure_
+
+# One numbered sed command per test case; command N rewrites input line N.
+cat <<\EOF >prog-d || framework_failure_
+# Expected output: ASCII 0x0D '\r'
+1s/./\d13/
+
+# Expected output: ASCII 0xff '\377'
+2s/./\d255/
+
+# Expected (?) output: 'dB'
+# (\d followed by character >= base 10, treated as '\d', which is 'd').
+3s/./\dB/
+
+# Expected (?) output: 'dQ'
+# (\d followed by non-hex character, treated as '\d', which is 'd').
+4s/./\dQ/
+
+# Expected output: '{4'
+# \dNNN is limited to three digits.
+# The first three digits are 123 = 0x7b = '{'. '4' is treated as-is.
+5s/./\d1234/
+
+# Expected (?) output: '\1'
+# undocumented implementation-specific limitation:
+# After 3 digit limits, the 8-bit value is used,
+# decimal 513 wraps-around to 1.
+6s/./\d513/
+
+# Expected output: '\0','7'
+# (three digit limit)
+7s/./\d0007/
+EOF
+
+printf '\r\n\377\ndB\ndQ\n{4\n\1\n\0007\n' > exp-d || framework_failure_
+
+sed -f prog-d in-d > out-d || fail=1
+compare_ exp-d out-d || fail=1
+
+if test "$fail" -eq 1 ; then
+ od -tx1c prog-d
+ od -tx1c exp-d
+ od -tx1c out-d
+fi
+
+
+
+
+#
+# Test \oNNN conversions
+#
+printf "%s\n" a a a a a a > in-o || framework_failure_
+
+# One numbered sed command per test case; command N rewrites input line N.
+cat <<\EOF >prog-o || framework_failure_
+# Expected output: '\5'
+1s/./\o5/
+
+# Expected output: ASCII 0xff '\377'
+2s/./\o377/
+
+# Expected (?) output: 'o9'
+# (\o followed by character >= base 8, treated as '\o', which is 'o').
+3s/./\o9/
+
+# Expected (?) output: 'oQ'
+# (\o followed by non-hex character, treated as '\o', which is 'o').
+4s/./\oQ/
+
+# Expected output: 'S4'
+# \oNNN is limited to three digits.
+# The first three digits are o123 = 0x53 = 'S'. '4' is treated as-is.
+5s/./\o1234/
+
+# Expected (?) output: '\1'
+# undocumented implementation-specific limitation:
+# After 3 digit limits, the 8-bit value is used,
+# octal 401 wraps-around to 1.
+6s/./\o401/
+EOF
+
+printf '\5\n\377\no9\noQ\nS4\n\1\n' > exp-o || framework_failure_
+
+sed -f prog-o in-o > out-o || fail=1
+compare_ exp-o out-o || fail=1
+
+if test "$fail" -eq 1 ; then
+ od -tx1c prog-o
+ od -tx1c exp-o
+ od -tx1c out-o
+fi
+
+
+
+
+
+#
+# Test \xNN conversions
+#
+printf "%s\n" a a a a > in-x || framework_failure_
+
+# One numbered sed command per test case; command N rewrites input line N.
+cat <<\EOF >prog-x || framework_failure_
+# Expected output: ASCII 0x06 '\6'
+1s/./\x6/
+
+# Expected output: ASCII 0xCE '\316'
+2s/./\xce/
+
+# Expected (?) output: 'xy'
+# (\x followed by non-hex character, treated as '\x', which is 'x').
+3s/./\xy/
+
+# Expected output: '\253' 'c' (0xAB = 253 octal)
+# \xNN is limited to two digits.
+4s/./\xabc/
+EOF
+
+printf '\6\n\316\nxy\n\253c\n' > exp-x || framework_failure_
+
+sed -f prog-x in-x > out-x || fail=1
+compare_ exp-x out-x || fail=1
+
+if test "$fail" -eq 1 ; then
+ od -tx1c prog-x
+ od -tx1c exp-x
+ od -tx1c out-x
+fi
+
+
+# for completeness, cover all possible letters/digits
+
+printf "%s\n" a a a a a a a a a a a > cnv-num-in || framework_failure_
+cat << \EOF > cnv-num-prog || framework_failure_
+1s/./\x01/
+2s/./\x23/
+3s/./\x45/
+4s/./\x67/
+5s/./\x89/
+6s/./\xAB/
+7s/./\xab/
+8s/./\xCD/
+9s/./\xcd/
+10s/./\xef/
+11s/./\xEF/
+EOF
+
+printf '\1\n#\nE\ng\n\211\n\253\n\253\n\315\n\315\n\357\n\357\n' \
+ > cnv-num-exp || framework_failure_
+
+sed -f cnv-num-prog cnv-num-in > cnv-num-out || fail=1
+compare_ cnv-num-exp cnv-num-out || fail=1
+
+Exit $fail
diff --git a/testsuite/dc.sed b/testsuite/dc.sed
new file mode 100644
index 0000000..5267589
--- /dev/null
+++ b/testsuite/dc.sed
@@ -0,0 +1,322 @@
+#!/bin/sed -nf
+# dc.sed - an arbitrary precision RPN calculator
+# Created by Greg Ubben <gsu@romulus.ncsc.mil> early 1995, late 1996
+#
+# Dedicated to MAC's memory of the IBM 1620 ("CADET") computer.
+# @(#)GSU dc.sed 1.1 06-Mar-1999 [non-explanatory]
+#
+# Examples:
+# sqrt(2) to 10 digits: echo "10k 2vp" | dc.sed
+# 20 factorial: echo "[d1-d1<!*]s! 20l!xp" | dc.sed
+# sin(ln(7)): echo "s(l(7))" | bc -c /usr/lib/lib.b | dc.sed
+# hex to base 60: echo "60o16i 6B407.CAFE p" | dc.sed
+# tests most of dc.sed: echo 16oAk2vp | dc.sed
+#
+# To debug or analyze, give the dc Y command as input or add it to
+# embedded dc routines, or add the sed p command to the beginning of
+# the main loop or at various points in the low-level sed routines.
+# If you need to allow [|~] characters in the input, filter this
+# script through "tr '|~' '\36\37'" first (or use dc.pl).
+#
+# Not implemented: ! \
+# But implemented: K Y t # !< !> != fractional-bases
+# SunOS limits: 199/199 commands (though could pack in 10-20 more)
+# Limitations: scale <= 999; |obase| >= 1; input digits in [0..F]
+# Completed: 1am Feb 4, 1997
+
+s/^/|P|K0|I10|O10|?~/
+
+: next
+s/|?./|?/
+s/|?#[ -}]*/|?/
+/|?!*[lLsS;:<>=]\{0,1\}$/N
+/|?!*[-+*/%^<>=]/b binop
+/^|.*|?[dpPfQXZvxkiosStT;:]/b binop
+/|?[_0-9A-F.]/b number
+/|?\[/b string
+/|?l/b load
+/|?L/b Load
+/|?[sS]/b save
+/|?c/ s/[^|]*//
+/|?d/ s/[^~]*~/&&/
+/|?f/ s//&[pSbz0<aLb]dSaxsaLa/
+/|?x/ s/\([^~]*~\)\(.*|?x\)~*/\2\1/
+/|?[KIO]/ s/.*|\([KIO]\)\([^|]*\).*|?\1/\2~&/
+/|?T/ s/\.*0*~/~/
+# a slow, non-stackable array implementation in dc, just for completeness
+# A fast, stackable, associative array implementation could be done in sed
+# (format: {key}value{key}value...), but would be longer, like load & save.
+/|?;/ s/|?;\([^{}]\)/|?~[s}s{L{s}q]S}[S}l\1L}1-d0>}s\1L\1l{xS\1]dS{xL}/
+/|?:/ s/|?:\([^{}]\)/|?~[s}L{s}L{s}L}s\1q]S}S}S{[L}1-d0>}S}l\1s\1L\1l{xS\1]dS{x/
+/|?[ ~ cdfxKIOT]/b next
+/|?\n/b next
+/|?[pP]/b print
+/|?k/ s/^\([0-9]\{1,3\}\)\([.~].*|K\)[^|]*/\2\1/
+/|?i/ s/^\(-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}\)\(~.*|I\)[^|]*/\2\1/
+/|?o/ s/^\(-\{0,1\}[1-9][0-9]*\.\{0,1\}[0-9]*\)\(~.*|O\)[^|]*/\2\1/
+/|?[kio]/b pop
+/|?t/b trunc
+/|??/b input
+/|?Q/b break
+/|?q/b quit
+h
+/|?[XZz]/b count
+/|?v/b sqrt
+s/.*|?\([^Y]\).*/\1 is unimplemented/
+s/\n/\\n/g
+l
+g
+b next
+
+: print
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~.*|?p/!b Print
+/|O10|/b Print
+
+# Print a number in a non-decimal output base. Uses registers a,b,c,d.
+# Handles fractional output bases (O<-1 or O>=1), unlike other dc's.
+# Converts the fraction correctly on negative output bases, unlike
+# UNIX dc. Also scales the fraction more accurately than UNIX dc.
+#
+s,|?p,&KSa0kd[[-]Psa0la-]Sad0>a[0P]sad0=a[A*2+]saOtd0>a1-ZSd[[[[ ]P]sclb1\
+!=cSbLdlbtZ[[[-]P0lb-sb]sclb0>c1+]sclb0!<c[0P1+dld>c]scdld>cscSdLbP]q]Sb\
+[t[1P1-d0<c]scd0<c]ScO_1>bO1!<cO[16]<bOX0<b[[q]sc[dSbdA>c[A]sbdA=c[B]sbd\
+B=c[C]sbdC=c[D]sbdD=c[E]sbdE=c[F]sb]xscLbP]~Sd[dtdZOZ+k1O/Tdsb[.5]*[.1]O\
+X^*dZkdXK-1+ktsc0kdSb-[Lbdlb*lc+tdSbO*-lb0!=aldx]dsaxLbsb]sad1!>a[[.]POX\
++sb1[SbO*dtdldx-LbO*dZlb!<a]dsax]sadXd0<asbsasaLasbLbscLcsdLdsdLdLak[]pP,
+b next
+
+: Print
+/|?p/s/[^~]*/&\
+~&/
+s/\(.*|P\)\([^|]*\)/\
+\2\1/
+s/\([^~]*\)\n\([^~]*\)\(.*|P\)/\1\3\2/
+h
+s/~.*//
+/./{ s/.//; p; }
+# Just s/.//p would work if we knew we were running under the -n option.
+# Using l vs p would kind of do \ continuations, but would break strings.
+g
+
+: pop
+s/[^~]*~//
+b next
+
+: load
+s/\(.*|?.\)\(.\)/\20~\1/
+s/^\(.\)0\(.*|r\1\([^~|]*\)~\)/\1\3\2/
+s/.//
+b next
+
+: Load
+s/\(.*|?.\)\(.\)/\2\1/
+s/^\(.\)\(.*|r\1\)\([^~|]*~\)/|\3\2/
+/^|/!i\
+register empty
+s/.//
+b next
+
+: save
+s/\(.*|?.\)\(.\)/\2\1/
+/^\(.\).*|r\1/ !s/\(.\).*|/&r\1|/
+/|?S/ s/\(.\).*|r\1/&~/
+s/\(.\)\([^~]*~\)\(.*|r\1\)[^~|]*~\{0,1\}/\3\2/
+b next
+
+: quit
+t quit
+s/|?[^~]*~[^~]*~/|?q/
+t next
+# Really should be using the -n option to avoid printing a final newline.
+s/.*|P\([^|]*\).*/\1/
+q
+
+: break
+s/[0-9]*/&;987654321009;/
+: break1
+s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^;]*\3\(9*\).*|?.\)[^~]*~/\1\5\6\4/
+t break1
+b pop
+
+: input
+N
+s/|??\(.*\)\(\n.*\)/|?\2~\1/
+b next
+
+: count
+/|?Z/ s/~.*//
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}$/ s/[-.0]*\([^.]*\)\.*/\1/
+/|?X/ s/-*[0-9A-F]*\.*\([0-9A-F]*\).*/\1/
+s/|.*//
+/~/ s/[^~]//g
+
+s/./a/g
+: count1
+ s/a\{10\}/b/g
+ s/b*a*/&a9876543210;/
+ s/a.\{9\}\(.\).*;/\1/
+ y/b/a/
+/a/b count1
+G
+/|?z/ s/\n/&~/
+s/\n[^~]*//
+b next
+
+: trunc
+# for efficiency, doesn't pad with 0s, so 10k 2 5/ returns just .40
+# The X* here and in a couple other places works around a SunOS 4.x sed bug.
+s/\([^.~]*\.*\)\(.*|K\([^|]*\)\)/\3;9876543210009909:\1,\2/
+: trunc1
+ s/^\([^;]*\)\([1-9]\)\(0*\)\([^1]*\2\(.\)[^:]*X*\3\(9*\)[^,]*\),\([0-9]\)/\1\5\6\4\7,/
+t trunc1
+s/[^:]*:\([^,]*\)[^~]*/\1/
+b normal
+
+: number
+s/\(.*|?\)\(_\{0,1\}[0-9A-F]*\.\{0,1\}[0-9A-F]*\)/\2~\1~/
+s/^_/-/
+/^[^A-F~]*~.*|I10|/b normal
+/^[-0.]*~/b normal
+s:\([^.~]*\)\.*\([^~]*\):[Ilb^lbk/,\1\2~0A1B2C3D4E5F1=11223344556677889900;.\2:
+: digit
+ s/^\([^,]*\),\(-*\)\([0-F]\)\([^;]*\(.\)\3[^1;]*\(1*\)\)/I*+\1\2\6\5~,\2\4/
+t digit
+s:...\([^/]*.\)\([^,]*\)[^.]*\(.*|?.\):\2\3KSb[99]k\1]SaSaXSbLalb0<aLakLbktLbk:
+b next
+
+: string
+/|?[^]]*$/N
+s/\(|?[^]]*\)\[\([^]]*\)]/\1|{\2|}/
+/|?\[/b string
+s/\(.*|?\)|{\(.*\)|}/\2~\1[/
+s/|{/[/g
+s/|}/]/g
+b next
+
+: binop
+/^[^~|]*~[^|]/ !i\
+stack empty
+//!b next
+/^-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/[^~]*\(.*|?!*[^!=<>]\)/0\1/
+/^[^~]*~-\{0,1\}[0-9]*\.\{0,1\}[0-9]\{1,\}~/ !s/~[^~]*\(.*|?!*[^!=<>]\)/~0\1/
+h
+/|?\*/b mul
+/|?\//b div
+/|?%/b rem
+/|?^/b exp
+
+/|?[+-]/ s/^\(-*\)\([^~]*~\)\(-*\)\([^~]*~\).*|?\(-\{0,1\}\).*/\2\4s\3o\1\3\5/
+s/\([^.~]*\)\([^~]*~[^.~]*\)\(.*\)/<\1,\2,\3|=-~.0,123456789<></
+/^<\([^,]*,[^~]*\)\.*0*~\1\.*0*~/ s/</=/
+: cmp1
+ s/^\(<[^,]*\)\([0-9]\),\([^,]*\)\([0-9]\),/\1,\2\3,\4/
+t cmp1
+/^<\([^~]*\)\([^~]\)[^~]*~\1\(.\).*|=.*\3.*\2/ s/</>/
+/|?/{
+ s/^\([<>]\)\(-[^~]*~-.*\1\)\(.\)/\3\2/
+ s/^\(.\)\(.*|?!*\)\1/\2!\1/
+ s/|?![^!]\(.\)/&l\1x/
+ s/[^~]*~[^~]*~\(.*|?\)!*.\(.*\)|=.*/\1\2/
+ b next
+}
+s/\(-*\)\1|=.*/;9876543210;9876543210/
+/o-/ s/;9876543210/;0123456789/
+s/^>\([^~]*~\)\([^~]*~\)s\(-*\)\(-*o\3\(-*\)\)/>\2\1s\5\4/
+
+s/,\([0-9]*\)\.*\([^,]*\),\([0-9]*\)\.*\([0-9]*\)/\1,\2\3.,\4;0/
+: right1
+ s/,\([0-9]\)\([^,]*\),;*\([0-9]\)\([0-9]*\);*0*/\1,\2\3,\4;0/
+t right1
+s/.\([^,]*\),~\(.*\);0~s\(-*\)o-*/\1~\30\2~/
+
+: addsub1
+ s/\(.\{0,1\}\)\(~[^,]*\)\([0-9]\)\(\.*\),\([^;]*\)\(;\([^;]*\(\3[^;]*\)\).*X*\1\(.*\)\)/\2,\4\5\9\8\7\6/
+ s/,\([^~]*~\).\{10\}\(.\)[^;]\{0,9\}\([^;]\{0,1\}\)[^;]*/,\2\1\3/
+# could be done in one s/// if we could have >9 back-refs...
+/^~.*~;/!b addsub1
+
+: endbin
+s/.\([^,]*\),\([0-9.]*\).*/\1\2/
+G
+s/\n[^~]*~[^~]*//
+
+: normal
+s/^\(-*\)0*\([0-9.]*[0-9]\)[^~]*/\1\2/
+s/^[^1-9~]*~/0~/
+b next
+
+: mul
+s/\(-*\)\([0-9]*\)\.*\([0-9]*\)~\(-*\)\([0-9]*\)\.*\([0-9]*\).*|K\([^|]*\).*/\1\4\2\5.!\3\6,|\2<\3~\5>\6:\7;9876543210009909/
+
+: mul1
+ s/![0-9]\([^<]*\)<\([0-9]\{0,1\}\)\([^>]*\)>\([0-9]\{0,1\}\)/0!\1\2<\3\4>/
+ /![0-9]/ s/\(:[^;]*\)\([1-9]\)\(0*\)\([^0]*\2\(.\).*X*\3\(9*\)\)/\1\5\6\4/
+/<~[^>]*>:0*;/!t mul1
+
+s/\(-*\)\1\([^>]*\).*/;\2^>:9876543210aaaaaaaaa/
+
+: mul2
+ s/\([0-9]~*\)^/^\1/
+ s/<\([0-9]*\)\(.*[~^]\)\([0-9]*\)>/\1<\2>\3/
+
+ : mul3
+ s/>\([0-9]\)\(.*\1.\{9\}\(a*\)\)/\1>\2;9\38\37\36\35\34\33\32\31\30/
+ s/\(;[^<]*\)\([0-9]\)<\([^;]*\).*\2[0-9]*\(.*\)/\4\1<\2\3/
+ s/a[0-9]/a/g
+ s/a\{10\}/b/g
+ s/b\{10\}/c/g
+ /|0*[1-9][^>]*>0*[1-9]/b mul3
+
+ s/;/a9876543210;/
+ s/a.\{9\}\(.\)[^;]*\([^,]*\)[0-9]\([.!]*\),/\2,\1\3/
+ y/cb/ba/
+/|<^/!b mul2
+b endbin
+
+: div
+# CDDET
+/^[-.0]*[1-9]/ !i\
+divide by 0
+//!b pop
+s/\(-*\)\([0-9]*\)\.*\([^~]*~-*\)\([0-9]*\)\.*\([^~]*\)/\2.\3\1;0\4.\5;0/
+: div1
+ s/^\.0\([^.]*\)\.;*\([0-9]\)\([0-9]*\);*0*/.\1\2.\3;0/
+ s/^\([^.]*\)\([0-9]\)\.\([^;]*;\)0*\([0-9]*\)\([0-9]\)\./\1.\2\30\4.\5/
+t div1
+s/~\(-*\)\1\(-*\);0*\([^;]*[0-9]\)[^~]*/~123456789743222111~\2\3/
+s/\(.\(.\)[^~]*\)[^9]*\2.\{8\}\(.\)[^~]*/\3~\1/
+s,|?.,&SaSadSaKdlaZ+LaX-1+[sb1]Sbd1>bkLatsbLa[dSa2lbla*-*dLa!=a]dSaxsakLasbLb*t,
+b next
+
+: rem
+s,|?%,&Sadla/LaKSa[999]k*Lak-,
+b next
+
+: exp
+# This decimal method is just a little faster than the binary method done
+# totally in dc: 1LaKLb [kdSb*LbK]Sb [[.5]*d0ktdSa<bkd*KLad1<a]Sa d1<a kk*
+/^[^~]*\./i\
+fraction in exponent ignored
+s,[^-0-9].*,;9d**dd*8*d*d7dd**d*6d**d5d*d*4*d3d*2lbd**1lb*0,
+: exp1
+ s/\([0-9]\);\(.*\1\([d*]*\)[^l]*\([^*]*\)\(\**\)\)/;dd*d**d*\4\3\5\2/
+t exp1
+G
+s,-*.\{9\}\([^9]*\)[^0]*0.\(.*|?.\),\2~saSaKdsaLb0kLbkK*+k1\1LaktsbkLax,
+s,|?.,&SadSbdXSaZla-SbKLaLadSb[0Lb-d1lb-*d+K+0kkSb[1Lb/]q]Sa0>a[dk]sadK<a[Lb],
+b next
+
+: sqrt
+# first square root using sed: 8k2v at 1:30am Dec 17, 1996
+/^-/i\
+square root of negative number
+/^[-0]/b next
+s/~.*//
+/^\./ s/0\([0-9]\)/\1/g
+/^\./ !s/[0-9][0-9]/7/g
+G
+s/\n/~/
+s,|?.,&K1+k KSbSb[dk]SadXdK<asadlb/lb+[.5]*[sbdlb/lb+[.5]*dlb>a]dsaxsasaLbsaLatLbk K1-kt,
+b next
+
+# END OF GSU dc.sed
diff --git a/testsuite/dc.sh b/testsuite/dc.sh
new file mode 100755
index 0000000..96c000c
--- /dev/null
+++ b/testsuite/dc.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+
+# Test runner for dc.sed
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Compute Easter of 2002...
+# usage: (echo YEAR; cat easter.dc) | dc.sed
+cat << \EOF > easter.dc || framework_failure_
+[ddsf[lfp[too early
+]Pq]s@1583>@
+ddd19%1+sg100/1+d3*4/12-sx8*5+25/5-sz5*4/lx-10-sdlg11*20+lz+lx-30%
+d[30+]s@0>@d[[1+]s@lg11<@]s@25=@d[1+]s@24=@se44le-d[30+]s@21>@dld+7%-7+
+[March ]smd[31-[April ]sm]s@31<@psnlmPpsn1z>p]splpx
+EOF
+
+cat <<\EOF > easter-exp || framework_failure_
+31
+March 2002
+EOF
+
+
+# Compute square root of 2
+cat << \EOF > sqrt2-inp || framework_failure_
+16oAk2vpq
+EOF
+
+
+cat << \EOF > sqrt2-exp || framework_failure_
+1.6A09E667A
+EOF
+
+
+# The calculator script lives in the source tree, not in the test directory.
+dc_sed="$abs_top_srcdir/testsuite/dc.sed"
+
+# Easter 2002: the year is fed in ahead of the dc program text.
+{ echo 2002 ; cat easter.dc ; } | sed -n -f "$dc_sed" > easter-out || fail=1
+compare easter-exp easter-out || fail=1
+
+# Square root of 2: the dc program is read as a regular input file.
+sed -n -f "$dc_sed" sqrt2-inp > sqrt2-out || fail=1
+compare sqrt2-exp sqrt2-out || fail=1
+
+
+
+Exit $fail
diff --git a/testsuite/debug.pl b/testsuite/debug.pl
new file mode 100644
index 0000000..661c5ed
--- /dev/null
+++ b/testsuite/debug.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/perl
+# Test the --debug feature
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+use File::stat;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $prog = 'sed';
+
+print "PATH = $ENV{PATH}\n";
+
+=pod
+This list contains a template for the tests.
+Two 'foreach' loops below add the '{IN/OUT}' hash entries
+of typical coreutils tests, add single-quotes around the sed program,
+and add the --debug command line option.
+
+NOTE: test names with a "_" character will be checked with empty input,
+but not with non-empty input (e.g. to avoid executing external programs).
+=cut
+my @Tests =
+ (
+ ## Test parsing of SED commands, without any execution
+ ['c0', '=' ],
+ ['c1', ':FOO' ],
+ ['c2', '{=}' ],
+ ['c3', '#FOO' ],
+ ['c4', 'aFOO' ],
+ ['c5', 'b' ],
+ ['c6', 'bx;:x' ],
+ ['c7', 'cFOO' ],
+ ['c8', 'D' ],
+ ['c9', 'd' ],
+ ['c10_', 'e' ],
+ ['c11_', 'ew' ],
+ ['c12', 'F' ],
+ ['c13', 'G' ],
+ ['c14', 'g' ],
+ ['c15', 'H' ],
+ ['c16', 'h' ],
+ ['c17', 'iFOO' ],
+ ['c18', 'l' ],
+ ['c19', 'l3' ],
+ ['c20', 'N' ],
+ ['c21', 'n' ],
+ ['c22', 'P' ],
+ ['c23', 'p' ],
+ ['c24', 'Q' ],
+ ['c25_', 'Q3' ],
+ ['c26', 'q' ],
+ ['c27_', 'q3' ],
+ ['c28', 'Rx' ],
+ ['c29', 'rx' ],
+ ['c30', 's/x//' ],
+ ['c31', 'T' ],
+ ['c32', 'Tx;:x' ],
+ ['c33', 't' ],
+ ['c34', 'tx;:x' ],
+ ['c35', 'v' ],
+ ['c36', 'Wx' ],
+ ['c37', 'wx' ],
+ ['c38', 'x' ],
+ ['c39', 'y/x/y/' ],
+ ['c40', 'z' ],
+ ['c41', '' ],
+
+ ## Test parsing of SED addresses, without any execution
+ ['a0', '1=' ],
+ ['a1', '1!=' ],
+ ['a2', '1,2=' ],
+ ['a3', '1,2!=' ],
+ ['a4', '$=' ],
+ ['a5', '$!=' ],
+ ['a6', '1~3=' ],
+ ['a7', '1~3=' ],
+ ['a8', '50~0=' ],
+ ['a9', '/foo/=' ],
+ ['a10', '/foo/!=' ],
+ ['a11', '\@foo@=' ],
+ ['a12', '0,/foo/=' ],
+ ['a13', '1,/foo/=' ],
+ ['a14', '/foo/,1=' ],
+ ['a15', '1,+10=' ],
+ ['a16', '1,~10=' ],
+ ['a17', '/foo/,+10='],
+ ['a18', '/foo/,~10='],
+
+ ## Test strings with special characters
+ ['s1', '/\\a/=' ],
+ ['s2', '/\\b/=' ],
+ ['s3', '/\\f/=' ],
+ ['s4', '/\\r/=' ],
+ ['s5', '/\\t/=' ],
+ ['s6', '/\\v/=' ],
+ ['s7', '/\\n/=' ],
+ ['s8', '/\\\\/=' ],
+ ['s9', '/\x01/=' ],
+ ['s10','/\//=' ],
+
+ ## Address Regex variations
+ ['r0', '/a/= ; //=' ],
+ ['r1', '/a/I=' ],
+ ['r2', '/a/M=' ],
+ ['r3', '/a/IM=' ],
+
+ ## substitute variations
+ ['t0', 's/a/b/' ],
+ ['t1', 's/a/b/g' ],
+ ['t2', 's/a/b/i' ],
+ ['t3', 's/a/b/I' ],
+ ['t4', 's/a/b/m' ],
+ ['t5', 's/a/b/M' ],
+ ['t6', 's/a/b/wX' ],
+ ['t7', 's/a/b/p' ],
+ ['t8', 's/a/b/e' ],
+ ['t9', 's/a/b/3' ],
+ ['t10','s/a/b/iMg5p'],
+
+ ['t20','s/\\(a\\)/\\1/' ],
+ ['t21','s/a/\\Ua/' ],
+ ['t22','s/a/\\ua/' ],
+ ['t23','s/a/\\La/' ],
+ ['t24','s/a/\\la/' ],
+ ['t25','s/a/\\U\\Ea/' ],
+ ['t26','s/a/&/' ],
+
+ ## Some special cases
+ ['l1', 'a\\' ],
+ ['l2', 'c\\' ],
+ ['l3', 'i\\' ],
+ ['l4', 's/[0-9]/&/' ], # report matched regex register
+ ['l5', 'n;N;D' ], # n/N/D with patterns containing \n.
+ ['l6', 'n;n;n;n;n' ], # n causing end-of-cycle
+ ['l7', 's/^/a/' ], # zero-length regex match
+ ['l8', 's/\\($\\)/a/' ], # zero-length regex match
+ );
+
+
+foreach my $t (@Tests)
+{
+ my $name = shift @$t;
+ my $cmd = shift @$t;
+
+ # Add "--debug" and single-quotes around the sed program.
+ $cmd = "--debug '" . $cmd . "'";
+ unshift @$t, $cmd;
+ unshift @$t, $name;
+
+ # Add the typical coreutils hash entries.
+ # With empty input, the sed program will be printed (due to --debug),
+ # but not executed.
+ push @$t, {IN=>''};
+ push @$t, {OUT=>''};
+ push @$t, {OUT_SUBST=>'s/.*//s'};
+}
+
+# Repeat the tests with some input, to test --debug during execution.
+# Discard the output, the exact debug output is not set in stone.
+my @xtests;
+Test:
+foreach my $t (@Tests)
+{
+ # Remove the '{IN}' hash
+ my @newt = grep { ! ( ref $_ eq 'HASH' && exists $_->{IN} ) } @$t;
+ next if $newt[0] =~ /_/;
+
+ # Rename the test (add "x_" prefix, for execution)
+ $newt[0] = 'x_' . $newt[0];
+
+ # Add non-empty input.
+ push @newt, {IN=>"1\n2\n3\n4\n"};
+ push @xtests, \@newt;
+}
+
+push @Tests, @xtests;
+
+my $save_temps = $ENV{SAVE_TEMPS};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
diff --git a/testsuite/distrib.inp b/testsuite/distrib.inp
new file mode 100644
index 0000000..ceaecec
--- /dev/null
+++ b/testsuite/distrib.inp
@@ -0,0 +1,28 @@
+From crash@cygnus.com Wed Mar 8 18:02:42 1995
+Received: from s1.msi.umn.edu (s1.msi.umn.edu [128.101.24.1]) by cygnus.com (8.6.9/8.6.9) with ESMTP id SAA21692 for <crash@cygnus.com>; Wed, 8 Mar 1995 18:02:41 -0800
+Received: from cygint.cygnus.com (cygint.cygnus.com [140.174.1.1]) by s1.msi.umn.edu (8.6.10/8.6.9) with ESMTP id TAA13398 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 19:59:18 -0600
+Received: from phydeaux.cygnus.com (phydeaux.cygnus.com [140.174.1.85]) by cygnus.com (8.6.9/8.6.9) with SMTP id SAA21688 for <molenda@msi.umn.edu>; Wed, 8 Mar 1995 18:02:33 -0800
+From: Jason Molenda <crash@cygnus.com>
+Received: by phydeaux.cygnus.com (5.65/4.7) id AA06931; Wed, 8 Mar 1995 18:02:28 -0800
+Message-Id: <9503090202.AA06931@phydeaux.cygnus.com>
+Subject: Note for sed testsuite
+To: molenda@msi.umn.edu
+Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST)
+X-Mailer: ELM [version 2.4 PL23]
+
+ _Summum Bonum_
+
+ All the breath and the bloom of the
+ year in the bag of one bee:
+ All the wonder and wealth of the mine in
+ the heart of one gem:
+ In the core of one pearl all the shade and the
+ shine of the sea:
+ Breath and bloom, shade and shine, -- wonder,
+ wealth, and -- how far above them --
+ Truth, thats brighter than gem,
+ Trust, that's purer than pearl, --
+ Brightest truth, purest trust in the universe --
+ all were for me
+ In the kiss of one girl.
+ -- Robert Browning
diff --git a/testsuite/distrib.sh b/testsuite/distrib.sh
new file mode 100644
index 0000000..700c983
--- /dev/null
+++ b/testsuite/distrib.sh
@@ -0,0 +1,126 @@
+#!/bin/sh
+
+# Test runner for the old 'distrib' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# This is straight out of C News
+#
+#
+# All this does is massage the headers so they look like what news
+# software expects. To:, Cc: and Resent-*: headers are masked.
+# Reply-To: is turned into references, which is questionable (could
+# just as well be dropped).
+#
+# The From: line is rewritten to use the "address (comments)" form
+# instead of "phrase <route>" form our mailer uses. Also, addresses
+# with no "@domainname" are assumed to originate locally, and so are
+# given a domain.
+#
+# The Sender: field below reflects the address of the person who
+# maintains our mailing lists. The Approved: field is in a special
+# form, so that we can do bidirectional gatewaying. Any message
+# in a newsgroup that bears this stamp will not be fed into the
+# matching mailing list.
+cat << \EOF > distrib.sed || framework_failure_
+1i\
+Path: mailnewsgateway
+ :a
+ /^[Rr]eceived:/b r
+ /^[Nn]ewsgroups:/b r
+ /^[Pp]ath:/b r
+ /^[Tt][Oo]:/s/^/Original-/
+ /^[Cc][Cc]:/s/^/Original-/
+ /^[Rr][Ee][Ss][Ee][Nn][Tt]-.*/s/^/Original-/
+ /^[Mm][Ee][Ss][Ss][Aa][Gg][Ee]-[Ii][Dd]:/s/@/.alt.buddha.fat.short.guy@/
+ s/^[Ii]n-[Rr]eply-[Tt]o:/References:/
+ /^From:/{
+ s/<\([^@]*\)>$/<\1@$thissite>/
+ s/^From:[ ][ ]*\(.*\) *<\(.*\)>$/From: \2 (\1)/
+ }
+ s/-[Ii]d:/-ID:/
+ s/^[Ss][Uu][Bb][Jj][Ee][Cc][Tt]:[ ]*$/Subject: (none)/
+ s/^\([^:]*:\)[ ]*/\1 /
+ /^$/{i\
+Newsgroups: alt.buddha.short.fat.guy\
+Distribution: world\
+Sender: news@cygnus.com\
+Approved: alt.buddha.short.fat.guy@cygnus.com
+ b e
+ }
+ p
+ n
+ b a
+ :r
+ s/.*//g
+ n
+ /^[ ]/b r
+ b a
+ :e
+ p
+ n
+ b e
+EOF
+
+
+# The expected output
+cat << \EOF > distrib-exp || framework_failure_
+Path: mailnewsgateway
+From crash@cygnus.com Wed Mar 8 18: 02:42 1995
+From: crash@cygnus.com (Jason Molenda)
+Message-ID: <9503090202.AA06931.alt.buddha.fat.short.guy@phydeaux.cygnus.com>
+Subject: Note for sed testsuite
+Original-To: molenda@msi.umn.edu
+Date: Wed, 8 Mar 1995 18:02:24 -0800 (PST)
+X-Mailer: ELM [version 2.4 PL23]
+Newsgroups: alt.buddha.short.fat.guy
+Distribution: world
+Sender: news@cygnus.com
+Approved: alt.buddha.short.fat.guy@cygnus.com
+
+ _Summum Bonum_
+
+ All the breath and the bloom of the
+ year in the bag of one bee:
+ All the wonder and wealth of the mine in
+ the heart of one gem:
+ In the core of one pearl all the shade and the
+ shine of the sea:
+ Breath and bloom, shade and shine, -- wonder,
+ wealth, and -- how far above them --
+ Truth, thats brighter than gem,
+ Trust, that's purer than pearl, --
+ Brightest truth, purest trust in the universe --
+ all were for me
+ In the kiss of one girl.
+ -- Robert Browning
+EOF
+
+# NOTE:
+# The input has lines wider than 80 characters, and is kept as a separate file.
+
+# location of external test files
+dir="$abs_top_srcdir/testsuite"
+
+
+sed -n -f distrib.sed < "$dir/distrib.inp" > distrib-out || fail=1
+remove_cr_inplace distrib-out
+compare distrib-exp distrib-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/envvar-check b/testsuite/envvar-check
new file mode 100644
index 0000000..708013e
--- /dev/null
+++ b/testsuite/envvar-check
@@ -0,0 +1,64 @@
+# -*- sh -*-
+# Check environment variables for sane values while testing.
+
+# Copyright (C) 2000-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
+ as_unset=unset
+else
+ as_unset=false
+fi
+
+envvar_check_fail=0
+vars='
+ _POSIX2_VERSION
+ _STDBUF_E
+ _STDBUF_I
+ _STDBUF_O
+ BASH_ENV
+ BLOCKSIZE
+ BLOCK_SIZE
+ CDPATH
+ COLS
+ COLUMNS
+ DF_BLOCK_SIZE
+ DU_BLOCK_SIZE
+ ENV
+ LANGUAGE
+ LS_BLOCK_SIZE
+ LS_COLORS
+ OMP_NUM_THREADS
+ POSIXLY_CORRECT
+ QUOTING_STYLE
+ SIMPLE_BACKUP_SUFFIX
+ TABSIZE
+ TERM
+ COLORTERM
+ TIME_STYLE
+ TMPDIR
+ VERSION_CONTROL
+'
+for var in $vars
+do
+ $as_unset $var
+ if eval test \"\${$var+set}\" = set; then
+ echo "$0: the $var environment variable is set --" \
+ ' unset it and rerun this test' >&2
+ envvar_check_fail=1
+ fi
+done
+
+test "$envvar_check_fail" = 1 && exit 1
diff --git a/testsuite/eval.sh b/testsuite/eval.sh
new file mode 100755
index 0000000..33395dc
--- /dev/null
+++ b/testsuite/eval.sh
@@ -0,0 +1,141 @@
+#!/bin/sh
+
+# Test runner for old 'eval' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+cat << \EOF > eval-in || framework_failure_
+17380: 2 2 5 11 79
+abcd
+cpu
+ abcd
+ cpu
+EOF
+
+# create a copy of the input file.
+# Keep the name 'eval.in2' - it is used in the 'eval' commands in the
+# sed program below.
+cp eval-in eval.in2 || framework_failure_
+
+
+# The sed program - containing multiple 'e' (eval) commands.
+# NOTE: the program executes 'sed' using 'e' commands - and
+# assumes GNU sed is in the $PATH (which is the case here).
+cat << \EOF > eval.sed || framework_failure_
+1d
+
+ #Try eval command
+ /cpu/!b2
+ esed 1q eval.in2
+
+:2
+p
+i---
+h
+
+ #Try eval option
+ s,.* *cpu *,sed 1q eval.in2; echo "&",e
+
+:3
+p
+g
+i---
+
+ h
+ #Try eval option with print
+ s,.* *cpu.*,sed 1q eval.in2,ep
+ g
+
+
+:4
+p
+i---
+
+$!d
+
+#Do some more tests
+s/.*/Doing some more tests -----------------------/p
+s,.*,sed 1q eval.in2,ep
+i---
+s,.*,sed 1q eval.in2,pe
+i---
+s,.*,sed 1q eval.in2,
+h
+e
+p
+g
+i---
+s/^/echo /ep
+i---
+s/^fubar$/echo wozthis/e
+EOF
+
+
+# The expected output file
+cat << \EOF > eval-exp || framework_failure_
+abcd
+---
+abcd
+---
+abcd
+---
+17380: 2 2 5 11 79
+cpu
+---
+17380: 2 2 5 11 79
+cpu
+---
+17380: 2 2 5 11 79
+cpu
+---
+ abcd
+---
+ abcd
+---
+ abcd
+---
+17380: 2 2 5 11 79
+ cpu
+---
+17380: 2 2 5 11 79
+ cpu
+---
+17380: 2 2 5 11 79
+ cpu
+---
+Doing some more tests -----------------------
+17380: 2 2 5 11 79
+---
+sed 1q eval.in2
+---
+17380: 2 2 5 11 79
+---
+sed 1q eval.in2
+---
+sed 1q eval.in2
+EOF
+
+
+
+
+sed -f eval.sed eval-in > eval-out || fail=1
+remove_cr_inplace eval-out
+compare eval-exp eval-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/execute-tests.sh b/testsuite/execute-tests.sh
new file mode 100644
index 0000000..191a518
--- /dev/null
+++ b/testsuite/execute-tests.sh
@@ -0,0 +1,142 @@
+#!/bin/sh
+# Test less-common execution cases
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# 'D' when pattern-space has no newline (act like 'd')
+#
+echo a | sed 1D > out1 || fail=1
+compare_ /dev/null out1 || fail=1
+
+#
+# 'e' with a command that produces no output
+#
+printf "\n" > exp2 || framework_failure_
+echo "" | sed '1etrue' > out2 || fail=1
+compare_ exp2 out2 || fail=1
+
+
+#
+# plain 'e' with a command that returns non-delimited output
+#
+printf "a\n" > exp3 || framework_failure_
+echo "printf a" | sed '1e' > out3 || fail=1
+compare_ exp3 out3 || fail=1
+
+#
+# plain 'e' with a command that returns delimited '\n' output
+# (implementation note: the delimiter is first chomp'd)
+printf "a\n" > exp4 || framework_failure_
+echo "echo a" | sed '1e' > out4 || fail=1
+compare_ exp4 out4 || fail=1
+
+#
+# e with a command that returns delimited '\0' output
+#
+printf "b\0" > exp5 || framework_failure_
+# This input file contains the shell command to be executed:
+printf 'cat exp5' > in5 || framework_failure_
+sed -z '1e' <in5 > out5 || fail=1
+compare_ exp5 out5 || fail=1
+
+if test "$fail" -eq 1 ; then
+ od -tx1c exp5
+ od -tx1c out5
+fi
+
+#
+# 'P' command, with and without '\n' in the pattern space
+#
+echo a > in6 || framework_failure_
+printf "%s\n" a b | sed -n 'N;P' > out6 || fail=1
+compare_ in6 out6 || fail=1
+
+printf "%s\n" a | sed -n 'P' > out7 || fail=1
+compare_ in6 out7 || fail=1
+
+#
+# 'Q' with exit code
+#
+echo a > in7 || framework_failure_
+returns_ 42 sed '1Q42' in7 || fail=1
+
+#
+# 'r' with a non-existent file (silently ignored)
+#
+echo c > in8 || framework_failure_
+sed 'rfoo.bar' in8 > out8 || fail=1
+compare_ in8 out8 || fail=1
+
+#
+# 'W' to a file (standard output must be unaffected)
+#
+echo d > in9 || framework_failure_
+sed 'Wfoo1' in9 > out9 || fail=1
+compare_ in9 out9 || fail=1
+
+#
+# 'W', with and without '\n' in pattern space
+#
+
+# pattern-space with '\n', only 'a' should be written
+printf "%s\n" a b > in10 || framework_failure_
+echo a > a || framework_failure_
+sed 'N;Ww1.txt' in10 > out10 || fail=1
+compare_ a w1.txt || fail=1
+compare_ in10 out10 || fail=1
+
+# pattern-space without '\n', entire pattern-space ('a') should be written
+sed 'Ww2.txt' a > out11 || fail=1
+compare_ a out11 || fail=1
+compare_ a w2.txt || fail=1
+
+
+#
+# 'T' command
+#
+
+# Unsuccessful substitute, 'T' jumps to 'skip'.
+echo a | sed -n 's/X/Y/ ; Tskip ; Q42 ; :skip' || fail=1
+
+# Successful substitute, 'T' does not jump to 'skip', sed exits with code 42.
+echo a | returns_ 42 sed -n 's/a/Y/ ; Tskip ; Q42 ; :skip' || fail=1
+
+
+#
+# 'F' command
+#
+echo a > in12 || framework_failure_
+printf "%s\n" in12 a > exp12 || framework_failure_
+sed F in12 > out12 || fail=1
+compare_ exp12 out12 || fail=1
+
+# 'F' with multiple files
+echo b > in13 || framework_failure_
+echo c > in14 || framework_failure_
+printf "%s\n" in12 a in13 b in14 c > exp14 || framework_failure_
+sed F in12 in13 in14 > out14 || fail=1
+compare_ exp14 out14 || fail=1
+
+# 'F' with stdin
+printf "%s\n" - a > exp15 || framework_failure_
+sed F < in12 > out15 || fail=1
+compare_ exp15 out15 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/follow-symlinks-stdin.sh b/testsuite/follow-symlinks-stdin.sh
new file mode 100755
index 0000000..788729c
--- /dev/null
+++ b/testsuite/follow-symlinks-stdin.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Verify that --follow-symlinks does not break reading from stdin.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo dbc > exp-out || framework_failure_
+
+echo abc | sed --follow-symlinks s/a/d/ > out 2> err || fail=1
+
+compare exp-out out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/follow-symlinks.sh b/testsuite/follow-symlinks.sh
new file mode 100644
index 0000000..6abc822
--- /dev/null
+++ b/testsuite/follow-symlinks.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+# Test --follow-symlinks option
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# 'F' with and without --follow-symlinks
+#
+echo dummy > a || framework_failure_
+ln -s a la1 || framework_failure_
+ln -s la1 la2 || framework_failure_
+
+echo a > exp-a || framework_failure_
+echo la1 > exp-la1 || framework_failure_
+
+# Sanity-check: the real file
+sed 'F;Q' a > out-a || fail=1
+compare_ exp-a out-a || fail=1
+
+# Without follow-symlinks
+sed -n 'F' la1 > out-la1 || fail=1
+compare_ exp-la1 out-la1 || fail=1
+
+# With follow-symlinks
+sed -n --follow-symlinks 'F' la1 > out-la1-flw || fail=1
+compare_ exp-a out-la1-flw || fail=1
+
+# With follow-symlinks and two levels of indirection
+sed -n --follow-symlinks 'F' la2 > out-la2-flw || fail=1
+compare_ exp-a out-la2-flw || fail=1
+
+# Two symlinks input
+# (implementation note: utils.c:follow_symlinks() uses a static buffer
+# which will be non-empty on the second invocation)
+printf "%s\n" a a > exp-two-symlinks || framework_failure_
+sed --follow-symlinks -n 'F' la1 la2 > out-two-symlinks || fail=1
+compare_ exp-two-symlinks out-two-symlinks || fail=1
+
+# non-existing input with --follow-symlinks
+# implementation note: lstat() will be called before open(), thus 'cannot stat'.
+cat <<\EOF >exp-stat || framework_failure_
+sed: cannot stat badfile:
+EOF
+returns_ 4 sed --follow-symlinks 'F' badfile >/dev/null 2>err-stat || fail=1
+
+# trim the filename/errno message (using sed itself...)
+sed -i 's/badfile:.*$/badfile:/' err-stat || framework_failure_
+compare_ exp-stat err-stat || fail=1
+
+
+# symlinks with absolute path
+ln -s "$PWD/a" la-abs || framework_failure_
+echo "$PWD/a" > exp-la-abs || framework_failure_
+sed -n --follow-symlinks 'F' la-abs > out-la-abs || fail=1
+compare_ exp-la-abs out-la-abs || fail=1
+
+Exit $fail
diff --git a/testsuite/get-mb-cur-max.c b/testsuite/get-mb-cur-max.c
new file mode 100644
index 0000000..6c34a88
--- /dev/null
+++ b/testsuite/get-mb-cur-max.c
@@ -0,0 +1,35 @@
+/* Auxiliary program to detect support for a locale.
+ Copyright 2010-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "progname.h"
+
+int
+main (int argc, char **argv)
+{
+ set_program_name (argv[0]);
+ if (1 < argc && setlocale (LC_ALL, argv[1]))
+ {
+ printf ("%d\n", (int) MB_CUR_MAX);
+ exit (EXIT_SUCCESS);
+ }
+
+ exit (EXIT_FAILURE);
+}
diff --git a/testsuite/help-version.sh b/testsuite/help-version.sh
new file mode 100755
index 0000000..d053cf1
--- /dev/null
+++ b/testsuite/help-version.sh
@@ -0,0 +1,276 @@
+#! /bin/sh
+# Make sure all of these programs work properly
+# when invoked with --help or --version.
+
+# Copyright (C) 2000-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# Ensure that $SHELL is set to *some* value and exported.
+# This is required for dircolors, which would fail e.g., when
+# invoked via debuild (which removes SHELL from the environment).
+test "x$SHELL" = x && SHELL=/bin/sh
+export SHELL
+
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+
+expected_failure_status_chroot=125
+expected_failure_status_env=125
+expected_failure_status_nice=125
+expected_failure_status_nohup=125
+expected_failure_status_stdbuf=125
+expected_failure_status_timeout=125
+expected_failure_status_printenv=2
+expected_failure_status_tty=3
+expected_failure_status_sort=2
+expected_failure_status_expr=3
+expected_failure_status_lbracket=2
+expected_failure_status_dir=2
+expected_failure_status_ls=2
+expected_failure_status_vdir=2
+
+expected_failure_status_cmp=2
+expected_failure_status_zcmp=2
+expected_failure_status_sdiff=2
+expected_failure_status_diff3=2
+expected_failure_status_diff=2
+expected_failure_status_zdiff=2
+expected_failure_status_zgrep=2
+expected_failure_status_zegrep=2
+expected_failure_status_zfgrep=2
+
+expected_failure_status_grep=2
+expected_failure_status_egrep=2
+expected_failure_status_fgrep=2
+
+expected_failure_status_sed=4
+
+test "$built_programs" \
+ || fail_ "built_programs not specified!?!"
+
+test "$VERSION" \
+ || fail_ "set envvar VERSION; it is required for a PATH sanity-check"
+
+# Extract version from --version output of the first program
+for i in $built_programs; do
+ v=$(env $i --version | sed -n '1s/.* //p;q')
+ break
+done
+
+# Ensure that it matches $VERSION.
+test "x$v" = "x$VERSION" \
+ || fail_ "--version-\$VERSION mismatch"
+
+for i in $built_programs; do
+
+ # Skip 'test'; it doesn't accept --help or --version.
+ test $i = test && continue
+
+ # false fails even when invoked with --help or --version.
+ # true and false are tested with these options separately.
+ test $i = false || test $i = true && continue
+
+ # The just-built install executable is always named 'ginstall'.
+ test $i = install && i=ginstall
+
+ # Make sure they exit successfully, under normal conditions.
+ env $i --help >/dev/null || fail=1
+ env $i --version >/dev/null || fail=1
+
+ # Make sure they fail upon 'disk full' error.
+ if test -w /dev/full && test -c /dev/full; then
+ env $i --help >/dev/full 2>/dev/null && fail=1
+ env $i --version >/dev/full 2>/dev/null && fail=1
+ status=$?
+ test $i = [ && prog=lbracket || prog=$(echo $i|sed "s/$EXEEXT$//")
+ eval "expected=\$expected_failure_status_$prog"
+ test x$expected = x && expected=1
+ if test $status = $expected; then
+ : # ok
+ else
+ fail=1
+ echo "*** $i: bad exit status '$status' (expected $expected)," 1>&2
+ echo " with --help or --version output redirected to /dev/full" 1>&2
+ fi
+ fi
+done
+
+bigZ_in=bigZ-in.Z
+zin=zin.gz
+zin2=zin2.gz
+
+tmp=tmp-$$
+tmp_in=in-$$
+tmp_in2=in2-$$
+tmp_dir=dir-$$
+tmp_out=out-$$
+mkdir $tmp || fail=1
+cd $tmp || fail=1
+
+comm_setup () { args="$tmp_in $tmp_in"; }
+csplit_setup () { args="$tmp_in //"; }
+cut_setup () { args='-f 1'; }
+join_setup () { args="$tmp_in $tmp_in"; }
+tr_setup () { args='a a'; }
+
+chmod_setup () { args="a+x $tmp_in"; }
+# Punt on these.
+chgrp_setup () { args=--version; }
+chown_setup () { args=--version; }
+mkfifo_setup () { args=--version; }
+mknod_setup () { args=--version; }
+# Punt on uptime, since it fails (e.g., failing to get boot time)
+# on some systems, and we shouldn't let that stop 'make check'.
+uptime_setup () { args=--version; }
+
+# Create a file in the current directory, not in $TMPDIR.
+mktemp_setup () { args=mktemp.XXXX; }
+
+cmp_setup () { args="$tmp_in $tmp_in2"; }
+
+# Tell dd not to print the line with transfer rate and total.
+# The transfer rate would vary between runs.
+dd_setup () { args=status=noxfer; }
+
+zdiff_setup () { args="$zin $zin2"; }
+zcmp_setup () { args="$zin $zin2"; }
+zcat_setup () { args=$zin; }
+gunzip_setup () { args=$zin; }
+zmore_setup () { args=$zin; }
+zless_setup () { args=$zin; }
+znew_setup () { args=$bigZ_in; }
+zforce_setup () { args=$zin; }
+zgrep_setup () { args="z $zin"; }
+zegrep_setup () { args="z $zin"; }
+zfgrep_setup () { args="z $zin"; }
+gzexe_setup () { args=$tmp_in; }
+
+# We know that $tmp_in contains a "0"
+grep_setup () { args="0 $tmp_in"; }
+egrep_setup () { args="0 $tmp_in"; }
+fgrep_setup () { args="0 $tmp_in"; }
+
+sed_setup () { args="s/a/b/ $tmp_in"; }
+
+diff_setup () { args="$tmp_in $tmp_in2"; }
+sdiff_setup () { args="$tmp_in $tmp_in2"; }
+diff3_setup () { args="$tmp_in $tmp_in2 $tmp_in2"; }
+cp_setup () { args="$tmp_in $tmp_in2"; }
+ln_setup () { args="$tmp_in ln-target"; }
+ginstall_setup () { args="$tmp_in $tmp_in2"; }
+mv_setup () { args="$tmp_in $tmp_in2"; }
+mkdir_setup () { args=$tmp_dir/subdir; }
+realpath_setup () { args=$tmp_in; }
+rmdir_setup () { args=$tmp_dir; }
+rm_setup () { args=$tmp_in; }
+shred_setup () { args=$tmp_in; }
+touch_setup () { args=$tmp_in2; }
+truncate_setup () { args="--reference=$tmp_in $tmp_in2"; }
+
+mkid_setup () { printf 'f(){}\ntypedef int t;\n' > f.c; args=. ; }
+lid_setup () { args=; }
+fid_setup () { args=f.c; }
+fnid_setup () { args=; }
+xtokid_setup () { args=; }
+aid_setup () { args=f; }
+eid_setup () { args=--version; }
+gid_setup () { args=f; }
+defid_setup () { args=t; }
+
+basename_setup () { args=$tmp_in; }
+dirname_setup () { args=$tmp_in; }
+expr_setup () { args=foo; }
+
+# Punt, in case GNU 'id' hasn't been installed yet.
+groups_setup () { args=--version; }
+
+pathchk_setup () { args=$tmp_in; }
+yes_setup () { args=--version; }
+logname_setup () { args=--version; }
+nohup_setup () { args=--version; }
+printf_setup () { args=foo; }
+seq_setup () { args=10; }
+sleep_setup () { args=0; }
+stdbuf_setup () { args="-oL true"; }
+timeout_setup () { args=--version; }
+
+# I'd rather not run sync, since it spins up disks that I've
+# deliberately caused to spin down (but not unmounted).
+sync_setup () { args=--version; }
+
+test_setup () { args=foo; }
+
+# This is necessary in the unusual event that there is
+# no valid entry in /etc/mtab.
+df_setup () { args=/; }
+
+# This is necessary in the unusual event that getpwuid (getuid ()) fails.
+id_setup () { args=-u; }
+
+# Use env to avoid invoking built-in sleep of Solaris 11's /bin/sh.
+kill_setup () {
+ env sleep 31.5 &
+ args=$!
+}
+
+link_setup () { args="$tmp_in link-target"; }
+unlink_setup () { args=$tmp_in; }
+
+readlink_setup () {
+ ln -s . slink
+ args=slink;
+}
+
+stat_setup () { args=$tmp_in; }
+unlink_setup () { args=$tmp_in; }
+lbracket_setup () { args=": ]"; }
+
+parted_setup () { args="-s $tmp_in mklabel gpt"
+ dd if=/dev/null of=$tmp_in seek=2000; }
+
+# Ensure that each program "works" (exits successfully) when doing
+# something more than --help or --version.
+for i in $built_programs; do
+ # Skip these.
+ case $i in chroot|stty|tty|false|chcon|runcon|coreutils) continue;; esac
+
+ rm -rf $tmp_in $tmp_in2 $tmp_dir $tmp_out $bigZ_in $zin $zin2
+ echo z |gzip > $zin
+ cp $zin $zin2
+ cp $zin $bigZ_in
+
+ # This is sort of kludgey: use numbers so this is valid input for factor,
+ # and two tokens so it's valid input for tsort.
+ echo 2147483647 0 > $tmp_in
+ # Make $tmp_in2 identical. Then, using $tmp_in and $tmp_in2 as arguments
+ # to the likes of cmp and diff makes them exit successfully.
+ cp $tmp_in $tmp_in2
+ mkdir $tmp_dir
+ # echo ================== $i
+ test $i = [ && prog=lbracket || prog=$(echo $i|sed "s/$EXEEXT$//")
+ if type ${prog}_setup > /dev/null 2>&1; then
+ ${prog}_setup
+ else
+ args=
+ fi
+ if env $i $args < $tmp_in > $tmp_out; then
+ : # ok
+ else
+ echo FAIL: $i
+ fail=1
+ fi
+ rm -rf $tmp_in $tmp_in2 $tmp_out $tmp_dir
+done
+
+Exit $fail
diff --git a/testsuite/help.sh b/testsuite/help.sh
new file mode 100644
index 0000000..bddaf08
--- /dev/null
+++ b/testsuite/help.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+
+# Test --help screen
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Help screen should include the contact email address
+sed --help | grep E-mail > /dev/null 2>&1 || fail=1
+
+
+# With explicit --help - show usage then email at the bottom.
+# With missing parameters - show the usage without the email.
+# Ensure these are identical (except for the email).
+#
+# Both outputs are normalized the same way: on line 1 the first word
+# enclosed by single spaces is replaced with "sed" (presumably the
+# program name as invoked, which need not be the same in both runs).
+# For --help, everything from the line starting with "E-mail" to the
+# end is deleted as well.
+sed --help \
+ | sed '1s/ [^ ]* / sed /; /^E-mail/,$d' > help-out1
+
+# Invoked without arguments; stderr is merged into the pipe so the
+# usage text is captured wherever it is written.
+sed 2>&1 \
+ | sed '1s/ [^ ]* / sed /' > help-out2
+
+compare help-out1 help-out2 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/in-place-hyphen.sh b/testsuite/in-place-hyphen.sh
new file mode 100755
index 0000000..8a97ba3
--- /dev/null
+++ b/testsuite/in-place-hyphen.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# Verify that "-" is treated as a file name with --in-place.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Create a file literally named "-" in the current directory, and the
+# content it is expected to have after editing.
+echo abc > ./- || framework_failure_
+echo aXc > exp-out || framework_failure_
+
+# Pass a bare "-" operand together with -i; sed must exit successfully.
+sed -i 's/b/X/' - > out 2> err || fail=1
+
+# The file named "-" must have been edited, and stderr must be empty.
+compare exp-out ./- || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/in-place-suffix-backup.sh b/testsuite/in-place-suffix-backup.sh
new file mode 100644
index 0000000..bc7defc
--- /dev/null
+++ b/testsuite/in-place-suffix-backup.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+# Test -i/--in-place with backup suffixes
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+
+# create multiple uniquely-named input files
+# (the content does not matter for the first few)
+touch a b c d e || framework_failure_
+echo z > z || framework_failure_
+# Expected content of 'z' after the '=' command: the line number,
+# then the line itself.
+printf "1\nz\n" >> exp-z || framework_failure_
+
+
+# TODO: misleading error: the problem is the target filename of rename(2),
+# not the source filename.
+cat <<\EOF >exp-err-rename || framework_failure_
+sed: cannot rename ./e: No such file or directory
+EOF
+
+
+# simple backup suffix
+sed -i.bak = a || fail=1
+test -e a.bak || fail=1
+
+# backup suffix with explicit wildcard
+# (as the checks below show, each '*' stands for the input file name)
+sed -i'*.foo' = b || fail=1
+test -e b.foo || fail=1
+
+# wildcard in the middle of the suffix
+sed -i'==*==' = c || fail=1
+test -e ==c== || fail=1
+
+# abuse the suffix-name resolver
+sed -i'*=*' = d || fail=1
+test -e d=d || fail=1
+
+# This fails (as expected, with the backup name resolving './e./e./e').
+# TODO: improve error message;
+# document why exit code is 4.
+returns_ 4 sed -i'***' = ./e 2>err-rename || fail=1
+compare_ exp-err-rename err-rename || fail=1
+
+# backup filename resolving to the same as the input filename,
+# silently ignored, backup not created (in execute.c:closedown() ).
+sed -i'*' = z || fail=1
+# ensure the input file was modified in-place
+compare_ exp-z z || fail=1
+
+
+Exit $fail
diff --git a/testsuite/init.sh b/testsuite/init.sh
new file mode 100644
index 0000000..93d14f5
--- /dev/null
+++ b/testsuite/init.sh
@@ -0,0 +1,618 @@
+# source this file; set up for tests
+
+# Copyright (C) 2009-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# Using this file in a test
+# =========================
+#
+# The typical skeleton of a test looks like this:
+#
+# #!/bin/sh
+# . "${srcdir=.}/init.sh"; path_prepend_ .
+# Execute some commands.
+# Note that these commands are executed in a subdirectory, therefore you
+# need to prepend "../" to relative filenames in the build directory.
+# Note that the "path_prepend_ ." is useful only if the body of your
+# test invokes programs residing in the initial directory.
+# For example, if the programs you want to test are in src/, and this test
+# script is named tests/test-1, then you would use "path_prepend_ ../src",
+# or perhaps export PATH='$(abs_top_builddir)/src$(PATH_SEPARATOR)'"$$PATH"
+# to all tests via automake's TESTS_ENVIRONMENT.
+# Set the exit code 0 for success, 77 for skipped, or 1 or other for failure.
+# Use the skip_ and fail_ functions to print a diagnostic and then exit
+# with the corresponding exit code.
+# Exit $?
+
+# Executing a test that uses this file
+# ====================================
+#
+# Running a single test:
+# $ make check TESTS=test-foo.sh
+#
+# Running a single test, with verbose output:
+# $ make check TESTS=test-foo.sh VERBOSE=yes
+#
+# Running a single test, keeping the temporary directory:
+# $ make check TESTS=test-foo.sh KEEP=yes
+#
+# Running a single test, with single-stepping:
+# 1. Go into a sub-shell:
+# $ bash
+# 2. Set relevant environment variables from TESTS_ENVIRONMENT in the
+# Makefile:
+# $ export srcdir=../../tests # this is an example
+# 3. Execute the commands from the test, copy&pasting them one by one:
+# $ . "$srcdir/init.sh"; path_prepend_ .
+# ...
+# 4. Finally
+# $ exit
+
+# Base name of the running script ($0 with any leading directory
+# components removed); used below as the prefix of diagnostics.
+ME_=`expr "./$0" : '.*/\(.*\)$'`
+
+# Prepare PATH_SEPARATOR.
+# The user is always right: a PATH_SEPARATOR that is already set
+# (even to the empty string) is left untouched.
+if test "${PATH_SEPARATOR+set}" != set; then
+ # Determine PATH_SEPARATOR by trying to find /bin/sh in a PATH which
+ # contains only /bin. Note that ksh looks also at the FPATH variable,
+ # so we have to set that as well for the test.
+ # The default is ':'; ';' is chosen only when sh is found through a
+ # ';'-separated PATH but not through a ':'-separated one.
+ PATH_SEPARATOR=:
+ (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
+ && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 \
+ || PATH_SEPARATOR=';'
+ }
+fi
+
+# A trap is used below for cleanup, and getting the intended exit
+# status through that handler takes some care.  Tests must therefore
+# call 'Exit STATUS' rather than 'exit STATUS'.
+# errexit is switched off first so that the bug in OSF1/Tru64 sh is
+# not tripped inside this function.
+Exit ()
+{
+  set +e
+  (exit $1)
+  exit $1
+}
+
+# Print warnings (e.g., about skipped and failed tests) to this file number.
+# Override by defining to say, 9, in init.cfg, and putting say,
+# export ...ENVVAR_SETTINGS...; $(SHELL) 9>&2
+# in the definition of TESTS_ENVIRONMENT in your tests/Makefile.am file.
+# This is useful when using automake's parallel tests mode, to print
+# the reason for skip/failure to console, rather than to the .log files.
+: ${stderr_fileno_=2}
+
+# Emit a diagnostic made of all arguments joined with "$*".
+# The full text always goes to stderr; when stderr_fileno_ is not 2,
+# its first line is additionally written to that file descriptor.
+# "$*" expands as intended only when IFS starts with ' '.
+warn_ ()
+{
+  case $IFS in
+    ' '*) ;;
+    *) # Unsuitable IFS: redo the call in a subshell with IFS=' ' and
+       # pass that call's status on.
+       (IFS=' '; warn_ "$@")
+       return;;
+  esac
+
+  printf '%s\n' "$*" >&2
+  test $stderr_fileno_ = 2 \
+    || { printf '%s\n' "$*" | sed 1q >&$stderr_fileno_ ; }
+}
+# Diagnostic helpers.  Each one reports "$ME_: <kind>: <message>"
+# through warn_ and then ends the test with the matching status:
+# fail_ 1, skip_ 77, fatal_ 99, framework_failure_ 99.
+fail_ ()
+{
+  warn_ "$ME_: failed test: $@"
+  Exit 1
+}
+
+skip_ ()
+{
+  warn_ "$ME_: skipped test: $@"
+  Exit 77
+}
+
+fatal_ ()
+{
+  warn_ "$ME_: hard error: $@"
+  Exit 99
+}
+
+framework_failure_ ()
+{
+  warn_ "$ME_: set-up failure: $@"
+  Exit 99
+}
+
+# This is used to simplify checking of the return value
+# which is useful when ensuring a command fails as desired.
+# I.e., just doing `command ... &&fail=1` will not catch
+# a segfault in command for example. With this helper you
+# instead check an explicit exit code like
+# returns_ 1 command ... || fail=1
+#
+# Usage: returns_ EXPECTED_STATUS COMMAND [ARG]...
+# Returns 0 when COMMAND exits with EXPECTED_STATUS, 1 otherwise.
+returns_ () {
+ # Disable tracing so it doesn't interfere with stderr of the wrapped command
+ { set +x; } 2>/dev/null
+
+ local exp_exit="$1"
+ shift
+ "$@"
+ test $? -eq $exp_exit && ret_=0 || ret_=1
+
+ # Re-enable tracing under the same conditions in which setup_ turns it on.
+ if test "$VERBOSE" = yes && test "$gl_set_x_corrupts_stderr_" = false; then
+ set -x
+ fi
+ # stderr is discarded here so that the return itself leaves no trace output.
+ { return $ret_; } 2>/dev/null
+}
+
+# Sanitize this shell to POSIX mode, if possible.
+# Under zsh (when "emulate sh" works) switch to sh emulation and adjust
+# a few zsh-specific settings; under any other shell, turn on the
+# "posix" option when "set -o" lists one.
+DUALCASE=1; export DUALCASE
+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
+ emulate sh
+ NULLCMD=:
+ # Global alias that rewrites the ${1+"$@"} idiom to a plain "$@".
+ alias -g '${1+"$@"}'='"$@"'
+ setopt NO_GLOB_SUBST
+else
+ case `(set -o) 2>/dev/null` in
+ *posix*) set -o posix ;;
+ esac
+fi
+
+# We require $(...) support unconditionally.
+# We require non-surprising "local" semantics (this eliminates dash).
+# This takes the admittedly draconian step of eliminating dash, because the
+# assignment tab=$(printf '\t') works fine, yet preceding it with "local "
+# transforms it into an assignment that sets the variable to the empty string.
+# That is too counter-intuitive, and can lead to subtle run-time malfunction.
+# The example below is less subtle in that with dash, it evokes the run-time
+# exception "dash: 1: local: 1: bad variable name".
+# We require a few additional shell features only when $EXEEXT is nonempty,
+# in order to support automatic $EXEEXT emulation:
+# - hyphen-containing alias names
+# - we prefer to use ${var#...} substitution, rather than having
+# to work around lack of support for that feature.
+# The following code attempts to find a shell with support for these features.
+# If the current shell passes the test, we're done. Otherwise, test other
+# shells until we find one that passes. If one is found, re-exec it.
+# If no acceptable shell is found, skip the current test.
+#
+# The "...set -x; P=1 true 2>err..." test is to disqualify any shell that
+# emits "P=1" into err, as /bin/sh from SunOS 5.11 and OpenBSD 4.7 do.
+#
+# Use "9" to indicate success (rather than 0), in case some shell acts
+# like Solaris 10's /bin/sh but exits successfully instead of with status 2.
+
+# Eval this code in a subshell to determine a shell's suitability.
+# 10 - passes all tests; ok to use
+# 9 - ok, but enabling "set -x" corrupts app stderr; prefer higher score
+# ? - not ok
+#
+# The checks, in order:
+# - $(...) command substitution works;
+# - "local" is accepted inside a function;
+# - "local" combined with an unquoted $(...) value containing a space
+# (the dash case described above);
+# - only when VERBOSE=yes: whether "set -x" puts anything into the
+# stderr of a command that has a leading assignment (score 9);
+# - only when $EXEEXT is nonempty: the ${var%...} and ${var#...}
+# expansions and hyphen-containing alias names.
+# The quoted text below is data handed to eval or "sh -c": treat every
+# character of it as code.
+gl_shell_test_script_='
+test $(echo y) = y || exit 1
+f_local_() { local v=1; }; f_local_ || exit 1
+f_dash_local_fail_() { local t=$(printf " 1"); }; f_dash_local_fail_
+score_=10
+if test "$VERBOSE" = yes; then
+ test -n "$( (exec 3>&1; set -x; P=1 true 2>&3) 2> /dev/null)" && score_=9
+fi
+test -z "$EXEEXT" && exit $score_
+shopt -s expand_aliases
+alias a-b="echo zoo"
+v=abx
+ test ${v%x} = ab \
+ && test ${v#a} = bx \
+ && test $(a-b) = zoo \
+ && exit $score_
+'
+
+# Shell selection.  A leading --no-reexec argument marks a run that the
+# exec at the end of this block has already restarted: just drop the
+# marker.  Otherwise probe candidate shells with $gl_shell_test_script_
+# and, unless the current shell is the one selected, re-execute this
+# script under the selected shell.
+if test "x$1" = "x--no-reexec"; then
+ shift
+else
+ # Assume a working shell. Export to subshells (setup_ needs this).
+ gl_set_x_corrupts_stderr_=false
+ export gl_set_x_corrupts_stderr_
+
+ # Record the first marginally acceptable shell.
+ marginal_=
+
+ # Search for a shell that meets our requirements.
+ for re_shell_ in __current__ "${CONFIG_SHELL:-no_shell}" \
+ /bin/sh bash dash zsh pdksh fail
+ do
+ # CONFIG_SHELL is unset or empty: nothing to try for this slot.
+ test "$re_shell_" = no_shell && continue
+
+ # If we've made it all the way to the sentinel, "fail" without
+ # finding even a marginal shell, skip this test.
+ if test "$re_shell_" = fail; then
+ test -z "$marginal_" && skip_ failed to find an adequate shell
+ re_shell_=$marginal_
+ break
+ fi
+
+ # When testing the current shell, simply "eval" the test code.
+ # Otherwise, run it via $re_shell_ -c ...
+ if test "$re_shell_" = __current__; then
+ # 'eval'ing this code makes Solaris 10's /bin/sh exit with
+ # $? set to 2. It does not evaluate any of the code after the
+ # "unexpected" first '('. Thus, we must run it in a subshell.
+ ( eval "$gl_shell_test_script_" ) > /dev/null 2>&1
+ else
+ "$re_shell_" -c "$gl_shell_test_script_" 2>/dev/null
+ fi
+
+ # Score reported by the test script: 10, 9 or anything else.
+ st_=$?
+
+ # $re_shell_ works just fine. Use it.
+ if test $st_ = 10; then
+ gl_set_x_corrupts_stderr_=false
+ break
+ fi
+
+ # If this is our first marginally acceptable shell, remember it.
+ if test "$st_:$marginal_" = 9: ; then
+ marginal_="$re_shell_"
+ gl_set_x_corrupts_stderr_=true
+ fi
+ done
+
+ if test "$re_shell_" != __current__; then
+ # Found a usable shell. Preserve -v and -x.
+ case $- in
+ *v*x* | *x*v*) opts_=-vx ;;
+ *v*) opts_=-v ;;
+ *x*) opts_=-x ;;
+ *) opts_= ;;
+ esac
+ re_shell=$re_shell_
+ export re_shell
+ exec "$re_shell_" $opts_ "$0" --no-reexec "$@"
+ # Reached only when the exec above did not replace this process.
+ echo "$ME_: exec failed" 1>&2
+ exit 127
+ fi
+fi
+
+# If this is bash, turn off all aliases.
+test -n "$BASH_VERSION" && unalias -a
+
+# Note that when supporting $EXEEXT (transparently mapping from PROG_NAME to
+# PROG_NAME.exe), we want to support hyphen-containing names like test-acos.
+# That is part of the shell-selection test above. Why use aliases rather
+# than functions? Because support for hyphen-containing aliases is more
+# widespread than that for hyphen-containing function names.
+test -n "$EXEEXT" && test -n "$BASH_VERSION" && shopt -s expand_aliases
+
+# Enable glibc's malloc-perturbing option.
+# This is useful for exposing code that depends on the fact that
+# malloc-related functions often return memory that is mostly zeroed.
+# If you have the time and cycles, use valgrind to do an even better job.
+: ${MALLOC_PERTURB_=87}
+export MALLOC_PERTURB_
+
+# Do-nothing default for the hook that runs from the trap, i.e. upon
+# regular exit and upon interrupt.  A test may redefine it, e.g., to
+# unmount a partition, or to undo any other global state changes.
+cleanup_ ()
+{
+  :
+}
+
+# Print a three-line header resembling that of diff -u: the simulated
+# "diff" command (so that the order of arguments is clear) followed by
+# the "---" and "+++" lines for $1 and $2.  No @@ lines are produced.
+emit_diff_u_header_ ()
+{
+  printf 'diff -u %s\n--- %s 1970-01-01\n+++ %s 1970-01-01\n' \
+    "$*" "$1" "$2"
+}
+
+# Keep diff and cmp away from /dev/null, since operating on it does
+# not work on some systems (at least OSF/1 5.1).
+# Return 2 unless there are exactly two arguments and at least one of
+# them is /dev/null.  Otherwise return 0 when the other file is empty;
+# when it is not, print a diff-like rendering of it (a header, then
+# each of its lines prefixed with "+" or "-") and return 1.
+compare_dev_null_ ()
+{
+  if test $# != 2; then
+    return 2
+  fi
+
+  # Neither argument is /dev/null: nothing for this function to do.
+  if test "x$1" != x/dev/null && test "x$2" != x/dev/null; then
+    return 2
+  fi
+
+  if test "x$1" = x/dev/null; then
+    if test -s "$2"; then
+      emit_diff_u_header_ "$@"; sed 's/^/+/' "$2"
+      return 1
+    fi
+    return 0
+  fi
+
+  # Only $2 is /dev/null here.
+  if test -s "$1"; then
+    emit_diff_u_header_ "$@"; sed 's/^/-/' "$1"
+    return 1
+  fi
+  return 0
+}
+
+# Define compare_, the file comparison that compare falls back on.
+# Probe diff with each candidate option in turn by comparing this
+# script ($0) with itself; the first option for which diff succeeds is
+# kept in $diff_opt_.  "no" is a sentinel meaning that no form of diff
+# worked, in which case cmp is used instead.
+for diff_opt_ in -u -U3 -c '' no; do
+ test "$diff_opt_" != no &&
+ diff_out_=`exec 2>/dev/null; diff $diff_opt_ "$0" "$0" < /dev/null` &&
+ break
+done
+if test "$diff_opt_" != no; then
+ # A diff that is silent on identical files can be used directly.
+ if test -z "$diff_out_"; then
+ compare_ () { diff $diff_opt_ "$@"; }
+ else
+ compare_ ()
+ {
+ # If no differences were found, AIX and HP-UX 'diff' produce output
+ # like "No differences encountered". Hide this output.
+ diff $diff_opt_ "$@" > diff.out
+ diff_status_=$?
+ test $diff_status_ -eq 0 || cat diff.out || diff_status_=2
+ rm -f diff.out || diff_status_=2
+ return $diff_status_
+ }
+ fi
+elif cmp -s /dev/null /dev/null 2>/dev/null; then
+ compare_ () { cmp -s "$@"; }
+else
+ compare_ () { cmp "$@"; }
+fi
+
+# Usage: compare EXPECTED ACTUAL
+#
+# First let compare_dev_null_ handle the case where an argument is
+# /dev/null: its status 0 or 1 is final, and any diffs have already
+# been printed.  For any other status, defer to compare_.
+compare ()
+{
+  # compare_dev_null_ is run only as a condition: invoking it unchecked
+  # and then inspecting $? would fail in a "set -e" environment.
+  if compare_dev_null_ "$@"; then
+    return 0
+  elif test $? = 1; then
+    return 1
+  else
+    compare_ "$@"
+  fi
+}
+
+# Print the arbitrary prefix that helps distinguish test directories.
+testdir_prefix_ ()
+{
+  printf gt
+}
+
+# Exit handler: run the user-overridable cleanup_ function, remove the
+# temporary directory (unless KEEP=yes) and exit with the value that
+# $? had on entry.
+remove_tmp_ ()
+{
+  __st=$?
+  cleanup_
+  case $KEEP in
+    yes)
+      echo "Not removing temporary directory $test_dir_" ;;
+    *)
+      # Leave the directory that is about to be removed.
+      cd "$initial_cwd_" || cd / || cd /tmp
+      chmod -R u+rwx "$test_dir_"
+      # A failed removal turns an exit status of 0 into 1.
+      rm -rf "$test_dir_" || { test $__st = 0 && __st=1; } ;;
+  esac
+  exit $__st
+}
+
+# Given a directory name, DIR, if every entry in it that matches *.exe
+# contains only the specified bytes (see the case stmt below), then print
+# a space-separated list of those names and return 0. Otherwise, don't
+# print anything and return 1. Naming constraints apply also to DIR.
+# The printed names have both the directory part and the .exe suffix
+# removed.
+find_exe_basenames_ ()
+{
+ feb_dir_=$1
+ feb_fail_=0
+ feb_result_=
+ # Separator placed before each name; empty until a first entry is handled.
+ feb_sp_=
+ for feb_file_ in $feb_dir_/*.exe; do
+ # If there was no *.exe file, or there existed a file named "*.exe" that
+ # was deleted between the above glob expansion and the existence test
+ # below, just skip it.
+ test "x$feb_file_" = "x$feb_dir_/*.exe" && test ! -f "$feb_file_" \
+ && continue
+ # Exempt [.exe, since we can't create a function by that name, yet
+ # we can't invoke [ by PATH search anyways due to shell builtins.
+ test "x$feb_file_" = "x$feb_dir_/[.exe" && continue
+ case $feb_file_ in
+ # Any byte outside the allowed set invalidates the whole directory.
+ *[!-a-zA-Z/0-9_.+]*) feb_fail_=1; break;;
+ *) # Remove leading file name components as well as the .exe suffix.
+ feb_file_=${feb_file_##*/}
+ feb_file_=${feb_file_%.exe}
+ feb_result_="$feb_result_$feb_sp_$feb_file_";;
+ esac
+ feb_sp_=' '
+ done
+ test $feb_fail_ = 0 && printf %s "$feb_result_"
+ return $feb_fail_
+}
+
+# For each file named PROG.exe in directory $1, define an alias PROG
+# that invokes PROG.exe.
+# Nothing is done when $EXEEXT is empty.  An $EXEEXT other than .exe
+# is reported and makes the function return 1.  When
+# find_exe_basenames_ rejects the directory (an unexpected character
+# in a file name or in $1), a note is printed, no alias is defined and
+# the function still returns 0.
+create_exe_shims_ ()
+{
+  if test -z "$EXEEXT"; then
+    return 0
+  fi
+  if test "x$EXEEXT" != x.exe; then
+    echo "$0: unexpected \$EXEEXT value: $EXEEXT" 1>&2
+    return 1
+  fi
+
+  if base_names_=`find_exe_basenames_ $1`; then
+    :
+  else
+    echo "$0 (exe_shim): skipping directory: $1" 1>&2
+    return 0
+  fi
+
+  # An empty list simply leaves the loop body unexecuted.
+  for base_ in $base_names_; do
+    alias "$base_"="$base_$EXEEXT"
+  done
+
+  return 0
+}
+
+# Prepend to PATH an absolute name for each directory given as an
+# argument.  A name that is not already absolute is taken relative to
+# $initial_cwd_.  An empty name, or one whose absolute form contains
+# $PATH_SEPARATOR, is rejected through fail_.
+path_prepend_ ()
+{
+  for path_dir_
+  do
+    case $path_dir_ in
+      '') fail_ "invalid path dir: '$path_dir_'";;
+      /* | ?:*) abs_path_dir_=$path_dir_;;
+      *) abs_path_dir_=$initial_cwd_/$path_dir_;;
+    esac
+    case $abs_path_dir_ in
+      *$PATH_SEPARATOR*) fail_ "invalid path dir: '$abs_path_dir_'";;
+    esac
+    PATH="$abs_path_dir_$PATH_SEPARATOR$PATH"
+
+    # Define an alias FOO for each FOO.exe found in this directory.
+    create_exe_shims_ "$abs_path_dir_" \
+      || fail_ "something failed (above): $abs_path_dir_"
+  done
+  export PATH
+}
+
+# Per-test initialization: optionally enable tracing, create a fresh
+# temporary directory under the starting directory and cd into it,
+# give IFS a defined value, and install signal traps that leave the
+# test through Exit with status 128 + signal number.
+setup_ ()
+{
+ if test "$VERBOSE" = yes; then
+ # Test whether set -x may cause the selected shell to corrupt an
+ # application's stderr. Many do, including zsh-4.3.10 and the /bin/sh
+ # from SunOS 5.11, OpenBSD 4.7 and Irix 5.x and 6.5.
+ # If enabling verbose output this way would cause trouble, simply
+ # issue a warning and refrain.
+ if $gl_set_x_corrupts_stderr_; then
+ warn_ "using SHELL=$SHELL with 'set -x' corrupts stderr"
+ else
+ set -x
+ fi
+ fi
+
+ # Remembered so that remove_tmp_ and path_prepend_ can refer to the
+ # directory the test was started in.
+ initial_cwd_=$PWD
+
+ pfx_=`testdir_prefix_`
+ test_dir_=`mktempd_ "$initial_cwd_" "$pfx_-$ME_.XXXX"` \
+ || fail_ "failed to create temporary directory in $initial_cwd_"
+ cd "$test_dir_" || fail_ "failed to cd to temporary directory"
+
+ # As autoconf-generated configure scripts do, ensure that IFS
+ # is defined initially, so that saving and restoring $IFS works.
+ gl_init_sh_nl_='
+'
+ IFS=" "" $gl_init_sh_nl_"
+
+ # This trap statement, along with a trap on 0 below, ensure that the
+ # temporary directory, $test_dir_, is removed upon exit as well as
+ # upon receipt of any of the listed signals.
+ for sig_ in 1 2 3 13 15; do
+ eval "trap 'Exit $(expr $sig_ + 128)' $sig_"
+ done
+}
+
+# Create a temporary directory, much like mktemp -d does.
+# Written by Jim Meyering.
+#
+# Usage: mktempd_ /tmp phoey.XXXXXXXXXX
+#
+# First, try to use the mktemp program.
+# Failing that, we'll roll our own mktemp-like function:
+# - try to get random bytes from /dev/urandom
+# - failing that, generate output from a combination of quickly-varying
+# sources and gzip. Ignore non-varying gzip header, and extract
+# "random" bits from there.
+# - given those bits, map to file-name bytes using tr, and try to create
+# the desired directory.
+# - make only $MAX_TRIES_ attempts
+
+# Helper function. Print $N pseudo-random bytes from a-zA-Z0-9.
+# Usage: rand_bytes_ N
+# The bytes come from /dev/urandom when that is readable; otherwise
+# from the gzip-compressed output of a few quickly-varying commands.
+rand_bytes_ ()
+{
+ n_=$1
+
+ # Maybe try openssl rand -base64 $n_prime_|tr '+/=\012' abcd first?
+ # But if they have openssl, they probably have mktemp, too.
+
+ chars_=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
+ dev_rand_=/dev/urandom
+ if test -r "$dev_rand_"; then
+ # Note: 256-length($chars_) == 194; 3 copies of $chars_ is 186 + 8 = 194.
+ # tr -c maps every byte outside $chars_ onto the replacement list.
+ dd ibs=$n_ count=1 if=$dev_rand_ 2>/dev/null \
+ | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_
+ return
+ fi
+
+ # Fallback: no readable /dev/urandom.
+ n_plus_50_=`expr $n_ + 50`
+ cmds_='date; date +%N; free; who -a; w; ps auxww; ps -ef'
+ data_=` (eval "$cmds_") 2>&1 | gzip `
+
+ # Ensure that $data_ has length at least 50+$n_
+ while :; do
+ len_=`echo "$data_"|wc -c`
+ test $n_plus_50_ -le $len_ && break;
+ data_=` (echo "$data_"; eval "$cmds_") 2>&1 | gzip `
+ done
+
+ # Skip the first 50 bytes, then map the next $n_ bytes as above.
+ echo "$data_" \
+ | dd bs=1 skip=50 count=$n_ 2>/dev/null \
+ | LC_ALL=C tr -c $chars_ 01234567$chars_$chars_$chars_
+}
+
+# Usage: mktempd_ DIR TEMPLATE
+#
+# Create a new directory inside DIR whose name is TEMPLATE with its
+# trailing run of X's replaced, and print the resulting name on stdout.
+# DIR must not end in a slash unless it is "/" or "//"; TEMPLATE must
+# end in at least four X's.  mktemp is tried first; when its result is
+# unusable, up to $MAX_TRIES_ names built with rand_bytes_ are tried
+# with "mkdir -m 0700".  Failures are reported through fail_.
+mktempd_ ()
+{
+  case $# in
+    2);;
+    *) fail_ "Usage: mktempd_ DIR TEMPLATE";;
+  esac
+
+  destdir_=$1
+  template_=$2
+
+  MAX_TRIES_=4
+
+  # Disallow any trailing slash on specified destdir:
+  # it would subvert the post-mktemp "case"-based destdir test.
+  case $destdir_ in
+    # "/" and "//" already end in a slash, so they serve as the prefix
+    # unchanged.  The prefix must come from $destdir_: an empty prefix
+    # would make the containment test below accept any name and would
+    # make the manual fallback create its directory relative to the
+    # current directory.
+    / | //) destdir_slash_=$destdir_;;
+    */) fail_ "invalid destination dir: remove trailing slash(es)";;
+    *) destdir_slash_=$destdir_/;;
+  esac
+
+  case $template_ in
+    *XXXX) ;;
+    *) fail_ \
+         "invalid template: $template_ (must have a suffix of at least 4 X's)";;
+  esac
+
+  # First, try to use mktemp.
+  d=`unset TMPDIR; { mktemp -d -t -p "$destdir_" "$template_"; } 2>/dev/null` &&
+
+  # The resulting name must be in the specified directory.
+  case $d in "$destdir_slash_"*) :;; *) false;; esac &&
+
+  # It must have created the directory.
+  test -d "$d" &&
+
+  # It must have 0700 permissions. Handle sticky "S" bits.
+  perms=`ls -dgo "$d" 2>/dev/null` &&
+  case $perms in drwx--[-S]---*) :;; *) false;; esac && {
+    echo "$d"
+    return
+  }
+
+  # If we reach this point, we'll have to create a directory manually.
+
+  # Get a copy of the template without its suffix of X's.
+  base_template_=`echo "$template_"|sed 's/XX*$//'`
+
+  # Calculate how many X's we've just removed.
+  template_length_=`echo "$template_" | wc -c`
+  nx_=`echo "$base_template_" | wc -c`
+  nx_=`expr $template_length_ - $nx_`
+
+  # Try random suffixes until mkdir succeeds or the attempts run out;
+  # the last mkdir diagnostic is kept for the failure message.
+  err_=
+  i_=1
+  while :; do
+    X_=`rand_bytes_ $nx_`
+    candidate_dir_="$destdir_slash_$base_template_$X_"
+    err_=`mkdir -m 0700 "$candidate_dir_" 2>&1` \
+      && { echo "$candidate_dir_"; return; }
+    test $MAX_TRIES_ -le $i_ && break;
+    i_=`expr $i_ + 1`
+  done
+  fail_ "$err_"
+}
+
+# If you want to override the testdir_prefix_ function,
+# or to add more utility functions, use this file.
+test -f "$srcdir/init.cfg" \
+ && . "$srcdir/init.cfg"
+
+setup_ "$@"
+# This trap is here, rather than in the setup_ function, because some
+# shells run the exit trap at shell function exit, rather than script exit.
+trap remove_tmp_ 0
diff --git a/testsuite/inplace-hold.sh b/testsuite/inplace-hold.sh
new file mode 100644
index 0000000..1bc293c
--- /dev/null
+++ b/testsuite/inplace-hold.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+# Test runner for old 'inplace-hold' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Expected content of both files after editing: a single empty line.
+echo > inplace-hold-exp || framework_failure_
+
+echo x > inplace-hold-out1 || framework_failure_
+echo y > inplace-hold-out2 || framework_failure_
+
+# Edit both files in one invocation with the 'x' command (exchange the
+# pattern and hold spaces).  Each file must end up holding an empty
+# line, so the line saved from the first file must not show up in the
+# second one.
+sed -i x inplace-hold-out1 inplace-hold-out2 || fail=1
+
+compare inplace-hold-exp inplace-hold-out1 || fail=1
+compare inplace-hold-exp inplace-hold-out2 || fail=1
+
+Exit $fail
diff --git a/testsuite/inplace-selinux.sh b/testsuite/inplace-selinux.sh
new file mode 100755
index 0000000..783c3de
--- /dev/null
+++ b/testsuite/inplace-selinux.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+# Check which SELinux context a file gets when it is edited in place
+# through a symlink, with and without --follow-symlinks.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+require_selinux_
+
+# Probe: skip the test unless chcon can set both SELinux users used below.
+touch a || framework_failure_
+chcon -u system_u a || skip_ "chcon doesn't work"
+chcon -u user_u a || skip_ "chcon doesn't work"
+
+# Create the first file and symlink pointing at it.
+echo "Hello World" > inplace-selinux-file || framework_failure_
+ln -s ./inplace-selinux-file inplace-selinux-link || framework_failure_
+
+# Give the file and the symlink different SELinux users
+# (-h makes chcon act on the symlink itself).
+chcon -h -u system_u inplace-selinux-file || framework_failure_
+chcon -h -u user_u inplace-selinux-link || framework_failure_
+
+
+# Create the second file and symlink pointing at it.
+# These will be used with the --follow-symlinks option.
+echo "Hello World" > inplace-selinux-file2 || framework_failure_
+ln -s ./inplace-selinux-file2 inplace-selinux-link2 || framework_failure_
+
+chcon -h -u system_u inplace-selinux-file2 || framework_failure_
+chcon -h -u user_u inplace-selinux-link2 || framework_failure_
+
+# Modify prepared files inplace via the symlinks
+sed -i -e "s~Hello~Hi~" inplace-selinux-link || fail=1
+sed -i --follow-symlinks -e "s~Hello~Hi~" inplace-selinux-link2 || fail=1
+
+# Check selinux context - the first file should be created with the context
+# of the symlink...
+ls -Z inplace-selinux-link | grep user_u: || fail=1
+# ...the second file should use the context of the file itself.
+ls -Z inplace-selinux-file2 | grep system_u: || fail=1
+
+Exit $fail
diff --git a/testsuite/invalid-mb-seq-UMR.sh b/testsuite/invalid-mb-seq-UMR.sh
new file mode 100755
index 0000000..196db65
--- /dev/null
+++ b/testsuite/invalid-mb-seq-UMR.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Inserting an invalid multibyte sequence could lead to
+# reading uninitialized memory.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_valgrind_
+
+test "$LOCALE_JA" = none && skip_ found no Japanese EUC locale
+
+# Ensure the implementation is not buggy (skip otherwise)
+require_valid_ja_eucjp_locale_ "$LOCALE_JA"
+
+echo a > in || framework_failure_
+# Expected output: "b", the byte 0xB2 (octal 262), then "C".
+printf 'b\262C\n' > exp || framework_failure_
+# The replacement inserts the byte 0xB2 (\xb2) while \U upper-casing is
+# in effect.  valgrind exits with status 1 if it detects an error.
+LC_ALL=$LOCALE_JA valgrind --quiet --error-exitcode=1 \
+ sed 's/a/b\U\xb2c/' in > out 2> err || fail=1
+
+# Work around a bug in CentOS 5.10's valgrind
+# FIXME: remove in 2018 or when CentOS 5 is no longer officially supported
+grep 'valgrind: .*Assertion.*failed' err > /dev/null \
+ && skip_ 'you seem to have a buggy version of valgrind'
+
+# The output must match, and neither sed nor valgrind may have written
+# anything to stderr.
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/local.mk b/testsuite/local.mk
new file mode 100644
index 0000000..43623bf
--- /dev/null
+++ b/testsuite/local.mk
@@ -0,0 +1,212 @@
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+CLEANFILES += tmp* core *.core $(EXTRA_PROGRAMS) *.*out *.log
+
+TEST_EXTENSIONS = .sh .pl
+
+if HAVE_PERL
+TESTSUITE_PERL = $(PERL)
+else
+TESTSUITE_PERL = $(SHELL) $(srcdir)/no-perl
+endif
+
+# Options passed to the perl invocations running the perl test scripts.
+TESTSUITE_PERL_OPTIONS = -w -I$(srcdir)/testsuite -MCuSkip -MCoreutils
+# '$f' is set by the Automake-generated test harness to the path of the
+# current test script stripped of VPATH components, and is used by the
+# CuTmpdir module to determine the name of the temporary files to be
+# used. Note that $f is a shell variable, not a make macro, so the use
+# of '$$f' below is correct, and not a typo.
+TESTSUITE_PERL_OPTIONS += -M"CuTmpdir qw($$f)"
+
+SH_LOG_COMPILER = $(SHELL)
+PL_LOG_COMPILER = $(TESTSUITE_PERL) $(TESTSUITE_PERL_OPTIONS)
+
+# Ensure that anything not covered by the above evokes failure.
+LOG_COMPILER = false
+
+# Put new, init.sh-using tests here, so that each name
+# is listed in only one place.
+
+T = \
+ testsuite/misc.pl \
+ testsuite/bug32082.sh \
+ testsuite/bug32271-1.sh \
+ testsuite/bug32271-2.sh \
+ testsuite/cmd-l.sh \
+ testsuite/cmd-R.sh \
+ testsuite/colon-with-no-label.sh \
+ testsuite/comment-n.sh \
+ testsuite/compile-errors.sh \
+ testsuite/compile-tests.sh \
+ testsuite/convert-number.sh \
+ testsuite/command-endings.sh \
+ testsuite/debug.pl \
+ testsuite/execute-tests.sh \
+ testsuite/help-version.sh \
+ testsuite/in-place-hyphen.sh \
+ testsuite/in-place-suffix-backup.sh \
+ testsuite/inplace-selinux.sh \
+ testsuite/invalid-mb-seq-UMR.sh \
+ testsuite/mb-bad-delim.sh \
+ testsuite/mb-charclass-non-utf8.sh \
+ testsuite/mb-match-slash.sh \
+ testsuite/mb-y-translate.sh \
+ testsuite/missing-filename.sh \
+ testsuite/newline-dfa-bug.sh \
+ testsuite/normalize-text.sh \
+ testsuite/nulldata.sh \
+ testsuite/obinary.sh \
+ testsuite/panic-tests.sh \
+ testsuite/posix-char-class.sh \
+ testsuite/posix-mode-addr.sh \
+ testsuite/posix-mode-bad-ref.sh \
+ testsuite/posix-mode-ERE.sh \
+ testsuite/posix-mode-s.sh \
+ testsuite/posix-mode-N.sh \
+ testsuite/range-overlap.sh \
+ testsuite/recursive-escape-c.sh \
+ testsuite/regex-errors.sh \
+ testsuite/regex-max-int.sh \
+ testsuite/sandbox.sh \
+ testsuite/stdin-prog.sh \
+ testsuite/subst-options.sh \
+ testsuite/subst-mb-incomplete.sh \
+ testsuite/subst-replacement.sh \
+ testsuite/temp-file-cleanup.sh \
+ testsuite/title-case.sh \
+ testsuite/unbuffered.sh
+
+if TEST_SYMLINKS
+T += testsuite/follow-symlinks.sh \
+ testsuite/follow-symlinks-stdin.sh
+endif
+
+# Old tests converted to newer init.sh style
+T += testsuite/8bit.sh \
+ testsuite/8to7.sh \
+ testsuite/badenc.sh \
+ testsuite/binary.sh \
+ testsuite/bsd-wrapper.sh \
+ testsuite/dc.sh \
+ testsuite/distrib.sh \
+ testsuite/eval.sh \
+ testsuite/help.sh \
+ testsuite/inplace-hold.sh \
+ testsuite/mac-mf.sh \
+ testsuite/madding.sh \
+ testsuite/newjis.sh \
+ testsuite/stdin.sh \
+ testsuite/utf8-ru.sh \
+ testsuite/uniq.sh \
+ testsuite/word-delim.sh \
+ testsuite/xemacs.sh
+
+TESTS = $(SEDTESTS) $(T)
+
+SEDTESTS =
+
+noinst_HEADERS += testsuite/testcases.h testsuite/ptestcases.h
+
+check_PROGRAMS = testsuite/get-mb-cur-max testsuite/test-mbrtowc
+testsuite_get_mb_cur_max_LDADD = lib/libsed.a $(INTLLIBS)
+testsuite_test_mbrtowc_LDADD = lib/libsed.a $(INTLLIBS)
+
+# Note that the first lines are statements. They ensure that environment
+# variables that can perturb tests are unset or set to expected values.
+# The rest are NAME=value arguments of one export_with_values command, so
+# no ';' may precede the final line; they pass build settings to tests.
+TESTS_ENVIRONMENT = \
+ tmp__=$${TMPDIR-/tmp}; \
+ test -d "$$tmp__" && test -w "$$tmp__" || tmp__=.; \
+ . $(srcdir)/testsuite/envvar-check; \
+ TMPDIR=$$tmp__; export TMPDIR; \
+ \
+ if test -n "$$BASH_VERSION" || (eval "export v=x") 2>/dev/null; then \
+ export_with_values () { export "$$@"; }; \
+ else \
+ export_with_values () \
+ { \
+ sed_extract_var='s/=.*//'; \
+ sed_quote_value="s/'/'\\\\''/g;s/=\\(.*\\)/='\\1'/";\
+ for arg in "$$@"; do \
+ var=`echo "$$arg" | sed "$$sed_extract_var"`; \
+ arg=`echo "$$arg" | sed "$$sed_quote_value"`; \
+ eval "$$arg"; \
+ export "$$var"; \
+ done; \
+ }; \
+ fi; \
+ \
+ export_with_values \
+ VERSION='$(VERSION)' \
+ LOCALE_FR='$(LOCALE_FR)' \
+ LOCALE_FR_UTF8='$(LOCALE_FR_UTF8)' \
+ LOCALE_JA='$(LOCALE_JA)' \
+ AWK=$(AWK) \
+ LC_ALL=C \
+ abs_top_builddir='$(abs_top_builddir)' \
+ abs_top_srcdir='$(abs_top_srcdir)' \
+ abs_srcdir='$(abs_srcdir)' \
+ built_programs=sed \
+ srcdir='$(srcdir)' \
+ top_srcdir='$(top_srcdir)' \
+ CC='$(CC)' \
+ SED_TEST_NAME=`echo $$tst|sed 's,^\./,,;s,/,-,g'` \
+ MAKE=$(MAKE) \
+ MALLOC_PERTURB_=$(MALLOC_PERTURB_) \
+ PACKAGE_BUGREPORT='$(PACKAGE_BUGREPORT)' \
+ PACKAGE_VERSION=$(PACKAGE_VERSION) \
+ PERL='$(PERL)' \
+ SHELL='$(SHELL)' \
+ PATH='$(abs_top_builddir)/sed$(PATH_SEPARATOR)'"$$PATH" \
+ $(LOCALCHARSET_TESTS_ENVIRONMENT) \
+ ; 9>&2
+
+EXTRA_DIST += \
+ $(T) \
+ testsuite/Coreutils.pm \
+ testsuite/CuSkip.pm \
+ testsuite/CuTmpdir.pm \
+ testsuite/init.sh init.cfg \
+ testsuite/envvar-check \
+ testsuite/PCRE.tests testsuite/BOOST.tests testsuite/SPENCER.tests \
+ testsuite/8bit.good \
+ testsuite/8bit.inp \
+ testsuite/binary.sed \
+ testsuite/binary2.sed \
+ testsuite/binary3.sed \
+ testsuite/bsd.good \
+ testsuite/bsd.sh \
+ testsuite/dc.sed \
+ testsuite/distrib.inp \
+ testsuite/mac-mf.good \
+ testsuite/mac-mf.inp \
+ testsuite/mac-mf.sed \
+ testsuite/madding.good \
+ testsuite/madding.inp \
+ testsuite/madding.sed \
+ testsuite/uniq.good \
+ testsuite/uniq.inp \
+ testsuite/uniq.sed \
+ testsuite/xemacs.good \
+ testsuite/xemacs.inp
+
+# automake makes `check' depend on $(TESTS). Declare dummy phony targets
+# for the $(SEDTESTS) part of it so that make does not complain.
+
+.PHONY: $(SEDTESTS)
+$(SEDTESTS):
diff --git a/testsuite/mac-mf.good b/testsuite/mac-mf.good
new file mode 100644
index 0000000..4e8bc45
--- /dev/null
+++ b/testsuite/mac-mf.good
@@ -0,0 +1,200 @@
+## config:mac-pre.in
+## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree
+
+#
+# MPW-style lines for the MakeFile
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+#
+# This first part is long enough that NFS:Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make
+
+#
+# End of MPW-style lines for MakeFile
+#
+
+WHAT = mac
+
+# Directory syntax
+R=
+C=
+S=:
+U=:
+
+BUILDTOP = :::
+srcdir =
+
+# FIXME This doesn't translate to MPW yet, srcdir must be same as objdir
+# File in object dir can come from either the current dir or srcdir
+#
+# . . "{srcdir}"
+
+# Default rule that puts each file into separate segment
+
+.c.o .c
+ {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o
+
+CPPFLAGS = -i {SRCTOP}:include -i {BUILDTOP}:include -i {SRCTOP}:include:krb5 -i {BUILDTOP}:include:krb5 -i {CIncludes}
+DEFS = {CPPFLAGS}
+CC = c
+LD = link
+# The funny quoting in the LDFLAGS is to avoid xxx.c.o being mangled by
+# mac-mf.sed into xxx.c.o
+LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o
+CCOPTS =
+LIBS =
+KRB5ROOT= @KRB5ROOT@
+KRB4=@KRB4@
+INSTALL=Duplicate -y
+INSTALL_PROGRAM=Duplicate -y
+INSTALL_DATA=Duplicate -y
+INSTALL_SETUID=Duplicate -y
+
+KRB5MANROOT = {KRB5ROOT}{S}man
+ADMIN_BINDIR = {KRB5ROOT}{S}admin
+SERVER_BINDIR = {KRB5ROOT}{S}sbin
+CLIENT_BINDIR = {KRB5ROOT}{S}bin
+ADMIN_MANDIR = {KRB5MANROOT}{S}man8
+SERVER_MANDIR = {KRB5MANROOT}{S}man8
+CLIENT_MANDIR = {KRB5MANROOT}{S}man1
+FILE_MANDIR = {KRB5MANROOT}{S}man5
+KRB5_LIBDIR = {KRB5ROOT}{S}lib
+KRB5_INCDIR = {KRB5ROOT}{S}include
+KRB5_INCSUBDIRS =
+ {KRB5_INCDIR}{S}krb5
+ {KRB5_INCDIR}{S}asn.1
+ {KRB5_INCDIR}{S}kerberosIV
+
+
+RM = Delete -y -i
+CP = Duplicate -y
+MV = mv -f
+CHMOD=chmod
+RANLIB = @RANLIB@
+ARCHIVE = @ARCHIVE@
+ARADD = @ARADD@
+LN = @LN_S@
+AWK = @AWK@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+YACC = @YACC@
+
+# FIXME This won't work for srcdir != objdir. But on the Mac, there
+# is no easy way to build a relative or absolute path, because means
+# both the path separator, and the "go up a directory" indicator
+#SRCTOP = {srcdir}{S}{BUILDTOP}
+SRCTOP = {BUILDTOP}
+SUBDIRS = @subdirs@
+
+TOPLIBD = {BUILDTOP}{S}lib
+
+OBJEXT = c.o
+LIBEXT = a
+EXEEXT =
+
+all 陳
+# Generated automatically from Makefile.in by configure
+CFLAGS = {CCOPTS} {DEFS} -i ::des
+
+##DOSBUILDTOP = ..\..\:
+##DOSLIBNAME=..\crypto.lib
+##DOS!include {BUILDTOP}\config\windows.in
+
+OBJS= md5.{OBJEXT} md5glue.{OBJEXT} md5crypto.{OBJEXT}
+
+SRCS= md5.c md5glue.c md5crypto.c
+
+all 陳 {OBJS}
+
+t_mddriver t_mddriver.c.o md5.c.o
+ Link {LDFLAGS} -o t_mddriver t_mddriver.c.o md5.c.o
+
+t_mddriver.exe
+ {CC} {CFLAGS2} -o t_mddriver.exe t_mddriver.c md5.c
+
+check 陳 t_mddriver{EXEEXT}
+ {C}t_mddriver{EXEEXT} -x
+
+clean 陳
+ {RM} t_mddriver{EXEEXT} t_mddriver.{OBJEXT}
+# config:post.in
+# put all 陳 first just in case no other rules occur here
+#
+all 陳
+
+check 陳
+
+clean 陳 clean-{WHAT}
+ {RM} config.log pre.c.out post.c.out Makefile.c.out
+
+clean-unix 陳
+ if test -n "{OBJS}" ; then {RM} {OBJS}; else ; fi
+
+clean-windows 陳
+ {RM} .{OBJEXT}
+ {RM} msvc.pdb .err
diff --git a/testsuite/mac-mf.inp b/testsuite/mac-mf.inp
new file mode 100644
index 0000000..7d80191
--- /dev/null
+++ b/testsuite/mac-mf.inp
@@ -0,0 +1,200 @@
+## config/mac-pre.in
+## common Macintosh prefix for all Makefile.in in the Kerberos V5 tree.
+
+#
+# MPW-style lines for the MakeFile.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+#
+# This first part is long enough that NFS/Share doesn't notice the non-ASCII
+# characters in the rest of the file, so it claims that the file is type
+# TEXT, which is what we want. The non-ASCII chars are necessary for MPW
+# Make.
+
+#
+# End of MPW-style lines for MakeFile.
+#
+
+WHAT = mac
+
+# Directory syntax:
+R=
+C=
+S=:
+U=:
+
+BUILDTOP = ../../..
+srcdir = .
+
+# FIXME: This doesn't translate to MPW yet, srcdir must be same as objdir.
+# File in object dir can come from either the current dir or srcdir.
+#
+# . : . "{srcdir}"
+
+# Default rule that puts each file into separate segment.
+
+.c.o: .c
+ {CC} {DepDir}{Default}.c {CFLAGS} -s {Default} -o {TargDir}{Default}.c.o
+
+CPPFLAGS = -I$(SRCTOP)/include -I$(BUILDTOP)/include -I$(SRCTOP)/include/krb5 -I$(BUILDTOP)/include/krb5 -i {CIncludes}
+DEFS = $(CPPFLAGS)
+CC = c
+LD = link
+# The funny quoting in the LDFLAGS is to avoid xxx.o being mangled by
+# mac-mf.sed into xxx.c.o.
+LDFLAGS=-t MPST -c "MPS " -sym on {Libraries}"Runtime."o {CLibraries}"StdClib."o {Libraries}"ToolLibs."o {Libraries}"Interface."o
+CCOPTS =
+LIBS =
+KRB5ROOT= @KRB5ROOT@
+KRB4=@KRB4@
+INSTALL=Duplicate -y
+INSTALL_PROGRAM=Duplicate -y
+INSTALL_DATA=Duplicate -y
+INSTALL_SETUID=Duplicate -y
+
+KRB5MANROOT = $(KRB5ROOT)$(S)man
+ADMIN_BINDIR = $(KRB5ROOT)$(S)admin
+SERVER_BINDIR = $(KRB5ROOT)$(S)sbin
+CLIENT_BINDIR = $(KRB5ROOT)$(S)bin
+ADMIN_MANDIR = $(KRB5MANROOT)$(S)man8
+SERVER_MANDIR = $(KRB5MANROOT)$(S)man8
+CLIENT_MANDIR = $(KRB5MANROOT)$(S)man1
+FILE_MANDIR = $(KRB5MANROOT)$(S)man5
+KRB5_LIBDIR = $(KRB5ROOT)$(S)lib
+KRB5_INCDIR = $(KRB5ROOT)$(S)include
+KRB5_INCSUBDIRS = \
+ $(KRB5_INCDIR)$(S)krb5 \
+ $(KRB5_INCDIR)$(S)asn.1 \
+ $(KRB5_INCDIR)$(S)kerberosIV
+
+
+RM = Delete -y -i
+CP = Duplicate -y
+MV = mv -f
+CHMOD=chmod
+RANLIB = @RANLIB@
+ARCHIVE = @ARCHIVE@
+ARADD = @ARADD@
+LN = @LN_S@
+AWK = @AWK@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+YACC = @YACC@
+
+# FIXME: This won't work for srcdir != objdir. But on the Mac, there
+# is no easy way to build a relative or absolute path, because : means
+# both the path separator, and the "go up a directory" indicator.
+#SRCTOP = $(srcdir)$(S)$(BUILDTOP)
+SRCTOP = $(BUILDTOP)
+SUBDIRS = @subdirs@
+
+TOPLIBD = $(BUILDTOP)$(S)lib
+
+OBJEXT = c.o
+LIBEXT = a
+EXEEXT =
+
+all::
+# Generated automatically from Makefile.in by configure.
+CFLAGS = $(CCOPTS) $(DEFS) -I$(srcdir)/../des
+
+##DOSBUILDTOP = ..\..\..
+##DOSLIBNAME=..\crypto.lib
+##DOS!include $(BUILDTOP)\config\windows.in
+
+OBJS= md5.$(OBJEXT) md5glue.$(OBJEXT) md5crypto.$(OBJEXT)
+
+SRCS= $(srcdir)/md5.c $(srcdir)/md5glue.c $(srcdir)/md5crypto.c
+
+all:: $(OBJS)
+
+t_mddriver: t_mddriver.o md5.o
+ $(CC) $(CFLAGS) $(LDFLAGS) -o t_mddriver t_mddriver.o md5.o
+
+t_mddriver.exe:
+ $(CC) $(CFLAGS2) -o t_mddriver.exe t_mddriver.c md5.c
+
+check:: t_mddriver$(EXEEXT)
+ $(C)t_mddriver$(EXEEXT) -x
+
+clean::
+ $(RM) t_mddriver$(EXEEXT) t_mddriver.$(OBJEXT)
+# config/post.in
+# put all:: first just in case no other rules occur here
+#
+all::
+
+check::
+
+clean:: clean-$(WHAT)
+ $(RM) config.log pre.out post.out Makefile.out
+
+clean-unix::
+ if test -n "$(OBJS)" ; then $(RM) $(OBJS); else :; fi
+
+clean-windows::
+ $(RM) *.$(OBJEXT)
+ $(RM) msvc.pdb *.err
diff --git a/testsuite/mac-mf.sed b/testsuite/mac-mf.sed
new file mode 100644
index 0000000..680ce89
--- /dev/null
+++ b/testsuite/mac-mf.sed
@@ -0,0 +1,155 @@
+# Rewrite default rules from .c.o: to .c.o: .c
+/^\./s/^\(\.[a-z]*\)\(\.[a-z]*\)\( *: *\)$/\1\2\3 \1/
+
+# Change dependency char.
+/::/s/::/ \\Option-f\\Option-f /g
+/:/s/:/ \\Option-f /g
+/^[SU]=/s/ \\Option-f /:/g
+
+# Change syntax of Makefile vars.
+/\$/s/\${\([a-zA-Z0-9_]*\)}/{\1}/g
+/\$/s/\$(\([a-zA-Z0-9_]*\))/{\1}/g
+
+# Change $@ to {targ}
+/\$@/s/\$@/{targ}/g
+
+# Change pathname syntax.
+#
+# If line ends with .. then assume it sets a variable that will
+# be used to prefix something else -- eliminate one colon, assuming
+# that a slash after the ${name} will turn into the missing colon.
+# Mac pathname conventions are IRREGULAR and UGLY!
+/\./s,\.\./\.\.$,::,
+/\./s,\.\.$,:,
+# Same if it ends with . (a single dot); turn it into nothing.
+/\./s,\.$,,g
+# Rules for .. and . elsewhere in the line
+# Convert ../: to ::, recur to get whole paths.
+/\./s,\.\./:,::,g
+# Convert ../../ to :::
+/\./s,\.\./\.\./,:::,g
+/\./s,\.\./,::,g
+/\.\//s,\./,:,g
+/\//s,/,:,g
+
+/=/s/ = \.$/ = :/
+
+# Comment out any explicit srcdir setting.
+# /srcdir/s/^srcdir/# srcdir/
+
+/version/s/^version=/# version=/
+
+/BASEDIR/s/^BASEDIR =.*$/BASEDIR = "{srcroot}"/
+/{BASEDIR}:/s/{BASEDIR}:/{BASEDIR}/g
+# The original lines screw up -I$(srcdir)/../des by eliminating a colon.
+# Proposed fix: Eliminate srcdir prefixes totally.
+#/{srcdir}:/s/{srcdir}:/"{srcdir}"/g
+/{srcdir}:/s/{srcdir}://g
+#/"{srcdir}":/s/"{srcdir}":/"{srcdir}"/g
+
+# Comment out settings of anything set by mpw host config.
+##/CC/s/^CC *=/#CC =/
+##/CFLAGS/s/^CFLAGS *=/#CFLAGS =/
+##/LDFLAGS/s/^LDFLAGS *=/#LDFLAGS =/
+
+# Change -I usage.
+/-I/s/-I\./-i :/g
+/-I/s/-I::bfd/-i ::bfd:/g
+/-I/s/-I::include/-i ::include:/g
+/-I/s/-I/-i /g
+
+# Change -D usage.
+/-D/s/\([ =]\)-D\([^ ]*\)/\1-d \2/g
+
+# Change continuation char.
+/\\$/s/\\$/\\Option-d/
+
+# Change wildcard char.
+/^[^#]/s/\*/\\Option-x/g
+
+# Change path of various types of source files.
+#/\.[chly]/s/\([ ><=]\)\([-a-zA-Z0-9_$:"]*\)\.\([chly]\)/\1"{s}"\2.\3/g
+#/\.[chly]/s/^\([-a-zA-Z0-9_${}:"]*\)\.\([chly]\)/"{s}"\1.\2/g
+# Skip the {s} and {o} business for now...
+# Fix some overenthusiasms.
+#/{s}/s/"{s}""{srcdir}"/"{srcdir}"/g
+#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)dir}/"{\1dir}"/g
+#/{s}/s/"{s}"{\([a-zA-Z0-9_]*\)DIR}/"{\1DIR}"/g
+#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)dir}"/"{\1dir}"/g
+#/{s}/s/"{s}""{\([a-zA-Z0-9_]*\)DIR}"/"{\1DIR}"/g
+#/{s}/s/"{s}":/:/g
+#/{s}/s/^"{s}"//g
+#/^\./s/"{s}"\././g
+
+# Change extension and path of objects, except in the OBJEXT line.
+#/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1"{o}"\2.c.o/g
+#/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/"{o}"\1.c.o/g
+# Skip the {o} stuff for now...
+/^OBJEXT/!s/\([ =]\)\([-a-zA-Z0-9_${}:"]*\)\.o/\1\2.c.o/g
+/\.o/s/^\([-a-zA-Z0-9_${}:"]*\)\.o/\1.c.o/g
+# Clean up.
+#/\.o/s/"{o}""{o}"/"{o}"/g
+#/{o}/s/^"{o}"\([a-zA-Z0-9_]*\)=/\1=/g
+
+# Change extension of libs.
+# /\.a/s/lib\([a-z]*\)\.a/lib\1.o/g
+
+# Remove non-echo option.
+/^ -/s/^ -/ /
+
+# Change cp to duplicate.
+# /cp/s/^\([ ]*\)cp /\1Duplicate -d -y /
+# Change mv to rename.
+# /mv/s/^\([ ]*\)mv /\1Rename -y /
+# /Rename/s/^\([ ]*\)Rename -y -f/\1Rename -y/
+# Change rm to delete.
+/^RM=/s/rm -f/Delete -i -y/
+# /rm/s/^\([ ]*\)rm /\1Delete -y /
+# /Delete/s/^\([ ]*\)Delete -y -f/\1Delete -y/
+# Comment out symlinking.
+# /ln/s/^\([ ]*\)ln /\1# ln /
+
+# Remove -c from explicit compiler calls.
+# /-c/s/{CC}\(.*\) -c \(.*\)\([-a-z]*\)\.c/{CC}\1 \2\3.c -o "{o}"\3.c.o/g
+# Don't ask... prev subst seems to omit the second filename.
+# /-o/s/\([-a-z]*\)\.c -o "{o}".c.o/\1\.c -o "{o}"\1.c.o/
+
+# Change linking cc to link.
+/LDFLAGS/ s/{CC} \(.*\){CFLAGS}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/
+/CFLAGS_LINK/s/{CC} \(.*\){CFLAGS_LINK}\(.*\){LDFLAGS}/Link \1 \2 {LDFLAGS}/
+
+# Comment out .PHONY rules.
+/\.PHONY/s/^\.PHONY/# \.PHONY/
+# Comment out .SUFFIXES rules.
+/\.SUFFIXES/s/^\.SUFFIXES/# \.SUFFIXES/
+# Comment out .PRECIOUS rules.
+/\.PRECIOUS/s/^\.PRECIOUS/# \.PRECIOUS/
+## Comment out default rules.
+##/^\./s/^\(\.[a-z]*\.[a-z]* \)/# \1/
+
+#
+# End of original hack-mf.sed
+#
+# Begin original hack-mf2.sed
+#
+# Transform expressions.
+
+# Set the install program appropriate.
+# /INSTALL/s/^INSTALL *= *`.*`:install.sh -c/INSTALL = Duplicate -y/
+
+# Include from the extra-include dir.
+# /^INCLUDES = /s/^INCLUDES = /INCLUDES = -i "{srcroot}"extra-include /
+
+# Yuck - remove unconverted autoconf things.
+# /@/s/@[^ ]*@//g
+
+# Hackery, pure and simple
+# To speed up compiles, remove duplicated -i options.
+/-i/s/\(-i [^ ]*\) \1 /\1 /g
+
+# Note! There are 8-bit characters in the three lines below:
+# 0xc4, 0xb6, 0xc5.
+/Option/s/\\Option-f//g
+/Option/s/\\Option-d//g
+/Option/s/\\Option-x//g
+s/ $//
diff --git a/testsuite/mac-mf.sh b/testsuite/mac-mf.sh
new file mode 100755
index 0000000..d91b223
--- /dev/null
+++ b/testsuite/mac-mf.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# Runner for old 'mac-mf' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# location of external test files
+dir="$abs_top_srcdir/testsuite"
+
+sed -f "$dir/"mac-mf.sed "$dir/mac-mf.inp" > mac-mf-out || fail=1
+remove_cr_inplace mac-mf-out
+compare "$dir/mac-mf.good" mac-mf-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/madding.good b/testsuite/madding.good
new file mode 100644
index 0000000..537ab50
--- /dev/null
+++ b/testsuite/madding.good
@@ -0,0 +1 @@
+The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude."
diff --git a/testsuite/madding.inp b/testsuite/madding.inp
new file mode 100644
index 0000000..2367bc8
--- /dev/null
+++ b/testsuite/madding.inp
@@ -0,0 +1 @@
+The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity."
diff --git a/testsuite/madding.sed b/testsuite/madding.sed
new file mode 100644
index 0000000..5494f2b
--- /dev/null
+++ b/testsuite/madding.sed
@@ -0,0 +1,8 @@
+# this is from Thomas Hardy's _Far From the Madding Crowd_.
+#
+# cf ftp://ftp.cdrom.com/pub/gutenberg/etext94/crowd10a.txt
+#
+# the point of this test, in case it isn't obvious, is to overfill fixed
+# buffers wherever they might be.
+#
+s/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. 
The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. 
Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity."/The girl on the summit of the load sat motionless, surrounded by tables and chairs with their legs upwards, backed by an oak settle, and ornamented in front by pots of geraniums, myrtles, and cactuses, together with a caged canary -- all probably from the windows of the house just vacated. There was also a cat in a willow basket, from the partly-opened lid of which she gazed with half-closed eyes, and affectionately-surveyed the small birds around. 
The handsome girl waited for some time idly in her place, and the only sound heard in the stillness was the hopping of the canary up and down the perches of its prison. Then she looked attentively downwards. It was not at the bird, nor at the cat; it was at an oblong package tied in paper, and lying between them. She turned her head to learn if the waggoner were coming. He was not yet in sight; and her eyes crept back to the package, her thoughts seeming to run upon what was inside it. At length she drew the article into her lap, and untied the paper covering; a small swing looking- glass was disclosed, in which she proceeded to survey herself attentively. She parted her lips and smiled. It was a fine morning, and the sun lighted up to a scarlet glow the crimson jacket she wore, and painted a soft lustre upon her bright face and dark hair. The myrtles, geraniums, and cactuses packed around her were fresh and green, and at such a leafless season they invested the whole concern of horses, waggon, furniture, and girl with a peculiar vernal charm. What possessed her to indulge in such a performance in the sight of the sparrows, blackbirds, and unperceived farmer who were alone its spectators, -- whether the smile began as a factitious one, to test her capacity in that art, -- nobody knows; it ended certainly in a real smile. She blushed at herself, and seeing her reflection blush, blushed the more. The change from the customary spot and necessary occasion of such an act -- from the dressing hour in a bedroom to a time of travelling out of doors -- lent to the idle deed a novelty it did not intrinsically possess. The picture was a delicate one. Woman's prescriptive infirmity had stalked into the sunlight, which had clothed it in the freshness of an originality. A cynical inference was irresistible by Gabriel Oak as he regarded the scene, generous though he fain would have been. There was no necessity whatever for her looking in the glass. 
She did not adjust her hat, or pat her hair, or press a dimple into shape, or do one thing to signify that any such intention had been her motive in taking up the glass. She simply observed herself as a fair product of Nature in the feminine kind, her thoughts seeming to glide into far-off though likely dramas in which men would play a part -- vistas of probable triumphs -- the smiles being of a phase suggesting that hearts were imagined as lost and won. Still, this was but conjecture, and the whole series of actions was so idly put forth as to make it rash to assert that intention had any part in them at all. The waggoner's steps were heard returning. She put the glass in the paper, and the whole again into its place. When the waggon had passed on, Gabriel withdrew from his point of espial, and descending into the road, followed the vehicle to the turnpike-gate some way beyond the bottom of the hill, where the object of his contemplation now halted for the payment of toll. About twenty steps still remained between him and the gate, when he heard a dispute. It was a difference concerning twopence between the persons with the waggon and the man at the toll-bar. "Mis'ess's niece is upon the top of the things, and she says that's enough that I've offered ye, you great miser, and she won't pay any more." These were the waggoner's words. "Very well; then mis'ess's niece can't pass," said the turnpike-keeper, closing the gate. Oak looked from one to the other of the disputants, and fell into a reverie. There was something in the tone of twopence remarkably insignificant. Threepence had a definite value as money -- it was an appreciable infringement on a day's wages, and, as such, a higgling matter; but twopence -- "Here," he said, stepping forward and handing twopence to the gatekeeper; "let the young woman pass." He looked up at her then; she heard his words, and looked down. 
Gabriel's features adhered throughout their form so exactly to the middle line between the beauty of St. John and the ugliness of Judas Iscariot, as represented in a window of the church he attended, that not a single lineament could be selected and called worthy either of distinction or notoriety. The red-jacketed and dark-haired maiden seemed to think so too, for she carelessly glanced over him, and told her man to drive on. She might have looked her thanks to Gabriel on a minute scale, but she did not speak them; more probably she felt none, for in gaining her a passage he had lost her her point, and we know how women take a favour of that kind. The gatekeeper surveyed the retreating vehicle. "That's a handsome maid," he said to Oak. "But she has her faults," said Gabriel. "True, farmer." "And the greatest of them is -- well, what it is always." "Beating people down? ay, 'tis so." "O no." "What, then?" Gabriel, perhaps a little piqued by the comely traveller's indifference, glanced back to where he had witnessed her performance over the hedge, and said, "Vanity, dude."/
diff --git a/testsuite/madding.sh b/testsuite/madding.sh
new file mode 100755
index 0000000..b2ea96f
--- /dev/null
+++ b/testsuite/madding.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# Runner for old 'madding' test (long lines)
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# location of external test files
+dir="$abs_top_srcdir/testsuite"
+
+sed -f "$dir/"madding.sed "$dir/madding.inp" > madding-out || fail=1
+remove_cr_inplace madding-out
+compare "$dir/madding.good" madding-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/mb-bad-delim.sh b/testsuite/mb-bad-delim.sh
new file mode 100755
index 0000000..9ef16b5
--- /dev/null
+++ b/testsuite/mb-bad-delim.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+# Test 's' and 'y' non-slash delimiters in multibyte locales
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# These tests use the following unicode character in various ways:
+# GREEK CAPITAL LETTER PHI (U+03A6)
+# UTF-8: hex: 0xCE 0xA6
+# oct: 0316 0246
+# bin: 11001110 10100110
+#
+# Octal encoding is used due to printf not supporting hex on older systems.
+# Using the first octet alone (\316) causes various multibyte related functions
+# to return '-2' (incomplete multibyte sequence).
+# using the second octet alone (\246) causes the same functions to return '-1'
+# (invalid multibyte sequence).
+
+
+# Reject a valid multibyte delimiter (instead of slash).
+printf 's\316\246a\316\246b\316\246' > prog1 || framework_failure_
+
+cat <<\EOF > exp-err1 || framework_failure_
+sed: file prog1 line 1: delimiter character is not a single-byte character
+EOF
+
+returns_ 1 env LC_ALL=en_US.UTF-8 sed -f prog1 < /dev/null 2>err1 || fail=1
+compare_ exp-err1 err1 || fail=1
+
+
+# Reject an incomplete multibyte delimiter (instead of slash).
+# This is an implementation-specific behavior:
+# error is triggered upon first octet, before entire multibyte character
+# is scanned.
+printf 's\316a\316b\316' > prog2 || framework_failure_
+
+cat <<\EOF > exp-err2 || framework_failure_
+sed: file prog2 line 1: delimiter character is not a single-byte character
+EOF
+
+returns_ 1 env LC_ALL=en_US.UTF-8 sed -f prog2 </dev/null 2>err2 || fail=1
+compare_ exp-err2 err2 || fail=1
+
+# ... but accept octet \316 as delimiter in C locale
+echo a > in2 || framework_failure_
+echo b > exp2 || framework_failure_
+LC_ALL=C sed -f prog2 <in2 >out2 || fail=1
+compare_ exp2 out2 || fail=1
+
+
+
+# An invalid multibyte sequence is treated as a valid single byte,
+# thus accepted as a delimiter (instead of slash).
+# This is an implementation-specific behavior.
+printf 's\246a\246b\246' > prog3 || framework_failure_
+echo a > in3 || framework_failure_
+echo b > exp3 || framework_failure_
+
+LC_ALL=en_US.UTF-8 sed -f prog3 <in3 >out3 || fail=1
+compare_ exp3 out3 || fail=1
+
+# Expect identical result in C locale
+LC_ALL=C sed -f prog3 <in3 >out4 || fail=1
+compare_ exp3 out4 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/mb-charclass-non-utf8.sh b/testsuite/mb-charclass-non-utf8.sh
new file mode 100755
index 0000000..7bf8951
--- /dev/null
+++ b/testsuite/mb-charclass-non-utf8.sh
@@ -0,0 +1,130 @@
+#!/bin/sh
+# Test multibyte locale which is not UTF-8 (ja_JP.shift_jis)
+# This is a stateful locale. Same byte value can be either
+# a single-byte character, or the second byte of a multibyte
+# character.
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# If found, LOCALE_JA_SJIS will contain the locale name.
+require_ja_shiftjis_locale_
+
+# Ensure the implementation is not buggy (skip otherwise)
+require_valid_ja_shiftjis_locale_ "$LOCALE_JA_SJIS"
+
+# This test uses two characters:
+# Unicode Character 'KATAKANA LETTER ZE' (U+30BC)
+# Unicode Character 'KATAKANA LETTER ZO' (U+30BE)
+#
+# In SHIFT-JIS locale, these multibyte characters contain
+# open/close brackets (ASCII 0x5B/0x5D) as the trailing byte.
+#
+# See also:
+# https://en.wikipedia.org/wiki/Shift_JIS
+# http://www.rikai.com/library/kanjitables/kanji_codes.sjis.shtml
+
+# Unicode Character 'KATAKANA LETTER ZE' (U+30BC)
+#
+# UTF-8: hex: 0xE3 0x82 0xBC
+# bin: 11100011 10000010 10111100
+#
+# Shift-jis hex: 0x83 0x5B
+# oct: 203 133
+# bin: 10000011 01011011
+#
+# Conversion example:
+# $ printf '\x83\x5B' | iconv -f SHIFT-JIS -t UTF-8 | od -tx1o1c
+# 0000000 e3 82 bc
+# 343 202 274
+# 343 202 274
+
+# Unicode Character 'KATAKANA LETTER ZO' (U+30BE)
+#
+# UTF-8: hex: 0xE3 0x82 0xBE
+# bin: 11100011 10000010 10111110
+#
+# Shift-jis hex: 0x83 0x5D
+# oct: 203 135
+# bin: 10000011 01011101
+#
+# Conversion example:
+# $ printf '\x83\x5D' | iconv -f SHIFT-JIS -t UTF-8 | od -tx1o1c
+# 0000000 e3 82 be
+# 343 202 276
+# 343 202 276
+#
+
+
+#
+# Tests 1,2: Test y/// command with multibyte, non-utf8 sequence.
+# Implementation notes: str_append() has special code path for non-utf8 cases.
+#
+
+# Test 1: valid multibyte sequence
+printf 'y/a/\203\133/' > p1 || framework_failure_
+echo Xa > in1 || framework_failure_
+printf 'X\203\133\n' > exp1 || framework_failure_
+
+LC_ALL="$LOCALE_JA_SJIS" sed -f p1 <in1 >out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# Test 2: invalid multibyte sequence, treated as two single-byte characters.
+printf 'y/aa/\203\060/' > p2 || framework_failure_
+LC_ALL="$LOCALE_JA_SJIS" sed -f p2 </dev/null 2>out2 || fail=1
+compare_ /dev/null out2 || fail=1
+
+#
+# Test 3: multibyte character class with these characters.
+#
+# Before sed-4.3, snarf_char_class would parse it incorrectly,
+# treating the first closing-bracket as closing the character-class,
+# instead of being part of a multibyte sequence.
+
+printf '/[\203]/]/p' > p3 || framework_failure_
+LC_ALL="$LOCALE_JA_SJIS" sed -f p3 </dev/null >out3 || fail=1
+compare_ /dev/null out3 || fail=1
+
+# Test 4:
+# Same as test 3, but with the other multibyte character.
+# (this did not cause a failure before sed-4.3, but the code was incorrect).
+# Keep this test for code-coverage purposes.
+printf '/[\203[/]/p' > p4 || framework_failure_
+LC_ALL="$LOCALE_JA_SJIS" sed -f p4 </dev/null >out4 || fail=1
+compare_ /dev/null out4 || fail=1
+
+# TODO: Find a locale in which ':.=' can be part of a valid multibyte octet.
+#
+# snarf_char_class specifically tests for five bytes: ':.=[]' .
+# '[' and ']' are tested above, yet '.:=' are not valid as part of a
+# multibyte shift-jis sequence.
+#
+# valid:
+# $ printf '\203]' | iconv -f SHIFT-JIS -t utf-8
+# $ printf '\203[' | iconv -f SHIFT-JIS -t utf-8
+#
+# invalid:
+# $ printf '\203:' | iconv -f SHIFT-JIS -t utf-8
+# iconv: (stdin):1:0: cannot convert
+#
+# $ printf '\203=' | iconv -f SHIFT-JIS -t utf-8
+# iconv: (stdin):1:0: cannot convert
+#
+# $ printf '\203.' | iconv -f SHIFT-JIS -t utf-8
+# iconv: (stdin):0:0: cannot convert
+
+Exit $fail
diff --git a/testsuite/mb-match-slash.sh b/testsuite/mb-match-slash.sh
new file mode 100755
index 0000000..c824a2e
--- /dev/null
+++ b/testsuite/mb-match-slash.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# Test slash following an incomplete multibyte character
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# before sed-4.3, a slash following an incomplete multibyte character
+# would be ignored during program compilation, leading to an error.
+
+
+# Test 1: match_slash in 's' command.
+# Before sed-4.3, this would fail with "unterminated `s' command".
+printf 's/\316/X/' > p1 || framework_failure_
+LC_ALL=en_US.UTF-8 sed -f p1 </dev/null >out1 || fail=1
+compare_ /dev/null out1 || fail=1
+
+# Test 2: match_slash in address regex.
+# Before sed-4.3, this would fail with "unterminated address regex".
+printf '/\316/p' >p2 || framework_failure_
+LC_ALL=en_US.UTF-8 sed -f p2 </dev/null >out2 || fail=1
+compare_ /dev/null out2 || fail=1
+
+# Test 3: match_slash in 'y' command.
+# Before sed-4.3, this would fail with "unterminated `y' command".
+printf 'y/\316/X/' >p3 || framework_failure_
+LC_ALL=en_US.UTF-8 sed -f p3 </dev/null >out3 || fail=1
+compare_ /dev/null out3 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/mb-y-translate.sh b/testsuite/mb-y-translate.sh
new file mode 100755
index 0000000..02238ec
--- /dev/null
+++ b/testsuite/mb-y-translate.sh
@@ -0,0 +1,134 @@
+#!/bin/sh
+# Test multibyte y/// translations
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# These tests use the following unicode characters in various ways:
+# GREEK CAPITAL LETTER PHI (U+03A6)
+# UTF-8: hex: 0xCE 0xA6
+# oct: 0316 0246
+# bin: 11001110 10100110
+#
+# GREEK CAPITAL LETTER DELTA (U+0394)
+# UTF-8: hex: 0xCE 0x94
+# oct: 0316 0224
+# bin: 11001110 10010100
+#
+# Octal encoding is used due to printf not supporting hex on older systems.
+# Using the first octet alone (\316) causes various multibyte related functions
+# to return '-2' (incomplete multibyte sequence).
+# using the second octet alone (\246) causes the same functions to return '-1'
+# (invalid multibyte sequence).
+
+
+#
+# Test 1: valid multibyte 'dest-chars'
+#
+printf 'y/a/\316\246/' > p1 || framework_failure_
+echo Xa > in1 || framework_failure_
+printf 'X\316\246\n' > exp1 || framework_failure_
+
+LC_ALL=en_US.UTF-8 sed -f p1 <in1 >out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# in C locale, report error of mismatched length
+cat <<\EOF > exp-err1 || framework_failure_
+sed: file p1 line 1: strings for `y' command are different lengths
+EOF
+returns_ 1 env LC_ALL=C sed -f p1 </dev/null 2>err1 || fail=1
+compare_ exp-err1 err1 || fail=1
+
+
+#
+# Test 2: valid multibyte 'src-chars'
+#
+printf 'y/\316\246/a/' > p2 || framework_failure_
+printf 'X\316\246\n' > in2 || framework_failure_
+echo Xa > exp2 || framework_failure_
+
+LC_ALL=en_US.UTF-8 sed -f p2 <in2 >out2 || fail=1
+compare_ exp2 out2 || fail=1
+
+# in C locale, report error of mismatched length
+cat <<\EOF > exp-err2 || framework_failure_
+sed: file p2 line 1: strings for `y' command are different lengths
+EOF
+returns_ 1 env LC_ALL=C sed -f p2 </dev/null 2>err2 || fail=1
+compare_ exp-err2 err2 || fail=1
+
+
+#
+# Tests 3-6: invalid/incomplete multibyte characters in src/dest.
+# All work as-is in C locale, treated as single-bytes in multibyte locales.
+# None should fail.
+
+# Test 3: invalid multibyte 'dest-chars'.
+echo Xa > in3 || framework_failure_
+printf 'y/a/\246/' > p3 || framework_failure_
+printf 'X\246\n' > exp3 || framework_failure_
+
+# Test 4: incomplete multibyte 'dest-chars'.
+echo Xa > in4 || framework_failure_
+printf 'y/a/\316/' > p4 || framework_failure_
+printf 'X\316\n' > exp4 || framework_failure_
+
+# Test 5: invalid multibyte 'src-chars'.
+printf 'X\246\n' > in5 || framework_failure_
+printf 'y/\246/a/' > p5 || framework_failure_
+echo Xa > exp5 || framework_failure_
+
+# Test 6: incomplete multibyte 'src-chars'.
+printf 'X\316\n' > in6 || framework_failure_
+printf 'y/\316/a/' > p6 || framework_failure_
+echo Xa > exp6 || framework_failure_
+
+for t in 3 4 5 6 ;
+do
+ for l in C en_US.UTF-8 ;
+ do
+ LC_ALL=$l sed -f p$t <in$t >out$t-$l || fail=1
+ compare_ exp$t out$t-$l || fail=1
+ done
+done
+
+
+#
+# Tests 7,8: length mismatch in multibyte locales
+# Implementation note: the code path for length check differs between
+# single-byte/multibyte locales. The actual characters don't have to be
+# multibyte themselves.
+printf 'y/abc/d/' > p7 || framework_failure_
+cat <<\EOF > exp-err7 || framework_failure_
+sed: file p7 line 1: strings for `y' command are different lengths
+EOF
+
+returns_ 1 env LC_ALL=en_US.UTF-8 sed -f p7 </dev/null 2>err7 || fail=1
+compare_ exp-err7 err7 || fail=1
+
+printf 'y/a/bcd/' > p8 || framework_failure_
+cat <<\EOF > exp-err8 || framework_failure_
+sed: file p8 line 1: strings for `y' command are different lengths
+EOF
+
+returns_ 1 env LC_ALL=en_US.UTF-8 sed -f p8 </dev/null 2>err8 || fail=1
+compare_ exp-err8 err8 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/misc.pl b/testsuite/misc.pl
new file mode 100644
index 0000000..3fa5b84
--- /dev/null
+++ b/testsuite/misc.pl
@@ -0,0 +1,1206 @@
+#!/usr/bin/perl
+# Test misc.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+use strict;
+use File::stat;
+
+(my $program_name = $0) =~ s|.*/||;
+
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $prog = 'sed';
+
+print "PATH = $ENV{PATH}\n";
+
+my @Tests =
+ (
+ ['empty', qw(-e ''), {IN=>''}, {OUT=>''}],
+ ['empty2', q('s/^ *//'), {IN=>"x\n\n"}, {OUT=>"x\n\n"}],
+
+ ['head', qw(3q), {IN=>"1\n2\n3\n4\n"}, {OUT=>"1\n2\n3\n"}],
+ ['space', q('s/_\S/XX/g;s/\s/_/g'),
+ {IN=> "Hello World\t!\nSecond_line_ of tests\n" },
+ {OUT=> "Hello_World_!\nSecondXXine__of_tests\n" }],
+
+ ['zero-anchor', qw(-z), q('N;N;s/^/X/g;s/^/X/mg;s/$/Y/g;s/$/Y/mg'),
+ {IN=>"a\0b\0c\0" },
+ {OUT=>"XXaY\0XbY\0XcYY\0" }],
+
+ ['case-insensitive', qw(-n), q('h;s/Version: *//p;g;s/version: *//Ip'),
+ {IN=>"Version: 1.2.3\n" },
+ {OUT=>"1.2.3\n1.2.3\n" },
+ ],
+
+ ['preserve-missing-EOL-at-EOF', q('s/$/x/'),
+ {IN=> "a\nb" },
+ {OUT=>"ax\nbx" },
+ ],
+
+ ['y-bracket', q('y/[/ /'),
+ {IN => "Are you sure (y/n)? [y]\n" },
+ {OUT=> "Are you sure (y/n)? y]\n" },
+ ],
+
+ ['y-zero', q('y/b/\x00/'),
+ {IN => "abc\n" },
+ {OUT=> "a\0c\n" },
+ ],
+
+ ['y-newline', q('H
+G
+y/Ss\nYy/yY$sS/'),
+ {IN => "Are you sure (y/n)? [y]\n" },
+ {OUT=> 'Are Sou Yure (S/n)? [S]$$Are Sou Yure (S/n)? [S]'."\n"},
+ ],
+
+ ['allsub', q('s/foo/bar/g'),
+ {IN => "foo foo fo oo f oo foo foo foo foo foo foo foo foo foo\n"},
+ {OUT=> "bar bar fo oo f oo bar bar bar bar bar bar bar bar bar\n"},
+ ],
+
+ ['insert-nl', qw(-f), {IN => "/foo/i\\\n"},
+ {IN => "bar\nfoo\n" },
+ {OUT=> "bar\n\nfoo\n" },
+ ],
+
+ ['recall',
+ # Check that the empty regex recalls the last *executed* regex,
+ # not the last *compiled* regex
+ qw(-f), {IN => "p;s/e/X/p;:x;s//Y/p;/f/bx"},
+ {IN => "eeefff\n" },
+ {OUT=> "eeefff\n"
+ . "Xeefff\n"
+ . "XYefff\n"
+ . "XYeYff\n"
+ . "XYeYYf\n"
+ . "XYeYYY\n"
+ . "XYeYYY\n"
+ },
+ ],
+
+ ['recall2',
+ # Starting from sed 4.1.3, regexes are compiled with REG_NOSUB
+ # if they are used in an address, so that the matcher does not
+ # have to obey leftmost-longest. The tricky part is to recompile
+ # them if they are then used in a substitution.
+ qw(-f), {IN => '/\(ab*\)\+/ s//>\1</g'},
+ {IN => "ababb||abbbabbbb\n" },
+ {OUT=> ">abb<||>abbbb<\n" },
+ ],
+
+ ['0range',
+ # Test address 0 (GNU extension)
+ # FIXME: This test does NOT actually fail if the address is changed to 1.
+ qw(-e '0,/aaa/d'),
+ {IN => "1\n"
+ . "2\n"
+ . "3\n"
+ . "4\n"
+ . "aaa\n"
+ . "yes\n"},
+ {OUT => "yes\n"}
+ ],
+
+ ['amp-escape',
+ # Test ampersand as escape sequence (ASCII 0x26), which should
+ # not have a special meaning (i.e. the 'matched pattern')
+ qw(-e 's/yes/yes\x26/'),
+ {IN => "yes\n"},
+ {OUT => "yes&\n"}
+ ],
+
+ ['appquit',
+ # Test 'a'ppend command before 'q'uit
+ qw(-f),
+ {IN => q(a\
+ok
+q)},
+ {IN => "doh\n"},
+ {OUT => "doh\n"
+ . "ok\n"}
+ ],
+
+
+ ['brackets',
+ qw(-f),
+ {IN => q(s/[[]/a/
+s/[[[]/b/
+s/[[[[]/c/
+s/[[[[[]/d/
+s/[[[[[[]/e/
+s/[[[[[[[]/f/
+s/[[[[[[[[]/g/
+s/[[[[[[[[[]/h/
+)},
+ {IN => "[[[[[[[[[\n"},
+ {OUT => "abcdefgh[\n"}
+ ],
+
+
+ ['bkslashes',
+ # Test backslashes in regex
+ # bug in sed 4.0b
+ qw(-f),
+ {IN => q(s/$/\\\\\
+/
+)},
+ {IN => "a\n"},
+ {OUT => "a\\\n"
+ . "\n"}
+ ],
+
+ ['classes',
+ # inspired by an autoconf generated configure script.
+ qw(-n -f),
+ {IN => 's/^\([/[:lower:]A-Z0-9]*_cv_[[:lower:][:upper:]/[:digit:]]*\)'.
+ '=\(.*\)/: \${\1=\'\2\'}/p'},
+ {IN => "_cv_=emptyvar\n"
+ . "ac_cv_prog/RANLIB=/usr/bin/ranlib\n"
+ . "ac_cv_prog/CC=/usr/unsupported/\\ \\ /lib/_cv_/cc\n"
+ . "a/c_cv_prog/CPP=/usr/bin/cpp\n"
+ . "SHELL=bash\n"
+ . "GNU=GNU!UNIX\n"},
+ {OUT => ": \${_cv_='emptyvar'}\n"
+ . ": \${ac_cv_prog/RANLIB='/usr/bin/ranlib'}\n"
+ . ": \${ac_cv_prog/CC='/usr/unsupported/\\ \\ /lib/_cv_/cc'}\n"
+ . ": \${a/c_cv_prog/CPP='/usr/bin/cpp'}\n"}
+ ],
+
+
+ ['cv-vars',
+ # inspired by an autoconf generated configure script; "\\ " is a literal "\ ".
+ qw(-n -f),
+ {IN => q|s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/: \${\1='\2'}/p|},
+ {IN => "_cv_=emptyvar\n"
+ . "ac_cv_prog_RANLIB=/usr/bin/ranlib\n"
+ . "ac_cv_prog_CC=/usr/unsupported/\\ \\ /lib/_cv_/cc\n"
+ . "ac_cv_prog_CPP=/usr/bin/cpp\n"
+ . "SHELL=bash\n"
+ . "GNU=GNU!UNIX\n"},
+ {OUT => ": \${_cv_='emptyvar'}\n"
+ . ": \${ac_cv_prog_RANLIB='/usr/bin/ranlib'}\n"
+ . ": \${ac_cv_prog_CC='/usr/unsupported/\\ \\ /lib/_cv_/cc'}\n"
+ . ": \${ac_cv_prog_CPP='/usr/bin/cpp'}\n"}
+ ],
+
+ ['quiet',
+ # the old 'quiet' test: --quiet instead of -n ("\\ " is a literal "\ ")
+ qw(--quiet -f),
+ {IN => q|s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/: \${\1='\2'}/p|},
+ {IN => "_cv_=emptyvar\n"
+ . "ac_cv_prog_RANLIB=/usr/bin/ranlib\n"
+ . "ac_cv_prog_CC=/usr/unsupported/\\ \\ /lib/_cv_/cc\n"
+ . "ac_cv_prog_CPP=/usr/bin/cpp\n"
+ . "SHELL=bash\n"
+ . "GNU=GNU!UNIX\n"},
+ {OUT => ": \${_cv_='emptyvar'}\n"
+ . ": \${ac_cv_prog_RANLIB='/usr/bin/ranlib'}\n"
+ . ": \${ac_cv_prog_CC='/usr/unsupported/\\ \\ /lib/_cv_/cc'}\n"
+ . ": \${ac_cv_prog_CPP='/usr/bin/cpp'}\n"}
+ ],
+
+ ['file',
+ # the old 'file' test: --file instead of -f ("\\ " is a literal "\ ")
+ qw(-n --file),
+ {IN => q|s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/: \${\1='\2'}/p|},
+ {IN => "_cv_=emptyvar\n"
+ . "ac_cv_prog_RANLIB=/usr/bin/ranlib\n"
+ . "ac_cv_prog_CC=/usr/unsupported/\\ \\ /lib/_cv_/cc\n"
+ . "ac_cv_prog_CPP=/usr/bin/cpp\n"
+ . "SHELL=bash\n"
+ . "GNU=GNU!UNIX\n"},
+ {OUT => ": \${_cv_='emptyvar'}\n"
+ . ": \${ac_cv_prog_RANLIB='/usr/bin/ranlib'}\n"
+ . ": \${ac_cv_prog_CC='/usr/unsupported/\\ \\ /lib/_cv_/cc'}\n"
+ . ": \${ac_cv_prog_CPP='/usr/bin/cpp'}\n"}
+ ],
+
+
+ ['dollar',
+ # Test replacement on the last line (address '$')
+ qw(-e '$s/^/space /'),
+ {IN => "I can't quite remember where I heard it,\n"
+ . "but I can't seem to get out of my head\n"
+ . "the phrase\n"
+ . "the final frontier\n"},
+ {OUT => "I can't quite remember where I heard it,\n"
+ . "but I can't seem to get out of my head\n"
+ . "the phrase\n"
+ . "space the final frontier\n"}
+ ],
+
+ ['enable',
+ # inspired by an autoconf generated configure script.
+ qw(-e 's/-*enable-//;s/=.*//'),
+ {IN => "--enable-targets=sparc-sun-sunos4.1.3,srec\n"
+ . "--enable-x11-testing=on\n"
+ . "--enable-wollybears-in-minnesota=yes-id-like-that\n"},
+ {OUT => "targets\n"
+ . "x11-testing\n"
+ . "wollybears-in-minnesota\n"}
+ ],
+
+ ['fasts',
+ # test `fast' substitutions
+ qw(-f),
+ {IN => q(
+h
+s/a//
+p
+g
+s/a//g
+p
+g
+s/^a//p
+g
+s/^a//g
+p
+g
+s/not present//g
+p
+g
+s/^[a-z]//g
+p
+g
+s/a$//
+p
+g
+
+y/a/b/
+h
+s/b//
+p
+g
+s/b//g
+p
+g
+s/^b//p
+g
+s/^b//g
+p
+g
+s/^[a-z]//g
+p
+g
+s/b$//
+p
+g
+)},
+ {IN => "aaaaaaabbbbbbaaaaaaa\n"},
+ {OUT => "aaaaaabbbbbbaaaaaaa\n"
+ . "bbbbbb\n"
+ . "aaaaaabbbbbbaaaaaaa\n"
+ . "aaaaaabbbbbbaaaaaaa\n"
+ . "aaaaaaabbbbbbaaaaaaa\n"
+ . "aaaaaabbbbbbaaaaaaa\n"
+ . "aaaaaaabbbbbbaaaaaa\n"
+ . "bbbbbbbbbbbbbbbbbbb\n"
+ . "\n"
+ . "bbbbbbbbbbbbbbbbbbb\n"
+ . "bbbbbbbbbbbbbbbbbbb\n"
+ . "bbbbbbbbbbbbbbbbbbb\n"
+ . "bbbbbbbbbbbbbbbbbbb\n"
+ . "bbbbbbbbbbbbbbbbbbbb\n"}
+ ],
+
+
+
+ ['factor',
+ # Compute a few common factors for speed. Clear the subst flag
+ # These are placed here to make the flow harder to understand :-)
+ # The quotient of dividing by 11 is a limit to the remaining prime factors
+ # Pattern space looks like CANDIDATE\nNUMBER. When a candidate is valid,
+ # the number is divided and the candidate is tried again
+ # We have a prime factor in CANDIDATE! Print it
+ # If NUMBER = 1, we don't have any more factors
+ qw(-n -f),
+ {IN => q~
+s/.*/&;9aaaaaaaaa8aaaaaaaa7aaaaaaa6aaaaaa5aaaaa4aaaa3aaa2aa1a0/
+:encode
+s/\(a*\)\([0-9]\)\([0-9]*;.*\2\(a*\)\)/\1\1\1\1\1\1\1\1\1\1\4\3/
+tencode
+s/;.*//
+
+t7a
+
+:2
+a\
+2
+b2a
+:3
+a\
+3
+b3a
+:5
+a\
+5
+b5a
+:7
+a\
+7
+
+:7a
+s/^\(aa*\)\1\{6\}$/\1/
+t7
+:5a
+s/^\(aa*\)\1\{4\}$/\1/
+t5
+:3a
+s/^\(aa*\)\1\1$/\1/
+t3
+:2a
+s/^\(aa*\)\1$/\1/
+t2
+
+/^a$/b
+
+s/^\(aa*\)\1\{10\}/\1=&/
+
+:factor
+/^\(a\{7,\}\)=\1\1*$/! {
+ # Decrement CANDIDATE, and search again if it is still >1
+ s/^a//
+ /^aa/b factor
+
+ # Print the last remaining factor: since it is stored in the NUMBER
+ # rather than in the CANDIDATE, swap 'em: now NUMBER=1
+ s/\(.*\)=\(.*\)/\2=\1/
+}
+
+h
+s/=.*/;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9/
+
+:decode
+s/^\(a*\)\1\{9\}\(a\{0,9\}\)\([0-9]*;.*[^a]\2\([0-9]\)\)/\1\4\3/
+/^a/tdecode
+s/;.*//p
+
+g
+:divide
+s/^\(a*\)\(=b*\)\1/\1\2b/
+tdivide
+y/b/a/
+
+/aa$/bfactor
+~},
+
+ {IN => "2\n"
+ . "3\n"
+ . "4\n"
+ . "5\n"
+ . "8\n"
+ . "11\n"
+ . "16\n"
+ . "143\n"},
+ {OUT => "2\n"
+ . "3\n"
+ . "2\n"
+ . "2\n"
+ . "5\n"
+ . "2\n"
+ . "2\n"
+ . "2\n"
+ . "11\n"
+ . "2\n"
+ . "2\n"
+ . "2\n"
+ . "2\n"
+ . "13\n"
+ . "11\n"}
+ ],
+
+
+ ['flipcase',
+ qw(-f),
+ {IN => q|s,\([^A-Za-z]*\)\([A-Za-z]*\),\1\L\u\2,g|},
+ {IN => "09 - 02 - 2002 00.00 Tg La7 La7 -\n"
+ . "09 - 02 - 2002 00.00 Brand New Tmc 2 -\n"
+ . "09 - 02 - 2002 00.10 Tg1 Notte Rai Uno -\n"
+ . "09 - 02 - 2002 00.15 Tg Parlamento Rai Due -\n"
+ . "09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 -\n"
+ . "09 - 02 - 2002 00.20 Berserk - La CoNFESSIONE Di Gatz"
+ . " Italia 1 Cartoon\n"
+ . "09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai TrE -\n"
+ . "09 - 02 - 2002 00.25 Meteo 2 Rai Due -\n"
+ . "09 - 02 - 2002 00.30 Appuntamento Al CinEMA RaI Due -\n"
+ . "09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre -\n"
+ . "09 - 02 - 2002 00.35 Profiler Rai Due -\n"
+ . "09 - 02 - 2002 00.35 Stampa OggI - Che Tempo Fa Rai Uno -\n"
+ . "09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno -\n"
+ . "09 - 02 - 2002 00.45 BollettINO Della NEVE RETE 4 News\n"
+ . "09 - 02 - 2002 00.50 STUDIO Aperto - La Giornata Italia 1 News\n"
+ . "09 - 02 - 2002 00.50 BOCCA A Bocca - 2 Tempo Rete 4 Film\n"
+ . "09 - 02 - 2002 01.00 AppuntAMENTO Al Cinema Rai Tre -\n"
+ . "09 - 02 - 2002 01.00 Music NoN Stop Tmc 2 -\n"
+ . "09 - 02 - 2002 01.00 Studio SpORT Italia 1 SporT\n"
+ . "09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News\n"
+ . "09 - 02 - 2002 01.05 Fuori Orario. CosE (Mai) Viste Rai Tre -\n"
+ . "09 - 02 - 2002 01.15 RAINOTTE Rai Due -\n"
+ . "09 - 02 - 2002 01.15 Sottovoce Rai Uno -\n"
+ . "09 - 02 - 2002 01.15 GiOCHI Olimpici InVERNALI - CERIMONIA"
+ . " Di Apertura Rai Tre -\n"
+ . "09 - 02 - 2002 01.17 Italia Interroga Rai Due -\n"},
+ {OUT => "09 - 02 - 2002 00.00 Tg La7 La7 -\n"
+ . "09 - 02 - 2002 00.00 Brand New Tmc 2 -\n"
+ . "09 - 02 - 2002 00.10 Tg1 Notte Rai Uno -\n"
+ . "09 - 02 - 2002 00.15 Tg Parlamento Rai Due -\n"
+ . "09 - 02 - 2002 00.15 Kung Fu - La Leggenda Continua La7 -\n"
+ . "09 - 02 - 2002 00.20 Berserk - La Confessione Di Gatz"
+ . " Italia 1 Cartoon\n"
+ . "09 - 02 - 2002 00.20 Tg3 - Tg3 Meteo Rai Tre -\n"
+ . "09 - 02 - 2002 00.25 Meteo 2 Rai Due -\n"
+ . "09 - 02 - 2002 00.30 Appuntamento Al Cinema Rai Due -\n"
+ . "09 - 02 - 2002 00.30 Rai Educational - Mediamente Rai Tre -\n"
+ . "09 - 02 - 2002 00.35 Profiler Rai Due -\n"
+ . "09 - 02 - 2002 00.35 Stampa Oggi - Che Tempo Fa Rai Uno -\n"
+ . "09 - 02 - 2002 00.45 Rai Educational - Babele: Euro Rai Uno -\n"
+ . "09 - 02 - 2002 00.45 Bollettino Della Neve Rete 4 News\n"
+ . "09 - 02 - 2002 00.50 Studio Aperto - La Giornata Italia 1 News\n"
+ . "09 - 02 - 2002 00.50 Bocca A Bocca - 2 Tempo Rete 4 Film\n"
+ . "09 - 02 - 2002 01.00 Appuntamento Al Cinema Rai Tre -\n"
+ . "09 - 02 - 2002 01.00 Music Non Stop Tmc 2 -\n"
+ . "09 - 02 - 2002 01.00 Studio Sport Italia 1 Sport\n"
+ . "09 - 02 - 2002 01.00 Tg 5 - Notte Canale 5 News\n"
+ . "09 - 02 - 2002 01.05 Fuori Orario. Cose (Mai) Viste Rai Tre -\n"
+ . "09 - 02 - 2002 01.15 Rainotte Rai Due -\n"
+ . "09 - 02 - 2002 01.15 Sottovoce Rai Uno -\n"
+ . "09 - 02 - 2002 01.15 Giochi Olimpici Invernali - Cerimonia"
+ . " Di Apertura Rai Tre -\n"
+ . "09 - 02 - 2002 01.17 Italia Interroga Rai Due -\n"}
+ ],
+
+
+ ['inclib',
+ # inspired by an autoconf generated configure script.
+ qw(-e 's;lib;include;'),
+ {IN => " /usr/X11R6/lib\n"
+ . " /usr/X11R5/lib\n"
+ . " /usr/X11R4/lib\n"
+ . "\n"
+ . " /usr/lib/X11R6\n"
+ . " /usr/lib/X11R5\n"
+ . " /usr/lib/X11R4\n"
+ . "\n"
+ . " /usr/local/X11R6/lib\n"
+ . " /usr/local/X11R5/lib\n"
+ . " /usr/local/X11R4/lib\n"
+ . "\n"
+ . " /usr/local/lib/X11R6\n"
+ . " /usr/local/lib/X11R5\n"
+ . " /usr/local/lib/X11R4\n"
+ . "\n"
+ . " /usr/X11/lib\n"
+ . " /usr/lib/X11\n"
+ . " /usr/local/X11/lib\n"
+ . " /usr/local/lib/X11\n"
+ . "\n"
+ . " /usr/X386/lib\n"
+ . " /usr/x386/lib\n"
+ . " /usr/XFree86/lib/X11\n"
+ . "\n"
+ . " /usr/lib\n"
+ . " /usr/local/lib\n"
+ . " /usr/unsupported/lib\n"
+ . " /usr/athena/lib\n"
+ . " /usr/local/x11r5/lib\n"
+ . " /usr/lpp/Xamples/lib\n"
+ . "\n"
+ . " /usr/openwin/lib\n"
+ . " /usr/openwin/share/lib\n"},
+ {OUT => " /usr/X11R6/include\n"
+ . " /usr/X11R5/include\n"
+ . " /usr/X11R4/include\n"
+ . "\n"
+ . " /usr/include/X11R6\n"
+ . " /usr/include/X11R5\n"
+ . " /usr/include/X11R4\n"
+ . "\n"
+ . " /usr/local/X11R6/include\n"
+ . " /usr/local/X11R5/include\n"
+ . " /usr/local/X11R4/include\n"
+ . "\n"
+ . " /usr/local/include/X11R6\n"
+ . " /usr/local/include/X11R5\n"
+ . " /usr/local/include/X11R4\n"
+ . "\n"
+ . " /usr/X11/include\n"
+ . " /usr/include/X11\n"
+ . " /usr/local/X11/include\n"
+ . " /usr/local/include/X11\n"
+ . "\n"
+ . " /usr/X386/include\n"
+ . " /usr/x386/include\n"
+ . " /usr/XFree86/include/X11\n"
+ . "\n"
+ . " /usr/include\n"
+ . " /usr/local/include\n"
+ . " /usr/unsupported/include\n"
+ . " /usr/athena/include\n"
+ . " /usr/local/x11r5/include\n"
+ . " /usr/lpp/Xamples/include\n"
+ . "\n"
+ . " /usr/openwin/include\n"
+ . " /usr/openwin/share/include\n"}
+ ],
+
+ ['khadafy',
+ # The Khadafy test is brought to you by Scott Anderson . . .
+ qw(-f),
+ {IN => '/M[ou]\'\{0,1\}am\{1,2\}[ae]r' .
+ ' .*' .
+ '\([AEae]l[- ]\)\{0,1\}' .
+ '[GKQ]h\{0,1\}[aeu]\{1,\}\([dtz][dhz]\{0,1\}\)\{1,\}af[iy]/!d'},
+ {IN => "1) Muammar Qaddafi\n"
+ . "2) Mo'ammar Gadhafi\n"
+ . "3) Muammar Kaddafi\n"
+ . "4) Muammar Qadhafi\n"
+ . "5) Moammar El Kadhafi\n"
+ . "6) Muammar Gadafi\n"
+ . "7) Mu'ammar al-Qadafi\n"
+ . "8) Moamer El Kazzafi\n"
+ . "9) Moamar al-Gaddafi\n"
+ . "10) Mu'ammar Al Qathafi\n"
+ . "11) Muammar Al Qathafi\n"
+ . "12) Mo'ammar el-Gadhafi\n"
+ . "13) Moamar El Kadhafi\n"
+ . "14) Muammar al-Qadhafi\n"
+ . "15) Mu'ammar al-Qadhdhafi\n"
+ . "16) Mu'ammar Qadafi\n"
+ . "17) Moamar Gaddafi\n"
+ . "18) Mu'ammar Qadhdhafi\n"
+ . "19) Muammar Khaddafi\n"
+ . "20) Muammar al-Khaddafi\n"
+ . "21) Mu'amar al-Kadafi\n"
+ . "22) Muammar Ghaddafy\n"
+ . "23) Muammar Ghadafi\n"
+ . "24) Muammar Ghaddafi\n"
+ . "25) Muamar Kaddafi\n"
+ . "26) Muammar Quathafi\n"
+ . "27) Muammar Gheddafi\n"
+ . "28) Muamar Al-Kaddafi\n"
+ . "29) Moammar Khadafy\n"
+ . "30) Moammar Qudhafi\n"
+ . "31) Mu'ammar al-Qaddafi\n"
+ . "32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi\n"},
+ {OUT => "1) Muammar Qaddafi\n"
+ . "2) Mo'ammar Gadhafi\n"
+ . "3) Muammar Kaddafi\n"
+ . "4) Muammar Qadhafi\n"
+ . "5) Moammar El Kadhafi\n"
+ . "6) Muammar Gadafi\n"
+ . "7) Mu'ammar al-Qadafi\n"
+ . "8) Moamer El Kazzafi\n"
+ . "9) Moamar al-Gaddafi\n"
+ . "10) Mu'ammar Al Qathafi\n"
+ . "11) Muammar Al Qathafi\n"
+ . "12) Mo'ammar el-Gadhafi\n"
+ . "13) Moamar El Kadhafi\n"
+ . "14) Muammar al-Qadhafi\n"
+ . "15) Mu'ammar al-Qadhdhafi\n"
+ . "16) Mu'ammar Qadafi\n"
+ . "17) Moamar Gaddafi\n"
+ . "18) Mu'ammar Qadhdhafi\n"
+ . "19) Muammar Khaddafi\n"
+ . "20) Muammar al-Khaddafi\n"
+ . "21) Mu'amar al-Kadafi\n"
+ . "22) Muammar Ghaddafy\n"
+ . "23) Muammar Ghadafi\n"
+ . "24) Muammar Ghaddafi\n"
+ . "25) Muamar Kaddafi\n"
+ . "26) Muammar Quathafi\n"
+ . "27) Muammar Gheddafi\n"
+ . "28) Muamar Al-Kaddafi\n"
+ . "29) Moammar Khadafy\n"
+ . "30) Moammar Qudhafi\n"
+ . "31) Mu'ammar al-Qaddafi\n"
+ . "32) Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi\n"}
+ ],
+
+ ['linecnt',
+ qw(-e '='),
+ {IN => "A dialogue on poverty\n"
+ . "\n"
+ . " On the night when the rain beats,\n"
+ . " Driven by the wind,\n"
+ . " On the night when the snowflakes mingle\n"
+ . " With a sleety rain,\n"
+ . " I feel so helplessly cold.\n"
+ . " I nibble at a lump of salt,\n"
+ . " Sip the hot, oft-diluted dregs of _sake_;\n"
+ . " And coughing, snuffling,\n"
+ . " And stroking my scanty beard,\n"
+ . " I say in my pride,\n"
+ . " \"There's none worthy, save I!\"\n"
+ . " But I shiver still with cold.\n"
+ . " I pull up my hempen bedclothes,\n"
+ . " Wear what few sleeveless clothes I have,\n"
+ . " But cold and bitter is the night!\n"
+ . " As for those poorer than myself,\n"
+ . " Their parents must be cold and hungry,\n"
+ . " Their wives and children beg and cry.\n"
+ . " Then, how do you struggle through life?\n"
+ . "\n"
+ . " Wide as they call the heaven and earth,\n"
+ . " For me they have shrunk quite small;\n"
+ . " Bright though they call the sun and moon,\n"
+ . " They never shine for me.\n"
+ . " Is it the same with all men,\n"
+ . " Or for me alone?\n"
+ . " By rare chance I was born a man\n"
+ . " And no meaner than my fellows,\n"
+ . " But, wearing unwadded sleeveless clothes\n"
+ . " In tatters, like weeds waving in the sea,\n"
+ . " Hanging from my shoulders,\n"
+ . " And under the sunken roof,\n"
+ . " Within the leaning walls,\n"
+ . " Here I lie on straw\n"
+ . " Spread on bare earth,\n"
+ . " With my parents at my pillow,\n"
+ . " And my wife and children at my feet,\n"
+ . " All huddled in grief and tears.\n"
+ . " No fire sends up smoke\n"
+ . " At the cooking-place,\n"
+ . " And in the cauldron\n"
+ . " A spider spins its web.\n"
+ . " With not a grain to cook,\n"
+ . " We moan like the night thrush.\n"
+ . " Then, \"to cut,\" as the saying is,\n"
+ . " \"The ends of what is already too short,\"\n"
+ . " The village headman comes,\n"
+ . " With rod in hand, to our sleeping place,\n"
+ . " Growling for his dues.\n"
+ . " Must it be so hopeless --\n"
+ . " The way of this world?\n"
+ . "\n"
+ . " -- Yamanoue Okura\n"},
+ {OUT => "1\n"
+ . "A dialogue on poverty\n"
+ . "2\n"
+ . "\n"
+ . "3\n"
+ . " On the night when the rain beats,\n"
+ . "4\n"
+ . " Driven by the wind,\n"
+ . "5\n"
+ . " On the night when the snowflakes mingle\n"
+ . "6\n"
+ . " With a sleety rain,\n"
+ . "7\n"
+ . " I feel so helplessly cold.\n"
+ . "8\n"
+ . " I nibble at a lump of salt,\n"
+ . "9\n"
+ . " Sip the hot, oft-diluted dregs of _sake_;\n"
+ . "10\n"
+ . " And coughing, snuffling,\n"
+ . "11\n"
+ . " And stroking my scanty beard,\n"
+ . "12\n"
+ . " I say in my pride,\n"
+ . "13\n"
+ . " \"There's none worthy, save I!\"\n"
+ . "14\n"
+ . " But I shiver still with cold.\n"
+ . "15\n"
+ . " I pull up my hempen bedclothes,\n"
+ . "16\n"
+ . " Wear what few sleeveless clothes I have,\n"
+ . "17\n"
+ . " But cold and bitter is the night!\n"
+ . "18\n"
+ . " As for those poorer than myself,\n"
+ . "19\n"
+ . " Their parents must be cold and hungry,\n"
+ . "20\n"
+ . " Their wives and children beg and cry.\n"
+ . "21\n"
+ . " Then, how do you struggle through life?\n"
+ . "22\n"
+ . "\n"
+ . "23\n"
+ . " Wide as they call the heaven and earth,\n"
+ . "24\n"
+ . " For me they have shrunk quite small;\n"
+ . "25\n"
+ . " Bright though they call the sun and moon,\n"
+ . "26\n"
+ . " They never shine for me.\n"
+ . "27\n"
+ . " Is it the same with all men,\n"
+ . "28\n"
+ . " Or for me alone?\n"
+ . "29\n"
+ . " By rare chance I was born a man\n"
+ . "30\n"
+ . " And no meaner than my fellows,\n"
+ . "31\n"
+ . " But, wearing unwadded sleeveless clothes\n"
+ . "32\n"
+ . " In tatters, like weeds waving in the sea,\n"
+ . "33\n"
+ . " Hanging from my shoulders,\n"
+ . "34\n"
+ . " And under the sunken roof,\n"
+ . "35\n"
+ . " Within the leaning walls,\n"
+ . "36\n"
+ . " Here I lie on straw\n"
+ . "37\n"
+ . " Spread on bare earth,\n"
+ . "38\n"
+ . " With my parents at my pillow,\n"
+ . "39\n"
+ . " And my wife and children at my feet,\n"
+ . "40\n"
+ . " All huddled in grief and tears.\n"
+ . "41\n"
+ . " No fire sends up smoke\n"
+ . "42\n"
+ . " At the cooking-place,\n"
+ . "43\n"
+ . " And in the cauldron\n"
+ . "44\n"
+ . " A spider spins its web.\n"
+ . "45\n"
+ . " With not a grain to cook,\n"
+ . "46\n"
+ . " We moan like the night thrush.\n"
+ . "47\n"
+ . " Then, \"to cut,\" as the saying is,\n"
+ . "48\n"
+ . " \"The ends of what is already too short,\"\n"
+ . "49\n"
+ . " The village headman comes,\n"
+ . "50\n"
+ . " With rod in hand, to our sleeping place,\n"
+ . "51\n"
+ . " Growling for his dues.\n"
+ . "52\n"
+ . " Must it be so hopeless --\n"
+ . "53\n"
+ . " The way of this world?\n"
+ . "54\n"
+ . "\n"
+ . "55\n"
+ . " -- Yamanoue Okura\n"}
+ ],
+
+ ['manis',
+ # straight out of an autoconf-generated configure.
+ # The input should look just like the input after this is run.
+ #
+ # Protect against being on the right side of a sed subst in config.status.
+ qw(-f),
+ {IN => q(s/%@/@@/; s/@%/@@/; s/%g$/@g/; /@g$/s/[\\\\&%]/\\\\&/g;
+s/@@/%@/; s/@@/@%/; s/@g$/%g/
+)},
+ {IN => "s\%\@CFLAGS\@\%\%g\n"
+ . "s\%\@CPPFLAGS\@\%-I/\%g\n"
+ . "s\%\@CXXFLAGS\@\%-x c++\%g\n"
+ . "s\%\@DEFS\@\%\$DEFS\%g\n"
+ . "s\%\@LDFLAGS\@\%-L/usr/lib\%g\n"
+ . "s\%\@LIBS\@\%-lgnu -lbfd\%g\n"
+ . "s\%\@exec_prefix\@\%\%g\n"
+ . "s\%\@prefix\@\%\$prefix\%g\n"
+ . "s\%\@RANLIB\@\%\$RANLIB\%g\n"
+ . "s\%\@CC\@\%/usr/local/bin/gcc\%g\n"
+ . "s\%\@CPP\@\%\$CPP\%g\n"
+ . "s\%\@XCFLAGS\@\%\$XCFLAGS\%g\n"
+ . "s\%\@XINCLUDES\@\%\$XINCLUDES\%g\n"
+ . "s\%\@XLIBS\@\%\$XLIBS\%g\n"
+ . "s\%\@XPROGS\@\%\$XPROGS\%g\n"
+ . "s\%\@TCLHDIR\@\%\$TCLHDIR\%g\n"
+ . "s\%\@TCLLIB\@\%\$TCLLIB\%g\n"
+ . "s\%\@TKHDIR\@\%\$TKHDIR\%g\n"
+ . "s\%\@TKLIB\@\%\$TKLIB\%g\n"
+ . "s\%\@PTY_TYPE\@\%\$PTY_TYPE\%g\n"
+ . "s\%\@EVENT_TYPE\@\%\$EVENT_TYPE\%g\n"
+ . "s\%\@SETUID\@\%\$SETUID\%g\n"},
+ {OUT => "s\%\@CFLAGS\@\%\%g\n"
+ . "s\%\@CPPFLAGS\@\%-I/\%g\n"
+ . "s\%\@CXXFLAGS\@\%-x c++\%g\n"
+ . "s\%\@DEFS\@\%\$DEFS\%g\n"
+ . "s\%\@LDFLAGS\@\%-L/usr/lib\%g\n"
+ . "s\%\@LIBS\@\%-lgnu -lbfd\%g\n"
+ . "s\%\@exec_prefix\@\%\%g\n"
+ . "s\%\@prefix\@\%\$prefix\%g\n"
+ . "s\%\@RANLIB\@\%\$RANLIB\%g\n"
+ . "s\%\@CC\@\%/usr/local/bin/gcc\%g\n"
+ . "s\%\@CPP\@\%\$CPP\%g\n"
+ . "s\%\@XCFLAGS\@\%\$XCFLAGS\%g\n"
+ . "s\%\@XINCLUDES\@\%\$XINCLUDES\%g\n"
+ . "s\%\@XLIBS\@\%\$XLIBS\%g\n"
+ . "s\%\@XPROGS\@\%\$XPROGS\%g\n"
+ . "s\%\@TCLHDIR\@\%\$TCLHDIR\%g\n"
+ . "s\%\@TCLLIB\@\%\$TCLLIB\%g\n"
+ . "s\%\@TKHDIR\@\%\$TKHDIR\%g\n"
+ . "s\%\@TKLIB\@\%\$TKLIB\%g\n"
+ . "s\%\@PTY_TYPE\@\%\$PTY_TYPE\%g\n"
+ . "s\%\@EVENT_TYPE\@\%\$EVENT_TYPE\%g\n"
+ . "s\%\@SETUID\@\%\$SETUID\%g\n"}
+ ],
+
+ ['modulo',
+ qw(-e '0~2d;='),
+ {IN => "s\%\@CFLAGS\@\%\%g\n"
+ . "s\%\@CPPFLAGS\@\%-I/\%g\n"
+ . "s\%\@CXXFLAGS\@\%-x c++\%g\n"
+ . "s\%\@DEFS\@\%\$DEFS\%g\n"
+ . "s\%\@LDFLAGS\@\%-L/usr/lib\%g\n"
+ . "s\%\@LIBS\@\%-lgnu -lbfd\%g\n"
+ . "s\%\@exec_prefix\@\%\%g\n"
+ . "s\%\@prefix\@\%\$prefix\%g\n"
+ . "s\%\@RANLIB\@\%\$RANLIB\%g\n"
+ . "s\%\@CC\@\%/usr/local/bin/gcc\%g\n"
+ . "s\%\@CPP\@\%\$CPP\%g\n"
+ . "s\%\@XCFLAGS\@\%\$XCFLAGS\%g\n"
+ . "s\%\@XINCLUDES\@\%\$XINCLUDES\%g\n"
+ . "s\%\@XLIBS\@\%\$XLIBS\%g\n"
+ . "s\%\@XPROGS\@\%\$XPROGS\%g\n"
+ . "s\%\@TCLHDIR\@\%\$TCLHDIR\%g\n"
+ . "s\%\@TCLLIB\@\%\$TCLLIB\%g\n"
+ . "s\%\@TKHDIR\@\%\$TKHDIR\%g\n"
+ . "s\%\@TKLIB\@\%\$TKLIB\%g\n"
+ . "s\%\@PTY_TYPE\@\%\$PTY_TYPE\%g\n"
+ . "s\%\@EVENT_TYPE\@\%\$EVENT_TYPE\%g\n"
+ . "s\%\@SETUID\@\%\$SETUID\%g\n"},
+ {OUT => "1\n"
+ . "s\%\@CFLAGS\@\%\%g\n"
+ . "3\n"
+ . "s\%\@CXXFLAGS\@\%-x c++\%g\n"
+ . "5\n"
+ . "s\%\@LDFLAGS\@\%-L/usr/lib\%g\n"
+ . "7\n"
+ . "s\%\@exec_prefix\@\%\%g\n"
+ . "9\n"
+ . "s\%\@RANLIB\@\%\$RANLIB\%g\n"
+ . "11\n"
+ . "s\%\@CPP\@\%\$CPP\%g\n"
+ . "13\n"
+ . "s\%\@XINCLUDES\@\%\$XINCLUDES\%g\n"
+ . "15\n"
+ . "s\%\@XPROGS\@\%\$XPROGS\%g\n"
+ . "17\n"
+ . "s\%\@TCLLIB\@\%\$TCLLIB\%g\n"
+ . "19\n"
+ . "s\%\@TKLIB\@\%\$TKLIB\%g\n"
+ . "21\n"
+ . "s\%\@EVENT_TYPE\@\%\$EVENT_TYPE\%g\n"}
+ ],
+
+ ['middle',
+ qw(-n -e '3,5p'),
+ {IN => q( "...by imposing a tiny bit of order in a communication you are
+ translating, you are carving out a little bit of order in the
+ universe. You will never succeed. Everything will fail and come
+ to an end finally. But you have a chance to carve a little bit
+ of order and maybe even beauty out of the raw materials that
+ surround you everywhere, and I think there is no greater meaning
+ in life."
+
+ Donald L. Philippi, Oct 1930 - Jan 1993
+)},
+ {OUT =>
+q( universe. You will never succeed. Everything will fail and come
+ to an end finally. But you have a chance to carve a little bit
+ of order and maybe even beauty out of the raw materials that
+)}
+ ],
+
+ ['newline-anchor',
+ qw(-f),
+ {IN => q(N
+N
+s/^/X/g
+s/^/X/mg
+s/$/Y/g
+s/$/Y/mg
+)},
+ {IN => "a\n"
+ . "b\n"
+ . "c\n"},
+ {OUT => "XXaY\n"
+ . "XbY\n"
+ . "XcYY\n"}
+ ],
+
+ ['noeolw',
+ qw(-n -f),
+ # The sed program:
+ # generates two output files (in addition to STDOUT)
+ {IN => q(w noeolw.1out
+$ {
+ x
+ w noeolw.1out
+ x
+}
+h
+1,3w noeolw.2out
+p
+p
+)},
+ # The input file (was: noeolw.inp).
+ # NOTE: in the old test, the input file was given twice.
+ # here we specify two (identical) input files.
+ {IN => "This file is unique\n" .
+ "in that it does\n" .
+ "end in a newline."},
+ {IN => "This file is unique\n" .
+ "in that it does\n" .
+ "end in a newline."},
+
+ # The expected STDOUT (was: noeolw.good)
+ {OUT => "This file is unique\n" .
+ "This file is unique\n" .
+ "in that it does\n" .
+ "in that it does\n" .
+ "end in a newline.\n" .
+ "end in a newline.\n" .
+ "This file is unique\n" .
+ "This file is unique\n" .
+ "in that it does\n" .
+ "in that it does\n" .
+ "end in a newline.\n" .
+ "end in a newline."},
+
+ # The expected content of 'noeolw.1out' (was: noeolw.1good)
+ {CMP => [ "This file is unique\n" .
+ "in that it does\n" .
+ "end in a newline.\n" .
+ "This file is unique\n" .
+ "in that it does\n" .
+ "end in a newline.\n" .
+ "in that it does\n",
+ { 'noeolw.1out' => undef }]},
+
+ # The expected content of 'noeolw.2out' (was: noeolw.2good)
+ {CMP => [ "This file is unique\n" .
+ "in that it does\n" .
+ "end in a newline.",
+ { 'noeolw.2out' => undef }]},
+ ],
+
+ ['numsub',
+ qw(-f),
+ {IN => q(
+# the first one matches, the second doesn't
+1s/foo/bar/10
+2s/foo/bar/20
+
+# The second line should be deleted. ssed 3.55-3.58 do not.
+t
+d
+)},
+ {IN =>
+q(foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
+)},
+ {OUT => "foo foo fo oo f oo foo foo foo foo "
+ . "foo foo foo bar foo foo foo foo foo\n"}
+ ],
+
+
+ ['numsub2',
+ qw(-n -e 's/a*/b/2'),
+ {IN => "\n"},
+ {OUT => ""}
+ ],
+
+
+ ['numsub3',
+ qw(-n -e 's/^a*/b/2'),
+ {IN => "\n"},
+ {OUT => ""}
+ ],
+
+
+ ['numsub4',
+ qw(-n -e 's/^a*/b/2p'),
+ {IN => "z\n"},
+ {OUT => ""}
+ ],
+
+
+ ['numsub5',
+ qw(-n -e 's/a*/b/3p'),
+ {IN => "z\n"},
+ {OUT => ""}
+ ],
+
+ ['readin',
+ qw(-f),
+ {IN => q(/\.$/r readin.in2
+/too\.$/q
+)},
+ {AUX => { 'readin.in2' => "MOO\n" }},
+ {IN => "``Democracy will not come today, this year,\n"
+ . " nor ever through compromise and fear.\n"
+ . " I have as much right as the other fellow has\n"
+ . " to stand on my two feet and own the land.\n"
+ . " I tire so of hearing people say\n"
+ . " let things take their course,\n"
+ . " tomorrow is another day.\n"
+ . " I do not need my freedom when I'm dead.\n"
+ . " I cannot live on tomorrow's bread.\n"
+ . " Freedom is a strong seed\n"
+ . " planted in a great need.\n"
+ . " I live here, too.\n"
+ . " I want freedom just as you.''\n"
+ . " ``The Weary Blues'', Langston Hughes\n"},
+ {OUT => "``Democracy will not come today, this year,\n"
+ . " nor ever through compromise and fear.\n"
+ . "MOO\n"
+ . " I have as much right as the other fellow has\n"
+ . " to stand on my two feet and own the land.\n"
+ . "MOO\n"
+ . " I tire so of hearing people say\n"
+ . " let things take their course,\n"
+ . " tomorrow is another day.\n"
+ . "MOO\n"
+ . " I do not need my freedom when I'm dead.\n"
+ . "MOO\n"
+ . " I cannot live on tomorrow's bread.\n"
+ . "MOO\n"
+ . " Freedom is a strong seed\n"
+ . " planted in a great need.\n"
+ . "MOO\n"
+ . " I live here, too.\n"
+ . "MOO\n"}
+ ],
+
+
+ ['sep',
+ # inspired by an autoconf generated configure script; "\\a" is a literal \a.
+ qw(-f),
+ {IN => q(s%/[^/][^/]*$%%
+s%[\/][^\/][^\/]*$%%
+s,.*[^\/],,
+)},
+ {IN => "miss mary mack mack//mack/ran down/the track track track\n"
+ . "slashes\\aren't%used enough/in/casual-conversation///\n"
+ . "possibly sentences would be more attractive if they ended "
+ . "in two slashes//\n"},
+ {OUT => "\n"
+ . "///\n"
+ . "//\n"}
+ ],
+
+ ['subwrite',
+ # test s///w option
+ qw(-e 's/you/YoU/w subwrite.wout'),
+ {IN => "Not some church, and not the state,\n"
+ . "Not some dark capricious fate.\n"
+ . "Who you are, and when you lose,\n"
+ . "Comes only from the things you choose.\n"},
+ # The expected STDOUT
+ {OUT => "Not some church, and not the state,\n"
+ . "Not some dark capricious fate.\n"
+ . "Who YoU are, and when you lose,\n"
+ . "Comes only from the things YoU choose.\n"},
+ # The expected content of 'subwrite.wout'
+ {CMP => [ "Who YoU are, and when you lose,\n"
+ . "Comes only from the things YoU choose.\n",
+ { 'subwrite.wout' => undef }]}
+ ],
+
+ ['writeout',
+ # Test 'w' command
+ qw(-e '/^Facts ar/w writeout.wout'),
+ {IN => "Facts are simple and facts are straight\n"
+ . "Facts are lazy and facts are late\n"
+ . "Facts all come with points of view\n"
+ . "Facts don't do what I want them to\n"},
+ # The expected STDOUT
+ {OUT => "Facts are simple and facts are straight\n"
+ . "Facts are lazy and facts are late\n"
+ . "Facts all come with points of view\n"
+ . "Facts don't do what I want them to\n"},
+ # The expected content of 'writeout.wout'
+ {CMP => [ "Facts are simple and facts are straight\n"
+ . "Facts are lazy and facts are late\n",
+ { 'writeout.wout' => undef }]}
+ ],
+
+ ['xabcx',
+ # from the ChangeLog (Fri May 21 1993)
+ # Regex address with custom character (\xREGEXx)
+ qw(-e '\xfeetxs/blue/too/'),
+ {IN => "roses are red\n"
+ . "violets are blue\n"
+ . "my feet are cold\n"
+ . "your feet are blue\n"},
+ {OUT => "roses are red\n"
+ . "violets are blue\n"
+ . "my feet are cold\n"
+ . "your feet are too\n"}
+ ],
+
+
+ ['xbxcx',
+ # from the ChangeLog (Wed Sep 5 2001)
+ qw(-e 's/a*/x/g'),
+ {IN => "\n"
+ . "b\n"
+ . "bc\n"
+ . "bac\n"
+ . "baac\n"
+ . "baaac\n"
+ . "baaaac\n"},
+ {OUT => "x\n"
+ . "xbx\n"
+ . "xbxcx\n"
+ . "xbxcx\n"
+ . "xbxcx\n"
+ . "xbxcx\n"
+ . "xbxcx\n"}
+ ],
+
+ ['xbxcx3',
+ # Test s///N replacements (GNU extension)
+ qw(-e 's/a*/x/3'),
+ {IN => "\n"
+ . "b\n"
+ . "bc\n"
+ . "bac\n"
+ . "baac\n"
+ . "baaac\n"
+ . "baaaac\n"},
+ {OUT => "\n"
+ . "b\n"
+ . "bcx\n"
+ . "bacx\n"
+ . "baacx\n"
+ . "baaacx\n"
+ . "baaaacx\n"}
+ ],
+
+
+ # Four backslashes (2 pairs of "\\") to pass through two interpolations:
+ # once in Perl, then the shell command line argument.
+ # sed will see one backslash character in the s/// command.
+ ['bug30794_1', "s/z/\\\\x5cA/", {IN=>'z'}, {OUT => "\\A"}],
+ ['bug30794_2', "s/z/\\\\x5c/", {IN=>'z'}, {OUT => "\\"}],
+ ['bug30794_3', "s/z/\\\\x5c1/", {IN=>'z'}, {OUT => "\\1"}],
+ );
+
+my $save_temps = $ENV{SAVE_TEMPS};
+my $verbose = $ENV{VERBOSE};
+
+my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);
+exit $fail;
diff --git a/testsuite/missing-filename.sh b/testsuite/missing-filename.sh
new file mode 100755
index 0000000..704bc67
--- /dev/null
+++ b/testsuite/missing-filename.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# Test r/R/w/W commands without a file name.
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Same error message, different character position in the sed program.
+for i in 1 7 ; do
+ err="sed: -e expression #1, char $i: missing filename in r/R/w/W commands"
+ echo "$err" > exp-err$i || framework_failure_
+done
+
+# r/R/w/W commands
+for cmd in r R w W ; do
+ returns_ 1 sed $cmd </dev/null >/dev/null 2>err1 || fail=1
+ compare exp-err1 err1 || fail=1
+done
+
+returns_ 1 sed 's/1/2/w' </dev/null >/dev/null 2>err7 || fail=1
+compare exp-err7 err7 || fail=1
+
+Exit $fail
diff --git a/testsuite/newjis.sh b/testsuite/newjis.sh
new file mode 100755
index 0000000..51ed78e
--- /dev/null
+++ b/testsuite/newjis.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+
+# Test runner for newjis
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Input file. \033 is ASCII escape (0x1B).
+{
+ printf '\033$B$H$J$j$N$?$1$,$-$K\033(B\n' ;
+ printf '\033$B$?$F$+$1$?$N$O\033(B\n' ;
+ printf '\033$B$?$F$+$1$?$+$C$?$+$i\033(B\n' ;
+ printf '\033$B$?$F$+$1$?!#\033(B\n' ;
+} > newjis-inp || framework_failure_
+
+# The expected output.
+{
+ printf '\033$B$H$J$j$NM9JX6I$K\033(B\n';
+ printf '\033$B$?$F$+$1$?$N$O\033(B\n' ;
+ printf '\033$B$?$F$+$1$?$+$C$?$+$i\033(B\n' ;
+ printf '\033$B$?$F$+$1$?!#\033(B\n' ;
+} > newjis-exp || framework_failure_
+
+# The sed program.
+cat <<\EOF > newjis.sed || framework_failure_
+s/$?$1$,$-/M9JX6I/
+EOF
+
+sed -f newjis.sed < newjis-inp > newjis-out || fail=1
+remove_cr_inplace newjis-out
+compare newjis-exp newjis-out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/newline-dfa-bug.sh b/testsuite/newline-dfa-bug.sh
new file mode 100755
index 0000000..af31ce5
--- /dev/null
+++ b/testsuite/newline-dfa-bug.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# sed may access uninitialized memory when transitioning to the 15th DFA
+# state on a newline. This bug affected sed version 4.3.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_valgrind_
+
+printf 'abcdefg abcdefg\nB\n' > in || framework_failure_
+printf 'B\n' > exp || framework_failure_
+
+valgrind --quiet --error-exitcode=1 \
+ sed 'N;s/abcdefg.*\n//' in > out 2> err || fail=1
+
+# Work around a bug in CentOS 5.10's valgrind
+# FIXME: remove in 2018 or when CentOS 5 is no longer officially supported
+grep 'valgrind: .*Assertion.*failed' err > /dev/null \
+ && skip_ 'you seem to have a buggy version of valgrind'
+
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/normalize-text.sh b/testsuite/normalize-text.sh
new file mode 100644
index 0000000..71b03bf
--- /dev/null
+++ b/testsuite/normalize-text.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+# Test text escaping (compile.c:normalize_text()).
+# NOTE:
+# \dNNN \xNN \oNNN - tested in 'convert-number.sh'
+# character-classes in POSIX mode - tested in 'posix-char-class.sh'
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Common backslash combinations
+#
+printf "%s\n" a a a a a a >in1 || framework_failure_
+cat <<\EOF >prog1 || framework_failure_
+1y/a/\a/
+2y/a/\f/
+3y/a/\n/
+4y/a/\r/
+5y/a/\t/
+6y/a/\v/
+EOF
+printf "\a\n\f\n\n\n\r\n\t\n\v\n" > exp1 || framework_failure_
+
+sed -f prog1 in1 > out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# Test a backslash followed by a real newline (ASCII 0x0A), which must be
+# normalized to a plain newline.  The printf format is single-quoted on
+# purpose: inside double quotes the shell collapses the leading '\\' first,
+# so printf would emit the two characters '\' 'n' instead of '\' + newline.
+echo a > in2 || framework_failure_
+printf 'y/a/\\\n/' > prog2 || framework_failure_
+printf "\n\n" > exp2 || framework_failure_
+sed -f prog2 in2 > out2 || fail=1
+compare_ exp2 out2 || fail=1
+
+#
+# \cX combination
+#
+printf "%s\n" a a a a a a a a a a > in3 || framework_failure_
+cat <<\EOF >prog3 || framework_failure_
+1y/a/\cA/
+2y/a/\ca/
+3y/a/\cZ/
+4y/a/\cz/
+5y/a/\c{/
+6y/a/\c;/
+7y/a/\c#/
+8y/a/\c[/
+9y/a/\c\\/
+10y/a/\c]/
+EOF
+
+printf "\1\n\1\n\32\n\32\n;\n{\nc\n\33\n\34\n\35\n" > exp3 || framework_failure_
+sed -f prog3 in3 > out3 || fail=1
+compare_ exp3 out3 || fail=1
+
+# \c at end of (valid) text - normalize_text() stops, returns control to caller.
+# TODO: is this a bug?
+# compare with 'y/a/\d/' and 'y/a/\x/'
+cat <<\EOF >exp-err-c || framework_failure_
+sed: -e expression #1, char 7: strings for `y' command are different lengths
+EOF
+returns_ 1 sed 'y/a/\c/' </dev/null 2>err-c || fail=1
+compare_ exp-err-c err-c || fail=1
+
+Exit $fail
diff --git a/testsuite/nulldata.sh b/testsuite/nulldata.sh
new file mode 100755
index 0000000..a8a0114
--- /dev/null
+++ b/testsuite/nulldata.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+# Test -z/--null-data option
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Two lines, differ based on the EOL character.
+printf "AB\000CD\nEF\n\000" > in1 || framework_failure_
+
+# 's/^./x/' cmd processed with EOL=\n
+printf "xB\000CD\nxF\nx" > exp-s-nl || framework_failure_
+# 's/^./x/' cmd processed with EOL=\0
+printf "xB\000xD\nEF\n\000" > exp-s-z || framework_failure_
+
+# '=' cmd processed with EOL=\n
+printf "1\nAB\000CD\n2\nEF\n3\n\000" > exp-=-nl || framework_failure_
+
+# '=' cmd processed with EOL=\0
+printf "1\000AB\0002\000CD\nEF\n\000" > exp-=-z || framework_failure_
+
+
+# 'l' cmd processed with EOL=\n
+cat <<\EOF >exp-l-nl || framework_failure_
+AB\000CD$
+EF$
+\000$
+EOF
+
+# 'l' cmd processed with EOL=\0
+printf 'AB$\000CD\\nEF\\n$\000' >exp-l-z || framework_failure_
+
+# 'F' cmd with EOL=\n
+printf "in1\n" > exp-F-nl || framework_failure_
+
+# 'F' cmd with EOL=\0
+printf "in1\000" > exp-F-z || framework_failure_
+
+
+# Test substitution
+sed 's/^./x/' in1 > out-s-nl || fail=1
+compare_ exp-s-nl out-s-nl || fail=1
+
+sed -z 's/^./x/' in1 > out-s-z || fail=1
+compare_ exp-s-z out-s-z || fail=1
+
+
+
+# Test '=' command
+sed = in1 > out-=-nl || fail=1
+compare_ exp-=-nl out-=-nl || fail=1
+
+sed -z = in1 > out-=-z || fail=1
+compare_ exp-=-z out-=-z || fail=1
+
+
+
+# Test 'l' command
+sed -n l in1 > out-l-nl || fail=1
+compare_ exp-l-nl out-l-nl || fail=1
+
+sed -zn l in1 > out-l-z || fail=1
+compare_ exp-l-z out-l-z || fail=1
+
+
+# Test 'F' command
+sed -n 1F in1 > out-F-nl || fail=1
+compare_ exp-F-nl out-F-nl || fail=1
+
+sed -zn 1F in1 > out-F-z || fail=1
+compare_ exp-F-z out-F-z || fail=1
+
+
+Exit $fail
diff --git a/testsuite/obinary.sh b/testsuite/obinary.sh
new file mode 100755
index 0000000..78bffb5
--- /dev/null
+++ b/testsuite/obinary.sh
@@ -0,0 +1,111 @@
+#!/bin/sh
+# Test CR/LF behaviour on platforms which support O_BINARY file mode
+# (i.e. differentiates between text and binary files).
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Test if O_TEXT is enabled by default (i.e. lines terminated with "\r\n").
+# If not, skip the test.
+printf a | sed cb > out1 \
+ || framework_failure_ "failed to run sed 'cb'"
+size=$(LC_ALL=C wc -c < out1 | tr -d '[:space:]') \
+ || framework_failure_ "failed to check size of 'out1'"
+case $size in
+ 2) skip_ "platform does not enable O_TEXT by default" ;;
+ 3) ;;
+ *) framework_failure_ "unexpected size '$size'" ;;
+esac
+
+
+# files with "\r\n" and with just "\n"
+printf 'a\015\12' > inT || framework_failure_
+printf 'a\12' > inB || framework_failure_
+cp inT inplaceT1 || framework_failure_
+cp inT inplaceT2 || framework_failure_
+cp inT inplaceT3 || framework_failure_
+cp inB inplaceB1 || framework_failure_
+cp inB inplaceB2 || framework_failure_
+
+printf 'z\015\12' > expT || framework_failure_
+printf 'z\12' > expB || framework_failure_
+
+
+# First round of tests. These all seem equivalent,
+# but older seds had subtle implementation differences
+# between STDIN and explicit input files (bug#25459).
+# Similarly, also test --inplace type output.
+sed 's/a/z/' inT > out1 || fail=1
+sed 's/a/z/' < inT > out2 || fail=1
+cat inT | sed 's/a/z/' > out3 || fail=1
+sed -i 's/a/z/' inplaceT1 || fail=1
+
+compare_ expT out1 || fail=1
+compare_ expT out2 || fail=1
+compare_ expT out3 || fail=1
+compare_ expT inplaceT1 || fail=1
+
+# Input file with only "\n". Output should contain "\r\n".
+sed 's/a/z/' inB > out4 || fail=1
+sed 's/a/z/' < inB > out5 || fail=1
+cat inB | sed 's/a/z/' > out6 || fail=1
+sed -i 's/a/z/' inplaceB1 || fail=1
+
+compare_ expT out4 || fail=1
+compare_ expT out5 || fail=1
+compare_ expT out6 || fail=1
+compare_ expT inplaceB1 || fail=1
+
+# Input file with only "\n", with "sed -b" should output only "\n".
+sed -b 's/a/z/' inB > out7 || fail=1
+sed -b 's/a/z/' < inB > out8 || fail=1
+cat inB | sed -b 's/a/z/' > out9 || fail=1
+sed -b -i 's/a/z/' inplaceB2 || fail=1
+
+compare_ expB out7 || fail=1
+compare_ expB out8 || fail=1
+compare_ expB out9 || fail=1
+compare_ expB inplaceB2 || fail=1
+
+# End-of-line tests on input file with "\r\n".
+# In TEXT mode, "\r\n" is end-of-line, the "y" character will be added prior to
+# it. In BINARY mode, "\r" is just another character - the "y" character will
+# be added after the "\r".
+printf 'ay\015\012' > expTeol || framework_failure_
+printf 'a\015y\012' > expBeol || framework_failure_
+
+sed 's/$/y/' inT > out10 || fail=1
+sed 's/$/y/' < inT > out11 || fail=1
+cat inT | sed 's/$/y/' > out12 || fail=1
+sed -i 's/$/y/' inplaceT2 || fail=1
+
+sed -b 's/$/y/' inT > out13 || fail=1
+sed -b 's/$/y/' < inT > out14 || fail=1
+cat inT | sed -b 's/$/y/' > out15 || fail=1
+sed -i -b 's/$/y/' inplaceT3 || fail=1
+
+compare_ expTeol out10 || fail=1
+compare_ expTeol out11 || fail=1
+compare_ expTeol out12 || fail=1
+compare_ expTeol inplaceT2 || fail=1
+
+compare_ expBeol out13 || fail=1
+compare_ expBeol out14 || fail=1
+compare_ expBeol out15 || fail=1
+compare_ expBeol inplaceT3 || fail=1
+
+Exit $fail
diff --git a/testsuite/panic-tests.sh b/testsuite/panic-tests.sh
new file mode 100644
index 0000000..76146f9
--- /dev/null
+++ b/testsuite/panic-tests.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+# Exercise some panic stops
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# failure to create temp file
+#
+
+# inplace with an unwritable directory
+mkdir a || framework_failure_
+touch a/a || framework_failure_
+chmod a-w a || framework_failure_
+
+# Expected error message, with actual filename/errno trimmed
+cat <<\EOF >exp-err-temp || framework_failure_
+sed: couldn't open temporary file
+EOF
+
+# Exit code 4 is sed's documented status for I/O or serious runtime errors (panic).
+returns_ 4 sed -i = a/a 2>err-temp || fail=1
+
+# trim the filename/errno message (using sed itself...)
+sed -i 's/file.*$/file/' err-temp || framework_failure_
+compare_ exp-err-temp err-temp || fail=1
+
+# restore writability, to ensure it can be deleted
+chmod a+w a || framework_failure_
+
+
+#
+# no input files (with inplace)
+#
+
+# Expected error message
+cat <<\EOF> exp-err-no-files || framework_failure_
+sed: no input files
+EOF
+
+# /dev/null to ensure it doesn't hang if panic is not invoked
+returns_ 4 sed -i = </dev/null 2>err-no-files || fail=1
+compare_ exp-err-no-files err-no-files || fail=1
+
+
+#
+# Not a regular file (with inplace)
+#
+cat <<\EOF >exp-err-not-reg-file || framework_failure_
+sed: couldn't edit f: not a regular file
+EOF
+
+mkfifo f || framework_failure_
+
+# NOTE: the file-mode check is not performed until the first line is read.
+# an empty/blocking fifo will hang forever.
+printf a > f &
+
+# TODO: add a timeout in case of bug leading to a blocking fifo?
+returns_ 4 sed -i = f 2>err-not-reg-file || fail=1
+compare_ exp-err-not-reg-file err-not-reg-file || fail=1
+
+
+#
+# inplace on a terminal device
+# (if available)
+#
+
+#NOTE: device name is replaced later
+cat <<\EOF >exp-err-tty || framework_failure_
+sed: couldn't edit X: is a terminal
+EOF
+
+ttydev=no-such-file
+type tty >/dev/null 2>&1 && ttydev=$(tty 2>/dev/null)
+if test -w "$ttydev" && test -r "$ttydev" ; then
+ returns_ 4 sed -i = "$ttydev" 2>err-tty || fail=1
+
+ # remove the actual terminal device name (using sed itself...)
+ sed -i 's/edit.*:/edit X:/' err-tty || framework_failure_
+
+ compare_ exp-err-tty err-tty || fail=1
+fi
+
+
+
+Exit $fail
diff --git a/testsuite/posix-char-class.sh b/testsuite/posix-char-class.sh
new file mode 100644
index 0000000..ecbda07
--- /dev/null
+++ b/testsuite/posix-char-class.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+# Test character-class definitions in POSIX mode.
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# NOTE:
+# In GNU Extension mode, all text is normalized (e.g. backslash-X combinations).
+# In POSIX mode, normalize_text() ensures content of character
+# classes is not normalized.
+#
+# Compare:
+# $ printf "t\t\n" | sed 's/[\t]/X/' | od -a
+# 0000000 t X nl
+# $ printf "t\t\n" | sed --posix 's/[\t]/X/' | od -a
+# 0000000 X ht nl
+#
+# This test unit validates the special handling of character classes
+# in posix mode (compile.c:normalize_text() implementation).
+
+
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo X > exp || framework_failure_
+
+# Closing bracket without opening bracket, match as-is
+echo ']' | sed --posix 's/]/X/' > out1 || fail=1
+compare_ exp out1 || fail=1
+
+# Two opening brackets (same state when opening the second one)
+echo '[' | sed --posix 's/[[]/X/' > out2 || fail=1
+compare_ exp out2 || fail=1
+
+# Escaping before and after the character class, but not inside it (POSIX MODE)
+printf "\tt\t\n" | sed --posix 's/\t[\t]\t/X/' > out3 || fail=1
+compare_ exp out3 || fail=1
+
+# Escaping before, inside, and after the character class (GNU MODE)
+printf "\t\t\t\n" | sed 's/\t[\t]\t/X/' > out4 || fail=1
+compare_ exp out4 || fail=1
+
+# Special characters, but outside a valid character-class syntax
+printf "=\n" | sed --posix 's/[.=:.]/X/' > out5 || fail=1
+compare_ exp out5 || fail=1
+
+# A valid character class definition
+printf "b\n" | sed --posix 's/[[:alpha:]]/X/' > out6 || fail=1
+compare_ exp out6 || fail=1
+
+
+
+Exit $fail
diff --git a/testsuite/posix-mode-ERE.sh b/testsuite/posix-mode-ERE.sh
new file mode 100644
index 0000000..223593c
--- /dev/null
+++ b/testsuite/posix-mode-ERE.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# Ensure extended regular expressions work in posix mode
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+printf "hi+\n" > in1 || framework_failure_
+
+printf "{hi+}\n" > exp-special || framework_failure_
+printf "h{i+}\n" > exp-literal || framework_failure_
+
+# '+' is special in ERE
+sed -E 's/(.+)/{\1}/' in1 > out0 || fail=1
+compare_ exp-special out0 || fail=1
+
+# '+' is special in ERE, even if --posix is used.
+# sed-4.4 and earlier did not treat it as special (bug#26409).
+sed --posix -E 's/(.+)/{\1}/' in1 > out1 || fail=1
+compare_ exp-special out1 || fail=1
+
+# Escape the '+' to remove its special meaning in ERE
+sed --posix -E 's/(.\+)/{\1}/' in1 > out2 || fail=1
+compare_ exp-literal out2 || fail=1
+
+# with BRE and --posix, '+' should have no special meaning
+sed --posix 's/\(.+\)/{\1}/' in1 > out3 || fail=1
+compare_ exp-literal out3 || fail=1
+
+# with BRE without --posix, '+' should have no special meaning
+sed 's/\(.+\)/{\1}/' in1 > out4 || fail=1
+compare_ exp-literal out4 || fail=1
+
+# with BRE without --posix, '\+' is special (GNU extension)
+sed 's/\(.\+\)/{\1}/' in1 > out5 || fail=1
+compare_ exp-special out5 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/posix-mode-N.sh b/testsuite/posix-mode-N.sh
new file mode 100755
index 0000000..344ec19
--- /dev/null
+++ b/testsuite/posix-mode-N.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Test 'N' command with/without posix conformity
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# in/exp are identical, but using 'exp' for both input and output
+# will cause unneeded confusion when looking at the logs.
+printf "A\nB\n" > in1 || framework_failure_
+cp in1 exp1 || framework_failure_
+printf "A\n" > in2 || framework_failure_
+cp in2 exp2 || framework_failure_
+
+# If there is a 'next' line, N behaves the same regardless of posixicity
+sed N in1 > out1 || fail=1
+compare exp1 out1 || fail=1
+
+sed --posix N in1 > out2 || fail=1
+compare exp1 out2 || fail=1
+
+POSIXLY_CORRECT=y sed N in1 > out3 || fail=1
+compare exp1 out3 || fail=1
+
+
+# If there is no 'next' line,
+# gnu-N quits with printing
+# posix-N quits without printing.
+sed N in2 > out4 || fail=1
+compare exp2 out4 || fail=1
+
+sed --posix N in2 > out5 || fail=1
+compare /dev/null out5 || fail=1
+
+POSIXLY_CORRECT=y sed N in2 > out6 || fail=1
+compare /dev/null out6 || fail=1
+
+# exception: gnu-mode N but no default output, should not print anything.
+sed -n N in2 > out7 || fail=1
+compare /dev/null out7 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/posix-mode-addr.sh b/testsuite/posix-mode-addr.sh
new file mode 100644
index 0000000..1ba5fc7
--- /dev/null
+++ b/testsuite/posix-mode-addr.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+# Ensure GNU address extensions are rejected in posix mode
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+cat <<\EOF> exp-err-addr0 || framework_failure_
+sed: -e expression #1, char 6: invalid usage of line address 0
+EOF
+
+cat <<\EOF >exp-err-bad-addr || framework_failure_
+sed: -e expression #1, char 3: unexpected `,'
+EOF
+
+printf "%s\n" A B A C D E F G H I J >in1 || framework_failure_
+
+# The expected output with zero-line address '0,/A/'
+# the regex will match the first line
+printf "A\n" >exp-l0 || framework_failure_
+
+# The expected output with one-line address '1,/A/'
+# the regex will not be checked against the first line,
+# will match the third line
+printf "%s\n" A B A >exp-l1 || framework_failure_
+
+# The expected output with address '2,+1'
+# (from line 2, count 1 additional line = line 3)
+printf "%s\n" B A >exp-plus || framework_failure_
+
+# The expected output with address '5,~4'
+# (from line 5 till a multiple of 4 = line 8)
+printf "%s\n" D E F G >exp-mult || framework_failure_
+
+
+#
+# Addressing extension: 0,/regexp/
+#
+
+# sanity check: address line=1 is valid for both posix and gnu
+sed -n '1,/A/p' in1 > out-l1 || fail=1
+compare_ exp-l1 out-l1 || fail=1
+
+# address line=0 is a gnu extension
+sed -n '0,/A/p' in1 > out-gnu-l0 || fail=1
+compare_ exp-l0 out-gnu-l0 || fail=1
+# rejected in posix mode
+returns_ 1 sed --posix -n '0,/A/p' in1 2>err-posix-l0 || fail=1
+compare_ exp-err-addr0 err-posix-l0 || fail=1
+
+
+
+#
+# Addressing extension: addr,+N
+#
+sed -n '2,+1p' in1 > out-plus || fail=1
+compare_ exp-plus out-plus || fail=1
+
+returns_ 1 sed --posix -n '2,+1p' in1 2> err-plus || fail=1
+compare_ exp-err-bad-addr err-plus || fail=1
+
+
+
+#
+# Addressing extension: addr,~N
+#
+
+sed -n '5,~4p' in1 > out-mult || fail=1
+compare_ exp-mult out-mult || fail=1
+
+returns_ 1 sed --posix -n '5,~4p' in1 2> err-mult || fail=1
+compare_ exp-err-bad-addr err-mult || fail=1
+
+
+
+Exit $fail
diff --git a/testsuite/posix-mode-bad-ref.sh b/testsuite/posix-mode-bad-ref.sh
new file mode 100755
index 0000000..f933f79
--- /dev/null
+++ b/testsuite/posix-mode-bad-ref.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# Test non-posix-conforming gnu extensions when using --posix.
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+cat <<\EOF >exp-err || framework_failure_
+sed: -e expression #1, char 10: invalid reference \1 on `s' command's RHS
+EOF
+
+# Invalid references are errors in non-posix mode
+returns_ 1 sed 's/abc/\1/g' 2>err < /dev/null || fail=1
+compare_ exp-err err || fail=1
+
+# Invalid references are silently ignored in posix mode
+sed --posix 's/abc/\1/g' < /dev/null || fail=1
+
+Exit $fail
diff --git a/testsuite/posix-mode-s.sh b/testsuite/posix-mode-s.sh
new file mode 100644
index 0000000..33ad92b
--- /dev/null
+++ b/testsuite/posix-mode-s.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# Ensure GNU extensions are rejected in posix mode
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+cat <<\EOF >exp-err || framework_failure_
+sed: -e expression #1, char 7: unknown option to `s'
+EOF
+
+# substitution command options (GNU extensions: i, I, m, M)
+# TODO: conditionally test sSxX in perl mode
+for opt in i I m M ;
+do
+ # These options should fail in strict POSIX mode
+ returns_ 1 sed --posix "s/a/b/$opt" </dev/null 2>err || fail=1
+ compare_ exp-err err || fail=1
+
+ # These options are allowed otherwise
+ sed "s/a/b/$opt" </dev/null || fail=1
+
+ # POSIXLY_CORRECT alone does not disable them
+ POSIXLY_CORRECT=y sed "s/a/b/$opt" </dev/null || fail=1
+done
+
+
+# test s///e (execute pattern-space as shell)
+printf "A\n" > in1 || framework_failure_
+
+printf "hello\n" >exp-gnu-e || framework_failure_
+sed 's/./printf hello/e' in1 > out-gnu-e || fail=1
+compare exp-gnu-e out-gnu-e || fail=1
+
+
+# s///e rejected in POSIX mode
+cat <<\EOF >exp-err-psx-e || framework_failure_
+sed: -e expression #1, char 10: unknown option to `s'
+EOF
+returns_ 1 sed --posix 's/./echo/e' in1 2>err-posix-e || fail=1
+compare_ exp-err-psx-e err-posix-e || fail=1
+
+
+# substitution special commands (e.g \l \L \U \u \E).
+# see compile.c:setup_replacement()
+printf "a\n" > exp-gnu || framework_failure_
+printf "lA\n" > exp-posix || framework_failure_
+
+# gnu-extension: turn the next character to lowercase
+sed 's/./\l&/' in1 > out-gnu || fail=1
+compare_ exp-gnu out-gnu || fail=1
+
+# posix: '\l' is just 'l'
+sed --posix 's/./\l&/' in1 > out-posix || fail=1
+compare_ exp-posix out-posix || fail=1
+
+
+Exit $fail
diff --git a/testsuite/ptestcases.h b/testsuite/ptestcases.h
new file mode 100644
index 0000000..506b1cc
--- /dev/null
+++ b/testsuite/ptestcases.h
@@ -0,0 +1,326 @@
+ { 0, 0, "2.8.2 Regular Expression General Requirement", NULL, },
+ { 2, 4, "bb*", "abbbc", },
+ { 2, 2, "bb*", "ababbbc", },
+ { 7, 9, "A#*::", "A:A#:qA::qA#::qA##::q", },
+ { 1, 5, "A#*::", "A##::A#::qA::qA#:q", },
+ { 0, 0, "2.8.3.1.2 BRE Special Characters", NULL, },
+ { 0, 0, "GA108", NULL, },
+ { 2, 2, "\\.", "a.c", },
+ { 2, 2, "\\[", "a[c", },
+ { 2, 2, "\\\\", "a\\c", },
+ { 2, 2, "\\*", "a*c", },
+ { 2, 2, "\\^", "a^c", },
+ { 2, 2, "\\$", "a$c", },
+ { 7, 11, "X\\*Y\\*8", "Y*8X*8X*Y*8", },
+ { 0, 0, "GA109", NULL, },
+ { 2, 2, "[.]", "a.c", },
+ { 2, 2, "[[]", "a[c", },
+ { -1, -1, "[[]", "ac", },
+ { 2, 2, "[\\]", "a\\c", },
+ { 1, 1, "[\\a]", "abc", },
+ { 2, 2, "[\\.]", "a\\.c", },
+ { 2, 2, "[\\.]", "a.\\c", },
+ { 2, 2, "[*]", "a*c", },
+ { 2, 2, "[$]", "a$c", },
+ { 2, 2, "[X*Y8]", "7*8YX", },
+ { 0, 0, "GA110", NULL, },
+ { 2, 2, "*", "a*c", },
+ { 3, 4, "*a", "*b*a*c", },
+ { 1, 5, "**9=", "***9=9", },
+ { 0, 0, "GA111", NULL, },
+ { 1, 1, "^*", "*bc", },
+ { -1, -1, "^*", "a*c", },
+ { -1, -1, "^*", "^*ab", },
+ { 1, 5, "^**9=", "***9=", },
+ { -1, -1, "^*5<*9", "5<9*5<*9", },
+ { 0, 0, "GA112", NULL, },
+ { 2, 3, "\\(*b\\)", "a*b", },
+ { -1, -1, "\\(*b\\)", "ac", },
+ { 1, 6, "A\\(**9\\)=", "A***9=79", },
+ { 0, 0, "GA113(1)", NULL, },
+ { 1, 3, "\\(^*ab\\)", "*ab", },
+ { -1, -1, "\\(^*ab\\)", "^*ab", },
+ { -1, -1, "\\(^*b\\)", "a*b", },
+ { -1, -1, "\\(^*b\\)", "^*b", },
+ { 0, 0, "GA114", NULL, },
+ { 1, 3, "a^b", "a^b", },
+ { 1, 3, "a\\^b", "a^b", },
+ { 1, 1, "^^", "^bc", },
+ { 2, 2, "\\^", "a^c", },
+ { 1, 1, "[c^b]", "^abc", },
+ { 1, 1, "[\\^ab]", "^ab", },
+ { 2, 2, "[\\^ab]", "c\\d", },
+ { -1, -1, "[^^]", "^", },
+ { 1, 3, "\\(a^b\\)", "a^b", },
+ { 1, 3, "\\(a\\^b\\)", "a^b", },
+ { 2, 2, "\\(\\^\\)", "a^b", },
+ { 0, 0, "GA115", NULL, },
+ { 3, 3, "$$", "ab$", },
+ { -1, -1, "$$", "$ab", },
+ { 2, 3, "$c", "a$c", },
+ { 2, 2, "[$]", "a$c", },
+ { 1, 2, "\\$a", "$a", },
+ { 3, 3, "\\$$", "ab$", },
+ { 2, 6, "A\\([34]$[34]\\)B", "XA4$3BY", },
+ { 0, 0, "2.8.3.1.3 Periods in BREs", NULL, },
+ { 0, 0, "GA116", NULL, },
+ { 1, 1, ".", "abc", },
+ { -1, -1, ".ab", "abc", },
+ { 1, 3, "ab.", "abc", },
+ { 1, 3, "a.b", "a,b", },
+ { -1, -1, ".......", "PqRs6", },
+ { 1, 7, ".......", "PqRs6T8", },
+ { 0, 0, "2.8.3.2 RE Bracket Expression", NULL, },
+ { 0, 0, "GA118", NULL, },
+ { 2, 2, "[abc]", "xbyz", },
+ { -1, -1, "[abc]", "xyz", },
+ { 2, 2, "[abc]", "xbay", },
+ { 0, 0, "GA119", NULL, },
+ { 2, 2, "[^a]", "abc", },
+ { 4, 4, "[^]cd]", "cd]ef", },
+ { 2, 2, "[^abc]", "axyz", },
+ { -1, -1, "[^abc]", "abc", },
+ { 3, 3, "[^[.a.]b]", "abc", },
+ { 3, 3, "[^[=a=]b]", "abc", },
+ { 2, 2, "[^-ac]", "abcde-", },
+ { 2, 2, "[^ac-]", "abcde-", },
+ { 3, 3, "[^a-b]", "abcde", },
+ { 3, 3, "[^a-bd-e]", "dec", },
+ { 2, 2, "[^---]", "-ab", },
+ { 16, 16, "[^a-zA-Z0-9]", "pqrstVWXYZ23579#", },
+ { 0, 0, "GA120(1)", NULL, },
+ { 3, 3, "[]a]", "cd]ef", },
+ { 1, 1, "[]-a]", "a_b", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 1, 1, "[]^a-z]", "string", },
+ { 0, 0, "GA120(2)", NULL, },
+ { 4, 4, "[^]cd]", "cd]ef", },
+ { 0, 0, "[^]]*", "]]]]]]]]X", },
+ { 0, 0, "[^]]*", "]]]]]]]]", },
+ { 9, 9, "[^]]\\{1,\\}", "]]]]]]]]X", },
+ { -1, -1, "[^]]\\{1,\\}", "]]]]]]]]", },
+ { 0, 0, "GA120(3)", NULL, },
+ { 3, 3, "[c[.].]d]", "ab]cd", },
+ { 2, 8, "[a-z]*[[.].]][A-Z]*", "Abcd]DEFg", },
+ { 0, 0, "GA121", NULL, },
+ { 2, 2, "[[.a.]b]", "Abc", },
+ { 1, 1, "[[.a.]b]", "aBc", },
+ { -1, -1, "[[.a.]b]", "ABc", },
+ { 3, 3, "[^[.a.]b]", "abc", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 3, 3, "[A-[.].]c]", "ab]!", },
+ { 0, 0, "GA122", NULL, },
+ { -2, -2, "[[.ch.]]", "abc", },
+ { -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
+ { 0, 0, "GA125", NULL, },
+ { 2, 2, "[[=a=]b]", "Abc", },
+ { 1, 1, "[[=a=]b]", "aBc", },
+ { -1, -1, "[[=a=]b]", "ABc", },
+ { 3, 3, "[^[=a=]b]", "abc", },
+ { 0, 0, "GA126", NULL, },
+ { 0, 0, NULL, "the expected result for [[:alnum:]]* is 2-7 which is wrong" },
+ { 0, 0, "[[:alnum:]]*", " aB28gH", },
+ { 2, 7, "[[:alnum:]][[:alnum:]]*", " aB28gH", },
+ { 0, 0, NULL, "the expected result for [^[:alnum:]]* is 2-5 which is wrong" },
+ { 0, 0, "[^[:alnum:]]*", "2 ,a", },
+ { 2, 5, "[^[:alnum:]][^[:alnum:]]*", "2 ,a", },
+ { 0, 0, NULL, "the expected result for [[:alpha:]]* is 2-5 which is wrong" },
+ { 0, 0, "[[:alpha:]]*", " aBgH2", },
+ { 2, 5, "[[:alpha:]][[:alpha:]]*", " aBgH2", },
+ { 1, 6, "[^[:alpha:]]*", "2 8,a", },
+ { 1, 2, "[[:blank:]]*", " \r", },
+ { 1, 8, "[^[:blank:]]*", "aB28gH, ", },
+ { 1, 2, "[[:cntrl:]]*", "  ", },
+ { 1, 8, "[^[:cntrl:]]*", "aB2 8gh,", },
+ { 0, 0, NULL, "the expected result for [[:digit:]]* is 2-3 which is wrong" },
+ { 0, 0, "[[:digit:]]*", "a28", },
+ { 2, 3, "[[:digit:]][[:digit:]]*", "a28", },
+ { 1, 8, "[^[:digit:]]*", "aB gH,", },
+ { 1, 7, "[[:graph:]]*", "aB28gH, ", },
+ { 1, 3, "[^[:graph:]]*", " ,", },
+ { 1, 2, "[[:lower:]]*", "agB", },
+ { 1, 8, "[^[:lower:]]*", "B2 8H,a", },
+ { 1, 8, "[[:print:]]*", "aB2 8gH, ", },
+ { 1, 2, "[^[:print:]]*", "  ", },
+ { 0, 0, NULL, "the expected result for [[:punct:]]* is 2-2 which is wrong" },
+ { 0, 0, "[[:punct:]]*", "a,2", },
+ { 2, 3, "[[:punct:]][[:punct:]]*", "a,,2", },
+ { 1, 9, "[^[:punct:]]*", "aB2 8gH", },
+ { 1, 3, "[[:space:]]*", " \r", },
+ { 0, 0, NULL, "the expected result for [^[:space:]]* is 2-9 which is wrong" },
+ { 0, 0, "[^[:space:]]*", " aB28gH, ", },
+ { 2, 9, "[^[:space:]][^[:space:]]*", " aB28gH, ", },
+ { 0, 0, NULL, "the expected result for [[:upper:]]* is 2-3 which is wrong" },
+ { 0, 0, "[[:upper:]]*", "aBH2", },
+ { 2, 3, "[[:upper:]][[:upper:]]*", "aBH2", },
+ { 1, 8, "[^[:upper:]]*", "a2 8g,B", },
+ { 0, 0, NULL, "the expected result for [[:xdigit:]]* is 2-5 which is wrong" },
+ { 0, 0, "[[:xdigit:]]*", "gaB28h", },
+ { 2, 5, "[[:xdigit:]][[:xdigit:]]*", "gaB28h", },
+ { 0, 0, NULL, "the expected result for [^[:xdigit:]]* is 2-7 which is wrong" },
+ { 2, 7, "[^[:xdigit:]][^[:xdigit:]]*", "a gH,2", },
+ { 0, 0, "GA127", NULL, },
+ { -2, -2, "[b-a]", "abc", },
+ { 1, 1, "[a-c]", "bbccde", },
+ { 2, 2, "[a-b]", "-bc", },
+ { 3, 3, "[a-z0-9]", "AB0", },
+ { 3, 3, "[^a-b]", "abcde", },
+ { 3, 3, "[^a-bd-e]", "dec", },
+ { 1, 1, "[]-a]", "a_b", },
+ { 2, 2, "[+--]", "a,b", },
+ { 2, 2, "[--/]", "a.b", },
+ { 2, 2, "[^---]", "-ab", },
+ { 3, 3, "[][.-.]-0]", "ab0-]", },
+ { 3, 3, "[A-[.].]c]", "ab]!", },
+ { 2, 6, "bc[d-w]xy", "abchxyz", },
+ { 0, 0, "GA129", NULL, },
+ { 1, 1, "[a-cd-f]", "dbccde", },
+ { -1, -1, "[a-ce-f]", "dBCCdE", },
+ { 2, 4, "b[n-zA-M]Y", "absY9Z", },
+ { 2, 4, "b[n-zA-M]Y", "abGY9Z", },
+ { 0, 0, "GA130", NULL, },
+ { 3, 3, "[-xy]", "ac-", },
+ { 2, 4, "c[-xy]D", "ac-D+", },
+ { 2, 2, "[--/]", "a.b", },
+ { 2, 4, "c[--/]D", "ac.D+b", },
+ { 2, 2, "[^-ac]", "abcde-", },
+ { 1, 3, "a[^-ac]c", "abcde-", },
+ { 3, 3, "[xy-]", "zc-", },
+ { 2, 4, "c[xy-]7", "zc-786", },
+ { 2, 2, "[^ac-]", "abcde-", },
+ { 2, 4, "a[^ac-]c", "5abcde-", },
+ { 2, 2, "[+--]", "a,b", },
+ { 2, 4, "a[+--]B", "Xa,By", },
+ { 2, 2, "[^---]", "-ab", },
+ { 4, 6, "X[^---]Y", "X-YXaYXbY", },
+ { 0, 0, "2.8.3.3 BREs Matching Multiple Characters", NULL, },
+ { 0, 0, "GA131", NULL, },
+ { 3, 4, "cd", "abcdeabcde", },
+ { 1, 2, "ag*b", "abcde", },
+ { -1, -1, "[a-c][e-f]", "abcdef", },
+ { 3, 4, "[a-c][e-f]", "acbedf", },
+ { 4, 8, "abc*XYZ", "890abXYZ#*", },
+ { 4, 9, "abc*XYZ", "890abcXYZ#*", },
+ { 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
+ { -1, -1, "abc*XYZ", "890abc*XYZ#*", },
+ { 0, 0, "GA132", NULL, },
+ { 2, 4, "\\(*bc\\)", "a*bc", },
+ { 1, 2, "\\(ab\\)", "abcde", },
+ { 1, 10, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { 3, 8, "43\\(2\\(6\\)*0\\)AB", "654320ABCD", },
+ { 3, 9, "43\\(2\\(7\\)*0\\)AB", "6543270ABCD", },
+ { 3, 12, "43\\(2\\(7\\)*0\\)AB", "6543277770ABCD", },
+ { 0, 0, "GA133", NULL, },
+ { 1, 10, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { -1, -1, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(k\\)\\)\\)\\)\\)\\)\\)\\)", "abcdefghijk", },
+ { 0, 0, "GA134", NULL, },
+ { 2, 4, "\\(bb*\\)", "abbbc", },
+ { 2, 2, "\\(bb*\\)", "ababbbc", },
+ { 1, 6, "a\\(.*b\\)", "ababbbc", },
+ { 1, 2, "a\\(b*\\)", "ababbbc", },
+ { 1, 20, "a\\(.*b\\)c", "axcaxbbbcsxbbbbbbbbc", },
+ { 0, 0, "GA135", NULL, },
+ { 1, 7, "\\(a\\(b\\(c\\(d\\(e\\)\\)\\)\\)\\)\\4", "abcdededede", },
+ { 0, 0, NULL, "POSIX does not really specify whether a\\(b\\)*c\\1 matches acb." },
+ { 0, 0, NULL, "back references are supposed to expand to the last match, but what" },
+ { 0, 0, NULL, "if there never was a match as in this case?" },
+ { -1, -1, "a\\(b\\)*c\\1", "acb", },
+ { 1, 11, "\\(a\\(b\\(c\\(d\\(e\\(f\\(g\\)h\\(i\\(j\\)\\)\\)\\)\\)\\)\\)\\)\\9", "abcdefghijjk", },
+ { 0, 0, "GA136", NULL, },
+ { 0, 0, NULL, "These two tests have the same problem as the test in GA135. No match" },
+ { 0, 0, NULL, "of a subexpression, why should the back reference be usable?" },
+ { 0, 0, NULL, "1 2 a\\(b\\)*c\\1 acb" },
+ { 0, 0, NULL, "4 7 a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4xYzabcdePQRST" },
+ { -1, -1, "a\\(b\\)*c\\1", "acb", },
+ { -1, -1, "a\\(b\\(c\\(d\\(f\\)*\\)\\)\\)\\4", "xYzabcdePQRST", },
+ { 0, 0, "GA137", NULL, },
+ { -2, -2, "\\(a\\(b\\)\\)\\3", "foo", },
+ { -2, -2, "\\(a\\(b\\)\\)\\(a\\(b\\)\\)\\5", "foo", },
+ { 0, 0, "GA138", NULL, },
+ { 1, 2, "ag*b", "abcde", },
+ { 1, 10, "a.*b", "abababvbabc", },
+ { 2, 5, "b*c", "abbbcdeabbbbbbcde", },
+ { 2, 5, "bbb*c", "abbbcdeabbbbbbcde", },
+ { 1, 5, "a\\(b\\)*c\\1", "abbcbbb", },
+ { -1, -1, "a\\(b\\)*c\\1", "abbdbd", },
+ { 0, 0, "\\([a-c]*\\)\\1", "abcacdef", },
+ { 1, 6, "\\([a-c]*\\)\\1", "abcabcabcd", },
+ { 1, 2, "a^*b", "ab", },
+ { 1, 5, "a^*b", "a^^^b", },
+ { 0, 0, "GA139", NULL, },
+ { 1, 2, "a\\{2\\}", "aaaa", },
+ { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", },
+ { 1, 2, "\\(a\\)\\1\\{1,2\\}", "aabc", },
+ { 1, 3, "\\(a\\)\\1\\{1,2\\}", "aaaabc", },
+ { 0, 0, NULL, "the expression \\(\\(a\\)\\1\\)\\{1,2\\} is ill-formed, using \\2" },
+ { 1, 4, "\\(\\(a\\)\\2\\)\\{1,2\\}", "aaaabc", },
+ { 0, 0, "GA140", NULL, },
+ { 1, 2, "a\\{2\\}", "aaaa", },
+ { -1, -1, "a\\{2\\}", "abcd", },
+ { 0, 0, "a\\{0\\}", "aaaa", },
+ { 1, 64, "a\\{64\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "GA141", NULL, },
+ { 1, 7, "\\([a-c]*\\)\\{0,\\}", "aabcaab", },
+ { 0, 0, NULL, "the expected result for \\([a-c]*\\)\\{2,\\} is failure which isn't correct" },
+ { 1, 3, "\\([a-c]*\\)\\{2,\\}", "abcdefg", },
+ { 1, 3, "\\([a-c]*\\)\\{1,\\}", "abcdefg", },
+ { -1, -1, "a\\{64,\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "GA142", NULL, },
+ { 1, 3, "a\\{2,3\\}", "aaaa", },
+ { -1, -1, "a\\{2,3\\}", "abcd", },
+ { 0, 0, "\\([a-c]*\\)\\{0,0\\}", "foo", },
+ { 1, 63, "a\\{1,63\\}", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", },
+ { 0, 0, "2.8.3.4 BRE Precedence", NULL, },
+ { 0, 0, "GA143", NULL, },
+ { 0, 0, NULL, "There are numerous bugs in the original version." },
+ { 2, 19, "\\^\\[[[.].]]\\\\(\\\\1\\\\)\\*\\\\{1,2\\\\}\\$", "a^[]\\(\\1\\)*\\{1,2\\}$b", },
+ { 1, 6, "[[=*=]][[=\\=]][[=]=]][[===]][[...]][[:punct:]]", "*\\]=.;", },
+ { 1, 6, "[$\\(*\\)^]*", "$\\()*^", },
+ { 1, 1, "[\\1]", "1", },
+ { 1, 1, "[\\{1,2\\}]", "{", },
+ { 0, 0, NULL, "the expected result for \\(*\\)*\\1* is 2-2 which isn't correct" },
+ { 0, 0, "\\(*\\)*\\1*", "a*b*11", },
+ { 2, 3, "\\(*\\)*\\1*b", "a*b*11", },
+ { 0, 0, NULL, "the expected result for \\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\) is 1-5 which isn't correct" },
+ { 1, 3, "\\(a\\(b\\{1,2\\}\\)\\{1,2\\}\\)", "abbab", },
+ { 1, 5, "\\(a\\(b\\{1,2\\}\\)\\)\\{1,2\\}", "abbab", },
+ { 1, 1, "^\\(^\\(^a$\\)$\\)$", "a", },
+ { 1, 2, "\\(a\\)\\1$", "aa", },
+ { 1, 3, "ab*", "abb", },
+ { 1, 4, "ab\\{2,4\\}", "abbbc", },
+ { 0, 0, "2.8.3.5 BRE Expression Anchoring", NULL, },
+ { 0, 0, "GA144", NULL, },
+ { 1, 1, "^a", "abc", },
+ { -1, -1, "^b", "abc", },
+ { -1, -1, "^[a-zA-Z]", "99Nine", },
+ { 1, 4, "^[a-zA-Z]*", "Nine99", },
+ { 0, 0, "GA145(1)", NULL, },
+ { 1, 2, "\\(^a\\)\\1", "aabc", },
+ { -1, -1, "\\(^a\\)\\1", "^a^abc", },
+ { 1, 2, "\\(^^a\\)", "^a", },
+ { 1, 1, "\\(^^\\)", "^^", },
+ { 1, 3, "\\(^abc\\)", "abcdef", },
+ { -1, -1, "\\(^def\\)", "abcdef", },
+ { 0, 0, "GA146", NULL, },
+ { 3, 3, "a$", "cba", },
+ { -1, -1, "a$", "abc", },
+ { 5, 7, "[a-z]*$", "99ZZxyz", },
+ { 0, 0, NULL, "the expected result for [a-z]*$ is failure which isn't correct" },
+ { 10, 9, "[a-z]*$", "99ZZxyz99", },
+ { 3, 3, "$$", "ab$", },
+ { -1, -1, "$$", "$ab", },
+ { 3, 3, "\\$$", "ab$", },
+ { 0, 0, "GA147(1)", NULL, },
+ { -1, -1, "\\(a$\\)\\1", "bcaa", },
+ { -1, -1, "\\(a$\\)\\1", "ba$", },
+ { -1, -1, "\\(ab$\\)", "ab$", },
+ { 1, 2, "\\(ab$\\)", "ab", },
+ { 4, 6, "\\(def$\\)", "abcdef", },
+ { -1, -1, "\\(abc$\\)", "abcdef", },
+ { 0, 0, "GA148", NULL, },
+ { 0, 0, "^$", "", },
+ { 1, 3, "^abc$", "abc", },
+ { -1, -1, "^xyz$", "^xyz^", },
+ { -1, -1, "^234$", "^234$", },
+ { 1, 9, "^[a-zA-Z0-9]*$", "2aA3bB9zZ", },
+ { -1, -1, "^[a-z0-9]*$", "2aA3b#B9zZ", },
diff --git a/testsuite/range-overlap.sh b/testsuite/range-overlap.sh
new file mode 100755
index 0000000..ddcd08f
--- /dev/null
+++ b/testsuite/range-overlap.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Verify that even with overlapping ranges of line numbers,
+# only the selected lines are affected.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+printf '%s\n' 1 2 3 4 5 6 > in || framework_failure_
+printf '%s\n' 1 5 6 > exp || framework_failure_
+
+# Before sed-4.3, this would mistakenly modify line 5 like this:
+# 1
+# yx5
+# 6
+sed '2,4d;2,3s/^/x/;3,4s/^/y/' in > out 2> err || framework_failure_
+
+compare exp out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/recursive-escape-c.sh b/testsuite/recursive-escape-c.sh
new file mode 100644
index 0000000..41bbced
--- /dev/null
+++ b/testsuite/recursive-escape-c.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# test \c escaping
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+unset POSIXLY_CORRECT
+export LC_ALL=C
+
+# input file, any 6 lines would do, each a different test case
+printf "%s\n" a a a a a a >in1 || framework_failure_
+
+# input program
+cat << \EOF > prog1 || framework_failure_
+1s/./\cA/
+2s/./\cB/
+3s/./\c[/
+4s/./\c]/
+
+# '\c' at end-of-buffer, a backslash is pushed up
+# on level of interpretation, and the '.' match is replaced
+# with one backslash.
+5s/./\c/
+
+# This would return incorrect results before 4.3,
+# producing both \034 and another backslash.
+6s/./\c\\/
+EOF
+
+# expected output:
+printf '\001\n\002\n\033\n\035\n\\\n\034\n' > exp1 || framework_failure_
+
+#
+# Run simple test cases
+#
+sed -f prog1 in1 > out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# for easier troubleshooting, if users ever report errors
+if test "$fail" -eq 1 ; then
+ od -tx1c prog1
+ od -tx1c exp1
+ od -tx1c out1
+fi
+
+#
+# Test invalid usage
+#
+cat << \EOF > exp-err || framework_failure_
+sed: -e expression #1, char 10: recursive escaping after \c not allowed
+EOF
+
+# Before sed-4.3, this resulted in '\034d'. Now, it is rejected.
+returns_ 1 sed '1s/./\c\d/' in1 2>err || fail=1
+compare_ exp-err err || fail=1
+
+Exit $fail
diff --git a/testsuite/regex-errors.sh b/testsuite/regex-errors.sh
new file mode 100644
index 0000000..454f7c0
--- /dev/null
+++ b/testsuite/regex-errors.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Exercise regex_compile errors
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Invalid backref in address regex
+#
+cat <<\EOF >exp-err-inv-backref || framework_failure_
+sed: -e expression #1, char 4: Invalid back reference
+EOF
+
+returns_ 1 sed '/\1/,$p' </dev/null 2>err-inv-backref || fail=1
+compare_ exp-err-inv-backref err-inv-backref || fail=1
+
+
+#
+# modifiers on empty regex (BAD_MODIF in regex.c)
+#
+cat <<\EOF >exp-err-bad-modif || framework_failure_
+sed: -e expression #1, char 3: cannot specify modifiers on empty regexp
+EOF
+
+returns_ 1 sed '//M,$p' </dev/null 2>err-bad-modif || fail=1
+compare_ exp-err-bad-modif err-bad-modif || fail=1
+
+
+Exit $fail
diff --git a/testsuite/regex-max-int.sh b/testsuite/regex-max-int.sh
new file mode 100755
index 0000000..fcea466
--- /dev/null
+++ b/testsuite/regex-max-int.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Test regex on input buffers larger than 2GB
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+very_expensive_
+print_ver_ sed
+
+# Create a file larger than 2GB and containing a single line
+# (resulting in a regex match against the entire file)
+#
+# This is a "very expensive" test, we can assume it is only run by
+# developers or advanced users, and we can assume truncate(1) exists.
+#
+# On most modern file-systems, the file will be sparse and would not
+# consume 2GB of physical storage.
+# Layout: 1G from truncate, "aaaa", another 1G, then "a\n" -- the only newline.
+truncate -s 1G input || framework_failure_
+printf aaaa >> input || framework_failure_
+truncate -s +1G input || framework_failure_
+printf 'a\n' >> input || framework_failure_
+
+# The expected error message
+cat <<\EOF > exp-err1 || framework_failure_
+sed: regex input buffer length larger than INT_MAX
+EOF
+
+
+# Before sed-4.5, this was silently a no-op: would not perform the substitution
+# but would not indicate any error either (https://bugs.gnu.org/30520).
+# Exit code 4 is "panic".
+returns_ 4 sed 's/a/b/g' input >/dev/null 2>err1 || fail=1
+compare_ exp-err1 err1 || fail=1
+
+Exit $fail
diff --git a/testsuite/sandbox.sh b/testsuite/sandbox.sh
new file mode 100755
index 0000000..cd5e7c9
--- /dev/null
+++ b/testsuite/sandbox.sh
@@ -0,0 +1,90 @@
+#!/bin/sh
+# Test --sandbox mode
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo a > a || framework_failure_
+echo b > b || framework_failure_
+
+# Same error message, different character position in the sed program.
+for i in 1 6 14 ; do
+ err="sed: -e expression #1, char $i: e/r/w commands disabled in sandbox mode"
+ echo "$err" > exp-err$i || framework_failure_
+done
+
+# read command - without sandbox
+printf "a\nb\n" > exp || framework_failure_
+sed rb a > out || fail=1
+compare exp out || fail=1
+
+# read command - with sandbox
+returns_ 1 sed --sandbox -e 'ra' b >/dev/null 2>err1 || fail=1
+compare exp-err1 err1 || fail=1
+
+
+# write command (create file 'c') - without sandbox
+sed wc a > out || fail=1
+compare a c || fail=1
+compare out a || fail=1
+
+# write command - with sandbox
+returns_ 1 sed --sandbox -e 'wd' a >/dev/null 2>err2 || fail=1
+compare exp-err1 err1 || fail=1
+# ensure file 'd' was not created
+test -e d && fail=1
+
+
+
+# execute command - without sandbox
+sed 'etouch e' b > out || fail=1
+compare b out || fail=1
+# ensure 'e' was created
+test -e e || fail=1
+
+# execute command - with sandbox
+returns_ 1 sed --sandbox -e 'etouch f' b >/dev/null 2>err3 || fail=1
+compare exp-err1 err3 || fail=1
+# ensure 'f' was not created
+test -e f && fail=1
+
+
+
+# substitute+write option - without sandbox
+sed 's/^//wg' a > out || fail=1
+test -e g || fail=1
+
+# substitute+write option - with sandbox
+returns_ 1 sed --sandbox 's/^//wh' a >/dev/null 2>err4 || fail=1
+compare exp-err6 err4 || fail=1
+# ensure file 'h' was not created
+test -e h && fail=1
+
+
+
+# substitute+execute option - without sandbox
+sed 's/.*/touch i/e' a > out || fail=1
+test -e i || fail=1
+
+# substitute+execute option - with sandbox
+returns_ 1 sed --sandbox 's/.*/touch j/e' a >/dev/null 2>err5 || fail=1
+compare exp-err14 err5 || fail=1
+# ensure file 'j' was not created
+test -e j && fail=1
+
+
+Exit $fail
diff --git a/testsuite/stdin-prog.sh b/testsuite/stdin-prog.sh
new file mode 100644
index 0000000..6a7167c
--- /dev/null
+++ b/testsuite/stdin-prog.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Test program file from STDIN
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+echo X > in1 || framework_failure_
+printf "1\nX\n" > exp1 || framework_failure_
+# Both runs use the '=' program; exp1 is the line number followed by the line.
+# program file whose name starts with '-' (must not be mistaken for '-', STDIN)
+printf "=\n" > ./-myprog || framework_failure_
+
+
+# program from STDIN: '-f -' reads the script from standard input
+printf "=\n" | sed -f - in1 > out1 || fail=1
+compare_ exp1 out1 || fail=1
+
+# program filename starting with '-'
+# (if a buggy sed reads from STDIN, the 'v9' command will fail)
+printf "v9\n" | sed -f -myprog in1 > out2 || fail=1
+compare_ exp1 out2 || fail=1
+
+Exit $fail
diff --git a/testsuite/stdin.sh b/testsuite/stdin.sh
new file mode 100755
index 0000000..71c17d2
--- /dev/null
+++ b/testsuite/stdin.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+# Test runner for old 'stdin' test
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+
+cat << \EOF > stdin-in || framework_failure_
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
+foo foo fo oo f oo foo foo foo foo foo foo foo foo foo foo foo foo foo
+EOF
+
+# This checks for a bug in 3.02 and 3.02.80: the output must be the same
+# whether stdin-in is redirected from the file or piped in through cat.
+( sed d ; sed G ) < stdin-in > stdin-out1 || fail=1
+cat stdin-in | ( sed d ; sed G ) > stdin-out2 || fail=1
+remove_cr_inplace stdin-out1
+remove_cr_inplace stdin-out2
+
+compare stdin-out1 stdin-out2 || fail=1
+
+Exit $fail
diff --git a/testsuite/subst-mb-incomplete.sh b/testsuite/subst-mb-incomplete.sh
new file mode 100755
index 0000000..5045e2d
--- /dev/null
+++ b/testsuite/subst-mb-incomplete.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Ensure that sed no longer writes beyond the end of a heap buffer when
+# performing a substitution with a replacement string containing an
+# incomplete multi-byte character.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+echo > in || framework_failure_
+printf '\233\375\200\n' > exp-out || framework_failure_
+# sed must also exit successfully: a crash after the output is written would otherwise go unnoticed.
+LC_ALL=en_US.utf8 sed $(printf 's/^/\\L\233\375\\\200/') in > out 2> err || fail=1
+
+compare exp-out out || fail=1
+compare /dev/null err || fail=1
+
+Exit $fail
diff --git a/testsuite/subst-options.sh b/testsuite/subst-options.sh
new file mode 100644
index 0000000..bc206aa
--- /dev/null
+++ b/testsuite/subst-options.sh
@@ -0,0 +1,123 @@
+#!/bin/sh
+# Test Substitute options (for code-coverage purposes as well)
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Simple modifiers to s//
+# (specific characters included as make_subst_opts's implementation
+# checks for them before returning control)
+printf "%s\n" a a a a a a > subst-in1 || framework_failure_
+printf "%s\n" x x x x x x > subst-exp1 || framework_failure_
+cat << \EOF >> subst-prog1 || framework_failure_
+1s/A/x/i
+2s/A/x/I
+
+# s// followed by '}'
+3{s/./x/}
+# s// followed by '#'
+4s/./x/#
+# s// followed by ';'
+5s/./x/;
+# s// followed by '\n
+6s/./x/
+EOF
+
+sed -f subst-prog1 subst-in1 > subst-out1 || fail=1
+compare_ subst-exp1 subst-out1 || fail=1
+
+
+#
+# Number modifiers to s//
+#
+
+cat << \EOF >subst-in2 || framework_failure_
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+bbbbbbbbbb
+EOF
+
+cat << \EOF >subst-prog2 || framework_failure_
+1s/./x/g
+2s/./x/1
+3s/./x/2
+4s/./x/3
+5s/./x/4
+6s/./x/5
+7s/./x/6
+8s/./x/7
+9s/./x/8
+10s/./x/9
+EOF
+
+cat << \EOF >subst-exp2
+xxxxxxxxxx
+xbbbbbbbbb
+bxbbbbbbbb
+bbxbbbbbbb
+bbbxbbbbbb
+bbbbxbbbbb
+bbbbbxbbbb
+bbbbbbxbbb
+bbbbbbbxbb
+bbbbbbbbxb
+EOF
+
+sed -f subst-prog2 subst-in2 > subst-out2 || fail=1
+compare_ subst-exp2 subst-out2 || fail=1
+
+#
+# Multiline modifier: s///m
+# ('N' will read and concatenate the second line
+# into the pattern space, making it "foo\nbar".
+# s// will then operate on it as one string).
+printf "foo\nbar\n" > subst-in3 || framework_failure_
+printf "Xoo\nXar\n" > subst-exp3 || framework_failure_
+
+sed 'N;s/^./X/gm' subst-in3 > subst-out3-1 || fail=1
+compare_ subst-exp3 subst-out3-1 || fail=1
+sed 'N;s/^./X/gM' subst-in3 > subst-out3-2 || fail=1
+compare_ subst-exp3 subst-out3-2 || fail=1
+
+# sanity-check: without m, only the first line should match
+printf "Xoo\nbar\n" > subst-exp3-3 || framework_failure_
+sed 'N;s/^./X/g' subst-in3 > subst-out3-3 || fail=1
+compare_ subst-exp3-3 subst-out3-3 || fail=1
+
+
+#
+# s// followed by \r\n: the carriage return before the newline must be
+# accepted after the closing '/' and the substitution still performed.
+
+printf "s/./X/\r\n" > subst-prog4 || framework_failure_
+echo a > subst-in4 || framework_failure_
+echo X > subst-exp4 || framework_failure_
+sed -f subst-prog4 subst-in4 > subst-out4 || fail=1
+compare_ subst-exp4 subst-out4 || fail=1
+
+
+
+
+Exit $fail
diff --git a/testsuite/subst-replacement.sh b/testsuite/subst-replacement.sh
new file mode 100644
index 0000000..7f4854b
--- /dev/null
+++ b/testsuite/subst-replacement.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+# Test Substitute replacements, e.g. 's/(.)/\U\1/'
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+#
+# Backslash followed by an unrecognized letter (here \Q):
+# the letter itself is used as-is, so 'a' becomes 'Q'.
+echo a > in-rpl1 || framework_failure_
+echo Q > exp-rpl1 || framework_failure_
+sed -E 's/(.)/\Q/' in-rpl1 > out-rpl1 || fail=1
+compare_ exp-rpl1 out-rpl1 || fail=1
+
+#
+# Numbered backreferences: the input has nine one-character groups,
+# so \N must expand to the digit N for every N in 1..9.
+echo 123456789 > in-rpl2 || framework_failure_
+for i in 1 2 3 4 5 6 7 8 9 ;
+do
+ echo $i > exp-rpl2-$i || framework_failure_
+ sed -E "s/(.)(.)(.)(.)(.)(.)(.)(.)(.)/\\$i/" in-rpl2 > out-rpl2-$i || fail=1
+ compare_ exp-rpl2-$i out-rpl2-$i || fail=1
+done
+
+# \0 expands to the entire match (TODO: is this documented?);
+# the output should be the same as the input.
+sed -E 's/(.)(.)(.)(.)(.)(.)(.)(.)(.)/\0/' in-rpl2 > out-rpl2-0 || fail=1
+compare_ in-rpl2 out-rpl2-0 || fail=1
+
+# Unescaped '&' expands to the entire match;
+# the output should be the same as the input.
+sed -E 's/(.)(.)(.)(.)(.)(.)(.)(.)(.)/&/' in-rpl2 > out-rpl2-amp || fail=1
+compare_ in-rpl2 out-rpl2-amp || fail=1
+
+
+#
+# gnu extension: \U \u \L \l \E
+# (all cases below rewrite the first three characters of "abCde")
+echo abCde > in-rpl3 || framework_failure_
+
+# \U - uppercase everything that follows (here all three groups)
+echo ABCde > exp-rpl3-U || framework_failure_
+sed -E 's/(.)(.)(.)/\U\1\2\3/' in-rpl3 > out-rpl3-U || fail=1
+compare_ exp-rpl3-U out-rpl3-U || fail=1
+
+# \u - uppercase only the next character
+echo AbCde > exp-rpl3-u || framework_failure_
+sed -E 's/(.)(.)(.)/\u\1\2\3/' in-rpl3 > out-rpl3-u || fail=1
+compare_ exp-rpl3-u out-rpl3-u || fail=1
+
+# \L - lowercase everything that follows (here all three groups)
+echo abcde > exp-rpl3-L || framework_failure_
+sed -E 's/(.)(.)(.)/\L\1\2\3/' in-rpl3 > out-rpl3-L || fail=1
+compare_ exp-rpl3-L out-rpl3-L || fail=1
+
+# \l - lowercase only the next character (input already starts with 'a', so output equals input)
+echo abCde > exp-rpl3-l || framework_failure_
+sed -E 's/(.)(.)(.)/\l\1\2\3/' in-rpl3 > out-rpl3-l || fail=1
+compare_ exp-rpl3-l out-rpl3-l || fail=1
+
+# \E after \U - ends the conversion: only group 1 is uppercased
+echo AbCde > exp-rpl3-E1 || framework_failure_
+sed -E 's/(.)(.)(.)/\U\1\E\2\3/' in-rpl3 > out-rpl3-E1 || fail=1
+compare_ exp-rpl3-E1 out-rpl3-E1 || fail=1
+# \E after \L - groups 1-2 are lowercased, group 3 ('C') is left as-is
+echo abCde > exp-rpl3-E2 || framework_failure_
+sed -E 's/(.)(.)(.)/\L\1\2\E\3/' in-rpl3 > out-rpl3-E2 || fail=1
+compare_ exp-rpl3-E2 out-rpl3-E2 || fail=1
+
+
+Exit $fail
diff --git a/testsuite/temp-file-cleanup.sh b/testsuite/temp-file-cleanup.sh
new file mode 100755
index 0000000..dc53d57
--- /dev/null
+++ b/testsuite/temp-file-cleanup.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Verify that an erroneous use of sed -i no longer leaves behind
+# a temporary file.
+
+# Copyright (C) 2015-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# The input file must have at least one line (here: a single empty line).
+echo > in || framework_failure_
+printf 'sed: -e expression #1, char 0: no previous regular expression\n' \
+ > exp || framework_failure_
+# s//b/ uses an empty regex with no previous regex; sed is expected to exit 1.
+# Before sed-4.3, this would create a file named sed??????
+returns_ 1 sed -i s//b/ in > out 2> err || fail=1
+# Nothing may appear on stdout; stderr must hold exactly the expected message.
+compare /dev/null out || fail=1
+compare exp err || fail=1
+
+# Ensure that no other file has been created in this directory:
+# the glob must expand to exactly the four files this test created.
+files=$(echo *)
+test "$files" = "err exp in out" || fail=1
+
+Exit $fail
diff --git a/testsuite/test-mbrtowc.c b/testsuite/test-mbrtowc.c
new file mode 100644
index 0000000..912b14b
--- /dev/null
+++ b/testsuite/test-mbrtowc.c
@@ -0,0 +1,170 @@
+/* Auxiliary program to test mbrtowc(3) behaviour.
+ Copyright 2016-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+/* Test the operating-system's native mbrtowc(3) function,
+ by feeding it multibyte sequences one byte at a time,
+ and reporting the result.
+
+ The program prints the following values after each mbrtowc invocation,
+ separated by commas:
+
+ -2 the octet contributes to a valid yet incomplete multibyte sequence
+ in the current locale.
+
+ -1 the octet causes an encoding error.
+
+ 0 the octet represents a NUL byte
+
+ 1 the octet is a valid single-byte character, OR
+ completes a valid multibyte sequence.
+
+ Because the program invokes mbrtowc(3) byte-by-byte, the reported
+ result should never be larger than 1.
+
+ Example of typical output with UTF-8 encoding
+ ---------------------------------------------
+
+ The unicode character 'N-ARY SUMMATION' (U+2211), encoded in UTF-8 as:
+ hex: 0xE2 0x88 0x91
+ oct: 342 210 211
+
+ Decoding the valid sequence byte-by-byte gives:
+ $ printf '\342\210\221' | LC_ALL=en_US.UTF-8 test-mbrtowc
+ -2,-2,1
+
+ '\210' is not a valid leading byte in UTF-8,
+ thus the first byte gives -1, and the 'X' is treated
+ as a valid single-byte character:
+
+ $ printf '\210X' | LC_ALL=en_US.UTF-8 test-mbrtowc
+ -1,1
+
+ '\342' is a valid yet incomplete multibyte sequence.
+ Passing it to mbrtowc results in value '-2'.
+ The following value 'X' gives an encoding error '-1'
+ (as 'X' is not a valid trailing byte in a multibyte UTF-8 sequence):
+
+ $ printf '\342X' | LC_ALL=en_US.UTF-8 test-mbrtowc
+ -2,-1
+
+
+ Detecting implementation bugs in mbrtowc
+ ----------------------------------------
+
+ UTF-8 implementation is correct on most operating systems.
+ Other multibyte locales might present more difficulties.
+ An example is the Japanese SHIFT-JIS locale under Mac OS X.
+ NOTE: The locale is 'ja_JP.SJIS' under Mac OS X, 'ja_JP.shiftjis'
+ under Ubuntu. 'ja_JP.sjis' was also found on some systems.
+
+ Using unicode character 'KATAKANA LETTER ZE' (U+30BC)
+ UTF-8: hex: 0xE3 0x82 0xBC
+ Shift-jis hex: 0x83 0x5B
+ oct: 203 133
+
+ The following is a valid multibyte sequence in SHIFT-JIS,
+ the first byte should result in '-2' (valid yet incomplete),
+ and the second byte should result in '1' (a valid multibyte sequence
+ completed):
+
+ $ printf '\203\133' | LC_ALL=ja_JP.SJIS test-mbrtowc
+ -2,1
+
+ The following is an INVALID multibyte sequence in SHIFT-JIS
+ (The byte ':' is not valid as a second octet).
+ Buggy implementations will accept this as a valid multibyte sequence:
+
+ # NOTE: this result indicates a buggy mbrtowc
+ $ printf '\203:' | LC_ALL=ja_JP.SJIS test-mbrtowc
+ -2,1
+
+ A correct implementation should report '-1' for the second byte (i.e.
+ an encoding error):
+
+ $ printf '\203:' | LC_ALL=ja_JP.SJIS test-mbrtowc
+ -2,-1
+
+
+ Expected results with correct implementations
+ ---------------------------------------------
+
+ In GNU Sed some tests purposely use invalid multibyte sequences
+ to test sed's behaviour. A buggy implementation of mbrtowc
+ would result in false-alarm failures.
+
+ The following are expected results in correct implementations:
+ (locale names are from Mac OS X):
+
+ $ printf '\203\133' | LC_ALL=ja_JP.SJIS test-mbrtowc
+ -2,1
+ $ printf '\203:' | LC_ALL=ja_JP.SJIS test-mbrtowc
+ -2,-1
+ $ printf '\262C' | LC_ALL=ja_JP.eucJP test-mbrtowc
+ -2,-1
+*/
+
+#include <config.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#include "closeout.h"
+#include "error.h"
+#include "progname.h"
+
+/* Stub for non-standard err(3): report MSG and exit with failure.  error(3) already prepends the program name and appends a newline.  */
+static int
+die (const char *msg)
+{
+  error (0, 0, "error: %s", msg);
+  exit (EXIT_FAILURE);
+}
+
+int
+main (int argc, char **argv)
+{
+  int c;
+  int first = 1;	/* nonzero until the first result has been printed */
+  /* Record the program name for diagnostics, then adopt the environment's locale.  */
+  set_program_name (argv[0]);
+  if (!setlocale (LC_ALL, ""))
+    die ("failed to set locale");
+  /* Feed stdin to mbrtowc one byte at a time; the NULL state argument makes it keep any partial sequence in its internal state.  */
+  while ((c = getchar ()) != EOF)
+    {
+      wchar_t wc;
+      char ch = (unsigned char) c;
+      int i = (int) mbrtowc (&wc, &ch, 1, NULL);
+
+      if (!first)
+        putchar (',');
+      first = 0;
+
+      printf ("%d", i);
+    }
+  /* Nothing printed means "empty input" only when EOF was genuine; a read error is reported as such below.  */
+  if (first && !ferror (stdin))
+    die ("empty input");
+
+  putchar ('\n');
+
+  if (ferror (stdin))
+    die ("read error");
+  close_stdout ();
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/testsuite/testcases.h b/testsuite/testcases.h
new file mode 100644
index 0000000..834f530
--- /dev/null
+++ b/testsuite/testcases.h
@@ -0,0 +1,167 @@
+ {0, "(.*)*\\1", "xx"},
+ {0, "^", ""},
+ {0, "$", ""},
+ {0, "^$", ""},
+ {0, "^a$", "a"},
+ {0, "abc", "abc"},
+ {1, "abc", "xbc"},
+ {1, "abc", "axc"},
+ {1, "abc", "abx"},
+ {0, "abc", "xabcy"},
+ {0, "abc", "ababc"},
+ {0, "ab*c", "abc"},
+ {0, "ab*bc", "abc"},
+ {0, "ab*bc", "abbc"},
+ {0, "ab*bc", "abbbbc"},
+ {0, "ab+bc", "abbc"},
+ {1, "ab+bc", "abc"},
+ {1, "ab+bc", "abq"},
+ {0, "ab+bc", "abbbbc"},
+ {0, "ab?bc", "abbc"},
+ {0, "ab?bc", "abc"},
+ {1, "ab?bc", "abbbbc"},
+ {0, "ab?c", "abc"},
+ {0, "^abc$", "abc"},
+ {1, "^abc$", "abcc"},
+ {0, "^abc", "abcc"},
+ {1, "^abc$", "aabc"},
+ {0, "abc$", "aabc"},
+ {0, "^", "abc"},
+ {0, "$", "abc"},
+ {0, "a.c", "abc"},
+ {0, "a.c", "axc"},
+ {0, "a.*c", "axyzc"},
+ {1, "a.*c", "axyzd"},
+ {1, "a[bc]d", "abc"},
+ {0, "a[bc]d", "abd"},
+ {1, "a[b-d]e", "abd"},
+ {0, "a[b-d]e", "ace"},
+ {0, "a[b-d]", "aac"},
+ {0, "a[-b]", "a-"},
+ {0, "a[b-]", "a-"},
+ {2, "a[b-a]", "-"},
+ {2, "a[]b", "-"},
+ {2, "a[", "-"},
+ {0, "a]", "a]"},
+ {0, "a[]]b", "a]b"},
+ {0, "a[^bc]d", "aed"},
+ {1, "a[^bc]d", "abd"},
+ {0, "a[^-b]c", "adc"},
+ {1, "a[^-b]c", "a-c"},
+ {1, "a[^]b]c", "a]c"},
+ {0, "a[^]b]c", "adc"},
+ {0, "ab|cd", "abc"},
+ {0, "ab|cd", "abcd"},
+ {0, "()ef", "def"},
+ {0, "()*", "-"},
+ {2, "*a", "-"},
+ {2, "^*", "-"},
+ {2, "$*", "-"},
+ {2, "(*)b", "-"},
+ {1, "$b", "b"},
+ {2, "a\\", "-"},
+ {0, "a\\(b", "a(b"},
+ {0, "a\\(*b", "ab"},
+ {0, "a\\(*b", "a((b"},
+ {1, "a\\x", "a\\x"},
+ {1, "abc)", "-"},
+ {2, "(abc", "-"},
+ {0, "((a))", "abc"},
+ {0, "(a)b(c)", "abc"},
+ {0, "a+b+c", "aabbabc"},
+ {0, "a**", "-"},
+ {0, "a*?", "-"},
+ {0, "(a*)*", "-"},
+ {0, "(a*)+", "-"},
+ {0, "(a|)*", "-"},
+ {0, "(a*|b)*", "-"},
+ {0, "(a+|b)*", "ab"},
+ {0, "(a+|b)+", "ab"},
+ {0, "(a+|b)?", "ab"},
+ {0, "[^ab]*", "cde"},
+ {0, "(^)*", "-"},
+ {0, "(ab|)*", "-"},
+ {2, ")(", "-"},
+ {1, "abc", ""},
+ {1, "abc", ""},
+ {0, "a*", ""},
+ {0, "([abc])*d", "abbbcd"},
+ {0, "([abc])*bcd", "abcd"},
+ {0, "a|b|c|d|e", "e"},
+ {0, "(a|b|c|d|e)f", "ef"},
+ {0, "((a*|b))*", "-"},
+ {0, "abcd*efg", "abcdefg"},
+ {0, "ab*", "xabyabbbz"},
+ {0, "ab*", "xayabbbz"},
+ {0, "(ab|cd)e", "abcde"},
+ {0, "[abhgefdc]ij", "hij"},
+ {1, "^(ab|cd)e", "abcde"},
+ {0, "(abc|)ef", "abcdef"},
+ {0, "(a|b)c*d", "abcd"},
+ {0, "(ab|ab*)bc", "abc"},
+ {0, "a([bc]*)c*", "abc"},
+ {0, "a([bc]*)(c*d)", "abcd"},
+ {0, "a([bc]+)(c*d)", "abcd"},
+ {0, "a([bc]*)(c+d)", "abcd"},
+ {0, "a[bcd]*dcdcde", "adcdcde"},
+ {1, "a[bcd]+dcdcde", "adcdcde"},
+ {0, "(ab|a)b*c", "abc"},
+ {0, "((a)(b)c)(d)", "abcd"},
+ {0, "[A-Za-z_][A-Za-z0-9_]*", "alpha"},
+ {0, "^a(bc+|b[eh])g|.h$", "abh"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "effgz"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "ij"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "effg"},
+ {1, "(bc+d$|ef*g.|h?i(j|k))", "bcdd"},
+ {0, "(bc+d$|ef*g.|h?i(j|k))", "reffgz"},
+ {1, "((((((((((a))))))))))", "-"},
+ {0, "(((((((((a)))))))))", "a"},
+ {1, "multiple words of text", "uh-uh"},
+ {0, "multiple words", "multiple words, yeah"},
+ {0, "(.*)c(.*)", "abcde"},
+ {1, "\\((.*),", "(.*)\\)"},
+ {1, "[k]", "ab"},
+ {0, "abcd", "abcd"},
+ {0, "a(bc)d", "abcd"},
+ {0, "a[-]?c", "ac"},
+ {0, "(....).*\\1", "beriberi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamer El Kazzafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar al-Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Al Qathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mo'ammar el-Gadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar El Kadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Qadhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moamar Gaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar Qadhdhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar al-Khaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'amar al-Kadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafy"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghadafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Ghaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Quathafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muammar Gheddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Muamar Al-Kaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Khadafy "},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Moammar Qudhafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mu'ammar al-Qaddafi"},
+ {0, "M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", "Mulazim Awwal Mu'ammar Muhammad Abu Minyar al-Qadhafi"},
+ {0, "[[:digit:]]+", "01234"},
+ {1, "[[:alpha:]]+", "01234"},
+ {0, "^[[:digit:]]*$", "01234"},
+ {1, "^[[:digit:]]*$", "01234a"},
+ {0, "^[[:alnum:]]*$", "01234a"},
+ {0, "^[[:xdigit:]]*$", "01234a"},
+ {1, "^[[:xdigit:]]*$", "01234g"},
+ {0, "^[[:alnum:][:space:]]*$", "Hello world"},
diff --git a/testsuite/title-case.sh b/testsuite/title-case.sh
new file mode 100755
index 0000000..a365f08
--- /dev/null
+++ b/testsuite/title-case.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Test case insensitive matching for titlecase and similarly odd chars.
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_el_iso88597_locale_
+# Octal printf escapes for three ISO-8859-7 bytes that differ only in case/form:
+a='\323' # SIGMA (capital)
+b='\362' # final sigma (not stigma)
+c='\363' # sigma (small)
+# With the I flag, each of the three bytes must match all three input lines.
+printf "$a\\n$b\\n$c\\n" >in || framework_failure_
+for chr in "$a" "$b" "$c"; do
+ printf '/\(\)\\1'"$chr"/Ip >prog || fail=1
+ LC_ALL=el_GR.iso88597 sed -n -f prog in >out || fail=1
+ compare_ in out || fail=1
+done
+
+Exit $fail
diff --git a/testsuite/unbuffered.sh b/testsuite/unbuffered.sh
new file mode 100644
index 0000000..5cdfc27
--- /dev/null
+++ b/testsuite/unbuffered.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Test -u/--unbuffered option
+
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+printf "1\n2\n" > in1 || framework_failure_
+
+# Expected output of both programs: sed prints line "1", wc counts 1 line.
+printf "1\n" > exp || framework_failure_
+
+# Both commands read the same stdin (in1) inside one subshell.
+# In unbuffered mode,
+# sed should consume and print only the first line,
+# wc should see the rest of the input (second line).
+# The second sed trims optional leading whitespace from wc's count.
+( sed -u 1q > out-sed && wc -l | sed 's/^ *//' > out-wc ) < in1 || fail=1
+
+compare_ exp out-sed || fail=1
+compare_ exp out-wc || fail=1
+
+
+Exit $fail
diff --git a/testsuite/uniq.good b/testsuite/uniq.good
new file mode 100644
index 0000000..95fc5ee
--- /dev/null
+++ b/testsuite/uniq.good
@@ -0,0 +1,874 @@
+
+#define DPRINTF(p) /*nothing */
+#define DPRINTF(p) printf p
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define class pcre_class
+#define match_condassert 0x01 /* Called to check a condition assertion */
+#define match_isgroup 0x02 /* Set if start of bracketed group */
+#else
+#endif
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG
+#ifdef __cplusplus
+#include "internal.h"
+&& length - re->max_match_size > start_offset)
+((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(i.e. keep it out of the loop). Also we can test that there are at least
+(md->ctypes[*eptr++] & ctype_digit) != 0)
+(md->ctypes[*eptr++] & ctype_digit) == 0)
+(md->ctypes[*eptr++] & ctype_space) != 0)
+(md->ctypes[*eptr++] & ctype_space) == 0)
+(md->ctypes[*eptr++] & ctype_word) != 0)
+(md->ctypes[*eptr++] & ctype_word) == 0)
+(offsetcount - 2) * sizeof (int));
+(offsets == NULL && offsetcount > 0))
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (save);
+(re->tables + fcc_offset)[req_char] : req_char;
+* Match a back-reference *
+* Execute a Regular Expression *
+* Match from current position *
+* Debugging function to print chars *
+* Perl-Compatible Regular Expressions *
+* Macros and tables for character handling *
+*************************************************/
+*/
+*iptr = -1;
+*iptr++ = -1;
+*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
+*prev == OP_ONCE)
+-----------------------------------------------------------------------------
+-1 => failed to match
+/*
+/* "Once" brackets are like assertion brackets except that after a match,
+/* ... else fall through */
+/* Advance to a possible match for an initial string after study */
+/* Allow compilation as C++ source code, should anybody want to do that. */
+/* Always fail if not enough characters left */
+/* An alternation is the end of a branch; scan along to find the end of the
+/* Assert before internal newline if multiline, or before a terminating
+/* Assertion brackets. Check the alternative branches in turn - the
+/* At the start of a bracketed group, add the current subject pointer to the
+/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
+/* Caseful comparisons */
+/* Change option settings */
+/* Common code for all repeated single character type matches */
+/* Common code for all repeated single-character matches. We can give
+/* Compute the minimum number of offsets that we need to reset each time. Doing
+/* Conditional group: compilation checked that there are no more than
+/* Continue as from after the assertion, updating the offsets high water
+/* Continue from after the assertion, updating the offsets high water
+/* Control never gets here */
+/* Control never reaches here */
+/* Copy the offset information from temporary store if necessary */
+/* Do a single test if no case difference is set up */
+/* Do not stick any code in here without much thought; it is assumed
+/* End of a group, repeated or non-repeating. If we are at the end of
+/* End of subject assertion (\z) */
+/* End of subject or ending \n assertion (\Z) */
+/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
+/* First, ensure the minimum number of matches are present. */
+/* First, ensure the minimum number of matches are present. Use inline
+/* First, ensure the minimum number of matches are present. We get back
+/* Flag bits for the match() function */
+/* For a non-repeating ket, just continue at this level. This also
+/* For anchored or unanchored matches, there may be a "last known required
+/* For extended extraction brackets (large number), we have to fish out
+/* For extended extraction brackets (large number), we have to fish out the
+/* For matches anchored to the end of the pattern, we can often avoid
+/* If a back reference hasn't been set, the length that is passed is greater
+/* If checking an assertion for a condition, return TRUE. */
+/* If hit the end of the group (which could be repeated), fail */
+/* If max == min we can continue with the main loop without the
+/* If maximizing it is worth using inline code for speed, doing the type
+/* If maximizing, find the longest possible run, then work backwards. */
+/* If maximizing, find the longest string and work backwards */
+/* If min = max, continue at the same level without recursing */
+/* If min = max, continue at the same level without recursion.
+/* If minimizing, keep testing the rest of the expression and advancing
+/* If minimizing, keep trying and advancing the pointer */
+/* If minimizing, we have to test the rest of the pattern before each
+/* If req_char is set, we know that that character must appear in the subject
+/* If the expression has got more back references than the offsets supplied can
+/* If the length of the reference is zero, just continue with the
+/* If the reference is unset, set the length to be longer than the amount
+/* If we can't find the required character, break the matching loop */
+/* If we have found the required character, save the point where we
+/* In all other cases except a conditional group we have to check the
+/* In case the recursion has set more capturing values, save the final
+/* Include the internals header, which itself includes Standard C headers plus
+/* Insufficient room for saving captured contents */
+/* Loop for handling unanchored repeated matching attempts; for anchored regexs
+/* Match a back reference, possibly repeatedly. Look past the end of the
+/* Match a character class, possibly repeatedly. Look past the end of the
+/* Match a negated single character */
+/* Match a negated single character repeatedly. This is almost a repeat of
+/* Match a run of characters */
+/* Match a single character repeatedly; different opcodes share code. */
+/* Match a single character type repeatedly; several different opcodes
+/* Match a single character type; inline for speed */
+/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+/* Move the subject pointer back. This occurs only at the start of
+/* Negative assertion: all branches must fail to match */
+/* Now start processing the operations. */
+/* OP_KETRMAX */
+/* On entry ecode points to the first opcode, and eptr to the first character
+/* Opening capturing bracket. If there is space in the offset vector, save
+/* Or to a non-unique first char after study */
+/* Or to a unique first char if possible */
+/* Or to just after \n for a multiline match if possible */
+/* Other types of node can be handled by a switch */
+/* Otherwise test for either case */
+/* Print a sequence of chars in printable format, stopping at the end of the
+/* Recursion matches the current regex, nested. If there are any capturing
+/* Reset the maximum number of extractions we might see. */
+/* Reset the value of the ims flags, in case they got changed during
+/* Reset the working variable associated with each extraction. These should
+/* Separate the caselesss case for speed */
+/* Set up for repetition, or handle the non-repeated case */
+/* Set up the first character to match, if available. The first_char value is
+/* Skip over conditional reference data or large extraction number data if
+/* Start of subject assertion */
+/* Start of subject unless notbol, or after internal newline if multiline */
+/* Structure for building a chain of data that actually lives on the
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The condition is an assertion. Call match() to evaluate it - setting
+/* The ims options can vary during the matching as a result of the presence
+/* The repeating kets try the rest of the pattern or restart from the
+/* There's been some horrible disaster. */
+/* This "while" is the end of the "do" above */
+/* This function applies a compiled re to a subject string and picks out
+/* Use a macro for debugging printing, 'cause that limits the use of #ifdef
+/* We don't need to repeat the search if we haven't yet reached the
+/* When a match occurs, substrings will be set for all internal extractions;
+/* Word boundary assertions */
+/*************************************************
+1. This software is distributed in the hope that it will be useful,
+2. The origin of this software must not be misrepresented, either by
+3. Altered versions must be plainly marked as such, and must not be
+4. If PCRE is embedded in any software that is released under the GNU
+5.005. If there is an options reset, it will get obeyed in the normal
+6 : 3 + (ecode[1] << 8) + ecode[2]),
+< -1 => some kind of unexpected problem
+= 0 => success, but offsets is not big enough
+Arguments:
+BOOL anchored;
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL is_subject;
+BOOL minimize = FALSE;
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL rc;
+BOOL startline;
+BOOL using_temporary_offsets = FALSE;
+Copyright (c) 1997-2000 University of Cambridge
+DPRINTF ((">>>> returning %d\n", match_block.errorcode));
+DPRINTF ((">>>> returning %d\n", rc));
+DPRINTF (("Copied offsets from temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Got memory to hold back references\n"));
+DPRINTF (("Unknown opcode %d\n", *ecode));
+DPRINTF (("bracket %d failed\n", number));
+DPRINTF (("bracket 0 failed\n"));
+DPRINTF (("ims reset to %02lx\n", ims));
+DPRINTF (("ims set to %02lx at group repeat\n", ims));
+DPRINTF (("ims set to %02lx\n", ims));
+DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+DPRINTF (("start bracket 0\n"));
+GETCHAR (c, eptr) /* Get character */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+General Purpose Licence (GPL), then the terms of that licence shall
+However, if the referenced string is the empty string, always treat
+If the bracket fails to match, we need to restore this value and also the
+If there isn't enough space in the offset vector, treat this as if it were a
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+Otherwise, we can use the vector supplied, rounding down its size to a multiple
+Permission is granted to anyone to use this software for any purpose on any
+REPEATCHAR:
+REPEATNOTCHAR:
+REPEATTYPE:
+Returns: > 0 => success; value is the number of elements filled in
+Returns: TRUE if matched
+Returns: TRUE if matched
+Returns: nothing
+They are not both allowed to be zero. */
+This is a library of functions to support regular expressions whose syntax
+This is the forcible breaking of infinite loops as implemented in Perl
+Writing separate code makes it go faster, as does using an autoincrement and
+Written by: Philip Hazel <ph10@cam.ac.uk>
+a move back into the brackets. Check the alternative branches in turn - the
+address of eptr, so that eptr can be a register variable. */
+an assertion "group", stop matching and return TRUE, but record the
+an empty string - recursion will then try other alternatives, if any. */
+an error. Save the top 15 values on the stack, and accept that the rest
+an unanchored pattern, of course. If there's no first char and the pattern was
+analyzing most of the pattern. length > re->max_match_size is
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+and advance one byte in the pattern code. */
+and reinstate them after the recursion. However, we don't know how many
+and semantics are as close as possible to those of the Perl 5 language. See
+and the required character in fact is caseful. */
+at run time, so we have to test for anchoring. The first char may be unset for
+avoid duplicate testing (which takes significant time). This covers the vast
+backing off on a match. */
+bmtable = extra->data.bmtable;
+both cases of the character. Otherwise set the two values the same, which will
+bracketed group and go to there. */
+brackets - for testing for empty matches
+brackets started but not finished, we have to save their starting points
+break;
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+c != md->lcc[*eptr++])
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_NOTSTAR;
+c = *ecode++ - OP_STAR;
+c = *ecode++ - OP_TYPESTAR;
+c = *ecode++;
+c = *eptr++;
+c = 15;
+c = max - min;
+c = md->end_subject - eptr;
+c = md->lcc[c];
+c = md->offset_max;
+c == md->lcc[*eptr++])
+can't just fail here, because of the possibility of quantifiers with zero
+case OP_ALT:
+case OP_ANY:
+case OP_ASSERT:
+case OP_ASSERTBACK:
+case OP_ASSERTBACK_NOT:
+case OP_ASSERT_NOT:
+case OP_BEG_WORD:
+case OP_BRA: /* Non-capturing bracket: optimized */
+case OP_BRAMINZERO:
+case OP_BRANUMBER:
+case OP_BRAZERO:
+case OP_CHARS:
+case OP_CIRC:
+case OP_CLASS:
+case OP_COND:
+case OP_CREF:
+case OP_CRMINPLUS:
+case OP_CRMINQUERY:
+case OP_CRMINRANGE:
+case OP_CRMINSTAR:
+case OP_CRPLUS:
+case OP_CRQUERY:
+case OP_CRRANGE:
+case OP_CRSTAR:
+case OP_DIGIT:
+case OP_DOLL:
+case OP_END:
+case OP_END_WORD:
+case OP_EOD:
+case OP_EODN:
+case OP_EXACT:
+case OP_KET:
+case OP_KETRMAX:
+case OP_KETRMIN:
+case OP_MINPLUS:
+case OP_MINQUERY:
+case OP_MINSTAR:
+case OP_MINUPTO:
+case OP_NOT:
+case OP_NOTEXACT:
+case OP_NOTMINPLUS:
+case OP_NOTMINQUERY:
+case OP_NOTMINSTAR:
+case OP_NOTMINUPTO:
+case OP_NOTPLUS:
+case OP_NOTQUERY:
+case OP_NOTSTAR:
+case OP_NOTUPTO:
+case OP_NOT_DIGIT:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORD_BOUNDARY:
+case OP_ONCE:
+case OP_OPT:
+case OP_PLUS:
+case OP_QUERY:
+case OP_RECURSE:
+case OP_REF:
+case OP_REVERSE:
+case OP_SOD:
+case OP_STAR:
+case OP_TYPEEXACT:
+case OP_TYPEMINPLUS:
+case OP_TYPEMINQUERY:
+case OP_TYPEMINSTAR:
+case OP_TYPEMINUPTO:
+case OP_TYPEPLUS:
+case OP_TYPEQUERY:
+case OP_TYPESTAR:
+case OP_TYPEUPTO:
+case OP_UPTO:
+case OP_WHITESPACE:
+case OP_WORDCHAR:
+case OP_WORD_BOUNDARY:
+case matching may be when this character is hit, so test for it in both its
+caselessly, or if there are any changes of this flag within the regex, set up
+cases if necessary. However, the different cased versions will not be set up
+character" set. If the PCRE_CASELESS is set, implying that the match starts
+characters and work backwards. */
+code for maximizing the speed, and do the type test once at the start
+code to character type repeats - written out again for speed. */
+commoning these up that doesn't require a test of the positive/negative
+computer system, and to redistribute it freely, subject to the following
+const char *subject;
+const pcre *re;
+const pcre_extra *extra;
+const uschar *bmtable = NULL;
+const uschar *data = ecode + 1; /* Save for matching */
+const uschar *end_subject;
+const uschar *next = ecode + 1;
+const uschar *p = md->start_subject + md->offset_vector[offset];
+const uschar *p;
+const uschar *pp = eptr;
+const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
+const uschar *prev = ecode;
+const uschar *req_char_ptr = start_match - 1;
+const uschar *saved_eptr = eptr;
+const uschar *saved_eptr = eptrb->saved_eptr;
+const uschar *saved_eptr;
+const uschar *start_bits = NULL;
+const uschar *start_match = (const uschar *) subject + start_offset;
+continue; /* With the main loop */
+continue;
+course of events. */
+ctype = *ecode++; /* Code for the character type */
+cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+current high water mark for use by positive assertions. Do this also
+default: /* No repeat follows */
+default:
+do
+each branch of a lookbehind assertion. If we are too close to the start to
+each substring: the offsets to the start and end of the substring.
+ecode position in code
+ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += 2;
+ecode += 3 + (ecode[4] << 8) + ecode[5];
+ecode += 33; /* Advance past the item */
+ecode += 3; /* Advance past the item */
+ecode += 3;
+ecode += 5;
+ecode = next + 3;
+ecode++;
+else
+else if ((extra->options & PCRE_STUDY_BM) != 0)
+else if (first_char >= 0)
+else if (start_bits != NULL)
+else if (startline)
+encountered */
+end_subject = match_block.end_subject;
+eptr pointer in subject
+eptr points into the subject
+eptr += c;
+eptr += length;
+eptr += min;
+eptr -= (ecode[1] << 8) + ecode[2];
+eptr -= length;
+eptr = md->end_match_ptr;
+eptr++;
+eptrb pointer to chain of blocks containing eptr at start of
+eptrb = &newptrb;
+eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
+eptrblock *eptrb;
+eptrblock newptrb;
+eptrblock;
+exactly what going to the ket would do. */
+explicit claim or by omission.
+external_extra points to "hints" from pcre_study() or is NULL
+external_re points to the compiled expression
+extraction by setting the offsets and bumping the high water mark. */
+first_char = match_block.lcc[first_char];
+first_char = re->first_char;
+flags can contain
+for (;;)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= min; i++)
+for (i = min; i < max; i++)
+for (i = min;; i++)
+for the "once" (not-backup up) groups. */
+for the match to succeed. If the first character is set, req_char must be
+found it, so that we don't search again next time round the loop if
+from a previous iteration of this group, and be referred to by a reference
+goto REPEATCHAR;
+goto REPEATNOTCHAR;
+goto REPEATTYPE;
+group number back at the start and if necessary complete handling an
+happens for a repeating ket if no characters were matched in the group.
+here; that is handled in the code for KET. */
+hold, we get a temporary bit of working store to use during the matching.
+i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
+if (!anchored)
+if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!md->endonly)
+if (!rc)
+if (!startline && extra != NULL)
+if ((*ecode++ == OP_WORD_BOUNDARY) ?
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) == 0)
+if ((extra->options & PCRE_STUDY_MAPPED) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_isgroup) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_DOTALL) == 0 && c == '\n')
+if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) == 0)
+if ((md->ctypes[*eptr++] & ctype_space) != 0)
+if ((md->ctypes[*eptr++] & ctype_space) == 0)
+if ((md->ctypes[*eptr++] & ctype_word) != 0)
+if ((md->ctypes[*eptr++] & ctype_word) == 0)
+if ((md->ctypes[c] & ctype_digit) != 0)
+if ((md->ctypes[c] & ctype_digit) == 0)
+if ((md->ctypes[c] & ctype_space) != 0)
+if ((md->ctypes[c] & ctype_space) == 0)
+if ((md->ctypes[c] & ctype_word) != 0)
+if ((md->ctypes[c] & ctype_word) == 0)
+if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
+if ((re->options & PCRE_FIRSTSET) != 0)
+if ((re->options & PCRE_REQCHSET) != 0)
+if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
+if (*ecode != OP_ONCE && *ecode != OP_ALT)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET)
+if (*ecode == OP_KETRMIN)
+if (*ecode++ != *eptr++)
+if (*ecode++ == *eptr++)
+if (*eptr != '\n')
+if (*eptr++ == '\n')
+if (*p++ != *eptr++)
+if (*p++ == req_char)
+if (*prev != OP_COND)
+if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
+if (bmtable != NULL)
+if (bmtable[*start_match])
+if (c != *eptr++)
+if (c != md->lcc[*eptr++])
+if (c < 16)
+if (c == *eptr++)
+if (c == md->lcc[*eptr++])
+if (c > md->end_subject - eptr)
+if (cur_is_word == prev_is_word ||
+if (ecode[3] == OP_CREF) /* Condition is extraction test */
+if (ecode[3] == OP_OPT)
+if (eptr != md->start_subject && eptr[-1] != '\n')
+if (eptr != md->start_subject)
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject)
+if (eptr < md->start_subject)
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+if (eptr >= md->end_subject || *eptr == '\n')
+if (eptr >= md->end_subject || c != *eptr)
+if (eptr >= md->end_subject || c != md->lcc[*eptr])
+if (eptr >= md->end_subject || c == *eptr)
+if (eptr >= md->end_subject || c == md->lcc[*eptr])
+if (eptr >= md->end_subject)
+if (eptr++ >= md->end_subject)
+if (i >= max || !match_ref (offset, eptr, length, md, ims))
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject || c != *eptr++)
+if (i >= max || eptr >= md->end_subject || c == *eptr++)
+if (i >= max || eptr >= md->end_subject)
+if (is_subject && length > md->end_subject - p)
+if (isprint (c = *(p++)))
+if (length == 0)
+if (length > md->end_subject - eptr)
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match_block.end_offset_top > offsetcount)
+if (match_block.offset_vector != NULL)
+if (match_block.offset_vector == NULL)
+if (max == 0)
+if (md->lcc[*ecode++] != md->lcc[*eptr++])
+if (md->lcc[*ecode++] == md->lcc[*eptr++])
+if (md->lcc[*p++] != md->lcc[*eptr++])
+if (md->notbol && eptr == md->start_subject)
+if (md->notempty && eptr == md->start_match)
+if (md->noteol)
+if (min == max)
+if (min > 0)
+if (min > md->end_subject - eptr)
+if (minimize)
+if (number > 0)
+if (number > EXTRACT_BASIC_MAX)
+if (offset < md->offset_max)
+if (offset >= md->offset_max)
+if (offset_top <= offset)
+if (offsetcount < 2)
+if (offsetcount >= 4)
+if (op > OP_BRA)
+if (p > req_char_ptr)
+if (p >= end_subject)
+if (pp == req_char || pp == req_char2)
+if (re == NULL || subject == NULL ||
+if (re->magic_number != MAGIC_NUMBER)
+if (re->max_match_size >= 0
+if (re->top_backref > 0 && re->top_backref >= ocount / 3)
+if (req_char == req_char2)
+if (req_char >= 0)
+if (resetcount > offsetcount)
+if (save != stacksave)
+if (save == NULL)
+if (skipped_chars)
+if (start_match + bmtable[256] > end_subject)
+if (start_match > match_block.start_subject + start_offset)
+if (using_temporary_offsets)
+if certain parts of the pattern were not used. */
+if the malloc fails ... there is no way of returning to the top level with
+implied in the second condition, because start_offset > 0. */
+ims current /i, /m, and /s options
+ims the ims flags
+ims = (ims & ~PCRE_IMS) | ecode[4];
+ims = ecode[1];
+ims = original_ims;
+ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
+in the pattern. */
+in the subject string, while eptrb holds the value of eptr at the start of the
+initialize them to avoid reading uninitialized locations. */
+inline, and there are *still* stupid compilers about that don't like indented
+inside the group.
+int
+int *offsets;
+int *save;
+int c;
+int first_char = -1;
+int flags;
+int length;
+int min, max, ctype;
+int number = *prev - OP_BRA;
+int number = op - OP_BRA;
+int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
+int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
+int offset;
+int offset_top;
+int offsetcount;
+int op = (int) *ecode;
+int options;
+int rc;
+int req_char = -1;
+int req_char2 = -1;
+int resetcount, ocount;
+int save_offset1 = md->offset_vector[offset];
+int save_offset2 = md->offset_vector[offset + 1];
+int save_offset3 = md->offset_vector[md->offset_end - number];
+int skipped_chars = 0;
+int stacksave[15];
+int start_offset;
+is a bit large to put on the stack, but using malloc for small numbers
+is_subject TRUE if printing from within md->start_subject
+it as matched, any number of times (otherwise there could be infinite
+item to see if there is repeat information following. The code is similar
+item to see if there is repeat information following. Then obey similar
+last bracketed group - used for breaking infinite loops matching zero-length
+later in the subject; otherwise the test starts at the match point. This
+length length of subject string (may contain binary zeros)
+length length to be matched
+length number to print
+length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
+length = md->end_subject - p;
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+loop. */
+loops). */
+main loop. */
+majority of cases. It will be suboptimal when the case flag changes in a regex
+mark, since extracts may have been taken during the assertion. */
+mark, since extracts may have been taken. */
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode, offset_top, md, ims, eptrb, flags)
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match_block.ctypes = re->tables + ctypes_offset;
+match_block.end_subject = match_block.start_subject + length;
+match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
+match_block.errorcode == PCRE_ERROR_NOMATCH &&
+match_block.lcc = re->tables + lcc_offset;
+match_block.lcc[*start_match] != first_char)
+match_block.notbol = (options & PCRE_NOTBOL) != 0;
+match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
+match_block.noteol = (options & PCRE_NOTEOL) != 0;
+match_block.offset_end = ocount;
+match_block.offset_max = (2 * ocount) / 3;
+match_block.offset_overflow = FALSE;
+match_block.offset_overflow = TRUE;
+match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
+match_block.offset_vector = offsets;
+match_block.start_match = start_match;
+match_block.start_pattern = re->code;
+match_block.start_subject = (const uschar *) subject;
+match_condassert - this is an assertion condition
+match_condassert | match_isgroup))
+match_data *md;
+match_data match_block;
+match_isgroup - this is the start of a bracketed group
+match_isgroup);
+match_ref (offset, eptr, length, md, ims)
+matches, we carry on as at the end of a normal bracket, leaving the subject
+matching won't pass the KET for an assertion. If any one branch matches,
+matching won't pass the KET for this kind of subpattern. If any one branch
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[3] << 8) + ecode[4];
+max = INT_MAX;
+max = rep_max[c]; /* zero for max => infinity */
+max, eptr));
+maximum. Alternatively, if maximizing, find the maximum number of
+may be wrong. */
+md pointer to "static" info for the match
+md pointer to matching data block, if is_subject is TRUE
+md points to match data block
+md->end_match_ptr = eptr; /* For ONCE */
+md->end_match_ptr = eptr; /* Record where we ended */
+md->end_offset_top = offset_top; /* and how many extracts were taken */
+md->end_offset_top = offset_top;
+md->end_subject - eptr + 1 :
+md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
+md->offset_overflow = TRUE;
+md->offset_vector[md->offset_end - i] = save[i];
+md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
+md->offset_vector[md->offset_end - number] = save_offset3;
+md->offset_vector[md->offset_end - number];
+md->offset_vector[offset + 1] - md->offset_vector[offset];
+md->offset_vector[offset + 1] = eptr - md->start_subject;
+md->offset_vector[offset + 1] = save_offset2;
+md->offset_vector[offset] =
+md->offset_vector[offset] = save_offset1;
+memcpy (offsets + 2, match_block.offset_vector + 2,
+min = (ecode[1] << 8) + ecode[2];
+min = 0;
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = 1;
+min = rep_min[c]; /* Pick up values from tables; */
+minima. */
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (c & 1) != 0;
+minimize = *ecode == OP_MINUPTO;
+minimize = *ecode == OP_NOTMINUPTO;
+minimize = *ecode == OP_TYPEMINUPTO;
+minimize = TRUE;
+minimum number of matches are present. If min = max, continue at the same
+misrepresented as being the original software.
+move back, this match function fails. */
+mustn't change the current values of the data slot, because they may be set
+need to recurse. */
+never be used unless previously set, but they get saved and restored, and so we
+never set for an anchored regular expression, but the anchoring may be forced
+newline unless endonly is set, else end of subject unless noteol is set. */
+newptrb.prev = eptrb;
+newptrb.saved_eptr = eptr;
+next += (next[1] << 8) + next[2];
+non-capturing bracket. Don't worry about setting the flag for the error case
+number = (ecode[4] << 8) | ecode[5];
+number = (prev[4] << 8) | prev[5];
+number from a dummy opcode at the start. */
+number, then move along the subject till after the recursive match,
+ocount = offsetcount - (offsetcount % 3);
+ocount = re->top_backref * 3 + 3;
+of (?ims) items in the pattern. They are kept in a local variable so that
+of 3. */
+of subject left; this ensures that every attempt at a match fails. We
+offset index into the offset vector
+offset = number << 1;
+offset_top current top pointer
+offset_top = md->end_offset_top;
+offset_top = offset + 2;
+offset_top, md, ims, eptrb, match_isgroup);
+offsetcount the number of elements in the vector
+offsets points to a vector of ints to be filled in with offsets
+offsets[0] = start_match - match_block.start_subject;
+offsets[1] = match_block.end_match_ptr - match_block.start_subject;
+op = OP_BRA;
+opcode. */
+optimization can save a huge amount of backtracking in patterns with nested
+option for each character match. Maybe that wouldn't add very much to the
+options option bits
+p points to characters
+p--;
+past the end if there is only one branch, but that's OK because that is
+pchars (ecode, length, FALSE, md);
+pchars (eptr, 16, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (p, length, FALSE, md);
+pchars (p, length, is_subject, md)
+pchars (start_match, end_subject - start_match, TRUE, &match_block);
+pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
+place we found it at last time. */
+pointer. */
+portions of the string if it matches. Two elements in the vector are set for
+pre-processor statements. I suppose it's only been 10 years... */
+preceded by BRAZERO or BRAMINZERO. */
+preceding bracket, in the appropriate order. */
+preceding bracket, in the appropriate order. We need to reset any options
+printf (" against backref ");
+printf (" against pattern ");
+printf ("%c", c);
+printf (">>>> Match against: ");
+printf (">>>>> Skipped %d chars to reach first character\n",
+printf ("\\x%02x", c);
+printf ("\n");
+printf ("end bracket %d", number);
+printf ("matching subject ");
+printf ("matching subject <null> against pattern ");
+printf ("matching subject <null>");
+printf ("start bracket %d subject=", number);
+rc = 0;
+rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
+rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
+register const uschar *ecode;
+register const uschar *eptr;
+register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
+register int *iend = iptr + resetcount;
+register int *iend = iptr - resetcount / 2 + 1;
+register int *iptr = match_block.offset_vector + ocount;
+register int *iptr = match_block.offset_vector;
+register int c = *start_match;
+register int c;
+register int i;
+register int length = ecode[1];
+register int pp = *p++;
+repeat it in the interests of efficiency. */
+repeat limits are compiled as a number of copies, with the optional ones
+req_char = re->req_char;
+req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
+req_char_ptr = p;
+resetcount = 2 + re->top_bracket * 2;
+resetcount = ocount;
+restoring at the exit of a group is easy. */
+restrictions:
+return FALSE;
+return PCRE_ERROR_BADMAGIC;
+return PCRE_ERROR_BADOPTION;
+return PCRE_ERROR_NOMATCH;
+return PCRE_ERROR_NOMEMORY;
+return PCRE_ERROR_NULL;
+return TRUE;
+return match (eptr,
+return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
+return match_block.errorcode;
+return rc;
+save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
+save = stacksave;
+save[i] = md->offset_vector[md->offset_end - i];
+seems expensive. As a compromise, the stack is used when there are fewer
+share code. This is very similar to the code for single characters, but we
+similar code to character type repeats - written out again for speed.
+since matching characters is likely to be quite common. First, ensure the
+skipped_chars += bmtable[*start_match],
+skipped_chars += bmtable[256] - 1;
+skipped_chars -= bmtable[256] - 1;
+skipped_chars);
+skipped_chars++,
+stack of such pointers, to be re-instated at the end of the group when we hit
+stack, for holding the values of the subject pointer at the start of each
+start of each branch to move the current point backwards, so the code at
+start_bits = extra->data.start_bits;
+start_match += bmtable[*start_match];
+start_match += bmtable[256] - 1;
+start_match -= bmtable[256] - 1;
+start_match = (const uschar *) subject + length - re->max_match_size;
+start_match++ < end_subject);
+start_match++;
+start_offset where to start in the subject string
+startline = (re->options & PCRE_STARTLINE) != 0;
+static BOOL
+static const char rep_max[] =
+static const char rep_min[] =
+static void
+strings.
+struct eptrblock *prev;
+studied, there may be a bitmap of possible first characters. */
+subject points to the subject string
+subject if the requested.
+subpattern - to break infinite loops. */
+subpattern, so as to detect when an empty string has been matched by a
+subsequent match. */
+such there are (offset_top records the completed total) so we just have
+supersede any condition above with which it is incompatible.
+switch (*ecode)
+switch (ctype)
+switch (op)
+test once at the start (i.e. keep it out of the loop). */
+than 16 values to store; otherwise malloc is used. A problem is what to do
+than the number of characters left in the string, so the match fails.
+that "continue" in the code above comes out to here to repeat the main
+that changed within the bracket before re-running it, so check the next
+that it may occur zero times. It may repeat infinitely, or not at all -
+the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
+the closing ket. When match() is called in other circumstances, we don't add to
+the code for a repeated single character, but I haven't found a nice way of
+the current subject position in the working slot at the top of the vector. We
+the expression and advancing one matching character if failing, up to the
+the external pcre header. */
+the file Tech.Notes for some information on the internals.
+the final argument TRUE causes it to stop at the end of an assertion. */
+the group. */
+the length of the reference string explicitly rather than passing the
+the loop runs just once. */
+the minimum number of bytes before we start. */
+the number from a dummy opcode at the start. */
+the point in the subject string is not moved back. Thus there can never be
+the pointer while it matches the class. */
+the same bracket.
+the stack. */
+the start hasn't passed this character yet. */
+the subject. */
+there were too many extractions, set the return code to zero. In the case
+this level is identical to the lookahead case. */
+this makes a huge difference to execution time when there aren't many brackets
+those back references that we can. In this case there need not be overflow
+time taken, but character matching *is* what this is all about... */
+to save all the potential data. There may be up to 99 such values, which
+to that for character classes, but repeated for efficiency. Then obey
+two branches. If the condition is false, skipping the first branch takes us
+typedef struct eptrblock
+unless PCRE_CASELESS was given or the casing state changes within the regex.
+unlimited repeats that aren't going to match. We don't know what the state of
+unsigned long int ims = 0;
+unsigned long int ims;
+unsigned long int original_ims = ims; /* Save for resetting on ')' */
+up quickly if there are fewer than the minimum number of characters left in
+using_temporary_offsets = TRUE;
+values of the final offsets, in case they were set by a previous iteration of
+we just need to set up the whole thing as substring 0 before returning. If
+where we had to get some local store to hold offsets for backreferences, copy
+while (!anchored &&
+while (*ecode == OP_ALT)
+while (*ecode == OP_ALT);
+while (*next == OP_ALT);
+while (--iptr >= iend)
+while (eptr >= pp)
+while (iptr < iend)
+while (length-- > 0)
+while (p < end_subject)
+while (start_match < end_subject &&
+while (start_match < end_subject && *start_match != first_char)
+while (start_match < end_subject && start_match[-1] != '\n')
+while (start_match < end_subject)
+{
+{0, 0, 0, 0, 1, 1};
+{0, 0, 1, 1, 0, 0};
+} /* End of main loop */
+}
diff --git a/testsuite/uniq.inp b/testsuite/uniq.inp
new file mode 100644
index 0000000..aa61062
--- /dev/null
+++ b/testsuite/uniq.inp
@@ -0,0 +1,2058 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#define DPRINTF(p) /*nothing */
+#define DPRINTF(p) printf p
+#define GETCHAR(c, eptr) c = *eptr;
+#define GETCHARINC(c, eptr) c = *eptr++;
+#define class pcre_class
+#define match_condassert 0x01 /* Called to check a condition assertion */
+#define match_isgroup 0x02 /* Set if start of bracketed group */
+#else
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef DEBUG
+#ifdef __cplusplus
+#include "internal.h"
+&& length - re->max_match_size > start_offset)
+((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[*eptr] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+((md->ctypes[eptr[-1]] & ctype_word) != 0);
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(eptr == md->end_subject - 1 && *eptr != '\n'))
+(i.e. keep it out of the loop). Also we can test that there are at least
+(md->ctypes[*eptr++] & ctype_digit) != 0)
+(md->ctypes[*eptr++] & ctype_digit) == 0)
+(md->ctypes[*eptr++] & ctype_space) != 0)
+(md->ctypes[*eptr++] & ctype_space) == 0)
+(md->ctypes[*eptr++] & ctype_word) != 0)
+(md->ctypes[*eptr++] & ctype_word) == 0)
+(offsetcount - 2) * sizeof (int));
+(offsets == NULL && offsetcount > 0))
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (match_block.offset_vector);
+(pcre_free) (save);
+(re->tables + fcc_offset)[req_char] : req_char;
+* Match a back-reference *
+* Execute a Regular Expression *
+* Match from current position *
+* Debugging function to print chars *
+* Perl-Compatible Regular Expressions *
+* Macros and tables for character handling *
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*************************************************/
+*/
+*/
+*/
+*/
+*/
+*iptr = -1;
+*iptr++ = -1;
+*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
+*prev == OP_ONCE)
+-----------------------------------------------------------------------------
+-----------------------------------------------------------------------------
+-1 => failed to match
+/*
+/* "Once" brackets are like assertion brackets except that after a match,
+/* ... else fall through */
+/* ... else fall through */
+/* Advance to a possible match for an initial string after study */
+/* Allow compilation as C++ source code, should anybody want to do that. */
+/* Always fail if not enough characters left */
+/* An alternation is the end of a branch; scan along to find the end of the
+/* Assert before internal newline if multiline, or before a terminating
+/* Assertion brackets. Check the alternative branches in turn - the
+/* At the start of a bracketed group, add the current subject pointer to the
+/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
+/* Caseful comparisons */
+/* Caseful comparisons */
+/* Change option settings */
+/* Common code for all repeated single character type matches */
+/* Common code for all repeated single-character matches. We can give
+/* Common code for all repeated single-character matches. We can give
+/* Compute the minimum number of offsets that we need to reset each time. Doing
+/* Conditional group: compilation checked that there are no more than
+/* Continue as from after the assertion, updating the offsets high water
+/* Continue from after the assertion, updating the offsets high water
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never gets here */
+/* Control never reaches here */
+/* Control never reaches here */
+/* Copy the offset information from temporary store if necessary */
+/* Do a single test if no case difference is set up */
+/* Do not stick any code in here without much thought; it is assumed
+/* End of a group, repeated or non-repeating. If we are at the end of
+/* End of subject assertion (\z) */
+/* End of subject or ending \n assertion (\Z) */
+/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
+/* First, ensure the minimum number of matches are present. */
+/* First, ensure the minimum number of matches are present. Use inline
+/* First, ensure the minimum number of matches are present. We get back
+/* Flag bits for the match() function */
+/* For a non-repeating ket, just continue at this level. This also
+/* For a non-repeating ket, just continue at this level. This also
+/* For anchored or unanchored matches, there may be a "last known required
+/* For extended extraction brackets (large number), we have to fish out
+/* For extended extraction brackets (large number), we have to fish out the
+/* For matches anchored to the end of the pattern, we can often avoid
+/* If a back reference hasn't been set, the length that is passed is greater
+/* If checking an assertion for a condition, return TRUE. */
+/* If hit the end of the group (which could be repeated), fail */
+/* If max == min we can continue with the main loop without the
+/* If maximizing it is worth using inline code for speed, doing the type
+/* If maximizing, find the longest possible run, then work backwards. */
+/* If maximizing, find the longest string and work backwards */
+/* If min = max, continue at the same level without recursing */
+/* If min = max, continue at the same level without recursion.
+/* If minimizing, keep testing the rest of the expression and advancing
+/* If minimizing, keep trying and advancing the pointer */
+/* If minimizing, we have to test the rest of the pattern before each
+/* If req_char is set, we know that that character must appear in the subject
+/* If the expression has got more back references than the offsets supplied can
+/* If the length of the reference is zero, just continue with the
+/* If the reference is unset, set the length to be longer than the amount
+/* If we can't find the required character, break the matching loop */
+/* If we have found the required character, save the point where we
+/* In all other cases except a conditional group we have to check the
+/* In case the recursion has set more capturing values, save the final
+/* Include the internals header, which itself includes Standard C headers plus
+/* Insufficient room for saving captured contents */
+/* Loop for handling unanchored repeated matching attempts; for anchored regexs
+/* Match a back reference, possibly repeatedly. Look past the end of the
+/* Match a character class, possibly repeatedly. Look past the end of the
+/* Match a negated single character */
+/* Match a negated single character repeatedly. This is almost a repeat of
+/* Match a run of characters */
+/* Match a single character repeatedly; different opcodes share code. */
+/* Match a single character type repeatedly; several different opcodes
+/* Match a single character type; inline for speed */
+/* Min and max values for the common repeats; for the maxima, 0 => infinity */
+/* Move the subject pointer back. This occurs only at the start of
+/* Negative assertion: all branches must fail to match */
+/* Now start processing the operations. */
+/* OP_KETRMAX */
+/* OP_KETRMAX */
+/* On entry ecode points to the first opcode, and eptr to the first character
+/* Opening capturing bracket. If there is space in the offset vector, save
+/* Or to a non-unique first char after study */
+/* Or to a unique first char if possible */
+/* Or to just after \n for a multiline match if possible */
+/* Other types of node can be handled by a switch */
+/* Otherwise test for either case */
+/* Print a sequence of chars in printable format, stopping at the end of the
+/* Recursion matches the current regex, nested. If there are any capturing
+/* Reset the maximum number of extractions we might see. */
+/* Reset the value of the ims flags, in case they got changed during
+/* Reset the working variable associated with each extraction. These should
+/* Separate the caselesss case for speed */
+/* Set up for repetition, or handle the non-repeated case */
+/* Set up the first character to match, if available. The first_char value is
+/* Skip over conditional reference data or large extraction number data if
+/* Start of subject assertion */
+/* Start of subject unless notbol, or after internal newline if multiline */
+/* Structure for building a chain of data that actually lives on the
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The code is duplicated for the caseless and caseful cases, for speed,
+/* The condition is an assertion. Call match() to evaluate it - setting
+/* The ims options can vary during the matching as a result of the presence
+/* The repeating kets try the rest of the pattern or restart from the
+/* The repeating kets try the rest of the pattern or restart from the
+/* There's been some horrible disaster. */
+/* This "while" is the end of the "do" above */
+/* This function applies a compiled re to a subject string and picks out
+/* Use a macro for debugging printing, 'cause that limits the use of #ifdef
+/* We don't need to repeat the search if we haven't yet reached the
+/* When a match occurs, substrings will be set for all internal extractions;
+/* Word boundary assertions */
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+/*************************************************
+1. This software is distributed in the hope that it will be useful,
+2. The origin of this software must not be misrepresented, either by
+3. Altered versions must be plainly marked as such, and must not be
+4. If PCRE is embedded in any software that is released under the GNU
+5.005. If there is an options reset, it will get obeyed in the normal
+5.005. If there is an options reset, it will get obeyed in the normal
+6 : 3 + (ecode[1] << 8) + ecode[2]),
+< -1 => some kind of unexpected problem
+= 0 => success, but offsets is not big enough
+Arguments:
+Arguments:
+Arguments:
+Arguments:
+BOOL anchored;
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL cur_is_word = (eptr < md->end_subject) &&
+BOOL is_subject;
+BOOL minimize = FALSE;
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL prev_is_word = (eptr != md->start_subject) &&
+BOOL rc;
+BOOL startline;
+BOOL using_temporary_offsets = FALSE;
+Copyright (c) 1997-2000 University of Cambridge
+DPRINTF ((">>>> returning %d\n", match_block.errorcode));
+DPRINTF ((">>>> returning %d\n", rc));
+DPRINTF (("Copied offsets from temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Freeing temporary memory\n"));
+DPRINTF (("Got memory to hold back references\n"));
+DPRINTF (("Unknown opcode %d\n", *ecode));
+DPRINTF (("bracket %d failed\n", number));
+DPRINTF (("bracket 0 failed\n"));
+DPRINTF (("ims reset to %02lx\n", ims));
+DPRINTF (("ims set to %02lx at group repeat\n", ims));
+DPRINTF (("ims set to %02lx\n", ims));
+DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
+DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
+DPRINTF (("start bracket 0\n"));
+GETCHAR (c, eptr) /* Get character */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+GETCHARINC (c, eptr) /* Get character; increment eptr */
+General Purpose Licence (GPL), then the terms of that licence shall
+However, if the referenced string is the empty string, always treat
+If the bracket fails to match, we need to restore this value and also the
+If there isn't enough space in the offset vector, treat this as if it were a
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+Otherwise, we can use the vector supplied, rounding down its size to a multiple
+Permission is granted to anyone to use this software for any purpose on any
+REPEATCHAR:
+REPEATNOTCHAR:
+REPEATTYPE:
+Returns: > 0 => success; value is the number of elements filled in
+Returns: TRUE if matched
+Returns: TRUE if matched
+Returns: nothing
+They are not both allowed to be zero. */
+This is a library of functions to support regular expressions whose syntax
+This is the forcible breaking of infinite loops as implemented in Perl
+This is the forcible breaking of infinite loops as implemented in Perl
+Writing separate code makes it go faster, as does using an autoincrement and
+Written by: Philip Hazel <ph10@cam.ac.uk>
+a move back into the brackets. Check the alternative branches in turn - the
+address of eptr, so that eptr can be a register variable. */
+an assertion "group", stop matching and return TRUE, but record the
+an empty string - recursion will then try other alternatives, if any. */
+an error. Save the top 15 values on the stack, and accept that the rest
+an unanchored pattern, of course. If there's no first char and the pattern was
+analyzing most of the pattern. length > re->max_match_size is
+anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
+and advance one byte in the pattern code. */
+and reinstate them after the recursion. However, we don't know how many
+and semantics are as close as possible to those of the Perl 5 language. See
+and the required character in fact is caseful. */
+at run time, so we have to test for anchoring. The first char may be unset for
+avoid duplicate testing (which takes significant time). This covers the vast
+backing off on a match. */
+bmtable = extra->data.bmtable;
+both cases of the character. Otherwise set the two values the same, which will
+bracketed group and go to there. */
+brackets - for testing for empty matches
+brackets started but not finished, we have to save their starting points
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+break;
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+c != md->lcc[*eptr++])
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_CRSTAR;
+c = *ecode++ - OP_NOTSTAR;
+c = *ecode++ - OP_STAR;
+c = *ecode++ - OP_TYPESTAR;
+c = *ecode++;
+c = *ecode++;
+c = *eptr++;
+c = 15;
+c = max - min;
+c = md->end_subject - eptr;
+c = md->lcc[c];
+c = md->lcc[c];
+c = md->offset_max;
+c == md->lcc[*eptr++])
+can't just fail here, because of the possibility of quantifiers with zero
+case OP_ALT:
+case OP_ANY:
+case OP_ANY:
+case OP_ANY:
+case OP_ANY:
+case OP_ASSERT:
+case OP_ASSERTBACK:
+case OP_ASSERTBACK_NOT:
+case OP_ASSERT_NOT:
+case OP_BEG_WORD:
+case OP_BRA: /* Non-capturing bracket: optimized */
+case OP_BRAMINZERO:
+case OP_BRANUMBER:
+case OP_BRAZERO:
+case OP_CHARS:
+case OP_CIRC:
+case OP_CLASS:
+case OP_COND:
+case OP_CREF:
+case OP_CRMINPLUS:
+case OP_CRMINPLUS:
+case OP_CRMINQUERY:
+case OP_CRMINQUERY:
+case OP_CRMINRANGE:
+case OP_CRMINRANGE:
+case OP_CRMINSTAR:
+case OP_CRMINSTAR:
+case OP_CRPLUS:
+case OP_CRPLUS:
+case OP_CRQUERY:
+case OP_CRQUERY:
+case OP_CRRANGE:
+case OP_CRRANGE:
+case OP_CRSTAR:
+case OP_CRSTAR:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DIGIT:
+case OP_DOLL:
+case OP_END:
+case OP_END_WORD:
+case OP_EOD:
+case OP_EODN:
+case OP_EXACT:
+case OP_KET:
+case OP_KETRMAX:
+case OP_KETRMIN:
+case OP_MINPLUS:
+case OP_MINQUERY:
+case OP_MINSTAR:
+case OP_MINUPTO:
+case OP_NOT:
+case OP_NOTEXACT:
+case OP_NOTMINPLUS:
+case OP_NOTMINQUERY:
+case OP_NOTMINSTAR:
+case OP_NOTMINUPTO:
+case OP_NOTPLUS:
+case OP_NOTQUERY:
+case OP_NOTSTAR:
+case OP_NOTUPTO:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_DIGIT:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WHITESPACE:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORDCHAR:
+case OP_NOT_WORD_BOUNDARY:
+case OP_ONCE:
+case OP_OPT:
+case OP_PLUS:
+case OP_QUERY:
+case OP_RECURSE:
+case OP_REF:
+case OP_REVERSE:
+case OP_SOD:
+case OP_STAR:
+case OP_TYPEEXACT:
+case OP_TYPEMINPLUS:
+case OP_TYPEMINQUERY:
+case OP_TYPEMINSTAR:
+case OP_TYPEMINUPTO:
+case OP_TYPEPLUS:
+case OP_TYPEQUERY:
+case OP_TYPESTAR:
+case OP_TYPEUPTO:
+case OP_UPTO:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WHITESPACE:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORDCHAR:
+case OP_WORD_BOUNDARY:
+case matching may be when this character is hit, so test for it in both its
+caselessly, or if there are any changes of this flag within the regex, set up
+cases if necessary. However, the different cased versions will not be set up
+character" set. If the PCRE_CASELESS is set, implying that the match starts
+characters and work backwards. */
+characters and work backwards. */
+code for maximizing the speed, and do the type test once at the start
+code to character type repeats - written out again for speed. */
+commoning these up that doesn't require a test of the positive/negative
+computer system, and to redistribute it freely, subject to the following
+const char *subject;
+const pcre *re;
+const pcre_extra *extra;
+const uschar *bmtable = NULL;
+const uschar *data = ecode + 1; /* Save for matching */
+const uschar *end_subject;
+const uschar *next = ecode + 1;
+const uschar *next = ecode + 1;
+const uschar *p = md->start_subject + md->offset_vector[offset];
+const uschar *p;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *pp = eptr;
+const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
+const uschar *prev = ecode;
+const uschar *req_char_ptr = start_match - 1;
+const uschar *saved_eptr = eptr;
+const uschar *saved_eptr = eptrb->saved_eptr;
+const uschar *saved_eptr;
+const uschar *start_bits = NULL;
+const uschar *start_match = (const uschar *) subject + start_offset;
+continue; /* With the main loop */
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+continue;
+course of events. */
+course of events. */
+ctype = *ecode++; /* Code for the character type */
+cur_is_word == prev_is_word : cur_is_word != prev_is_word)
+current high water mark for use by positive assertions. Do this also
+default: /* No repeat follows */
+default: /* No repeat follows */
+default:
+do
+do
+do
+do
+do
+do
+do
+do
+do
+do
+do
+each branch of a lookbehind assertion. If we are too close to the start to
+each substring: the offsets to the start and end of the substring.
+ecode position in code
+ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += (ecode[1] << 8) + ecode[2];
+ecode += 2;
+ecode += 2;
+ecode += 3 + (ecode[4] << 8) + ecode[5];
+ecode += 33; /* Advance past the item */
+ecode += 3; /* Advance past the item */
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 3;
+ecode += 5;
+ecode += 5;
+ecode = next + 3;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+ecode++;
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else
+else if ((extra->options & PCRE_STUDY_BM) != 0)
+else if (first_char >= 0)
+else if (start_bits != NULL)
+else if (startline)
+encountered */
+end_subject = match_block.end_subject;
+eptr pointer in subject
+eptr points into the subject
+eptr += c;
+eptr += length;
+eptr += length;
+eptr += length;
+eptr += length;
+eptr += min;
+eptr -= (ecode[1] << 8) + ecode[2];
+eptr -= length;
+eptr = md->end_match_ptr;
+eptr = md->end_match_ptr;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptr++;
+eptrb pointer to chain of blocks containing eptr at start of
+eptrb = &newptrb;
+eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
+eptrblock *eptrb;
+eptrblock newptrb;
+eptrblock;
+exactly what going to the ket would do. */
+explicit claim or by omission.
+external_extra points to "hints" from pcre_study() or is NULL
+external_re points to the compiled expression
+extraction by setting the offsets and bumping the high water mark. */
+first_char = match_block.lcc[first_char];
+first_char = re->first_char;
+flags can contain
+for (;;)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= c; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = 1; i <= min; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min; i < max; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for (i = min;; i++)
+for the "once" (not-backup up) groups. */
+for the match to succeed. If the first character is set, req_char must be
+found it, so that we don't search again next time round the loop if
+from a previous iteration of this group, and be referred to by a reference
+goto REPEATCHAR;
+goto REPEATCHAR;
+goto REPEATNOTCHAR;
+goto REPEATNOTCHAR;
+goto REPEATTYPE;
+goto REPEATTYPE;
+group number back at the start and if necessary complete handling an
+happens for a repeating ket if no characters were matched in the group.
+happens for a repeating ket if no characters were matched in the group.
+here; that is handled in the code for KET. */
+hold, we get a temporary bit of working store to use during the matching.
+i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
+if (!anchored)
+if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!match_ref (offset, eptr, length, md, ims))
+if (!md->endonly)
+if (!rc)
+if (!startline && extra != NULL)
+if ((*ecode++ == OP_WORD_BOUNDARY) ?
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) != 0)
+if ((data[c / 8] & (1 << (c & 7))) == 0)
+if ((extra->options & PCRE_STUDY_MAPPED) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_condassert) != 0)
+if ((flags & match_isgroup) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_CASELESS) != 0)
+if ((ims & PCRE_DOTALL) == 0 && c == '\n')
+if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_DOTALL) == 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((ims & PCRE_MULTILINE) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) != 0)
+if ((md->ctypes[*eptr++] & ctype_digit) == 0)
+if ((md->ctypes[*eptr++] & ctype_space) != 0)
+if ((md->ctypes[*eptr++] & ctype_space) == 0)
+if ((md->ctypes[*eptr++] & ctype_word) != 0)
+if ((md->ctypes[*eptr++] & ctype_word) == 0)
+if ((md->ctypes[c] & ctype_digit) != 0)
+if ((md->ctypes[c] & ctype_digit) == 0)
+if ((md->ctypes[c] & ctype_space) != 0)
+if ((md->ctypes[c] & ctype_space) == 0)
+if ((md->ctypes[c] & ctype_word) != 0)
+if ((md->ctypes[c] & ctype_word) == 0)
+if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
+if ((re->options & PCRE_FIRSTSET) != 0)
+if ((re->options & PCRE_REQCHSET) != 0)
+if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
+if (*ecode != OP_ONCE && *ecode != OP_ALT)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET || eptr == saved_eptr)
+if (*ecode == OP_KET)
+if (*ecode == OP_KETRMIN)
+if (*ecode == OP_KETRMIN)
+if (*ecode++ != *eptr++)
+if (*ecode++ == *eptr++)
+if (*eptr != '\n')
+if (*eptr++ == '\n')
+if (*p++ != *eptr++)
+if (*p++ == req_char)
+if (*prev != OP_COND)
+if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
+if (bmtable != NULL)
+if (bmtable[*start_match])
+if (c != *eptr++)
+if (c != md->lcc[*eptr++])
+if (c < 16)
+if (c == *eptr++)
+if (c == md->lcc[*eptr++])
+if (c > md->end_subject - eptr)
+if (cur_is_word == prev_is_word ||
+if (ecode[3] == OP_CREF) /* Condition is extraction test */
+if (ecode[3] == OP_OPT)
+if (eptr != md->start_subject && eptr[-1] != '\n')
+if (eptr != md->start_subject)
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject - 1 ||
+if (eptr < md->end_subject)
+if (eptr < md->end_subject)
+if (eptr < md->start_subject)
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject ||
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+if (eptr >= md->end_subject || *eptr == '\n')
+if (eptr >= md->end_subject || c != *eptr)
+if (eptr >= md->end_subject || c != md->lcc[*eptr])
+if (eptr >= md->end_subject || c == *eptr)
+if (eptr >= md->end_subject || c == md->lcc[*eptr])
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr >= md->end_subject)
+if (eptr++ >= md->end_subject)
+if (i >= max || !match_ref (offset, eptr, length, md, ims))
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject ||
+if (i >= max || eptr >= md->end_subject || c != *eptr++)
+if (i >= max || eptr >= md->end_subject || c == *eptr++)
+if (i >= max || eptr >= md->end_subject)
+if (i >= max || eptr >= md->end_subject)
+if (is_subject && length > md->end_subject - p)
+if (isprint (c = *(p++)))
+if (length == 0)
+if (length > md->end_subject - eptr)
+if (length > md->end_subject - eptr)
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
+if (match_block.end_offset_top > offsetcount)
+if (match_block.offset_vector != NULL)
+if (match_block.offset_vector == NULL)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (max == 0)
+if (md->lcc[*ecode++] != md->lcc[*eptr++])
+if (md->lcc[*ecode++] == md->lcc[*eptr++])
+if (md->lcc[*p++] != md->lcc[*eptr++])
+if (md->notbol && eptr == md->start_subject)
+if (md->notempty && eptr == md->start_match)
+if (md->noteol)
+if (md->noteol)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min == max)
+if (min > 0)
+if (min > md->end_subject - eptr)
+if (min > md->end_subject - eptr)
+if (min > md->end_subject - eptr)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (minimize)
+if (number > 0)
+if (number > EXTRACT_BASIC_MAX)
+if (number > EXTRACT_BASIC_MAX)
+if (offset < md->offset_max)
+if (offset >= md->offset_max)
+if (offset_top <= offset)
+if (offsetcount < 2)
+if (offsetcount >= 4)
+if (op > OP_BRA)
+if (p > req_char_ptr)
+if (p >= end_subject)
+if (pp == req_char || pp == req_char2)
+if (re == NULL || subject == NULL ||
+if (re->magic_number != MAGIC_NUMBER)
+if (re->max_match_size >= 0
+if (re->top_backref > 0 && re->top_backref >= ocount / 3)
+if (req_char == req_char2)
+if (req_char >= 0)
+if (resetcount > offsetcount)
+if (save != stacksave)
+if (save == NULL)
+if (skipped_chars)
+if (start_match + bmtable[256] > end_subject)
+if (start_match > match_block.start_subject + start_offset)
+if (using_temporary_offsets)
+if (using_temporary_offsets)
+if certain parts of the pattern were not used. */
+if the malloc fails ... there is no way of returning to the top level with
+implied in the second condition, because start_offset > 0. */
+ims current /i, /m, and /s options
+ims the ims flags
+ims = (ims & ~PCRE_IMS) | ecode[4];
+ims = ecode[1];
+ims = original_ims;
+ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
+in the pattern. */
+in the subject string, while eptrb holds the value of eptr at the start of the
+initialize them to avoid reading uninitialized locations. */
+inline, and there are *still* stupid compilers about that don't like indented
+inside the group.
+int
+int *offsets;
+int *save;
+int c;
+int first_char = -1;
+int flags;
+int length;
+int length;
+int length;
+int length;
+int min, max, ctype;
+int number = *prev - OP_BRA;
+int number = op - OP_BRA;
+int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
+int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
+int offset;
+int offset;
+int offset;
+int offset_top;
+int offsetcount;
+int op = (int) *ecode;
+int options;
+int rc;
+int req_char = -1;
+int req_char2 = -1;
+int resetcount, ocount;
+int save_offset1 = md->offset_vector[offset];
+int save_offset2 = md->offset_vector[offset + 1];
+int save_offset3 = md->offset_vector[md->offset_end - number];
+int skipped_chars = 0;
+int stacksave[15];
+int start_offset;
+is a bit large to put on the stack, but using malloc for small numbers
+is_subject TRUE if printing from within md->start_subject
+it as matched, any number of times (otherwise there could be infinite
+item to see if there is repeat information following. The code is similar
+item to see if there is repeat information following. Then obey similar
+last bracketed group - used for breaking infinite loops matching zero-length
+later in the subject; otherwise the test starts at the match point. This
+length length of subject string (may contain binary zeros)
+length length to be matched
+length number to print
+length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
+length = md->end_subject - p;
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+level without recursing. Otherwise, if minimizing, keep trying the rest of
+loop. */
+loops). */
+main loop. */
+majority of cases. It will be suboptimal when the case flag changes in a regex
+mark, since extracts may have been taken during the assertion. */
+mark, since extracts may have been taken. */
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
+match (eptr, ecode, offset_top, md, ims, eptrb, flags)
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
+match_block.ctypes = re->tables + ctypes_offset;
+match_block.end_subject = match_block.start_subject + length;
+match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
+match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
+match_block.errorcode == PCRE_ERROR_NOMATCH &&
+match_block.lcc = re->tables + lcc_offset;
+match_block.lcc[*start_match] != first_char)
+match_block.notbol = (options & PCRE_NOTBOL) != 0;
+match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
+match_block.noteol = (options & PCRE_NOTEOL) != 0;
+match_block.offset_end = ocount;
+match_block.offset_max = (2 * ocount) / 3;
+match_block.offset_overflow = FALSE;
+match_block.offset_overflow = TRUE;
+match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
+match_block.offset_vector = offsets;
+match_block.start_match = start_match;
+match_block.start_pattern = re->code;
+match_block.start_subject = (const uschar *) subject;
+match_condassert - this is an assertion condition
+match_condassert | match_isgroup))
+match_data *md;
+match_data *md;
+match_data *md;
+match_data match_block;
+match_isgroup - this is the start of a bracketed group
+match_isgroup);
+match_ref (offset, eptr, length, md, ims)
+matches, we carry on as at the end of a normal bracket, leaving the subject
+matching won't pass the KET for an assertion. If any one branch matches,
+matching won't pass the KET for this kind of subpattern. If any one branch
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[1] << 8) + ecode[2];
+max = (ecode[3] << 8) + ecode[4];
+max = (ecode[3] << 8) + ecode[4];
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = INT_MAX;
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max = rep_max[c]; /* zero for max => infinity */
+max, eptr));
+max, eptr));
+maximum. Alternatively, if maximizing, find the maximum number of
+maximum. Alternatively, if maximizing, find the maximum number of
+may be wrong. */
+md pointer to "static" info for the match
+md pointer to matching data block, if is_subject is TRUE
+md points to match data block
+md->end_match_ptr = eptr; /* For ONCE */
+md->end_match_ptr = eptr; /* Record where we ended */
+md->end_offset_top = offset_top; /* and how many extracts were taken */
+md->end_offset_top = offset_top;
+md->end_subject - eptr + 1 :
+md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
+md->offset_overflow = TRUE;
+md->offset_vector[md->offset_end - i] = save[i];
+md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
+md->offset_vector[md->offset_end - number] = save_offset3;
+md->offset_vector[md->offset_end - number];
+md->offset_vector[offset + 1] - md->offset_vector[offset];
+md->offset_vector[offset + 1] = eptr - md->start_subject;
+md->offset_vector[offset + 1] = save_offset2;
+md->offset_vector[offset] =
+md->offset_vector[offset] = save_offset1;
+memcpy (offsets + 2, match_block.offset_vector + 2,
+min = (ecode[1] << 8) + ecode[2];
+min = (ecode[1] << 8) + ecode[2];
+min = 0;
+min = 0;
+min = 0;
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = (ecode[1] << 8) + ecode[2];
+min = max = 1;
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+min = rep_min[c]; /* Pick up values from tables; */
+minima. */
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (*ecode == OP_CRMINRANGE);
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = (c & 1) != 0;
+minimize = *ecode == OP_MINUPTO;
+minimize = *ecode == OP_NOTMINUPTO;
+minimize = *ecode == OP_TYPEMINUPTO;
+minimize = TRUE;
+minimum number of matches are present. If min = max, continue at the same
+minimum number of matches are present. If min = max, continue at the same
+misrepresented as being the original software.
+move back, this match function fails. */
+mustn't change the current values of the data slot, because they may be set
+need to recurse. */
+never be used unless previously set, but they get saved and restored, and so we
+never set for an anchored regular expression, but the anchoring may be forced
+newline unless endonly is set, else end of subject unless noteol is set. */
+newptrb.prev = eptrb;
+newptrb.saved_eptr = eptr;
+next += (next[1] << 8) + next[2];
+next += (next[1] << 8) + next[2];
+non-capturing bracket. Don't worry about setting the flag for the error case
+number = (ecode[4] << 8) | ecode[5];
+number = (prev[4] << 8) | prev[5];
+number from a dummy opcode at the start. */
+number, then move along the subject till after the recursive match,
+ocount = offsetcount - (offsetcount % 3);
+ocount = re->top_backref * 3 + 3;
+of (?ims) items in the pattern. They are kept in a local variable so that
+of 3. */
+of subject left; this ensures that every attempt at a match fails. We
+offset index into the offset vector
+offset = number << 1;
+offset = number << 1;
+offset_top current top pointer
+offset_top = md->end_offset_top;
+offset_top = md->end_offset_top;
+offset_top = md->end_offset_top;
+offset_top = offset + 2;
+offset_top, md, ims, eptrb, match_isgroup);
+offsetcount the number of elements in the vector
+offsets points to a vector of ints to be filled in with offsets
+offsets[0] = start_match - match_block.start_subject;
+offsets[1] = match_block.end_match_ptr - match_block.start_subject;
+op = OP_BRA;
+opcode. */
+optimization can save a huge amount of backtracking in patterns with nested
+option for each character match. Maybe that wouldn't add very much to the
+options option bits
+p points to characters
+p--;
+p--;
+past the end if there is only one branch, but that's OK because that is
+pchars (ecode, length, FALSE, md);
+pchars (eptr, 16, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (eptr, length, TRUE, md);
+pchars (p, length, FALSE, md);
+pchars (p, length, is_subject, md)
+pchars (start_match, end_subject - start_match, TRUE, &match_block);
+pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
+place we found it at last time. */
+pointer. */
+portions of the string if it matches. Two elements in the vector are set for
+pre-processor statements. I suppose it's only been 10 years... */
+preceded by BRAZERO or BRAMINZERO. */
+preceding bracket, in the appropriate order. */
+preceding bracket, in the appropriate order. We need to reset any options
+printf (" against backref ");
+printf (" against pattern ");
+printf ("%c", c);
+printf (">>>> Match against: ");
+printf (">>>>> Skipped %d chars to reach first character\n",
+printf ("\\x%02x", c);
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("\n");
+printf ("end bracket %d", number);
+printf ("matching subject ");
+printf ("matching subject ");
+printf ("matching subject <null> against pattern ");
+printf ("matching subject <null>");
+printf ("start bracket %d subject=", number);
+rc = 0;
+rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
+rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
+register const uschar *ecode;
+register const uschar *eptr;
+register const uschar *eptr;
+register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
+register int *iend = iptr + resetcount;
+register int *iend = iptr - resetcount / 2 + 1;
+register int *iptr = match_block.offset_vector + ocount;
+register int *iptr = match_block.offset_vector;
+register int c = *start_match;
+register int c;
+register int i;
+register int length = ecode[1];
+register int pp = *p++;
+repeat it in the interests of efficiency. */
+repeat limits are compiled as a number of copies, with the optional ones
+req_char = re->req_char;
+req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
+req_char_ptr = p;
+resetcount = 2 + re->top_bracket * 2;
+resetcount = ocount;
+restoring at the exit of a group is easy. */
+restrictions:
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return FALSE;
+return PCRE_ERROR_BADMAGIC;
+return PCRE_ERROR_BADOPTION;
+return PCRE_ERROR_NOMATCH;
+return PCRE_ERROR_NOMEMORY;
+return PCRE_ERROR_NULL;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return TRUE;
+return match (eptr,
+return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
+return match_block.errorcode;
+return rc;
+save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
+save = stacksave;
+save = stacksave;
+save[i] = md->offset_vector[md->offset_end - i];
+seems expensive. As a compromise, the stack is used when there are fewer
+share code. This is very similar to the code for single characters, but we
+similar code to character type repeats - written out again for speed.
+since matching characters is likely to be quite common. First, ensure the
+since matching characters is likely to be quite common. First, ensure the
+skipped_chars += bmtable[*start_match],
+skipped_chars += bmtable[256] - 1;
+skipped_chars -= bmtable[256] - 1;
+skipped_chars);
+skipped_chars++,
+skipped_chars++,
+skipped_chars++,
+skipped_chars++,
+stack of such pointers, to be re-instated at the end of the group when we hit
+stack, for holding the values of the subject pointer at the start of each
+start of each branch to move the current point backwards, so the code at
+start_bits = extra->data.start_bits;
+start_match += bmtable[*start_match];
+start_match += bmtable[256] - 1;
+start_match -= bmtable[256] - 1;
+start_match = (const uschar *) subject + length - re->max_match_size;
+start_match++ < end_subject);
+start_match++;
+start_match++;
+start_match++;
+start_match++;
+start_offset where to start in the subject string
+startline = (re->options & PCRE_STARTLINE) != 0;
+static BOOL
+static BOOL
+static const char rep_max[] =
+static const char rep_min[] =
+static void
+strings.
+struct eptrblock *prev;
+studied, there may be a bitmap of possible first characters. */
+subject points to the subject string
+subject if the requested.
+subpattern - to break infinite loops. */
+subpattern, so as to detect when an empty string has been matched by a
+subsequent match. */
+such there are (offset_top records the completed total) so we just have
+supersede any condition above with which it is incompatible.
+switch (*ecode)
+switch (*ecode)
+switch (ctype)
+switch (ctype)
+switch (ctype)
+switch (op)
+test once at the start (i.e. keep it out of the loop). */
+than 16 values to store; otherwise malloc is used. A problem is what to do
+than the number of characters left in the string, so the match fails.
+that "continue" in the code above comes out to here to repeat the main
+that changed within the bracket before re-running it, so check the next
+that it may occur zero times. It may repeat infinitely, or not at all -
+the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
+the closing ket. When match() is called in other circumstances, we don't add to
+the code for a repeated single character, but I haven't found a nice way of
+the current subject position in the working slot at the top of the vector. We
+the expression and advancing one matching character if failing, up to the
+the expression and advancing one matching character if failing, up to the
+the external pcre header. */
+the file Tech.Notes for some information on the internals.
+the final argument TRUE causes it to stop at the end of an assertion. */
+the group. */
+the length of the reference string explicitly rather than passing the
+the loop runs just once. */
+the minimum number of bytes before we start. */
+the number from a dummy opcode at the start. */
+the point in the subject string is not moved back. Thus there can never be
+the pointer while it matches the class. */
+the same bracket.
+the stack. */
+the start hasn't passed this character yet. */
+the subject. */
+the subject. */
+there were too many extractions, set the return code to zero. In the case
+this level is identical to the lookahead case. */
+this makes a huge difference to execution time when there aren't many brackets
+those back references that we can. In this case there need not be overflow
+time taken, but character matching *is* what this is all about... */
+to save all the potential data. There may be up to 99 such values, which
+to that for character classes, but repeated for efficiency. Then obey
+two branches. If the condition is false, skipping the first branch takes us
+typedef struct eptrblock
+unless PCRE_CASELESS was given or the casing state changes within the regex.
+unlimited repeats that aren't going to match. We don't know what the state of
+unsigned long int ims = 0;
+unsigned long int ims;
+unsigned long int ims;
+unsigned long int original_ims = ims; /* Save for resetting on ')' */
+up quickly if there are fewer than the minimum number of characters left in
+up quickly if there are fewer than the minimum number of characters left in
+using_temporary_offsets = TRUE;
+values of the final offsets, in case they were set by a previous iteration of
+we just need to set up the whole thing as substring 0 before returning. If
+where we had to get some local store to hold offsets for backreferences, copy
+while (!anchored &&
+while (*ecode == OP_ALT)
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*ecode == OP_ALT);
+while (*next == OP_ALT);
+while (*next == OP_ALT);
+while (--iptr >= iend)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (eptr >= pp)
+while (iptr < iend)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (length-- > 0)
+while (p < end_subject)
+while (p < end_subject)
+while (start_match < end_subject &&
+while (start_match < end_subject && *start_match != first_char)
+while (start_match < end_subject && start_match[-1] != '\n')
+while (start_match < end_subject)
+while (start_match < end_subject)
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{
+{0, 0, 0, 0, 1, 1};
+{0, 0, 1, 1, 0, 0};
+} /* End of main loop */
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
+}
diff --git a/testsuite/uniq.sed b/testsuite/uniq.sed
new file mode 100644
index 0000000..7ec66c4
--- /dev/null
+++ b/testsuite/uniq.sed
@@ -0,0 +1,20 @@
+h
+
+:b
+# On the last line, print and exit
+$b
+N
+/^\(.*\)\n\1$/ {
+ # The two lines are identical. Undo the effect of
+ # the N command by restoring the line saved with h.
+ g
+ bb
+}
+
+# If the N command had added the last line, print and exit
+$b
+
+# The lines are different; print the first and go
+# back working on the second.
+P
+D
diff --git a/testsuite/uniq.sh b/testsuite/uniq.sh
new file mode 100644
index 0000000..f394cb3
--- /dev/null
+++ b/testsuite/uniq.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# Test runner for the uniq.sed script
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# location of the external SED scripts
+dir="$abs_top_srcdir/testsuite"
+
+sed -f "$dir/uniq.sed" < "$dir/uniq.inp" > out || fail=1
+remove_cr_inplace out
+compare "$dir/uniq.good" out || fail=1
+
+
+Exit $fail
diff --git a/testsuite/utf8-ru.sh b/testsuite/utf8-ru.sh
new file mode 100644
index 0000000..695c383
--- /dev/null
+++ b/testsuite/utf8-ru.sh
@@ -0,0 +1,123 @@
+#!/bin/sh
+
+# Test GNU extension "\u" and "\U" (uppercase conversion)
+# in "s///" command.
+# This is an adaptation of the old utf8-1/2/3/4 tests.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_ru_utf8_locale_
+
+# The letters used in these tests are:
+# UTF8:Octal UTF8:HEX CodePoint Name
+# А \320\220 \xD0\x90 U+0410 \N{CYRILLIC CAPITAL LETTER A}
+# Д \320\224 \xD0\x94 U+0414 \N{CYRILLIC CAPITAL LETTER DE}
+# а \320\260 \xD0\xB0 U+0430 \N{CYRILLIC SMALL LETTER A}
+# д \320\264 \xD0\xB4 U+0434 \N{CYRILLIC SMALL LETTER DE}
+
+# Using octal values, as these are the most portable across various printfs.
+
+
+# Input: Same input for all tests (all lower case letters)
+# да д
+printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_
+
+
+# Test 1: Convert "small DE" to upper case (with \U)
+# s/д/\U&/g
+printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_
+
+# Test 1: Expected output - two capital DE letters.
+# Да Д
+printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_
+
+
+# Test 2: Convert "small DE" to upper case (with \u - next character only)
+# s/д/\u&/g
+printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_
+
+# The expected output of test 2 is identical to test 1.
+# We create the file to make the test loop (below) simpler.
+cp utf8-1-exp utf8-2-exp || framework_failure_
+
+
+
+# Test 3: Capitalize only the next character (\u)
+# Only the first "DE" should be capitalized.
+# s/д.*/\u&/g
+printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_
+
+# Test 3: Expected output - First DE capitalized, second DE not.
+# Да д
+printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_
+
+
+# Test 4: Capitalize all matched characters
+# s/д.*/\U&/g
+printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_
+
+
+# Test 4: Expected output - All capital letters:
+# ДА Д
+printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_
+
+# Step 1: force Russian UTF8 locale.
+# The case-conversion should either work, or not modify the input.
+for i in 1 2 3 4;
+do
+ LC_ALL=ru_RU.UTF-8 \
+ sed -f utf8-$i.sed < utf8-inp > utf8-$i-ru-out || fail=1
+
+ remove_cr_inplace utf8-$i-ru-out
+
+ # If we have the expected output - continue to next test
+ compare utf8-$i-exp utf8-$i-ru-out && continue
+
+ # Otherwise, ensure the output equals the shared input file utf8-inp
+ # (i.e. sed did not modify partial octets resulting in
+ # invalid multibyte sequences)
+ compare utf8-inp utf8-$i-ru-out || fail=1
+done
+
+
+# Step 2: If the current locale supports UTF8, repeat the above tests.
+l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')
+case "$l" in
+ *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
+ *) utf8=no;;
+esac
+
+if test "$utf8" = yes ; then
+ for i in 1 2 3 4;
+ do
+ sed -f utf8-$i.sed < utf8-inp > utf8-$i-out || fail=1
+
+ remove_cr_inplace utf8-$i-out
+
+ # If we have the expected output - continue to next test
+ compare utf8-$i-exp utf8-$i-out && continue
+
+ # Otherwise, ensure the output equals the shared input file utf8-inp
+ # (i.e. sed did not modify partial octets resulting in
+ # invalid multibyte sequences)
+ compare utf8-inp utf8-$i-out || fail=1
+ done
+fi
+
+
+Exit $fail
diff --git a/testsuite/word-delim.sh b/testsuite/word-delim.sh
new file mode 100755
index 0000000..ade3137
--- /dev/null
+++ b/testsuite/word-delim.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Exercise the DFA regression in sed-4.6: 's/.\bx//' must turn "123-x" into "123".
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+require_en_utf8_locale_
+
+# Ensure that this works in both the C locale and the en_US.UTF-8 multibyte one.
+# In the C locale, it failed due to a dfa.c regression in sed-4.6.
+echo 123-x > in || framework_failure_
+echo 123 > exp || framework_failure_
+# Each run must succeed, produce exactly the expected output, and leave stderr empty.
+for locale in C en_US.UTF-8; do
+ LC_ALL=$locale sed 's/.\bx//' in > out 2>err || fail=1
+ compare exp out || fail=1
+ compare /dev/null err || fail=1
+done
+
+Exit $fail
diff --git a/testsuite/xemacs.good b/testsuite/xemacs.good
new file mode 100644
index 0000000..abe2903
--- /dev/null
+++ b/testsuite/xemacs.good
@@ -0,0 +1,66 @@
+#Makefile.in generated automatically by automake 1.5 from Makefile.am.
+
+#Copyright 1994-2018 Free Software Foundation, Inc.
+#This Makefile.in is free software; the Free Software Foundation
+#gives unlimited permission to copy and/or distribute it,
+#with or without modifications, as long as this notice is preserved.
+
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+#even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+#PARTICULAR PURPOSE.
+
+"@SET_MAKE@"
+
+#Automake requirements
+
+"SHELL = @SHELL@"
+
+"PACKAGE = sed"
+
+"EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \\"
+" m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \\"
+" m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \\"
+" m4/obstack.m4"
+
+"subdir = ."
+"ACLOCAL_M4 = $(top_srcdir)/aclocal.m4"
+"mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs"
+"CONFIG_HEADER = config.h"
+"CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile"
+"DIST_SOURCES ="
+"DATA = $(noinst_DATA)"
+
+"HEADERS = $(noinst_HEADERS)"
+
+
+"RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \\"
+" uninstall-info-recursive all-recursive install-data-recursive \\"
+" install-exec-recursive installdirs-recursive install-recursive \\"
+" uninstall-recursive check-recursive installcheck-recursive"
+"DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \\"
+" COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \\"
+" TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \\"
+" config.sub config_h.in configure configure.ac depcomp \\"
+" install-sh missing mkinstalldirs"
+"DIST_SUBDIRS = $(SUBDIRS)"
+"all: config.h"
+" $(MAKE) $(AM_MAKEFLAGS) all-recursive"
+
+".SUFFIXES:"
+"$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)"
+" cd $(top_srcdir) && \\"
+" $(AUTOMAKE) --gnu Makefile"
+"Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status"
+" cd $(top_builddir) && \\"
+" CONFIG_HEADERS= CONFIG_LINKS= \\"
+" CONFIG_FILES=$@ $(SHELL) ./config.status"
+
+"$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)"
+" $(SHELL) ./config.status --recheck"
+"$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)"
+" cd $(srcdir) && $(AUTOCONF)"
+
+"$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4"
+" cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)"
+"config.h: stamp-h"
diff --git a/testsuite/xemacs.inp b/testsuite/xemacs.inp
new file mode 100644
index 0000000..c313b4a
--- /dev/null
+++ b/testsuite/xemacs.inp
@@ -0,0 +1,66 @@
+# Makefile.in generated automatically by automake 1.5 from Makefile.am.
+
+# Copyright 1994-2018 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Automake requirements
+
+SHELL = @SHELL@
+
+PACKAGE = sed
+
+EXTRA_DIST = BUGS THANKS README.boot bootstrap.sh dc.sed autogen \
+ m4/codeset.m4 m4/gettext.m4 m4/iconv.m4 m4/lcmessage.m4 \
+ m4/getline.m4 m4/glibc21.m4 m4/isc-posix.m4 m4/progtest.m4 \
+ m4/obstack.m4
+
+subdir = .
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES = bootstrap.sh intl/Makefile
+DIST_SOURCES =
+DATA = $(noinst_DATA)
+
+HEADERS = $(noinst_HEADERS)
+
+
+RECURSIVE_TARGETS = info-recursive dvi-recursive install-info-recursive \
+ uninstall-info-recursive all-recursive install-data-recursive \
+ install-exec-recursive installdirs-recursive install-recursive \
+ uninstall-recursive check-recursive installcheck-recursive
+DIST_COMMON = README $(noinst_HEADERS) ./stamp-h.in ABOUT-NLS AUTHORS \
+ COPYING ChangeLog INSTALL Makefile.am Makefile.in NEWS THANKS \
+ TODO acconfig.h aclocal.m4 bootstrap.sh.in config.guess \
+ config.sub config_h.in configure configure.ac depcomp \
+ install-sh missing mkinstalldirs
+DIST_SUBDIRS = $(SUBDIRS)
+all: config.h
+ $(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.ac $(ACLOCAL_M4)
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --gnu Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) && \
+ CONFIG_HEADERS= CONFIG_LINKS= \
+ CONFIG_FILES=$@ $(SHELL) ./config.status
+
+$(top_builddir)/config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ $(SHELL) ./config.status --recheck
+$(srcdir)/configure: $(srcdir)/configure.ac $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES)
+ cd $(srcdir) && $(AUTOCONF)
+
+$(ACLOCAL_M4): configure.ac m4/codeset.m4 m4/getline.m4 m4/gettext.m4 m4/glibc21.m4 m4/iconv.m4 m4/isc-posix.m4 m4/lcmessage.m4 m4/obstack.m4 m4/progtest.m4
+ cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS)
+config.h: stamp-h
diff --git a/testsuite/xemacs.sh b/testsuite/xemacs.sh
new file mode 100755
index 0000000..e1877a2
--- /dev/null
+++ b/testsuite/xemacs.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+
+# Test runner for xemacs.sed
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+
+# Directory holding the fixtures read below (xemacs.inp and xemacs.good).
+dir="$abs_top_srcdir/testsuite"
+
+# Program under test, inspired by xemacs' config.status script (submitted by
+# John Fremlin, john@fremlin.de): quotes non-comment lines, escaping \ and ".
+cat << \EOF > xemacs.sed || framework_failure_
+/^# Generated/d
+s%/\*\*/#.*%%
+s/^ *# */#/
+/^##/d
+/^#/ {
+ p
+ d
+}
+/./ {
+ s/\([\"]\)/\\\1/g
+ s/^/"/
+ s/$/"/
+}
+EOF
+
+# Run the program over the input fixture and compare with the expected output.
+sed -f xemacs.sed < "$dir/xemacs.inp" > out || fail=1
+remove_cr_inplace out
+compare "$dir/xemacs.good" out || fail=1
+
+# Finish with the status accumulated in $fail.
+Exit $fail