summaryrefslogtreecommitdiff
path: root/testsuite/utf8-ru.sh
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite/utf8-ru.sh')
-rw-r--r--testsuite/utf8-ru.sh123
1 files changed, 123 insertions, 0 deletions
diff --git a/testsuite/utf8-ru.sh b/testsuite/utf8-ru.sh
new file mode 100644
index 0000000..695c383
--- /dev/null
+++ b/testsuite/utf8-ru.sh
@@ -0,0 +1,123 @@
+#!/bin/sh
+
+# Test GNU extension "\u" and "\U" (uppercase conversion)
+# in "s///" command.
+# This is an adaptation of the old utf8-1/2/3/4 tests.
+
+# Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed
+print_ver_ sed
+# NOTE(review): presumably skips the test without a Russian UTF-8 locale - confirm.
+require_ru_utf8_locale_
+
+# The letters used in these tests are:
+# UTF8:Octal UTF8:HEX CodePoint Name
+# А \320\220 \xD0\x90 U+0410 \N{CYRILLIC CAPITAL LETTER A}
+# Д \320\224 \xD0\x94 U+0414 \N{CYRILLIC CAPITAL LETTER DE}
+# а \320\260 \xD0\xB0 U+0430 \N{CYRILLIC SMALL LETTER A}
+# д \320\264 \xD0\xB4 U+0434 \N{CYRILLIC SMALL LETTER DE}
+
+# Using octal values, as these are the most portable across various printfs.
+
+
+# Input: the same input for all tests (all lower-case letters)
+# д а д
+printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_
+
+
+# Test 1: Convert "small DE" to upper case (with \U)
+# s/д/\U&/g
+printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_
+
+# Test 1: Expected output - two capital DE letters.
+# Д а Д
+printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_
+
+
+# Test 2: Convert "small DE" to upper case (with \u - next character only)
+# s/д/\u&/g
+printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_
+
+# The expected output of test 2 is identical to that of test 1.
+# We create the file anyway to keep the test loops (below) simple.
+cp utf8-1-exp utf8-2-exp || framework_failure_
+
+
+
+# Test 3: Capitalize only the next character (\u)
+# Only the first "DE" should be capitalized.
+# s/д.*/\u&/g
+printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_
+
+# Test 3: Expected output - first DE capitalized, second DE not.
+# Д а д
+printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_
+
+
+# Test 4: Capitalize all matched characters
+# s/д.*/\U&/g
+printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_
+
+
+# Test 4: Expected output - All capital letters:
+# Д А Д
+printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_
+
+# Step 1: force Russian UTF8 locale.
+# The case-conversion should either work, or not modify the input.
+for i in 1 2 3 4;
+do
+  LC_ALL=ru_RU.UTF-8 \
+    sed -f utf8-$i.sed < utf8-inp > utf8-$i-ru-out || fail=1
+
+  remove_cr_inplace utf8-$i-ru-out
+
+  # If we have the expected output - continue to the next test
+  compare utf8-$i-exp utf8-$i-ru-out && continue
+
+  # Otherwise, ensure the input wasn't modified
+  # (i.e. sed did not modify partial octets resulting in
+  # invalid multibyte sequences).  All tests share the one utf8-inp file.
+  compare utf8-inp utf8-$i-ru-out || fail=1
+done
+
+
+# Step 2: If the current LC_CTYPE names a UTF-8 locale, repeat the above tests.
+l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//')
+case "$l" in
+  *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;;
+  *) utf8=no;;
+esac
+
+if test "$utf8" = yes ; then
+  for i in 1 2 3 4;
+  do
+    sed -f utf8-$i.sed < utf8-inp > utf8-$i-out || fail=1
+
+    remove_cr_inplace utf8-$i-out
+
+    # If we have the expected output - continue to the next test
+    compare utf8-$i-exp utf8-$i-out && continue
+
+    # Otherwise, ensure the input wasn't modified
+    # (i.e. sed did not modify partial octets resulting in
+    # invalid multibyte sequences).  All tests share the one utf8-inp file.
+    compare utf8-inp utf8-$i-out || fail=1
+  done
+fi
+
+
+Exit $fail