diff options
Diffstat (limited to 'testsuite/utf8-ru.sh')
-rw-r--r-- | testsuite/utf8-ru.sh | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/testsuite/utf8-ru.sh b/testsuite/utf8-ru.sh new file mode 100644 index 0000000..695c383 --- /dev/null +++ b/testsuite/utf8-ru.sh @@ -0,0 +1,123 @@ +#!/bin/sh + +# Test GNU extension "\u" and "\U" (uppercase conversion) +# in "s///" command. +# This is an adaptation of the old utf8-1/2/3/4 tests. + +# Copyright (C) 2017-2018 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed +print_ver_ sed + +require_ru_utf8_locale_ + +# The letter used in these tests are: +# UTF8:Octal UTF8:HEX CodePoint Name +# А \320\220 \xD0\x90 U+0410 \N{CYRILLIC CAPITAL LETTER A} +# Д \320\224 \xD0\x94 U+0414 \N{CYRILLIC CAPITAL LETTER DE} +# а \320\260 \xD0\xB0 U+0430 \N{CYRILLIC SMALL LETTER A} +# д \320\264 \xD0\xB4 U+0434 \N{CYRILLIC SMALL LETTER DE} + +# Using octal values, as these are the most portable access various printfs. + + +# Input: Same input for all test (all lower case letters) +# д а д +printf '\320\264\320\260 \320\264\n' > utf8-inp || framework_failure_ + + +# Test 1: Convert "small DE" to upper case (with \U) +# s/д/\U&/g +printf 's/\320\264/\\U&/g' > utf8-1.sed || framework_failure_ + +# Test 1: Expected output - two capital DE letters. +# Д а Д +printf '\320\224\320\260 \320\224\n' > utf8-1-exp || framework_failure_ + + +# Test 2: Convert "small DE" to upper case (with \u - next character only) +# s/д/\u&/g +printf 's/\320\264/\\u&/g\n' > utf8-2.sed || framework_failure_ + +# The expected output of test 2 is identical to test 1. +# We create the file to make the test loop (below) simpler. +cp utf8-1-exp utf8-2-exp || framework_failure_ + + + +# Test 3: Capitalize only the next character (\u) +# Only the first "DE" should be capitilized. +# s/д.*/\u&/g +printf 's/\320\264.*/\\u&/g' > utf8-3.sed || framework_failure_ + +# Test 3: Expected output - First DE capitilized, second DE not. +# Д а д +printf '\320\224\320\260 \320\264\n' > utf8-3-exp || framework_failure_ + + +# Test 4: Capitalize all matched characters +# s/д.*/\U&/g +printf 's/\320\264.*/\\U&/g' > utf8-4.sed || framework_failure_ + + +# Test 4: Expected output - All capital letters: +# Д А Д +printf '\320\224\320\220 \320\224\n' > utf8-4-exp || framework_failure_ + +# Step 1: force Russian UTF8 locale. +# The case-conversion should either work, or not modify the input. +for i in 1 2 3 4; +do + LC_ALL=ru_RU.UTF-8 \ + sed -f utf8-$i.sed < utf8-inp > utf8-$i-ru-out || fail=1 + + remove_cr_inplace utf8-$i-ru-out + + # If we have the expected output - continue to next text + compare utf8-$i-exp utf8-$i-ru-out && continue + + # Otherwise, ensure the input wasn't modified + # (i.e. sed did not modify partial octets resulting in + # invalid multibyte sequences) + compare utf8-$i-inp utf8-$i-ru-out || fail=1 +done + + +# Step 2: If the current locale supports UTF8, repeat the above tests. +l=$(locale | grep '^LC_CTYPE=' | sed 's/^.*="// ; s/"$//') +case "$n" in + *UTF-8 | *UTF8 | *utf8 | *utf-8) utf8=yes;; + *) utf8=no;; +esac + +if test "$utf8" = yes ; then + for i in 1 2 3 4; + do + sed -f utf8-$i.sed < utf8-inp > utf8-$i-out || fail=1 + + remove_cr_inplace utf8-$i-out + + # If we have the expected output - continue to next text + compare utf8-$i-exp utf8-$i-out && continue + + # Otherwise, ensure the input wasn't modified + # (i.e. sed did not modify partial octets resulting in + # invalid multibyte sequences) + compare utf8-$i-inp utf8-$i-out || fail=1 + done +fi + + +Exit $fail |