From e62e8b3d2e9ac7f1e1a13578eb989f751b6c94ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 9 Jun 2018 13:41:24 +0200 Subject: basic/utf8: add ascii_is_valid_n() --- src/basic/utf8.c | 18 ++++++++++++++++++ src/basic/utf8.h | 1 + src/test/test-utf8.c | 13 ++++++++++++- 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/basic/utf8.c b/src/basic/utf8.c index 1e86949a4..af86f74d1 100644 --- a/src/basic/utf8.c +++ b/src/basic/utf8.c @@ -247,6 +247,9 @@ char *utf8_escape_non_printable(const char *str) { char *ascii_is_valid(const char *str) { const char *p; + /* Check whether the string consists of valid ASCII bytes, + * i.e. values between 0 and 127, inclusive. */ + assert(str); for (p = str; *p; p++) @@ -256,6 +259,21 @@ char *ascii_is_valid(const char *str) { return (char*) str; } +char *ascii_is_valid_n(const char *str, size_t len) { + size_t i; + + /* Very similar to ascii_is_valid(), but checks exactly len + * bytes and rejects any NULs in that range. 
*/ + + assert(str); + + for (i = 0; i < len; i++) + if ((unsigned char) str[i] >= 128 || str[i] == 0) + return NULL; + + return (char*) str; +} + /** * utf8_encode_unichar() - Encode single UCS-4 character as UTF-8 * @out_utf8: output buffer of at least 4 bytes or NULL diff --git a/src/basic/utf8.h b/src/basic/utf8.h index 7d68105a0..d6936ea46 100644 --- a/src/basic/utf8.h +++ b/src/basic/utf8.h @@ -22,6 +22,7 @@ bool unichar_is_valid(char32_t c); const char *utf8_is_valid(const char *s) _pure_; char *ascii_is_valid(const char *s) _pure_; +char *ascii_is_valid_n(const char *str, size_t len); bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_; #define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true) diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c index ec963437b..d645dc958 100644 --- a/src/test/test-utf8.c +++ b/src/test/test-utf8.c @@ -24,11 +24,21 @@ static void test_utf8_is_valid(void) { } static void test_ascii_is_valid(void) { - assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z")); + assert_se( ascii_is_valid("alsdjf\t\vbarr\nba z")); assert_se(!ascii_is_valid("\342\204\242")); assert_se(!ascii_is_valid("\341\204")); } +static void test_ascii_is_valid_n(void) { + assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 17)); + assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 16)); + assert_se(!ascii_is_valid_n("alsdjf\t\vbarr\nba z", 18)); + assert_se(!ascii_is_valid_n("\342\204\242", 3)); + assert_se(!ascii_is_valid_n("\342\204\242", 2)); + assert_se(!ascii_is_valid_n("\342\204\242", 1)); + assert_se( ascii_is_valid_n("\342\204\242", 0)); +} + static void test_utf8_encoded_valid_unichar(void) { assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3); assert_se(utf8_encoded_valid_unichar("\302\256") == 2); @@ -115,6 +125,7 @@ int main(int argc, char *argv[]) { test_utf8_is_valid(); test_utf8_is_printable(); test_ascii_is_valid(); + test_ascii_is_valid_n(); test_utf8_encoded_valid_unichar(); 
test_utf8_escaping(); test_utf8_escaping_printable(); -- cgit v1.2.3