summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/basic/utf8.c 18
-rw-r--r-- src/basic/utf8.h 1
-rw-r--r-- src/test/test-utf8.c 13
3 files changed, 31 insertions, 1 deletions
diff --git a/src/basic/utf8.c b/src/basic/utf8.c
index 1e86949a4..af86f74d1 100644
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -247,6 +247,9 @@ char *utf8_escape_non_printable(const char *str) {
char *ascii_is_valid(const char *str) {
const char *p;
+ /* Check whether the string consists of valid ASCII bytes,
+ * i.e. values between 0 and 127, inclusive. */
+
assert(str);
for (p = str; *p; p++)
@@ -256,6 +259,21 @@ char *ascii_is_valid(const char *str) {
return (char*) str;
}
+char *ascii_is_valid_n(const char *str, size_t len) {
+ size_t i;
+
+ /* Like ascii_is_valid(), but inspects exactly len bytes of str instead of stopping at NUL.
+ * Returns str if every byte is in 1..127, else NULL (a NUL byte is rejected); len == 0 yields str. */
+
+ assert(str);
+
+ for (i = 0; i < len; i++)
+ if ((unsigned char) str[i] >= 128 || str[i] == 0) /* cast so high bytes compare as 128..255 even if char is signed */
+ return NULL;
+
+ return (char*) str; /* same pointer that was passed in, with const cast away for the return type */
+}
+
/**
* utf8_encode_unichar() - Encode single UCS-4 character as UTF-8
* @out_utf8: output buffer of at least 4 bytes or NULL
diff --git a/src/basic/utf8.h b/src/basic/utf8.h
index 7d68105a0..d6936ea46 100644
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -22,6 +22,7 @@ bool unichar_is_valid(char32_t c);
const char *utf8_is_valid(const char *s) _pure_;
char *ascii_is_valid(const char *s) _pure_;
+char *ascii_is_valid_n(const char *str, size_t len);
bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pure_;
#define utf8_is_printable(str, length) utf8_is_printable_newline(str, length, true)
diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c
index ec963437b..d645dc958 100644
--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -24,11 +24,21 @@ static void test_utf8_is_valid(void) {
}
static void test_ascii_is_valid(void) {
- assert_se(ascii_is_valid("alsdjf\t\vbarr\nba z"));
+ assert_se( ascii_is_valid("alsdjf\t\vbarr\nba z"));
assert_se(!ascii_is_valid("\342\204\242"));
assert_se(!ascii_is_valid("\341\204"));
}
+static void test_ascii_is_valid_n(void) { /* checks ascii_is_valid_n() against explicit lengths */
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 17)); /* all 17 bytes of the literal, terminator excluded */
+ assert_se( ascii_is_valid_n("alsdjf\t\vbarr\nba z", 16)); /* a shorter prefix is accepted too */
+ assert_se(!ascii_is_valid_n("alsdjf\t\vbarr\nba z", 18)); /* byte 18 is the terminating NUL, which is rejected */
+ assert_se(!ascii_is_valid_n("\342\204\242", 3)); /* every byte of this literal is >= 128 */
+ assert_se(!ascii_is_valid_n("\342\204\242", 2));
+ assert_se(!ascii_is_valid_n("\342\204\242", 1));
+ assert_se( ascii_is_valid_n("\342\204\242", 0)); /* zero length: no byte is inspected */
+}
+
static void test_utf8_encoded_valid_unichar(void) {
assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
@@ -115,6 +125,7 @@ int main(int argc, char *argv[]) {
test_utf8_is_valid();
test_utf8_is_printable();
test_ascii_is_valid();
+ test_ascii_is_valid_n();
test_utf8_encoded_valid_unichar();
test_utf8_escaping();
test_utf8_escaping_printable();