summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgregor herrmann <gregoa@debian.org>2020-02-22 01:02:48 +0100
committergregor herrmann <gregoa@debian.org>2020-02-22 01:02:48 +0100
commit4202d737e6e0232d35763fa4d24fdac2be84eddd (patch)
treedad8e1729a9e20712d6eef2ff6e23e139afaccad
parent7820272fcad389ac8366e65469ffa8d1ce9a6dd9 (diff)
New upstream version 0.56
-rw-r--r--Changes8
-rw-r--r--MANIFEST1
-rw-r--r--META.json7
-rw-r--r--META.yml5
-rw-r--r--Makefile.PL7
-rw-r--r--README15
-rw-r--r--errors.c56
-rw-r--r--json-common.c7
-rw-r--r--json-entry-points.c28
-rw-r--r--json-perl.c39
-rw-r--r--lib/JSON/Parse.pm2
-rw-r--r--lib/JSON/Parse.pod50
-rw-r--r--lib/JSON/Tokenize.pm2
-rw-r--r--lib/JSON/Tokenize.pod6
-rw-r--r--t/bugzilla-2049.t11
-rw-r--r--unicode.c159
-rw-r--r--unicode.h58
17 files changed, 227 insertions, 234 deletions
diff --git a/Changes b/Changes
index 017f4bc..2081e79 100644
--- a/Changes
+++ b/Changes
@@ -1,3 +1,11 @@
+0.56 2020-02-06
+
+* Allow build on Solaris and SunOS
+
+0.55_02 2019-01-24
+
+* For testing of Daxim segfault through CPAN testers
+
0.55 2017-10-21
* Versions synchronised
diff --git a/MANIFEST b/MANIFEST
index c03c144..7121017 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -33,6 +33,7 @@ MANIFEST.SKIP
README
script/validjson
t/array.t
+t/bugzilla-2049.t
t/collision.t
t/JSON-Parse.t
t/json-tokenize.t
diff --git a/META.json b/META.json
index 9fc04fd..43ddad7 100644
--- a/META.json
+++ b/META.json
@@ -4,7 +4,7 @@
"Ben Bullock <bkb@cpan.org>"
],
"dynamic_config" : 1,
- "generated_by" : "ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010",
+ "generated_by" : "ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010",
"license" : [
"perl_5"
],
@@ -32,6 +32,7 @@
},
"runtime" : {
"requires" : {
+ "Carp" : "0",
"perl" : "5.008009"
}
}
@@ -47,10 +48,10 @@
"web" : "https://github.com/benkasminbullock/JSON-Parse"
}
},
- "version" : "0.55",
+ "version" : "0.56",
"x_contributors" : [
"Shlomi Fish <shlomif@cpan.org>",
"kolmogorov42"
],
- "x_serialization_backend" : "JSON::PP version 2.94"
+ "x_serialization_backend" : "JSON::PP version 2.97001"
}
diff --git a/META.yml b/META.yml
index 5dda686..83543ba 100644
--- a/META.yml
+++ b/META.yml
@@ -7,7 +7,7 @@ build_requires:
configure_requires:
ExtUtils::MakeMaker: '0'
dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 7.3, CPAN::Meta::Converter version 2.150010'
+generated_by: 'ExtUtils::MakeMaker version 7.34, CPAN::Meta::Converter version 2.150010'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
@@ -18,11 +18,12 @@ no_index:
- t
- inc
requires:
+ Carp: '0'
perl: '5.008009'
resources:
bugtracker: https://github.com/benkasminbullock/JSON-Parse/issues
repository: git://github.com/benkasminbullock/JSON-Parse.git
-version: '0.55'
+version: '0.56'
x_contributors:
- 'Shlomi Fish <shlomif@cpan.org>'
- kolmogorov42
diff --git a/Makefile.PL b/Makefile.PL
index 73bc04a..96f109a 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -2,10 +2,6 @@ use strict;
use warnings;
use ExtUtils::MakeMaker;
-if ($^O =~ /solaris|sunos/i) {
- exit;
-}
-
my $pm = 'lib/JSON/Parse.pm';
my $pod = 'lib/JSON/Parse.pod';
my $github = 'github.com/benkasminbullock/JSON-Parse';
@@ -18,6 +14,7 @@ WriteMakefile (
AUTHOR => 'Ben Bullock <bkb@cpan.org>',
LICENSE => 'perl',
PREREQ_PM => {
+ 'Carp' => '0',
},
META_MERGE => {
'meta-spec' => {
@@ -53,4 +50,6 @@ WriteMakefile (
# OPTIMIZE => '-Wall -O',
# This achieves only small speedups with GCC.
# OPTIMIZE => '-O3',
+
+# CC => 'gcc',
);
diff --git a/README b/README
index 8912549..718a095 100644
--- a/README
+++ b/README
@@ -6,7 +6,7 @@
-This is the README for JSON::Parse version 0.55.
+This is the README for JSON::Parse version 0.56.
JSON::Parse is a "module" for the Perl computer programming language, a
library of computer code to install on a computer. This document contains
@@ -50,12 +50,11 @@ JSON::Parse accepts only UTF-8 as input. See "UTF-8 only" and
2. DOCUMENTATION
You can read the documentation for the module online at the following
-websites:
+website:
- * http://search.cpan.org/perldoc?JSON::Parse
* http://metacpan.org/release/JSON-Parse
-(These links go to the latest version of the module.)
+(This link goes to the latest version of the module.)
After installing the module, you can read the documentation on your
computer using
@@ -76,11 +75,11 @@ If you have the App::cpanminus installer, you may prefer
cpanm JSON::Parse
-To install the module from the source file, JSON-Parse-0.55.tar.gz, follow
+To install the module from the source file, JSON-Parse-0.56.tar.gz, follow
this sequence of commands:
- tar xfz JSON-Parse-0.55.tar.gz
- cd JSON-Parse-0.55
+ tar xfz JSON-Parse-0.56.tar.gz
+ cd JSON-Parse-0.56
perl Makefile.PL
make
make install
@@ -105,6 +104,6 @@ repository on github at
-----------------------------------------------------------------------------
-This README was written on Tue Oct 24 10:40:46 2017.
+This README was written on Mon Feb 17 13:11:05 2020.
-----------------------------------------------------------------------------
diff --git a/errors.c b/errors.c
index bd4d01e..520ee21 100644
--- a/errors.c
+++ b/errors.c
@@ -67,7 +67,7 @@ enum expectation {
#define XBYTES_90_BF (1<<xbytes_90_bf)
#define XBYTES_A0_BF (1<<xbytes_a0_bf)
char * input_expectation[n_expectations] = {
-"whitespace: '\\n', '\\r', '\\t', ' '",
+"whitespace: 'n', '\\r', '\\t', ' '",
"comma: ','",
"value separator: ':'",
"end of object: '}'",
@@ -80,23 +80,23 @@ char * input_expectation[n_expectations] = {
"plus: '+'",
"exponential sign: 'e', 'E'",
"start of an array or object: '{', '['",
-"escape: '\\', '/', '\"', 'b', 'f', 'n', 'r', 't', 'u'",
-"printable ASCII or first byte of UTF-8: '\\x20-\\x7f', '\\xC2-\\xF4'",
+"escape: '', '/', '\"', 'b', 'f', 'n', 'r', 't', 'u'",
+"printable ASCII or first byte of UTF-8: 'x20-\\x7f', '\\xC2-\\xF4'",
"start of literal: 't', 'f', 'n'",
"after the start of true, false, or null",
"the second half of a surrogate pair",
-"bytes in range 80-8f: '\\x80-\\x8f'",
-"bytes in range 80-9f: '\\x80-\\x9f'",
-"bytes in range 80-bf: '\\x80-\\xbf'",
-"bytes in range 90-bf: '\\x90-\\xbf'",
-"bytes in range a0-bf: '\\xa0-\\xbf'",
+"bytes in range 80-8f: 'x80-\\x8f'",
+"bytes in range 80-9f: 'x80-\\x9f'",
+"bytes in range 80-bf: 'x80-\\xbf'",
+"bytes in range 90-bf: 'x90-\\xbf'",
+"bytes in range a0-bf: 'xa0-\\xbf'",
};
unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
/* whitespace */
-{0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+{0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -225,7 +225,7 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
/* escape */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -234,7 +234,7 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
},
/* stringchar */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -274,9 +274,9 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
},
/* bytes_80_8f */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -284,9 +284,9 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
},
/* bytes_80_9f */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -294,9 +294,9 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
},
/* bytes_80_bf */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -304,20 +304,20 @@ unsigned char allowed[n_expectations][JSON3MAXBYTE] = {
},
/* bytes_90_bf */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
},
/* bytes_a0_bf */
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
diff --git a/json-common.c b/json-common.c
index 15bcc3c..86a0318 100644
--- a/json-common.c
+++ b/json-common.c
@@ -164,12 +164,13 @@ typedef struct parser {
unsigned int length;
- /* The input. */
+ /* The input. This is fixed at the beginning throughout
+ parsing. */
unsigned char * input;
- /* The end-point of the parsing. This increments through
- "input". */
+ /* The end-point of the parsing, the last parsed thing. This
+ increments through "input". */
unsigned char * end;
diff --git a/json-entry-points.c b/json-entry-points.c
index 3385948..579633e 100644
--- a/json-entry-points.c
+++ b/json-entry-points.c
@@ -39,16 +39,16 @@ static void check_end (json_parse_t * parser)
#ifndef NOPERL
-/* Set up the parser. */
-
-#define GETSTRING \
- { \
- STRLEN length; \
- parser->end = parser->input = \
- (unsigned char *) SvPV (json, length); \
- parser->length = (unsigned int) length; \
- parser->unicode = SvUTF8 (json) ? 1 : 0; \
- }
+/* Set up "parser" with the string from "json". */
+
+static void getstring (SV * json, json_parse_t * parser)
+{
+ STRLEN length;
+ parser->input = (unsigned char *) SvPV (json, length);
+ parser->end = parser->input;
+ parser->length = (unsigned int) length;
+ parser->unicode = SvUTF8 (json) ? 1 : 0;
+}
#endif /* ndef NOPERL */
@@ -79,7 +79,7 @@ json_parse_run (json_parse_t * parser, SV * json)
SV * r = & PL_sv_undef;
- GETSTRING;
+ getstring (json, parser);
if (parser->length == 0) {
fail_empty (parser);
@@ -327,7 +327,7 @@ validate (SV * json, unsigned int flags)
{
ENTRYDECL;
- GETSTRING;
+ getstring (json, parser);
if (parser->length == 0) {
fail_empty (parser);
@@ -338,7 +338,7 @@ validate (SV * json, unsigned int flags)
static void
check (json_parse_t * parser, SV * json)
{
- GETSTRING;
+ getstring (json, parser);
c_validate (parser);
}
@@ -347,7 +347,7 @@ tokenize (SV * json)
{
ENTRYDECL;
- GETSTRING;
+ getstring (json, parser);
/* Mark this parser as being used for tokenizing to bypass the
checks for memory leaks when the parser is freed. */
diff --git a/json-perl.c b/json-perl.c
index 831cf32..9852d9a 100644
--- a/json-perl.c
+++ b/json-perl.c
@@ -529,49 +529,14 @@ PREFIX (string) (json_parse_t * parser)
#ifdef PERLING
-#if 0
- /* This was an attempt at a speedup by copying the prefix part of
- the string and the contents of parser->buffer sequentially into
- an SV. This didn't result in a significant speedup. */
- parser->end--;
- /* Save the length of the part without escapes. */
- prefixlen = (STRLEN) (parser->end - start);
- len = get_string (parser);
- if (prefixlen > 0) {
- char * svbuf;
- string = newSV (len + prefixlen + 1);
- svbuf = SvPVX (string);
- memcpy (svbuf, start, prefixlen);
- memcpy (svbuf + prefixlen, parser->buffer, len);
- svbuf[len + prefixlen] = '\0';
- SvPOK_only (string);
- SvCUR_set (string, len+prefixlen);
- }
- else {
- /* Have an escape as the first character so nothing to
- copy. */
- string = newSVpvn ((const char *) parser->buffer, len);
- }
-#elif 0
- /* This is the original method up to version 0.32, set the point
- of parsing back to the first character of the string, then get
- the string into an allocated buffer. */
- parser->end = start;
- len = get_string (parser);
- string = newSVpvn ((const char *) parser->buffer, len);
-#else
- /* New-fangled method, use perl_get_string which keeps the buffer
- on the stack. Results in a minor speed increase. */
+ /* Use "perl_get_string" which keeps the buffer on the
+ stack. Results in a minor speed increase. */
parser->end = start;
prefixlen = (STRLEN) (parser->end - start);
string = perl_get_string (parser, prefixlen);
-#endif
#elif defined (TOKENING)
/* Don't use "len" here since it subtracts the escapes. */
- /*
- printf ("New token string : <<%.*s>> <<%c>>.\n", parser->end - start, start - 1, *(parser->end));
- */
parser->end = start;
len = get_string (parser);
string = json_token_new (parser,
diff --git a/lib/JSON/Parse.pm b/lib/JSON/Parse.pm
index be1ffbf..a45e46d 100644
--- a/lib/JSON/Parse.pm
+++ b/lib/JSON/Parse.pm
@@ -17,7 +17,7 @@ require Exporter;
use warnings;
use strict;
use Carp;
-our $VERSION = '0.55';
+our $VERSION = '0.56';
require XSLoader;
XSLoader::load (__PACKAGE__, $VERSION);
diff --git a/lib/JSON/Parse.pod b/lib/JSON/Parse.pod
index 321fc71..09cb809 100644
--- a/lib/JSON/Parse.pod
+++ b/lib/JSON/Parse.pod
@@ -23,8 +23,8 @@ Convert JSON into Perl.
=head1 VERSION
-This documents version 0.55 of JSON::Parse corresponding to
-L<git commit 739c1e12e85756c703242229caef685615ba77ca|https://github.com/benkasminbullock/JSON-Parse/commit/739c1e12e85756c703242229caef685615ba77ca> released on Tue Oct 24 07:11:47 2017 +0900.
+This documents version 0.56 of JSON::Parse corresponding to
+L<git commit c00e1e8b7dfc7958de6700700ee20582f81b56a6|https://github.com/benkasminbullock/JSON-Parse/commit/c00e1e8b7dfc7958de6700700ee20582f81b56a6> released on Mon Feb 17 13:10:15 2020 +0900.
@@ -79,7 +79,7 @@ produces output
HASH
-(This example is included as L<F<hash.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/hash.pl> in the distribution.)
+(This example is included as L<F<hash.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/hash.pl> in the distribution.)
If the input JSON text is a serialized array, an array reference is
@@ -97,7 +97,7 @@ produces output
ARRAY
-(This example is included as L<F<array.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/array.pl> in the distribution.)
+(This example is included as L<F<array.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/array.pl> in the distribution.)
Otherwise a Perl scalar is returned.
@@ -162,7 +162,7 @@ produces output
-(This example is included as L<F<assert.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/assert.pl> in the distribution.)
+(This example is included as L<F<assert.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/assert.pl> in the distribution.)
This is the underlying function for L</valid_json>. It runs at the
@@ -221,7 +221,7 @@ refers to the caller's line.
As the name implies, this is meant to be a "safety-first" version of
L</parse_json>. This function does not pass all of the tests of the
L</JSON Parsing Test Suite>, because it creates an error for duplicate
-keys in objects, which is legal JSON. See L<t/jpts.t> for details.
+keys in objects, which is legal JSON. See F<t/jpts.t> for details.
This function was added in version 0.38.
@@ -370,7 +370,7 @@ produces output
Native Perl: かあ
-(This example is included as L<F<unicode-details.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/unicode-details.pl> in the distribution.)
+(This example is included as L<F<unicode-details.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/unicode-details.pl> in the distribution.)
Although in general the above would be an unsafe practice, JSON::Parse
@@ -443,7 +443,7 @@ produces output
Ambiguous key 'a' is 2
-(This example is included as L<F<key-collision.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/key-collision.pl> in the distribution.)
+(This example is included as L<F<key-collision.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/key-collision.pl> in the distribution.)
Here the key "a" could be either 1 or 2. As seen in the example,
@@ -561,7 +561,7 @@ produces output
{"fripp":false,"bruce":true,"clapton":true,"hendrix":false}
-(This example is included as L<F<json-tiny-round-trip-demo.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/json-tiny-round-trip-demo.pl> in the distribution.)
+(This example is included as L<F<json-tiny-round-trip-demo.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/json-tiny-round-trip-demo.pl> in the distribution.)
Most of the other CPAN modules use similar methods to L<JSON::Tiny>,
@@ -714,7 +714,7 @@ produces output
-(This example is included as L<F<collide.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.55/examples/collide.pl> in the distribution.)
+(This example is included as L<F<collide.pl>|https://fastapi.metacpan.org/source/BKB/JSON-Parse-0.56/examples/collide.pl> in the distribution.)
The C<detect_collisions (1)> behaviour is the behaviour of
@@ -997,7 +997,7 @@ error like this:
gives output
- JSON error at line 1, byte 1/1: Unexpected character '+' parsing initial state: expecting whitespace: '\n', '\r', '\t', ' ' or start of string: '"' or digit: '0-9' or minus: '-' or start of an array or object: '{', '[' or start of literal: 't', 'f', 'n'
+ JSON error at line 1, byte 1/1: Unexpected character '+' parsing initial state: expecting whitespace: 'n', '\r', '\t', ' ' or start of string: '"' or digit: '0-9' or minus: '-' or start of an array or object: '{', '[' or start of literal: 't', 'f', 'n'
@@ -1012,7 +1012,7 @@ byte n":
gives output
- JSON error at line 1, byte 11/13: Unexpected character 'a' parsing string starting from byte 9: expecting escape: '\', '/', '"', 'b', 'f', 'n', 'r', 't', 'u'
+ JSON error at line 1, byte 11/13: Unexpected character 'a' parsing string starting from byte 9: expecting escape: '', '/', '"', 'b', 'f', 'n', 'r', 't', 'u'
@@ -1060,7 +1060,7 @@ and a leading zero,
gives output
- JSON error at line 1, byte 3/6: Unexpected character '1' parsing number starting from byte 2: expecting whitespace: '\n', '\r', '\t', ' ' or comma: ',' or end of array: ']' or dot: '.' or exponential sign: 'e', 'E'
+ JSON error at line 1, byte 3/6: Unexpected character '1' parsing number starting from byte 2: expecting whitespace: 'n', '\r', '\t', ' ' or comma: ',' or end of array: ']' or dot: '.' or exponential sign: 'e', 'E'
@@ -1243,8 +1243,8 @@ able to fully work out the reason behind the better speed.
There is some benchmarking code in the github repository under the
directory "benchmarks" for those wishing to test these claims. The
-script L<F<benchmarks/bench>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/benchmarks/bench> is an adaptation of the similar
-script in the L<JSON::XS> distribution. The script L<F<benchmarks/pub-bench.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/benchmarks/pub-bench.pl> runs the benchmarks and prints them
+script L<F<benchmarks/bench>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/benchmarks/bench> is an adaptation of the similar
+script in the L<JSON::XS> distribution. The script L<F<benchmarks/pub-bench.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/benchmarks/pub-bench.pl> runs the benchmarks and prints them
out as POD.
The following benchmark tests used version 0.47 of JSON::Parse and
@@ -1680,7 +1680,7 @@ supplied with the module in the F</t/> subdirectory of the
distribution.
More extensive testing code is in the git repository. This is not
-supplied in the CPAN distribution. A script, L<F<randomjson.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/randomjson.pl>,
+supplied in the CPAN distribution. A script, L<F<randomjson.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/randomjson.pl>,
generates a set number of bytes of random JSON and checks that the
module's bytewise validation of input is correct. It does this by
taking a valid fragment, then adding each possible byte from 0 to 255
@@ -1690,18 +1690,18 @@ it to the fragment and continuing the process until a complete valid
JSON input is formed. The module has undergone about a billion
repetitions of this test.
-This setup relies on a C file, L<F<json-random-test.c>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/json-random-test.c>, which isn't in
-the CPAN distribution, and it also requires L<F<Json3.xs>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/Json3.xs> to be edited
+This setup relies on a C file, L<F<json-random-test.c>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/json-random-test.c>, which isn't in
+the CPAN distribution, and it also requires L<F<Json3.xs>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/Json3.xs> to be edited
to make the macro C<TESTRANDOM> true (uncomment line 7 of the
file). The testing code uses C setjmp/longjmp, so it's not guaranteed
to work on all operating systems and is commented out for CPAN
releases.
-A pure C version called L<F<random-test.c>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/random-test.c> also exists. This applies
+A pure C version called L<F<random-test.c>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/random-test.c> also exists. This applies
exactly the same tests, and requires no Perl at all.
If you're interested in testing your own JSON parser, the outputs
-generated by L<F<randomjson.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/randomjson.pl> are quite a good place to start. The
+generated by L<F<randomjson.pl>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/randomjson.pl> are quite a good place to start. The
default is to produce UTF-8 output, which looks pretty horrible since
it tends to produce long strings of UTF-8 garbage. (This is because it
chooses randomly from 256 bytes and the end-of-string marker C<"> has
@@ -1733,17 +1733,17 @@ module's tests, but some of the files (like 100,000 open arrays)
actually L<cause crashes on some versions of Perl on some
machines|http://fast-matrix.cpantesters.org/?dist=JSON-Parse%200.48_01>,
so they're not really suitable for distribution. The tests are found,
-however, in the repository under L<F<xt/jpts.t>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/xt/jpts.t> and the
-subdirectory L<F<xt/jpts>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/xt/jpts>, so if you are interested in the
+however, in the repository under L<F<xt/jpts.t>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/xt/jpts.t> and the
+subdirectory L<F<xt/jpts>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/xt/jpts>, so if you are interested in the
results, please copy that and try it. There is also a test for the
-L<validjson|/SCRIPT> script as L<F<xt/validjson.t>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/xt/validjson.t> in the
+L<validjson|/SCRIPT> script as L<F<xt/validjson.t>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/xt/validjson.t> in the
repository. These are author tests, so you may need to install extra
modules to run them. These author tests are run automatically before
any code is uploaded to CPAN.
=head1 HISTORY
-See L<F<Changes>|https://github.com/benkasminbullock/JSON-Parse/blob/739c1e12e85756c703242229caef685615ba77ca/Changes> in the distribution for a full list of changes.
+See L<F<Changes>|https://github.com/benkasminbullock/JSON-Parse/blob/c00e1e8b7dfc7958de6700700ee20582f81b56a6/Changes> in the distribution for a full list of changes.
This module started out under the name C<JSON::Argo>. It was
originally a way to escape from having to use the other JSON modules
@@ -1814,7 +1814,7 @@ Ben Bullock, <bkb@cpan.org>
=head1 COPYRIGHT & LICENCE
This package and associated files are copyright (C)
-2013-2017
+2013-2020
Ben Bullock.
You can use, copy, modify and redistribute this package and associated
diff --git a/lib/JSON/Tokenize.pm b/lib/JSON/Tokenize.pm
index c9cf37b..1f7d64e 100644
--- a/lib/JSON/Tokenize.pm
+++ b/lib/JSON/Tokenize.pm
@@ -7,7 +7,7 @@ use JSON::Parse;
our @EXPORT_OK = qw/tokenize_json tokenize_start tokenize_next tokenize_start tokenize_end tokenize_type tokenize_child tokenize_text/;
our %EXPORT_TAGS = ('all' => \@EXPORT_OK);
use Carp;
-our $VERSION = '0.55';
+our $VERSION = '0.56';
sub tokenize_text
{
diff --git a/lib/JSON/Tokenize.pod b/lib/JSON/Tokenize.pod
index 73f5b44..c168173 100644
--- a/lib/JSON/Tokenize.pod
+++ b/lib/JSON/Tokenize.pod
@@ -48,8 +48,8 @@ This outputs
=head1 VERSION
-This documents version 0.55 of JSON::Tokenize corresponding to
-L<git commit 739c1e12e85756c703242229caef685615ba77ca|https://github.com/benkasminbullock/JSON-Parse/commit/739c1e12e85756c703242229caef685615ba77ca> released on Tue Oct 24 07:11:47 2017 +0900.
+This documents version 0.56 of JSON::Tokenize corresponding to
+L<git commit c00e1e8b7dfc7958de6700700ee20582f81b56a6|https://github.com/benkasminbullock/JSON-Parse/commit/c00e1e8b7dfc7958de6700700ee20582f81b56a6> released on Mon Feb 17 13:10:15 2020 +0900.
@@ -130,7 +130,7 @@ Ben Bullock, <bkb@cpan.org>
=head1 COPYRIGHT & LICENCE
This package and associated files are copyright (C)
-2016-2017
+2016-2020
Ben Bullock.
You can use, copy, modify and redistribute this package and associated
diff --git a/t/bugzilla-2049.t b/t/bugzilla-2049.t
new file mode 100644
index 0000000..36a0c72
--- /dev/null
+++ b/t/bugzilla-2049.t
@@ -0,0 +1,11 @@
+use warnings;
+use strict;
+use Test::More;
+use JSON::Parse 'json_file_to_perl';
+eval {
+my $type = '';
+my $tri2file = json_file_to_perl ('$type-tri2file.txt');
+};
+ok ($@);
+note ($@);
+done_testing ();
diff --git a/unicode.c b/unicode.c
index 2116e25..81e2988 100644
--- a/unicode.c
+++ b/unicode.c
@@ -51,9 +51,9 @@
/* All of the functions in this library return an "int32_t". Negative
values are used to indicate errors. */
-/* The following return value indicates a successful completion of a
- routines which doesn't use the return value to communicate data
- back to the caller. */
+/* This return value indicates the successful completion of a routine
+ which doesn't use the return value to communicate data back to the
+ caller. */
#define UNICODE_OK 0
@@ -104,11 +104,11 @@
#define UNICODE_NOT_CHARACTER -8
-/* The UTF-8 is valid. */
+/* This return value indicates that the UTF-8 is valid. */
#define UTF8_VALID 1
-/* The UTF-8 is not valid. */
+/* This return value indicates that the UTF-8 is not valid. */
#define UTF8_INVALID 0
@@ -146,7 +146,7 @@ const uint8_t utf8_sequence_len[0x100] =
11110100). If "c" is not a valid UTF-8 first byte, the value
UTF8_BAD_LEADING_BYTE is returned. */
-int32_t utf8_bytes (unsigned char c)
+int32_t utf8_bytes (uint8_t c)
{
int32_t r;
r = utf8_sequence_len[c];
@@ -159,20 +159,20 @@ int32_t utf8_bytes (unsigned char c)
/* This macro converts four bytes of UTF-8 into the corresponding code
point. */
-#define FOUR(x) \
- (((x[0] & 0x07) << 18) \
- | ((x[1] & 0x3F) << 12) \
- | ((x[2] & 0x3F) << 6) \
- | ((x[3] & 0x3F)))
+#define FOUR(x) \
+ (((int32_t) (x[0] & 0x07)) << 18) \
+ | (((int32_t) (x[1] & 0x3F)) << 12) \
+ | (((int32_t) (x[2] & 0x3F)) << 6) \
+ | (((int32_t) (x[3] & 0x3F)))
/* Try to convert "input" from UTF-8 to UCS-2, and return a value even
if the input is partly broken. This checks the first byte of the
input, but it doesn't check the subsequent bytes. */
int32_t
-utf8_no_checks (const unsigned char * input, const unsigned char ** end_ptr)
+utf8_no_checks (const uint8_t * input, const uint8_t ** end_ptr)
{
- unsigned char c;
+ uint8_t c;
c = input[0];
switch (utf8_sequence_len[c]) {
case 1:
@@ -221,9 +221,10 @@ utf8_no_checks (const unsigned char * input, const unsigned char ** end_ptr)
returned. If the value extrapolated from "input" ends in 0xFFFF or
0xFFFE, UNICODE_NOT_CHARACTER is returned. */
-int32_t utf8_to_ucs2 (const unsigned char * input, const unsigned char ** end_ptr)
+int32_t
+utf8_to_ucs2 (const uint8_t * input, const uint8_t ** end_ptr)
{
- unsigned char c;
+ uint8_t c;
uint8_t l;
*end_ptr = input;
c = input[0];
@@ -245,8 +246,8 @@ int32_t utf8_to_ucs2 (const unsigned char * input, const unsigned char ** end_pt
}
* end_ptr = input + 2;
return
- (c & 0x1F) << 6 |
- (input[1] & 0x3F);
+ ((int32_t) (c & 0x1F) << 6) |
+ ((int32_t) (input[1] & 0x3F));
}
if (l == 3) {
/* Three byte case. */
@@ -262,16 +263,16 @@ int32_t utf8_to_ucs2 (const unsigned char * input, const unsigned char ** end_pt
}
* end_ptr = input + 3;
return
- (c & 0x0F) << 12 |
- (input[1] & 0x3F) << 6 |
- (input[2] & 0x3F);
+ ((int32_t) (c & 0x0F)) << 12 |
+ ((int32_t) (input[1] & 0x3F)) << 6 |
+ ((int32_t) (input[2] & 0x3F));
}
if (l == 4) {
/* Four byte case. */
- unsigned char d;
- unsigned char e;
- unsigned char f;
- uint32_t v;
+ uint8_t d;
+ uint8_t e;
+ uint8_t f;
+ int32_t v;
d = input[1];
e = input[2];
f = input[3];
@@ -330,7 +331,8 @@ int32_t utf8_to_ucs2 (const unsigned char * input, const unsigned char ** end_pt
buffer "utf8" has at least UNICODE_MAX_LENGTH (5) bytes of space to
write to, without checking. */
-int32_t ucs2_to_utf8 (int32_t ucs2, unsigned char * utf8)
+int32_t
+ucs2_to_utf8 (int32_t ucs2, uint8_t * utf8)
{
if (ucs2 < 0x80) {
utf8[0] = ucs2;
@@ -383,7 +385,7 @@ int32_t ucs2_to_utf8 (int32_t ucs2, unsigned char * utf8)
UNICODE_OK is returned. */
int32_t
-unicode_to_surrogates (unsigned unicode, int32_t * hi_ptr, int32_t * lo_ptr)
+unicode_to_surrogates (int32_t unicode, int32_t * hi_ptr, int32_t * lo_ptr)
{
int32_t hi = UNI_SUR_HIGH_START;
int32_t lo = UNI_SUR_LOW_START;
@@ -410,7 +412,7 @@ unicode_to_surrogates (unsigned unicode, int32_t * hi_ptr, int32_t * lo_ptr)
int32_t
surrogates_to_unicode (int32_t hi, int32_t lo)
{
- uint32_t u;
+ int32_t u;
if (hi < UNI_SUR_HIGH_START || hi > UNI_SUR_HIGH_END ||
lo < UNI_SUR_LOW_START || lo > UNI_SUR_LOW_END) {
return UNICODE_NOT_SURROGATE_PAIR;
@@ -429,11 +431,12 @@ surrogates_to_unicode (int32_t hi, int32_t lo)
#undef LOW_TEN_BITS
/* Convert the surrogate pair in "hi" and "lo" to UTF-8 in
- "utf8". This calls surrogates_to_unicode and ucs2_to_utf8, thus it
- can return the same errors as them, and has the same restriction on
- "utf8" as ucs2_to_utf8. */
+ "utf8". This calls "surrogates_to_unicode" and "ucs2_to_utf8", thus
+ it can return the same errors as them, and has the same restriction
+ on "utf8" as "ucs2_to_utf8". */
-int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, unsigned char * utf8)
+int32_t
+surrogate_to_utf8 (int32_t hi, int32_t lo, uint8_t * utf8)
{
int32_t C;
C = surrogates_to_unicode (hi, lo);
@@ -451,10 +454,10 @@ int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, unsigned char * utf8)
values of "utf8_to_ucs2". */
int32_t
-unicode_chars_to_bytes (const unsigned char * utf8, int32_t n_chars)
+unicode_chars_to_bytes (const uint8_t * utf8, int32_t n_chars)
{
int32_t i;
- const unsigned char * p = utf8;
+ const uint8_t * p = utf8;
int32_t len = strlen ((const char *) utf8);
if (len == 0 && n_chars != 0) {
return UNICODE_EMPTY_INPUT;
@@ -473,10 +476,11 @@ unicode_chars_to_bytes (const unsigned char * utf8, int32_t n_chars)
sequence. It may return UTF8_BAD_LEADING_BYTE if the first byte is
invalid. */
-int32_t unicode_count_chars_fast (const unsigned char * utf8)
+int32_t
+unicode_count_chars_fast (const uint8_t * utf8)
{
int32_t chars;
- const unsigned char * p;
+ const uint8_t * p;
chars = 0;
p = utf8;
while (*p) {
@@ -501,10 +505,11 @@ int32_t unicode_count_chars_fast (const unsigned char * utf8)
If an error occurs, this may return UTF8_BAD_LEADING_BYTE or any of the
errors of "utf8_to_ucs2". */
-int32_t unicode_count_chars (const unsigned char * utf8)
+int32_t
+unicode_count_chars (const uint8_t * utf8)
{
int32_t chars = 0;
- const unsigned char * p = utf8;
+ const uint8_t * p = utf8;
int32_t len = strlen ((const char *) utf8);
if (len == 0) {
return 0;
@@ -597,10 +602,10 @@ int32_t unicode_count_chars (const unsigned char * utf8)
UTF8_INVALID. */
int32_t
-valid_utf8 (const unsigned char * input, int32_t input_length)
+valid_utf8 (const uint8_t * input, int32_t input_length)
{
int32_t i;
- unsigned char c;
+ uint8_t c;
i = 0;
@@ -752,10 +757,10 @@ valid_utf8 (const unsigned char * input, int32_t input_length)
invalid UTF-8 bytes such as 0xFE and 0xFF. */
int32_t
-trim_to_utf8_start (unsigned char ** ptr)
+trim_to_utf8_start (uint8_t ** ptr)
{
- unsigned char * p = *ptr;
- unsigned char c;
+ uint8_t * p = *ptr;
+ uint8_t c;
int32_t i;
/* 0xC0 = 1100_0000. */
c = *p & 0xC0;
@@ -772,8 +777,13 @@ trim_to_utf8_start (unsigned char ** ptr)
return UTF8_BAD_CONTINUATION_BYTE;
}
+/* Given a return value "code" which is negative or zero, return a
+ string which describes what the return value means. Positive
+ non-zero return values never indicate errors or statuses in this
+ library. */
+
const char *
-code_to_error (int32_t code)
+unicode_code_to_error (int32_t code)
{
switch (code) {
case UTF8_BAD_LEADING_BYTE:
@@ -815,30 +825,23 @@ code_to_error (int32_t code)
#include <stdlib.h>
#include "c-tap-test.h"
-void print_bytes (const unsigned char * bytes)
-{
- int32_t i;
- for (i = 0; i < strlen ((const char *) bytes); i++) {
- fprintf (stderr, "%02X", bytes[i]);
- }
- fprintf (stderr, "\n");
-}
+static const uint8_t * utf8 = (uint8_t *) "漢数字ÔÕÖX";
-static const unsigned char * utf8 = (unsigned char *) "漢数字ÔÕÖX";
+#define BUFFSIZE 0x100
-void test_ucs2_to_utf8 ()
+static void test_ucs2_to_utf8 ()
{
/* Buffer to print utf8 out into. */
- unsigned char buffer[0x100];
+ uint8_t buffer[BUFFSIZE];
/* Offset into buffer. */
- unsigned char * offset;
- const unsigned char * start = utf8;
+ uint8_t * offset;
+ const uint8_t * start = utf8;
offset = buffer;
while (1) {
int32_t unicode;
int32_t bytes;
- const unsigned char * end;
+ const uint8_t * end;
unicode = utf8_to_ucs2 (start, & end);
if (unicode == UNICODE_EMPTY_INPUT) {
break;
@@ -846,7 +849,7 @@ void test_ucs2_to_utf8 ()
if (unicode < 0) {
fprintf (stderr,
"%s:%d: unexpected error %s converting unicode.\n",
- __FILE__, __LINE__, code_to_error (unicode));
+ __FILE__, __LINE__, unicode_code_to_error (unicode));
// exit ok in test
exit (EXIT_FAILURE);
}
@@ -857,6 +860,12 @@ void test_ucs2_to_utf8 ()
"round trip OK for %X (%d bytes)", unicode, bytes);
start = end;
offset += bytes;
+ if (offset - buffer >= BUFFSIZE) {
+ fprintf (stderr, "%s:%d: out of space in buffer.\n",
+ __FILE__, __LINE__);
+ // exit ok
+ exit (EXIT_FAILURE);
+ }
}
* offset = '\0';
TAP_TEST_MSG (strcmp ((const char *) buffer, (const char *) utf8) == 0,
@@ -867,10 +876,10 @@ void test_ucs2_to_utf8 ()
static void
test_invalid_utf8 ()
{
- unsigned char invalid_utf8[UTF8_MAX_LENGTH];
+ uint8_t invalid_utf8[UTF8_MAX_LENGTH];
int32_t unicode;
int32_t valid;
- const unsigned char * end;
+ const uint8_t * end;
snprintf ((char *) invalid_utf8, UTF8_MAX_LENGTH - 1,
"%c%c%c", 0xe8, 0xe4, 0xe5);
unicode = utf8_to_ucs2 (invalid_utf8, & end);
@@ -889,17 +898,17 @@ test_surrogate_pairs ()
int32_t rt;
/* This is the wide character space, which does not require
representation as a surrogate pair. */
- unsigned nogood = 0x3000;
+ int32_t nogood = 0x3000;
/*
Two examples from the Wikipedia article on UTF-16
https://en.wikipedia.org/w/index.php?title=UTF-16&oldid=744329865#Examples. */
- unsigned wikipedia_1 = 0x10437;
- unsigned wikipedia_2 = 0x24b62;
+ int32_t wikipedia_1 = 0x10437;
+ int32_t wikipedia_2 = 0x24b62;
/*
An example from the JSON RFC
http://rfc7159.net/rfc7159#rfc.section.7
*/
- unsigned json_spec = 0x1D11E;
+ int32_t json_spec = 0x1D11E;
status = unicode_to_surrogates (nogood, & hi, & lo);
@@ -931,7 +940,11 @@ test_surrogate_pairs ()
rt, json_spec);
}
-static void test_utf8_bytes ()
+/* Test sending various bytes into "utf8_bytes" and seeing whether the
+ return value is what we expected. */
+
+static void
+test_utf8_bytes ()
{
struct tub {
int32_t first;
@@ -955,13 +968,15 @@ static void test_utf8_bytes ()
}
}
+/* Test the conversion from utf-8 to ucs-2 (UTF-16). */
+
static void
test_utf8_to_ucs2 ()
{
- const unsigned char * start = utf8;
+ const uint8_t * start = utf8;
while (*start) {
int32_t unicode;
- const unsigned char * end;
+ const uint8_t * end;
unicode = utf8_to_ucs2 (start, & end);
TAP_TEST_MSG (unicode > 0, "no bad value at %s", start);
printf ("# %s is %04X, length is %d\n", start, unicode, end - start);
@@ -969,6 +984,8 @@ test_utf8_to_ucs2 ()
}
}
+/* Test counting of unicode characters. */
+
static void
test_unicode_count_chars ()
{
@@ -991,12 +1008,12 @@ static void
test_trim_to_utf8_start ()
{
int32_t status;
- unsigned char * p;
+ uint8_t * p;
/* Invalid UTF-8. */
- unsigned char bad[] = {0x99, 0x99, 0x99, 0x99, 0x99, 0x99};
+ uint8_t bad[] = {0x99, 0x99, 0x99, 0x99, 0x99, 0x99};
/* Valid UTF-8. */
- unsigned char good[] = "化苦";
- unsigned char good2[] = "化abc";
+ uint8_t good[] = "化苦";
+ uint8_t good2[] = "化abc";
p = bad;
status = trim_to_utf8_start (& p);
TAP_TEST_MSG (status == UTF8_BAD_CONTINUATION_BYTE,
diff --git a/unicode.h b/unicode.h
index 17abb81..0f1efcc 100644
--- a/unicode.h
+++ b/unicode.h
@@ -27,36 +27,36 @@ This file was generated by the following command:
extern const uint8_t utf8_sequence_len[];
#line 103 "unicode.c"
-int32_t utf8_bytes (unsigned char c);
+int32_t utf8_bytes (uint8_t c);
#line 124 "unicode.c"
-int32_t utf8_no_checks (const unsigned char* input, const unsigned char** end_ptr);
+int32_t utf8_no_checks (const uint8_t* input, const uint8_t** end_ptr);
-#line 159 "unicode.c"
-int32_t utf8_to_ucs2 (const unsigned char* input, const unsigned char** end_ptr);
+#line 160 "unicode.c"
+int32_t utf8_to_ucs2 (const uint8_t* input, const uint8_t** end_ptr);
-#line 248 "unicode.c"
-int32_t ucs2_to_utf8 (int32_t ucs2, unsigned char* utf8);
+#line 250 "unicode.c"
+int32_t ucs2_to_utf8 (int32_t ucs2, uint8_t* utf8);
-#line 293 "unicode.c"
-int32_t unicode_to_surrogates (unsigned unicode, int32_t* hi_ptr, int32_t* lo_ptr);
+#line 295 "unicode.c"
+int32_t unicode_to_surrogates (int32_t unicode, int32_t* hi_ptr, int32_t* lo_ptr);
-#line 312 "unicode.c"
+#line 314 "unicode.c"
int32_t surrogates_to_unicode (int32_t hi, int32_t lo);
-#line 334 "unicode.c"
-int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, unsigned char* utf8);
+#line 337 "unicode.c"
+int32_t surrogate_to_utf8 (int32_t hi, int32_t lo, uint8_t* utf8);
-#line 347 "unicode.c"
-int32_t unicode_chars_to_bytes (const unsigned char* utf8, int32_t n_chars);
+#line 350 "unicode.c"
+int32_t unicode_chars_to_bytes (const uint8_t* utf8, int32_t n_chars);
-#line 366 "unicode.c"
-int32_t unicode_count_chars_fast (const unsigned char* utf8);
+#line 370 "unicode.c"
+int32_t unicode_count_chars_fast (const uint8_t* utf8);
-#line 387 "unicode.c"
-int32_t unicode_count_chars (const unsigned char* utf8);
+#line 392 "unicode.c"
+int32_t unicode_count_chars (const uint8_t* utf8);
-#line 410 "unicode.c"
+#line 415 "unicode.c"
#define BYTE_80_8F \
0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: \
case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: \
@@ -106,23 +106,13 @@ int32_t unicode_count_chars (const unsigned char* utf8);
#define BYTE_F1_F3 \
0xF1: case 0xF2: case 0xF3
-#line 474 "unicode.c"
-int32_t valid_utf8 (const unsigned char* input, int32_t input_length);
+#line 479 "unicode.c"
+int32_t valid_utf8 (const uint8_t* input, int32_t input_length);
-#line 611 "unicode.c"
-int32_t trim_to_utf8_start (unsigned char** ptr);
+#line 616 "unicode.c"
+int32_t trim_to_utf8_start (uint8_t** ptr);
-#line 632 "unicode.c"
-const char* code_to_error (int32_t code);
-
-#line 668 "unicode.c"
-
-#ifdef TEST
-void print_bytes (const unsigned char* bytes);
-
-#line 679 "unicode.c"
-void test_ucs2_to_utf8 ();
-
-#endif /* def TEST */
+#line 639 "unicode.c"
+const char* unicode_code_to_error (int32_t code);
#endif /* CFH_UNICODE_H */