summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgregor herrmann <gregoa@debian.org>2016-07-09 16:01:18 +0200
committergregor herrmann <gregoa@debian.org>2016-07-09 16:01:18 +0200
commitb0687d59a1198d64a2314454bfd7b3a0af2b8712 (patch)
tree4f6db963e7e6b44cfbd60ee1437d9fd8b3f16ab6
parent9241879ab494e960ab8a83fee763c2d69ab11e27 (diff)
Imported Upstream version 0.76
-rw-r--r--Build.PL140
-rw-r--r--Changes33
-rw-r--r--MANIFEST2
-rw-r--r--META.json32
-rw-r--r--META.yml30
-rw-r--r--btool_faq.pod8
-rw-r--r--btparse/pccts/antlr.h2
-rw-r--r--btparse/pccts/err.h22
-rw-r--r--btparse/src/bibtex.c25
-rw-r--r--btparse/src/format_name.c1
-rw-r--r--btparse/src/input.c2
-rw-r--r--btparse/src/prototypes.h3
-rw-r--r--btparse/src/sym.c2
-rw-r--r--btparse/src/util.c2182
-rw-r--r--btparse/tests/macro_test.c1
-rw-r--r--btparse/tests/name_test.c1
-rw-r--r--btparse/tests/namebug.c6
-rw-r--r--btparse/tests/postprocess_test.c1
-rw-r--r--btparse/tests/purify_test.c3
-rw-r--r--btparse/tests/tex_test.c4
-rwxr-xr-xexamples/append_entries6
-rw-r--r--inc/MyBuilder.pm8
-rw-r--r--lib/Text/BibTeX.pm84
-rw-r--r--lib/Text/BibTeX/Bib.pm32
-rw-r--r--lib/Text/BibTeX/BibFormat.pm4
-rw-r--r--lib/Text/BibTeX/BibSort.pm6
-rw-r--r--lib/Text/BibTeX/Entry.pm115
-rw-r--r--lib/Text/BibTeX/File.pm111
-rw-r--r--lib/Text/BibTeX/Name.pm70
-rw-r--r--lib/Text/BibTeX/NameFormat.pm12
-rw-r--r--lib/Text/BibTeX/Structure.pm14
-rw-r--r--lib/Text/BibTeX/Value.pm22
-rwxr-xr-xscripts/btcheck4
-rwxr-xr-xscripts/btformat4
-rwxr-xr-xscripts/btsort4
-rw-r--r--t/bib.t11
-rw-r--r--t/common.pl16
-rw-r--r--t/corpora.bib264
-rw-r--r--t/from_file.t35
-rw-r--r--t/macro.t32
-rw-r--r--t/modify.t2
-rw-r--r--t/nameformat.t40
-rw-r--r--t/names.t40
-rw-r--r--t/output.t39
-rw-r--r--t/parse.t4
-rw-r--r--t/parse_f.t12
-rw-r--r--t/parse_s.t12
-rw-r--r--t/split_names4
-rw-r--r--typemap3
-rw-r--r--xscode/BibTeX.xs2
-rw-r--r--xscode/btxs_support.c25
51 files changed, 2058 insertions, 1479 deletions
diff --git a/Build.PL b/Build.PL
index 6dbb07f..7d805d9 100644
--- a/Build.PL
+++ b/Build.PL
@@ -7,94 +7,98 @@ use Config;
use File::Spec::Functions qw.catdir catfile.;
use File::Copy;
-my $version = get_version();
+my $version = get_version();
-my $builder = MyBuilder->new
- (
- module_name => 'Text::BibTeX',
- license => 'perl',
- dist_author => ['Alberto Simões <ambs@cpan.org>',
- 'Greg Ward <gward@python.net>'],
- needs_compiler => 1,
- meta_merge => {
- resources => {
- repository => 'http://github.com/ambs/Text-BibTeX',
- },
+my $builder = MyBuilder->new(
+ module_name => 'Text::BibTeX',
+ license => 'perl',
+ dist_author =>
+ [ 'Alberto Simões <ambs@cpan.org>', 'Greg Ward <gward@python.net>' ],
+ needs_compiler => 1,
+ meta_merge => {
+ resources => { repository => 'http://github.com/ambs/Text-BibTeX', },
+ },
+ configure_requires => {
+ 'Module::Build' => '0.36',
+ 'Config::AutoConf' => '0.16',
+ 'ExtUtils::LibBuilder' => '0.02',
},
- configure_requires => {
- 'Module::Build' => '0.36',
- 'Config::AutoConf' => '0.16',
- 'ExtUtils::LibBuilder' => '0.02',
- },
- requires => {
- 'Scalar::Util' => '1.42',
- },
- build_requires => {
- 'File::Copy' => '0',
- 'Config::AutoConf' => '0.16',
- 'ExtUtils::LibBuilder' => '0.02',
- 'Capture::Tiny' => '0.06',
- 'ExtUtils::CBuilder' => '0.27',
- 'Module::Build' => '0.3603',
- },
- add_to_cleanup => [
- 'Text-BibTeX-*',
-# NOT SURE YET 'btparse/src/bt_config.h',
- 'btparse/src/*.so',
- 'btparse/src/*.dylib',
- 'btparse/src/*.dll',
- 'btparse/src/*.o',
- 'xscode/*.o',
- 'btparse/tests/*.o',
- 'btparse/progs/*.o',
- 'btparse/progs/dumpnames',
- 'btparse/progs/bibparse',
- 'btparse/progs/biblex',
- 'btparse/tests/postprocess_test',
- 'btparse/tests/read_test',
- 'btparse/tests/simple_test',
- 'btparse/tests/macro_test',
- 'btparse/tests/case_test',
- 'btparse/tests/name_test',
- 'btparse/tests/purify_test',
- ],
- );
+ requires => {
+ 'Scalar::Util' => '1.42',
+ 'Unicode::Normalize' => '0',
+ 'Encode' => '0',
+ },
+ build_requires => {
+ 'File::Copy' => '0',
+ 'Config::AutoConf' => '0.16',
+ 'ExtUtils::LibBuilder' => '0.02',
+ 'Capture::Tiny' => '0.06',
+ 'ExtUtils::CBuilder' => '0.27',
+ 'Module::Build' => '0.3603',
+ },
+ add_to_cleanup => [
+ 'Text-BibTeX-*',
+
+ # NOT SURE YET 'btparse/src/bt_config.h',
+ 'btparse/src/*.so',
+ 'btparse/src/*.dylib',
+ 'btparse/src/*.dll',
+ 'btparse/src/*.o',
+ 'xscode/*.o',
+ 'btparse/tests/*.o',
+ 'btparse/progs/*.o',
+ 'btparse/progs/dumpnames',
+ 'btparse/progs/bibparse',
+ 'btparse/progs/biblex',
+ 'btparse/tests/postprocess_test',
+ 'btparse/tests/read_test',
+ 'btparse/tests/simple_test',
+ 'btparse/tests/macro_test',
+ 'btparse/tests/case_test',
+ 'btparse/tests/name_test',
+ 'btparse/tests/purify_test',
+ ],
+);
## HACK HACK HACK HACK
my $libdir = $builder->install_destination("bin");
-if ($^O =~ /mswin32/i) {
+if ( $^O =~ /mswin32/i ) {
$libdir = undef;
+
# Find a place where we can write.
- my @folders = split /;/, $ENV{PATH};
+ my @folders = split /;/, $ENV{PATH};
my $installed = 0;
- my $target = "text-bibtex.$$";
- while(@folders && !$installed) {
- $libdir = shift @folders;
+ my $target = "text-bibtex.$$";
+ while ( @folders && !$installed ) {
+ $libdir = shift @folders;
- copy("MANIFEST", catfile($libdir,$target));
- $installed = 1 if -f catfile($libdir, $target);
+ copy( "MANIFEST", catfile( $libdir, $target ) );
+ $installed = 1 if -f catfile( $libdir, $target );
}
- if (!$installed) {
+ if ( !$installed ) {
warn("Wasn't able to find a suitable place for libbtparse.dll!");
- } else {
+ }
+ else {
print STDERR "libbtparse.dll will be installed in $libdir\n";
- unlink catfile($libdir, $target);
+ unlink catfile( $libdir, $target );
}
-} else {
- if ($Config{archname} =~ /^x86_64|^ppc64|^s390x|^aarch64/) {
+}
+else {
+ if ( $Config{archname} =~ /^x86_64|^ppc64|^s390x|^aarch64/ ) {
$libdir =~ s/\bbin\b/lib64/;
- if (!-d $libdir) {
+ if ( !-d $libdir ) {
my $test = $libdir;
$test =~ s/lib64/lib/;
$libdir = $test if -d $test;
}
- } else {
+ }
+ else {
$libdir =~ s/\bbin\b/lib/;
}
}
-$builder->notes('btparse_version' => $version);
-$builder->notes('lib_path' => $libdir);
+$builder->notes( 'btparse_version' => $version );
+$builder->notes( 'lib_path' => $libdir );
$builder->add_build_element('usrlib');
$builder->install_path( 'usrlib' => $libdir );
@@ -102,7 +106,8 @@ $builder->create_build_script;
sub get_version {
my $version = undef;
- open PM, "lib/Text/BibTeX.pm" or die "Cannot open 'lib/Text/BibTeX.pm' for reading: $!\n";
+ open PM, "lib/Text/BibTeX.pm"
+ or die "Cannot open 'lib/Text/BibTeX.pm' for reading: $!\n";
while (<PM>) {
if (m!^our\s+\$VERSION\s*=\s*'([^']+)'!) {
$version = $1;
@@ -113,4 +118,3 @@ sub get_version {
die "Could not find VERSION on your .pm file. Weirdo!\n" unless $version;
}
-
diff --git a/Changes b/Changes
index a2513aa..758e809 100644
--- a/Changes
+++ b/Changes
@@ -1,5 +1,38 @@
Revision history for Perl module Text::BibTeX
+0.76 2016-07-06
+ * Added 'reset_macros' option to Text::BibTeX::File, in order
+ to remove all defined macros (except months)
+
+0.76_02 2016-07-05
+ * Fix issue with binmode not being copied in Clone method.
+ * Make month abbreviations available always, and not only when
+ using Text::BibTeX::Bib.
+ * Added docs to supported options for Text::BibTeX::Entry.
+
+0.76_01 2016-07-04
+ * Solved nasty bug when using lvalues as parameters (substr).
+ * Added tests.
+
+0.75 2016-07-03
+ * Stable version with bytes/utf-8 support.
+
+0.75_05 2016-07-02
+ * Get 5.8.x back aboard;
+
+0.75_04 2016-07-01
+ * Fixed reference to empty function name;
+
+0.75_03 2016-06-30
+ * Rename split_list to isplit_list, and created split_list wrapper;
+ * Added normalization option;
+
+0.75_02 2016-06-25
+ * Minor fix for some perl version parsing problems.
+
+0.75_01 2016-06-24
+ * Added binmode option. Should allow unicode handling directly.
+
0.74 2016-06-15
* Get perl 5.8.x back.
diff --git a/MANIFEST b/MANIFEST
index c8f6e07..4efd2e3 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -151,3 +151,5 @@ btparse/src/bt_config.h.in
README.OLD
META.json
MANIFEST.SKIP
+t/corpora.bib
+t/from_file.t
diff --git a/META.json b/META.json
index 49b6846..af64600 100644
--- a/META.json
+++ b/META.json
@@ -34,62 +34,64 @@
},
"runtime" : {
"requires" : {
- "Scalar::Util" : "1.42"
+ "Encode" : "0",
+ "Scalar::Util" : "1.42",
+ "Unicode::Normalize" : "0"
}
}
},
"provides" : {
"Text::BibTeX" : {
"file" : "lib/Text/BibTeX.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::BibEntry" : {
"file" : "lib/Text/BibTeX/Bib.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::BibFormat" : {
"file" : "lib/Text/BibTeX/BibFormat.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::BibSort" : {
"file" : "lib/Text/BibTeX/BibSort.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::BibStructure" : {
"file" : "lib/Text/BibTeX/Bib.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::Entry" : {
"file" : "lib/Text/BibTeX/Entry.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::File" : {
"file" : "lib/Text/BibTeX/File.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::Name" : {
"file" : "lib/Text/BibTeX/Name.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::NameFormat" : {
"file" : "lib/Text/BibTeX/NameFormat.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::SimpleValue" : {
"file" : "lib/Text/BibTeX/Value.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::Structure" : {
"file" : "lib/Text/BibTeX/Structure.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::StructuredEntry" : {
"file" : "lib/Text/BibTeX/Structure.pm",
- "version" : "0.74"
+ "version" : "0.76"
},
"Text::BibTeX::Value" : {
"file" : "lib/Text/BibTeX/Value.pm",
- "version" : "0.74"
+ "version" : "0.76"
}
},
"release_status" : "stable",
@@ -101,6 +103,6 @@
"url" : "http://github.com/ambs/Text-BibTeX"
}
},
- "version" : "0.74",
+ "version" : "0.76",
"x_serialization_backend" : "JSON::PP version 2.27400"
}
diff --git a/META.yml b/META.yml
index 604c709..5b3ceec 100644
--- a/META.yml
+++ b/META.yml
@@ -24,47 +24,49 @@ name: Text-BibTeX
provides:
Text::BibTeX:
file: lib/Text/BibTeX.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::BibEntry:
file: lib/Text/BibTeX/Bib.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::BibFormat:
file: lib/Text/BibTeX/BibFormat.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::BibSort:
file: lib/Text/BibTeX/BibSort.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::BibStructure:
file: lib/Text/BibTeX/Bib.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::Entry:
file: lib/Text/BibTeX/Entry.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::File:
file: lib/Text/BibTeX/File.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::Name:
file: lib/Text/BibTeX/Name.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::NameFormat:
file: lib/Text/BibTeX/NameFormat.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::SimpleValue:
file: lib/Text/BibTeX/Value.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::Structure:
file: lib/Text/BibTeX/Structure.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::StructuredEntry:
file: lib/Text/BibTeX/Structure.pm
- version: '0.74'
+ version: '0.76'
Text::BibTeX::Value:
file: lib/Text/BibTeX/Value.pm
- version: '0.74'
+ version: '0.76'
requires:
+ Encode: '0'
Scalar::Util: '1.42'
+ Unicode::Normalize: '0'
resources:
license: http://dev.perl.org/licenses/
repository: http://github.com/ambs/Text-BibTeX
-version: '0.74'
+version: '0.76'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
diff --git a/btool_faq.pod b/btool_faq.pod
index bf22ebd..4310811 100644
--- a/btool_faq.pod
+++ b/btool_faq.pod
@@ -27,9 +27,9 @@ parse entries from a file, say F<foo.bib> that quite sensibly use the
month macros (C<jan>, C<feb>, etc.) provided by the BibTeX standard
style files:
- $bibfile = new Text::BibTeX::File 'foo.bib' # open file
+ $bibfile = Text::BibTeX::File->new('foo.bib') # open file
or die "foo.bib: $!\n";
- $entry = new Text::BibTeX::Entry $bibfile; # parse first entry
+ $entry = Text::BibTeX::Entry->new($bibfile); # parse first entry
Using this code, you might get an "undefined macro" warning for every
entry parsed from F<foo.bib>. Apart from the superficial annoyance of
@@ -60,7 +60,7 @@ C<Text::BibTeX> that entries from C<$bibfile> are expected to conform to
the C<Bib> structure (which is implemented by the C<Text::BibTeX::Bib>
module, but you don't really need to know that):
- $bibfile = new Text::BibTeX::File 'foo.bib'
+ $bibfile = Text::BibTeX::File->new('foo.bib')
or die "foo.bib: $!\n";
$bibfile->set_structure ('Bib');
@@ -84,7 +84,7 @@ man pages, here's the trick: if you pass just a filename to
C<Text::BibTeX::File>'s C<new> method, then it's treated just like a
filename passed to Perl's builtin C<open>:
- my $append_file = new Text::BibTeX::File ">>$filename"
+ my $append_file = Text::BibTeX::File->new(">>$filename")
or die "couldn't open $filename for appending: $!\n";
opens C<$filename> for appending. If, later on, you have an entry from
diff --git a/btparse/pccts/antlr.h b/btparse/pccts/antlr.h
index bcd9542..d54220b 100644
--- a/btparse/pccts/antlr.h
+++ b/btparse/pccts/antlr.h
@@ -385,7 +385,7 @@ extern void _inf_zzgettok();
#define zzaCur (zzaStack[zzasp])
#define zzaRet (*zzaRetPtr)
#define zzaArg(v,n) zzaStack[v-n]
-#define zzMakeAttr { zzNON_GUESS_MODE {zzOvfChk; --zzasp; zzcr_attr(&(zzaStack[zzasp]),LA(1),LATEXT(1));}}
+#define zzMakeAttr { zzNON_GUESS_MODE {zzOvfChk; --zzasp; zzcr_attr(&(zzaStack[zzasp]),LA(1), (char*)LATEXT(1));}}
#ifdef zzdef0
#define zzMake0 { zzOvfChk; --zzasp; zzdef0(&(zzaStack[zzasp]));}
#else
diff --git a/btparse/pccts/err.h b/btparse/pccts/err.h
index e3107d9..6cec4cb 100644
--- a/btparse/pccts/err.h
+++ b/btparse/pccts/err.h
@@ -164,7 +164,12 @@ va_dcl
int freeSpace = (ZZLEXBUFSIZE+1) - strlen(text);
#endif
if ( i>1 ) strcat(text, " ");
- strncat(text, LATEXT(i), freeSpace); // strncat(a,b,n) will actually write n+1 bytes because of the terminating NULL, unlike strlcpy (non-standard OpenBSD function) which writes exactly n. this may end up dropping a character, but this is debug output from a failure case, so it doesn't matter much.
+ // strncat(a,b,n) will actually write n+1 bytes
+ // because of the terminating NULL, unlike strlcpy
+ // (non-standard OpenBSD function) which writes exactly n.
+ // this may end up dropping a character, but this is
+ // debug output from a failure case, so it doesn't matter much.
+ strncat(text, (char*)LATEXT(i), freeSpace);
if ( !zzset_el((unsigned)LA(i), f[i-1]) ) break;
}
miss_set = va_arg(ap, SetWordType **);
@@ -179,9 +184,9 @@ va_dcl
* (The old LL sub 1 (k) versus LL(k) parsing technique)
*/
*miss_set = NULL;
- *miss_text = zzlextext;
+ *miss_text = (char*)zzlextext; // hide warning! [ambs]
*bad_tok = LA(1);
- *bad_text = LATEXT(1);
+ *bad_text = (char*)LATEXT(1); // hide warning! [ambs]
*err_k = k;
return;
}
@@ -189,7 +194,7 @@ va_dcl
*miss_set = f[i-1];
*miss_text = text;
*bad_tok = LA(i);
- *bad_text = LATEXT(i);
+ *bad_text = (char*) LATEXT(i); // hide warning! [ambs]
if ( i==1 ) *err_k = 1;
else *err_k = k;
}
@@ -228,7 +233,7 @@ zzantlr_state *buf;
buf->labase = zzlabase;
#else
buf->token = zztoken;
- strcpy(buf->text, zzlextext);
+ strcpy(buf->text, (char*) zzlextext); // hide warning! [ambs]
#endif
}
@@ -266,7 +271,7 @@ zzantlr_state *buf;
zzlabase = buf->labase;
#else
zztoken = buf->token;
- strcpy(zzlextext, buf->text);
+ strcpy((char*) zzlextext, buf->text); // Hide warning [ambs]
#endif
}
@@ -482,7 +487,7 @@ SetWordType **zzMissSet;
#endif
{
if ( LA(1)!=_t ) {
- *zzBadText = *zzMissText=LATEXT(1);
+ *zzBadText = *zzMissText= (char*) LATEXT(1); // hide warning! [ambs]
*zzMissTok= _t; *zzBadTok=LA(1);
*zzMissSet=NULL;
return 0;
@@ -656,7 +661,8 @@ SetWordType **zzMissSet;
#endif
#endif
if ( !zzset_el((unsigned)LA(1), e) ) {
- *zzBadText = LATEXT(1); *zzMissText=NULL;
+ *zzBadText = (char*)LATEXT(1); // hide warning [ambs]
+ *zzMissText=NULL;
*zzMissTok= 0; *zzBadTok=LA(1);
*zzMissSet=e;
return 0;
diff --git a/btparse/src/bibtex.c b/btparse/src/bibtex.c
index 78e3405..9ca2ee4 100644
--- a/btparse/src/bibtex.c
+++ b/btparse/src/bibtex.c
@@ -18,6 +18,7 @@
#include "lex_auxiliary.h"
#include "error.h"
#include "my_dmalloc.h"
+#include "parse_auxiliary.h"
extern char * InputFilename; /* for zzcr_ast call in pccts/ast.c */
#define GENAST
@@ -54,7 +55,7 @@ AST **_root;
zzBLOCK(zztasp2);
zzMake0;
{
- while ( (LA(1)==AT) ) {
+ while ( LA(1)==AT ) {
_ast = NULL; entry(&_ast);
/* a little creative forestry... */
if ((*_root) == NULL)
@@ -120,7 +121,7 @@ AST **_root;
zzBLOCK(zztasp1);
zzMake0;
{
- if ( (LA(1)==STRING) ) {
+ if ( LA(1)==STRING) {
if (!(metatype == BTE_COMMENT )) {zzfailed_pred(" metatype == BTE_COMMENT ");}
zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail);
zzastArg(1)->nodetype = BTAST_STRING;
@@ -128,7 +129,7 @@ AST **_root;
}
else {
- if ( (LA(1)==ENTRY_OPEN) ) {
+ if ( LA(1)==ENTRY_OPEN) {
zzmatch(ENTRY_OPEN); zzCONSUME;
contents(zzSTR, metatype ); zzlink(_root, &_sibling, &_tail);
zzmatch(ENTRY_CLOSE); zzCONSUME;
@@ -163,11 +164,11 @@ AST **_root;
zzBLOCK(zztasp2);
zzMake0;
{
- if ( (LA(1)==NAME) ) {
+ if ( LA(1)==NAME ) {
zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail); zzCONSUME;
}
else {
- if ( (LA(1)==NUMBER) ) {
+ if ( LA(1)==NUMBER) {
zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail); zzCONSUME;
}
else {zzFAIL(1,zzerr2,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;}
@@ -213,13 +214,13 @@ AST **_root;
zzBLOCK(zztasp1);
zzMake0;
{
- if ( (LA(1)==NAME) ) {
+ if ( LA(1)==NAME) {
field(zzSTR); zzlink(_root, &_sibling, &_tail);
{
zzBLOCK(zztasp2);
zzMake0;
{
- if ( (LA(1)==COMMA) ) {
+ if ( LA(1)==COMMA) {
zzmatch(COMMA); zzCONSUME;
fields(zzSTR); zzlink(_root, &_sibling, &_tail);
}
@@ -228,7 +229,7 @@ AST **_root;
}
}
else {
- if ( (LA(1)==ENTRY_CLOSE) ) {
+ if ( LA(1)==ENTRY_CLOSE) {
}
else {zzFAIL(1,zzerr4,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk); goto fail;}
}
@@ -291,7 +292,7 @@ AST **_root;
zzBLOCK(zztasp2);
zzMake0;
{
- while ( (LA(1)==HASH) ) {
+ while ( LA(1)==HASH) {
zzmatch(HASH); zzCONSUME;
simple_value(zzSTR); zzlink(_root, &_sibling, &_tail);
zzLOOP(zztasp2);
@@ -320,19 +321,19 @@ AST **_root;
zzBLOCK(zztasp1);
zzMake0;
{
- if ( (LA(1)==STRING) ) {
+ if ( LA(1)==STRING) {
zzmatch(STRING); zzsubchild(_root, &_sibling, &_tail);
zzastArg(1)->nodetype = BTAST_STRING;
zzCONSUME;
}
else {
- if ( (LA(1)==NUMBER) ) {
+ if ( LA(1)==NUMBER) {
zzmatch(NUMBER); zzsubchild(_root, &_sibling, &_tail);
zzastArg(1)->nodetype = BTAST_NUMBER;
zzCONSUME;
}
else {
- if ( (LA(1)==NAME) ) {
+ if ( LA(1)==NAME) {
zzmatch(NAME); zzsubchild(_root, &_sibling, &_tail);
zzastArg(1)->nodetype = BTAST_MACRO;
zzCONSUME;
diff --git a/btparse/src/format_name.c b/btparse/src/format_name.c
index 1fefc4e..3e8a891 100644
--- a/btparse/src/format_name.c
+++ b/btparse/src/format_name.c
@@ -21,6 +21,7 @@
#include <string.h>
#include <assert.h>
#include "btparse.h"
+#include "prototypes.h"
#include "error.h"
#include "my_dmalloc.h"
#include "bt_debug.h"
diff --git a/btparse/src/input.c b/btparse/src/input.c
index dace5a9..f3d9df7 100644
--- a/btparse/src/input.c
+++ b/btparse/src/input.c
@@ -126,7 +126,7 @@ start_parse (FILE *infile, char *instring, int line)
}
else
{
- zzrdstr (instring);
+ zzrdstr ((unsigned char*)instring);
zzline = line;
}
diff --git a/btparse/src/prototypes.h b/btparse/src/prototypes.h
index 9775e66..a902f2a 100644
--- a/btparse/src/prototypes.h
+++ b/btparse/src/prototypes.h
@@ -30,6 +30,8 @@
#include "btparse.h" /* for types */
/* util.c */
+int get_uchar(char *string, int offset);
+int isulower(char *string);
#if !HAVE_STRLWR
char *strlwr (char *s);
#endif
@@ -37,6 +39,7 @@ char *strlwr (char *s);
char *strupr (char *s);
#endif
+
/* macros.c */
void init_macros (void);
void done_macros (void);
diff --git a/btparse/src/sym.c b/btparse/src/sym.c
index 4bdfa34..ef41e13 100644
--- a/btparse/src/sym.c
+++ b/btparse/src/sym.c
@@ -292,7 +292,7 @@ zzs_stat(void)
if ( q != NULL && low==0 ) low = p-table;
len = 0;
- if ( q != NULL ) printf("[%d]", p-table);
+ if ( q != NULL ) printf("[%ld]", p-table);
while ( q != NULL )
{
len++;
diff --git a/btparse/src/util.c b/btparse/src/util.c
index a8ba809..e568e76 100644
--- a/btparse/src/util.c
+++ b/btparse/src/util.c
@@ -1,22 +1,17 @@
-/* ------------------------------------------------------------------------
-@NAME : util.c
-@INPUT :
-@OUTPUT :
-@RETURNS :
-@DESCRIPTION: Miscellaneous utility functions. So far, just:
- strlwr
- strupr
-@CREATED : Summer 1996, Greg Ward
-@MODIFIED :
-@VERSION : $Id$
-@COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights reserved.
-
- This file is part of the btparse library. This library is
- free software; you can redistribute it and/or modify it under
- the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2
- of the License, or (at your option) any later version.
--------------------------------------------------------------------------- */
+/*
+ * ------------------------------------------------------------------------
+ * @NAME : util.c @INPUT : @OUTPUT : @RETURNS :
+ * @DESCRIPTION: Miscellaneous utility functions. So far, just: strlwr
+ * strupr @CREATED : Summer 1996, Greg Ward @MODIFIED : @VERSION :
+ * $Id$ @COPYRIGHT : Copyright (c) 1996-99 by Gregory P. Ward. All rights
+ * reserved.
+ *
+ * This file is part of the btparse library. This library is free software; you
+ * can redistribute it and/or modify it under the terms of the GNU Library
+ * General Public License as published by the Free Software Foundation;
+ * either version 2 of the License, or (at your option) any later version.
+ * --------------------------------------------------------------------------
+ */
#include "bt_config.h"
#include <string.h>
@@ -24,1201 +19,1162 @@
#include "prototypes.h"
#include "my_dmalloc.h"
-/* ------------------------------------------------------------------------
-@NAME : strlwr()
-@INPUT :
-@OUTPUT :
-@RETURNS :
-@DESCRIPTION: Converts a string to lowercase in place.
-@GLOBALS :
-@CALLS :
-@CREATED : 1996/01/06, GPW
-@MODIFIED :
-@COMMENTS : This should work the same as strlwr() in DOS compilers --
- why this isn't mandated by ANSI is a mystery to me...
--------------------------------------------------------------------------- */
+/*
+ * ------------------------------------------------------------------------
+ * @NAME : strlwr() @INPUT : @OUTPUT : @RETURNS :
+ * @DESCRIPTION: Converts a string to lowercase in place. @GLOBALS :
+ * @CALLS : @CREATED : 1996/01/06, GPW @MODIFIED : @COMMENTS :
+ * This should work the same as strlwr() in DOS compilers -- why this isn't
+ * mandated by ANSI is a mystery to me...
+ * --------------------------------------------------------------------------
+ */
#if !HAVE_STRLWR
-char *strlwr (char *s)
+char *
+strlwr(char *s)
{
- int len, i;
+ int len , i;
- len = strlen (s);
- for (i = 0; i < len; i++)
- s[i] = tolower (s[i]);
+ len = strlen(s);
+ for (i = 0; i < len; i++)
+ s[i] = tolower(s[i]);
- return s;
+ return s;
}
#endif
-/* ------------------------------------------------------------------------
-@NAME : strupr()
-@INPUT :
-@OUTPUT :
-@RETURNS :
-@DESCRIPTION: Converts a string to uppercase in place.
-@GLOBALS :
-@CALLS :
-@CREATED : 1996/01/06, GPW
-@MODIFIED :
-@COMMENTS : This should work the same as strupr() in DOS compilers --
- why this isn't mandated by ANSI is a mystery to me...
--------------------------------------------------------------------------- */
+/*
+ * ------------------------------------------------------------------------
+ * @NAME : strupr() @INPUT : @OUTPUT : @RETURNS :
+ * @DESCRIPTION: Converts a string to uppercase in place. @GLOBALS :
+ * @CALLS : @CREATED : 1996/01/06, GPW @MODIFIED : @COMMENTS :
+ * This should work the same as strupr() in DOS compilers -- why this isn't
+ * mandated by ANSI is a mystery to me...
+ * --------------------------------------------------------------------------
+ */
#if !HAVE_STRUPR
-char *strupr (char *s)
+char *
+strupr(char *s)
{
- int len, i;
+ int len , i;
- len = strlen (s);
- for (i = 0; i < len; i++)
- s[i] = toupper (s[i]);
+ len = strlen(s);
+ for (i = 0; i < len; i++)
+ s[i] = toupper(s[i]);
- return s;
+ return s;
}
#endif
-/* ------------------------------------------------------------------------
-@NAME : get_uchar()
-@INPUT : string
- offset in string
-@OUTPUT : number of bytes required to gobble the next unicode character, including any combining marks
-@RETURNS :
-@DESCRIPTION: In order to deal with unicode chars when calculating abbreviations,
- we need to know how many bytes the next character is.
-@CALLS :
-@CALLERS : count_virtual_char()
-@CREATED : 2010/03/14, PK
-@MODIFIED :
--------------------------------------------------------------------------- */
+/*
+ * ------------------------------------------------------------------------
+ * @NAME : get_uchar() @INPUT : string offset in string @OUTPUT
+ * : number of bytes required to gobble the next unicode character, including
+ * any combining marks @RETURNS : @DESCRIPTION: In order to deal with
+ * unicode chars when calculating abbreviations, we need to know how many
+ * bytes the next character is. @CALLS : @CALLERS :
+ * count_virtual_char() @CREATED : 2010/03/14, PK @MODIFIED :
+ * --------------------------------------------------------------------------
+ */
int
-get_uchar(char * string, int offset)
+get_uchar(char *string, int offset)
{
- unsigned char * bytes = (unsigned char *)string;
- int init;
- unsigned int c = 0; // Without unsigned, for some reason Solaris coredumps
+ unsigned char *bytes = (unsigned char *)string;
+ int init;
+ unsigned int c = 0;
+ //Without unsigned, for some reason Solaris coredumps
- if(!string)
- return 0;
+ if (!string)
+ return 0;
- if ( (// ASCII
- bytes[offset] == 0x09 ||
- bytes[offset] == 0x0A ||
- bytes[offset] == 0x0D ||
- (0x20 <= bytes[offset] && bytes[offset] <= 0x7E)
- )
- )
- {
- init = 1;
+ if ((//ASCII
+ bytes[offset] == 0x09 ||
+ bytes[offset] == 0x0A ||
+ bytes[offset] == 0x0D ||
+ (0x20 <= bytes[offset] && bytes[offset] <= 0x7E)
+ )
+ ) {
+ init = 1;
}
-
- if( (// non-overlong 2-byte
- (0xC2 <= bytes[offset] && bytes[offset] <= 0xDF) &&
- (0x80 <= bytes[offset+1] && bytes[offset+1] <= 0xBF)
- )
- )
- {
- init = 2;
+ if ((//non - overlong 2 - byte
+ (0xC2 <= bytes[offset] && bytes[offset] <= 0xDF) &&
+ (0x80 <= bytes[offset + 1] && bytes[offset + 1] <= 0xBF)
+ )
+ ) {
+ init = 2;
}
-
- if( (// excluding overlongs
- bytes[offset] == 0xE0 &&
- (0xA0 <= bytes[offset+1] && bytes[offset+1] <= 0xBF) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF)
- ) ||
- (// straight 3-byte
- ((0xE1 <= bytes[offset] && bytes[offset] <= 0xEC) ||
- bytes[offset] == 0xEE ||
- bytes[offset] == 0xEF) &&
- (0x80 <= bytes[offset+1] && bytes[offset+1] <= 0xBF) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF)
- ) ||
- (// excluding surrogates
- bytes[offset] == 0xED &&
- (0x80 <= bytes[offset+1] && bytes[offset+1] <= 0x9F) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF)
- )
- )
- {
- init = 3;
+ if ((//excluding overlongs
+ bytes[offset] == 0xE0 &&
+ (0xA0 <= bytes[offset + 1] && bytes[offset + 1] <= 0xBF) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF)
+ ) ||
+ (//straight 3 - byte
+ ((0xE1 <= bytes[offset] && bytes[offset] <= 0xEC) ||
+ bytes[offset] == 0xEE ||
+ bytes[offset] == 0xEF) &&
+ (0x80 <= bytes[offset + 1] && bytes[offset + 1] <= 0xBF) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF)
+ ) ||
+ (//excluding surrogates
+ bytes[offset] == 0xED &&
+ (0x80 <= bytes[offset + 1] && bytes[offset + 1] <= 0x9F) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF)
+ )
+ ) {
+ init = 3;
}
-
- if( (// planes 1-3
- bytes[offset] == 0xF0 &&
- (0x90 <= bytes[offset+1] && bytes[offset+1] <= 0xBF) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF) &&
- (0x80 <= bytes[offset+3] && bytes[offset+3] <= 0xBF)
- ) ||
- (// planes 4-15
- (0xF1 <= bytes[offset] && bytes[offset] <= 0xF3) &&
- (0x80 <= bytes[offset+1] && bytes[offset+1] <= 0xBF) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF) &&
- (0x80 <= bytes[offset+3] && bytes[offset+3] <= 0xBF)
- ) ||
- (// plane 16
- bytes[offset] == 0xF4 &&
- (0x80 <= bytes[offset+1] && bytes[offset+1] <= 0x8F) &&
- (0x80 <= bytes[offset+2] && bytes[offset+2] <= 0xBF) &&
- (0x80 <= bytes[offset+3] && bytes[offset+3] <= 0xBF)
- )
- )
- {
- init = 4;
+ if ((//planes 1 - 3
+ bytes[offset] == 0xF0 &&
+ (0x90 <= bytes[offset + 1] && bytes[offset + 1] <= 0xBF) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF) &&
+ (0x80 <= bytes[offset + 3] && bytes[offset + 3] <= 0xBF)
+ ) ||
+ (//planes 4 - 15
+ (0xF1 <= bytes[offset] && bytes[offset] <= 0xF3) &&
+ (0x80 <= bytes[offset + 1] && bytes[offset + 1] <= 0xBF) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF) &&
+ (0x80 <= bytes[offset + 3] && bytes[offset + 3] <= 0xBF)
+ ) ||
+ (//plane 16
+ bytes[offset] == 0xF4 &&
+ (0x80 <= bytes[offset + 1] && bytes[offset + 1] <= 0x8F) &&
+ (0x80 <= bytes[offset + 2] && bytes[offset + 2] <= 0xBF) &&
+ (0x80 <= bytes[offset + 3] && bytes[offset + 3] <= 0xBF)
+ )
+ ) {
+ init = 4;
}
-
- /* Now check for combining marks which are separate even in NFC */
- while (bytes[offset+init+c]) {
- /* 0300–036F - Combining Diacritical Marks */
- if ( bytes[offset+init+c] == 0xCC &&
- (0x80 <= bytes[offset+init+1+c] && bytes[offset+init+1+c] <= 0xAF)
- )
- {
- c = c + 2; /* Skip to next possible combining mark */
- }
- /* 1DC0–1DFF - Combining Diacritical Marks Supplement */
- else if ( bytes[offset+init+c] == 0xE1 &&
- bytes[offset+init+1+c] == 0xB7 &&
- (0x80 <= bytes[offset+init+2+c] && bytes[offset+init+2+c] <= 0xBF)
- )
- {
- c = c + 3; /* Skip to next possible combining mark */
- }
- /* FE20–FE2F - Combining Half Marks */
- else if ( bytes[offset+init+c] == 0xEF &&
- bytes[offset+init+1+c] == 0xB8 &&
- (0xA0 <= bytes[offset+init+2+c] && bytes[offset+init+2+c] <= 0xAF)
- )
- {
- c = c + 3; /* Skip to next possible combining mark */
- }
- else {
- break;
+ /* Now check for combining marks which are separate even in NFC */
+ while (bytes[offset + init + c]) {
+ /* 0300–036F - Combining Diacritical Marks */
+ if (bytes[offset + init + c] == 0xCC &&
+ (0x80 <= bytes[offset + init + 1 + c] && bytes[offset + init + 1 + c] <= 0xAF)
+ ) {
+ c = c + 2; /* Skip to next possible combining
+ * mark */
+ }
+ /* 1DC0–1DFF - Combining Diacritical Marks Supplement */
+ else if (bytes[offset + init + c] == 0xE1 &&
+ bytes[offset + init + 1 + c] == 0xB7 &&
+ (0x80 <= bytes[offset + init + 2 + c] && bytes[offset + init + 2 + c] <= 0xBF)
+ ) {
+ c = c + 3; /* Skip to next possible combining
+ * mark */
+ }
+ /* FE20–FE2F - Combining Half Marks */
+ else if (bytes[offset + init + c] == 0xEF &&
+ bytes[offset + init + 1 + c] == 0xB8 &&
+ (0xA0 <= bytes[offset + init + 2 + c] && bytes[offset + init + 2 + c] <= 0xAF)
+ ) {
+ c = c + 3; /* Skip to next possible combining
+ * mark */
+ } else {
+ break;
+ }
}
- }
- return init+c;
+ return init + c;
}
-/* ------------------------------------------------------------------------
-@NAME : isulower()
-@INPUT : some bytes
-@OUTPUT :
-@RETURNS : boolean 1 or 0
-@DESCRIPTION: Passed some bytes, returns 1 of the first UTF-8 char is lowercase
- The code was autogenerated from a dump of perl's fabulous
- unichars -a '\p{Ll}', massaged into bytes and printed. This list of
- lowercased property glyphs is from Unicode 6.2.0
-@CALLS :
-@CALLERS : find_lc_tokens()
-@CREATED : 2014/02/27, PK
-@MODIFIED :
--------------------------------------------------------------------------- */
+/*
+ * ------------------------------------------------------------------------
+ * @NAME : isulower() @INPUT : some bytes @OUTPUT : @RETURNS
+ * : boolean 1 or 0 @DESCRIPTION: Passed some bytes, returns 1 if the first
+ * UTF-8 char is lowercase The code was autogenerated from a dump of perl's
+ * fabulous unichars -a '\p{Ll}', massaged into bytes and printed. This list
+ * of lowercased property glyphs is from Unicode 6.2.0 @CALLS : @CALLERS
+ * : find_lc_tokens() @CREATED : 2014/02/27, PK @MODIFIED :
+ * --------------------------------------------------------------------------
+ */
int
-isulower(char * string)
+isulower(char *string)
{
- unsigned char * bytes = (unsigned char *)string;
-
- if(!string)
- return 0;
-
- if (
- ( 0x61 <= bytes[0] && bytes[0] <= 0x7A )
- ) { return 1; }
- if (
- (
- bytes[0] == 0xC2 &&
- (
- bytes[1] == 0xB5
- )
-
- ) ||
- (
- bytes[0] == 0xC3 &&
- (
- ( 0x9F <= bytes[1] && bytes[1] <= 0xB6 ) ||
- ( 0xB8 <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xC4 &&
- (
- bytes[1] == 0x81 ||
- bytes[1] == 0x83 ||
- bytes[1] == 0x85 ||
- bytes[1] == 0x87 ||
- bytes[1] == 0x89 ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- bytes[1] == 0x8F ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- ( 0xB7 <= bytes[1] && bytes[1] <= 0xB8 ) ||
- bytes[1] == 0xBA ||
- bytes[1] == 0xBC ||
- bytes[1] == 0xBE
- )
-
- ) ||
- (
- bytes[0] == 0xC5 &&
- (
- bytes[1] == 0x80 ||
- bytes[1] == 0x82 ||
- bytes[1] == 0x84 ||
- bytes[1] == 0x86 ||
- ( 0x88 <= bytes[1] && bytes[1] <= 0x89 ) ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- bytes[1] == 0x8F ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB7 ||
- bytes[1] == 0xBA ||
- bytes[1] == 0xBC ||
- ( 0xBE <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xC6 &&
- (
- bytes[1] == 0x80 ||
- bytes[1] == 0x83 ||
- bytes[1] == 0x85 ||
- bytes[1] == 0x88 ||
- ( 0x8C <= bytes[1] && bytes[1] <= 0x8D ) ||
- bytes[1] == 0x92 ||
- bytes[1] == 0x95 ||
- ( 0x99 <= bytes[1] && bytes[1] <= 0x9B ) ||
- bytes[1] == 0x9E ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA8 ||
- ( 0xAA <= bytes[1] && bytes[1] <= 0xAB ) ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xB0 ||
- bytes[1] == 0xB4 ||
- bytes[1] == 0xB6 ||
- ( 0xB9 <= bytes[1] && bytes[1] <= 0xBA ) ||
- ( 0xBD <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xC7 &&
- (
- bytes[1] == 0x86 ||
- bytes[1] == 0x89 ||
- bytes[1] == 0x8C ||
- bytes[1] == 0x8E ||
- bytes[1] == 0x90 ||
- bytes[1] == 0x92 ||
- bytes[1] == 0x94 ||
- bytes[1] == 0x96 ||
- bytes[1] == 0x98 ||
- bytes[1] == 0x9A ||
- ( 0x9C <= bytes[1] && bytes[1] <= 0x9D ) ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- ( 0xAF <= bytes[1] && bytes[1] <= 0xB0 ) ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB9 ||
- bytes[1] == 0xBB ||
- bytes[1] == 0xBD ||
- bytes[1] == 0xBF
- )
-
- ) ||
- (
- bytes[0] == 0xC8 &&
- (
- bytes[1] == 0x81 ||
- bytes[1] == 0x83 ||
- bytes[1] == 0x85 ||
- bytes[1] == 0x87 ||
- bytes[1] == 0x89 ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- bytes[1] == 0x8F ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- ( 0xB3 <= bytes[1] && bytes[1] <= 0xB9 ) ||
- bytes[1] == 0xBC ||
- bytes[1] == 0xBF
- )
-
- ) ||
- (
- bytes[0] == 0xC9 &&
- (
- bytes[1] == 0x80 ||
- bytes[1] == 0x82 ||
- bytes[1] == 0x87 ||
- bytes[1] == 0x89 ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- ( 0x8F <= bytes[1] && bytes[1] <= 0xBF )
- )
+ unsigned char *bytes = (unsigned char *)string;
- ) ||
- (
- bytes[0] == 0xCA &&
- (
- ( 0x80 <= bytes[1] && bytes[1] <= 0x93 ) ||
- ( 0x95 <= bytes[1] && bytes[1] <= 0xAF )
- )
-
- ) ||
- (
- bytes[0] == 0xCD &&
- (
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB7 ||
- ( 0xBB <= bytes[1] && bytes[1] <= 0xBD )
- )
+ if (!string)
+ return 0;
- ) ||
- (
- bytes[0] == 0xCE &&
- (
- bytes[1] == 0x90 ||
- ( 0xAC <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xCF &&
- (
- ( 0x80 <= bytes[1] && bytes[1] <= 0x8E ) ||
- ( 0x90 <= bytes[1] && bytes[1] <= 0x91 ) ||
- ( 0x95 <= bytes[1] && bytes[1] <= 0x97 ) ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- ( 0xAF <= bytes[1] && bytes[1] <= 0xB3 ) ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB8 ||
- ( 0xBB <= bytes[1] && bytes[1] <= 0xBC )
- )
-
- ) ||
- (
- bytes[0] == 0xD0 &&
- (
- ( 0xB0 <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xD1 &&
- (
- ( 0x80 <= bytes[1] && bytes[1] <= 0x9F ) ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB7 ||
- bytes[1] == 0xB9 ||
- bytes[1] == 0xBB ||
- bytes[1] == 0xBD ||
- bytes[1] == 0xBF
- )
-
- ) ||
- (
- bytes[0] == 0xD2 &&
- (
- bytes[1] == 0x81 ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- bytes[1] == 0x8F ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB7 ||
- bytes[1] == 0xB9 ||
- bytes[1] == 0xBB ||
- bytes[1] == 0xBD ||
- bytes[1] == 0xBF
- )
-
- ) ||
- (
- bytes[0] == 0xD3 &&
- (
- bytes[1] == 0x82 ||
- bytes[1] == 0x84 ||
- bytes[1] == 0x86 ||
- bytes[1] == 0x88 ||
- bytes[1] == 0x8A ||
- bytes[1] == 0x8C ||
- ( 0x8E <= bytes[1] && bytes[1] <= 0x8F ) ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7 ||
- bytes[1] == 0xA9 ||
- bytes[1] == 0xAB ||
- bytes[1] == 0xAD ||
- bytes[1] == 0xAF ||
- bytes[1] == 0xB1 ||
- bytes[1] == 0xB3 ||
- bytes[1] == 0xB5 ||
- bytes[1] == 0xB7 ||
- bytes[1] == 0xB9 ||
- bytes[1] == 0xBB ||
- bytes[1] == 0xBD ||
- bytes[1] == 0xBF
- )
-
- ) ||
- (
- bytes[0] == 0xD4 &&
- (
- bytes[1] == 0x81 ||
- bytes[1] == 0x83 ||
- bytes[1] == 0x85 ||
- bytes[1] == 0x87 ||
- bytes[1] == 0x89 ||
- bytes[1] == 0x8B ||
- bytes[1] == 0x8D ||
- bytes[1] == 0x8F ||
- bytes[1] == 0x91 ||
- bytes[1] == 0x93 ||
- bytes[1] == 0x95 ||
- bytes[1] == 0x97 ||
- bytes[1] == 0x99 ||
- bytes[1] == 0x9B ||
- bytes[1] == 0x9D ||
- bytes[1] == 0x9F ||
- bytes[1] == 0xA1 ||
- bytes[1] == 0xA3 ||
- bytes[1] == 0xA5 ||
- bytes[1] == 0xA7
- )
-
- ) ||
- (
- bytes[0] == 0xD5 &&
- (
- ( 0xA1 <= bytes[1] && bytes[1] <= 0xBF )
- )
-
- ) ||
- (
- bytes[0] == 0xD6 &&
- (
- ( 0x80 <= bytes[1] && bytes[1] <= 0x87 )
- )
-
- )
- ) { return 1; }
- if (
- (
- bytes[0] == 0xE1 &&
- (
- bytes[1] == 0xB4 &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0xAB )
- )
- ) ||
- (
- bytes[1] == 0xB5 &&
- (
- ( 0xAB <= bytes[2] && bytes[2] <= 0xB7 ) ||
- ( 0xB9 <= bytes[2] && bytes[2] <= 0xBF )
- )
- ) ||
- (
- bytes[1] == 0xB6 &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x9A )
- )
- ) ||
- (
- bytes[1] == 0xB8 &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB1 ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0xB9 &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB1 ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0xBA &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- ( 0x95 <= bytes[2] && bytes[2] <= 0x9D ) ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB1 ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0xBB &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB1 ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0xBC &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x87 ) ||
- ( 0x90 <= bytes[2] && bytes[2] <= 0x95 ) ||
- ( 0xA0 <= bytes[2] && bytes[2] <= 0xA7 ) ||
- ( 0xB0 <= bytes[2] && bytes[2] <= 0xB7 )
- )
- ) ||
- (
- bytes[1] == 0xBD &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x85 ) ||
- ( 0x90 <= bytes[2] && bytes[2] <= 0x97 ) ||
- ( 0xA0 <= bytes[2] && bytes[2] <= 0xA7 ) ||
- ( 0xB0 <= bytes[2] && bytes[2] <= 0xBD )
- )
- ) ||
- (
- bytes[1] == 0xBE &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x87 ) ||
- ( 0x90 <= bytes[2] && bytes[2] <= 0x97 ) ||
- ( 0xA0 <= bytes[2] && bytes[2] <= 0xA7 ) ||
- ( 0xB0 <= bytes[2] && bytes[2] <= 0xB4 ) ||
- ( 0xB6 <= bytes[2] && bytes[2] <= 0xB7 ) ||
- bytes[2] == 0xBE
- )
- ) ||
- (
- bytes[1] == 0xBF &&
- (
- ( 0x82 <= bytes[2] && bytes[2] <= 0x84 ) ||
- ( 0x86 <= bytes[2] && bytes[2] <= 0x87 ) ||
- ( 0x90 <= bytes[2] && bytes[2] <= 0x93 ) ||
- ( 0x96 <= bytes[2] && bytes[2] <= 0x97 ) ||
- ( 0xA0 <= bytes[2] && bytes[2] <= 0xA7 ) ||
- ( 0xB2 <= bytes[2] && bytes[2] <= 0xB4 ) ||
- ( 0xB6 <= bytes[2] && bytes[2] <= 0xB7 )
- )
- )
- ) ||
- (
- bytes[0] == 0xE2 &&
- (
- bytes[1] == 0x84 &&
- (
- bytes[2] == 0x8A ||
- ( 0x8E <= bytes[2] && bytes[2] <= 0x8F ) ||
- bytes[2] == 0x93 ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB4 ||
- bytes[2] == 0xB9 ||
- ( 0xBC <= bytes[2] && bytes[2] <= 0xBD )
- )
- ) ||
- (
- bytes[1] == 0x85 &&
- (
- ( 0x86 <= bytes[2] && bytes[2] <= 0x89 ) ||
- bytes[2] == 0x8E
- )
- ) ||
- (
- bytes[1] == 0x86 &&
- (
- bytes[2] == 0x84
- )
- ) ||
- (
- bytes[1] == 0xB0 &&
- (
- ( 0xB0 <= bytes[2] && bytes[2] <= 0xBF )
- )
- ) ||
- (
- bytes[1] == 0xB1 &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x9E ) ||
- bytes[2] == 0xA1 ||
- ( 0xA5 <= bytes[2] && bytes[2] <= 0xA6 ) ||
- bytes[2] == 0xA8 ||
- bytes[2] == 0xAA ||
- bytes[2] == 0xAC ||
- bytes[2] == 0xB1 ||
- ( 0xB3 <= bytes[2] && bytes[2] <= 0xB4 ) ||
- ( 0xB6 <= bytes[2] && bytes[2] <= 0xBB )
- )
- ) ||
- (
- bytes[1] == 0xB2 &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- bytes[2] == 0xB1 ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0xB3 &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- ( 0xA3 <= bytes[2] && bytes[2] <= 0xA4 ) ||
- bytes[2] == 0xAC ||
- bytes[2] == 0xAE ||
- bytes[2] == 0xB3
- )
- ) ||
- (
- bytes[1] == 0xB4 &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0xA5 ) ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xAD
- )
- )
- ) ||
- (
- bytes[0] == 0xEA &&
- (
- bytes[1] == 0x99 &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD
- )
- ) ||
- (
- bytes[1] == 0x9A &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97
- )
- ) ||
- (
- bytes[1] == 0x9C &&
- (
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- ( 0xAF <= bytes[2] && bytes[2] <= 0xB1 ) ||
- bytes[2] == 0xB3 ||
- bytes[2] == 0xB5 ||
- bytes[2] == 0xB7 ||
- bytes[2] == 0xB9 ||
- bytes[2] == 0xBB ||
- bytes[2] == 0xBD ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0x9D &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x89 ||
- bytes[2] == 0x8B ||
- bytes[2] == 0x8D ||
- bytes[2] == 0x8F ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0x95 ||
- bytes[2] == 0x97 ||
- bytes[2] == 0x99 ||
- bytes[2] == 0x9B ||
- bytes[2] == 0x9D ||
- bytes[2] == 0x9F ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9 ||
- bytes[2] == 0xAB ||
- bytes[2] == 0xAD ||
- bytes[2] == 0xAF ||
- ( 0xB1 <= bytes[2] && bytes[2] <= 0xB8 ) ||
- bytes[2] == 0xBA ||
- bytes[2] == 0xBC ||
- bytes[2] == 0xBF
- )
- ) ||
- (
- bytes[1] == 0x9E &&
- (
- bytes[2] == 0x81 ||
- bytes[2] == 0x83 ||
- bytes[2] == 0x85 ||
- bytes[2] == 0x87 ||
- bytes[2] == 0x8C ||
- bytes[2] == 0x8E ||
- bytes[2] == 0x91 ||
- bytes[2] == 0x93 ||
- bytes[2] == 0xA1 ||
- bytes[2] == 0xA3 ||
- bytes[2] == 0xA5 ||
- bytes[2] == 0xA7 ||
- bytes[2] == 0xA9
- )
- ) ||
- (
- bytes[1] == 0x9F &&
- (
- bytes[2] == 0xBA
- )
- )
- ) ||
- (
- bytes[0] == 0xEF &&
- (
- bytes[1] == 0xAC &&
- (
- ( 0x80 <= bytes[2] && bytes[2] <= 0x86 ) ||
- ( 0x93 <= bytes[2] && bytes[2] <= 0x97 )
- )
- ) ||
- (
- bytes[1] == 0xBD &&
+ if (
+ (0x61 <= bytes[0] && bytes[0] <= 0x7A)
+ ) {
+ return 1;
+ }
+ if (
(
- ( 0x81 <= bytes[2] && bytes[2] <= 0x9A )
- )
- )
- )
- ) { return 1; }
- if (
- (
- bytes[0] == 0xF0 &&
- (
- bytes[1] == 0x90 &&
+ bytes[0] == 0xC2 &&
+ (
+ bytes[1] == 0xB5
+ )
+ ) ||
(
- bytes[2] == 0x90 &&
+ bytes[0] == 0xC3 &&
(
- ( 0xA8 <= bytes[3] && bytes[3] <= 0xBF )
+ (0x9F <= bytes[1] && bytes[1] <= 0xB6) ||
+ (0xB8 <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x91 &&
+ bytes[0] == 0xC4 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x8F )
+ bytes[1] == 0x81 ||
+ bytes[1] == 0x83 ||
+ bytes[1] == 0x85 ||
+ bytes[1] == 0x87 ||
+ bytes[1] == 0x89 ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ bytes[1] == 0x8F ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ (0xB7 <= bytes[1] && bytes[1] <= 0xB8) ||
+ bytes[1] == 0xBA ||
+ bytes[1] == 0xBC ||
+ bytes[1] == 0xBE
)
- )
- ) ||
- (
- bytes[1] == 0x9D &&
+ ) ||
(
- bytes[2] == 0x90 &&
+ bytes[0] == 0xC5 &&
(
- ( 0x9A <= bytes[3] && bytes[3] <= 0xB3 )
+ bytes[1] == 0x80 ||
+ bytes[1] == 0x82 ||
+ bytes[1] == 0x84 ||
+ bytes[1] == 0x86 ||
+ (0x88 <= bytes[1] && bytes[1] <= 0x89) ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ bytes[1] == 0x8F ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB7 ||
+ bytes[1] == 0xBA ||
+ bytes[1] == 0xBC ||
+ (0xBE <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x91 &&
+ bytes[0] == 0xC6 &&
(
- ( 0x8E <= bytes[3] && bytes[3] <= 0x94 ) ||
- ( 0x96 <= bytes[3] && bytes[3] <= 0xA7 )
+ bytes[1] == 0x80 ||
+ bytes[1] == 0x83 ||
+ bytes[1] == 0x85 ||
+ bytes[1] == 0x88 ||
+ (0x8C <= bytes[1] && bytes[1] <= 0x8D) ||
+ bytes[1] == 0x92 ||
+ bytes[1] == 0x95 ||
+ (0x99 <= bytes[1] && bytes[1] <= 0x9B) ||
+ bytes[1] == 0x9E ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA8 ||
+ (0xAA <= bytes[1] && bytes[1] <= 0xAB) ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xB0 ||
+ bytes[1] == 0xB4 ||
+ bytes[1] == 0xB6 ||
+ (0xB9 <= bytes[1] && bytes[1] <= 0xBA) ||
+ (0xBD <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x92 &&
+ bytes[0] == 0xC7 &&
(
- ( 0x82 <= bytes[3] && bytes[3] <= 0x9B ) ||
- ( 0xB6 <= bytes[3] && bytes[3] <= 0xB9 ) ||
- bytes[3] == 0xBB ||
- ( 0xBD <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x86 ||
+ bytes[1] == 0x89 ||
+ bytes[1] == 0x8C ||
+ bytes[1] == 0x8E ||
+ bytes[1] == 0x90 ||
+ bytes[1] == 0x92 ||
+ bytes[1] == 0x94 ||
+ bytes[1] == 0x96 ||
+ bytes[1] == 0x98 ||
+ bytes[1] == 0x9A ||
+ (0x9C <= bytes[1] && bytes[1] <= 0x9D) ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ (0xAF <= bytes[1] && bytes[1] <= 0xB0) ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB9 ||
+ bytes[1] == 0xBB ||
+ bytes[1] == 0xBD ||
+ bytes[1] == 0xBF
)
) ||
(
- bytes[2] == 0x93 &&
+ bytes[0] == 0xC8 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x83 ) ||
- ( 0x85 <= bytes[3] && bytes[3] <= 0x8F ) ||
- ( 0xAA <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x81 ||
+ bytes[1] == 0x83 ||
+ bytes[1] == 0x85 ||
+ bytes[1] == 0x87 ||
+ bytes[1] == 0x89 ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ bytes[1] == 0x8F ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ (0xB3 <= bytes[1] && bytes[1] <= 0xB9) ||
+ bytes[1] == 0xBC ||
+ bytes[1] == 0xBF
)
) ||
(
- bytes[2] == 0x94 &&
+ bytes[0] == 0xC9 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x83 ) ||
- ( 0x9E <= bytes[3] && bytes[3] <= 0xB7 )
+ bytes[1] == 0x80 ||
+ bytes[1] == 0x82 ||
+ bytes[1] == 0x87 ||
+ bytes[1] == 0x89 ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ (0x8F <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x95 &&
+ bytes[0] == 0xCA &&
(
- ( 0x92 <= bytes[3] && bytes[3] <= 0xAB )
+ (0x80 <= bytes[1] && bytes[1] <= 0x93) ||
+ (0x95 <= bytes[1] && bytes[1] <= 0xAF)
)
) ||
(
- bytes[2] == 0x96 &&
+ bytes[0] == 0xCD &&
(
- ( 0x86 <= bytes[3] && bytes[3] <= 0x9F ) ||
- ( 0xBA <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB7 ||
+ (0xBB <= bytes[1] && bytes[1] <= 0xBD)
)
) ||
(
- bytes[2] == 0x97 &&
+ bytes[0] == 0xCE &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x93 ) ||
- ( 0xAE <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x90 ||
+ (0xAC <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x98 &&
+ bytes[0] == 0xCF &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x87 ) ||
- ( 0xA2 <= bytes[3] && bytes[3] <= 0xBB )
+ (0x80 <= bytes[1] && bytes[1] <= 0x8E) ||
+ (0x90 <= bytes[1] && bytes[1] <= 0x91) ||
+ (0x95 <= bytes[1] && bytes[1] <= 0x97) ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ (0xAF <= bytes[1] && bytes[1] <= 0xB3) ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB8 ||
+ (0xBB <= bytes[1] && bytes[1] <= 0xBC)
)
) ||
(
- bytes[2] == 0x99 &&
+ bytes[0] == 0xD0 &&
(
- ( 0x96 <= bytes[3] && bytes[3] <= 0xAF )
+ (0xB0 <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x9A &&
+ bytes[0] == 0xD1 &&
(
- ( 0x8A <= bytes[3] && bytes[3] <= 0xA5 )
+ (0x80 <= bytes[1] && bytes[1] <= 0x9F) ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB7 ||
+ bytes[1] == 0xB9 ||
+ bytes[1] == 0xBB ||
+ bytes[1] == 0xBD ||
+ bytes[1] == 0xBF
)
) ||
(
- bytes[2] == 0x9B &&
+ bytes[0] == 0xD2 &&
(
- ( 0x82 <= bytes[3] && bytes[3] <= 0x9A ) ||
- ( 0x9C <= bytes[3] && bytes[3] <= 0xA1 ) ||
- ( 0xBC <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x81 ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ bytes[1] == 0x8F ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB7 ||
+ bytes[1] == 0xB9 ||
+ bytes[1] == 0xBB ||
+ bytes[1] == 0xBD ||
+ bytes[1] == 0xBF
)
) ||
(
- bytes[2] == 0x9C &&
+ bytes[0] == 0xD3 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x94 ) ||
- ( 0x96 <= bytes[3] && bytes[3] <= 0x9B ) ||
- ( 0xB6 <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x82 ||
+ bytes[1] == 0x84 ||
+ bytes[1] == 0x86 ||
+ bytes[1] == 0x88 ||
+ bytes[1] == 0x8A ||
+ bytes[1] == 0x8C ||
+ (0x8E <= bytes[1] && bytes[1] <= 0x8F) ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7 ||
+ bytes[1] == 0xA9 ||
+ bytes[1] == 0xAB ||
+ bytes[1] == 0xAD ||
+ bytes[1] == 0xAF ||
+ bytes[1] == 0xB1 ||
+ bytes[1] == 0xB3 ||
+ bytes[1] == 0xB5 ||
+ bytes[1] == 0xB7 ||
+ bytes[1] == 0xB9 ||
+ bytes[1] == 0xBB ||
+ bytes[1] == 0xBD ||
+ bytes[1] == 0xBF
)
) ||
(
- bytes[2] == 0x9D &&
+ bytes[0] == 0xD4 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x8E ) ||
- ( 0x90 <= bytes[3] && bytes[3] <= 0x95 ) ||
- ( 0xB0 <= bytes[3] && bytes[3] <= 0xBF )
+ bytes[1] == 0x81 ||
+ bytes[1] == 0x83 ||
+ bytes[1] == 0x85 ||
+ bytes[1] == 0x87 ||
+ bytes[1] == 0x89 ||
+ bytes[1] == 0x8B ||
+ bytes[1] == 0x8D ||
+ bytes[1] == 0x8F ||
+ bytes[1] == 0x91 ||
+ bytes[1] == 0x93 ||
+ bytes[1] == 0x95 ||
+ bytes[1] == 0x97 ||
+ bytes[1] == 0x99 ||
+ bytes[1] == 0x9B ||
+ bytes[1] == 0x9D ||
+ bytes[1] == 0x9F ||
+ bytes[1] == 0xA1 ||
+ bytes[1] == 0xA3 ||
+ bytes[1] == 0xA5 ||
+ bytes[1] == 0xA7
)
) ||
(
- bytes[2] == 0x9E &&
+ bytes[0] == 0xD5 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x88 ) ||
- ( 0x8A <= bytes[3] && bytes[3] <= 0x8F ) ||
- ( 0xAA <= bytes[3] && bytes[3] <= 0xBF )
+ (0xA1 <= bytes[1] && bytes[1] <= 0xBF)
)
) ||
(
- bytes[2] == 0x9F &&
+ bytes[0] == 0xD6 &&
(
- ( 0x80 <= bytes[3] && bytes[3] <= 0x82 ) ||
- ( 0x84 <= bytes[3] && bytes[3] <= 0x89 ) ||
- bytes[3] == 0x8B
+ (0x80 <= bytes[1] && bytes[1] <= 0x87)
)
)
+ ) {
+ return 1;
+ }
+ if (
+ (
+ bytes[0] == 0xE1 && (
+ (bytes[1] == 0xB4 && 0x80 <= bytes[2] && bytes[2] <= 0xAB) ||
+ (
+ bytes[1] == 0xB5 &&
+ (
+ (0xAB <= bytes[2] && bytes[2] <= 0xB7) ||
+ (0xB9 <= bytes[2] && bytes[2] <= 0xBF)
+ )
+ ) ||
+ (
+ bytes[1] == 0xB6 &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x9A)
+ )
+ ) ||
+ (
+ bytes[1] == 0xB8 &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB1 ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0xB9 &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB1 ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0xBA &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ (0x95 <= bytes[2] && bytes[2] <= 0x9D) ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB1 ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0xBB &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB1 ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0xBC &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x87) ||
+ (0x90 <= bytes[2] && bytes[2] <= 0x95) ||
+ (0xA0 <= bytes[2] && bytes[2] <= 0xA7) ||
+ (0xB0 <= bytes[2] && bytes[2] <= 0xB7)
+ )
+ ) ||
+ (
+ bytes[1] == 0xBD &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x85) ||
+ (0x90 <= bytes[2] && bytes[2] <= 0x97) ||
+ (0xA0 <= bytes[2] && bytes[2] <= 0xA7) ||
+ (0xB0 <= bytes[2] && bytes[2] <= 0xBD)
+ )
+ ) ||
+ (
+ bytes[1] == 0xBE &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x87) ||
+ (0x90 <= bytes[2] && bytes[2] <= 0x97) ||
+ (0xA0 <= bytes[2] && bytes[2] <= 0xA7) ||
+ (0xB0 <= bytes[2] && bytes[2] <= 0xB4) ||
+ (0xB6 <= bytes[2] && bytes[2] <= 0xB7) ||
+ bytes[2] == 0xBE
+ )
+ ) ||
+ (
+ bytes[1] == 0xBF &&
+ (
+ (0x82 <= bytes[2] && bytes[2] <= 0x84) ||
+ (0x86 <= bytes[2] && bytes[2] <= 0x87) ||
+ (0x90 <= bytes[2] && bytes[2] <= 0x93) ||
+ (0x96 <= bytes[2] && bytes[2] <= 0x97) ||
+ (0xA0 <= bytes[2] && bytes[2] <= 0xA7) ||
+ (0xB2 <= bytes[2] && bytes[2] <= 0xB4) ||
+ (0xB6 <= bytes[2] && bytes[2] <= 0xB7)
+ )
+ )
+ )
+ ) ||
+ (
+ bytes[0] == 0xE2 &&
+ ((
+ bytes[1] == 0x84 &&
+ (
+ bytes[2] == 0x8A ||
+ (0x8E <= bytes[2] && bytes[2] <= 0x8F) ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB4 ||
+ bytes[2] == 0xB9 ||
+ (0xBC <= bytes[2] && bytes[2] <= 0xBD)
)
- )
- ) { return 1; }
-
-
- return 0; // no lowercase character
-} /* isulower */
-
+ ) ||
+ (
+ bytes[1] == 0x85 &&
+ (
+ (0x86 <= bytes[2] && bytes[2] <= 0x89) ||
+ bytes[2] == 0x8E
+ )
+ ) ||
+ (
+ bytes[1] == 0x86 &&
+ (
+ bytes[2] == 0x84
+ )
+ ) ||
+ (
+ bytes[1] == 0xB0 &&
+ (
+ (0xB0 <= bytes[2] && bytes[2] <= 0xBF)
+ )
+ ) ||
+ (
+ bytes[1] == 0xB1 &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x9E) ||
+ bytes[2] == 0xA1 ||
+ (0xA5 <= bytes[2] && bytes[2] <= 0xA6) ||
+ bytes[2] == 0xA8 ||
+ bytes[2] == 0xAA ||
+ bytes[2] == 0xAC ||
+ bytes[2] == 0xB1 ||
+ (0xB3 <= bytes[2] && bytes[2] <= 0xB4) ||
+ (0xB6 <= bytes[2] && bytes[2] <= 0xBB)
+ )
+ ) ||
+ (
+ bytes[1] == 0xB2 &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ bytes[2] == 0xB1 ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0xB3 &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ (0xA3 <= bytes[2] && bytes[2] <= 0xA4) ||
+ bytes[2] == 0xAC ||
+ bytes[2] == 0xAE ||
+ bytes[2] == 0xB3
+ )
+ ) ||
+ (
+ bytes[1] == 0xB4 &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0xA5) ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xAD
+ )
+ )
+ )) ||
+ (
+ bytes[0] == 0xEA &&
+ ((
+ bytes[1] == 0x99 &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD
+ )
+ ) ||
+ (
+ bytes[1] == 0x9A &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97
+ )
+ ) ||
+ (
+ bytes[1] == 0x9C &&
+ (
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ (0xAF <= bytes[2] && bytes[2] <= 0xB1) ||
+ bytes[2] == 0xB3 ||
+ bytes[2] == 0xB5 ||
+ bytes[2] == 0xB7 ||
+ bytes[2] == 0xB9 ||
+ bytes[2] == 0xBB ||
+ bytes[2] == 0xBD ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0x9D &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x89 ||
+ bytes[2] == 0x8B ||
+ bytes[2] == 0x8D ||
+ bytes[2] == 0x8F ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0x95 ||
+ bytes[2] == 0x97 ||
+ bytes[2] == 0x99 ||
+ bytes[2] == 0x9B ||
+ bytes[2] == 0x9D ||
+ bytes[2] == 0x9F ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9 ||
+ bytes[2] == 0xAB ||
+ bytes[2] == 0xAD ||
+ bytes[2] == 0xAF ||
+ (0xB1 <= bytes[2] && bytes[2] <= 0xB8) ||
+ bytes[2] == 0xBA ||
+ bytes[2] == 0xBC ||
+ bytes[2] == 0xBF
+ )
+ ) ||
+ (
+ bytes[1] == 0x9E &&
+ (
+ bytes[2] == 0x81 ||
+ bytes[2] == 0x83 ||
+ bytes[2] == 0x85 ||
+ bytes[2] == 0x87 ||
+ bytes[2] == 0x8C ||
+ bytes[2] == 0x8E ||
+ bytes[2] == 0x91 ||
+ bytes[2] == 0x93 ||
+ bytes[2] == 0xA1 ||
+ bytes[2] == 0xA3 ||
+ bytes[2] == 0xA5 ||
+ bytes[2] == 0xA7 ||
+ bytes[2] == 0xA9
+ )
+ ) ||
+ (
+ bytes[1] == 0x9F &&
+ (
+ bytes[2] == 0xBA
+ )
+ )
+ )) ||
+ (
+ bytes[0] == 0xEF &&
+ ((
+ bytes[1] == 0xAC &&
+ (
+ (0x80 <= bytes[2] && bytes[2] <= 0x86) ||
+ (0x93 <= bytes[2] && bytes[2] <= 0x97)
+ )
+ ) ||
+ (
+ bytes[1] == 0xBD &&
+ (
+ (0x81 <= bytes[2] && bytes[2] <= 0x9A)
+ )
+ ))
+ )) {
+ return 1;
+ }
+ if (
+ (
+ bytes[0] == 0xF0
+ &&
+ (
+ (
+ bytes[1] == 0x90
+ &&
+ (
+ (bytes[2] == 0x90 && 0xA8 <= bytes[3] && bytes[3] <= 0xBF)
+ ||
+ (bytes[2] == 0x91 && 0x80 <= bytes[3] && bytes[3] <= 0x8F)
+ )
+ )
+ ||
+ (
+ bytes[1] == 0x9D
+ && (
+ (bytes[2] == 0x90 && 0x9A <= bytes[3] && bytes[3] <= 0xB3)
+ ||
+ (
+ bytes[2] == 0x91 &&
+ (
+ (0x8E <= bytes[3] && bytes[3] <= 0x94)
+ ||
+ (0x96 <= bytes[3] && bytes[3] <= 0xA7)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x92 &&
+ (
+ (0x82 <= bytes[3] && bytes[3] <= 0x9B)
+ ||
+ (0xB6 <= bytes[3] && bytes[3] <= 0xB9)
+ ||
+ bytes[3] == 0xBB
+ ||
+ (0xBD <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x93 &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x83) ||
+ (0x85 <= bytes[3] && bytes[3] <= 0x8F) ||
+ (0xAA <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x94 &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x83) ||
+ (0x9E <= bytes[3] && bytes[3] <= 0xB7)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x95 && 0x92 <= bytes[3] && bytes[3] <= 0xAB
+ )
+ ||
+ (
+ bytes[2] == 0x96 &&
+ (
+ (0x86 <= bytes[3] && bytes[3] <= 0x9F) ||
+ (0xBA <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x97 &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x93) ||
+ (0xAE <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x98 &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x87) ||
+ (0xA2 <= bytes[3] && bytes[3] <= 0xBB)
+ )
+ )
+ ||
+ (bytes[2] == 0x99 && 0x96 <= bytes[3] && bytes[3] <= 0xAF)
+ ||
+ (bytes[2] == 0x9A && 0x8A <= bytes[3] && bytes[3] <= 0xA5)
+ ||
+ (
+ bytes[2] == 0x9B &&
+ (
+ (0x82 <= bytes[3] && bytes[3] <= 0x9A) ||
+ (0x9C <= bytes[3] && bytes[3] <= 0xA1) ||
+ (0xBC <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x9C &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x94) ||
+ (0x96 <= bytes[3] && bytes[3] <= 0x9B) ||
+ (0xB6 <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x9D &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x8E) ||
+ (0x90 <= bytes[3] && bytes[3] <= 0x95) ||
+ (0xB0 <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x9E &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x88) ||
+ (0x8A <= bytes[3] && bytes[3] <= 0x8F) ||
+ (0xAA <= bytes[3] && bytes[3] <= 0xBF)
+ )
+ )
+ ||
+ (
+ bytes[2] == 0x9F &&
+ (
+ (0x80 <= bytes[3] && bytes[3] <= 0x82) ||
+ (0x84 <= bytes[3] && bytes[3] <= 0x89) ||
+ bytes[3] == 0x8B
+ )
+ )
+ )
+ ))
+ )
+ ) {
+ return 1;
+ }
+ return 0;
+ //no lowercase character
+} /* isulower */
diff --git a/btparse/tests/macro_test.c b/btparse/tests/macro_test.c
index 9e77a37..5126c9c 100644
--- a/btparse/tests/macro_test.c
+++ b/btparse/tests/macro_test.c
@@ -17,6 +17,7 @@
* $Id$
*/
+#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
diff --git a/btparse/tests/name_test.c b/btparse/tests/name_test.c
index 28d0546..3577707 100644
--- a/btparse/tests/name_test.c
+++ b/btparse/tests/name_test.c
@@ -6,6 +6,7 @@
* $Id$
*/
+#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "btparse.h"
diff --git a/btparse/tests/namebug.c b/btparse/tests/namebug.c
index a7ee3ce..15263c9 100644
--- a/btparse/tests/namebug.c
+++ b/btparse/tests/namebug.c
@@ -2,7 +2,9 @@
#include <string.h>
#include "btparse.h"
-void main (void)
+void dump_name(bt_name*);
+
+int main (void)
{
char * snames[4] = { "Joe Blow", "John Smith", "Fred Rogers", "" };
bt_name * names[4];
@@ -20,5 +22,7 @@ void main (void)
{
dump_name (names[i]);
}
+
+ return 0;
}
diff --git a/btparse/tests/postprocess_test.c b/btparse/tests/postprocess_test.c
index 70465fd..06debde 100644
--- a/btparse/tests/postprocess_test.c
+++ b/btparse/tests/postprocess_test.c
@@ -1,3 +1,4 @@
+#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "my_dmalloc.h"
diff --git a/btparse/tests/purify_test.c b/btparse/tests/purify_test.c
index 8925779..9b3ac42 100644
--- a/btparse/tests/purify_test.c
+++ b/btparse/tests/purify_test.c
@@ -1,6 +1,5 @@
-/* $Id$ */
-
#include <stdlib.h>
+#include <string.h>
#include <stdio.h>
#include "btparse.h"
diff --git a/btparse/tests/tex_test.c b/btparse/tests/tex_test.c
index 58bf18c..e157264 100644
--- a/btparse/tests/tex_test.c
+++ b/btparse/tests/tex_test.c
@@ -2,9 +2,10 @@
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
#include "btparse.h"
-void main (void)
+int main (void)
{
char line[1024];
int line_num;
@@ -37,4 +38,5 @@ void main (void)
}
}
}
+ return 0;
}
diff --git a/examples/append_entries b/examples/append_entries
index 9680dd1..dc7b4cf 100755
--- a/examples/append_entries
+++ b/examples/append_entries
@@ -46,9 +46,9 @@ my ($dest_filename, $source_filename, $key_pattern) = @ARGV;
# Open the two files: dest_file in append mode (ultimately just using
# perl's builtin 'open'), and source_file in regular read-only mode.
-my $dest_file = new Text::BibTeX::File ">>$dest_filename"
+my $dest_file = Text::BibTeX::File->new(">>$dest_filename")
or die "couldn't open $dest_filename for appending: $!\n";
-my $source_file = new Text::BibTeX::File $source_filename
+my $source_file = Text::BibTeX::File->new($source_filename)
or die "couldn't open $source_filename: $!\n";
# Turn on 'value preservation' mode for the input file. This is mainly so
@@ -60,7 +60,7 @@ $source_file->preserve_values (1);
# And loop over all entries in the source file, optionally appending
# each one to the destination file.
-while (my $entry = new Text::BibTeX::Entry $source_file)
+while (my $entry = Text::BibTeX::Entry->new($source_file))
{
# Skip this entry if it's not a regular entry -- that is, we just
# drop '@string', '@comment', and '@preamble' entries, probably
diff --git a/inc/MyBuilder.pm b/inc/MyBuilder.pm
index f234489..9fe152c 100644
--- a/inc/MyBuilder.pm
+++ b/inc/MyBuilder.pm
@@ -16,6 +16,11 @@ use ExtUtils::Mkbootstrap;
use File::Spec::Functions qw.catdir catfile.;
use File::Path qw.mkpath.;
+my @EXTRA_FLAGS = ();
+
+## debug
+## @EXTRA_FLAGS = ('-g', "-DDEBUG=2");
+
sub ACTION_install {
my $self = shift;
@@ -98,6 +103,7 @@ sub ACTION_compile_xscode {
$self->add_to_cleanup($ofile); ## FIXME
if (!$self->up_to_date($cfile, $ofile)) {
$cbuilder->compile( source => $cfile,
+ extra_compiler_flags => [@EXTRA_FLAGS],
include_dirs => [ catdir("btparse","src") ],
object_file => $ofile);
}
@@ -168,7 +174,7 @@ sub ACTION_create_objects {
$object =~ s/\.c/.o/;
next if $self->up_to_date($file, $object);
$cbuilder->compile(object_file => $object,
- extra_compiler_flags=>["-D_FORTIFY_SOURCE=1"],
+ extra_compiler_flags=>["-D_FORTIFY_SOURCE=1",@EXTRA_FLAGS],
source => $file,
include_dirs => ["btparse/src"]);
}
diff --git a/lib/Text/BibTeX.pm b/lib/Text/BibTeX.pm
index a49b071..54ed65a 100644
--- a/lib/Text/BibTeX.pm
+++ b/lib/Text/BibTeX.pm
@@ -13,18 +13,19 @@
# ----------------------------------------------------------------------
package Text::BibTeX;
+use Text::BibTeX::Name;
+use Text::BibTeX::NameFormat;
use 5.008001; # needed for Text::BibTeX::Entry
use strict;
-#use UNIVERSAL qw(isa can); # for 'check_class' subroutine
use Carp;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $AUTOLOAD);
require Exporter;
require DynaLoader;
-our $VERSION='0.74';
+our $VERSION='0.76';
@ISA = qw(Exporter DynaLoader);
%EXPORT_TAGS = (nodetypes => [qw(BTAST_STRING BTAST_MACRO BTAST_NUMBER)],
@@ -45,9 +46,53 @@ our $VERSION='0.74';
@{$EXPORT_TAGS{'nodetypes'}},
@{$EXPORT_TAGS{'nameparts'}},
@{$EXPORT_TAGS{'joinmethods'}},
- 'check_class', 'display_list');
+ 'check_class', 'display_list' );
@EXPORT = @{$EXPORT_TAGS{'metatypes'}};
+use Encode 'encode', 'decode';
+use Unicode::Normalize;
+
+
+sub _process_result {
+ no strict 'refs';
+ my ( $self, $result, $encoding, $norm ) = @_;
+
+ $norm ||= "NFC"; # best to force it here.
+ my $normsub = \&{"$norm"}; # symbolic ref
+ if ( $encoding eq "utf-8" ) {
+ if ( utf8::is_utf8($result) ) {
+ return $normsub->($result);
+ }
+ else {
+ return $normsub->( decode( $encoding, $result ) );
+ }
+ }
+ else { return $result; }
+
+}
+
+sub _process_argument {
+ my ( $self, $value, $encoding ) = @_;
+
+ if ( $encoding eq "utf-8" && utf8::is_utf8($value)) {
+ return encode( $encoding, $value );
+ }
+ else {
+ return $value;
+ }
+}
+
+sub split_list {
+ my ( $field, $delim, $filename, $line, $desc, $opts ) = @_;
+ $opts ||= {};
+ $opts->{binmode} ||= 'bytes';
+ $opts->{normalization} ||= 'NFC';
+ return
+ map { Text::BibTeX->_process_result( $_, $opts->{binmode}, $opts->{normalization} ) }
+ Text::BibTeX::isplit_list( $field, $delim, $filename, $line, $desc );
+
+}
+
=head1 NAME
Text::BibTeX - interface to read and parse BibTeX files
@@ -241,6 +286,16 @@ bootstrap Text::BibTeX;
initialize(); # these are both XS functions
END { &cleanup; }
+# This can't go in a BEGIN because of the .XS bootstrapping mechanism
+_define_months();
+
+sub _define_months {
+ for my $month (qw.january february march april may june
+ july august september october november december.) {
+ add_macro_text(substr($month, 0, 3), ucfirst($month));
+ }
+}
+
=head1 EXPORTS
@@ -572,7 +627,22 @@ of it as appropriate. They're just mentioned here for completeness.
=over 4
-=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
+=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION [, OPTS]]]])
+
+Splits a string on a fixed delimiter according to the BibTeX rules for
+splitting up lists of names. With BibTeX, the delimiter is hard-coded
+as C<"and">; here, you can supply any string. Instances of DELIM in
+STRING are considered delimiters if they are at brace-depth zero,
+surrounded by whitespace, and not at the beginning or end of STRING; the
+comparison is case-insensitive. See L<bt_split_names> for full details
+of how splitting is done (it's I<not> the same as Perl's C<split>
+function). OPTS is a hash ref of the same binmode and normalization
+arguments as with, e.g. Text::BibTeX::File->open(). split_list calls isplit_list()
+internally but handles UTF-8 conversion and normalization, if requested.
+
+Returns the list of strings resulting from splitting STRING on DELIM.
+
+=item isplit_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
Splits a string on a fixed delimiter according to the BibTeX rules for
splitting up lists of names. With BibTeX, the delimiter is hard-coded
@@ -581,7 +651,8 @@ STRING are considered delimiters if they are at brace-depth zero,
surrounded by whitespace, and not at the beginning or end of STRING; the
comparison is case-insensitive. See L<bt_split_names> for full details
of how splitting is done (it's I<not> the same as Perl's C<split>
-function).
+function). This function returns bytes. Use Text::BibTeX::split_list to specify
+the same binmode and normalization arguments as with, e.g. Text::BibTeX::File->open().
Returns the list of strings resulting from splitting STRING on DELIM.
@@ -654,7 +725,8 @@ takes no action.
=item delete_all_macros ()
-Deletes all macros from the macro table.
+Deletes all macros from the macro table, even the predefined month
+names.
=item macro_length (MACRO)
diff --git a/lib/Text/BibTeX/Bib.pm b/lib/Text/BibTeX/Bib.pm
index d400dd3..9fb50f5 100644
--- a/lib/Text/BibTeX/Bib.pm
+++ b/lib/Text/BibTeX/Bib.pm
@@ -27,7 +27,7 @@ Text::BibTeX::Bib - defines the "Bib" database structure
=head1 SYNOPSIS
- $bibfile = new Text::BibTeX::File $filename;
+ $bibfile = Text::BibTeX::File->new($filename);
$bibfile->set_structure ('Bib',
# Default option values:
sortby => 'name',
@@ -45,11 +45,11 @@ Text::BibTeX::Bib - defines the "Bib" database structure
$bibfile->set_option (labels => 'alpha'); # not implemented yet!
# parse entry from $bibfile and automatically make it a BibEntry
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
# or get an entry from somewhere else which is hard-coded to be
# a BibEntry
- $entry = new Text::BibTeX::BibEntry ...;
+ $entry = Text::BibTeX::BibEntry->new(...);
$sortkey = $entry->sort_key;
@blocks = $entry->format;
@@ -111,7 +111,7 @@ package Text::BibTeX::BibStructure;
use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(Text::BibTeX::Structure);
-$VERSION = '0.74';
+$VERSION = '0.76';
=head1 STRUCTURE OPTIONS
@@ -450,33 +450,13 @@ package Text::BibTeX::BibEntry;
use strict;
use vars qw(@ISA $VERSION);
-$VERSION = '0.74';
+$VERSION = '0.76';
use Text::BibTeX::BibSort;
use Text::BibTeX::BibFormat;
@ISA = qw(Text::BibTeX::BibSort Text::BibTeX::BibFormat);
-
-# Pre-define the "month name" macros for compatibility with BibTeX.
-# This ignores all sorts of issues, like internationalization and
-# abbreviation.
-my %month_names =
- ('jan' => 'January',
- 'feb' => 'February',
- 'mar' => 'March',
- 'apr' => 'April',
- 'may' => 'May',
- 'jun' => 'June',
- 'jul' => 'July',
- 'aug' => 'August',
- 'sep' => 'September',
- 'oct' => 'October',
- 'nov' => 'November',
- 'dec' => 'December');
-
-my ($macro, $expansion);
-Text::BibTeX::add_macro_text ($macro, $expansion)
- while (($macro, $expansion) = each %month_names);
+
1;
diff --git a/lib/Text/BibTeX/BibFormat.pm b/lib/Text/BibTeX/BibFormat.pm
index 2c78d04..f286fc1 100644
--- a/lib/Text/BibTeX/BibFormat.pm
+++ b/lib/Text/BibTeX/BibFormat.pm
@@ -26,7 +26,7 @@ use Text::BibTeX::NameFormat;
use Text::BibTeX::Structure;
@ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX qw(:subs display_list :nameparts :joinmethods);
@@ -148,7 +148,7 @@ sub format_names
unless $style =~ /^(full|abbrev|nopunct|nospace)$/;
$order = ($order eq 'first') ? 'fvlj' : 'vljf';
- $format = new Text::BibTeX::NameFormat ($order, ! ($style eq 'full'));
+ $format = Text::BibTeX::NameFormat->new ($order, ! ($style eq 'full'));
$format->set_text (&BTN_FIRST, undef, undef, undef, '')
if $style eq 'nopunct' || $style eq 'nospace';
diff --git a/lib/Text/BibTeX/BibSort.pm b/lib/Text/BibTeX/BibSort.pm
index dc23a89..52b60ac 100644
--- a/lib/Text/BibTeX/BibSort.pm
+++ b/lib/Text/BibTeX/BibSort.pm
@@ -23,7 +23,7 @@ use vars qw(@ISA $VERSION);
use Text::BibTeX::Structure;
@ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX qw(purify_string change_case);
@@ -147,8 +147,8 @@ sub sort_format_names
my ($abbrev, $format, $name);
$abbrev = ! ($self->structure->get_options ('namestyle') eq 'full');
- $format = new Text::BibTeX::NameFormat ("vljf", $abbrev);
- $name = new Text::BibTeX::Name;
+ $format = Text::BibTeX::NameFormat->new ("vljf", $abbrev);
+ $name = Text::BibTeX::Name->new;
my (@snames, $i, $sname);
@snames = $self->split ($field);
diff --git a/lib/Text/BibTeX/Entry.pm b/lib/Text/BibTeX/Entry.pm
index 6fdf242..36422cc 100644
--- a/lib/Text/BibTeX/Entry.pm
+++ b/lib/Text/BibTeX/Entry.pm
@@ -23,7 +23,7 @@ use vars qw'$VERSION';
use Carp;
use Text::BibTeX qw(:metatypes :nodetypes);
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -31,7 +31,7 @@ Text::BibTeX::Entry - read and parse BibTeX files
=head1 SYNOPSIS
- use Text::BibTeX; # do not use Text::BibTeX::Entry alone!
+ use Text::BibTeX::Entry;
# ...assuming that $bibfile and $newbib are both objects of class
# Text::BibTeX::File, opened for reading and writing (respectively):
@@ -127,7 +127,7 @@ anything extra.
=over 4
-=item new ([SOURCE])
+=item new ([OPTS ,] [SOURCE])
Creates a new C<Text::BibTeX::Entry> object. If the SOURCE parameter is
supplied, it must be one of the following: a C<Text::BibTeX::File> (or
@@ -169,6 +169,28 @@ But using a C<Text::BibTeX::File> object is simpler and preferred:
Returns the new object, unless SOURCE is supplied and reading/parsing
the entry fails (e.g., due to end of file) -- then it returns false.
+You may supply a reference to an option hash as first argument.
+Supported options are:
+
+=over 4
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings.
+
+    Text::BibTeX::Entry->new(
+        { binmode => 'utf-8', normalization => 'NFD' },
+        $file );
+
+=item NORMALIZATION
+
+You can customise the normalization form of UTF-8 strings with the NORMALIZATION option.
+
+=back
+
+
=cut
sub new
@@ -176,6 +198,7 @@ sub new
my ($class, @source) = @_;
$class = ref ($class) || $class;
+
my $self = {'file' => undef,
'type' => undef,
'key' => undef,
@@ -183,8 +206,15 @@ sub new
'metatype' => undef,
'fields' => [],
'values' => {}};
-
bless $self, $class;
+
+ my $opts = {};
+ $opts = shift @source if scalar(@source) and ref $source[0] eq "HASH";
+ $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+ $self->{binmode} = 'utf-8'
+ if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+ $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
if (@source)
{
my $status;
@@ -234,6 +264,8 @@ sub clone
$clone->{file} = $self->{file}
}
# These might be changed so make copies
+ $clone->{binmode} = $self->{binmode};
+ $clone->{normalization} = $self->{normalization};
$clone->{type} = $self->{type};
$clone->{key} = $self->{key};
$clone->{status} = $self->{status};
@@ -271,6 +303,10 @@ sub read
my $fn = $source->{'filename'};
my $fh = $source->{'handle'};
$self->{'file'} = $source; # store File object for later use
+ ## Propagate flags
+ for my $f (qw.binmode normalization.) {
+ $self->{$f} = $source->{$f} unless exists $self->{$f};
+ }
return $self->parse ($fn, $fh, $preserve);
}
@@ -392,27 +428,39 @@ context.)
=item fieldlist ()
-Returns the list of fields in the entry. In a scalar context, returns a
-reference to the object's own list of fields. That way, you can change or
-reorder the field list with minimal interference from the class. I'm not
-entirely sure if this is a good idea, so don't rely on it existing in the
-future; feel free to play around with it and let me know if you get bitten
-in dangerous ways or find this enormously useful.
+Returns the list of fields in the entry.
+
+B<WARNING> In scalar context, it no longer returns a
+reference to the object's own list of fields.
=cut
sub parse_ok { shift->{'status'}; }
-sub metatype { shift->{'metatype'}; }
+sub metatype {
+ my $self = shift;
+ Text::BibTeX->_process_result( $self->{'metatype'}, $self->{binmode}, $self->{normalization} );
+}
-sub type { shift->{'type'}; }
+sub type {
+ my $self = shift;
+ Text::BibTeX->_process_result( $self->{'type'}, $self->{binmode}, $self->{normalization} );
+}
-sub key { shift->{'key'}; }
+sub key {
+ my $self = shift;
+ exists $self->{key}
+ ? Text::BibTeX->_process_result($self->{key}, $self->{binmode}, $self->{normalization})
+ : undef;
+}
sub num_fields { scalar @{shift->{'fields'}}; }
-sub fieldlist { wantarray ? @{shift->{'fields'}} : shift->{'fields'}; }
-
+sub fieldlist {
+ my $self = shift;
+ return map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization})} @{$self->{'fields'}};
+}
+
=item exists (FIELD)
Returns true if a field named FIELD is present in the entry, false
@@ -484,17 +532,24 @@ sub exists
{
my ($self, $field) = @_;
- exists $self->{'values'}{$field};
+ exists $self->{values}{Text::BibTeX->_process_argument($field, $self->{binmode}, $self->{normalization})};
}
sub get
{
my ($self, @fields) = @_;
- @{$self->{'values'}}{@fields};
+ my @x = @{$self->{'values'}}{map {Text::BibTeX->_process_argument($_, $self->{binmode}, $self->{normalization})} @fields};
+
+ @x = map {defined($_) ? Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}): undef} @x;
+
+ return (@x > 1) ? @x : $x[0];
}
-sub value { shift->{'value'} }
+sub value {
+ my $self = shift;
+ Text::BibTeX->_process_result($self->{value}, $self->{binmode}, $self->{normalization});
+}
=head2 Author name methods
@@ -590,17 +645,19 @@ sub split
{
my ($self, $field, $delim, $desc) = @_;
- return unless $self->exists ($field);
+ return unless $self->exists($field);
$delim ||= 'and';
$desc ||= 'name';
- my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
- my $line = $self->{'lines'}{$field};
-
# local $^W = 0 # suppress spurious warning from
# unless defined $filename; # undefined $filename
- Text::BibTeX::split_list ($self->{'values'}{$field}, $delim,
- $filename, $line, $desc);
+ Text::BibTeX::split_list($self->{values}{$field},
+ $delim,
+ ($self->{file} && $self->{file}{filename}),
+ $self->{lines}{$field},
+ $desc,
+ {binmode => $self->{binmode},
+ normalization => $self->{normalization}});
}
sub names
@@ -618,7 +675,8 @@ sub names
# unless defined $filename; # undefined $filename
for $i (0 .. $#names)
{
- $names[$i] = Text::BibTeX::Name->new($names[$i], $filename, $line, $i);
+ $names[$i] = Text::BibTeX::Name->new(
+ {binmode => $self->{binmode}, normalization => $self->{normalization}},$names[$i], $filename, $line, $i);
}
@names;
}
@@ -696,7 +754,7 @@ sub set_key
{
my ($self, $key) = @_;
- $self->{'key'} = $key;
+ $self->{'key'} = Text::BibTeX->_process_argument($key, $self->{binmode}, $self->{normalization});
}
sub set
@@ -708,7 +766,7 @@ sub set
while (@_)
{
- ($field,$value) = (shift,shift);
+ ($field,$value) = (shift,Text::BibTeX->_process_argument(shift, $self->{binmode}, $self->{normalization}));
push (@{$self->{'fields'}}, $field)
unless exists $self->{'values'}{$field};
$self->{'values'}{$field} = $value;
@@ -870,7 +928,8 @@ sub print_s
# Tack on the last line, and we're done!
$output .= "}\n\n";
- $output;
+
+ Text::BibTeX->_process_result($output, $self->{binmode}, $self->{normalization});
}
=back
diff --git a/lib/Text/BibTeX/File.pm b/lib/Text/BibTeX/File.pm
index 3e6e888..bd94163 100644
--- a/lib/Text/BibTeX/File.pm
+++ b/lib/Text/BibTeX/File.pm
@@ -22,7 +22,7 @@ use Carp;
use IO::File;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -30,12 +30,12 @@ Text::BibTeX::File - interface to whole BibTeX files
=head1 SYNOPSIS
- use Text::BibTeX; # this loads Text::BibTeX::File
+ use Text::BibTeX::File;
- $bib = new Text::BibTeX::File "foo.bib" or die "foo.bib: $!\n";
+ $bib = Text::BibTeX::File->new("foo.bib") or die "foo.bib: $!\n";
# or:
- $bib = new Text::BibTeX::File;
- $bib->open ("foo.bib") || die "foo.bib: $!\n";
+ $bib = Text::BibTeX::File->new;
+ $bib->open("foo.bib", {binmode => 'utf-8', normalization => 'NFC'}) || die "foo.bib: $!\n";
$bib->set_structure ($structure_name,
$option1 => $value1, ...);
@@ -61,19 +61,56 @@ These concepts are fully documented in L<Text::BibTeX::Structure>.
=over 4
-=item new ([FILENAME [,MODE [,PERMS]]])
+=item new ([FILENAME [, OPTS]])
-Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied,
-passes it to the C<open> method (along with MODE and PERMS if they
-are supplied). If the C<open> fails, C<new> fails and returns false; if
-the C<open> succeeds (or if FILENAME isn't supplied), C<new> returns the
-new object reference.
+Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied, passes
+it to the C<open> method (along with OPTS). If the C<open> fails, C<new>
+fails and returns false; if the C<open> succeeds (or if FILENAME isn't
+supplied), C<new> returns the new object reference.
-=item open (FILENAME [,MODE [,PERMS]])
+=item open (FILENAME [, OPTS])
-Opens the file specified by FILENAME, possibly using MODE and PERMS.
-See L<IO::File> for full semantics; this C<open> is just a front end for
-C<IO::File::open>.
+Opens the file specified by FILENAME. OPTS is a hashref that can have
+the following values:
+
+=over 4
+
+=item MODE
+
+mode as specified by L<IO::File>
+
+=item PERMS
+
+permissions as specified by L<IO::File>. Can only be used in conjunction
+with C<MODE>.
+
+=item BINMODE
+
+By default, Text::BibTeX uses bytes directly. Thus, you need to encode
+strings according to the encoding of the files you are reading. You can
+also select UTF-8. In this case, Text::BibTeX will return UTF-8 strings in
+NFC mode. Note that at the moment files with BOM are not supported.
+
+Valid values are 'raw/bytes' or 'utf-8'.
+
+=item NORMALIZATION
+
+By default, Text::BibTeX outputs UTF-8 in NFC form. You can change this by passing
+the name of a different form.
+
+Valid values are those forms supported by the Unicode::Normalize module
+('NFD', 'NFKD' etc.)
+
+=item RESET_MACROS
+
+By default, Text::BibTeX accumulates macros. This means that when you open a second
+file, macros defined by the first are still available. This may result in warnings
+of macros being redefined.
+
+This option can be used to force Text::BibTeX to clean up all macro definitions
+(except for the month macros).
+
+=back
=item close ()
@@ -95,19 +132,45 @@ sub new
$class = ref ($class) || $class;
my $self = bless {}, $class;
- ($self->open (@_) || return undef) if @_; # filename [, mode [, perms]]
+ ($self->open (@_) || return undef) if @_;
$self;
}
-sub open
-{
- my $self = shift;
-
- $self->{filename} = $_[0];
- $self->{handle} = new IO::File;
- $self->{handle}->open (@_); # filename, maybe mode, maybe perms
+sub open {
+ my ($self) = shift;
+ $self->{filename} = shift;
+
+ $self->{binmode} = 'bytes';
+ $self->{normalization} = 'NFC';
+ my @args = ( $self->{filename} );
+
+ if ( ref $_[0] eq "HASH" ) {
+ my $opts = {};
+ $opts = shift;
+ $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+ $self->{binmode} = 'utf-8'
+ if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+ $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
+ if (exists $opts->{reset_macros} && $opts->{reset_macros}) {
+ Text::BibTeX::delete_all_macros();
+ Text::BibTeX::_define_months();
+ }
+
+ if ( exists $opts->{mode} ) {
+ push @args, $opts->{mode};
+ push @args, $opts->{perms} if exists $opts->{perms};
+ }
+ }
+ else {
+ push @args, @_;
+ }
+
+ $self->{handle} = IO::File->new;
+ $self->{handle}->open(@args); # filename, maybe mode, maybe perms
}
+
sub close
{
my $self = shift;
@@ -155,7 +218,7 @@ sub set_structure
croak "Text::BibTeX::File::set_structure: options list must have even " .
"number of elements"
unless @options % 2 == 0;
- $self->{structure} = new Text::BibTeX::Structure ($structure, @options);
+ $self->{structure} = Text::BibTeX::Structure->new($structure, @options);
}
sub structure { shift->{structure} }
diff --git a/lib/Text/BibTeX/Name.pm b/lib/Text/BibTeX/Name.pm
index 2e69281..b8a72b0 100644
--- a/lib/Text/BibTeX/Name.pm
+++ b/lib/Text/BibTeX/Name.pm
@@ -23,7 +23,7 @@ require 5.004;
use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX;
@@ -33,6 +33,8 @@ Text::BibTeX::Name - interface to BibTeX-style author names
=head1 SYNOPSIS
+ use Text::BibTeX::Name;
+
$name = Text::BibTeX::Name->new();
$name->split('J. Random Hacker');
# or:
@@ -269,7 +271,7 @@ way is the job of another module: see L<Text::BibTeX::NameFormat>.
=over 4
-=item new([ NAME [, FILENAME, LINE, NAME_NUM]])
+=item new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]])
Creates a new C<Text::BibTeX::Name> object. If NAME is supplied, it
must be a string containing a single name, and it will be be passed to
@@ -277,17 +279,46 @@ the C<split> method for further processing. FILENAME, LINE, and
NAME_NUM, if present, are all also passed to C<split> to allow better
error messages.
+If the first argument is a hash reference, it is used to define
+configuration values. At the moment the available values are:
+
+=over 4
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings and you can customise the normalization with the NORMALIZATION option.
+
+ Text::BibTeX::Name->new(
+ { binmode => 'utf-8', normalization => 'NFD' },
+ "Alberto Simões");
+
+=back
+
=cut
-sub new
-{
- my ($class, $name, $filename, $line, $name_num) = @_;
+sub new {
+ my $class = shift;
+ my $opts = ref $_[0] eq 'HASH' ? shift : {};
+
+ $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+
+ my ( $name, $filename, $line, $name_num ) = @_;
+
+ $class = ref($class) || $class;
+ my $self = bless { }, $class;
- $class = ref ($class) || $class;
- my $self = bless {}, $class;
- $self->split ($name, $filename, $line, $name_num, 1)
- if (defined $name);
- $self;
+ $self->{binmode} = 'bytes';
+ $self->{normalization} = 'NFC';
+ $self->{binmode} = 'utf-8'
+ if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+ $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
+ $self->split( Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}),
+ $filename, $line, $name_num, 1 )
+ if ( defined $name );
+ $self;
}
@@ -317,7 +348,7 @@ sub split
my ($self, $name, $filename, $line, $name_num) = @_;
# Call the XSUB with default values if necessary
- $self->_split ($name, $filename,
+ $self->_split (Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}), $filename,
defined $line ? $line : -1,
defined $name_num ? $name_num : -1,
1);
@@ -341,13 +372,18 @@ would return the list C<('de','la')>.
=cut
-sub part
-{
- my ($self, $partname) = @_;
+sub part {
+ my ( $self, $partname ) = @_;
+
+ croak "unknown name part"
+ unless $partname =~ /^(first|von|last|jr)$/;
- croak "unknown name part"
- unless $partname =~ /^(first|von|last|jr)$/;
- exists $self->{$partname} ? @{$self->{$partname}} : ();
+ if ( exists $self->{$partname} ) {
+ my @x = map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}) }
+ @{ $self->{$partname} };
+ return @x > 1 ? @x : $x[0];
+ }
+ return undef;
}
diff --git a/lib/Text/BibTeX/NameFormat.pm b/lib/Text/BibTeX/NameFormat.pm
index 04dcef9..1d980a8 100644
--- a/lib/Text/BibTeX/NameFormat.pm
+++ b/lib/Text/BibTeX/NameFormat.pm
@@ -23,7 +23,7 @@ require 5.004;
use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -31,6 +31,8 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
=head1 SYNOPSIS
+ use Text::BibTeX::NameFormat;
+
$format = Text::BibTeX::NameFormat->($parts, $abbrev_first);
$format->set_text ($part,
@@ -39,6 +41,7 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
$format->set_options ($part, $abbrev, $join_tokens, $join_part
+ ## Uses the encoding/binmode and normalization form stored in $name
$formatted_name = $format->apply ($name);
=head1 DESCRIPTION
@@ -242,7 +245,12 @@ sub apply
croak "invalid Name object: no C structure";
my $format_struct = $self->{'_cstruct'} ||
croak "invalid NameFormat object: no C structure";
- format_name ($name_struct, $format_struct);
+
+ my $ans = format_name ($name_struct, $format_struct);
+
+ $ans = Text::BibTeX->_process_result($ans, $name->{binmode}, $name->{normalization});
+
+ return $ans;
}
=back
diff --git a/lib/Text/BibTeX/Structure.pm b/lib/Text/BibTeX/Structure.pm
index a15bf08..a42c39e 100644
--- a/lib/Text/BibTeX/Structure.pm
+++ b/lib/Text/BibTeX/Structure.pm
@@ -24,7 +24,7 @@ use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX ('check_class');
@@ -322,8 +322,8 @@ module. A short example will illustrate this.
Typically, a C<Text::BibTeX>-based program is based around a kernel of
code like this:
- $bibfile = new Text::BibTeX::File "foo.bib";
- while ($entry = new Text::BibTeX::Entry $bibfile)
+ $bibfile = Text::BibTeX::File->new("foo.bib");
+ while ($entry = Text::BibTeX::Entry->new($bibfile))
{
# process $entry
}
@@ -336,9 +336,9 @@ behaviour. Let us now suppose that C<$bibfile> is expected to conform
to a database structure specified by C<$structure> (presumably a
user-supplied value, and thus unknown at compile-time):
- $bibfile = new Text::BibTeX::File "foo.bib";
+ $bibfile = Text::BibTeX::File->new("foo.bib");
$bibfile->set_structure ($structure);
- while ($entry = new Text::BibTeX::Entry $bibfile)
+ while ($entry = Text::BibTeX::Entry->new($bibfile))
{
# process $entry
}
@@ -450,7 +450,7 @@ implements the C<Bib> structure. Use the pseudo-option C<module> to
override this module name. For instance, if the structure C<Foo> is
implemented by the module C<Foo>:
- $structure = new Text::BibTeX::Structure
+ $structure = Text::BibTeX::Structure->new
('Foo', module => 'Foo');
This method C<die>s if there are any errors loading/compiling the
@@ -870,7 +870,7 @@ sub get_options
package Text::BibTeX::StructuredEntry;
use strict;
use vars qw(@ISA $VERSION);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Carp;
diff --git a/lib/Text/BibTeX/Value.pm b/lib/Text/BibTeX/Value.pm
index 7c1d178..97bc7a8 100644
--- a/lib/Text/BibTeX/Value.pm
+++ b/lib/Text/BibTeX/Value.pm
@@ -22,7 +22,7 @@ use Scalar::Util 'blessed';
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -32,7 +32,7 @@ Text::BibTeX::Value - interfaces to BibTeX values and simple values
use Text::BibTeX;
- $entry = new Text::BibTeX::Entry;
+ $entry = Text::BibTeX::Entry->new;
# set the 'preserve_values' flag to 1 for this parse
$entry->parse ($filename, $filehandle, 1);
@@ -80,7 +80,7 @@ C<title> field is a single string, and the C<journal> and C<year> fields
are, respectively, a single macro and a single number. If you parse
this entry in the usual way:
- $entry = new Text::BibTeX::Entry $entry_text;
+ $entry = Text::BibTeX::Entry->new($entry_text);
then the C<get> method on C<$entry> would return simple strings.
Assuming that the C<and> macro is defined as C<" and ">, then
@@ -96,14 +96,14 @@ There are two ways to make this request: per-file and per-entry. For a
per-file request, use the C<preserve_values> method on your C<File>
object:
- $bibfile = new Text::BibTeX::File $filename;
+ $bibfile = Text::BibTeX::File->new($filename);
$bibfile->preserve_values (1);
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
$entry->get ($field); # returns a Value object
$bibfile->preserve_values (0);
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
$entry->get ($field); # returns a string
If you're not using a C<File> object, or want to control things at a
@@ -111,7 +111,7 @@ finer scale, then you have to pass in the C<preserve_values> flag when
invoking C<read>, C<parse>, or C<parse_s> on your C<Entry> objects:
# no File object, parsing from a string
- $entry = new Text::BibTeX::Entry;
+ $entry = Text::BibTeX::Entry->new;
$entry->parse_s ($entry_text, 0); # preserve_values=0 (default)
$entry->get ($field); # returns a string
@@ -183,8 +183,8 @@ two-element list containing the type and text of the simple value. For
example, one way to recreate the C<author> field of the example entry in
L<"DESCRIPTION"> would be:
- $and_macro = new Text::BibTeX::SimpleValue (BTAST_MACRO, 'and');
- $value = new Text::BibTeX::Value
+ $and_macro = Text::BibTeX::SimpleValue->new (BTAST_MACRO, 'and');
+ $value = Text::BibTeX::Value->new
([BTAST_STRING, 'Homer Simpson'],
$and_macro,
[BTAST_STRING, 'Ned Flanders']);
@@ -202,7 +202,7 @@ sub new
my $self = bless [], $class;
while (my $sval = shift)
{
- $sval = new Text::BibTeX::SimpleValue @$sval
+ $sval = Text::BibTeX::SimpleValue->new(@$sval)
if ref $sval eq 'ARRAY' && @$sval == 2;
croak "simple value is neither a two-element array ref " .
"nor a Text::BibTeX::SimpleValue object"
@@ -245,7 +245,7 @@ use Carp;
use Text::BibTeX qw(:nodetypes);
use vars qw($VERSION);
-$VERSION = '0.74';
+$VERSION = '0.76';
=head2 Text::BibTeX::SimpleValue methods
diff --git a/scripts/btcheck b/scripts/btcheck
index 12cee09..01c5ec3 100755
--- a/scripts/btcheck
+++ b/scripts/btcheck
@@ -18,10 +18,10 @@ die "usage: btcheck file [structure]\n" unless @ARGV == 1 || @ARGV == 2;
($filename, $structure) = @ARGV;
$structure ||= 'Bib';
-$bibfile = new Text::BibTeX::File $filename or die "$filename: $!\n";
+$bibfile = Text::BibTeX::File->new( $filename) or die "$filename: $!\n";
$bibfile->set_structure ($structure);
-while ($entry = new Text::BibTeX::Entry $bibfile)
+while ($entry = Text::BibTeX::Entry->new( $bibfile))
{
next unless $entry->parse_ok and $entry->metatype == BTE_REGULAR;
my $key = $entry->key;
diff --git a/scripts/btformat b/scripts/btformat
index 6038664..4d3a1ab 100755
--- a/scripts/btformat
+++ b/scripts/btformat
@@ -63,14 +63,14 @@ die "$usage\nIncorrect number of arguments\n" unless (@ARGV == 1);
my ($filename, $bibfile, $entry, %select);
$filename = shift;
-$bibfile = new Text::BibTeX::File $filename or die "$filename: $!\n";
+$bibfile = Text::BibTeX::File->new( $filename) or die "$filename: $!\n";
$bibfile->set_structure ('Bib', namestyle => 'nopunct', nameorder => 'first');
%select = map { ($_ => 1) } @select
if @select;
my $entry_num = 0;
-while ($entry = new Text::BibTeX::Entry $bibfile)
+while ($entry = Text::BibTeX::Entry->new( $bibfile))
{
next unless $entry->parse_ok && $entry->metatype == BTE_REGULAR;
next if (@select && ! $select{$entry->key});
diff --git a/scripts/btsort b/scripts/btsort
index 0a3493a..6763809 100755
--- a/scripts/btsort
+++ b/scripts/btsort
@@ -17,10 +17,10 @@ die "usage: btcheck file [structure [options]]\n" unless @ARGV >= 1;
($filename, $structure, @options) = @ARGV;
$structure ||= 'Bib';
-$bibfile = new Text::BibTeX::File $filename or die "$filename: $!\n";
+$bibfile = Text::BibTeX::File->new( $filename) or die "$filename: $!\n";
$bibfile->set_structure ('Bib', @options);
-while ($entry = new Text::BibTeX::Entry $bibfile)
+while ($entry = Text::BibTeX::Entry->new( $bibfile))
{
next unless $entry->parse_ok && $entry->metatype == BTE_REGULAR;
$entry->check;
diff --git a/t/bib.t b/t/bib.t
index 570e26c..322dc00 100644
--- a/t/bib.t
+++ b/t/bib.t
@@ -32,7 +32,8 @@ my $entries = <<'ENTRIES';
@book{george98,
author = "George Simpson",
title = "How to Found a Big Department Store",
- year = 1998
+ year = 1998,
+ month = feb
}
ENTRIES
@@ -46,13 +47,13 @@ close $fh;
# Open it as a Text::BibTeX::File object, set the structure class (which
# controls the structured entry class of all entries parsed from that
# file), and get the structure class (so we can set options on it).
-my $file = new Text::BibTeX::File ($fn);
+my $file = Text::BibTeX::File->new ($fn);
$file->set_structure ('Bib');
my $structure = $file->structure;
# Read the two entries
-my $entry1 = new Text::BibTeX::BibEntry $file;
-my $entry2 = new Text::BibTeX::BibEntry $file;
+my $entry1 = Text::BibTeX::BibEntry->new( $file );
+my $entry2 = Text::BibTeX::BibEntry->new( $file );
$file->close;
#unlink ($fn) || warn "couldn't delete temporary file $fn: $!\n";
@@ -136,7 +137,7 @@ ok(! $blocks[2][1][0]); # no publisher
ok(! $blocks[2][1][1]); # no publisher address
ok(! $blocks[2][1][2]); # no edition
-is($blocks[2][1][3], '1998'); # but we do at least have a date!
+is($blocks[2][1][3], 'February 1998'); # but we do at least have a date!
# fiddle a bit more with name-generation options just to make sure
# everything's in working order
diff --git a/t/common.pl b/t/common.pl
index b4cc46a..c15c2b1 100644
--- a/t/common.pl
+++ b/t/common.pl
@@ -37,24 +37,26 @@ sub slist_equal {
}
sub test_entry {
- my ($entry, $type, $key, $fields, $values) = @_;
+ my ($entry, $type, $key, $fields, $values, $test) = @_;
my ($i, @vals);
+ $test ||= "";
+
croak "test_entry: num fields != num values"
unless $#$fields == $#$values;
- ok($entry->parse_ok);
- is($entry->type, $type);
+ ok($entry->parse_ok, "Parse ok for $test");
+ is($entry->type, $type, "Type ok for $test");
if (defined $key) {
- is($entry->key, $key);
+ is($entry->key, $key, "Key ok for $test");
} else {
- ok(!defined $entry->key);
+ ok(!defined $entry->key, "Key ok for $test");
}
ok(slist_equal ([$entry->fieldlist], $fields));
- for $i (0 .. $#$fields) {
- my $val = $entry->get ($fields->[$i]) || '';
+ for $i (0 .. $#$fields) {
+ my $val = $entry->get ($fields->[$i]) || '';
ok($entry->exists ($fields->[$i]));
is($val, $values->[$i]);
}
diff --git a/t/corpora.bib b/t/corpora.bib
new file mode 100644
index 0000000..2544e0d
--- /dev/null
+++ b/t/corpora.bib
@@ -0,0 +1,264 @@
+
+@Article{linguamatica:6:2:Laboreiroetal,
+ author = {Gustavo Laboreiro and Eugénio Oliveira},
+ title = {Avaliação de métodos de desofuscação de palavrões},
+ journal = {Linguamática},
+ year = {2014},
+ volume = {6},
+ number = {2},
+ pages = {25--43},
+ month = {Dezembro},
+ editor = {Alberto Simões and José João Almeida and Xavier Gómez Guinovart}
+}
+
+@Article{Arbelatz13,
+ Title = {An extensive comparative study of cluster validity indicess},
+ Author = {Arbelaitz, Olatz and Gurrutxaga, Ibai and Muguerza, Javier and Pérez, Jesús M and Perona, Iñigo},
+ Journal = {Pattern Recognition},
+ Year = {2013},
+ Number = {1},
+ Pages = {243--256},
+ Volume = {46},
+
+ Publisher = {Elsevier}
+}
+
+@InProceedings{ester1996density,
+ Title = {A density-based algorithm for discovering clusters in large spatial databases with noise},
+ Author = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Xu, Xiaowei},
+ Booktitle = {Proceedings of Knowledge Discovery and Data Mining},
+ Year = {1996},
+ Number = {34},
+ Pages = {226--231},
+ Volume = {96}
+}
+
+@Article{frey2007clustering,
+ Title = {Clustering by passing messages between data points},
+ Author = {Frey, Brendan J and Dueck, Delbert},
+ Journal = {Science},
+ Year = {2007},
+ Number = {5814},
+ Pages = {972--976},
+ Volume = {315},
+
+ Publisher = {American Association for the Advancement of Science}
+}
+
+@Article{fukunaga1975estimation,
+ Title = {The estimation of the gradient of a density function, with applications in pattern recognition},
+ Author = {Fukunaga, Keinosuke and Hostetler, Larry D},
+ Journal = {IEEE Transactions on Information Theory},
+ Year = {1975},
+ Number = {1},
+ Pages = {32--40},
+ Volume = {21},
+
+ Publisher = {IEEE}
+}
+
+@Article{grvcar2012methodology,
+ Title = {A methodology for mining document-enriched heterogeneous information networks},
+ Author = {Gr{\v{c}}ar, Miha and Trdin, Nejc and Lavra{\v{c}}, Nada},
+ Journal = {The Computer Journal},
+ Year = {2012},
+
+ Publisher = {Br Computer Soc}
+}
+
+@Article{hartigan1979algorithm,
+ Title = {Algorithm {AS} 136: {A} k-means clustering algorithm},
+ Author = {Hartigan, John A and Wong, Manchek A},
+ Journal = {Applied Statistics},
+ Year = {1979},
+ Pages = {100--108},
+
+ Publisher = {JSTOR}
+}
+
+@InProceedings{huynh2012scientific,
+ Title = {Scientific publication recommendations based on collaborative citation networks},
+ Author = {Huynh, Tin and Hoang, Kiem and Do, Loc and Tran, Huong and Luong, Hiep and Gauch, Susan},
+ Booktitle = {International Conference on Collaboration Technologies and Systems (CTS)},
+ Year = {2012},
+ Organization = {IEEE},
+ Pages = {316--321}
+}
+
+@Article{johnson1967hierarchical,
+ Title = {Hierarchical clustering schemes},
+ Author = {Johnson, Stephen C},
+ Journal = {Psychometrika},
+ Year = {1967},
+ Number = {3},
+ Pages = {241--254},
+ Volume = {32},
+
+ Publisher = {Springer}
+}
+
+@InCollection{liang2011finding,
+ Title = {Finding relevant papers based on citation relations},
+ Author = {Liang, Yicong and Li, Qing and Qian, Tieyun},
+ Booktitle = {Web-age Information Management},
+ Publisher = {Springer},
+ Year = {2011},
+ Pages = {403--414}
+}
+
+@TechReport{ilprints422,
+ Title = {The {PageRank Citation Ranking: Bringing Order to the Web.}},
+ Author = {Lawrence Page and Sergey Brin and Rajeev Motwani and Terry Winograd},
+ Institution = {Stanford InfoLab},
+ Year = {1999},
+
+ Address = {Stanford, CA},
+ Month = {November},
+ Number = {1999-66},
+
+ Publisher = {Stanford InfoLab}
+}
+
+@Article{pedregosa2011scikit,
+ Title = {Scikit-learn: Machine learning in {P}ython},
+ Author = {Fabian Pedregosa and Gaël Varoquaux and Alexandre Gramfort and Vincent Michel and Bertrand Thirion and Olivier Grisel and Mathieu Blondel and Peter Prettenhofer and Ron Weiss and Vincent Dubourg and Jake Vanderplas and Alexandre Passos and David Cournapeau and Matthieu Brucher and Matthieu Perrot and Édouard Duchesnay},
+ Journal = {The Journal of Machine Learning Research},
+ Year = {2011},
+ Pages = {2825--2830},
+ Volume = {12},
+
+ Publisher = {JMLR. org}
+}
+
+@InProceedings{pham2012enhancing,
+ Title = {Enhancing academic event participation with context-aware and social recommendations},
+ Author = {Pham, Manh Cuong and Kovachev, Dejan and Cao, Yiwei and Mbogos, Ghislain Manib and Klamma, Ralf},
+ Booktitle = {Proceedings of IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)},
+ Year = {2012},
+ Organization = {IEEE},
+ Pages = {464--471}
+}
+
+@Article{rousseeuw1987silhouettes,
+ Title = {Silhouettes: a graphical aid to the interpretation and validation of cluster analysis},
+ Author = {Rousseeuw, Peter J},
+ Journal = {Journal of Computational and Applied Mathematics},
+ Year = {1987},
+ Pages = {53--65},
+ Volume = {20},
+
+ Publisher = {Elsevier}
+}
+
+@Article{salton1975vector,
+ Title = {A vector space model for automatic indexing},
+ Author = {Salton, Gerard and Wong, Anita and Yang, Chung-Shu},
+ Journal = {Communications of the ACM},
+ Year = {1975},
+ Number = {11},
+ Pages = {613--620},
+ Volume = {18},
+
+ Publisher = {ACM}
+}
+
+@InProceedings{sculley2010web,
+ Title = {Web-scale k-means clustering},
+ Author = {Sculley, David},
+ Booktitle = {Proceedings of the 19th International Conference on World Wide Web},
+ Year = {2010},
+ Organization = {ACM},
+ Pages = {1177--1178}
+}
+
+@Article{van2008visualizing,
+ Title = {Visualizing data using t-{SNE}},
+ Author = {Van der Maaten, Laurens and Hinton, Geoffrey},
+ Journal = {Journal of Machine Learning Research},
+ Year = {2008},
+ Number = {2579-2605},
+ Pages = {85},
+ Volume = {9}
+}
+
+@InProceedings{wagstaff2001constrained,
+ Title = {Constrained k-means clustering with background knowledge},
+ Author = {Wagstaff, Kiri and Cardie, Claire and Rogers, Seth and Schr\"{o}dl, Stefan},
+ Booktitle = {Proceedings of the International Conference on Machine Learning},
+ Year = {2001},
+ Pages = {577--584},
+ Volume = {1}
+}
+
+@InProceedings{xia2014folksonomy,
+ Title = {Folksonomy based socially-aware recommendation of scholarly papers for conference participants},
+ Author = {Xia, Feng and Asabere, Nana Yaw and Liu, Haifeng and Deonauth, Nakema and Li, Fengqi},
+ Booktitle = {Proceedings of the Companion Publication of the 23rd International Conference on World Wide Web Companion},
+ Year = {2014},
+ Organization = {International World Wide Web Conferences Steering Committee},
+ Pages = {781--786}
+}
+
+@InProceedings{xia2013socially,
+ Title = {Socially-aware venue recommendation for conference participants},
+ Author = {Xia, Feng and Asabere, Nana Yaw and Rodrigues, Joel JPC and Basso, Filippo and Deonauth, Nakema and Wang, Wei},
+ Booktitle = {Proceedings of the 10th International Conference on Autonomic and Trusted Computing (UIC/ATC)},
+ Year = {2013},
+ Organization = {IEEE},
+ Pages = {134--141}
+}
+
+@Article{zhu2010data,
+ Title = {Data clustering with size constraints},
+ Author = {Zhu, Shunzhi and Wang, Dingding and Li, Tao},
+ Journal = {Knowledge-Based Systems},
+ Year = {2010},
+ Number = {8},
+ Pages = {883--889},
+ Volume = {23},
+
+ Publisher = {Elsevier}
+}
+
+@Proceedings{aime,
+ Title = {Artificial Intelligence in Medicine: 14th Conference on Artificial Intelligence in Medicine, AIME 2013, Murcia, Spain},
+ Year = {2013},
+ Editor = {Niels Peek and Roque Marin Morales and Mor Peleg },
+ Publisher = {Springer},
+ Series = {Lecture Notes in Artificial Intelligence},
+ Volume = {7885},
+
+ Booktitle = {Proceedings of 14th Conference on Artificial Intelligence in Medicine}
+}
+
+@Proceedings{aime2,
+ Title = {13th Conference on Artificial Intelligence in Medicine},
+ Booktitle = {Artificial Intelligence in Mediclne},
+ Year = {2011},
+ Editor = {Peleg, Mor and Lavra\v{c}, Nada and Combi, Carlo},
+ Publisher = {Springer}
+}
+
+@article{spasic2005text,
+ title ={Text mining and ontologies in biomedicine: making sense of raw text},
+ author ={Spasic, Irena and Ananiadou, Sophia and McNaught, John and Kumar, Anand},
+ journal ={Briefings in bioinformatics},
+ volume ={6},
+ number ={3},
+ pages ={239--251},
+ year ={2005},
+ publisher ={Oxford University Press}
+}
+
+@article{blei2012probabilistic,
+ title ={Probabilistic topic models},
+ author ={Blei, David M.},
+ journal ={Communications of the ACM},
+ volume ={55},
+ number ={4},
+ pages ={77--84},
+ year ={2012},
+ publisher ={ACM}
+}
+
+
diff --git a/t/from_file.t b/t/from_file.t
new file mode 100644
index 0000000..a1975c2
--- /dev/null
+++ b/t/from_file.t
@@ -0,0 +1,35 @@
+use strict;
+use warnings;
+
+use Test::More tests => 5;
+use utf8;
+
+use Text::BibTeX;
+
+my $bibtex = Text::BibTeX::File->new("t/corpora.bib", { binmode => 'utf-8'});
+is ref($bibtex), "Text::BibTeX::File";
+
+my @entries;
+while (my $entry = Text::BibTeX::Entry->new($bibtex)) {
+ push @entries, $entry;
+}
+
+is scalar(@entries), 25;
+
+# @Article{linguamatica:6:2:Laboreiroetal,
+# author = {Gustavo Laboreiro and Eugénio Oliveira},
+# title = {Avaliação de métodos de desofuscação de palavrões},
+# journal = {Linguamática},
+# year = {2014},
+# volume = {6},
+# number = {2},
+# pages = {25--43},
+# month = {Dezembro},
+# editor = {Alberto Simões and José João Almeida and Xavier Gómez Guinovart}
+# }
+is $entries[0]->get("title"), "Avaliação de métodos de desofuscação de palavrões";
+is $entries[0]->get("author"), "Gustavo Laboreiro and Eugénio Oliveira";
+
+my @editors = $entries[0]->names("editor");
+
+is $editors[0]->part("last"), "Simões";
diff --git a/t/macro.t b/t/macro.t
index a3bc0ec..d1197b4 100644
--- a/t/macro.t
+++ b/t/macro.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
-use Test::More tests => 57;
+use Test::More tests => 67;
use vars ('$DEBUG');
@@ -17,7 +17,7 @@ $DEBUG = 1;
# ----------------------------------------------------------------------
# test macro parsing and expansion
-my ($macrodef, $regular, $entry, @warnings);
+my ($macrodef, $regular, $other, $entry, @warnings);
$macrodef = <<'TEXT';
@string ( foo = " The Foo
@@ -37,6 +37,10 @@ $regular = <<'TEXT';
}
TEXT
+$other = <<'EOT';
+@article { xxx, institution = ugh }
+EOT
+
# Direct access to macro table, part 1: make sure the macros we're going to
# defined aren't defined
@@ -54,13 +58,13 @@ err_like( sub{ ok(! defined macro_text('bar') ); }, qr/undefined macro "bar"/);
# macros we're interested in into the macro table so we can
# successfully parse the regular entry
print "parsing macro-definition entry to define 3 macros\n" if $DEBUG;
-$entry = new Text::BibTeX::Entry;
+$entry = Text::BibTeX::Entry->new();;
no_err( sub{ $entry->parse_s($macrodef); } );
test_entry($entry, 'string', undef,
[qw(foo sons bar)],
- [' The Foo Journal', ' \& Sons', 'Bar \& Sons']);
+ [' The Foo Journal', ' \& Sons', 'Bar \& Sons'], "test 1");
# Direct access to macro table, part 2: make sure the macros we've just
# defined now have the correct values
@@ -83,7 +87,7 @@ no_err( sub {
# calling a parse or read method on an existing object isn't documented
# as an "ok thing to do", but it is (at least as the XS code currently
# is!) -- hence I can leave the "new" uncommented
-# $entry = new Text::BibTeX::Entry;
+# $entry = Text::BibTeX::Entry->new();
print "parsing the regular entry which uses those 2 of those macros\n"
if $DEBUG;
@@ -91,7 +95,7 @@ no_err( sub { $entry->parse_s ($regular); });
test_entry ($entry, 'article', 'my_article',
[qw(author journal publisher)],
- ['Us and Them', 'The Foo Journal', 'FuBar \& Sons']);
+ ['Us and Them', 'The Foo Journal', 'FuBar \& Sons'], "test 2");
# Delete the 'bar' macro and change 'foo' -- this should result in
@@ -113,4 +117,18 @@ err_like( sub { $entry->parse_s ($regular); }, qr/undefined macro "bar"/);
test_entry ($entry, 'article', 'my_article',
[qw(author journal publisher)],
- ['Us and Them', 'The Journal of Fooology', 'Fu']);
+ ['Us and Them', 'The Journal of Fooology', 'Fu'], "test 3");
+
+my $ugh = 'University of Good Heavens';
+add_macro_text('ugh', $ugh);
+is macro_length('ugh'), length($ugh), "ugh got defined";
+no_err( sub { $entry->parse_s ($other); }, qr/undefined macro "ugh"/);
+test_entry($entry, 'article', 'xxx', ['institution'], [$ugh], "Macro replaced");
+
+my $string = 'wednesday';
+add_macro_text(substr($string, 0, 3), $string);
+is macro_length('wed'), 9;
+
+
+
+
diff --git a/t/modify.t b/t/modify.t
index c3bc557..8138dcf 100644
--- a/t/modify.t
+++ b/t/modify.t
@@ -24,7 +24,7 @@ $text = <<'TEXT';
}
TEXT
-ok($entry = new Text::BibTeX::Entry);
+ok($entry = Text::BibTeX::Entry->new);
ok($entry->parse_s ($text));
ok($entry->type eq 'article');
diff --git a/t/nameformat.t b/t/nameformat.t
index f9a461b..6092741 100644
--- a/t/nameformat.t
+++ b/t/nameformat.t
@@ -3,14 +3,14 @@ use strict;
use vars qw($DEBUG);
use IO::Handle;
use Test::More tests=>26;
-use Encode;
use utf8;
+use Encode 'decode';
+use Unicode::Normalize;
require "t/common.pl";
use Text::BibTeX qw(:nameparts :joinmethods);
-use Text::BibTeX::Name;
-use Text::BibTeX::NameFormat;
+
$DEBUG = 1;
@@ -37,11 +37,11 @@ $DEBUG = 1;
# tests 4..5..
my $name1 = Text::BibTeX::Name->new('{John Henry} Ford');
my $format1 = Text::BibTeX::NameFormat->new('f', 1);
- is $format1->apply($name1), 'J.';
+ is $format1->apply($name1), 'J.', "first name is abbreviated correctly [1]";
my $name2 = Text::BibTeX::Name->new('{John} Ford');
my $format2 = Text::BibTeX::NameFormat->new('f', 1);
- is $format2->apply($name2), 'J.';
+ is $format2->apply($name2), 'J.', "first name is abbreviated correctly [2]";
}
{
@@ -55,14 +55,14 @@ $DEBUG = 1;
$format3->set_text(BTN_LAST, undef, undef, undef, '.');
$format3->set_options(BTN_LAST, 1, BTJ_NOTHING, BTJ_NOTHING);
- is $format3->apply($name3), 'U.';
+ is $format3->apply($name3), 'U.', 'big institution';
}
{
# tests 7..8..
my $name4 = Text::BibTeX::Name->new("{\\'E}mile Zola");
my $format4 = Text::BibTeX::NameFormat->new('f', 1);
- is $format4->apply($name4), "{\\'E}.";
+ is $format4->apply($name4), "{\\'E}.", "accented first letter";
my $name5 = Text::BibTeX::Name->new('St John-Mollusc, Oliver');
my $format5 = Text::BibTeX::NameFormat->new('l', 1);
@@ -70,7 +70,7 @@ $DEBUG = 1;
$format5->set_text(BTN_LAST, undef, undef, undef, '.');
$format5->set_options(BTN_LAST, 1, BTJ_MAYTIE, BTJ_NOTHING);
- is $format5->apply($name5), 'S.~J.-M.';
+ is $format5->apply($name5), 'S.~J.-M.', "abbreviated surname";
}
{
@@ -81,7 +81,7 @@ $DEBUG = 1;
$format6->set_text (BTN_LAST, undef, undef, undef, '.');
$format6->set_options (BTN_LAST, 1, BTJ_MAYTIE, BTJ_NOTHING);
- is $format6->apply($name6), "S.~J.-{\\'E}.~M.";
+ is $format6->apply($name6), "S.~J.-{\\'E}.~M.", "Abbreviated accented surname";
}
{
@@ -97,31 +97,33 @@ $DEBUG = 1;
{
# test 11... to 16
+
+ ## The first two names below use the default raw (bytes) mode
my $name8 = Text::BibTeX::Name->new('Šomeone Smith');
my $formatter = Text::BibTeX::NameFormat->new('f', 1);
- is decode_utf8($formatter->apply($name8)), 'Š.';
+ is NFC(decode('UTF-8',$formatter->apply($name8))), 'Š.', "raw test 1";
my $name9 = Text::BibTeX::Name->new('Šomeone-Šomething Smith');
- is decode_utf8($formatter->apply($name9)), 'Š.-Š.';
+ is NFC(decode('UTF-8',$formatter->apply($name9))), 'Š.-Š.', "raw test 2";
$formatter = Text::BibTeX::NameFormat->new('f', 1);
- my $name10 = Text::BibTeX::Name->new('{Šomeone-Šomething} Smith');
- is decode_utf8($formatter->apply($name10)), 'Š.';
+ my $name10 = Text::BibTeX::Name->new({binmode=>'utf-8'},'{Šomeone-Šomething} Smith');
+ is $formatter->apply($name10), 'Š.', "utf-8 [1]";
# Initial is 2 bytes long in UTF8
my $formatterlast = Text::BibTeX::NameFormat->new('f', 1);
- my $name11 = Text::BibTeX::Name->new('Żaa Smith');
- is decode_utf8($formatterlast->apply($name11)), 'Ż.';
+ my $name11 = Text::BibTeX::Name->new({binmode=>'utf-8'},'Żaa Smith');
+ is $formatterlast->apply($name11), 'Ż.', "utf-8 [2]";
# Initial is 3 bytes long in UTF8 (Z + 2 byte combining mark)
$formatterlast = Text::BibTeX::NameFormat->new('f', 1);
- my $name12 = Text::BibTeX::Name->new('Z̃ Smith');
- is decode_utf8($formatterlast->apply($name12)), 'Z̃.';
+ my $name12 = Text::BibTeX::Name->new({binmode=>'utf-8'},'Z̃ Smith');
+ is $formatterlast->apply($name12), 'Z̃.', "utf-8 [3]";
# Initial is 7 bytes long in UTF8 (A + 3 * 2 byte combining marks)
$formatterlast = Text::BibTeX::NameFormat->new('f', 1);
- my $name13 = Text::BibTeX::Name->new('A̧̦̓ Smith');
- is decode_utf8($formatterlast->apply($name13)), 'A̧̦̓.';
+ my $name13 = Text::BibTeX::Name->new({binmode=>'utf-8'},'A̧̦̓ Smith');
+ is $formatterlast->apply($name13), 'A̧̦̓.', "utf-8 [3]";
}
diff --git a/t/names.t b/t/names.t
index 65e9e2e..3e8e3eb 100644
--- a/t/names.t
+++ b/t/names.t
@@ -2,14 +2,14 @@
use strict;
use warnings;
use vars qw($DEBUG);
-use Encode;
+
use IO::Handle;
-use Test::More tests => 62;
+use Test::More tests => 61;
use utf8;
+use Encode 'encode';
+use Text::BibTeX;
BEGIN {
- use_ok("Text::BibTeX");
- use_ok("Text::BibTeX::Name");
require "t/common.pl";
}
@@ -32,7 +32,8 @@ sub test_name {
$ok &= ! $name->part ($partnames[$i]);
}
}
- ok(keys %$name <= 4 && $ok);
+ # At most 6 keys: first, von, last, jr plus binmode and normalization
+ ok(keys %$name <= 6 && $ok);
}
@@ -64,7 +65,7 @@ my ($text, $entry, $pentry, $uentry);
$namelist = join (' and ', @orig_namelist);
@namelist = Text::BibTeX::split_list
($namelist, 'and', 'test', 0, 'name');
-is_deeply(\@orig_namelist, \@namelist);
+is_deeply(\@orig_namelist, \@namelist, "same lists...");
my $i;
foreach $i (0 .. $#namelist)
@@ -72,7 +73,7 @@ foreach $i (0 .. $#namelist)
is($namelist[$i], $orig_namelist[$i]);
my %parts;
Text::BibTeX::Name::_split (\%parts, $namelist[$i], 'test', 0, $i, 0);
- ok (keys %parts <= 4);
+ ok (keys %parts <= 4, "number keys is OK");
my @name = map { join ('+', ref $_ ? @$_ : ()) }
@parts{'first','von','last','jr'};
@@ -98,16 +99,21 @@ my $protected_test = <<'PROT';
}
PROT
-my $uname = new Text::BibTeX::Name('фон дер Иванов, И. И.');
-is (decode_utf8(join('', $uname->part('last'))), 'Иванов');
-is (decode_utf8(join('', $uname->part('first'))), 'И.И.');
-is (decode_utf8(join(' ', $uname->part('von'))), 'фон дер');# 2-byte UTF-8 lowercase
-$uname = new Text::BibTeX::Name('ꝥaa Smith, John');
-is (decode_utf8(join('', $uname->part('von'))), 'ꝥaa');# 3-byte UTF-8 lowercase (U+A765)
-$uname = new Text::BibTeX::Name('𝓺aa Smith, John');
-is (decode_utf8(join('', $uname->part('von'))), '𝓺aa');# 4-byte UTF-8 lowercase (U+1D4FA)
+my $uname = Text::BibTeX::Name->new({binmode => 'utf-8'},'фон дер Иванов, И. И.');
+is (join('', $uname->part('last')), 'Иванов', "Testing unicode...");
+is (join('', $uname->part('first')), 'И.И.');
+is (join(' ', $uname->part('von')), 'фон дер');# 2-byte UTF-8 lowercase
+
+$uname = Text::BibTeX::Name->new({binmode => 'utf-8'},'ꝥaa Smith, John');
+is (join('', $uname->part('von')), 'ꝥaa');# 3-byte UTF-8 lowercase (U+A765)
+$uname = Text::BibTeX::Name->new({binmode => 'utf-8'},'𝓺aa Smith, John');
+is (join('', $uname->part('von')), '𝓺aa');# 4-byte UTF-8 lowercase (U+1D4FA)
+
+$uname = Text::BibTeX::Name->new({binmode => 'raw'},'𝓺aa Smith, John');
+is (join('', $uname->part('von')), encode('UTF-8','𝓺aa'), "check raw mode");# 4-byte UTF-8 lowercase (U+1D4FA)
+
-ok ($pentry = new Text::BibTeX::Entry $protected_test);
+ok ($pentry = Text::BibTeX::Entry->new($protected_test));
my $pauthor = $pentry->get ('author');
is ($pauthor, '{U.S. Department of Health and Human Services, National Institute of Mental Health, National Heart, Lung and Blood Institute}');
@pnames = $pentry->split ('author');
@@ -117,7 +123,7 @@ ok (@pnames == 1);
test_name ($pnames[0], [undef, undef, ['{U.S. Department of Health and Human Services, National Institute of Mental Health, National Heart, Lung and Blood Institute}'], undef]);
-ok ($entry = new Text::BibTeX::Entry $text);
+ok ($entry = Text::BibTeX::Entry->new($text));
my $author = $entry->get ('author');
is ($author, 'Homer Simpson and Flanders, Jr., Ned Q. and {Foo Bar and Co.}');
@names = $entry->split ('author');
diff --git a/t/output.t b/t/output.t
index 4f52874..4f348f6 100644
--- a/t/output.t
+++ b/t/output.t
@@ -28,24 +28,29 @@ $text = <<'TEXT';
year = 1997
}
TEXT
-ok($entry = new Text::BibTeX::Entry $text);
-ok($entry->parse_ok);
+ok($entry = Text::BibTeX::Entry->new($text), "new entry is defined");
+ok($entry->parse_ok, "new entry parsed correctly");
$new_text = $entry->print_s;
-like $new_text => qr/^\@article\{homer97,\s*$/m;
-like $new_text => qr/^\s*author\s*=\s*\{H\{\\"o\}mer Simpson \\"und Ned Flanders\},\s*$/m;
-like $new_text => qr/^\s*title\s*=\s*[{"]Territorial[^}"]*Suburbia[}"],\s*$/m;
-like $new_text => qr/^\s*journal\s*=\s*[{"]Journal[^\}]*Studies[}"],\s*$/m;
-like $new_text => qr/^\s*year\s*=\s*[{"]1997[}"],\s*$/m;
+like $new_text => qr/^\@article\{homer97,\s*$/m, 'we have type and key';
+like $new_text =>
+ qr/^\s*author\s*=\s*\{H\{\\"o\}mer Simpson \\"und Ned Flanders\},\s*$/m,
+ 'we have author';
+like $new_text => qr/^\s*title\s*=\s*[{"]Territorial[^}"]*Suburbia[}"],\s*$/m,
+ 'we have title';
+like $new_text => qr/^\s*journal\s*=\s*[{"]Journal[^\}]*Studies[}"],\s*$/m,
+ 'we have journal';
+like $new_text => qr/^\s*year\s*=\s*[{"]1997[}"],\s*$/m, 'we have year'
+;
-$new_entry = new Text::BibTeX::Entry $new_text;
-ok($entry->parse_ok);
+$new_entry = Text::BibTeX::Entry->new($new_text);
+ok($entry->parse_ok, "second entry parsed correctly");
-is $entry->type => $new_entry->type;
-is $entry->key => $new_entry->key;
+is $entry->type => $new_entry->type, "entry type is correct";
+is $entry->key => $new_entry->key, "entry key is correct";
-ok(slist_equal (scalar $entry->fieldlist, scalar $new_entry->fieldlist));
+ok(slist_equal ([sort $entry->fieldlist], [sort $new_entry->fieldlist]), "same field list");
@fields = $entry->fieldlist;
ok(slist_equal ([$entry->get (@fields)], [$new_entry->get (@fields)]));
@@ -59,12 +64,12 @@ open (BIB, ">$test[0]") || die "couldn't create $test[0]: $!\n";
$entry->print (\*BIB);
close (BIB);
-$bib = new IO::File $test[1], O_CREAT|O_WRONLY
+$bib = IO::File->new($test[1], O_CREAT|O_WRONLY)
or die "couldn't create $test[1]: $!\n";
$entry->print ($bib);
$bib->close;
-$bib = new Text::BibTeX::File $test[2], O_CREAT|O_WRONLY
+$bib = Text::BibTeX::File->new($test[2], {MODE => O_CREAT|O_WRONLY})
or die "couldn't create $test[2]: $!\n";
$entry->write ($bib);
$bib->close;
@@ -77,9 +82,9 @@ for $i (0 .. 2)
close (BIB);
}
-is $new_text => $contents[0];
-is $new_text => $contents[1];
-is $new_text => $contents[2];
+is $new_text => $contents[0], "Contents [0]";
+is $new_text => $contents[1], "Contents [1]";
+is $new_text => $contents[2], "Contents [2]";
my $clone = $entry->clone;
is ref($clone) => 'Text::BibTeX::Entry';
diff --git a/t/parse.t b/t/parse.t
index 63d50b2..b607486 100644
--- a/t/parse.t
+++ b/t/parse.t
@@ -22,8 +22,8 @@ $DEBUG = 0;
my ($bibfile, $entry);
my $multiple_file = 'btparse/tests/data/simple.bib';
-ok($bibfile = new Text::BibTeX::File $multiple_file);
-err_like sub { ok($entry = new Text::BibTeX::Entry $bibfile); },
+ok($bibfile = Text::BibTeX::File->new( $multiple_file));
+err_like sub { ok($entry = Text::BibTeX::Entry->new( $bibfile)); },
qr!$multiple_file, line 5, warning: undefined macro "junk"!;
test_entry ($entry, 'book', 'abook',
diff --git a/t/parse_f.t b/t/parse_f.t
index 3edd821..af46acf 100644
--- a/t/parse_f.t
+++ b/t/parse_f.t
@@ -24,13 +24,13 @@ my $regular_file = 'btparse/tests/data/regular.bib';
# bundled into one call
open (BIB, $regular_file) || die "couldn't open $regular_file: $!\n";
-err_like sub { ok($entry = new Text::BibTeX::Entry $regular_file, \*BIB); },
+err_like sub { ok($entry = Text::BibTeX::Entry->new($regular_file, \*BIB)); },
qr!$regular_file, line 5, warning: undefined macro "junk"!;
test_entry ($entry, 'book', 'abook',
[qw(title editor publisher year)],
['A Book', 'John Q. Random', 'Foo Bar \& Sons', '1922']);
-ok(! new Text::BibTeX::Entry $regular_file, \*BIB);
+ok(!Text::BibTeX::Entry->new($regular_file, \*BIB));
# An interesting note: if I forget the 'seek' here, a bug is exposed in
@@ -63,14 +63,14 @@ test_entry ($entry, 'string', undef, ['junk'], [', III']);
# Now open that same file using IO::File, and pass in the resulting object
# instead of a glob ref; everything else here is just the same
-$fh = new IO::File $regular_file
+$fh = IO::File->new($regular_file)
or die "couldn't open $regular_file: $!\n";
-no_err sub { ok($entry = new Text::BibTeX::Entry $regular_file, $fh); };
+no_err sub { ok($entry = Text::BibTeX::Entry->new($regular_file, $fh)); };
test_entry ($entry, 'book', 'abook',
[qw(title editor publisher year)],
['A Book', 'John Q. Random, III', 'Foo Bar \& Sons', '1922']);
-ok(! new Text::BibTeX::Entry $regular_file, $fh);
+ok(! Text::BibTeX::Entry->new( $regular_file, $fh));
$fh->seek (0, 0);
# and again, with unbundled 'parse' call
@@ -79,6 +79,6 @@ no_err sub { ok($entry->parse ($regular_file, $fh)); };
test_entry ($entry, 'book', 'abook',
[qw(title editor publisher year)],
['A Book', 'John Q. Random, III', 'Foo Bar \& Sons', '1922']);
-ok(! new Text::BibTeX::Entry $regular_file, $fh);
+ok(! Text::BibTeX::Entry->new( $regular_file, $fh));
$fh->close;
diff --git a/t/parse_s.t b/t/parse_s.t
index e8c49a1..16d9af1 100644
--- a/t/parse_s.t
+++ b/t/parse_s.t
@@ -34,12 +34,12 @@ $text_uck = <<'TEXT';
}
TEXT
-ok($entry_uck = new Text::BibTeX::Entry);
+ok($entry_uck = Text::BibTeX::Entry->new());
ok($entry_uck->parse_s($text_uck));
-ok($entry = new Text::BibTeX::Entry);
+ok($entry = Text::BibTeX::Entry->new());
err_like
sub { ok($entry->parse_s ($text)); },
@@ -56,6 +56,8 @@ ok($entry->{fields}[0] eq 'f1' &&
ok(scalar keys %{$entry->{'values'}} == 3);
ok($entry->{'values'}{f1} eq 'hello there');
+
+
# Now the same tests again, but using the object's methods
test_entry ($entry, 'foo', 'mykey',
['f1', 'f2', 'f3'],
@@ -64,7 +66,7 @@ test_entry ($entry, 'foo', 'mykey',
# Repeat with "bundled" form (new and parse_s in one go)
err_like
- sub { ok($entry = new Text::BibTeX::Entry $text); },
+ sub { ok($entry = Text::BibTeX::Entry->new($text)); },
qr/line 3, warning: undefined macro "foo".*line 4, warning: undefined macro "foo"/s;
# Repeat tests of entry contents
@@ -77,7 +79,7 @@ test_entry ($entry, 'foo', 'mykey',
# look into how btparse responds to bt_parse_s() on an empty string
# before I know how Text::BibTeX should do it!
-# $entry = new Text::BibTeX::Entry;
+# $entry = Text::BibTeX::Entry->new();
# $result = $entry->parse_s ('');
# ok(! warnings && ! $result);
@@ -96,7 +98,7 @@ $text = <<'TEXT';
@foo{key, title = "{System}- und {Signaltheorie}"}
TEXT
-no_err sub { $entry = new Text::BibTeX::Entry $text; };
+no_err sub { $entry = Text::BibTeX::Entry->new($text); };
ok($entry->parse_ok);
test_entry ($entry, 'foo', 'key',
diff --git a/t/split_names b/t/split_names
index 187de93..4f81d07 100644
--- a/t/split_names
+++ b/t/split_names
@@ -9,7 +9,7 @@ use Text::BibTeX::Name;
sub show_name
{
my $str = shift;
- my $name = new Text::BibTeX::Name $str;
+ my $name = Text::BibTeX::Name->new($str);
my $part;
foreach $part (qw(first last von jr))
@@ -21,7 +21,7 @@ sub show_name
}
}
-my $rl = new Term::ReadLine 'BibTeX name splitter';
+my $rl = Term::ReadLine->new('BibTeX name splitter');
while (defined ($_ = $rl->readline (">> ")))
{
show_name ($_);
diff --git a/typemap b/typemap
index 88d7b4a..3298aca 100644
--- a/typemap
+++ b/typemap
@@ -11,7 +11,8 @@ INPUT
# Text::BibTeX::Entry::parse and have it wind up as NULL
# in bt_parse_entry()
T_PV
- $var = (SvOK ($arg)) ? ($type) SvPV ($arg,PL_na) : NULL
+ SvGETMAGIC($arg);
+ $var = SvOK ($arg) ? ($type) SvPV_nomg($arg, PL_na) : NULL;
T_NAME
$var = (bt_name *) SvIV ($arg)
diff --git a/xscode/BibTeX.xs b/xscode/BibTeX.xs
index 14919d1..f3c7999 100644
--- a/xscode/BibTeX.xs
+++ b/xscode/BibTeX.xs
@@ -73,7 +73,7 @@ bt_cleanup()
# purify_string
void
-bt_split_list (string, delim, filename=NULL, line=0, description=NULL)
+bt_isplit_list (string, delim, filename=NULL, line=0, description=NULL)
char * string
char * delim
diff --git a/xscode/btxs_support.c b/xscode/btxs_support.c
index 39d93dd..39c662b 100644
--- a/xscode/btxs_support.c
+++ b/xscode/btxs_support.c
@@ -40,29 +40,29 @@ constant (char * name, IV * arg)
switch (name[2])
{
case 'E': /* entry metatypes */
- if (strEQ (name, "BTE_UNKNOWN")) { *arg = BTE_UNKNOWN; ok = TRUE; }
- if (strEQ (name, "BTE_REGULAR")) { *arg = BTE_REGULAR; ok = TRUE; }
- if (strEQ (name, "BTE_COMMENT")) { *arg = BTE_COMMENT; ok = TRUE; }
+ if (strEQ (name, "BTE_UNKNOWN")) { *arg = BTE_UNKNOWN; ok = TRUE; }
+ if (strEQ (name, "BTE_REGULAR")) { *arg = BTE_REGULAR; ok = TRUE; }
+ if (strEQ (name, "BTE_COMMENT")) { *arg = BTE_COMMENT; ok = TRUE; }
if (strEQ (name, "BTE_PREAMBLE")) { *arg = BTE_PREAMBLE; ok = TRUE; }
if (strEQ (name, "BTE_MACRODEF")) { *arg = BTE_MACRODEF; ok = TRUE; }
break;
case 'A': /* AST nodetypes (not all of them) */
if (strEQ (name, "BTAST_STRING")) { *arg = BTAST_STRING; ok = TRUE; }
if (strEQ (name, "BTAST_NUMBER")) { *arg = BTAST_NUMBER; ok = TRUE; }
- if (strEQ (name, "BTAST_MACRO")) { *arg = BTAST_MACRO; ok = TRUE; }
+ if (strEQ (name, "BTAST_MACRO")) { *arg = BTAST_MACRO; ok = TRUE; }
break;
case 'N': /* name parts */
if (strEQ (name, "BTN_FIRST")) { *arg = BTN_FIRST; ok = TRUE; }
- if (strEQ (name, "BTN_VON")) { *arg = BTN_VON; ok = TRUE; }
- if (strEQ (name, "BTN_LAST")) { *arg = BTN_LAST; ok = TRUE; }
- if (strEQ (name, "BTN_JR")) { *arg = BTN_JR; ok = TRUE; }
- if (strEQ (name, "BTN_NONE")) { *arg = BTN_NONE; ok = TRUE; }
+ if (strEQ (name, "BTN_VON")) { *arg = BTN_VON; ok = TRUE; }
+ if (strEQ (name, "BTN_LAST")) { *arg = BTN_LAST; ok = TRUE; }
+ if (strEQ (name, "BTN_JR")) { *arg = BTN_JR; ok = TRUE; }
+ if (strEQ (name, "BTN_NONE")) { *arg = BTN_NONE; ok = TRUE; }
break;
case 'J': /* token join methods */
- if (strEQ (name, "BTJ_MAYTIE")) { *arg = BTJ_MAYTIE; ok = TRUE; }
- if (strEQ (name, "BTJ_SPACE")) { *arg = BTJ_SPACE; ok = TRUE; }
+ if (strEQ (name, "BTJ_MAYTIE")) { *arg = BTJ_MAYTIE; ok = TRUE; }
+ if (strEQ (name, "BTJ_SPACE")) { *arg = BTJ_SPACE; ok = TRUE; }
if (strEQ (name, "BTJ_FORCETIE")) { *arg = BTJ_FORCETIE; ok = TRUE; }
- if (strEQ (name, "BTJ_NOTHING")) { *arg = BTJ_NOTHING; ok = TRUE; }
+ if (strEQ (name, "BTJ_NOTHING")) { *arg = BTJ_NOTHING; ok = TRUE; }
break;
default:
break;
@@ -280,8 +280,9 @@ convert_value_entry (AST *top, HV *entry, boolean preserve)
/* Walk the list of values to find the last one (for its line number) */
item = NULL;
- while (item = bt_next_value (top, item, NULL, NULL))
+ while ((item = bt_next_value (top, item, NULL, NULL)))
prev_item = item;
+
if (prev_item) {
last_line = prev_item->line;
hv_store (lines, "STOP", 4, newSViv (last_line), 0);