summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Text/BibTeX.pm84
-rw-r--r--lib/Text/BibTeX/Bib.pm32
-rw-r--r--lib/Text/BibTeX/BibFormat.pm4
-rw-r--r--lib/Text/BibTeX/BibSort.pm6
-rw-r--r--lib/Text/BibTeX/Entry.pm115
-rw-r--r--lib/Text/BibTeX/File.pm111
-rw-r--r--lib/Text/BibTeX/Name.pm70
-rw-r--r--lib/Text/BibTeX/NameFormat.pm12
-rw-r--r--lib/Text/BibTeX/Structure.pm14
-rw-r--r--lib/Text/BibTeX/Value.pm22
10 files changed, 344 insertions, 126 deletions
diff --git a/lib/Text/BibTeX.pm b/lib/Text/BibTeX.pm
index a49b071..54ed65a 100644
--- a/lib/Text/BibTeX.pm
+++ b/lib/Text/BibTeX.pm
@@ -13,18 +13,19 @@
# ----------------------------------------------------------------------
package Text::BibTeX;
+use Text::BibTeX::Name;
+use Text::BibTeX::NameFormat;
use 5.008001; # needed for Text::BibTeX::Entry
use strict;
-#use UNIVERSAL qw(isa can); # for 'check_class' subroutine
use Carp;
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $AUTOLOAD);
require Exporter;
require DynaLoader;
-our $VERSION='0.74';
+our $VERSION='0.76';
@ISA = qw(Exporter DynaLoader);
%EXPORT_TAGS = (nodetypes => [qw(BTAST_STRING BTAST_MACRO BTAST_NUMBER)],
@@ -45,9 +46,53 @@ our $VERSION='0.74';
@{$EXPORT_TAGS{'nodetypes'}},
@{$EXPORT_TAGS{'nameparts'}},
@{$EXPORT_TAGS{'joinmethods'}},
- 'check_class', 'display_list');
+ 'check_class', 'display_list' );
@EXPORT = @{$EXPORT_TAGS{'metatypes'}};
+use Encode 'encode', 'decode';
+use Unicode::Normalize;
+
+
+# Convert a value handed back by the parser into the form the caller
+# asked for.
+#
+#   $result   - the string to convert (may be undef)
+#   $encoding - binmode of the owning object; any value matching
+#               /utf-?8/i selects UTF-8 handling, everything else
+#               (including undef) leaves $result untouched
+#   $norm     - name of a Unicode::Normalize form: NFC, NFD, NFKC or
+#               NFKD (case-insensitive); defaults to NFC
+#
+# Returns the decoded, normalized character string in UTF-8 mode and
+# $result unchanged otherwise.  An undefined $result is returned as is.
+# Croaks if $norm does not name a known normalization form.
+sub _process_result {
+    my ( $self, $result, $encoding, $norm ) = @_;
+
+    # Nothing to decode or normalize.
+    return $result unless defined $result;
+    return $result unless defined $encoding && $encoding =~ /utf-?8/i;
+
+    # Look the form up in a fixed table rather than calling whatever
+    # subroutine the (caller-supplied) name happens to resolve to.
+    my %normalizer_for = (
+        NFC  => \&NFC,
+        NFD  => \&NFD,
+        NFKC => \&NFKC,
+        NFKD => \&NFKD,
+    );
+    $norm ||= 'NFC';    # best to force it here.
+    my $normsub = $normalizer_for{ uc $norm }
+        or croak "unknown Unicode normalization form '$norm'";
+
+    # Only decode octets; a string already flagged as characters must
+    # not be decoded a second time.
+    $result = decode( 'utf-8', $result ) unless utf8::is_utf8($result);
+
+    return $normsub->($result);
+}
+
+# Prepare a caller-supplied value for storage or for the parser.
+#
+#   $value    - the string to convert (may be undef)
+#   $encoding - binmode of the owning object; any value matching
+#               /utf-?8/i selects UTF-8 handling
+#
+# In UTF-8 mode a character string (utf8 flag set) is encoded to UTF-8
+# octets; in every other case, including an undefined $encoding, $value
+# is returned unchanged.  Extra arguments passed by callers are ignored.
+sub _process_argument {
+    my ( $self, $value, $encoding ) = @_;
+
+    # An unset binmode means byte semantics: hand the value back as is.
+    return $value unless defined $encoding && $encoding =~ /utf-?8/i;
+
+    return utf8::is_utf8($value) ? encode( 'utf-8', $value ) : $value;
+}
+
+# Split $field on $delim via isplit_list() and post-process each piece.
+#
+#   $field, $delim, $filename, $line, $desc - passed straight through
+#       to Text::BibTeX::isplit_list
+#   $opts - optional hash ref with 'binmode' (default 'bytes') and
+#       'normalization' (default 'NFC')
+#
+# Returns the pieces, each run through _process_result with the
+# selected binmode and normalization.
+sub split_list {
+    my ( $field, $delim, $filename, $line, $desc, $opts ) = @_;
+
+    # Read the options into locals so that the defaults are never
+    # written back into the caller's hash.
+    my $binmode       = ( $opts && $opts->{binmode} )       || 'bytes';
+    my $normalization = ( $opts && $opts->{normalization} ) || 'NFC';
+
+    return
+        map { Text::BibTeX->_process_result( $_, $binmode, $normalization ) }
+        Text::BibTeX::isplit_list( $field, $delim, $filename, $line, $desc );
+}
+
=head1 NAME
Text::BibTeX - interface to read and parse BibTeX files
@@ -241,6 +286,16 @@ bootstrap Text::BibTeX;
initialize(); # these are both XS functions
END { &cleanup; }
+# This can't go in a BEGIN because of the .XS bootstrapping mechanism
+_define_months();
+
+# Register the twelve month macros: each three-letter abbreviation
+# ('jan', 'feb', ...) expands to the capitalized English month name.
+sub _define_months {
+    my @month_names = qw(
+        january february march     april   may      june
+        july    august   september october november december
+    );
+
+    foreach my $name (@month_names) {
+        my $abbrev = substr( $name, 0, 3 );
+        add_macro_text( $abbrev, ucfirst($name) );
+    }
+}
+
=head1 EXPORTS
@@ -572,7 +627,22 @@ of it as appropriate. They're just mentioned here for completeness.
=over 4
-=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
+=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION [, OPTS]]]])
+
+Splits a string on a fixed delimiter according to the BibTeX rules for
+splitting up lists of names. With BibTeX, the delimiter is hard-coded
+as C<"and">; here, you can supply any string. Instances of DELIM in
+STRING are considered delimiters if they are at brace-depth zero,
+surrounded by whitespace, and not at the beginning or end of STRING; the
+comparison is case-insensitive. See L<bt_split_names> for full details
+of how splitting is done (it's I<not> the same as Perl's C<split>
+function). OPTS is a hash ref of the same binmode and normalization
+arguments as with, e.g. Text::BibTeX::File->open(). split_list calls isplit_list()
+internally but handles UTF-8 conversion and normalization, if requested.
+
+Returns the list of strings resulting from splitting STRING on DELIM.
+
+=item isplit_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
Splits a string on a fixed delimiter according to the BibTeX rules for
splitting up lists of names. With BibTeX, the delimiter is hard-coded
@@ -581,7 +651,8 @@ STRING are considered delimiters if they are at brace-depth zero,
surrounded by whitespace, and not at the beginning or end of STRING; the
comparison is case-insensitive. See L<bt_split_names> for full details
of how splitting is done (it's I<not> the same as Perl's C<split>
-function).
+function). This function returns bytes. Use Text::BibTeX::split_list to specify
+the same binmode and normalization arguments as with, e.g. Text::BibTeX::File->open().
Returns the list of strings resulting from splitting STRING on DELIM.
@@ -654,7 +725,8 @@ takes no action.
=item delete_all_macros ()
-Deletes all macros from the macro table.
+Deletes all macros from the macro table, even the predefined month
+names.
=item macro_length (MACRO)
diff --git a/lib/Text/BibTeX/Bib.pm b/lib/Text/BibTeX/Bib.pm
index d400dd3..9fb50f5 100644
--- a/lib/Text/BibTeX/Bib.pm
+++ b/lib/Text/BibTeX/Bib.pm
@@ -27,7 +27,7 @@ Text::BibTeX::Bib - defines the "Bib" database structure
=head1 SYNOPSIS
- $bibfile = new Text::BibTeX::File $filename;
+ $bibfile = Text::BibTeX::File->new($filename);
$bibfile->set_structure ('Bib',
# Default option values:
sortby => 'name',
@@ -45,11 +45,11 @@ Text::BibTeX::Bib - defines the "Bib" database structure
$bibfile->set_option (labels => 'alpha'); # not implemented yet!
# parse entry from $bibfile and automatically make it a BibEntry
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
# or get an entry from somewhere else which is hard-coded to be
# a BibEntry
- $entry = new Text::BibTeX::BibEntry ...;
+ $entry = Text::BibTeX::BibEntry->new(...);
$sortkey = $entry->sort_key;
@blocks = $entry->format;
@@ -111,7 +111,7 @@ package Text::BibTeX::BibStructure;
use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(Text::BibTeX::Structure);
-$VERSION = '0.74';
+$VERSION = '0.76';
=head1 STRUCTURE OPTIONS
@@ -450,33 +450,13 @@ package Text::BibTeX::BibEntry;
use strict;
use vars qw(@ISA $VERSION);
-$VERSION = '0.74';
+$VERSION = '0.76';
use Text::BibTeX::BibSort;
use Text::BibTeX::BibFormat;
@ISA = qw(Text::BibTeX::BibSort Text::BibTeX::BibFormat);
-
-# Pre-define the "month name" macros for compatibility with BibTeX.
-# This ignores all sorts of issues, like internationalization and
-# abbreviation.
-my %month_names =
- ('jan' => 'January',
- 'feb' => 'February',
- 'mar' => 'March',
- 'apr' => 'April',
- 'may' => 'May',
- 'jun' => 'June',
- 'jul' => 'July',
- 'aug' => 'August',
- 'sep' => 'September',
- 'oct' => 'October',
- 'nov' => 'November',
- 'dec' => 'December');
-
-my ($macro, $expansion);
-Text::BibTeX::add_macro_text ($macro, $expansion)
- while (($macro, $expansion) = each %month_names);
+
1;
diff --git a/lib/Text/BibTeX/BibFormat.pm b/lib/Text/BibTeX/BibFormat.pm
index 2c78d04..f286fc1 100644
--- a/lib/Text/BibTeX/BibFormat.pm
+++ b/lib/Text/BibTeX/BibFormat.pm
@@ -26,7 +26,7 @@ use Text::BibTeX::NameFormat;
use Text::BibTeX::Structure;
@ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX qw(:subs display_list :nameparts :joinmethods);
@@ -148,7 +148,7 @@ sub format_names
unless $style =~ /^(full|abbrev|nopunct|nospace)$/;
$order = ($order eq 'first') ? 'fvlj' : 'vljf';
- $format = new Text::BibTeX::NameFormat ($order, ! ($style eq 'full'));
+ $format = Text::BibTeX::NameFormat->new ($order, ! ($style eq 'full'));
$format->set_text (&BTN_FIRST, undef, undef, undef, '')
if $style eq 'nopunct' || $style eq 'nospace';
diff --git a/lib/Text/BibTeX/BibSort.pm b/lib/Text/BibTeX/BibSort.pm
index dc23a89..52b60ac 100644
--- a/lib/Text/BibTeX/BibSort.pm
+++ b/lib/Text/BibTeX/BibSort.pm
@@ -23,7 +23,7 @@ use vars qw(@ISA $VERSION);
use Text::BibTeX::Structure;
@ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX qw(purify_string change_case);
@@ -147,8 +147,8 @@ sub sort_format_names
my ($abbrev, $format, $name);
$abbrev = ! ($self->structure->get_options ('namestyle') eq 'full');
- $format = new Text::BibTeX::NameFormat ("vljf", $abbrev);
- $name = new Text::BibTeX::Name;
+ $format = Text::BibTeX::NameFormat->new ("vljf", $abbrev);
+ $name = Text::BibTeX::Name->new;
my (@snames, $i, $sname);
@snames = $self->split ($field);
diff --git a/lib/Text/BibTeX/Entry.pm b/lib/Text/BibTeX/Entry.pm
index 6fdf242..36422cc 100644
--- a/lib/Text/BibTeX/Entry.pm
+++ b/lib/Text/BibTeX/Entry.pm
@@ -23,7 +23,7 @@ use vars qw'$VERSION';
use Carp;
use Text::BibTeX qw(:metatypes :nodetypes);
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -31,7 +31,7 @@ Text::BibTeX::Entry - read and parse BibTeX files
=head1 SYNOPSIS
- use Text::BibTeX; # do not use Text::BibTeX::Entry alone!
+ use Text::BibTeX::Entry;
# ...assuming that $bibfile and $newbib are both objects of class
# Text::BibTeX::File, opened for reading and writing (respectively):
@@ -127,7 +127,7 @@ anything extra.
=over 4
-=item new ([SOURCE])
+=item new ([OPTS ,] [SOURCE])
Creates a new C<Text::BibTeX::Entry> object. If the SOURCE parameter is
supplied, it must be one of the following: a C<Text::BibTeX::File> (or
@@ -169,6 +169,28 @@ But using a C<Text::BibTeX::File> object is simpler and preferred:
Returns the new object, unless SOURCE is supplied and reading/parsing
the entry fails (e.g., due to end of file) -- then it returns false.
+You may supply a reference to an option hash as first argument.
+Supported options are:
+
+=over 4
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings and you can customise the normalization with the NORMALIZATION option.
+
+  Text::BibTeX::Entry->new(
+      { binmode => 'utf-8', normalization => 'NFD' },
+      $file );
+
+=item NORMALIZATION
+
+Name of the Unicode normalization form applied to UTF-8 strings
+(NFC when not given).
+
+=back
+
+
=cut
sub new
@@ -176,6 +198,7 @@ sub new
my ($class, @source) = @_;
$class = ref ($class) || $class;
+
my $self = {'file' => undef,
'type' => undef,
'key' => undef,
@@ -183,8 +206,15 @@ sub new
'metatype' => undef,
'fields' => [],
'values' => {}};
-
bless $self, $class;
+
+ my $opts = {};
+ $opts = shift @source if scalar(@source) and ref $source[0] eq "HASH";
+ $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+ $self->{binmode} = 'utf-8'
+ if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+ $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
if (@source)
{
my $status;
@@ -234,6 +264,8 @@ sub clone
$clone->{file} = $self->{file}
}
# These might be changed so make copies
+ $clone->{binmode} = $self->{binmode};
+ $clone->{normalization} = $self->{normalization};
$clone->{type} = $self->{type};
$clone->{key} = $self->{key};
$clone->{status} = $self->{status};
@@ -271,6 +303,10 @@ sub read
my $fn = $source->{'filename'};
my $fh = $source->{'handle'};
$self->{'file'} = $source; # store File object for later use
+ ## Propagate flags
+ for my $f (qw.binmode normalization.) {
+ $self->{$f} = $source->{$f} unless exists $self->{$f};
+ }
return $self->parse ($fn, $fh, $preserve);
}
@@ -392,27 +428,39 @@ context.)
=item fieldlist ()
-Returns the list of fields in the entry. In a scalar context, returns a
-reference to the object's own list of fields. That way, you can change or
-reorder the field list with minimal interference from the class. I'm not
-entirely sure if this is a good idea, so don't rely on it existing in the
-future; feel free to play around with it and let me know if you get bitten
-in dangerous ways or find this enormously useful.
+Returns the list of fields in the entry.
+
+B<WARNING> In scalar context, it no longer returns a
+reference to the object's own list of fields.
=cut
sub parse_ok { shift->{'status'}; }
-sub metatype { shift->{'metatype'}; }
+# Accessor: the entry's stored metatype, run through _process_result
+# with this entry's binmode and normalization settings.
+sub metatype {
+    my ($entry) = @_;
+    my $raw = $entry->{'metatype'};
+    return Text::BibTeX->_process_result( $raw, $entry->{binmode}, $entry->{normalization} );
+}
-sub type { shift->{'type'}; }
+# Accessor: the entry's stored type, run through _process_result with
+# this entry's binmode and normalization settings.
+sub type {
+    my ($entry) = @_;
+    my $raw = $entry->{'type'};
+    return Text::BibTeX->_process_result( $raw, $entry->{binmode}, $entry->{normalization} );
+}
-sub key { shift->{'key'}; }
+# Accessor: the entry's key, run through _process_result with this
+# entry's binmode and normalization settings, or undef when the entry
+# has no key.
+sub key {
+    my $self = shift;
+
+    # new() always creates the 'key' slot (initialized to undef), so
+    # test for definedness: an 'exists' check would always succeed and
+    # push undef through the conversion.
+    return defined $self->{key}
+        ? Text::BibTeX->_process_result( $self->{key}, $self->{binmode}, $self->{normalization} )
+        : undef;
+}
sub num_fields { scalar @{shift->{'fields'}}; }
-sub fieldlist { wantarray ? @{shift->{'fields'}} : shift->{'fields'}; }
-
+# Returns the entry's field names in stored order, each run through
+# _process_result with this entry's binmode and normalization settings.
+sub fieldlist {
+    my ($entry) = @_;
+    my ( $mode, $form ) = @{$entry}{qw(binmode normalization)};
+    return map { Text::BibTeX->_process_result( $_, $mode, $form ) } @{ $entry->{'fields'} };
+}
+
=item exists (FIELD)
Returns true if a field named FIELD is present in the entry, false
@@ -484,17 +532,24 @@ sub exists
{
my ($self, $field) = @_;
- exists $self->{'values'}{$field};
+ exists $self->{values}{Text::BibTeX->_process_argument($field, $self->{binmode}, $self->{normalization})};
}
sub get
{
my ($self, @fields) = @_;
- @{$self->{'values'}}{@fields};
+ my @x = @{$self->{'values'}}{map {Text::BibTeX->_process_argument($_, $self->{binmode}, $self->{normalization})} @fields};
+
+ @x = map {defined($_) ? Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}): undef} @x;
+
+ return (@x > 1) ? @x : $x[0];
}
-sub value { shift->{'value'} }
+# Accessor: the entry's stored 'value' slot, run through
+# _process_result with this entry's binmode and normalization settings.
+sub value {
+    my ($entry) = @_;
+    my $raw = $entry->{value};
+    return Text::BibTeX->_process_result( $raw, $entry->{binmode}, $entry->{normalization} );
+}
=head2 Author name methods
@@ -590,17 +645,19 @@ sub split
{
my ($self, $field, $delim, $desc) = @_;
- return unless $self->exists ($field);
+ return unless $self->exists($field);
$delim ||= 'and';
$desc ||= 'name';
- my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
- my $line = $self->{'lines'}{$field};
-
# local $^W = 0 # suppress spurious warning from
# unless defined $filename; # undefined $filename
- Text::BibTeX::split_list ($self->{'values'}{$field}, $delim,
- $filename, $line, $desc);
+ Text::BibTeX::split_list($self->{values}{$field},
+ $delim,
+ ($self->{file} && $self->{file}{filename}),
+ $self->{lines}{$field},
+ $desc,
+ {binmode => $self->{binmode},
+ normalization => $self->{normalization}});
}
sub names
@@ -618,7 +675,8 @@ sub names
# unless defined $filename; # undefined $filename
for $i (0 .. $#names)
{
- $names[$i] = Text::BibTeX::Name->new($names[$i], $filename, $line, $i);
+ $names[$i] = Text::BibTeX::Name->new(
+ {binmode => $self->{binmode}, normalization => $self->{normalization}},$names[$i], $filename, $line, $i);
}
@names;
}
@@ -696,7 +754,7 @@ sub set_key
{
my ($self, $key) = @_;
- $self->{'key'} = $key;
+ $self->{'key'} = Text::BibTeX->_process_argument($key, $self->{binmode}, $self->{normalization});
}
sub set
@@ -708,7 +766,7 @@ sub set
while (@_)
{
- ($field,$value) = (shift,shift);
+ ($field,$value) = (shift,Text::BibTeX->_process_argument(shift, $self->{binmode}, $self->{normalization}));
push (@{$self->{'fields'}}, $field)
unless exists $self->{'values'}{$field};
$self->{'values'}{$field} = $value;
@@ -870,7 +928,8 @@ sub print_s
# Tack on the last line, and we're done!
$output .= "}\n\n";
- $output;
+
+ Text::BibTeX->_process_result($output, $self->{binmode}, $self->{normalization});
}
=back
diff --git a/lib/Text/BibTeX/File.pm b/lib/Text/BibTeX/File.pm
index 3e6e888..bd94163 100644
--- a/lib/Text/BibTeX/File.pm
+++ b/lib/Text/BibTeX/File.pm
@@ -22,7 +22,7 @@ use Carp;
use IO::File;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -30,12 +30,12 @@ Text::BibTeX::File - interface to whole BibTeX files
=head1 SYNOPSIS
- use Text::BibTeX; # this loads Text::BibTeX::File
+ use Text::BibTeX::File;
- $bib = new Text::BibTeX::File "foo.bib" or die "foo.bib: $!\n";
+ $bib = Text::BibTeX::File->new("foo.bib") or die "foo.bib: $!\n";
# or:
- $bib = new Text::BibTeX::File;
- $bib->open ("foo.bib") || die "foo.bib: $!\n";
+ $bib = Text::BibTeX::File->new;
+ $bib->open("foo.bib", {binmode => 'utf-8', normalization => 'NFC'}) || die "foo.bib: $!\n";
$bib->set_structure ($structure_name,
$option1 => $value1, ...);
@@ -61,19 +61,56 @@ These concepts are fully documented in L<Text::BibTeX::Structure>.
=over 4
-=item new ([FILENAME [,MODE [,PERMS]]])
+=item new ([FILENAME], [OPTS])
-Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied,
-passes it to the C<open> method (along with MODE and PERMS if they
-are supplied). If the C<open> fails, C<new> fails and returns false; if
-the C<open> succeeds (or if FILENAME isn't supplied), C<new> returns the
-new object reference.
+Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied, passes
+it to the C<open> method (along with OPTS). If the C<open> fails, C<new>
+fails and returns false; if the C<open> succeeds (or if FILENAME isn't
+supplied), C<new> returns the new object reference.
-=item open (FILENAME [,MODE [,PERMS]])
+=item open (FILENAME [OPTS])
-Opens the file specified by FILENAME, possibly using MODE and PERMS.
-See L<IO::File> for full semantics; this C<open> is just a front end for
-C<IO::File::open>.
+Opens the file specified by FILENAME. OPTS is a hashref that can have
+the following values:
+
+=over 4
+
+=item MODE
+
+mode as specified by L<IO::File>
+
+=item PERMS
+
+permissions as specified by L<IO::File>. Can only be used in conjunction
+with C<MODE>
+
+=item BINMODE
+
+By default, Text::BibTeX uses bytes directly. Thus, you need to encode
+strings accordingly with the encoding of the files you are reading. You can
+also select UTF-8. In this case, Text::BibTeX will return UTF-8 strings in
+NFC mode. Note that at the moment files with BOM are not supported.
+
+Valid values are 'raw/bytes' or 'utf-8'.
+
+=item NORMALIZATION
+
+By default, Text::BibTeX outputs UTF-8 in NFC form. You can change this by passing
+the name of a different form.
+
+Valid values are those forms supported by the Unicode::Normalize module
+('NFD', 'NFKD' etc.)
+
+=item RESET_MACROS
+
+By default, Text::BibTeX accumulates macros. This means that when you open a second
+file, macros defined by the first are still available. This may result in warnings
+of macros being redefined.
+
+This option can be used to force Text::BibTeX to clean up all macro definitions
+(except for the month macros).
+
+=back
=item close ()
@@ -95,19 +132,45 @@ sub new
$class = ref ($class) || $class;
my $self = bless {}, $class;
- ($self->open (@_) || return undef) if @_; # filename [, mode [, perms]]
+ ($self->open (@_) || return undef) if @_;
$self;
}
-sub open
-{
- my $self = shift;
-
- $self->{filename} = $_[0];
- $self->{handle} = new IO::File;
- $self->{handle}->open (@_); # filename, maybe mode, maybe perms
+# Open FILENAME for this File object.
+#
+# The arguments after the filename are either a single hash ref of
+# options or a plain list that is handed on to IO::File's open.  Option
+# keys are matched case-insensitively:
+#   binmode       - anything matching /utf-?8/i selects 'utf-8';
+#                   otherwise 'bytes' is kept
+#   normalization - stored as given (default 'NFC')
+#   reset_macros  - if true, delete all macros and re-define the months
+#   mode, perms   - passed to IO::File's open (perms only with mode)
+#
+# Returns the result of the IO::File open call.
+sub open {
+    my $self = shift;
+    $self->{filename} = shift;
+
+    $self->{binmode}       = 'bytes';
+    $self->{normalization} = 'NFC';
+    my @args = ( $self->{filename} );
+
+    if ( ref $_[0] eq "HASH" ) {
+        # Work on a private copy so that adding the lower-cased keys
+        # does not modify the hash owned by the caller.
+        my %opts = %{ shift() };
+        $opts{ lc $_ } = $opts{$_} for ( keys %opts );
+
+        $self->{binmode} = 'utf-8'
+            if exists $opts{binmode} && $opts{binmode} =~ /utf-?8/i;
+        $self->{normalization} = $opts{normalization}
+            if exists $opts{normalization};
+
+        if ( exists $opts{reset_macros} && $opts{reset_macros} ) {
+            Text::BibTeX::delete_all_macros();
+            Text::BibTeX::_define_months();
+        }
+
+        if ( exists $opts{mode} ) {
+            push @args, $opts{mode};
+            push @args, $opts{perms} if exists $opts{perms};
+        }
+    }
+    else {
+        push @args, @_;
+    }
+
+    $self->{handle} = IO::File->new;
+    return $self->{handle}->open(@args);    # filename, maybe mode, maybe perms
+}
+
sub close
{
my $self = shift;
@@ -155,7 +218,7 @@ sub set_structure
croak "Text::BibTeX::File::set_structure: options list must have even " .
"number of elements"
unless @options % 2 == 0;
- $self->{structure} = new Text::BibTeX::Structure ($structure, @options);
+ $self->{structure} = Text::BibTeX::Structure->new($structure, @options);
}
sub structure { shift->{structure} }
diff --git a/lib/Text/BibTeX/Name.pm b/lib/Text/BibTeX/Name.pm
index 2e69281..b8a72b0 100644
--- a/lib/Text/BibTeX/Name.pm
+++ b/lib/Text/BibTeX/Name.pm
@@ -23,7 +23,7 @@ require 5.004;
use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX;
@@ -33,6 +33,8 @@ Text::BibTeX::Name - interface to BibTeX-style author names
=head1 SYNOPSIS
+ use Text::BibTeX::Name;
+
$name = Text::BibTeX::Name->new();
$name->split('J. Random Hacker');
# or:
@@ -269,7 +271,7 @@ way is the job of another module: see L<Text::BibTeX::NameFormat>.
=over 4
-=item new([ NAME [, FILENAME, LINE, NAME_NUM]])
+=item new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]])
Creates a new C<Text::BibTeX::Name> object. If NAME is supplied, it
must be a string containing a single name, and it will be passed to
@@ -277,17 +279,46 @@ the C<split> method for further processing. FILENAME, LINE, and
NAME_NUM, if present, are all also passed to C<split> to allow better
error messages.
+If the first argument is a hash reference, it is used to define
+configuration values. At the moment the available values are:
+
+=over 4
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings and you can customise the normalization with the NORMALIZATION option.
+
+ Text::BibTeX::Name->new(
+ { binmode => 'utf-8', normalization => 'NFD' },
+ "Alberto Simões" );
+
+=back
+
=cut
-sub new
-{
- my ($class, $name, $filename, $line, $name_num) = @_;
+sub new {
+ my $class = shift;
+ my $opts = ref $_[0] eq 'HASH' ? shift : {};
+
+ $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+
+ my ( $name, $filename, $line, $name_num ) = @_;
+
+ $class = ref($class) || $class;
+ my $self = bless { }, $class;
- $class = ref ($class) || $class;
- my $self = bless {}, $class;
- $self->split ($name, $filename, $line, $name_num, 1)
- if (defined $name);
- $self;
+ $self->{binmode} = 'bytes';
+ $self->{normalization} = 'NFC';
+ $self->{binmode} = 'utf-8'
+ if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+ $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
+ $self->split( Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}),
+ $filename, $line, $name_num, 1 )
+ if ( defined $name );
+ $self;
}
@@ -317,7 +348,7 @@ sub split
my ($self, $name, $filename, $line, $name_num) = @_;
# Call the XSUB with default values if necessary
- $self->_split ($name, $filename,
+ $self->_split (Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}), $filename,
defined $line ? $line : -1,
defined $name_num ? $name_num : -1,
1);
@@ -341,13 +372,18 @@ would return the list C<('de','la')>.
=cut
-sub part
-{
- my ($self, $partname) = @_;
+sub part {
+ my ( $self, $partname ) = @_;
+
+ croak "unknown name part"
+ unless $partname =~ /^(first|von|last|jr)$/;
- croak "unknown name part"
- unless $partname =~ /^(first|von|last|jr)$/;
- exists $self->{$partname} ? @{$self->{$partname}} : ();
+ if ( exists $self->{$partname} ) {
+ my @x = map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}) }
+ @{ $self->{$partname} };
+ return @x > 1 ? @x : $x[0];
+ }
+ return undef;
}
diff --git a/lib/Text/BibTeX/NameFormat.pm b/lib/Text/BibTeX/NameFormat.pm
index 04dcef9..1d980a8 100644
--- a/lib/Text/BibTeX/NameFormat.pm
+++ b/lib/Text/BibTeX/NameFormat.pm
@@ -23,7 +23,7 @@ require 5.004;
use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -31,6 +31,8 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
=head1 SYNOPSIS
+ use Text::BibTeX::NameFormat;
+
$format = Text::BibTeX::NameFormat->new($parts, $abbrev_first);
$format->set_text ($part,
@@ -39,6 +41,7 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
$format->set_options ($part, $abbrev, $join_tokens, $join_part
+ ## Uses the encoding/binmode and normalization form stored in $name
$formatted_name = $format->apply ($name);
=head1 DESCRIPTION
@@ -242,7 +245,12 @@ sub apply
croak "invalid Name object: no C structure";
my $format_struct = $self->{'_cstruct'} ||
croak "invalid NameFormat object: no C structure";
- format_name ($name_struct, $format_struct);
+
+ my $ans = format_name ($name_struct, $format_struct);
+
+ $ans = Text::BibTeX->_process_result($ans, $name->{binmode}, $name->{normalization});
+
+ return $ans;
}
=back
diff --git a/lib/Text/BibTeX/Structure.pm b/lib/Text/BibTeX/Structure.pm
index a15bf08..a42c39e 100644
--- a/lib/Text/BibTeX/Structure.pm
+++ b/lib/Text/BibTeX/Structure.pm
@@ -24,7 +24,7 @@ use strict;
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
use Text::BibTeX ('check_class');
@@ -322,8 +322,8 @@ module. A short example will illustrate this.
Typically, a C<Text::BibTeX>-based program is based around a kernel of
code like this:
- $bibfile = new Text::BibTeX::File "foo.bib";
- while ($entry = new Text::BibTeX::Entry $bibfile)
+ $bibfile = Text::BibTeX::File->new("foo.bib");
+ while ($entry = Text::BibTeX::Entry->new($bibfile))
{
# process $entry
}
@@ -336,9 +336,9 @@ behaviour. Let us now suppose that C<$bibfile> is expected to conform
to a database structure specified by C<$structure> (presumably a
user-supplied value, and thus unknown at compile-time):
- $bibfile = new Text::BibTeX::File "foo.bib";
+ $bibfile = Text::BibTeX::File->new("foo.bib");
$bibfile->set_structure ($structure);
- while ($entry = new Text::BibTeX::Entry $bibfile)
+ while ($entry = Text::BibTeX::Entry->new($bibfile))
{
# process $entry
}
@@ -450,7 +450,7 @@ implements the C<Bib> structure. Use the pseudo-option C<module> to
override this module name. For instance, if the structure C<Foo> is
implemented by the module C<Foo>:
- $structure = new Text::BibTeX::Structure
+ $structure = Text::BibTeX::Structure->new
('Foo', module => 'Foo');
This method C<die>s if there are any errors loading/compiling the
@@ -870,7 +870,7 @@ sub get_options
package Text::BibTeX::StructuredEntry;
use strict;
use vars qw(@ISA $VERSION);
-$VERSION = 0.74;
+$VERSION = 0.76;
use Carp;
diff --git a/lib/Text/BibTeX/Value.pm b/lib/Text/BibTeX/Value.pm
index 7c1d178..97bc7a8 100644
--- a/lib/Text/BibTeX/Value.pm
+++ b/lib/Text/BibTeX/Value.pm
@@ -22,7 +22,7 @@ use Scalar::Util 'blessed';
use Carp;
use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
=head1 NAME
@@ -32,7 +32,7 @@ Text::BibTeX::Value - interfaces to BibTeX values and simple values
use Text::BibTeX;
- $entry = new Text::BibTeX::Entry;
+ $entry = Text::BibTeX::Entry->new;
# set the 'preserve_values' flag to 1 for this parse
$entry->parse ($filename, $filehandle, 1);
@@ -80,7 +80,7 @@ C<title> field is a single string, and the C<journal> and C<year> fields
are, respectively, a single macro and a single number. If you parse
this entry in the usual way:
- $entry = new Text::BibTeX::Entry $entry_text;
+ $entry = Text::BibTeX::Entry->new($entry_text);
then the C<get> method on C<$entry> would return simple strings.
Assuming that the C<and> macro is defined as C<" and ">, then
@@ -96,14 +96,14 @@ There are two ways to make this request: per-file and per-entry. For a
per-file request, use the C<preserve_values> method on your C<File>
object:
- $bibfile = new Text::BibTeX::File $filename;
+ $bibfile = Text::BibTeX::File->new($filename);
$bibfile->preserve_values (1);
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
$entry->get ($field); # returns a Value object
$bibfile->preserve_values (0);
- $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = Text::BibTeX::Entry->new($bibfile);
$entry->get ($field); # returns a string
If you're not using a C<File> object, or want to control things at a
@@ -111,7 +111,7 @@ finer scale, then you have to pass in the C<preserve_values> flag when
invoking C<read>, C<parse>, or C<parse_s> on your C<Entry> objects:
# no File object, parsing from a string
- $entry = new Text::BibTeX::Entry;
+ $entry = Text::BibTeX::Entry->new;
$entry->parse_s ($entry_text, 0); # preserve_values=0 (default)
$entry->get ($field); # returns a string
@@ -183,8 +183,8 @@ two-element list containing the type and text of the simple value. For
example, one way to recreate the C<author> field of the example entry in
L<"DESCRIPTION"> would be:
- $and_macro = new Text::BibTeX::SimpleValue (BTAST_MACRO, 'and');
- $value = new Text::BibTeX::Value
+ $and_macro = Text::BibTeX::SimpleValue->new (BTAST_MACRO, 'and');
+ $value = Text::BibTeX::Value->new
([BTAST_STRING, 'Homer Simpson'],
$and_macro,
[BTAST_STRING, 'Ned Flanders']);
@@ -202,7 +202,7 @@ sub new
my $self = bless [], $class;
while (my $sval = shift)
{
- $sval = new Text::BibTeX::SimpleValue @$sval
+ $sval = Text::BibTeX::SimpleValue->new(@$sval)
if ref $sval eq 'ARRAY' && @$sval == 2;
croak "simple value is neither a two-element array ref " .
"nor a Text::BibTeX::SimpleValue object"
@@ -245,7 +245,7 @@ use Carp;
use Text::BibTeX qw(:nodetypes);
use vars qw($VERSION);
-$VERSION = '0.74';
+$VERSION = '0.76';
=head2 Text::BibTeX::SimpleValue methods