diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Text/BibTeX.pm | 84 | ||||
-rw-r--r-- | lib/Text/BibTeX/Bib.pm | 32 | ||||
-rw-r--r-- | lib/Text/BibTeX/BibFormat.pm | 4 | ||||
-rw-r--r-- | lib/Text/BibTeX/BibSort.pm | 6 | ||||
-rw-r--r-- | lib/Text/BibTeX/Entry.pm | 115 | ||||
-rw-r--r-- | lib/Text/BibTeX/File.pm | 111 | ||||
-rw-r--r-- | lib/Text/BibTeX/Name.pm | 70 | ||||
-rw-r--r-- | lib/Text/BibTeX/NameFormat.pm | 12 | ||||
-rw-r--r-- | lib/Text/BibTeX/Structure.pm | 14 | ||||
-rw-r--r-- | lib/Text/BibTeX/Value.pm | 22 |
10 files changed, 344 insertions, 126 deletions
diff --git a/lib/Text/BibTeX.pm b/lib/Text/BibTeX.pm index a49b071..54ed65a 100644 --- a/lib/Text/BibTeX.pm +++ b/lib/Text/BibTeX.pm @@ -13,18 +13,19 @@ # ---------------------------------------------------------------------- package Text::BibTeX; +use Text::BibTeX::Name; +use Text::BibTeX::NameFormat; use 5.008001; # needed for Text::BibTeX::Entry use strict; -#use UNIVERSAL qw(isa can); # for 'check_class' subroutine use Carp; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $AUTOLOAD); require Exporter; require DynaLoader; -our $VERSION='0.74'; +our $VERSION='0.76'; @ISA = qw(Exporter DynaLoader); %EXPORT_TAGS = (nodetypes => [qw(BTAST_STRING BTAST_MACRO BTAST_NUMBER)], @@ -45,9 +46,53 @@ our $VERSION='0.74'; @{$EXPORT_TAGS{'nodetypes'}}, @{$EXPORT_TAGS{'nameparts'}}, @{$EXPORT_TAGS{'joinmethods'}}, - 'check_class', 'display_list'); + 'check_class', 'display_list' ); @EXPORT = @{$EXPORT_TAGS{'metatypes'}}; +use Encode 'encode', 'decode'; +use Unicode::Normalize; + + +sub _process_result { + no strict 'refs'; + my ( $self, $result, $encoding, $norm ) = @_; + + $norm ||= "NFC"; # best to force it here. 
+ my $normsub = \&{"$norm"}; # symbolic ref + if ( $encoding eq "utf-8" ) { + if ( utf8::is_utf8($result) ) { + return $normsub->($result); + } + else { + return $normsub->( decode( $encoding, $result ) ); + } + } + else { return $result; } + +} + +sub _process_argument { + my ( $self, $value, $encoding ) = @_; + + if ( $encoding eq "utf-8" && utf8::is_utf8($value)) { + return encode( $encoding, $value ); + } + else { + return $value; + } +} + +sub split_list { + my ( $field, $delim, $filename, $line, $desc, $opts ) = @_; + $opts ||= {}; + $opts->{binmode} ||= 'bytes'; + $opts->{normalization} ||= 'NFC'; + return + map { Text::BibTeX->_process_result( $_, $opts->{binmode}, $opts->{normalization} ) } + Text::BibTeX::isplit_list( $field, $delim, $filename, $line, $desc ); + +} + =head1 NAME Text::BibTeX - interface to read and parse BibTeX files @@ -241,6 +286,16 @@ bootstrap Text::BibTeX; initialize(); # these are both XS functions END { &cleanup; } +# This can't go in a BEGIN because of the .XS bootstrapping mechanism +_define_months(); + +sub _define_months { + for my $month (qw.january february march april may june + july august september october november december.) { + add_macro_text(substr($month, 0, 3), ucfirst($month)); + } +} + =head1 EXPORTS @@ -572,7 +627,22 @@ of it as appropriate. They're just mentioned here for completeness. =over 4 -=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]]) +=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION [, OPTS]]]]) + +Splits a string on a fixed delimiter according to the BibTeX rules for +splitting up lists of names. With BibTeX, the delimiter is hard-coded +as C<"and">; here, you can supply any string. Instances of DELIM in +STRING are considered delimiters if they are at brace-depth zero, +surrounded by whitespace, and not at the beginning or end of STRING; the +comparison is case-insensitive. 
See L<bt_split_names> for full details +of how splitting is done (it's I<not> the same as Perl's C<split> +function). OPTS is a hash ref of the same binmode and normalization +arguments as with, e.g. Text::BibTeX::File->open(). split_list calls isplit_list() +internally but handles UTF-8 conversion and normalization, if requested. + +Returns the list of strings resulting from splitting STRING on DELIM. + +=item isplit_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]]) Splits a string on a fixed delimiter according to the BibTeX rules for splitting up lists of names. With BibTeX, the delimiter is hard-coded @@ -581,7 +651,8 @@ STRING are considered delimiters if they are at brace-depth zero, surrounded by whitespace, and not at the beginning or end of STRING; the comparison is case-insensitive. See L<bt_split_names> for full details of how splitting is done (it's I<not> the same as Perl's C<split> -function). +function). This function returns bytes. Use Text::BibTeX::split_list to specify +the same binmode and normalization arguments as with, e.g. Text::BibTeX::File->open(). Returns the list of strings resulting from splitting STRING on DELIM. @@ -654,7 +725,8 @@ takes no action. =item delete_all_macros () -Deletes all macros from the macro table. +Deletes all macros from the macro table, even the predefined month +names. =item macro_length (MACRO) diff --git a/lib/Text/BibTeX/Bib.pm b/lib/Text/BibTeX/Bib.pm index d400dd3..9fb50f5 100644 --- a/lib/Text/BibTeX/Bib.pm +++ b/lib/Text/BibTeX/Bib.pm @@ -27,7 +27,7 @@ Text::BibTeX::Bib - defines the "Bib" database structure =head1 SYNOPSIS - $bibfile = new Text::BibTeX::File $filename; + $bibfile = Text::BibTeX::File->new($filename); $bibfile->set_structure ('Bib', # Default option values: sortby => 'name', @@ -45,11 +45,11 @@ Text::BibTeX::Bib - defines the "Bib" database structure $bibfile->set_option (labels => 'alpha'); # not implemented yet! 
# parse entry from $bibfile and automatically make it a BibEntry - $entry = new Text::BibTeX::Entry $bibfile; + $entry = Text::BibTeX::Entry->new($bibfile); # or get an entry from somewhere else which is hard-coded to be # a BibEntry - $entry = new Text::BibTeX::BibEntry ...; + $entry = Text::BibTeX::BibEntry->new(...); $sortkey = $entry->sort_key; @blocks = $entry->format; @@ -111,7 +111,7 @@ package Text::BibTeX::BibStructure; use strict; use vars qw(@ISA $VERSION); @ISA = qw(Text::BibTeX::Structure); -$VERSION = '0.74'; +$VERSION = '0.76'; =head1 STRUCTURE OPTIONS @@ -450,33 +450,13 @@ package Text::BibTeX::BibEntry; use strict; use vars qw(@ISA $VERSION); -$VERSION = '0.74'; +$VERSION = '0.76'; use Text::BibTeX::BibSort; use Text::BibTeX::BibFormat; @ISA = qw(Text::BibTeX::BibSort Text::BibTeX::BibFormat); - -# Pre-define the "month name" macros for compatibility with BibTeX. -# This ignores all sorts of issues, like internationalization and -# abbreviation. -my %month_names = - ('jan' => 'January', - 'feb' => 'February', - 'mar' => 'March', - 'apr' => 'April', - 'may' => 'May', - 'jun' => 'June', - 'jul' => 'July', - 'aug' => 'August', - 'sep' => 'September', - 'oct' => 'October', - 'nov' => 'November', - 'dec' => 'December'); - -my ($macro, $expansion); -Text::BibTeX::add_macro_text ($macro, $expansion) - while (($macro, $expansion) = each %month_names); + 1; diff --git a/lib/Text/BibTeX/BibFormat.pm b/lib/Text/BibTeX/BibFormat.pm index 2c78d04..f286fc1 100644 --- a/lib/Text/BibTeX/BibFormat.pm +++ b/lib/Text/BibTeX/BibFormat.pm @@ -26,7 +26,7 @@ use Text::BibTeX::NameFormat; use Text::BibTeX::Structure; @ISA = qw(Text::BibTeX::StructuredEntry); -$VERSION = 0.74; +$VERSION = 0.76; use Text::BibTeX qw(:subs display_list :nameparts :joinmethods); @@ -148,7 +148,7 @@ sub format_names unless $style =~ /^(full|abbrev|nopunct|nospace)$/; $order = ($order eq 'first') ? 'fvlj' : 'vljf'; - $format = new Text::BibTeX::NameFormat ($order, ! 
($style eq 'full')); + $format = Text::BibTeX::NameFormat->new ($order, ! ($style eq 'full')); $format->set_text (&BTN_FIRST, undef, undef, undef, '') if $style eq 'nopunct' || $style eq 'nospace'; diff --git a/lib/Text/BibTeX/BibSort.pm b/lib/Text/BibTeX/BibSort.pm index dc23a89..52b60ac 100644 --- a/lib/Text/BibTeX/BibSort.pm +++ b/lib/Text/BibTeX/BibSort.pm @@ -23,7 +23,7 @@ use vars qw(@ISA $VERSION); use Text::BibTeX::Structure; @ISA = qw(Text::BibTeX::StructuredEntry); -$VERSION = 0.74; +$VERSION = 0.76; use Text::BibTeX qw(purify_string change_case); @@ -147,8 +147,8 @@ sub sort_format_names my ($abbrev, $format, $name); $abbrev = ! ($self->structure->get_options ('namestyle') eq 'full'); - $format = new Text::BibTeX::NameFormat ("vljf", $abbrev); - $name = new Text::BibTeX::Name; + $format = Text::BibTeX::NameFormat->new ("vljf", $abbrev); + $name = Text::BibTeX::Name->new; my (@snames, $i, $sname); @snames = $self->split ($field); diff --git a/lib/Text/BibTeX/Entry.pm b/lib/Text/BibTeX/Entry.pm index 6fdf242..36422cc 100644 --- a/lib/Text/BibTeX/Entry.pm +++ b/lib/Text/BibTeX/Entry.pm @@ -23,7 +23,7 @@ use vars qw'$VERSION'; use Carp; use Text::BibTeX qw(:metatypes :nodetypes); -$VERSION = 0.74; +$VERSION = 0.76; =head1 NAME @@ -31,7 +31,7 @@ Text::BibTeX::Entry - read and parse BibTeX files =head1 SYNOPSIS - use Text::BibTeX; # do not use Text::BibTeX::Entry alone! + use Text::BibTeX::Entry; # ...assuming that $bibfile and $newbib are both objects of class # Text::BibTeX::File, opened for reading and writing (respectively): @@ -127,7 +127,7 @@ anything extra. =over 4 -=item new ([SOURCE]) +=item new ([OPTS ,] [SOURCE]) Creates a new C<Text::BibTeX::Entry> object. 
If the SOURCE parameter is supplied, it must be one of the following: a C<Text::BibTeX::File> (or @@ -169,6 +169,28 @@ But using a C<Text::BibTeX::File> object is simpler and preferred: Returns the new object, unless SOURCE is supplied and reading/parsing the entry fails (e.g., due to end of file) -- then it returns false. +You may supply a reference to an option hash as first argument. +Supported options are: + +=over 4 + +=item BINMODE + +Set the way Text::BibTeX deals with strings. By default it manages +strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized + +Text::BibTeX::Entry->new( + { binmode => 'utf-8', normalization => 'NFD' }, + $file }); + + +=item NORMALIZATION + +UTF-8 strings and you can customise the normalization with the NORMALIZATION option. + +=back + + =cut sub new @@ -176,6 +198,7 @@ sub new my ($class, @source) = @_; $class = ref ($class) || $class; + my $self = {'file' => undef, 'type' => undef, 'key' => undef, @@ -183,8 +206,15 @@ sub new 'metatype' => undef, 'fields' => [], 'values' => {}}; - bless $self, $class; + + my $opts = {}; + $opts = shift @source if scalar(@source) and ref $source[0] eq "HASH"; + $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts ); + $self->{binmode} = 'utf-8' + if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i; + $self->{normalization} = $opts->{normalization} if exists $opts->{normalization}; + if (@source) { my $status; @@ -234,6 +264,8 @@ sub clone $clone->{file} = $self->{file} } # These might be changed so make copies + $clone->{binmode} = $self->{binmode}; + $clone->{normalization} = $self->{normalization}; $clone->{type} = $self->{type}; $clone->{key} = $self->{key}; $clone->{status} = $self->{status}; @@ -271,6 +303,10 @@ sub read my $fn = $source->{'filename'}; my $fh = $source->{'handle'}; $self->{'file'} = $source; # store File object for later use + ## Propagate flags + for my $f (qw.binmode normalization.) 
{ + $self->{$f} = $source->{$f} unless exists $self->{$f}; + } return $self->parse ($fn, $fh, $preserve); } @@ -392,27 +428,39 @@ context.) =item fieldlist () -Returns the list of fields in the entry. In a scalar context, returns a -reference to the object's own list of fields. That way, you can change or -reorder the field list with minimal interference from the class. I'm not -entirely sure if this is a good idea, so don't rely on it existing in the -future; feel free to play around with it and let me know if you get bitten -in dangerous ways or find this enormously useful. +Returns the list of fields in the entry. + +B<WARNING> In scalar context, it no longer returns a +reference to the object's own list of fields. =cut sub parse_ok { shift->{'status'}; } -sub metatype { shift->{'metatype'}; } +sub metatype { + my $self = shift; + Text::BibTeX->_process_result( $self->{'metatype'}, $self->{binmode}, $self->{normalization} ); +} -sub type { shift->{'type'}; } +sub type { + my $self = shift; + Text::BibTeX->_process_result( $self->{'type'}, $self->{binmode}, $self->{normalization} ); +} -sub key { shift->{'key'}; } +sub key { + my $self = shift; + exists $self->{key} + ? Text::BibTeX->_process_result($self->{key}, $self->{binmode}, $self->{normalization}) + : undef; +} sub num_fields { scalar @{shift->{'fields'}}; } -sub fieldlist { wantarray ? 
@{shift->{'fields'}} : shift->{'fields'}; } - +sub fieldlist { + my $self = shift; + return map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization})} @{$self->{'fields'}}; +} + =item exists (FIELD) Returns true if a field named FIELD is present in the entry, false @@ -484,17 +532,24 @@ sub exists { my ($self, $field) = @_; - exists $self->{'values'}{$field}; + exists $self->{values}{Text::BibTeX->_process_argument($field, $self->{binmode}, $self->{normalization})}; } sub get { my ($self, @fields) = @_; - @{$self->{'values'}}{@fields}; + my @x = @{$self->{'values'}}{map {Text::BibTeX->_process_argument($_, $self->{binmode}, $self->{normalization})} @fields}; + + @x = map {defined($_) ? Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}): undef} @x; + + return (@x > 1) ? @x : $x[0]; } -sub value { shift->{'value'} } +sub value { + my $self = shift; + Text::BibTeX->_process_result($self->{value}, $self->{binmode}, $self->{normalization}); +} =head2 Author name methods @@ -590,17 +645,19 @@ sub split { my ($self, $field, $delim, $desc) = @_; - return unless $self->exists ($field); + return unless $self->exists($field); $delim ||= 'and'; $desc ||= 'name'; - my $filename = ($self->{'file'} && $self->{'file'}{'filename'}); - my $line = $self->{'lines'}{$field}; - # local $^W = 0 # suppress spurious warning from # unless defined $filename; # undefined $filename - Text::BibTeX::split_list ($self->{'values'}{$field}, $delim, - $filename, $line, $desc); + Text::BibTeX::split_list($self->{values}{$field}, + $delim, + ($self->{file} && $self->{file}{filename}), + $self->{lines}{$field}, + $desc, + {binmode => $self->{binmode}, + normalization => $self->{normalization}}); } sub names @@ -618,7 +675,8 @@ sub names # unless defined $filename; # undefined $filename for $i (0 .. 
$#names) { - $names[$i] = Text::BibTeX::Name->new($names[$i], $filename, $line, $i); + $names[$i] = Text::BibTeX::Name->new( + {binmode => $self->{binmode}, normalization => $self->{normalization}},$names[$i], $filename, $line, $i); } @names; } @@ -696,7 +754,7 @@ sub set_key { my ($self, $key) = @_; - $self->{'key'} = $key; + $self->{'key'} = Text::BibTeX->_process_argument($key, $self->{binmode}, $self->{normalization}); } sub set @@ -708,7 +766,7 @@ sub set while (@_) { - ($field,$value) = (shift,shift); + ($field,$value) = (shift,Text::BibTeX->_process_argument(shift, $self->{binmode}, $self->{normalization})); push (@{$self->{'fields'}}, $field) unless exists $self->{'values'}{$field}; $self->{'values'}{$field} = $value; @@ -870,7 +928,8 @@ sub print_s # Tack on the last line, and we're done! $output .= "}\n\n"; - $output; + + Text::BibTeX->_process_result($output, $self->{binmode}, $self->{normalization}); } =back diff --git a/lib/Text/BibTeX/File.pm b/lib/Text/BibTeX/File.pm index 3e6e888..bd94163 100644 --- a/lib/Text/BibTeX/File.pm +++ b/lib/Text/BibTeX/File.pm @@ -22,7 +22,7 @@ use Carp; use IO::File; use vars qw'$VERSION'; -$VERSION = 0.74; +$VERSION = 0.76; =head1 NAME @@ -30,12 +30,12 @@ Text::BibTeX::File - interface to whole BibTeX files =head1 SYNOPSIS - use Text::BibTeX; # this loads Text::BibTeX::File + use Text::BibTeX::File; - $bib = new Text::BibTeX::File "foo.bib" or die "foo.bib: $!\n"; + $bib = Text::BibTeX::File->new("foo.bib") or die "foo.bib: $!\n"; # or: - $bib = new Text::BibTeX::File; - $bib->open ("foo.bib") || die "foo.bib: $!\n"; + $bib = Text::BibTeX::File->new; + $bib->open("foo.bib", {binmode => 'utf-8', normalization => 'NFC'}) || die "foo.bib: $!\n"; $bib->set_structure ($structure_name, $option1 => $value1, ...); @@ -61,19 +61,56 @@ These concepts are fully documented in L<Text::BibTeX::Structure>. =over 4 -=item new ([FILENAME [,MODE [,PERMS]]]) +=item new ([FILENAME], [OPTS]) -Creates a new C<Text::BibTeX::File> object. 
If FILENAME is supplied, -passes it to the C<open> method (along with MODE and PERMS if they -are supplied). If the C<open> fails, C<new> fails and returns false; if -the C<open> succeeds (or if FILENAME isn't supplied), C<new> returns the -new object reference. +Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied, passes +it to the C<open> method (along with OPTS). If the C<open> fails, C<new> +fails and returns false; if the C<open> succeeds (or if FILENAME isn't +supplied), C<new> returns the new object reference. -=item open (FILENAME [,MODE [,PERMS]]) +=item open (FILENAME [, OPTS]) -Opens the file specified by FILENAME, possibly using MODE and PERMS. -See L<IO::File> for full semantics; this C<open> is just a front end for -C<IO::File::open>. +Opens the file specified by FILENAME. OPTS is a hashref that can have +the following values: + +=over 4 + +=item MODE + +mode as specified by L<IO::File> + +=item PERMS + +permissions as specified by L<IO::File>. Can only be used in conjunction +with C<MODE>. + +=item BINMODE + +By default, Text::BibTeX uses bytes directly. Thus, you need to encode +strings according to the encoding of the files you are reading. You can +also select UTF-8. In this case, Text::BibTeX will return UTF-8 strings in +NFC mode. Note that at the moment files with BOM are not supported. + +Valid values are 'raw/bytes' or 'utf-8'. + +=item NORMALIZATION + +By default, Text::BibTeX outputs UTF-8 in NFC form. You can change this by passing +the name of a different form. + +Valid values are those forms supported by the Unicode::Normalize module +('NFD', 'NFKD' etc.) + +=item RESET_MACROS + +By default, Text::BibTeX accumulates macros. This means that when you open a second +file, macros defined by the first are still available. This may result in warnings +of macros being redefined. + +This option can be used to force Text::BibTeX to clean up all macro definitions +(except for the month macros). 
+ +=back =item close () @@ -95,19 +132,45 @@ sub new $class = ref ($class) || $class; my $self = bless {}, $class; - ($self->open (@_) || return undef) if @_; # filename [, mode [, perms]] + ($self->open (@_) || return undef) if @_; $self; } -sub open -{ - my $self = shift; - - $self->{filename} = $_[0]; - $self->{handle} = new IO::File; - $self->{handle}->open (@_); # filename, maybe mode, maybe perms +sub open { + my ($self) = shift; + $self->{filename} = shift; + + $self->{binmode} = 'bytes'; + $self->{normalization} = 'NFC'; + my @args = ( $self->{filename} ); + + if ( ref $_[0] eq "HASH" ) { + my $opts = {}; + $opts = shift; + $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts ); + $self->{binmode} = 'utf-8' + if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i; + $self->{normalization} = $opts->{normalization} if exists $opts->{normalization}; + + if (exists $opts->{reset_macros} && $opts->{reset_macros}) { + Text::BibTeX::delete_all_macros(); + Text::BibTeX::_define_months(); + } + + if ( exists $opts->{mode} ) { + push @args, $opts->{mode}; + push @args, $opts->{perms} if exists $opts->{perms}; + } + } + else { + push @args, @_; + } + + $self->{handle} = IO::File->new; + $self->{handle}->open(@args); # filename, maybe mode, maybe perms } + sub close { my $self = shift; @@ -155,7 +218,7 @@ sub set_structure croak "Text::BibTeX::File::set_structure: options list must have even " . 
"number of elements" unless @options % 2 == 0; - $self->{structure} = new Text::BibTeX::Structure ($structure, @options); + $self->{structure} = Text::BibTeX::Structure->new($structure, @options); } sub structure { shift->{structure} } diff --git a/lib/Text/BibTeX/Name.pm b/lib/Text/BibTeX/Name.pm index 2e69281..b8a72b0 100644 --- a/lib/Text/BibTeX/Name.pm +++ b/lib/Text/BibTeX/Name.pm @@ -23,7 +23,7 @@ require 5.004; use strict; use Carp; use vars qw'$VERSION'; -$VERSION = 0.74; +$VERSION = 0.76; use Text::BibTeX; @@ -33,6 +33,8 @@ Text::BibTeX::Name - interface to BibTeX-style author names =head1 SYNOPSIS + use Text::BibTeX::Name; + $name = Text::BibTeX::Name->new(); $name->split('J. Random Hacker'); # or: @@ -269,7 +271,7 @@ way is the job of another module: see L<Text::BibTeX::NameFormat>. =over 4 -=item new([ NAME [, FILENAME, LINE, NAME_NUM]]) +=item new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]]) Creates a new C<Text::BibTeX::Name> object. If NAME is supplied, it must be a string containing a single name, and it will be be passed to @@ -277,17 +279,46 @@ the C<split> method for further processing. FILENAME, LINE, and NAME_NUM, if present, are all also passed to C<split> to allow better error messages. +If the first argument is a hash reference, it is used to define +configuration values. At the moment the available values are: + +=over 4 + +=item BINMODE + +Set the way Text::BibTeX deals with strings. By default it manages +strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized +UTF-8 strings and you can customise the normalization with the NORMALIZATION option. + + Text::BibTeX::Name->new( + { binmode => 'utf-8', normalization => 'NFD' }, + "Alberto Simões"}); + +=back + =cut -sub new -{ - my ($class, $name, $filename, $line, $name_num) = @_; +sub new { + my $class = shift; + my $opts = ref $_[0] eq 'HASH' ? 
shift : {}; + + $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts ); + + my ( $name, $filename, $line, $name_num ) = @_; + + $class = ref($class) || $class; + my $self = bless { }, $class; - $class = ref ($class) || $class; - my $self = bless {}, $class; - $self->split ($name, $filename, $line, $name_num, 1) - if (defined $name); - $self; + $self->{binmode} = 'bytes'; + $self->{normalization} = 'NFC'; + $self->{binmode} = 'utf-8' + if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i; + $self->{normalization} = $opts->{normalization} if exists $opts->{normalization}; + + $self->split( Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}), + $filename, $line, $name_num, 1 ) + if ( defined $name ); + $self; } @@ -317,7 +348,7 @@ sub split my ($self, $name, $filename, $line, $name_num) = @_; # Call the XSUB with default values if necessary - $self->_split ($name, $filename, + $self->_split (Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}), $filename, defined $line ? $line : -1, defined $name_num ? $name_num : -1, 1); @@ -341,13 +372,18 @@ would return the list C<('de','la')>. =cut -sub part -{ - my ($self, $partname) = @_; +sub part { + my ( $self, $partname ) = @_; + + croak "unknown name part" + unless $partname =~ /^(first|von|last|jr)$/; - croak "unknown name part" - unless $partname =~ /^(first|von|last|jr)$/; - exists $self->{$partname} ? @{$self->{$partname}} : (); + if ( exists $self->{$partname} ) { + my @x = map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}) } + @{ $self->{$partname} }; + return @x > 1 ? 
@x : $x[0]; + } + return undef; } diff --git a/lib/Text/BibTeX/NameFormat.pm b/lib/Text/BibTeX/NameFormat.pm index 04dcef9..1d980a8 100644 --- a/lib/Text/BibTeX/NameFormat.pm +++ b/lib/Text/BibTeX/NameFormat.pm @@ -23,7 +23,7 @@ require 5.004; use strict; use Carp; use vars qw'$VERSION'; -$VERSION = 0.74; +$VERSION = 0.76; =head1 NAME @@ -31,6 +31,8 @@ Text::BibTeX::NameFormat - format BibTeX-style author names =head1 SYNOPSIS + use Text::BibTeX::NameFormat; + $format = Text::BibTeX::NameFormat->($parts, $abbrev_first); $format->set_text ($part, @@ -39,6 +41,7 @@ Text::BibTeX::NameFormat - format BibTeX-style author names $format->set_options ($part, $abbrev, $join_tokens, $join_part + ## Uses the encoding/binmode and normalization form stored in $name $formatted_name = $format->apply ($name); =head1 DESCRIPTION @@ -242,7 +245,12 @@ sub apply croak "invalid Name object: no C structure"; my $format_struct = $self->{'_cstruct'} || croak "invalid NameFormat object: no C structure"; - format_name ($name_struct, $format_struct); + + my $ans = format_name ($name_struct, $format_struct); + + $ans = Text::BibTeX->_process_result($ans, $name->{binmode}, $name->{normalization}); + + return $ans; } =back diff --git a/lib/Text/BibTeX/Structure.pm b/lib/Text/BibTeX/Structure.pm index a15bf08..a42c39e 100644 --- a/lib/Text/BibTeX/Structure.pm +++ b/lib/Text/BibTeX/Structure.pm @@ -24,7 +24,7 @@ use strict; use Carp; use vars qw'$VERSION'; -$VERSION = 0.74; +$VERSION = 0.76; use Text::BibTeX ('check_class'); @@ -322,8 +322,8 @@ module. A short example will illustrate this. Typically, a C<Text::BibTeX>-based program is based around a kernel of code like this: - $bibfile = new Text::BibTeX::File "foo.bib"; - while ($entry = new Text::BibTeX::Entry $bibfile) + $bibfile = Text::BibTeX::File->new("foo.bib"); + while ($entry = Text::BibTeX::Entry->new($bibfile)) { # process $entry } @@ -336,9 +336,9 @@ behaviour. 
Let us now suppose that C<$bibfile> is expected to conform to a database structure specified by C<$structure> (presumably a user-supplied value, and thus unknown at compile-time): - $bibfile = new Text::BibTeX::File "foo.bib"; + $bibfile = Text::BibTeX::File->new("foo.bib"); $bibfile->set_structure ($structure); - while ($entry = new Text::BibTeX::Entry $bibfile) + while ($entry = Text::BibTeX::Entry->new($bibfile)) { # process $entry } @@ -450,7 +450,7 @@ implements the C<Bib> structure. Use the pseudo-option C<module> to override this module name. For instance, if the structure C<Foo> is implemented by the module C<Foo>: - $structure = new Text::BibTeX::Structure + $structure = Text::BibTeX::Structure->new ('Foo', module => 'Foo'); This method C<die>s if there are any errors loading/compiling the @@ -870,7 +870,7 @@ sub get_options package Text::BibTeX::StructuredEntry; use strict; use vars qw(@ISA $VERSION); -$VERSION = 0.74; +$VERSION = 0.76; use Carp; diff --git a/lib/Text/BibTeX/Value.pm b/lib/Text/BibTeX/Value.pm index 7c1d178..97bc7a8 100644 --- a/lib/Text/BibTeX/Value.pm +++ b/lib/Text/BibTeX/Value.pm @@ -22,7 +22,7 @@ use Scalar::Util 'blessed'; use Carp; use vars qw'$VERSION'; -$VERSION = 0.74; +$VERSION = 0.76; =head1 NAME @@ -32,7 +32,7 @@ Text::BibTeX::Value - interfaces to BibTeX values and simple values use Text::BibTeX; - $entry = new Text::BibTeX::Entry; + $entry = Text::BibTeX::Entry->new; # set the 'preserve_values' flag to 1 for this parse $entry->parse ($filename, $filehandle, 1); @@ -80,7 +80,7 @@ C<title> field is a single string, and the C<journal> and C<year> fields are, respectively, a single macro and a single number. If you parse this entry in the usual way: - $entry = new Text::BibTeX::Entry $entry_text; + $entry = Text::BibTeX::Entry->new($entry_text); then the C<get> method on C<$entry> would return simple strings. 
Assuming that the C<and> macro is defined as C<" and ">, then @@ -96,14 +96,14 @@ There are two ways to make this request: per-file and per-entry. For a per-file request, use the C<preserve_values> method on your C<File> object: - $bibfile = new Text::BibTeX::File $filename; + $bibfile = Text::BibTeX::File->new($filename); $bibfile->preserve_values (1); - $entry = new Text::BibTeX::Entry $bibfile; + $entry = Text::BibTeX::Entry->new($bibfile); $entry->get ($field); # returns a Value object $bibfile->preserve_values (0); - $entry = new Text::BibTeX::Entry $bibfile; + $entry = Text::BibTeX::Entry->new($bibfile); $entry->get ($field); # returns a string If you're not using a C<File> object, or want to control things at a @@ -111,7 +111,7 @@ finer scale, then you have to pass in the C<preserve_values> flag when invoking C<read>, C<parse>, or C<parse_s> on your C<Entry> objects: # no File object, parsing from a string - $entry = new Text::BibTeX::Entry; + $entry = Text::BibTeX::Entry->new; $entry->parse_s ($entry_text, 0); # preserve_values=0 (default) $entry->get ($field); # returns a string @@ -183,8 +183,8 @@ two-element list containing the type and text of the simple value. For example, one way to recreate the C<author> field of the example entry in L<"DESCRIPTION"> would be: - $and_macro = new Text::BibTeX::SimpleValue (BTAST_MACRO, 'and'); - $value = new Text::BibTeX::Value + $and_macro = Text::BibTeX::SimpleValue->new (BTAST_MACRO, 'and'); + $value = Text::BibTeX::Value->new ([BTAST_STRING, 'Homer Simpson'], $and_macro, [BTAST_STRING, 'Ned Flanders']); @@ -202,7 +202,7 @@ sub new my $self = bless [], $class; while (my $sval = shift) { - $sval = new Text::BibTeX::SimpleValue @$sval + $sval = Text::BibTeX::SimpleValue->new(@$sval) if ref $sval eq 'ARRAY' && @$sval == 2; croak "simple value is neither a two-element array ref " . 
"nor a Text::BibTeX::SimpleValue object" @@ -245,7 +245,7 @@ use Carp; use Text::BibTeX qw(:nodetypes); use vars qw($VERSION); -$VERSION = '0.74'; +$VERSION = '0.76'; =head2 Text::BibTeX::SimpleValue methods |