10 files changed, 344 insertions, 126 deletions
diff --git a/lib/Text/BibTeX.pm b/lib/Text/BibTeX.pm
index a49b071..54ed65a 100644
--- a/lib/Text/BibTeX.pm
+++ b/lib/Text/BibTeX.pm
@@ -13,18 +13,19 @@
 # ----------------------------------------------------------------------
 
 package Text::BibTeX;
+use Text::BibTeX::Name;
+use Text::BibTeX::NameFormat;
 
 use 5.008001;                          # needed for Text::BibTeX::Entry
 
 use strict;
-#use UNIVERSAL qw(isa can);              # for 'check_class' subroutine
 use Carp;
 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $AUTOLOAD);
 
 require Exporter;
 require DynaLoader;
 
-our $VERSION='0.74';
+our $VERSION='0.76';
 
 @ISA = qw(Exporter DynaLoader);
 %EXPORT_TAGS = (nodetypes => [qw(BTAST_STRING BTAST_MACRO BTAST_NUMBER)],
@@ -45,9 +46,53 @@ our $VERSION='0.74';
               @{$EXPORT_TAGS{'nodetypes'}},
               @{$EXPORT_TAGS{'nameparts'}},
               @{$EXPORT_TAGS{'joinmethods'}},
-              'check_class', 'display_list');
+              'check_class', 'display_list' );
 @EXPORT = @{$EXPORT_TAGS{'metatypes'}};
 
+use Encode 'encode', 'decode';
+use Unicode::Normalize;
+
+
+sub _process_result {
+  no strict 'refs';
+  my ( $self, $result, $encoding, $norm ) = @_;
+
+  $norm ||= "NFC"; # best to force it here.
+  my $normsub = \&{"$norm"}; # symbolic ref
+    if ( $encoding eq "utf-8" ) {
+        if ( utf8::is_utf8($result) ) {
+            return $normsub->($result);
+        }
+        else {
+            return $normsub->( decode( $encoding, $result ) );
+        }
+    }
+    else { return $result; }
+
+}
+
+sub _process_argument {
+    my ( $self, $value, $encoding ) = @_;
+
+     if ( $encoding eq "utf-8" && utf8::is_utf8($value)) {
+         return encode( $encoding, $value );
+     }
+     else {
+        return $value;
+    }
+}
+
+sub split_list {
+    my ( $field, $delim, $filename, $line, $desc, $opts ) = @_;
+    $opts                  ||= {};
+    $opts->{binmode}       ||= 'bytes';
+    $opts->{normalization} ||= 'NFC';
+    return
+        map { Text::BibTeX->_process_result( $_, $opts->{binmode}, $opts->{normalization} ) }
+        Text::BibTeX::isplit_list( $field, $delim, $filename, $line, $desc );
+
+}
+
 =head1 NAME
 
 Text::BibTeX - interface to read and parse BibTeX files
@@ -241,6 +286,16 @@ bootstrap Text::BibTeX;
 initialize();                            # these are both XS functions
 END { &cleanup; }
 
+# This can't go in a BEGIN because of the .XS bootstrapping mechanism
+_define_months();
+
+sub _define_months {
+  for my $month (qw.january february march april may june
+             july august september october november december.) {
+    add_macro_text(substr($month, 0, 3), ucfirst($month));
+  }
+}
+
 
 =head1 EXPORTS
 
@@ -572,7 +627,22 @@ of it as appropriate.  They're just mentioned here for completeness.
 
 =over 4
 
-=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
+=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION [, OPTS]]]])
+
+Splits a string on a fixed delimiter according to the BibTeX rules for
+splitting up lists of names.  With BibTeX, the delimiter is hard-coded
+as C<"and">; here, you can supply any string.  Instances of DELIM in
+STRING are considered delimiters if they are at brace-depth zero,
+surrounded by whitespace, and not at the beginning or end of STRING; the
+comparison is case-insensitive.  See L<bt_split_names> for full details
+of how splitting is done (it's I<not> the same as Perl's C<split>
+function). OPTS is a hash ref of the same binmode and normalization
+arguments as with, e.g. Text::BibTeX::File->open(). split_list calls isplit_list()
+internally but handles UTF-8 conversion and normalization, if requested.
+
+Returns the list of strings resulting from splitting STRING on DELIM.
+
+=item isplit_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
 
 Splits a string on a fixed delimiter according to the BibTeX rules for
 splitting up lists of names.  With BibTeX, the delimiter is hard-coded
@@ -581,7 +651,8 @@ STRING are considered delimiters if they are at brace-depth zero,
 surrounded by whitespace, and not at the beginning or end of STRING; the
 comparison is case-insensitive.  See L<bt_split_names> for full details
 of how splitting is done (it's I<not> the same as Perl's C<split>
-function).
+function). This function returns bytes. Use Text::BibTeX::split_list to specify
+the same binmode and normalization arguments as with, e.g., Text::BibTeX::File->open().
 
 Returns the list of strings resulting from splitting STRING on DELIM.
 
@@ -654,7 +725,8 @@ takes no action.
 
 =item delete_all_macros ()
 
-Deletes all macros from the macro table.
+Deletes all macros from the macro table, even the predefined month
+names.
 
 =item macro_length (MACRO)
 
diff --git a/lib/Text/BibTeX/Bib.pm b/lib/Text/BibTeX/Bib.pm
index d400dd3..9fb50f5 100644
--- a/lib/Text/BibTeX/Bib.pm
+++ b/lib/Text/BibTeX/Bib.pm
@@ -27,7 +27,7 @@ Text::BibTeX::Bib - defines the "Bib" database structure
 
 =head1 SYNOPSIS
 
-   $bibfile = new Text::BibTeX::File $filename;
+   $bibfile = Text::BibTeX::File->new($filename);
    $bibfile->set_structure ('Bib',
                             # Default option values:
                             sortby => 'name',
@@ -45,11 +45,11 @@ Text::BibTeX::Bib - defines the "Bib" database structure
    $bibfile->set_option (labels => 'alpha');   # not implemented yet!
 
    # parse entry from $bibfile and automatically make it a BibEntry
-   $entry = new Text::BibTeX::Entry $bibfile;
+   $entry = Text::BibTeX::Entry->new($bibfile);
 
    # or get an entry from somewhere else which is hard-coded to be
    # a BibEntry
-   $entry = new Text::BibTeX::BibEntry ...;
+   $entry = Text::BibTeX::BibEntry->new(...);
 
    $sortkey = $entry->sort_key;
    @blocks = $entry->format;
@@ -111,7 +111,7 @@ package Text::BibTeX::BibStructure;
 use strict;
 use vars qw(@ISA $VERSION);
 @ISA = qw(Text::BibTeX::Structure);
-$VERSION = '0.74';
+$VERSION = '0.76';
 
 =head1 STRUCTURE OPTIONS
 
@@ -450,33 +450,13 @@ package Text::BibTeX::BibEntry;
 use strict;
 use vars qw(@ISA $VERSION);
 
-$VERSION = '0.74';
+$VERSION = '0.76';
 
 use Text::BibTeX::BibSort;
 use Text::BibTeX::BibFormat;
 
 @ISA = qw(Text::BibTeX::BibSort Text::BibTeX::BibFormat);
-
-# Pre-define the "month name" macros for compatibility with BibTeX.
-# This ignores all sorts of issues, like internationalization and
-# abbreviation.  
-my %month_names = 
-   ('jan' => 'January',
-    'feb' => 'February',
-    'mar' => 'March',
-    'apr' => 'April',
-    'may' => 'May',
-    'jun' => 'June',
-    'jul' => 'July',
-    'aug' => 'August',
-    'sep' => 'September',
-    'oct' => 'October',
-    'nov' => 'November',
-    'dec' => 'December');
-
-my ($macro, $expansion);
-Text::BibTeX::add_macro_text ($macro, $expansion)
-   while (($macro, $expansion) = each %month_names);    
+ 
 
 1;
 
diff --git a/lib/Text/BibTeX/BibFormat.pm b/lib/Text/BibTeX/BibFormat.pm
index 2c78d04..f286fc1 100644
--- a/lib/Text/BibTeX/BibFormat.pm
+++ b/lib/Text/BibTeX/BibFormat.pm
@@ -26,7 +26,7 @@ use Text::BibTeX::NameFormat;
 use Text::BibTeX::Structure;
 
 @ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 use Text::BibTeX qw(:subs display_list :nameparts :joinmethods);
 
@@ -148,7 +148,7 @@ sub format_names
       unless $style =~ /^(full|abbrev|nopunct|nospace)$/;
 
    $order = ($order eq 'first') ? 'fvlj' : 'vljf';
-   $format = new Text::BibTeX::NameFormat ($order, ! ($style eq 'full'));
+   $format = Text::BibTeX::NameFormat->new ($order, ! ($style eq 'full'));
 
    $format->set_text (&BTN_FIRST, undef, undef, undef, '')
       if $style eq 'nopunct' || $style eq 'nospace';
diff --git a/lib/Text/BibTeX/BibSort.pm b/lib/Text/BibTeX/BibSort.pm
index dc23a89..52b60ac 100644
--- a/lib/Text/BibTeX/BibSort.pm
+++ b/lib/Text/BibTeX/BibSort.pm
@@ -23,7 +23,7 @@ use vars qw(@ISA $VERSION);
 use Text::BibTeX::Structure;
 
 @ISA = qw(Text::BibTeX::StructuredEntry);
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 use Text::BibTeX qw(purify_string change_case);
 
@@ -147,8 +147,8 @@ sub sort_format_names
    my ($abbrev, $format, $name);
 
    $abbrev = ! ($self->structure->get_options ('namestyle') eq 'full');
-   $format = new Text::BibTeX::NameFormat ("vljf", $abbrev);
-   $name = new Text::BibTeX::Name;
+   $format =  Text::BibTeX::NameFormat->new ("vljf", $abbrev);
+   $name   = Text::BibTeX::Name->new;
 
    my (@snames, $i, $sname);
    @snames = $self->split ($field);
diff --git a/lib/Text/BibTeX/Entry.pm b/lib/Text/BibTeX/Entry.pm
index 6fdf242..36422cc 100644
--- a/lib/Text/BibTeX/Entry.pm
+++ b/lib/Text/BibTeX/Entry.pm
@@ -23,7 +23,7 @@ use vars qw'$VERSION';
 use Carp;
 use Text::BibTeX qw(:metatypes :nodetypes);
 
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 =head1 NAME
 
@@ -31,7 +31,7 @@ Text::BibTeX::Entry - read and parse BibTeX files
 
 =head1 SYNOPSIS
 
-   use Text::BibTeX;            # do not use Text::BibTeX::Entry alone!
+   use Text::BibTeX::Entry;
 
    # ...assuming that $bibfile and $newbib are both objects of class
    # Text::BibTeX::File, opened for reading and writing (respectively):
@@ -127,7 +127,7 @@ anything extra.
 
 =over 4
 
-=item new ([SOURCE])
+=item new ([OPTS ,] [SOURCE])
 
 Creates a new C<Text::BibTeX::Entry> object.  If the SOURCE parameter is
 supplied, it must be one of the following: a C<Text::BibTeX::File> (or
@@ -169,6 +169,28 @@ But using a C<Text::BibTeX::File> object is simpler and preferred:
 Returns the new object, unless SOURCE is supplied and reading/parsing
 the entry fails (e.g., due to end of file) -- then it returns false.
 
+You may supply a reference to an option hash as first argument.
+Supported options are:
+
+=over 4 
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings.
+
+   Text::BibTeX::Entry->new(
+      { binmode => 'utf-8', normalization => 'NFD' },
+      $file);
+
+=item NORMALIZATION
+
+Customise the Unicode normalization form used for UTF-8 strings (default 'NFC').
+
+=back
+
+
 =cut
 
 sub new
@@ -176,6 +198,7 @@ sub new
    my ($class, @source) = @_;
 
    $class = ref ($class) || $class;
+   
    my $self = {'file'     => undef,
                'type'     => undef,
                'key'      => undef,
@@ -183,8 +206,15 @@ sub new
                'metatype' => undef,
                'fields'   => [],
                'values'   => {}};
-
    bless $self, $class;
+
+   my $opts = {};
+   $opts = shift @source if scalar(@source) and ref $source[0] eq "HASH";
+   $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+   $self->{binmode} = 'utf-8'
+          if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+   $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
    if (@source)
    {
       my $status;
@@ -234,6 +264,8 @@ sub clone
     $clone->{file} = $self->{file}
   }
   # These might be changed so make copies
+  $clone->{binmode} = $self->{binmode};
+  $clone->{normalization} = $self->{normalization};
   $clone->{type}     = $self->{type};
   $clone->{key}      = $self->{key};
   $clone->{status}   = $self->{status};
@@ -271,6 +303,10 @@ sub read
    my $fn = $source->{'filename'};
    my $fh = $source->{'handle'};
    $self->{'file'} = $source;        # store File object for later use
+   ## Propagate flags
+   for my $f (qw.binmode normalization.) {
+      $self->{$f} = $source->{$f} unless exists $self->{$f};
+   }
    return $self->parse ($fn, $fh, $preserve);
 }
 
@@ -392,27 +428,39 @@ context.)
 
 =item fieldlist ()
 
-Returns the list of fields in the entry.  In a scalar context, returns a
-reference to the object's own list of fields.  That way, you can change or
-reorder the field list with minimal interference from the class.  I'm not
-entirely sure if this is a good idea, so don't rely on it existing in the
-future; feel free to play around with it and let me know if you get bitten
-in dangerous ways or find this enormously useful.
+Returns the list of fields in the entry.  
+
+B<WARNING> In scalar context, it no longer returns a
+reference to the object's own list of fields.
 
 =cut
 
 sub parse_ok   { shift->{'status'}; }
 
-sub metatype   { shift->{'metatype'}; }
+sub metatype   {
+    my $self = shift;
+    Text::BibTeX->_process_result( $self->{'metatype'}, $self->{binmode}, $self->{normalization} );
+}
 
-sub type       { shift->{'type'}; }
+sub type {
+    my $self = shift;
+    Text::BibTeX->_process_result( $self->{'type'}, $self->{binmode}, $self->{normalization} );
+}
 
-sub key        { shift->{'key'}; }
+sub key        { 
+  my $self = shift;
+  exists $self->{key}
+    ? Text::BibTeX->_process_result($self->{key}, $self->{binmode}, $self->{normalization})
+    : undef;
+}
 
 sub num_fields { scalar @{shift->{'fields'}}; }
 
-sub fieldlist  { wantarray ? @{shift->{'fields'}} : shift->{'fields'}; }
-
+sub fieldlist  { 
+  my $self = shift;
+  return map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization})} @{$self->{'fields'}};
+}
+  
 =item exists (FIELD)
 
 Returns true if a field named FIELD is present in the entry, false
@@ -484,17 +532,24 @@ sub exists
 {
    my ($self, $field) = @_;
 
-   exists $self->{'values'}{$field};
+   exists $self->{values}{Text::BibTeX->_process_argument($field, $self->{binmode}, $self->{normalization})};
 }
 
 sub get
 {
    my ($self, @fields) = @_;
 
-   @{$self->{'values'}}{@fields};
+   my @x = @{$self->{'values'}}{map {Text::BibTeX->_process_argument($_, $self->{binmode}, $self->{normalization})} @fields};
+
+   @x = map {defined($_) ? Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}): undef} @x;
+
+   return (@x > 1) ? @x : $x[0];
 }
 
-sub value { shift->{'value'} }
+sub value { 
+  my $self = shift;
+  Text::BibTeX->_process_result($self->{value}, $self->{binmode}, $self->{normalization});
+}
 
 
 =head2 Author name methods
@@ -590,17 +645,19 @@ sub split
 {
    my ($self, $field, $delim, $desc) = @_;
 
-   return unless $self->exists ($field);
+   return unless $self->exists($field);
    $delim ||= 'and';
    $desc ||= 'name';
 
-   my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
-   my $line = $self->{'lines'}{$field};
-
 #   local $^W = 0                        # suppress spurious warning from 
 #      unless defined $filename;         # undefined $filename
-   Text::BibTeX::split_list ($self->{'values'}{$field}, $delim,
-                             $filename, $line, $desc);
+   Text::BibTeX::split_list($self->{values}{$field},
+                            $delim,
+                            ($self->{file} && $self->{file}{filename}),
+                            $self->{lines}{$field},
+                            $desc,
+                            {binmode       => $self->{binmode},
+                             normalization => $self->{normalization}});
 }
 
 sub names
@@ -618,7 +675,8 @@ sub names
 #      unless defined $filename;         # undefined $filename
    for $i (0 .. $#names)
    {
-      $names[$i] = Text::BibTeX::Name->new($names[$i], $filename, $line, $i);
+      $names[$i] = Text::BibTeX::Name->new(
+        {binmode => $self->{binmode}, normalization => $self->{normalization}},$names[$i], $filename, $line, $i);
    }
    @names;
 }
@@ -696,7 +754,7 @@ sub set_key
 {
    my ($self, $key) = @_;
 
-   $self->{'key'} = $key;
+   $self->{'key'} = Text::BibTeX->_process_argument($key, $self->{binmode}, $self->{normalization});
 }
 
 sub set
@@ -708,7 +766,7 @@ sub set
 
    while (@_)
    {
-      ($field,$value) = (shift,shift);
+      ($field,$value) = (shift,Text::BibTeX->_process_argument(shift, $self->{binmode}, $self->{normalization}));
       push (@{$self->{'fields'}}, $field)
          unless exists $self->{'values'}{$field};
       $self->{'values'}{$field} = $value;
@@ -870,7 +928,8 @@ sub print_s
 
    # Tack on the last line, and we're done!
    $output .= "}\n\n";
-   $output;
+   
+   Text::BibTeX->_process_result($output, $self->{binmode}, $self->{normalization});
 }
 
 =back
diff --git a/lib/Text/BibTeX/File.pm b/lib/Text/BibTeX/File.pm
index 3e6e888..bd94163 100644
--- a/lib/Text/BibTeX/File.pm
+++ b/lib/Text/BibTeX/File.pm
@@ -22,7 +22,7 @@ use Carp;
 use IO::File;
 
 use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 =head1 NAME
 
@@ -30,12 +30,12 @@ Text::BibTeX::File - interface to whole BibTeX files
 
 =head1 SYNOPSIS
 
-   use Text::BibTeX;     # this loads Text::BibTeX::File
+   use Text::BibTeX::File;
 
-   $bib = new Text::BibTeX::File "foo.bib" or die "foo.bib: $!\n";
+   $bib = Text::BibTeX::File->new("foo.bib") or die "foo.bib: $!\n";
    # or:
-   $bib = new Text::BibTeX::File;
-   $bib->open ("foo.bib") || die "foo.bib: $!\n";
+   $bib =  Text::BibTeX::File->new;
+   $bib->open("foo.bib", {binmode => 'utf-8', normalization => 'NFC'}) || die "foo.bib: $!\n";
 
    $bib->set_structure ($structure_name,
                         $option1 => $value1, ...);
@@ -61,19 +61,56 @@ These concepts are fully documented in L<Text::BibTeX::Structure>.
 
 =over 4
 
-=item new ([FILENAME [,MODE [,PERMS]]]) 
+=item new ([FILENAME [, OPTS]])
 
-Creates a new C<Text::BibTeX::File> object.  If FILENAME is supplied,
-passes it to the C<open> method (along with MODE and PERMS if they
-are supplied).  If the C<open> fails, C<new> fails and returns false; if
-the C<open> succeeds (or if FILENAME isn't supplied), C<new> returns the
-new object reference.
+Creates a new C<Text::BibTeX::File> object.  If FILENAME is supplied, passes
+it to the C<open> method (along with OPTS).  If the C<open> fails, C<new>
+fails and returns false; if the C<open> succeeds (or if FILENAME isn't
+supplied), C<new> returns the new object reference.
 
-=item open (FILENAME [,MODE [,PERMS]])
+=item open (FILENAME [, OPTS])
 
-Opens the file specified by FILENAME, possibly using MODE and PERMS.
-See L<IO::File> for full semantics; this C<open> is just a front end for
-C<IO::File::open>.
+Opens the file specified by FILENAME. OPTS is a hash reference that can have
+the following keys:
+
+=over 4
+
+=item MODE
+
+mode as specified by L<IO::File>
+
+=item PERMS
+
+permissions as specified by L<IO::File>. Can only be used in conjunction
+with C<MODE>
+
+=item BINMODE
+
+By default, Text::BibTeX uses bytes directly. Thus, you need to encode
+strings accordingly with the encoding of the files you are reading. You can
+also select UTF-8. In this case, Text::BibTeX will return UTF-8 strings in
+NFC mode. Note that at the moment files with BOM are not supported.
+
+Valid values are 'raw/bytes' or 'utf-8'.
+
+=item NORMALIZATION
+
+By default, Text::BibTeX outputs UTF-8 in NFC form. You can change this by passing
+the name of a different form.
+
+Valid values are those forms supported by the Unicode::Normalize module
+('NFD', 'NFKD' etc.)
+
+=item RESET_MACROS
+
+By default, Text::BibTeX accumulates macros. This means that when you open a second
+file, macros defined by the first are still available. This may result in warnings
+about macros being redefined.
+
+This option can be used to force Text::BibTeX to clean up all macro definitions
+(except for the month macros).
+
+=back 
 
 =item close ()
 
@@ -95,19 +132,45 @@ sub new
 
    $class = ref ($class) || $class;
    my $self = bless {}, $class;
-   ($self->open (@_) || return undef) if @_; # filename [, mode [, perms]]
+   ($self->open (@_) || return undef) if @_; 
    $self;
 }
 
-sub open
-{
-   my $self = shift;
-
-   $self->{filename} = $_[0];
-   $self->{handle} = new IO::File;
-   $self->{handle}->open (@_);          # filename, maybe mode, maybe perms
+sub open {
+    my ($self) = shift;
+    $self->{filename} = shift;
+
+    $self->{binmode}       = 'bytes';
+    $self->{normalization} = 'NFC';
+    my @args = ( $self->{filename} );
+
+    if ( ref $_[0] eq "HASH" ) {
+        my $opts = {};
+        $opts = shift;
+        $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+        $self->{binmode} = 'utf-8'
+            if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+        $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
+        if (exists $opts->{reset_macros} && $opts->{reset_macros}) {
+          Text::BibTeX::delete_all_macros();
+          Text::BibTeX::_define_months();
+        }
+
+        if ( exists $opts->{mode} ) {
+            push @args, $opts->{mode};
+            push @args, $opts->{perms} if exists $opts->{perms};
+        }
+    }
+    else {
+        push @args, @_;
+    }
+
+    $self->{handle} = IO::File->new;
+    $self->{handle}->open(@args);    # filename, maybe mode, maybe perms
 }
 
+
 sub close
 {
    my $self = shift;
@@ -155,7 +218,7 @@ sub set_structure
    croak "Text::BibTeX::File::set_structure: options list must have even " .
          "number of elements"
       unless @options % 2 == 0;
-   $self->{structure} = new Text::BibTeX::Structure ($structure, @options);
+   $self->{structure} = Text::BibTeX::Structure->new($structure, @options);
 }
 
 sub structure { shift->{structure} }
diff --git a/lib/Text/BibTeX/Name.pm b/lib/Text/BibTeX/Name.pm
index 2e69281..b8a72b0 100644
--- a/lib/Text/BibTeX/Name.pm
+++ b/lib/Text/BibTeX/Name.pm
@@ -23,7 +23,7 @@ require 5.004;
 use strict;
 use Carp;
 use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 use Text::BibTeX;
 
@@ -33,6 +33,8 @@ Text::BibTeX::Name - interface to BibTeX-style author names
 
 =head1 SYNOPSIS
 
+   use Text::BibTeX::Name;
+
    $name = Text::BibTeX::Name->new();
    $name->split('J. Random Hacker');
    # or:
@@ -269,7 +271,7 @@ way is the job of another module: see L<Text::BibTeX::NameFormat>.
 
 =over 4
 
-=item new([ NAME [, FILENAME, LINE, NAME_NUM]])
+=item new([ [OPTS,] NAME [, FILENAME, LINE, NAME_NUM]])
 
 Creates a new C<Text::BibTeX::Name> object.  If NAME is supplied, it
 must be a string containing a single name, and it will be be passed to
@@ -277,17 +279,46 @@ the C<split> method for further processing.  FILENAME, LINE, and
 NAME_NUM, if present, are all also passed to C<split> to allow better
 error messages.
 
+If the first argument is a hash reference, it is used to define
+configuration values. At the moment the available values are:
+
+=over 4 
+
+=item BINMODE
+
+Set the way Text::BibTeX deals with strings. By default it manages
+strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
+UTF-8 strings and you can customise the normalization with the NORMALIZATION option.
+
+   Text::BibTeX::Name->new(
+      { binmode => 'utf-8', normalization => 'NFD' },
+      "Alberto Simões");
+
+=back
+
 =cut
 
-sub new
-{
-   my ($class, $name, $filename, $line, $name_num) = @_;
+sub new {
+    my $class = shift;
+    my $opts = ref $_[0] eq 'HASH' ? shift : {};
+
+    $opts->{ lc $_ } = $opts->{$_} for ( keys %$opts );
+
+    my ( $name, $filename, $line, $name_num ) = @_;
+
+    $class = ref($class) || $class;
+    my $self = bless { }, $class;
 
-   $class = ref ($class) || $class;
-   my $self = bless {}, $class;
-   $self->split ($name, $filename, $line, $name_num, 1)
-      if (defined $name);
-   $self;
+    $self->{binmode} = 'bytes';
+    $self->{normalization} = 'NFC';
+    $self->{binmode} = 'utf-8'
+        if exists $opts->{binmode} && $opts->{binmode} =~ /utf-?8/i;
+    $self->{normalization} = $opts->{normalization} if exists $opts->{normalization};
+
+    $self->split( Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}),
+        $filename, $line, $name_num, 1 )
+        if ( defined $name );
+    $self;
 }
 
 
@@ -317,7 +348,7 @@ sub split
    my ($self, $name, $filename, $line, $name_num) = @_;
 
    # Call the XSUB with default values if necessary
-   $self->_split ($name, $filename, 
+   $self->_split (Text::BibTeX->_process_argument($name, $self->{binmode}, $self->{normalization}), $filename, 
                   defined $line ? $line : -1,
                   defined $name_num ? $name_num : -1,
                   1);
@@ -341,13 +372,18 @@ would return the list C<('de','la')>.
 
 =cut
 
-sub part
-{
-   my ($self, $partname) = @_;
+sub part {
+    my ( $self, $partname ) = @_;
+
+    croak "unknown name part"
+        unless $partname =~ /^(first|von|last|jr)$/;
 
-   croak "unknown name part" 
-      unless $partname =~ /^(first|von|last|jr)$/;
-   exists $self->{$partname} ? @{$self->{$partname}} : ();
+    if ( exists $self->{$partname} ) {
+        my @x = map { Text::BibTeX->_process_result($_, $self->{binmode}, $self->{normalization}) }
+            @{ $self->{$partname} };
+        return @x > 1 ? @x : $x[0];
+    }
+    return undef;
 }
 
 
diff --git a/lib/Text/BibTeX/NameFormat.pm b/lib/Text/BibTeX/NameFormat.pm
index 04dcef9..1d980a8 100644
--- a/lib/Text/BibTeX/NameFormat.pm
+++ b/lib/Text/BibTeX/NameFormat.pm
@@ -23,7 +23,7 @@ require 5.004;
 use strict;
 use Carp;
 use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 =head1 NAME
 
@@ -31,6 +31,8 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
 
 =head1 SYNOPSIS
 
+   use Text::BibTeX::NameFormat;
+
    $format = Text::BibTeX::NameFormat->($parts, $abbrev_first);
 
    $format->set_text ($part,
@@ -39,6 +41,7 @@ Text::BibTeX::NameFormat - format BibTeX-style author names
 
    $format->set_options ($part, $abbrev, $join_tokens, $join_part
 
+   ## Uses the encoding/binmode and normalization form stored in $name
    $formatted_name = $format->apply ($name);
 
 =head1 DESCRIPTION
@@ -242,7 +245,12 @@ sub apply
       croak "invalid Name object: no C structure";
    my $format_struct = $self->{'_cstruct'} ||
       croak "invalid NameFormat object: no C structure";
-   format_name ($name_struct, $format_struct);
+ 
+   my $ans = format_name ($name_struct, $format_struct);
+
+   $ans = Text::BibTeX->_process_result($ans, $name->{binmode}, $name->{normalization});
+   
+   return $ans;
 }
 
 =back
diff --git a/lib/Text/BibTeX/Structure.pm b/lib/Text/BibTeX/Structure.pm
index a15bf08..a42c39e 100644
--- a/lib/Text/BibTeX/Structure.pm
+++ b/lib/Text/BibTeX/Structure.pm
@@ -24,7 +24,7 @@ use strict;
 use Carp;
 
 use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 use Text::BibTeX ('check_class');
 
@@ -322,8 +322,8 @@ module.  A short example will illustrate this.
 Typically, a C<Text::BibTeX>-based program is based around a kernel of
 code like this:
 
-   $bibfile = new Text::BibTeX::File "foo.bib";
-   while ($entry = new Text::BibTeX::Entry $bibfile)
+   $bibfile = Text::BibTeX::File->new("foo.bib");
+   while ($entry = Text::BibTeX::Entry->new($bibfile))
    {
       # process $entry
    }
@@ -336,9 +336,9 @@ behaviour.  Let us now suppose that C<$bibfile> is expected to conform
 to a database structure specified by C<$structure> (presumably a
 user-supplied value, and thus unknown at compile-time):
 
-   $bibfile = new Text::BibTeX::File "foo.bib";
+   $bibfile = Text::BibTeX::File->new("foo.bib");
    $bibfile->set_structure ($structure);
-   while ($entry = new Text::BibTeX::Entry $bibfile)
+   while ($entry = Text::BibTeX::Entry->new($bibfile))
    {
       # process $entry
    }
@@ -450,7 +450,7 @@ implements the C<Bib> structure.  Use the pseudo-option C<module> to
 override this module name.  For instance, if the structure C<Foo> is
 implemented by the module C<Foo>:
 
-   $structure = new Text::BibTeX::Structure
+   $structure = Text::BibTeX::Structure->new
       ('Foo', module => 'Foo');
 
 This method C<die>s if there are any errors loading/compiling the
@@ -870,7 +870,7 @@ sub get_options
 package Text::BibTeX::StructuredEntry;
 use strict;
 use vars qw(@ISA $VERSION);
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 use Carp;
 
diff --git a/lib/Text/BibTeX/Value.pm b/lib/Text/BibTeX/Value.pm
index 7c1d178..97bc7a8 100644
--- a/lib/Text/BibTeX/Value.pm
+++ b/lib/Text/BibTeX/Value.pm
@@ -22,7 +22,7 @@ use Scalar::Util 'blessed';
 use Carp;
 
 use vars qw'$VERSION';
-$VERSION = 0.74;
+$VERSION = 0.76;
 
 =head1 NAME
 
@@ -32,7 +32,7 @@ Text::BibTeX::Value - interfaces to BibTeX values and simple values
 
    use Text::BibTeX;
 
-   $entry = new Text::BibTeX::Entry;
+   $entry = Text::BibTeX::Entry->new;
 
    # set the 'preserve_values' flag to 1 for this parse
    $entry->parse ($filename, $filehandle, 1);
@@ -80,7 +80,7 @@ C<title> field is a single string, and the C<journal> and C<year> fields
 are, respectively, a single macro and a single number.  If you parse
 this entry in the usual way:
 
-   $entry = new Text::BibTeX::Entry $entry_text;
+   $entry = Text::BibTeX::Entry->new($entry_text);
 
 then the C<get> method on C<$entry> would return simple strings.
 Assuming that the C<and> macro is defined as C<" and ">, then
@@ -96,14 +96,14 @@ There are two ways to make this request: per-file and per-entry.  For a
 per-file request, use the C<preserve_values> method on your C<File>
 object:
 
-   $bibfile = new Text::BibTeX::File $filename;
+   $bibfile = Text::BibTeX::File->new($filename);
    $bibfile->preserve_values (1);
 
-   $entry = new Text::BibTeX::Entry $bibfile;
+   $entry = Text::BibTeX::Entry->new($bibfile);
    $entry->get ($field);        # returns a Value object
 
    $bibfile->preserve_values (0);
-   $entry = new Text::BibTeX::Entry $bibfile;
+   $entry = Text::BibTeX::Entry->new($bibfile);
    $entry->get ($field);        # returns a string
 
 If you're not using a C<File> object, or want to control things at a
@@ -111,7 +111,7 @@ finer scale, then you have to pass in the C<preserve_values> flag when
 invoking C<read>, C<parse>, or C<parse_s> on your C<Entry> objects:
 
    # no File object, parsing from a string
-   $entry = new Text::BibTeX::Entry;
+   $entry = Text::BibTeX::Entry->new;
    $entry->parse_s ($entry_text, 0);  # preserve_values=0 (default)
    $entry->get ($field);        # returns a string
 
@@ -183,8 +183,8 @@ two-element list containing the type and text of the simple value.  For
 example, one way to recreate the C<author> field of the example entry in
 L<"DESCRIPTION"> would be:
 
-   $and_macro = new Text::BibTeX::SimpleValue (BTAST_MACRO, 'and');
-   $value = new Text::BibTeX::Value 
+   $and_macro = Text::BibTeX::SimpleValue->new (BTAST_MACRO, 'and');
+   $value = Text::BibTeX::Value->new 
       ([BTAST_STRING, 'Homer Simpson'],
        $and_macro,
        [BTAST_STRING, 'Ned Flanders']);
@@ -202,7 +202,7 @@ sub new
    my $self = bless [], $class;
    while (my $sval = shift)
    {
-      $sval = new Text::BibTeX::SimpleValue @$sval
+      $sval = Text::BibTeX::SimpleValue->new(@$sval)
          if ref $sval eq 'ARRAY' && @$sval == 2;
       croak "simple value is neither a two-element array ref " .
             "nor a Text::BibTeX::SimpleValue object"
@@ -245,7 +245,7 @@ use Carp;
 use Text::BibTeX qw(:nodetypes);
 
 use vars qw($VERSION);
-$VERSION = '0.74';
+$VERSION = '0.76';
 
 
 =head2 Text::BibTeX::SimpleValue methods