summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BibTeX.pm759
-rw-r--r--BibTeX.xs542
-rw-r--r--BibTeX/Bib.pm493
-rw-r--r--BibTeX/BibFormat.pm499
-rw-r--r--BibTeX/BibSort.pm244
-rw-r--r--BibTeX/Entry.pm967
-rw-r--r--BibTeX/File.pm195
-rw-r--r--BibTeX/Name.pm384
-rw-r--r--BibTeX/NameFormat.pm306
-rw-r--r--BibTeX/Structure.pm1201
-rw-r--r--BibTeX/Value.pm326
-rw-r--r--CHANGES101
-rw-r--r--MANIFEST37
-rw-r--r--META.yml12
-rw-r--r--Makefile.PL417
-rw-r--r--README168
-rwxr-xr-xbtcheck31
-rwxr-xr-xbtformat128
-rw-r--r--btool_faq.pod121
-rwxr-xr-xbtsort33
-rw-r--r--btxs_support.c485
-rw-r--r--btxs_support.h45
-rwxr-xr-xexamples/append_entries78
-rw-r--r--t/bib.t148
-rw-r--r--t/common.pl95
-rw-r--r--t/macro.t121
-rw-r--r--t/modify.t82
-rw-r--r--t/nameformat.t59
-rw-r--r--t/namelist.t50
-rw-r--r--t/names.t107
-rw-r--r--t/output.t88
-rw-r--r--t/parse.t50
-rw-r--r--t/parse_f.t83
-rw-r--r--t/parse_s.t89
-rw-r--r--t/purify.t134
-rw-r--r--t/split_names28
-rw-r--r--typemap30
37 files changed, 8736 insertions, 0 deletions
diff --git a/BibTeX.pm b/BibTeX.pm
new file mode 100644
index 0000000..7994f1f
--- /dev/null
+++ b/BibTeX.pm
@@ -0,0 +1,759 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX.pm
+# DESCRIPTION: Code for the Text::BibTeX module; loads up everything
+# needed for parsing BibTeX files (both Perl and C code).
+# CREATED : February 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: BibTeX.pm 6325 2008-10-08 12:35:41Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX;
+
+require 5.004;                          # needed for Text::BibTeX::Entry
+
+use strict;
+use Carp;
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $AUTOLOAD $VERSION);
+
+require Exporter;
+require DynaLoader;
+
+# The 'check_class' subroutine calls isa() and can() as plain functions.
+# They used to be imported with "use UNIVERSAL qw(isa can)", but UNIVERSAL
+# no longer exports anything (that statement is a fatal error from Perl
+# 5.22 on), so thin wrappers around the fully-qualified functions are
+# defined here instead.  As with the old import, they live in this package
+# and simply forward to the UNIVERSAL implementations.
+sub isa { return UNIVERSAL::isa (@_); }
+sub can { return UNIVERSAL::can (@_); }
+
+$VERSION = 0.38;
+
+@ISA = qw(Exporter DynaLoader);
+
+# Export tags: the first four group the constants that are autoloaded from
+# the XS code (see AUTOLOAD), the last two group ordinary subroutines.
+%EXPORT_TAGS = (nodetypes   => [qw(BTAST_STRING BTAST_MACRO BTAST_NUMBER)],
+                metatypes   => [qw(BTE_UNKNOWN BTE_REGULAR BTE_COMMENT
+                                   BTE_PREAMBLE BTE_MACRODEF)],
+                nameparts   => [qw(BTN_FIRST BTN_VON BTN_LAST BTN_JR BTN_NONE)],
+                joinmethods => [qw(BTJ_MAYTIE BTJ_SPACE
+                                   BTJ_FORCETIE BTJ_NOTHING)],
+                subs        => [qw(bibloop split_list
+                                   purify_string change_case)],
+                macrosubs   => [qw(add_macro_text
+                                   delete_macro
+                                   delete_all_macros
+                                   macro_length
+                                   macro_text)]);
+
+# Everything except the entry metatypes must be requested explicitly;
+# 'check_class' and 'display_list' are exportable by name only.
+@EXPORT_OK = (@{$EXPORT_TAGS{'subs'}},
+              @{$EXPORT_TAGS{'macrosubs'}},
+              @{$EXPORT_TAGS{'nodetypes'}},
+              @{$EXPORT_TAGS{'nameparts'}},
+              @{$EXPORT_TAGS{'joinmethods'}},
+              'check_class', 'display_list');
+
+# The entry metatypes are the only default exports.
+@EXPORT = @{$EXPORT_TAGS{'metatypes'}};
+
+=head1 NAME
+
+Text::BibTeX - interface to read and parse BibTeX files
+
+=head1 SYNOPSIS
+
+ use Text::BibTeX;
+
+ $bibfile = new Text::BibTeX::File "foo.bib";
+ $newfile = new Text::BibTeX::File ">newfoo.bib";
+
+ while ($entry = new Text::BibTeX::Entry $bibfile)
+ {
+ next unless $entry->parse_ok;
+
+ . # hack on $entry contents, using various
+ . # Text::BibTeX::Entry methods
+ .
+
+ $entry->write ($newfile);
+ }
+
+=head1 DESCRIPTION
+
+The C<Text::BibTeX> module serves mainly as a high-level introduction to
+the C<Text::BibTeX> library, for both code and documentation purposes.
+The code loads the two fundamental modules for processing BibTeX files
+(C<Text::BibTeX::File> and C<Text::BibTeX::Entry>), and this
+documentation gives a broad overview of the whole library that isn't
+available in the documentation for the individual modules that comprise
+it.
+
+In addition, the C<Text::BibTeX> module provides a number of
+miscellaneous functions that are useful in processing BibTeX data
+(especially the kind that comes from bibliographies as defined by BibTeX
+0.99, rather than generic database files). These functions don't
+generally fit in the object-oriented class hierarchy centred around the
+C<Text::BibTeX::Entry> class, mainly because they are specific to
+bibliographic data and operate on generic strings (rather than being
+tied to a particular BibTeX entry). These are also documented here, in
+L<"MISCELLANEOUS FUNCTIONS">.
+
+Note that every module described here begins with the C<Text::BibTeX>
+prefix. For brevity, I have dropped this prefix from most class and
+module names in the rest of this manual page (and in most of the other
+manual pages in the library).
+
+=head1 MODULES AND CLASSES
+
+The C<Text::BibTeX> library includes a number of modules, many of which
+provide classes. Usually, the relationship is simple and obvious: a
+module provides a class of the same name---for instance, the
+C<Text::BibTeX::Entry> module provides the C<Text::BibTeX::Entry> class.
+There are a few exceptions, though: most obviously, the C<Text::BibTeX>
+module doesn't provide any classes itself, it merely loads two modules
+(C<Text::BibTeX::Entry> and C<Text::BibTeX::File>) that do. The other
+exceptions are mentioned in the descriptions below, and discussed in
+detail in the documentation for the respective modules.
+
+The modules are presented roughly in order of increasing specialization:
+the first three are essential for any program that processes BibTeX data
+files, regardless of what kind of data they hold. The later modules are
+specialized for use with bibliographic databases, and serve both to
+emulate BibTeX 0.99's standard styles and to provide an example of how
+to define a database structure through such specialized modules. Each
+module is fully documented in its respective manual page.
+
+=over 4
+
+=item C<Text::BibTeX>
+
+Loads the two fundamental modules (C<Entry> and C<File>), and provides a
+number of miscellaneous functions that don't fit anywhere in the class
+hierarchy.
+
+=item C<Text::BibTeX::File>
+
+Provides an object-oriented interface to BibTeX database files. In
+addition to the obvious attributes of filename and filehandle, the
+"file" abstraction manages properties such as the database structure and
+options for it.
+
+=item C<Text::BibTeX::Entry>
+
+Provides an object-oriented interface to BibTeX entries, which can be
+parsed from C<File> objects, arbitrary filehandles, or strings. Manages
+all the properties of a single entry: type, key, fields, and values.
+Also serves as the base class for the I<structured entry classes>
+(described in detail in L<Text::BibTeX::Structure>).
+
+=item C<Text::BibTeX::Value>
+
+Provides an object-oriented interface to I<values> and I<simple values>,
+high-level constructs that can be used to represent the strings
+associated with each field in an entry. Normally, field values are
+returned simply as Perl strings, with macros expanded and multiple
+strings "pasted" together. If desired, you can instruct C<Text::BibTeX>
+to return C<Text::BibTeX::Value> objects, which give you access to the
+original form of the data.
+
+=item C<Text::BibTeX::Structure>
+
+Provides the C<Structure> and C<StructuredEntry> classes, which serve
+primarily as base classes for the two kinds of classes that define
+database structures. Read this man page for a comprehensive description
+of the mechanism for implementing Perl classes analogous to BibTeX
+"style files".
+
+=item C<Text::BibTeX::Bib>
+
+Provides the C<BibStructure> and C<BibEntry> classes, which serve two
+purposes: they fulfill the same role as the standard style files of
+BibTeX 0.99, and they give an example of how to write new database
+structures. These ultimately derive from, respectively, the
+C<Structure> and C<StructuredEntry> classes provided by the C<Structure>
+module.
+
+=item C<Text::BibTeX::BibSort>
+
+One of the C<BibEntry> class's base classes: handles the generation of
+sort keys for sorting prior to output formatting.
+
+=item C<Text::BibTeX::BibFormat>
+
+One of the C<BibEntry> class's base classes: handles the formatting of
+bibliographic data for output in a markup language such as LaTeX.
+
+=item C<Text::BibTeX::Name>
+
+A class used by the C<Bib> structure and specific to bibliographic data
+as defined by BibTeX itself: parses individual author names into
+"first", "von", "last", and "jr" parts.
+
+=item C<Text::BibTeX::NameFormat>
+
+Also specific to bibliographic data: puts split-up names (as parsed by
+the C<Name> class) back together in a custom way.
+
+=back
+
+For a first time through the library, you'll probably want to confine
+your reading to L<Text::BibTeX::File> and L<Text::BibTeX::Entry>. The
+other modules will come in handy eventually, especially if you need to
+emulate BibTeX in a fairly fine-grained way (e.g. parsing names,
+generating sort keys). But for the simple database hacks that are the
+bread and butter of the C<Text::BibTeX> library, the C<File> and
+C<Entry> classes are the bulk of what you'll need. You may also find
+some of the material in this manual page useful, namely L<"CONSTANT
+VALUES"> and L<"UTILITY FUNCTIONS">.
+
+=cut
+
+sub AUTOLOAD
+{
+    # Lazily resolves the BT* constants.  The first call to an undefined
+    # function whose name starts with "BT" asks the XS function constant()
+    # for the value, installs a real subroutine returning that value (so
+    # AUTOLOAD is not entered again for the same name), and returns the
+    # value.  Any other undefined function name is a fatal error.
+
+    my $constname = $AUTOLOAD;
+    $constname =~ s/.*:://;              # strip the package qualifier
+
+    # If constant() itself is what is missing (e.g. the XS code has not
+    # been bootstrapped yet), calling it below would just land back here.
+    if ($constname eq 'constant')
+    {
+        carp ("Recursive AUTOLOAD--probable compilation error");
+        return;
+    }
+
+    my $val;
+    $val = constant ($constname) if $constname =~ /^BT/;
+    croak ("Unknown Text::BibTeX function: \"$constname\"")
+        unless defined $val;
+
+    # Install the constant with a closure rather than a string eval: a
+    # closure cannot fail to compile and, unlike eval, does not reset $@
+    # when a constant is first touched while the caller is handling an
+    # error.
+    {
+        no strict 'refs';
+        *{$AUTOLOAD} = sub { $val };
+    }
+
+    $val;
+}
+
+# The two core classes of the Text::BibTeX hierarchy are always loaded
+# along with this module.
+require Text::BibTeX::File;
+require Text::BibTeX::Entry;
+
+# Pull in the compiled XSUB code that parses BibTeX entries and the
+# strings inside them.
+Text::BibTeX->bootstrap;
+
+# A note on ordering: the call to initialize() below must NOT be moved
+# into a BEGIN block, because it would then run before the bootstrap
+# above, and initialize() is XS code.  The resulting failure is rather
+# interesting: Perl falls back to the AUTOLOAD routine, which tries to
+# call `constant'; that is also a not-yet-loaded XS routine, so AUTOLOAD
+# is entered again, which tries to call `constant' again, and so on.  The
+# moral of the story: beware of what you put in BEGIN blocks in
+# XS-dependent modules!
+
+initialize ();                          # XS function: start up the C library
+END { cleanup (); }                     # XS function: shut it down at exit
+
+
+=head1 EXPORTS
+
+The C<Text::BibTeX> module has a number of optional exports, most of
+them constant values described in L<"CONSTANT VALUES"> below. The
+default exports are a subset of these constant values that are used
+particularly often, the "entry metatypes" (also accessible via the
+export tag C<metatypes>). Thus, the following two lines are equivalent:
+
+ use Text::BibTeX;
+ use Text::BibTeX qw(:metatypes);
+
+Some of the various subroutines provided by the module are also
+exportable. C<bibloop>, C<split_list>, C<purify_string>, and
+C<change_case> are all useful in everyday processing of BibTeX data, but
+don't really fit anywhere in the class hierarchy. They may be imported
+from C<Text::BibTeX> using the C<subs> export tag. C<check_class> and
+C<display_list> are also exportable, but only by name; they are not
+included in any export tag. (These two mainly exist for use by other
+modules in the library.) For instance, to use C<Text::BibTeX> and
+import the entry metatype constants and the common subroutines:
+
+ use Text::BibTeX qw(:metatypes :subs);
+
+Another group of subroutines exists for direct manipulation of the macro
+table maintained by the underlying C library. These functions (see
+L<"Macro table functions">, below) allow you to define, delete, and
+query the value of BibTeX macros (or "abbreviations"). They may be
+imported I<en masse> using the C<macrosubs> export tag:
+
+ use Text::BibTeX qw(:macrosubs);
+
+=head1 CONSTANT VALUES
+
+The C<Text::BibTeX> module makes a number of constant values available.
+These correspond to the values of various enumerated types in the
+underlying C library, B<btparse>, and their meanings are more fully
+explained in the B<btparse> documentation.
+
+Each group of constants is optionally exportable using an export tag
+given in the descriptions below.
+
+=over 4
+
+=item Entry metatypes
+
+C<BTE_UNKNOWN>, C<BTE_REGULAR>, C<BTE_COMMENT>, C<BTE_PREAMBLE>,
+C<BTE_MACRODEF>. The C<metatype> method in the C<Entry> class always
+returns one of these values. The latter three describe, respectively,
+C<comment>, C<preamble>, and C<string> entries; C<BTE_REGULAR> describes
+all other entry types. C<BTE_UNKNOWN> should never be seen (it's mainly
+useful for C code that might have to detect half-baked data structures).
+See also L<btparse>. Export tag: C<metatypes>.
+
+=item AST node types
+
+C<BTAST_STRING>, C<BTAST_MACRO>, C<BTAST_NUMBER>. Used to distinguish
+the three kinds of simple values---strings, macros, and numbers. The
+C<SimpleValue> class' C<type> method always returns one of these three
+values. See also L<Text::BibTeX::Value>, L<btparse>. Export tag:
+C<nodetypes>.
+
+=item Name parts
+
+C<BTN_FIRST>, C<BTN_VON>, C<BTN_LAST>, C<BTN_JR>, C<BTN_NONE>. Used to
+specify the various parts of a name after it has been split up. These
+are mainly useful when using the C<NameFormat> class. See also
+L<bt_split_names> and L<bt_format_names>. Export tag: C<nameparts>.
+
+=item Join methods
+
+C<BTJ_MAYTIE>, C<BTJ_SPACE>, C<BTJ_FORCETIE>, C<BTJ_NOTHING>. Used to
+tell the C<NameFormat> class how to join adjacent tokens together; see
+L<Text::BibTeX::NameFormat> and L<bt_format_names>. Export tag:
+C<joinmethods>.
+
+=back
+
+=head1 UTILITY FUNCTIONS
+
+C<Text::BibTeX> provides several functions that operate outside of the
+normal class hierarchy. Of these, only C<bibloop> is likely to be of
+much use to you in writing everyday BibTeX-hacking programs; the other
+two (C<check_class> and C<display_list>) are mainly provided for the use
+of other modules in the library. They are documented here mainly for
+completeness, but also because they might conceivably be useful in other
+circumstances.
+
+=over 4
+
+=item bibloop (ACTION, FILES [, DEST])
+
+Loops over all entries in a set of BibTeX files, performing some
+caller-supplied action on each entry. FILES should be a reference to
+the list of filenames to process, and ACTION a reference to a subroutine
+that will be called on each entry. DEST, if given, should be a
+C<Text::BibTeX::File> object (opened for output) to which entries might
+be printed.
+
+The subroutine referenced by ACTION is called with exactly one argument:
+the C<Text::BibTeX::Entry> object representing the entry currently being
+processed. Information about both the entry itself and the file where
+it originated is available through this object; see
+L<Text::BibTeX::Entry>. The ACTION subroutine is only called if the
+entry was successfully parsed; any syntax errors will result in a
+warning message being printed, and that entry being skipped. Note that
+I<all> successfully parsed entries are passed to the ACTION subroutine,
+even C<preamble>, C<string>, and C<comment> entries. To skip these
+pseudo-entries and process only "regular" entries, your action
+subroutine should look something like this:
+
+ sub action {
+ my $entry = shift;
+ return unless $entry->metatype == BTE_REGULAR;
+ # process $entry ...
+ }
+
+If your action subroutine needs any more arguments, you can just create
+a closure (anonymous subroutine) as a wrapper, and pass it to
+C<bibloop>:
+
+ sub action {
+ my ($entry, $extra_stuff) = @_;
+ # ...
+ }
+
+ my $extra = ...;
+ Text::BibTeX::bibloop (sub { &action ($_[0], $extra) }, \@files);
+
+If the ACTION subroutine returns a true value and DEST was given, then
+the processed entry will be written to DEST.
+
+=cut
+
+# ----------------------------------------------------------------------
+# NAME : bibloop
+# INPUT : $action
+# $files
+# $dest
+# OUTPUT :
+# RETURNS :
+# DESCRIPTION: Loops over all entries in a set of files, calling
+# &$action on each one.
+# CREATED : summer 1996 (in original Bibtex.pm module)
+# MODIFIED : May 1997 (added to Text::BibTeX with revisions)
+# Feb 1998 (simplified and documented)
+# ----------------------------------------------------------------------
+sub bibloop (&$;$)
+{
+    # Calls ACTION on every successfully parsed entry of every file named
+    # in FILES, optionally writing the entry to DEST afterwards.
+    #
+    #   $action - code ref, called with the entry object as its only
+    #             argument
+    #   $files  - ref to the list of filenames to process (left untouched)
+    #   $dest   - optional; when given and &$action returns a true value,
+    #             the entry's write method is called with it
+    #
+    # Entries whose parse_ok method returns false are skipped.
+    my ($action, $files, $dest) = @_;
+
+    # Iterate rather than shift(): shifting emptied the caller's array and
+    # ended the loop early on a false filename such as "0".
+    foreach my $file (@$files)
+    {
+        my $bib = Text::BibTeX::File->new ($file);
+
+        # NOTE(review): presumably the constructor returns a false value
+        # when the file cannot be opened -- confirm against
+        # Text::BibTeX::File.  Fail with a useful message here rather than
+        # with "Can't call method "eof" on an undefined value" below.
+        croak ("bibloop: unable to open BibTeX file \"$file\"")
+            unless $bib;
+
+        until ($bib->eof())
+        {
+            my $entry = Text::BibTeX::Entry->new ($bib);
+            next unless $entry->parse_ok;
+
+            my $result = &$action ($entry);
+            $entry->write ($dest, 1)
+                if $result && $dest;
+        }
+    }
+}
+
+=item check_class (PACKAGE, DESCRIPTION, SUPERCLASS, METHODS)
+
+Ensures that a PACKAGE implements a class meeting certain requirements.
+First, it inspects Perl's symbol tables to ensure that a package named
+PACKAGE actually exists. Then, it ensures that the class named by
+PACKAGE derives from SUPERCLASS (using the universal method C<isa>).
+This derivation might be through multiple inheritance, or through
+several generations of a class hierarchy; the only requirement is that
+SUPERCLASS is somewhere in PACKAGE's tree of base classes. Finally, it
+checks that PACKAGE provides each method listed in METHODS (a reference
+to a list of method names). This is done with the universal method
+C<can>, so the methods might actually come from one of PACKAGE's base
+classes.
+
+DESCRIPTION should be a brief string describing the class that was
+expected to be provided by PACKAGE. It is used for generating warning
+messages if any of the class requirements are not met.
+
+This is mainly used by the supervisory code in
+C<Text::BibTeX::Structure>, to ensure that user-supplied structure
+modules meet the rules required of them.
+
+=cut
+
+# ----------------------------------------------------------------------
+# NAME : check_class
+# INPUT : $package - the name of a package that is expected to exist
+# $description
+# - string describing what the package is
+# $superclass
+# - a package name from which $package is expected
+# to inherit
+# $methods - ref to list of method names expected to be
+# available via $package (possibly through
+# inheritance)
+# OUTPUT :
+# RETURNS :
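+# DESCRIPTION: Makes sure that a package named by $package exists
+# (by following the chain of symbol tables starting
+# at %::), that it inherits from $superclass, and that
+# it provides every method listed in $methods. Dies if
+# any of these checks fails.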
+# CALLERS : Text::BibTeX::Structure::new
+# CREATED : 1997/09/09, GPW
+# MODIFIED :
+# ----------------------------------------------------------------------
+sub check_class
+{
+    # Dies (with a message that starts "Text::BibTeX::Structure:" and
+    # quotes $description and $package) unless all of these hold:
+    #   1. a symbol table exists for $package;
+    #   2. $package inherits from $superclass (skipped if $superclass is
+    #      false);
+    #   3. $package can perform every method named in @$methods.
+    my ($package, $description, $superclass, $methods) = @_;
+
+    # 1. Walk down the nested symbol tables from the root (%::), one
+    #    package-name component at a time; a missing link means the
+    #    package was never defined.
+    my $symtab = \%::;
+    foreach my $component (split ('::', $package))
+    {
+        $symtab = $symtab->{$component . '::'};
+        die "Text::BibTeX::Structure: $description " .
+            "\"$package\" apparently not supplied\n"
+            unless defined $symtab;
+    }
+
+    # 2. and 3. call UNIVERSAL::isa/can fully qualified: these are the
+    #    functions that "use UNIVERSAL qw(isa can)" used to import, but
+    #    UNIVERSAL no longer exports them (Perl 5.22 and later), so this
+    #    code must not depend on that import.
+    if ($superclass && ! UNIVERSAL::isa ($package, $superclass))
+    {
+        die "Text::BibTeX::Structure: $description \"$package\" " .
+            "improperly defined: ! isa ($superclass)\n";
+    }
+
+    # An undefined method list simply means "nothing to check".
+    foreach my $method (@{ $methods || [] })
+    {
+        die "Text::BibTeX::Structure: $description \"$package\" " .
+            "improperly defined: no method \"$method\"\n"
+            unless UNIVERSAL::can ($package, $method);
+    }
+} # &check_class
+
+
+=item display_list (LIST, QUOTE)
+
+Converts a list of strings to the grammatical conventions of a human
+language (currently, only English rules are supported). LIST must be a
+reference to a list of strings. If this list is empty, the empty string
+is returned. If it has one element, then just that element is
+returned. If it has two elements, then they are joined with the string
+C<" and "> and the resulting string is returned. Otherwise, the list
+has I<N> elements for I<N> E<gt>= 3; elements 1..I<N>-1 are joined with
+commas, and the final element is tacked on with an intervening
+C<", and ">.
+
+If QUOTE is true, then each string is encased in single quotes before
+anything else is done.
+
+This is used elsewhere in the library for two very distinct purposes:
+for generating warning messages describing lists of fields that should
+be present or are conflicting in an entry, and for generating lists of
+author names in formatted bibliographies.
+
+=cut
+
+# ----------------------------------------------------------------------
+# NAME : display_list
+# INPUT : $list - reference to list of strings to join
+# $quote - if true, they will be single-quoted before join
+# OUTPUT :
+# RETURNS : elements of @$list, joined together into a single string
+# with commas and 'and' as appropriate
+# DESCRIPTION: Formats a list of strings for display as English text.
+# CALLERS : Text::BibTeX::Structure::check_interacting_fields
+# CALLS :
+# CREATED : 1997/09/23, GPW
+# MODIFIED :
+# ----------------------------------------------------------------------
+sub display_list
+{
+    # Joins the strings in @$list the way English prose would list them:
+    #   ()        -> ''
+    #   (a)       -> 'a'
+    #   (a, b)    -> 'a and b'
+    #   (a, b, c) -> 'a, b, and c'
+    # When $quote is true, every string is wrapped in single quotes first.
+    my ($list, $quote) = @_;
+
+    return '' unless @$list;
+
+    # Work on a copy; the caller's list is never modified.
+    my @items = $quote ? map ("'$_'", @$list) : @$list;
+    return $items[0] if @items == 1;
+
+    # Two items get a bare " and "; three or more get commas throughout,
+    # including one before the final "and".
+    my $last = pop @items;
+    my $glue = @items > 1 ? ', and ' : ' and ';
+    return join (', ', @items) . $glue . $last;
+}
+
+
+=back
+
+=head1 MISCELLANEOUS FUNCTIONS
+
+In addition to loading the C<File> and C<Entry> modules, C<Text::BibTeX>
+loads the XSUB code which bridges the Perl modules to the underlying C
+library, B<btparse>. This XSUB code provides a number of miscellaneous
+utility functions, most of which are put into other packages in the
+C<Text::BibTeX> family for use by the corresponding classes. (For
+instance, the XSUB code loaded by C<Text::BibTeX> provides a function
+C<Text::BibTeX::Entry::parse>, which is actually documented as the
+C<parse> method of the C<Text::BibTeX::Entry> class---see
+L<Text::BibTeX::Entry>.) However, for completeness this function---and
+all the other functions that become available when you C<use
+Text::BibTeX>---are at least mentioned here. The only functions from
+this group that you're ever likely to use are described in L<"Generic
+string-processing functions">.
+
+=head2 Startup/shutdown functions
+
+These just initialize and shut down the underlying C library. Don't call
+either one of them; the C<Text::BibTeX> startup/shutdown code takes care
+of it as appropriate. They're just mentioned here for completeness.
+
+=over 4
+
+=item initialize ()
+
+=item cleanup ()
+
+=back
+
+=head2 Generic string-processing functions
+
+=over 4
+
+=item split_list (STRING, DELIM [, FILENAME [, LINE [, DESCRIPTION]]])
+
+Splits a string on a fixed delimiter according to the BibTeX rules for
+splitting up lists of names. With BibTeX, the delimiter is hard-coded
+as C<"and">; here, you can supply any string. Instances of DELIM in
+STRING are considered delimiters if they are at brace-depth zero,
+surrounded by whitespace, and not at the beginning or end of STRING; the
+comparison is case-insensitive. See L<bt_split_names> for full details
+of how splitting is done (it's I<not> the same as Perl's C<split>
+function).
+
+Returns the list of strings resulting from splitting STRING on DELIM.
+
+=item purify_string (STRING [, OPTIONS])
+
+"Purifies" STRING in the BibTeX way (usually for generation of sort
+keys). See L<bt_misc> for details; note that, unlike the C interface,
+C<purify_string> does I<not> modify STRING in-place. A purified copy of
+the input string is returned.
+
+OPTIONS is currently unused.
+
+=item change_case (TRANSFORM, STRING [, OPTIONS])
+
+Transforms the case of STRING according to TRANSFORM (a single
+character, one of C<'u'>, C<'l'>, or C<'t'>). See L<bt_misc> for
+details; again, C<change_case> differs from the C interface in that
+STRING is not modified in-place---the input string is copied, and the
+transformed copy is returned.
+
+=back
+
+=head2 Entry-parsing functions
+
+Although these functions are provided by the C<Text::BibTeX> module,
+they are actually in the C<Text::BibTeX::Entry> package. That's because
+they are implemented in C, and thus loaded with the XSUB code that
+C<Text::BibTeX> loads; however, they are actually methods in the
+C<Text::BibTeX::Entry> class. Thus, they are documented as methods in
+L<Text::BibTeX::Entry>.
+
+=over 4
+
+=item parse (ENTRY_STRUCT, FILENAME, FILEHANDLE)
+
+=item parse_s (ENTRY_STRUCT, TEXT)
+
+=back
+
+=head2 Macro table functions
+
+These functions allow direct access to the macro table maintained by
+B<btparse>, the C library underlying C<Text::BibTeX>. In the normal
+course of events, macro definitions always accumulate, and are only
+defined as a result of parsing a macro definition (C<@string>) entry.
+B<btparse> never deletes old macro definitions for you, and doesn't have
+any built-in default macros. If, for example, you wish to start fresh
+with new macros for every file, use C<delete_all_macros>. If you wish
+to pre-define certain macros, use C<add_macro_text>. (But note that the
+C<Bib> structure, as part of its mission to emulate BibTeX 0.99, defines
+the standard "month name" macros for you.)
+
+See also L<bt_macros> in the B<btparse> documentation for a description
+of the C interface to these functions.
+
+=over 4
+
+=item add_macro_text (MACRO, TEXT [, FILENAME [, LINE]])
+
+Defines a new macro, or redefines an old one. MACRO is the name of the
+macro, and TEXT is the text it should expand to. FILENAME and LINE are
+just used to generate any warnings about the macro definition. The only
+such warning occurs when you redefine an old macro: its value is
+overridden, and C<add_macro_text()> issues a warning saying so.
+
+=item delete_macro (MACRO)
+
+Deletes a macro from the macro table. If MACRO isn't defined,
+takes no action.
+
+=item delete_all_macros ()
+
+Deletes all macros from the macro table.
+
+=item macro_length (MACRO)
+
+Returns the length of a macro's expansion text. If the macro is
+undefined, returns 0; no warning is issued.
+
+=item macro_text (MACRO [, FILENAME [, LINE]])
+
+Returns the expansion text of a macro. If the macro is not defined,
+issues a warning and returns C<undef>. FILENAME and LINE, if supplied,
+are used for generating this warning; they should be supplied if you're
+looking up the macro as a result of finding it in a file.
+
+=back
+
+=head2 Name-parsing functions
+
+These are both private functions for the use of the C<Name> class, and
+therefore are put in the C<Text::BibTeX::Name> package. You should use
+the interface provided by that class for parsing names in the BibTeX
+style.
+
+=over 4
+
+=item _split (NAME_STRUCT, NAME, FILENAME, LINE, NAME_NUM, KEEP_CSTRUCT)
+
+=item free (NAME_STRUCT)
+
+=back
+
+=head2 Name-formatting functions
+
+These are private functions for the use of the C<NameFormat> class, and
+therefore are put in the C<Text::BibTeX::NameFormat> package. You
+should use the interface provided by that class for formatting names in
+the BibTeX style.
+
+=over 4
+
+=item create ([PARTS [, ABBREV_FIRST]])
+
+=item free (FORMAT_STRUCT)
+
+=item _set_text (FORMAT_STRUCT, PART, PRE_PART, POST_PART, PRE_TOKEN, POST_TOKEN)
+
+=item _set_options (FORMAT_STRUCT, PART, ABBREV, JOIN_TOKENS, JOIN_PART)
+
+=item format_name (NAME_STRUCT, FORMAT_STRUCT)
+
+=back
+
+=head1 BUGS AND LIMITATIONS
+
+C<Text::BibTeX> inherits several limitations from its base C library,
+B<btparse>; see L<btparse/BUGS AND LIMITATIONS> for details. In addition,
+C<Text::BibTeX> will not work with a Perl binary built using the C<sfio>
+library. This is because Perl's I/O abstraction layer does not extend to
+third-party C libraries that use stdio, and B<btparse> most certainly does
+use stdio.
+
+=head1 SEE ALSO
+
+L<btool_faq>, L<Text::BibTeX::File>, L<Text::BibTeX::Entry>,
+L<Text::BibTeX::Value>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
+
+=head1 AVAILABILITY
+
+The btOOL home page, where you can get up-to-date information about
+C<Text::BibTeX> (and download the latest version) is
+
+ http://starship.python.net/~gward/btOOL/
+
+You will also find the latest version of B<btparse>, the C library
+underlying C<Text::BibTeX>, there. B<btparse> is needed to build
+C<Text::BibTeX>, and must be downloaded separately.
+
+Both libraries are also available on CTAN (the Comprehensive TeX Archive
+Network, C<http://www.ctan.org/tex-archive/>) and CPAN (the Comprehensive
+Perl Archive Network, C<http://www.cpan.org/>). Look in
+F<biblio/bibtex/utils/btOOL/> on CTAN, and F<authors/Greg_Ward/> on
+CPAN. For example,
+
+ http://www.ctan.org/tex-archive/biblio/bibtex/utils/btOOL/
+ http://www.cpan.org/authors/Greg_Ward
+
+will both get you to the latest version of C<Text::BibTeX> and B<btparse>
+-- but of course, you should always access busy sites like CTAN and CPAN
+through a mirror.
+
+=cut
+
+1;
diff --git a/BibTeX.xs b/BibTeX.xs
new file mode 100644
index 0000000..a6d9061
--- /dev/null
+++ b/BibTeX.xs
@@ -0,0 +1,542 @@
+/* ------------------------------------------------------------------------
+@NAME : BibTeX.xs
+@INPUT :
+@OUTPUT :
+@RETURNS :
+@DESCRIPTION: Glue between my `btparse' library and the Perl module
+ Text::BibTeX. Provides the following functions to Perl:
+ Text::BibTeX::constant
+ Text::BibTeX::initialize
+ Text::BibTeX::cleanup
+ Text::BibTeX::split_list
+ Text::BibTeX::purify_string
+ Text::BibTeX::Entry::_parse_s
+ Text::BibTeX::Entry::_parse
+ Text::BibTeX::Name::_split
+ Text::BibTeX::Name::free
+ Text::BibTeX::add_macro_text
+ Text::BibTeX::delete_macro
+ Text::BibTeX::delete_all_macros
+ Text::BibTeX::macro_length
+ Text::BibTeX::macro_text
+@GLOBALS :
+@CALLS :
+@CREATED : Jan/Feb 1997, Greg Ward
+@MODIFIED :
+@VERSION : $Id: BibTeX.xs 3031 2006-09-21 20:02:34Z ambs $
+-------------------------------------------------------------------------- */
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#define BT_DEBUG 0
+
+#include "btparse.h"
+#include "btxs_support.h"
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX
+
+# XSUBs with no corresponding functions in the C library (hence no prefix
+# for this section):
+# constant
+
+# constant (NAME): looks NAME up with the C-level constant() helper.
+# When that helper returns true, the integer it stored is handed back
+# to Perl as a mortal IV; otherwise the XSUB returns Perl's undef.
+SV *
+constant(name)
+char * name
+ CODE:
+ IV i;
+ if (constant(name, &i))
+ ST(0) = sv_2mortal(newSViv(i));
+ else
+ ST(0) = &PL_sv_undef;
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX PREFIX = bt_
+
+# XSUBs that consist solely of calls to corresponding C functions in the
+# library:
+# initialize
+# cleanup
+
+# initialize () and cleanup () take no arguments and return nothing.
+# Neither has a CODE section, so xsubpp generates a direct call to
+# bt_initialize() / bt_cleanup(); the bt_ PREFIX declared for this
+# MODULE section is stripped from the Perl-visible names.
+void
+bt_initialize()
+
+void
+bt_cleanup()
+
+
+# XSUBs that still go right into the Text::BibTeX package (ie. they don't
+# really belong in one of the subsidiary packages), but need a bit of work
+# to convert the C data to Perl form:
+# split_list
+# purify_string
+# change_case
+
+# split_list (STRING, DELIM [, FILENAME, LINE, DESCRIPTION]): calls
+# bt_split_list() and returns the items of the resulting C list to Perl
+# as a list.  A NULL item becomes Perl's undef; a NULL result from the C
+# function becomes the empty list.
+void
+bt_split_list (string, delim, filename=NULL, line=0, description=NULL)
+
+ char * string
+ char * delim
+ char * filename
+ int line
+ char * description
+
+ PREINIT:
+ bt_stringlist *
+ names;
+ int i;
+ SV * sv_name;
+
+ PPCODE:
+ names = bt_split_list (string, delim, filename, line, description);
+ if (names == NULL)
+ XSRETURN_EMPTY; /* return empty list to perl */
+
+ /* reserve stack space for all items once, so PUSHs below is safe */
+ EXTEND (sp, names->num_items);
+ for (i = 0; i < names->num_items; i++)
+ {
+ if (names->items[i] == NULL)
+ sv_name = &PL_sv_undef;
+ else
+ sv_name = sv_2mortal (newSVpv (names->items[i], 0));
+
+ PUSHs (sv_name);
+ }
+
+ /* newSVpv copied each string, so the C list is no longer needed */
+ bt_free_list (names);
+
+
+# purify_string (INSTR [, OPTIONS]): returns a purified copy of INSTR.
+# The C function works in place, so it is run on the buffer of a fresh
+# SV and the caller's own string is left untouched.
+SV *
+bt_purify_string (instr, options=0)
+
+ char * instr
+ int options
+
+ CODE:
+ if (instr == NULL) /* undef in, undef out */
+ XSRETURN_EMPTY;
+ /* length 0 asks newSVpv to measure the string with strlen itself */
+ RETVAL = newSVpv (instr, 0);
+ bt_purify_string (SvPVX (RETVAL), (ushort) options);
+ /* the buffer was edited behind Perl's back (presumably shortened),
+  * so the SV's recorded length has to be brought back in sync */
+ SvCUR_set (RETVAL, strlen (SvPVX (RETVAL))); /* reset SV's length */
+
+ OUTPUT:
+ RETVAL
+
+
+# Here's an alternate formulation of `purify_string' that acts more like
+# the C function (and less like nice Perl): it modifies the input string
+# in place, and returns nothing. In addition to being weird Perl,
+# this contradicts the documentation. And it would be impossible
+# to replicate this behaviour in a similar Python extension... all
+# round, a bad idea!
+
+## void
+## bt_purify_string (str, options=0)
+
+## char * str
+## int options
+
+## CODE:
+## if (str != NULL)
+## bt_purify_string (str, (ushort) options);
+## sv_setpv (ST(0), str);
+
+
+# change_case (TRANSFORM, STRING [, OPTIONS]): returns a copy of STRING
+# after bt_change_case() has been applied to that copy in place.
+# TRANSFORM is a single character and is passed through unchanged.
+# Unlike purify_string, the SV's length is not reset afterwards
+# (presumably because changing case keeps the length -- confirm).
+SV *
+bt_change_case (transform, string, options=0)
+ char transform
+ char * string
+ int options
+
+ CODE:
+ DBG_ACTION
+ (1, printf ("XSUB change_case: transform=%c, string=%p (%s)\n",
+ transform, string, string))
+ if (string == NULL)
+ XSRETURN_EMPTY;
+ RETVAL = newSVpv (string, 0);
+ bt_change_case (transform, SvPVX (RETVAL), (ushort) options);
+
+ OUTPUT:
+ RETVAL
+
+
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX::Entry
+
+# The two XSUBs that go to the Text::BibTeX::Entry package; both rely on
+# ast_to_hash() to do the appropriate "convert to Perl form" work:
+# _parse
+# _parse_s
+
+# _parse (ENTRY_REF, FILENAME, FILE [, PRESERVE]): reads the next entry
+# from FILE with bt_parse_entry() and passes the resulting AST, the parse
+# status and PRESERVE on to ast_to_hash().  Returns false when no AST
+# comes back (end of file) and true otherwise.  The parse status is only
+# forwarded to ast_to_hash(); it has no effect on the return value.
+int
+_parse (entry_ref, filename, file, preserve=FALSE)
+ SV * entry_ref;
+ char * filename;
+ FILE * file;
+ boolean preserve;
+
+ PREINIT:
+ ushort options = 0;
+ boolean status;
+ AST * top;
+
+ CODE:
+
+ top = bt_parse_entry (file, filename, options, &status);
+ DBG_ACTION
+ (2, dump_ast ("BibTeX.xs:parse: AST from bt_parse_entry():\n", top))
+
+ if (!top) /* at EOF -- return false to perl */
+ {
+ XSRETURN_NO;
+ }
+
+ ast_to_hash (entry_ref, top, status, preserve);
+ XSRETURN_YES; /* OK -- return true to perl */
+
+
+# _parse_s (ENTRY_REF, TEXT [, PRESERVE]): string counterpart of _parse.
+# Parses one entry out of TEXT with bt_parse_entry_s() and passes the
+# AST, the parse status and PRESERVE on to ast_to_hash().  Returns false
+# when no entry was found (NULL AST) and true otherwise.
+int
+_parse_s (entry_ref, text, preserve=FALSE)
+ SV * entry_ref;
+ char * text;
+ boolean preserve;
+
+ PREINIT:
+ ushort options = 0;
+ boolean status;
+ AST * top;
+
+ CODE:
+
+ /* NOTE(review): the NULL and 1 are presumably "no filename" and the
+  * starting line number -- confirm against btparse.h */
+ top = bt_parse_entry_s (text, NULL, 1, options, &status);
+
+ if (!top) /* no entry found -- return false to perl */
+ {
+ XSRETURN_NO;
+ }
+
+ ast_to_hash (entry_ref, top, status, preserve);
+ XSRETURN_YES; /* OK -- return true to perl */
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX::Name
+
+# The XSUBs that go in the Text::BibTeX::Name package (ie. that operate
+# on name objects):
+# _split
+# free
+
+# dump_name (HASHREF): debugging aid, compiled only when BT_DEBUG is
+# non-zero.  Fetches the _cstruct entry of the hash behind HASHREF and
+# passes the bt_name pointer stored there (as an IV) to the C-level
+# dump_name(); warns when the hash has no such entry.  HASHREF itself is
+# not checked for being a hash reference.
+#if BT_DEBUG
+
+void
+dump_name (hashref)
+ SV * hashref
+
+ PREINIT:
+ HV * hash;
+ SV ** sv_name;
+ bt_name * name;
+
+ CODE:
+ hash = (HV *) SvRV (hashref);
+ sv_name = hv_fetch (hash, "_cstruct", 8, 0);
+ if (! sv_name)
+ {
+ warn ("Name::dump: no _cstruct member in hash");
+ }
+ else
+ {
+ name = (bt_name *) SvIV (*sv_name);
+ dump_name (name); /* currently in format_name.c */
+ }
+
+#endif
+
+
+# _split (NAME_HASHREF, NAME, FILENAME, LINE, NAME_NUM, KEEP_CSTRUCT):
+# splits NAME with bt_split_name() and stores its first, von, last and
+# jr parts in the hash behind NAME_HASHREF via store_stringlist().
+# Croaks when NAME_HASHREF is not a hash reference.  A bt_name already
+# held under the _cstruct key is removed and freed first.  When
+# KEEP_CSTRUCT is true, the address of the new bt_name is stored under
+# _cstruct as an IV; otherwise the structure is freed before returning.
+void
+_split (name_hashref, name, filename, line, name_num, keep_cstruct)
+
+ SV * name_hashref
+ char * name
+ char * filename
+ int line
+ int name_num
+ int keep_cstruct
+
+ PREINIT:
+ HV * name_hash;
+ SV * sv_old_name;
+ bt_name * old_name;
+ bt_name * name_split;
+
+ CODE:
+ if (! (SvROK (name_hashref) &&
+ SvTYPE (SvRV (name_hashref)) == SVt_PVHV))
+ croak ("name_hashref is not a hash reference");
+ name_hash = (HV *) SvRV (name_hashref);
+
+ DBG_ACTION (1,
+ {
+ printf ("XS Name::_split:\n");
+ printf (" name_hashref=%p, name_hash=%p\n",
+ (void *) name_hashref, (void *) name_hash);
+ printf (" name=%p (%s), filename=%p (%s)\n",
+ name, name, filename, filename);
+ printf (" line=%d, name_num=%d, keep_cstruct=%d\n",
+ line, name_num, keep_cstruct);
+ })
+
+ /* take any previous structure out of the hash and release it, so
+  * the hash never refers to freed memory */
+ sv_old_name = hv_delete (name_hash, "_cstruct", 8, 0);
+ if (sv_old_name)
+ {
+ old_name = (bt_name *) SvIV (sv_old_name);
+ DBG_ACTION
+ (1, printf ("XS Name::_split: name hash had old C structure "
+ "(%d tokens, first was >%s<) -- freeing it\n",
+ old_name->tokens->num_items,
+ old_name->tokens->items[0]))
+ bt_free_name (old_name);
+ }
+
+ /* NOTE(review): the result is used below without a NULL check --
+  * confirm that bt_split_name() never returns NULL */
+ name_split = bt_split_name (name, filename, line, name_num);
+ DBG_ACTION (1, printf ("XS Name::_split: back from bt_split_name, "
+ "calling store_stringlist x 4\n"))
+
+ store_stringlist (name_hash, "first",
+ name_split->parts[BTN_FIRST],
+ name_split->part_len[BTN_FIRST]);
+ store_stringlist (name_hash, "von",
+ name_split->parts[BTN_VON],
+ name_split->part_len[BTN_VON]);
+ store_stringlist (name_hash, "last",
+ name_split->parts[BTN_LAST],
+ name_split->part_len[BTN_LAST]);
+ store_stringlist (name_hash, "jr",
+ name_split->parts[BTN_JR],
+ name_split->part_len[BTN_JR]);
+
+ DBG_ACTION (1,
+ {
+ char ** last = name_split->parts[BTN_LAST];
+ char ** first = name_split->parts[BTN_FIRST];
+
+ printf ("XS Name::_split: name has %d tokens; "
+ "last[0]=%s, first[0]=%s\n",
+ name_split->tokens->num_items,
+ last ? last[0] : "*no last name*",
+ first ? first[0] : "*no first name*");
+ })
+
+ /* either hand ownership of the C structure to the hash (as a raw
+  * address in an IV) or release it right away */
+ if (keep_cstruct)
+ {
+ hv_store (name_hash, "_cstruct", 8, newSViv ((IV) name_split), 0);
+ DBG_ACTION
+ (1, printf ("XS Name::_split: storing pointer to structure %p\n",
+ name_split))
+ }
+ else
+ {
+ bt_free_name (name_split);
+ }
+
+
+# free (NAME_HASHREF): releases the bt_name structure whose address is
+# stored as an IV under the _cstruct key of the hash, when there is one.
+# The key is removed from the hash at the same time (as _split already
+# does), so no stale pointer is left behind and a second call is a
+# harmless no-op instead of a double free.  An argument that is not a
+# hash reference is ignored.
+void
+free (name_hashref)
+    SV * name_hashref
+
+    PREINIT:
+    HV * name_hash;
+    SV * sv_name;
+    bt_name * name;
+
+    CODE:
+    /* nothing to release unless we really were handed a hash reference */
+    if (! (SvROK (name_hashref) &&
+           SvTYPE (SvRV (name_hashref)) == SVt_PVHV))
+        XSRETURN_EMPTY;
+    name_hash = (HV *) SvRV (name_hashref);
+
+    /* with flags 0, hv_delete returns the removed value as a mortal SV
+     * (or NULL when the key was absent) */
+    sv_name = hv_delete (name_hash, "_cstruct", 8, 0);
+    if (sv_name != NULL)
+    {
+        name = (bt_name *) SvIV (sv_name);
+        DBG_ACTION (1, printf ("XS Name::free: freeing name %p\n", name))
+        bt_free_name (name);
+    }
+#if BT_DEBUG >= 1
+    else
+    {
+        printf ("XS Name::free: no C structure to free!\n");
+    }
+#endif
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX::NameFormat
+
+# create ([PARTS, ABBREV_FIRST]): builds a name format with
+# bt_create_name_format() and returns the address of the new structure
+# to Perl as an integer (IV).  PARTS defaults to "fvlj" and ABBREV_FIRST
+# to false.  The PREINIT section is empty.
+IV
+create (parts="fvlj", abbrev_first=FALSE)
+ char * parts
+ bool abbrev_first
+
+ PREINIT:
+
+ CODE:
+ DBG_ACTION
+ (1, printf ("XS NameFormat::create: "
+ "creating name format: parts=\"%s\", abbrev=%d\n",
+ parts, abbrev_first));
+ RETVAL = (IV) bt_create_name_format (parts, abbrev_first);
+
+ OUTPUT:
+ RETVAL
+
+
+# free (FORMAT): releases a name-format structure by passing it to
+# bt_free_name_format().  How the Perl argument is turned into a
+# bt_name_format pointer is decided by the typemap, which is not part
+# of this file.
+void
+free (format)
+ bt_name_format * format
+
+ CODE:
+ bt_free_name_format ((bt_name_format *) format);
+
+
+# dump_format (HASHREF): debugging aid, compiled only when BT_DEBUG is
+# non-zero.  Fetches the _cstruct entry of the hash behind HASHREF and
+# passes the bt_name_format pointer stored there (as an IV) to the
+# C-level dump_format(); warns when the hash has no such entry.
+# HASHREF itself is not checked for being a hash reference.
+#if BT_DEBUG
+
+void
+dump_format (hashref)
+ SV * hashref
+
+ PREINIT:
+ HV * hash;
+ SV ** sv_format;
+ bt_name_format * format;
+
+ CODE:
+ hash = (HV *) SvRV (hashref);
+ sv_format = hv_fetch (hash, "_cstruct", 8, 0);
+ if (! sv_format)
+ {
+ warn ("NameFormat::dump: no _cstruct member in hash");
+ }
+ else
+ {
+ format = (bt_name_format *) SvIV (*sv_format);
+ dump_format (format); /* currently in format_name.c */
+ }
+
+#endif
+
+
+# _set_text (FORMAT, PART, PRE_PART, POST_PART, PRE_TOKEN, POST_TOKEN):
+# passes the four text strings for name part PART of FORMAT straight to
+# bt_set_format_text().  The char pointers are handed over as they are,
+# without copying; the comment in the body explains why that is safe.
+# With BT_DEBUG >= 2 the format is dumped before and after the call.
+void
+_set_text (format, part, pre_part, post_part, pre_token, post_token)
+ bt_name_format * format
+ bt_namepart part
+ char * pre_part
+ char * post_part
+ char * pre_token
+ char * post_token
+
+ CODE:
+#if BT_DEBUG >= 2
+ {
+ static char * nameparts[] =
+ { "first", "von", "last", "jr" };
+ static char * joinmethods[] =
+ {"may tie", "space", "force tie", "nothing"};
+
+ printf ("XS NameFormat::_set_text:\n");
+ printf (" format=%p, namepart=%d (%s)\n",
+ format, part, nameparts[part]);
+ printf (" format currently is:\n");
+ dump_format (format);
+ printf (" pre_part=%s, post_part=%s\n", pre_part, post_part);
+ printf (" pre_token=%s, post_token=%s\n", pre_token, post_token);
+ }
+#endif
+
+ /*
+ * No memory leak here -- just copy the pointers. At first
+ * blush, it might seem that we're opening ourselves up to
+ * the possibility of dangling pointers if the Perl strings
+ * that these char *'s refer to ever go away. However, this
+ * is taken care of at the Perl level -- see the comment
+ * in BibTeX/NameFormat.pm, sub set_text.
+ */
+
+ bt_set_format_text (format, part,
+ pre_part, post_part, pre_token, post_token);
+#if BT_DEBUG >= 2
+ printf ("XS NameFormat::_set_text: after call, format is:\n");
+ dump_format (format);
+#endif
+
+
+# _set_options (FORMAT, PART, ABBREV, JOIN_TOKENS, JOIN_PART): passes
+# the abbreviation flag and the two join methods for name part PART of
+# FORMAT straight to bt_set_format_options().  Nothing is returned.
+void
+_set_options (format, part, abbrev, join_tokens, join_part)
+ bt_name_format * format
+ bt_namepart part
+ bool abbrev
+ bt_joinmethod join_tokens
+ bt_joinmethod join_part
+
+ CODE:
+ DBG_ACTION (2,
+ printf ("XS _set_options: format=%p, part=%d, "
+ "abbrev=%d, join_tokens=%d, join_part=%d\n",
+ format, part, abbrev, join_tokens, join_part))
+ bt_set_format_options (format, part,
+ abbrev, join_tokens, join_part);
+
+
+# format_name (NAME, FORMAT): formats the split name NAME according to
+# FORMAT by calling bt_format_name() and returns the resulting string.
+# NOTE(review): the result goes back through a plain char * RETVAL and
+# is not freed here; should bt_format_name() hand back newly allocated
+# memory, this would leak -- confirm against the btparse documentation.
+char *
+format_name (name, format)
+ bt_name * name
+ bt_name_format * format
+
+ CODE:
+ DBG_ACTION
+ (2, printf ("XS format_name: name=%p, format=%p\n", name, format))
+ RETVAL = bt_format_name (name, format);
+ DBG_ACTION
+ (1, printf ("XS format_name: formatted name=%s\n", RETVAL))
+
+ OUTPUT:
+ RETVAL
+
+
+MODULE = Text::BibTeX PACKAGE = Text::BibTeX PREFIX = bt_
+
+# Macro-table XSUBs.  None of these has a CODE section, so xsubpp
+# generates a direct call to the like-named bt_ function; the bt_
+# PREFIX of this MODULE section is stripped from the Perl-visible name.
+# Where FILENAME and LINE appear they are optional and default to NULL
+# and 0.
+void
+bt_add_macro_text (macro, text, filename=NULL, line=0)
+ char * macro
+ char * text
+ char * filename
+ int line
+
+void
+bt_delete_macro (macro)
+ char * macro
+
+void
+bt_delete_all_macros ()
+
+int
+bt_macro_length (macro)
+ char * macro
+
+char *
+bt_macro_text (macro, filename=NULL, line=0)
+ char * macro
+ char * filename
+ int line
+
+
+# This bootstrap code is used to make btparse do "minimal post-processing"
+# on all entries. That way, we can control how much is done on a per-entry
+# basis by simply calling bt_postprocess_entry() ourselves.
+#
+# The need to do this means that btparse is somewhat brain-damaged -- I
+# should be able to specify the per-entry processing options when I call
+# bt_parse_entry()! Shouldn't be too hard to fix....
+BOOT:
+ /* One call per entry metatype, run once when the module is loaded.
+  * The 0 is presumably "no string-processing options at all" --
+  * confirm against btparse.h. */
+ bt_set_stringopts (BTE_MACRODEF, 0);
+ bt_set_stringopts (BTE_REGULAR, 0);
+ bt_set_stringopts (BTE_COMMENT, 0);
+ bt_set_stringopts (BTE_PREAMBLE, 0);
+
diff --git a/BibTeX/Bib.pm b/BibTeX/Bib.pm
new file mode 100644
index 0000000..4c3a7b5
--- /dev/null
+++ b/BibTeX/Bib.pm
@@ -0,0 +1,493 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/Bib.pm
+# CLASSES : Text::BibTeX::BibStructure, Text::BibTeX::BibEntry;
+# loads Text::BibTeX::BibSort and Text::BibTeX::BibFormat
+# for use by BibEntry
+# RELATIONS : BibStructure inherits from Structure
+# BibEntry inherits from BibSort and BibFormat, which
+# both inherit from StructuredEntry
+# DESCRIPTION: Implements the "Bib" structure, which provides the
+# same functionality -- though in a completely different
+# context, and much more customizably -- as the standard
+# style files of BibTeX 0.99.
+# CREATED : 1997/09/21, Greg Ward
+# MODIFIED :
+# VERSION : $Id: Bib.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+=head1 NAME
+
+Text::BibTeX::Bib - defines the "Bib" database structure
+
+=head1 SYNOPSIS
+
+ $bibfile = new Text::BibTeX::File $filename;
+ $bibfile->set_structure ('Bib',
+ # Default option values:
+ sortby => 'name',
+ namestyle => 'full',
+ nameorder => 'first',
+ atitle_lower => 1,
+ labels => 'numeric');
+
+ # Alternate option values:
+ $bibfile->set_option (sortby => 'year');
+ $bibfile->set_option (namestyle => 'nopunct');
+ $bibfile->set_option (namestyle => 'nospace');
+ $bibfile->set_option (nameorder => 'last');
+ $bibfile->set_option (atitle_lower => 0);
+ $bibfile->set_option (labels => 'alpha'); # not implemented yet!
+
+ # parse entry from $bibfile and automatically make it a BibEntry
+ $entry = new Text::BibTeX::Entry $bibfile;
+
+ # or get an entry from somewhere else which is hard-coded to be
+ # a BibEntry
+ $entry = new Text::BibTeX::BibEntry ...;
+
+ $sortkey = $entry->sort_key;
+ @blocks = $entry->format;
+
+=head1 DESCRIPTION
+
+(B<NOTE!> Do not believe everything you read in this document. The
+classes described here are unfinished and only lightly tested. The
+current implementation is a proof-of-principle, to convince myself (and
+anyone who might be interested) that it really is possible to
+reimplement BibTeX 0.99 in Perl using the core C<Text::BibTeX> classes;
+this principle is vaguely demonstrated by the current C<Bib*> modules,
+but not really proved. Many important features needed to reimplement
+the standard styles of BibTeX 0.99 are missing, even though this
+document may brashly assert otherwise. If you are interested in using
+these classes, you should start by reading and grokking the code, and
+contributing the missing bits and pieces that you need.)
+
+C<Text::BibTeX::Bib> implements the database structure for
+bibliographies as defined by the standard styles of BibTeX 0.99. It
+does this by providing two classes, C<BibStructure> and C<BibEntry> (the
+leading C<Text::BibTeX> is implied, and will be omitted for the rest of
+this document). These two classes, being specific to bibliographic
+data, are outside of the core C<Text::BibTeX> class hierarchy, but are
+distributed along with it as they provide a canonical example of a
+specific database structure using classes derived from the core
+hierarchy.
+
+C<BibStructure>, which derives from the C<Structure> class, deals with
+the structure as a whole: it handles structure options and describes all
+the types and fields that make up the database structure. If you're
+interested in writing your own database structure modules, the standard
+interface for both of these is described in L<Text::BibTeX::Structure>;
+if you're just interested in finding out the exact database structure or
+the options supported by the C<Bib> structure, you've come to the right
+place. (However, you may have to wade through a bit of excess verbiage
+due to this module's dual purpose: first, to reimplement the standard
+styles of BibTeX 0.99, and second, to provide an example for other
+programmers wishing to implement new or derived database structure
+modules.)
+
+C<BibEntry> derives from the C<StructuredEntry> class and provides
+methods that operate on individual entries presumed to come from a
+database conforming to the structure defined by the C<BibStructure>
+class. (Actually, to be completely accurate, C<BibEntry> inherits from
+two intermediate classes, C<BibSort> and C<BibFormat>. These two
+classes just exist to reduce the amount of code in the C<Bib> module,
+and thanks to the magic of inheritance, their existence is usually
+irrelevant. But you might want to consult those two classes if you're
+interested in the gory details of sorting and formatting entries from
+BibTeX 0.99-style bibliography databases.)
+
+=cut
+
+
+# first, the "structure class" (inherits from Text::BibTeX::Structure)
+
+package Text::BibTeX::BibStructure;
+use strict;
+use vars qw(@ISA);
+@ISA = qw(Text::BibTeX::Structure);
+
+=head1 STRUCTURE OPTIONS
+
+C<BibStructure> handles several user-supplied "structure options" and
+methods for dealing with them. The options currently supported by the
+C<Bib> database structure, and the values allowed for them, are:
+
+=over 4
+
+=item C<sortby>
+
+How to sort entries. Valid values: C<name> (sort on author names, year,
+and title), C<year> (sort on year, author names, and title). Sorting on
+"author names" is a bit more complicated than just using the C<author>
+field; see L<Text::BibTeX::BibSort> for details. Default value: C<name>.
+
+=item C<namestyle>
+
+How to print author (and editor) names: C<full> for unabbreviated first
+names, C<abbrev> for first names abbreviated with periods, C<nopunct>
+for first names abbreviated with space but no periods, and C<nospace> to
+abbreviate without space or periods. Default value: C<full>.
+
+=item C<nameorder>
+
+The order in which to print names: C<first> for "first von last jr"
+order, and C<last> for "von last jr first" order. Default value:
+C<first>.
+
+=item C<atitle_lower>
+
+A boolean option: if true, non-book titles will be changed to "sentence
+capitalization:" words following colons and sentence-ending punctuation
+will be capitalized, and everything else at brace-depth zero will be
+changed to lowercase. Default value: true.
+
+=item C<labels>
+
+The type of bibliographic labels to generate: C<numeric> or C<alpha>.
+(Alphabetic labels are not yet implemented, so this option is currently
+ignored.) Default value: C<numeric>.
+
+=back
+
+Also, several "markup options" are supported. Markup options are
+distinct because they don't change how text is extracted from the
+database entries and subsequently mangled; rather, they supply bits of
+markup that go around the database-derived text. Markup options are
+always two-element lists: the first to "turn on" some feature of the
+markup language, and the second to turn it off. For example, if your
+target language is LaTeX2e and you want journal names emphasized, you
+would supply a list reference C<['\emph{','}']> for the C<journal_mkup>
+option. If you were instead generating HTML, you might supply
+C<['E<lt>emE<gt>','E<lt>/emE<gt>']>. To keep the structure module
+general with respect to markup languages, all markup options are empty
+by default. (Or, rather, they are all references to lists consisting of
+two empty strings.)
+
+=over 4
+
+=item C<name_mkup>
+
+Markup to add around the list of author names.
+
+=item C<atitle_mkup>
+
+Markup to add around non-book (article) titles.
+
+=item C<btitle_mkup>
+
+Markup to add around book titles.
+
+=item C<journal_mkup>
+
+Markup to add around journal names.
+
+=back
+
+=cut
+
+# Default value of every option the Bib structure understands.  The keys
+# of this hash are exactly what known_option() accepts, and
+# default_option() reads its answers from here.  The four *_mkup entries
+# are two-element array references that default to a pair of empty
+# strings.
+my %default_options =
+ (sortby => 'name', # or 'year', 'none'
+ namestyle => 'full', # or 'abbrev', 'nopunct', 'nospace'
+ nameorder => 'first', # or 'last'
+ atitle_lower=> 1, # mangle case of non-book titles?
+ labels => 'numeric', # or 'alpha' (not yet supported!)
+ name_mkup => ['', ''],
+ atitle_mkup => ['', ''],
+ btitle_mkup => ['', ''],
+ journal_mkup=> ['', ''],
+ );
+
+
+=head2 Option methods
+
+As required by the C<Text::BibTeX::Structure> module,
+C<Text::BibTeX::Bib> provides two methods for handling options:
+C<known_option> and C<default_option>. (The other two option methods,
+C<set_options> and C<get_options>, are just inherited from
+C<Text::BibTeX::Structure>.)
+
+=over 4
+
+=item known_option (OPTION)
+
+Returns true if OPTION is one of the options on the above list.
+
+=item default_option (OPTION)
+
+Returns the default value of OPTION, or C<croak>s if OPTION is not a
+supported option.
+
+=back
+
+=cut
+
+# known_option (OPTION) -- true exactly when OPTION has an entry in
+# %default_options; the invocant plays no part in the answer.
+sub known_option
+{
+    my $self   = shift;
+    my $option = shift;
+
+    return exists $default_options{$option};
+}
+
+
+# default_option (OPTION) -- the Bib default for OPTION when there is
+# one; any other option is passed up to the parent class's
+# default_option method.
+sub default_option
+{
+    my $self   = shift;
+    my $option = shift;
+
+    return $default_options{$option}
+        if exists $default_options{$option};
+
+    return $self->SUPER::default_option ($option);
+}
+
+
+# The field lists in the following documentation are automatically
+# generated by my `doc_structure' program -- I run it and read the
+# output into this file. Wouldn't it be cool if the module could just
+# document itself? Ah well, dreaming again...
+
+=head1 DATABASE STRUCTURE
+
+The other purpose of a structure class is to provide a method,
+C<describe_entry>, that lists the allowed entry types and the known
+fields for the structure. Programmers wishing to write their own
+database structure module should consult L<Text::BibTeX::Structure> for
+the conventions and requirements of this method; the purpose of the
+present document is to describe the C<Bib> database structure.
+
+The allowed entry types, and the fields recognized for them, are:
+
+=over 4
+
+=item C<article>
+
+Required fields: C<author>, C<title>, C<journal>, C<year>.
+Optional fields: C<volume>, C<number>, C<pages>, C<month>, C<note>.
+
+=item C<book>
+
+Required fields: C<title>, C<publisher>, C<year>.
+Optional fields: C<series>, C<address>, C<edition>, C<month>, C<note>.
+Constrained fields: exactly one of C<author>, C<editor>; at most one of C<volume>, C<number>.
+
+=item C<booklet>
+
+Required fields: C<title>.
+Optional fields: C<author>, C<howpublished>, C<address>, C<month>, C<year>, C<note>.
+
+=item C<inbook>
+
+Required fields: C<publisher>, C<year>.
+Optional fields: C<series>, C<type>, C<address>, C<edition>, C<month>, C<note>.
+Constrained fields: exactly one of C<author>, C<editor>; at least one of C<chapter>, C<pages>; at most one of C<volume>, C<number>.
+
+=item C<incollection>
+
+Required fields: C<author>, C<title>, C<booktitle>, C<publisher>, C<year>.
+Optional fields: C<editor>, C<series>, C<type>, C<chapter>, C<pages>, C<address>, C<edition>, C<month>, C<note>.
+Constrained fields: at most one of C<volume>, C<number>.
+
+=item C<inproceedings>
+
+=item C<conference>
+
+Required fields: C<author>, C<title>, C<booktitle>, C<year>.
+Optional fields: C<editor>, C<series>, C<pages>, C<address>, C<month>, C<organization>, C<publisher>, C<note>.
+Constrained fields: at most one of C<volume>, C<number>.
+
+=item C<manual>
+
+Required fields: C<title>.
+Optional fields: C<author>, C<organization>, C<address>, C<edition>, C<month>, C<year>, C<note>.
+
+=item C<mastersthesis>
+
+Required fields: C<author>, C<title>, C<school>, C<year>.
+Optional fields: C<type>, C<address>, C<month>, C<note>.
+
+=item C<misc>
+
+Required fields: none.
+Optional fields: C<author>, C<title>, C<howpublished>, C<month>, C<year>, C<note>.
+
+=item C<phdthesis>
+
+Required fields: C<author>, C<title>, C<school>, C<year>.
+Optional fields: C<type>, C<address>, C<month>, C<note>.
+
+=item C<proceedings>
+
+Required fields: C<title>, C<year>.
+Optional fields: C<editor>, C<series>, C<address>, C<month>, C<organization>, C<publisher>, C<note>.
+Constrained fields: at most one of C<volume>, C<number>.
+
+=item C<techreport>
+
+Required fields: C<author>, C<title>, C<institution>, C<year>.
+Optional fields: C<type>, C<number>, C<address>, C<month>, C<note>.
+
+=item C<unpublished>
+
+Required fields: C<author>, C<title>, C<note>.
+Optional fields: C<month>, C<year>.
+
+=back
+
+=cut
+
+# describe_entry -- declares every entry type of the Bib structure by
+# calling set_fields once per type.  Each call passes the type name, a
+# list of required fields, a list of optional fields, and then zero or
+# more constraints.  Going by the field lists in the POD above, a
+# constraint [MIN, MAX, [FIELDS]] means "between MIN and MAX of FIELDS
+# may be present": [1, 1, ...] is "exactly one of", [0, 1, ...] is "at
+# most one of" and [1, 2, ...] over two fields is "at least one of".
+# 'inproceedings' and 'conference' are given identical field lists.
+sub describe_entry
+{
+ my $self = shift;
+
+ # Advantages of the current scheme (set all fields for a particular
+ # entry type together):
+ # - groups fields more naturally (by entry type)
+ # - might lend itself to structuring things by 'type' in the object
+ # as well, making it easier to determine if a type is valid
+ # - prevents accidentally giving a type optional fields but no
+ # required fields -- currently this mistake would make the type
+ # 'unknown'
+ #
+ # Requirement of any scheme:
+ # - must be easy for derived classes to override/augment the field
+ # lists defined here! (ie. they should be able just to inherit
+ # describe_entry; or explicitly call SUPER::describe_entry and then
+ # undo/change some of its definitions)
+
+ # Things that I don't think are handled by this scheme, but that
+ # bibtex does look out for:
+ # * warns if month but no year
+ # * crossref stuff:
+ # - article can xref article; xref'd entry must have key or journal
+ # - book or inbook can xref book; xref'd entry must have editor,
+ # key, or series
+ # - incollection can xref a book and inproceedings can xref a
+ # proceedings; xref'd entry must have editor, key, or booktitle
+
+ $self->set_fields ('article',
+ [qw(author title journal year)],
+ [qw(volume number pages month note)]);
+ $self->set_fields ('book',
+ [qw(title publisher year)],
+ [qw(series address edition month note)],
+ [1, 1, [qw(author editor)]],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('booklet',
+ [qw(title)],
+ [qw(author howpublished address month year note)]);
+ $self->set_fields ('inbook',
+ [qw(publisher year)],
+ [qw(series type address edition month note)],
+ [1, 1, [qw(author editor)]],
+ [1, 2, [qw(chapter pages)]],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('incollection',
+ [qw(author title booktitle publisher year)],
+ [qw(editor series type chapter pages address
+ edition month note)],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('inproceedings',
+ [qw(author title booktitle year)],
+ [qw(editor series pages address month
+ organization publisher note)],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('conference',
+ [qw(author title booktitle year)],
+ [qw(editor series pages address month
+ organization publisher note)],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('manual',
+ [qw(title)],
+ [qw(author organization address edition
+ month year note)]);
+ $self->set_fields ('mastersthesis',
+ [qw(author title school year)],
+ [qw(type address month note)]);
+ $self->set_fields ('misc',
+ [],
+ [qw(author title howpublished month year note)]);
+ $self->set_fields ('phdthesis',
+ [qw(author title school year)],
+ [qw(type address month note)]);
+ $self->set_fields ('proceedings',
+ [qw(title year)],
+ [qw(editor series address month
+ organization publisher note)],
+ [0, 1, [qw(volume number)]]);
+ $self->set_fields ('techreport',
+ [qw(author title institution year)],
+ [qw(type number address month note)]);
+ $self->set_fields ('unpublished',
+ [qw(author title note)],
+ [qw(month year)]);
+
+} # describe_entry
+
+
+=head1 STRUCTURED ENTRY CLASS
+
+The second class provided by the C<Text::BibTeX::Bib> module is
+C<BibEntry> (again, a leading C<Text::BibTeX> is implied). This being a
+structured entry class, it derives from C<StructuredEntry>. The
+conventions and requirements for such a class are documented in
+L<Text::BibTeX::Structure> for the benefit of programmers implementing
+their own structure modules.
+
+If you wish to write utilities making use of the C<Bib> database
+structure, then you should call one of the "officially supported"
+methods provided by the C<BibEntry> class. Currently, there are only
+two of these: C<sort_key> and C<format>. These are actually implemented
+in the C<BibSort> and C<BibFormat> classes, respectively, which are base
+classes of C<BibEntry>. Thus, see L<Text::BibTeX::BibSort> and
+L<Text::BibTeX::BibFormat> for details on these two methods.
+
+=cut
+
+package Text::BibTeX::BibEntry;
+use strict;
+use vars qw(@ISA);
+
+use Text::BibTeX::BibSort;
+use Text::BibTeX::BibFormat;
+
+@ISA = qw(Text::BibTeX::BibSort Text::BibTeX::BibFormat);
+
+# Pre-define the "month name" macros for compatibility with BibTeX.
+# This ignores all sorts of issues, like internationalization and
+# abbreviation.
+my %month_names = (
+    jan => 'January',
+    feb => 'February',
+    mar => 'March',
+    apr => 'April',
+    may => 'May',
+    jun => 'June',
+    jul => 'July',
+    aug => 'August',
+    sep => 'September',
+    oct => 'October',
+    nov => 'November',
+    dec => 'December',
+);
+
+# Register every three-letter abbreviation as a macro that expands to
+# the full month name; this runs once, when the module is loaded.
+my ($macro, $expansion);
+while (($macro, $expansion) = each %month_names)
+{
+    Text::BibTeX::add_macro_text ($macro, $expansion);
+}
+
+1;
+
+=head1 SEE ALSO
+
+L<Text::BibTeX::Structure>, L<Text::BibTeX::BibSort>,
+L<Text::BibTeX::BibFormat>.
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/BibFormat.pm b/BibTeX/BibFormat.pm
new file mode 100644
index 0000000..e8887b8
--- /dev/null
+++ b/BibTeX/BibFormat.pm
@@ -0,0 +1,499 @@
+# ----------------------------------------------------------------------
+# NAME : BibFormat.pm
+# CLASSES : Text::BibTeX::BibFormat
+# RELATIONS : sub-class of Text::BibTeX::StructuredEntry
+# super-class of Text::BibTeX::BibEntry
+# DESCRIPTION: Provides methods for final output formatting of
+# bibliographic entries.
+# CREATED : 1997/11/24, GPW
+# MODIFIED :
+# VERSION : $Id: BibFormat.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997 by Gregory P. Ward. All rights reserved.
+#
+# This file is part of the Text::BibTeX library. This is free
+# software; you can redistribute it and/or modify it under the
+# same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::BibFormat;
+
+use Carp;
+use strict;
+use vars qw(@ISA);
+
+use Text::BibTeX::Name;
+use Text::BibTeX::NameFormat;
+use Text::BibTeX::Structure;
+
+@ISA = qw(Text::BibTeX::StructuredEntry);
+
+use Text::BibTeX qw(:subs display_list :nameparts :joinmethods);
+
+=head1 NAME
+
+Text::BibTeX::BibFormat - formats bibliography entries
+
+=head1 SYNOPSIS
+
+ # Assuming $entry comes from a database of the 'Bib' structure
+ # (i.e., that it's blessed into the BibEntry class, which inherits
+ # the format method from BibFormat):
+ @blocks = $entry->format;
+
+=head1 DESCRIPTION
+
+The C<Text::BibTeX::BibFormat> class is a base class of
+C<Text::BibTeX::BibEntry> for formatting bibliography entries. It thus
+performs the main job of any program that would hope to supplant BibTeX
+itself; the other important job (sorting) is handled by its companion
+class, C<Text::BibTeX::BibSort>.
+
+C<BibFormat> (the C<Text::BibTeX> prefix will be dropped for brevity)
+pays attention to almost all of the structure options described in
+L<Text::BibTeX::Bib>; it only ignores those that cover sorting,
+currently just C<sortby>. In particular, all of the "markup" options
+control what language is generated by C<BibFormat>; if none of those
+options are set, then it will generate plain, unmarked text.
+
+The only method in C<BibFormat>'s documented interface (so far) is
+C<format>. (The class defines many other methods, but these should not
+be necessary to outsiders, so they are undocumented and subject to
+change.)
+
+=head1 METHODS
+
+=over 4
+
+=cut
+
+# ----------------------------------------------------------------------
+# Ordinary subroutines:
+
+# connect_words (S1, S2)
+#
+# Joins two strings into one.  If the second string is shorter than
+# three characters the two are tied together with '~'; otherwise they
+# are separated by an ordinary space.
+sub connect_words
+{
+   my ($left, $right) = @_;
+
+   my $glue = ' ';
+   $glue = '~' if length ($right) < 3;
+   return $left . $glue . $right;
+}
+
+
+# ----------------------------------------------------------------------
+# Utility methods (eg. apply a bit of markup to a string or field)
+
+# markup_field (MARKUP, FIELD)
+#
+# Returns the value of FIELD wrapped in a pair of markup strings, or
+# the empty string if the entry has no such field.
+#
+# MARKUP is either a reference to a two-element array (the text to put
+# before and after the value), or a name; in the latter case the
+# structure option "<name>_mkup" is looked up and used as the pair.
+# Croaks if that option is not defined.
+sub markup_field
+{
+   my ($self, $markup, $field) = @_;
+
+   unless (ref $markup eq 'ARRAY' && @$markup == 2)
+   {
+      # Remember the option name before $markup is overwritten, so
+      # that the error message can still say which option is missing.
+      my $option = "${markup}_mkup";
+      $markup = $self->structure->get_options ($option);
+      croak "$option option not defined"
+         unless defined $markup;
+   }
+
+   $self->exists ($field)
+      ? $markup->[0] . $self->get ($field) . $markup->[1]
+      : '';
+}
+
+
+# markup_string (MARKUP, STRING)
+#
+# Returns STRING wrapped in a pair of markup strings.
+#
+# MARKUP is either a reference to a two-element array (the text to put
+# before and after STRING), or a name; in the latter case the structure
+# option "<name>_mkup" is looked up and used as the pair.  Croaks if
+# that option is not defined.
+sub markup_string
+{
+   my ($self, $markup, $string) = @_;
+
+   unless (ref $markup eq 'ARRAY' && @$markup == 2)
+   {
+      # Remember the option name before $markup is overwritten, so
+      # that the error message can still say which option is missing.
+      my $option = "${markup}_mkup";
+      $markup = $self->structure->get_options ($option);
+      croak "$option option not defined"
+         unless defined $markup;
+   }
+
+   $markup->[0] . $string . $markup->[1];
+}
+
+
+# ----------------------------------------------------------------------
+# Formatting methods I: utility methods called by the entry-formatters
+
+# format_authors ()
+#
+# Returns the formatted list of author names (via format_names), or
+# the empty string if the entry has no 'author' field.
+sub format_authors
+{
+   my $self = shift;
+
+   if ($self->exists ('author'))
+   {
+      my @authors = $self->names ('author');
+      return $self->format_names (\@authors);
+   }
+   return '';
+}
+
+
+# format_editors ()
+#
+# Returns the formatted list of editor names followed by ", editor"
+# (one name) or ", editors" (otherwise); returns the empty string if
+# the entry has no 'editor' field.
+sub format_editors
+{
+   my $self = shift;
+
+   return '' unless $self->exists ('editor');
+
+   # XXX the word used to indicate editorship should be customizable --
+   # might want it in another language, or abbreviated, or both.
+   my @editors = $self->names ('editor');
+   my $suffix = ', editors';
+   $suffix = ', editor' if @editors == 1;
+
+   return $self->format_names (\@editors) . $suffix;
+}
+
+
+# format_names (NAMES)
+#
+# NAMES is a reference to a list of name objects.  Each element is
+# replaced *in place* by its formatted string (the literal result
+# 'others' becomes 'et. al.'), and the whole list is then run through
+# display_list and wrapped in the 'name' markup.
+#
+# The 'nameorder' structure option must be 'first' or 'last', and
+# 'namestyle' one of full/abbrev/nopunct/nospace; anything else croaks.
+sub format_names
+{
+   my ($self, $names) = @_;
+
+   my ($order, $style) =
+      $self->structure->get_options ('nameorder', 'namestyle');
+   croak "format_names: bad nameorder option \"$order\""
+      unless $order eq 'first' || $order eq 'last';
+   croak "format_names: bad namestyle option \"$style\""
+      unless $style =~ /^(full|abbrev|nopunct|nospace)$/;
+
+   # Part order for the name format: first-name-first or last-name-first.
+   my $parts = ($order eq 'first') ? 'fvlj' : 'vljf';
+   my $abbrev = ! ($style eq 'full');
+   my $format = Text::BibTeX::NameFormat->new ($parts, $abbrev);
+
+   # NOTE(review): presumably these two calls drop the punctuation
+   # after, and then the space between, abbreviated first-name tokens --
+   # confirm against Text::BibTeX::NameFormat.
+   if ($style eq 'nopunct' || $style eq 'nospace')
+   {
+      $format->set_text (&BTN_FIRST, undef, undef, undef, '');
+   }
+   if ($style eq 'nospace')
+   {
+      $format->set_options (&BTN_FIRST, 1, &BTJ_NOTHING, &BTJ_SPACE);
+   }
+
+   # The loop variable aliases the caller's list elements, so the
+   # assignment below overwrites them.
+   for my $entry (@$names)
+   {
+      my $text = $entry->format ($format);
+      $entry = ($text eq 'others') ? 'et. al.' : $text;
+   }
+
+   return $self->markup_string ('name', display_list($names,0));
+}
+
+
+# format_atitle ()
+#
+# Returns the 'title' field wrapped in the 'atitle' markup.  If the
+# 'atitle_lower' structure option is true, the title is first passed
+# through change_case with the 't' transformation.
+sub format_atitle
+{
+   my $self = shift;
+
+   my $lower = $self->structure->get_options ('atitle_lower');
+   my $title = $self->get ('title');
+   if ($lower)
+   {
+      $title = change_case ('t', $title);
+   }
+   return $self->markup_string ('atitle', $title);
+}
+
+
+# format_btitle ()
+#
+# Returns the 'title' field wrapped in the 'btitle' markup (the empty
+# string if there is no title -- see markup_field).
+sub format_btitle
+{
+   my ($self) = @_;
+
+   return $self->markup_field ('btitle', 'title');
+}
+
+
+# sub format_xref_article
+# {
+# my $self = shift;
+
+# # N.B. this assumes that the appropriate fields from the cross-
+# # referenced entry have already been put into the current entry!
+
+# # XXX hard-coded LaTeX markup here!!!
+
+# my ($key, $journal, $crossref);
+# $key = $self->get ('key');
+# $journal = $self->get ('journal');
+# $crossref = $self->get ('crossref');
+# if (defined $key)
+# {
+# return "In $key \cite{$crossref}";
+# }
+# else
+# {
+# if (defined $journal)
+# {
+# return "In {\em $journal} \cite{$crossref}";
+# }
+# else
+# {
+# $self->warn ("need key or journal for crossref");
+# return " \cite{$crossref}";
+# }
+# }
+# }
+
+
+# format_pages ()
+#
+# Returns the 'pages' field prefixed with "page" or "pages".  A value
+# containing ',', '+' or '-' is taken to mean multiple pages; in that
+# case every single hyphen that has a non-hyphen character on each
+# side is doubled ("--").
+sub format_pages
+{
+   my $self = shift;
+
+   my $pages = $self->get ('pages');
+   my $label = 'page';
+   if ($pages =~ /[,+-]/)               # multiple pages?
+   {
+      $pages =~ s/([^-])-([^-])/$1--$2/g;
+      $label = 'pages';
+   }
+   return connect_words ($label, $pages);
+}
+
+
+# format_vol_num_pages ()
+#
+# Builds the "volume(number):pages" string from whichever of the
+# 'volume', 'number' and 'pages' fields are defined; returns the empty
+# string if none of them is.
+sub format_vol_num_pages
+{
+   my $self = shift;
+
+   my ($vol, $num, $pages) = $self->get ('volume', 'number', 'pages');
+
+   my @pieces;
+   push (@pieces, $vol)       if defined $vol;
+   push (@pieces, "($num)")   if defined $num;
+   push (@pieces, ":$pages")  if defined $pages;
+   return join ('', @pieces);
+}
+
+
+# format_bvolume ()
+#
+# Returns "volume N", followed by " of <series>" (series wrapped in the
+# 'btitle' markup) when a 'series' field exists.  Returns the empty
+# string if the entry has no 'volume' field.
+sub format_bvolume
+{
+   my $self = shift;
+
+   return '' unless $self->exists ('volume');
+
+   # potentially volume and series
+   my $volser = connect_words ('volume', $self->get ('volume'));
+   if ($self->exists ('series'))
+   {
+      $volser .= ' of ' . $self->markup_field ('btitle', 'series');
+   }
+   return $volser;
+}
+
+
+# format_number_series (MID_SENTENCE)
+#
+# Formats the 'number' and 'series' fields:
+#   - 'volume' present: returns '' (format_bvolume deals with it)
+#   - 'number' present: returns "number N in <series>" ("Number ..."
+#     unless MID_SENTENCE is true); warns if there is no 'series'
+#   - otherwise: returns the 'series' field (or undef if none)
+sub format_number_series
+{
+   my ($self, $mid_sentence) = @_;
+
+   # if 'volume' field exists, then format_bvolume took care of
+   # formatting it, so don't do anything here
+   return '' if $self->exists ('volume');
+
+   # No 'number' -- just return the 'series' (or undef if none)
+   return $self->get ('series') unless $self->exists ('number');
+
+   my $numser = connect_words ($mid_sentence ? 'number' : 'Number',
+                               $self->get ('number'));
+   if ($self->exists ('series'))
+   {
+      $numser .= ' in ' . $self->get ('series');
+   }
+   else
+   {
+      $self->warn ("there's a number but no series " .
+                   "(is this warning redundant?!?)");
+   }
+   return $numser;
+} # format_number_series
+
+
+# format_edition (MID_SENTENCE)
+#
+# Returns the 'edition' field, case-transformed with change_case ('l'
+# if MID_SENTENCE is true, 't' otherwise), followed by " edition".
+# Returns the empty string if there is no 'edition' field.
+sub format_edition
+{
+   my ($self, $mid_sentence) = @_;
+
+   # XXX more fodder for I18N here: the word 'edition'
+   return '' unless $self->exists ('edition');
+
+   my $transform = 't';
+   $transform = 'l' if $mid_sentence;
+   return change_case ($transform, $self->get ('edition')) . ' edition';
+} # format_edition
+
+
+# format_date ()
+#
+# Returns the 'month' and 'year' fields joined by a space, leaving out
+# whichever of the two is missing or false.
+sub format_date
+{
+   my $self = shift;
+
+   return join (' ', grep { $_ } $self->get ('month', 'year'));
+}
+
+
+# ----------------------------------------------------------------------
+# The actual entry-formatting methods:
+# format_article
+# format_book
+# format_inbook
+# ...and so on.
+
+# Each of these returns a list of blocks.
+# A block is a list of sentences.
+# A sentence is either a string or a list of clauses.
+# Any clause, sentence, or block in any list may be empty or undefined;
+# it should be removed before output.
+# If a sentence consists of a list of clauses, they should be joined
+# together with ", " to form the sentence-as-string.
+#
+# For example, the formatted entry for an article (in the absence of
+# cross-references) consists of four blocks:
+# - the name block, which has a single sentence; this sentence
+# has a single clause (the list of author names), and thus is
+# represented as a string like "Joe Blow, Fred Jones, and John Smith"
+# - the title block, which has a single sentence; this sentence
+# has a single clause, the title of the article, eg. "The mating
+# habits of foobars"
+# - the journal block, which consists of a single sentence that has
+# three clauses: the journal name, the volume/number/pages, and
+# the date. When the three clauses are joined, we get something like
+# "Journal of Foo, 4(5):122--130, May 1996" for the single sentence
+# in the block.
+# - the note block -- if the entry has no `note' field, this block
+# will be an undefined value rather than a list of sentences
+#
+# These four blocks are returned from `format_article' (and thus from
+# `format') as a list-of-lists-of-(strings or lists-of-strings). That
+# is, each format method returns a list of blocks, each of which is in
+# turn a list of sentences. (Hence "list of lists of X".) Each
+# sentence is either a string ("list of lists of strings") or a list of
+# clauses ("list of lists of lists of strings"). Clear? Hope so!
+#
+# [ # enter list of blocks
+# ["Joe Blow, Fred Jones, and John Smith"] # name block:
+# # 1 sentence w/ 1 clause
+# ["The mating habits of foobars"] # title block:
+# # 1 sentence w/ 1 clause
+# [["Journal of Foo", # journal block:
+# "4(5):122--130", # 1 sentence w/ 3 clauses
+# "May 1996"]]
+# undef
+# ]
+#
+# A note: the journal name will normally have a bit of markup around it,
+# say to italicize it -- that's determined by the calling application,
+# though; the default markups are all empty strings. There could
+# probably be arbitrary markup for every element of an entry, but I
+# haven't gone that far yet.
+#
+# It is then the responsibility of the calling application to apply the
+# appropriate punctuation and munge all those lists of strings together
+# into something worth printing. The canonical application for doing
+# this is btformat, which supports LaTeX 2.09, LaTeX2e, and HTML markup
+# and output.
+
+
+# format_article ()
+#
+# Returns the four blocks of a formatted article entry:
+#   - name block: one sentence, the author list
+#   - title block: one sentence, the article title
+#   - journal block: one sentence of three clauses (journal name in
+#     'journal' markup, volume/number/pages, date)
+#   - the 'note' field as returned by get (not wrapped in a list)
+sub format_article
+{
+   my $self = shift;
+
+   my $authors = [$self->format_authors];
+   my $title = [$self->format_atitle];
+
+   my @journal_clauses =
+      ($self->markup_string('journal', $self->get ('journal')),
+       $self->format_vol_num_pages,
+       $self->format_date);
+   my $journal = [\@journal_clauses];
+
+   return ($authors, $title, $journal, $self->get ('note'));
+} # format_article
+
+
+# format_book ()
+#
+# Returns the four blocks of a formatted book entry:
+#   - name block: the authors if there is an 'author' field,
+#     otherwise the editors
+#   - title block: one sentence of two clauses (title, volume)
+#   - "from" block: the number/series sentence, then one sentence with
+#     the clauses publisher, address, edition and date
+#   - the 'note' field as returned by get (not wrapped in a list)
+sub format_book
+{
+   my $self = shift;
+
+   my $names;                           # author(s) or editor(s)
+   if ($self->exists ('author'))
+   {
+      $names = [$self->format_authors];
+   }
+   else
+   {
+      $names = [$self->format_editors];
+   }
+
+   # title (and volume)
+   my $title = [[$self->format_btitle, $self->format_bvolume]];
+
+   # number/series; publisher, address, edition, date
+   my $from = [$self->format_number_series (0),
+               [$self->get ('publisher'), $self->get ('address'),
+                $self->format_edition (0), $self->format_date]];
+
+   return ($names, $title, $from, $self->get('note'));
+} # format_book
+
+
+# ----------------------------------------------------------------------
+# Finally, the `format' method -- just calls one of the
+# type-specific format methods (format_article, etc.)
+
+=item format ()
+
+Formats a single entry for inclusion in the bibliography of some
+document. The exact processing performed is highly dependent on the
+entry type and the fields present; in general, you should be able to
+join C<format>'s outputs together to create a single paragraph for
+inclusion in a document of whatever markup language you're working with.
+
+Returns a list of "blocks," which can either be jammed together like
+sentences (for a traditional "tight" bibliography) or printed on
+separate lines (for an "open" bibliography format). Each block is a
+reference to a list of sentences; sentences should be joined together
+with an intervening period. Each sentence is either a single string or
+a list of clauses; clauses should be joined together with an intervening
+comma. Each clause is just a simple string.
+
+See the source code for C<btformat> for an example of how to use the
+output of C<format>.
+
+=cut
+
+# Dispatches to the type-specific formatting method, "format_<type>"
+# (e.g. format_article for an 'article' entry), and returns whatever
+# that method returns.  If the entry's class has no such method, a
+# warning is issued through the entry's warn method and nothing (empty
+# list / undef) is returned.
+sub format
+{
+   my $self = shift;
+
+   my $type = $self->type;
+   my $method_name = 'format_' . $type;
+   my $method = $self->can ($method_name);
+   unless ($method)
+   {
+      $self->warn ("can't format entry: " .
+                   "no method $method_name (for type $type)");
+      return;
+   }
+
+   # $method is the code reference found by can(); invoke it on $self.
+   return $self->$method ();
+}
+
+1;
+
+=back
+
+=head1 SEE ALSO
+
+L<Text::BibTeX::Structure>, L<Text::BibTeX::Bib>,
+L<Text::BibTeX::BibSort>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/BibSort.pm b/BibTeX/BibSort.pm
new file mode 100644
index 0000000..0954108
--- /dev/null
+++ b/BibTeX/BibSort.pm
@@ -0,0 +1,244 @@
+# ----------------------------------------------------------------------
+# NAME : BibSort.pm
+# CLASSES : Text::BibTeX::BibSort
+# RELATIONS : sub-class of StructuredEntry
+# super-class of BibEntry
+# DESCRIPTION: Provides methods for generating sort keys of entries
+# in a BibTeX-style bibliographic database.
+# CREATED : 1997/11/24, GPW (taken from Bib.pm)
+# MODIFIED :
+# VERSION : $Id: BibSort.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This is free
+# software; you can redistribute it and/or modify it under the
+# same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::BibSort;
+use strict;
+use vars qw(@ISA);
+
+use Text::BibTeX::Structure;
+
+@ISA = qw(Text::BibTeX::StructuredEntry);
+
+use Text::BibTeX qw(purify_string change_case);
+
+use Carp;
+
+=head1 NAME
+
+Text::BibTeX::BibSort - generate sort keys for bibliographic entries
+
+=head1 SYNOPSIS
+
+ # Assuming $entry comes from a database of the 'Bib' structure
+ # (i.e., that it's blessed into the BibEntry class, which inherits
+ # the sort_key method from BibSort):
+ $sort_key = $entry->sort_key;
+
+=head1 DESCRIPTION
+
+C<Text::BibTeX::BibSort> is a base class of C<Text::BibTeX::BibEntry>
+for generating sort keys from bibliography entries. It could in
+principle (and, someday, might) offer a wide range of highly
+customizable sort-key generators. Currently, though, it provides only a
+single method (C<sort_key>) for public use, and that method only pays
+attention to one structure option, C<sortby>.
+
+=head1 METHODS
+
+=over 4
+
+=item sort_key ()
+
+Generates a sort key for a single bibliographic entry. Assumes this
+entry conforms to the C<Bib> database structure. The nature of this
+sort key is controlled by the C<sortby> option, which can be either
+C<"name"> or C<"year">. (The C<namestyle> also has a role, in
+determining how author/editor names are formatted for inclusion in the
+sort key.)
+
+For by-name sorting (which is how BibTeX's standard styles work), the sort
+key consists of one of the C<author>, C<editor>, C<organization>, or C<key>
+fields (depending on the entry type and which fields are actually present),
+followed by the year and the title. All fields are drastically simplified
+to produce the sort key: non-English letters are mercilessly anglicized,
+non-alphabetic characters are stripped, and everything is forced to
+lowercase. (The first two steps are done by the C<purify_string> routine;
+see L<Text::BibTeX/"Generic string-processing functions"> for a brief
+description, and the description of the C function C<bt_purify_string()> in
+L<bt_misc> for all the gory details.)
+
+=cut
+
+# methods for sorting -- everything here is geared towards generating
+# a sort key; it's up to external code to actually order entries (since,
+# of course, a single entry doesn't know anything about any other
+# entries!)
+
+# also, we assume that an entry has been checked and coerced into
+# shape -- that way we don't need to check for defined-ness of
+# strings, or check the type, or anything.
+
+# Builds the sort key: a "name" key (taken from the first field that
+# exists among a type-dependent list of alternatives), the purified,
+# lower-cased year, and the sort-formatted title, joined by spaces.
+# The 'sortby' option decides whether name or year comes first; it
+# must be 'name' or 'year', otherwise this croaks.
+sub sort_key
+{
+   my $self = shift;
+
+   my $sortby = $self->structure->get_options ('sortby');
+   croak ("BibSort::sort_key: sortby option is 'none'")
+      if $sortby eq 'none';
+   croak ("BibSort::sort_key: unknown sortby option '$sortby'")
+      unless $sortby eq 'name' || $sortby eq 'year';
+
+   # For each entry type, the (field => formatting method) pairs to
+   # try, in order of preference; types not listed use @default.
+   my @book = ('author' => 'sort_format_names',
+               'editor' => 'sort_format_names',
+               'key' => 'sortify');
+   my @default = ('author' => 'sort_format_names',
+                  'key' => 'sortify');
+   my %alternatives =
+      ('book'        => \@book,
+       'inbook'      => \@book,
+       'proceedings' => ['editor' => 'sort_format_names',
+                         'organization' => 'sort_format_org',
+                         'key' => 'sortify'],
+       'manual'      => ['author' => 'sort_format_names',
+                         'organization' => 'sort_format_org',
+                         'key' => 'sortify']);
+
+   my $type = $self->type;
+   my $fields = $alternatives{$type} || \@default;
+   my $nkey = $self->format_alt_fields (@$fields);
+
+   my $ykey = change_case ('l', (purify_string ($self->get ('year'))));
+
+   my $skey;
+   if ($sortby eq 'name')
+   {
+      $skey = $nkey . ' ' . $ykey;
+   }
+   else
+   {
+      $skey = $ykey . ' ' . $nkey;
+   }
+   $skey .= ' ' . $self->sort_format_title ('title');
+   return $skey;
+} # sort_key
+
+
+# sortify (FIELD)
+#
+# Returns the value of FIELD run through purify_string and forced to
+# lowercase.
+sub sortify
+{
+   my $self = shift;
+   my $field = shift;
+
+   return lc (purify_string ($self->get ($field)));
+}
+
+
+# sort_format_names (FIELD)
+#
+# Returns the sort-key form of the list of names in FIELD.  The field
+# is split into individual names; each name is parsed, formatted in
+# "vljf" part order (abbreviated unless the 'namestyle' option is
+# 'full'), purified and lower-cased.  A name that is literally 'others'
+# becomes 'et al' (the purified version of "et. al.").  The results are
+# joined with single spaces.
+sub sort_format_names
+{
+   require Text::BibTeX::Name;
+   require Text::BibTeX::NameFormat;
+
+   my ($self, $field) = @_;
+
+   my $abbrev = ! ($self->structure->get_options ('namestyle') eq 'full');
+   my $format = Text::BibTeX::NameFormat->new ("vljf", $abbrev);
+   my $name = Text::BibTeX::Name->new;
+
+   my @snames = $self->split ($field);
+   for my $i (0 .. $#snames)
+   {
+      if ($snames[$i] eq 'others')      # hmmm... should we only do this
+      {                                 # on the final name?
+         # Store the replacement in the list itself; assigning it only
+         # to a temporary would leave 'others' in the sort key.
+         $snames[$i] = 'et al';
+         next;
+      }
+
+      # A spot of ugliness here:
+      #   - lc (purify_string (x)) ought to be sortify (x), but sortify
+      #     is a method that only operates on a field, rather than a
+      #     generic function (as it is in BibTeX)
+      $name->split ($snames[$i], $self->filename, $self->line ($field), $i+1);
+      my $formatted = $name->format ($format);
+      $snames[$i] = lc (purify_string ($formatted));
+   }
+   return join (' ', @snames);
+}
+
+
+
+# sort_format_org and sort_format_title are suspiciously similar...
+# could probably have one method to handle both tasks...
+
+# sort_format_org (FIELD)
+#
+# Returns the sort-key form of an organization name: a leading "the"
+# (any case, with the whitespace after it) is removed, and the rest is
+# purified and lower-cased.
+sub sort_format_org
+{
+   my ($self, $field) = @_;
+
+   (my $org = $self->get ($field)) =~ s/^the\b\s*//i;
+   return lc (purify_string ($org));
+}
+
+
+# sort_format_title (FIELD)
+#
+# Returns the sort-key form of a title: a leading article ("the", "a"
+# or "an", any case, with the whitespace after it) is removed, and the
+# rest is purified and lower-cased.
+sub sort_format_title
+{
+   my ($self, $field) = @_;
+
+   (my $title = $self->get ($field)) =~ s/^(the|an?)\b\s*//i;
+   return lc (purify_string ($title));
+}
+
+
+# Hmm, I suspect format_alt_fields is a little more general purpose --
+# probably belongs outside of the "generate sort key" methods.
+# (Or.... does it maybe belong in one of the base classes, StructuredEntry
+# or even Entry?)
+
+# format_alt_fields (FIELD1 => METHOD1, FIELD2 => METHOD2, ...)
+#
+# Walks the (field, method name) pairs in order.  For the first field
+# that exists in the entry, the named method is called with that field
+# and its result is returned.  Returns undef if none of the fields is
+# present.  Croaks (naming the method) if the method chosen for an
+# existing field is not available on the entry's class.
+sub format_alt_fields
+{
+   my $self = shift;
+
+   while (@_)
+   {
+      my ($field, $method_name) = (shift, shift);
+      next unless $self->exists ($field);
+
+      my $method = $self->can ($method_name)
+         || croak ("unknown method \"$method_name\" in class " .
+                   (ref $self));
+      return &$method ($self, $field);
+   }
+
+   return undef;                        # whoops, none of the alternate
+                                        # fields were present
+}
+
+1;
+
+=back
+
+=head1 SEE ALSO
+
+L<Text::BibTeX::Structure>, L<Text::BibTeX::Bib>,
+L<Text::BibTeX::BibFormat>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/Entry.pm b/BibTeX/Entry.pm
new file mode 100644
index 0000000..ff8c10d
--- /dev/null
+++ b/BibTeX/Entry.pm
@@ -0,0 +1,967 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/Entry.pm
+# CLASSES : Text::BibTeX::Entry
+# RELATIONS : base class for Text::BibTeX::StructuredEntry, and
+# ultimately for all user-supplied structured entry classes
+# DESCRIPTION: Provides an object-oriented interface to BibTeX entries.
+# CREATED : March 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: Entry.pm 6325 2008-10-08 12:35:41Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+package Text::BibTeX::Entry;
+
+require 5.004; # for isa, and delete on a slice
+
+use strict;
+use UNIVERSAL 'isa';
+use Carp;
+use Text::BibTeX qw(:metatypes :nodetypes);
+
+=head1 NAME
+
+Text::BibTeX::Entry - read and parse BibTeX files
+
+=head1 SYNOPSIS
+
+ use Text::BibTeX; # do not use Text::BibTeX::Entry alone!
+
+ # ...assuming that $bibfile and $newbib are both objects of class
+ # Text::BibTeX::File, opened for reading and writing (respectively):
+
+ # Entry creation/parsing methods:
+ $entry = new Text::BibTeX::Entry;
+ $entry->read ($bibfile);
+ $entry->parse ($filename, $filehandle);
+ $entry->parse_s ($entry_text);
+
+ # or:
+ $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = new Text::BibTeX::Entry $filename, $filehandle;
+ $entry = new Text::BibTeX::Entry $entry_text;
+
+ # Entry query methods
+ warn "error in input" unless $entry->parse_ok;
+ $metatype = $entry->metatype;
+ $type = $entry->type;
+
+ # if metatype is BTE_REGULAR or BTE_MACRODEF:
+ $key = $entry->key; # only for BTE_REGULAR metatype
+ $num_fields = $entry->num_fields;
+ @fieldlist = $entry->fieldlist;
+ $has_title = $entry->exists ('title');
+ $title = $entry->get ('title');
+ # or:
+ ($val1,$val2,...$valn) = $entry->get ($field1, $field2, ..., $fieldn);
+
+ # if metatype is BTE_COMMENT or BTE_PREAMBLE:
+ $value = $entry->value;
+
+ # Author name methods
+ @authors = $entry->split ('author');
+ ($first_author) = $entry->names ('author');
+
+ # Entry modification methods
+ $entry->set_type ($new_type);
+ $entry->set_key ($new_key);
+ $entry->set ('title', $new_title);
+ # or:
+ $entry->set ($field1, $val1, $field2, $val2, ..., $fieldn, $valn);
+ $entry->delete (@fields);
+ $entry->set_fieldlist (\@fieldlist);
+
+ # Entry output methods
+ $entry->write ($newbib);
+ $entry->print ($filehandle);
+ $entry_text = $entry->print_s;
+
+ # Miscellaneous methods
+ $entry->warn ($entry_warning);
+ # or:
+ $entry->warn ($field_warning, $field);
+
+=head1 DESCRIPTION
+
+C<Text::BibTeX::Entry> does all the real work of reading and parsing
+BibTeX files. (Well, actually it just provides an object-oriented Perl
+front-end to a C library that does all that. But that's not important
+right now.)
+
+BibTeX entries can be read either from C<Text::BibTeX::File> objects (using
+the C<read> method), or directly from a filehandle (using the C<parse>
+method), or from a string (using C<parse_s>). The first is preferable,
+since you don't have to worry about supplying the filename, and because of
+the extra functionality provided by the C<Text::BibTeX::File> class.
+Currently, this means that you may specify the I<database structure> to
+which entries are expected to conform via the C<File> class. This lets you
+ensure that entries follow the rules for required fields and mutually
+constrained fields for a particular type of database, and also gives you
+access to all the methods of the I<structured entry class> for this
+database structure. See L<Text::BibTeX::Structure> for details on database
+structures.
+
+Once you have the entry, you can query it or change it in a variety of
+ways. The query methods are C<parse_ok>, C<type>, C<key>, C<num_fields>,
+C<fieldlist>, C<exists>, and C<get>. Methods for changing the entry are
+C<set_type>, C<set_key>, C<set_fieldlist>, C<delete>, and C<set>.
+
+Finally, you can output BibTeX entries, again either to an open
+C<Text::BibTeX::File> object, a filehandle or a string. (A filehandle or
+C<File> object must, of course, have been opened in write mode.) Output to
+a C<File> object is done with the C<write> method, to a filehandle via
+C<print>, and to a string with C<print_s>. Using the C<File> class is
+recommended for future extensibility, although it currently doesn't offer
+anything extra.
+
+=head1 METHODS
+
+=head2 Entry creation/parsing methods
+
+=over 4
+
+=item new ([SOURCE])
+
+Creates a new C<Text::BibTeX::Entry> object. If the SOURCE parameter is
+supplied, it must be one of the following: a C<Text::BibTeX::File> (or
+descendant class) object, a filename/filehandle pair, or a string. Calls
+C<read> to read from a C<Text::BibTeX::File> object, C<parse> to read from
+a filehandle, and C<parse_s> to read from a string.
+
+A filehandle can be specified as a GLOB reference, or as an
+C<IO::Handle> (or descendants) object, or as a C<FileHandle> (or
+descendants) object. (But there's really no point in using
+C<FileHandle> objects, since C<Text::BibTeX> requires Perl 5.004, which
+always includes the C<IO> modules.) You can I<not> pass in the name of
+a filehandle as a string, though, because C<Text::BibTeX::Entry>
+conforms to the C<use strict> pragma (which disallows such symbolic
+references).
+
+The corresponding filename should be supplied in order to allow for
+accurate error messages; if you simply don't have the filename, you can
+pass C<undef> and you'll get error messages without a filename. (It's
+probably better to rearrange your code so that the filename is
+available, though.)
+
+Thus, the following are equivalent to read from a file named by
+C<$filename> (error handling ignored):
+
+ # good ol' fashioned filehandle and GLOB ref
+ open (BIBFILE, $filename);
+ $entry = new Text::BibTeX::Entry ($filename, \*BIBFILE);
+
+ # newfangled IO::File thingy
+ $file = new IO::File $filename;
+ $entry = new Text::BibTeX::Entry ($filename, $file);
+
+But using a C<Text::BibTeX::File> object is simpler and preferred:
+
+ $file = new Text::BibTeX::File $filename;
+ $entry = new Text::BibTeX::Entry $file;
+
+Returns the new object, unless SOURCE is supplied and reading/parsing
+the entry fails (e.g., due to end of file) -- then it returns false.
+
+=cut
+
+# Constructor.  With no SOURCE, returns an empty entry object.  With a
+# SOURCE, immediately reads an entry from it:
+#   - a Text::BibTeX::File object (or descendant): uses read; if the
+#     file has a structure, the entry is re-blessed into that
+#     structure's entry class
+#   - a (filename, filehandle) pair: uses parse
+#   - a single plain string: uses parse_s
+# Anything else croaks.  If reading/parsing returns a false status,
+# that status is returned instead of the object.
+sub new
+{
+   my ($class, @source) = @_;
+
+   $class = ref ($class) || $class;
+   my $self = {'file' => undef,
+               'type' => undef,
+               'key' => undef,
+               'status' => undef,
+               'metatype' => undef,
+               'fields' => [],
+               'values' => {}};
+
+   bless $self, $class;
+   if (@source)
+   {
+      my $status;
+
+      if (@source == 1 && isa ($source[0], 'Text::BibTeX::File'))
+      {
+         my $file = $source[0];
+         $status = $self->read ($file);
+         if (my $structure = $file->structure)
+         {
+            $self->{structure} = $structure;
+            bless $self, $structure->entry_class;
+         }
+      }
+      # fileno returns 0 for a handle on descriptor 0 (e.g. STDIN), so
+      # test for defined-ness rather than truth.
+      # NOTE(review): the POD says the filename may be undef, but this
+      # test requires it to be defined -- confirm which is intended.
+      elsif (@source == 2 && defined $source[0] && ! ref $source[0]
+             && defined fileno ($source[1]))
+      {
+         $status = $self->parse ($source[0], $source[1]);
+      }
+      elsif (@source == 1 && defined $source[0] && ! ref $source[0])
+      {
+         $status = $self->parse_s ($source[0]);
+      }
+      else
+      {
+         croak "new: source argument must be either a Text::BibTeX::File " .
+               "(or descendant) object, filename/filehandle pair, or " .
+               "a string";
+      }
+
+      return $status unless $status;    # parse failed -- tell our caller
+   }
+   $self;
+}
+
+=item read (BIBFILE)
+
+Reads and parses an entry from BIBFILE, which must be a
+C<Text::BibTeX::File> object (or descendant). The next entry will be read
+from the file associated with that object.
+
+Returns the same as C<parse> (or C<parse_s>): false if no entry found
+(e.g., at end-of-file), true otherwise. To see if the parse itself failed
+(due to errors in the input), call the C<parse_ok> method.
+
+=cut
+
+# Reads the next entry from a Text::BibTeX::File object: remembers the
+# File object in the entry, then hands its filename and handle (plus
+# the optional PRESERVE flag) to parse and returns parse's result.
+# Croaks if SOURCE is not a Text::BibTeX::File (or descendant).
+sub read
+{
+   my ($self, $source, $preserve) = @_;
+
+   unless (isa ($source, 'Text::BibTeX::File'))
+   {
+      croak "`source' argument must be ref to open Text::BibTeX::File " .
+            "(or descendant) object";
+   }
+
+   my ($filename, $handle) = ($source->{'filename'}, $source->{'handle'});
+   $self->{'file'} = $source;           # store File object for later use
+   return $self->parse ($filename, $handle, $preserve);
+}
+
+
+=item parse (FILENAME, FILEHANDLE)
+
+Reads and parses the next entry from FILEHANDLE. (That is, it scans the
+input until an '@' sign is seen, and then slurps up to the next '@'
+sign. Everything between the two '@' signs [including the first one,
+but not the second one -- it's pushed back onto the input stream for the
+next entry] is parsed as a BibTeX entry, with the simultaneous
+construction of an abstract syntax tree [AST]. The AST is traversed to
+ferret out the most interesting information, and this is stuffed into a
+Perl hash, which coincidentally is the C<Text::BibTeX::Entry> object
+you've been tossing around. But you don't need to know any of that -- I
+just figured if you've read this far, you might want to know something
+about the inner workings of this module.)
+
+The success of the parse is stored internally so that you can later
+query it with the C<parse_ok> method. Even in the presence of syntax
+errors, you'll usually get something resembling your input, but it's
+usually not wise to try to do anything with it. Just call C<parse_ok>,
+and if it returns false then silently skip to the next entry. (The
+error messages printed out by the parser should be quite adequate for
+the user to figure out what's wrong. And no, there's currently no way
+for you to capture or redirect those error messages -- they're always
+printed to C<stderr> by the underlying C code. That should change in
+future releases.)
+
+If no '@' signs are seen on the input before reaching end-of-file, then
+we've exhausted all the entries in the file, and C<parse> returns a
+false value. Otherwise, it returns a true value -- even if there were
+syntax errors. Hence, it's important to check C<parse_ok>.
+
+The FILENAME parameter is only used for generating error messages, but
+anybody using your program will certainly appreciate your setting it
+correctly!
+
+=item parse_s (TEXT)
+
+Parses a BibTeX entry (using the above rules) from the string TEXT. The
+string is not modified; repeatedly calling C<parse_s> with the same string
+will give you the same results each time. Thus, there's no point in
+putting multiple entries in one string.
+
+=back
+
+=cut
+
+# _preserve (PRESERVE)
+#
+# Works out the effective "preserve values" flag.  An explicitly
+# defined PRESERVE wins; otherwise, if the entry has an associated
+# Text::BibTeX::File object, that object's preserve_values setting is
+# used.  Text::BibTeX::Value is loaded when the result is true.
+# Returns the resulting flag.
+sub _preserve
+{
+   my ($self, $preserve) = @_;
+
+   unless (defined $preserve)
+   {
+      my $file = $self->{'file'};
+      if (defined $file && isa ($file, 'Text::BibTeX::File'))
+      {
+         $preserve = $file->preserve_values;
+      }
+   }
+
+   require Text::BibTeX::Value if $preserve;
+   return $preserve;
+}
+
+# Parse one entry from FILEHANDLE into this object.  The preserve flag
+# is first resolved through _preserve; the parsing itself is delegated
+# to _parse, which is defined outside this section of the file.
+sub parse
+{
+   my $self = shift;
+   my ($filename, $filehandle, $keep_values) = @_;
+
+   my $flag = $self->_preserve ($keep_values);
+   return _parse ($self, $filename, $filehandle, $flag);
+}
+
+
+# Parse one entry from the string TEXT into this object.  The preserve
+# flag is first resolved through _preserve; the parsing itself is
+# delegated to _parse_s, which is defined outside this section.
+sub parse_s
+{
+   my $self = shift;
+   my ($text, $keep_values) = @_;
+
+   my $flag = $self->_preserve ($keep_values);
+   return _parse_s ($self, $text, $flag);
+}
+
+
+=head2 Entry query methods
+
+=over 4
+
+=item parse_ok ()
+
+Returns false if there were any serious errors encountered while parsing
+the entry. (A "serious" error is a lexical or syntax error; currently,
+warnings such as "undefined macro" result in an error message being
+printed to C<stderr> for the user's edification, but no notice is
+available to the calling code.)
+
+=item type ()
+
+Returns the type of the entry. (The `type' is the word that follows the
+'@' sign; e.g. `article', `book', `inproceedings', etc. for the standard
+BibTeX styles.)
+
+=item metatype ()
+
+Returns the metatype of the entry. (The `metatype' is a numeric value used
+to classify entry types into four groups: comment, preamble, macro
+definition (C<@string> entries), and regular (all other entry types).
+C<Text::BibTeX> exports four constants for these metatypes: C<BTE_COMMENT>,
+C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>.)
+
+=item key ()
+
+Returns the key of the entry. (The key is the token immediately
+following the opening `{' or `(' in "regular" entries. Returns C<undef>
+for entries that don't have a key, such as macro definition (C<@string>)
+entries.)
+
+=item num_fields ()
+
+Returns the number of fields in the entry. (Note that, currently, this is
+I<not> equivalent to putting C<scalar> in front of a call to C<fieldlist>.
+See below for the consequences of calling C<fieldlist> in a scalar
+context.)
+
+=item fieldlist ()
+
+Returns the list of fields in the entry. In a scalar context, returns a
+reference to the object's own list of fields. That way, you can change or
+reorder the field list with minimal interference from the class. I'm not
+entirely sure if this is a good idea, so don't rely on it existing in the
+future; feel free to play around with it and let me know if you get bitten
+in dangerous ways or find this enormously useful.
+
+=cut
+
+# Accessor for the stored parse status of this entry.
+sub parse_ok
+{
+   my ($self) = @_;
+   return $self->{'status'};
+}
+
+# Accessor for the entry's metatype value.
+sub metatype
+{
+   my ($self) = @_;
+   return $self->{'metatype'};
+}
+
+# Accessor for the entry's type (the word following the '@' sign).
+sub type
+{
+   my ($self) = @_;
+   return $self->{'type'};
+}
+
+# Accessor for the entry's key (undef when the entry has none).
+sub key
+{
+   my ($self) = @_;
+   return $self->{'key'};
+}
+
+# Number of names in the entry's field list.
+sub num_fields
+{
+   my ($self) = @_;
+   my $count = @{$self->{'fields'}};
+   return $count;
+}
+
+# In list context, returns a copy of the field names; in scalar context,
+# returns a reference to the object's own field list (not a copy), so
+# changes made through it affect the entry.
+sub fieldlist
+{
+   my ($self) = @_;
+   my $fields = $self->{'fields'};
+
+   return wantarray ? @$fields : $fields;
+}
+
+=item exists (FIELD)
+
+Returns true if a field named FIELD is present in the entry, false
+otherwise.
+
+=item get (FIELD, ...)
+
+Returns the value of one or more FIELDs, as a list of values. For example:
+
+ $author = $entry->get ('author');
+ ($author, $editor) = $entry->get ('author', 'editor');
+
+If a FIELD is not present in the entry, C<undef> will be returned at its
+place in the return list. However, you can't completely trust this as a
+test for presence or absence of a field; it is possible for a field to be
+present but undefined. Currently this can only happen due to certain
+syntax errors in the input, or if you pass an undefined value to C<set>, or
+if you create a new field with C<set_fieldlist> (the new field's value is
+implicitly set to C<undef>).
+
+Normally, the field value is what the input looks like after "maximal
+processing"--quote characters are removed, whitespace is collapsed (the
+same way that BibTeX itself does it), macros are expanded, and multiple
+tokens are pasted together. (See L<bt_postprocess> for details on the
+post-processing performed by B<btparse>.)
+
+For example, if your input file has the following:
+
+ @string{of = "of"}
+ @string{foobars = "Foobars"}
+
+ @article{foobar,
+ title = { The Mating Habits } # of # " Adult " # foobars
+ }
+
+then using C<get> to query the value of the C<title> field from the
+C<foobar> entry would give the string "The Mating Habits of Adult Foobars".
+
+However, in certain circumstances you may wish to preserve the values as
+they appear in the input. This is done by setting a C<preserve_values>
+flag at some point; then, C<get> will return not strings but
+C<Text::BibTeX::Value> objects. Each C<Value> object is a list of
+C<Text::BibTeX::SimpleValue> objects, which in turn consists of a simple
+value type (string, macro, or number) and the text of the simple value.
+Various ways to set the C<preserve_values> flag and the interface to
+both C<Value> and C<SimpleValue> objects are described in
+L<Text::BibTeX::Value>.
+
+=item value ()
+
+Returns the single string associated with C<@comment> and C<@preamble>
+entries. For instance, the entry
+
+ @preamble{" This is a preamble" #
+ {---the concatenation of several strings}}
+
+would return a value of "This is a preamble---the concatenation of
+several strings".
+
+If this entry was parsed in "value preservation" mode, then C<value>
+acts like C<get>, and returns a C<Value> object rather than a simple
+string.
+
+=back
+
+=cut
+
+# True if FIELD has a slot in the entry's value table -- regardless of
+# whether the stored value is defined.
+sub exists
+{
+   my $self = shift;
+   my $field = shift;
+
+   return exists $self->{'values'}{$field};
+}
+
+# Look up the values of the named fields, in the order given.  This is
+# deliberately a hash slice: absent fields come back as undef, and in
+# scalar context the value of the last requested field is returned.
+sub get
+{
+   my $self = shift;
+   my @wanted = @_;
+
+   return @{$self->{'values'}}{@wanted};
+}
+
+# Accessor for the entry's 'value' slot (the single value carried by
+# @comment and @preamble entries).
+sub value
+{
+   my ($self) = @_;
+   return $self->{'value'};
+}
+
+
+=head2 Author name methods
+
+This is the only part of the module that makes any assumption about the
+nature of the data, namely that certain fields are lists delimited by a
+simple word such as "and", and that the delimited sub-strings are human
+names of the "First von Last" or "von Last, Jr., First" style used by
+BibTeX. If you are using this module for anything other than
+bibliographic data, you can most likely forget about these two methods.
+However, if you are in fact hacking on BibTeX-style bibliographic data,
+these could come in very handy -- the name-parsing done by BibTeX is not
+trivial, and the list-splitting would also be a pain to implement in
+Perl because you have to pay attention to brace-depth. (Not that it
+wasn't a pain to implement in C -- it's just a lot more efficient than a
+Perl implementation would be.)
+
+Incidentally, both of these methods assume that the strings being split
+have already been "collapsed" in the BibTeX way, i.e. all leading and
+trailing whitespace removed and internal whitespace reduced to single
+spaces. This should always be the case when using these two methods on
+a C<Text::BibTeX::Entry> object, but these are actually just front ends
+to more general functions in C<Text::BibTeX>. (More general in that you
+supply the string to be parsed, rather than supplying the name of an
+entry field.) Should you ever use those more general functions
+directly, you might have to worry about collapsing whitespace; see
+L<Text::BibTeX> (the C<split_list> and C<split_name> functions in
+particular) for more information.
+
+Please note that the interface to author name parsing is experimental,
+subject to change, and open to discussion. Please let me know if you
+have problems with it, think it's just perfect, or whatever.
+
+=over 4
+
+=item split (FIELD [, DELIM [, DESC]])
+
+Splits the value of FIELD on DELIM (default: `and'). Don't assume that
+this works the same as Perl's builtin C<split> just because the names are
+the same: in particular, DELIM must be a simple string (no regexps), and
+delimiters that are at the beginning or end of the string, or at non-zero
+brace depth, or not surrounded by whitespace, are ignored. Some examples
+might illuminate matters:
+
+ if field F is... then split (F) returns...
+ 'Name1 and Name2' ('Name1', 'Name2')
+ 'Name1 and and Name2' ('Name1', undef, 'Name2')
+ 'Name1 and' ('Name1 and')
+ 'and Name2' ('and Name2')
+ 'Name1 {and} Name2 and Name3' ('Name1 {and} Name2', 'Name3')
+ '{Name1 and Name2} and Name3' ('{Name1 and Name2}', 'Name3')
+
+Note that a warning will be issued for empty names (as in the second
+example above). A warning ought to be issued for delimiters at the
+beginning or end of a string, but currently this isn't done. (Hmmm.)
+
+DESC is a one-word description of the substrings; it defaults to 'name'.
+It is only used for generating warning messages.
+
+=item names (FIELD)
+
+Splits FIELD as described above, and further splits each name into four
+components: first, von, last, and jr.
+
+Returns a list of C<Text::BibTeX::Name> objects, each of which represents
+one name. Use the C<part> method to query these objects; see
+L<Text::BibTeX::Name> for details on the interface to name objects (and on
+name-parsing as well).
+
+For example if this entry:
+
+ @article{foo,
+ author = {John Smith and
+ Hacker, J. Random and
+ Ludwig van Beethoven and
+ {Foo, Bar and Company}}}
+
+has been parsed into a C<Text::BibTeX::Entry> object C<$entry>, then
+
+ @names = $entry->names ('author');
+
+will put a list of C<Text::BibTeX::Name> objects in C<@names>. These can
+be queried individually as described in L<Text::BibTeX::Name>; for instance,
+
+ @last = $names[0]->part ('last');
+
+would put the list of tokens comprising the last name of the first author
+into the C<@last> array: C<('Smith')>.
+
+=cut
+
+# Split the value of FIELD on DELIM (default 'and').  DESC (default
+# 'name') is passed along for use in warning messages.  Returns nothing
+# when the entry has no such field.  The work is delegated to
+# Text::BibTeX::split_list, which also receives the filename (when the
+# entry has an associated file object) and the line number recorded for
+# the field.
+sub split
+{
+   my $self = shift;
+   my $field = shift;
+   my $delim = shift || 'and';
+   my $desc = shift || 'name';
+
+   return unless $self->exists ($field);
+
+   # $filename stays false/undef for entries without a file object
+   my $file = $self->{'file'};
+   my $filename = $file && $file->{'filename'};
+   my $line = $self->{'lines'}{$field};
+
+   return Text::BibTeX::split_list ($self->{'values'}{$field}, $delim,
+                                    $filename, $line, $desc);
+}
+
+# Split FIELD into individual names (via the split method) and wrap each
+# one in a Text::BibTeX::Name object.  Each constructor call receives the
+# name string, the filename (when the entry has an associated file
+# object), the line number recorded for FIELD, and the zero-based index
+# of the name within the field.  Returns the list of name objects (empty
+# when split yields nothing).
+sub names
+{
+   require Text::BibTeX::Name;
+
+   my ($self, $field) = @_;
+
+   my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
+   my $line = $self->{'lines'}{$field};
+
+   my @names = $self->split ($field);
+   for my $i (0 .. $#names)
+   {
+      # Explicit class-method call.  The indirect object form ("new
+      # Class (...)") is resolved heuristically by the Perl parser and
+      # can be misread as a plain function call when a sub named "new"
+      # is visible in the current package; the arrow form cannot.
+      $names[$i] = Text::BibTeX::Name->new ($names[$i], $filename, $line, $i);
+   }
+   return @names;
+}
+
+=back
+
+=head2 Entry modification methods
+
+=over 4
+
+=item set_type (TYPE)
+
+Sets the entry's type.
+
+=item set_metatype (METATYPE)
+
+Sets the entry's metatype (must be one of the four constants
+C<BTE_COMMENT>, C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>, which
+are all optionally exported from C<Text::BibTeX>).
+
+=item set_key (KEY)
+
+Sets the entry's key.
+
+=item set (FIELD, VALUE, ...)
+
+Sets the value of field FIELD. (VALUE might be C<undef>, in which case
+FIELD will simply be set to C<undef> -- this is where the difference
+between the C<exists> method and testing the definedness of field
+values becomes clear. Note that VALUE must always be supplied, even if
+only as an explicit C<undef>: C<set> croaks when given an odd number of
+arguments.)
+
+Multiple (FIELD, VALUE) pairs may be supplied; they will be processed in
+order (i.e. the input is treated like a list, not a hash). For example:
+
+ $entry->set ('author', $author);
+ $entry->set ('author', $author, 'editor', $editor);
+
+VALUE can be either a simple string or a C<Text::BibTeX::Value> object;
+it doesn't matter if the entry was parsed in "full post-processing" or
+"preserve input values" mode.
+
+=item delete (FIELD)
+
+Deletes field FIELD from an entry.
+
+=item set_fieldlist (FIELDLIST)
+
+Sets the entry's list of fields to FIELDLIST, which must be a list
+reference. If any of the field names supplied in FIELDLIST are not
+currently present in the entry, they are created with the value C<undef>
+and a warning is printed. Conversely, if any of the fields currently
+present in the entry are not named in the list of fields supplied to
+C<set_fieldlist>, they are deleted from the entry and another warning is
+printed.
+
+=back
+
+=cut
+
+# Store TYPE as the entry's type; returns the stored value.
+sub set_type
+{
+   my $self = shift;
+   my $new_type = shift;
+
+   return $self->{'type'} = $new_type;
+}
+
+# Store METATYPE as the entry's metatype; returns the stored value.
+sub set_metatype
+{
+   my $self = shift;
+   my $new_metatype = shift;
+
+   return $self->{'metatype'} = $new_metatype;
+}
+
+# Store KEY as the entry's key; returns the stored value.
+sub set_key
+{
+   my $self = shift;
+   my $new_key = shift;
+
+   return $self->{'key'} = $new_key;
+}
+
+# Assign one or more (FIELD, VALUE) pairs, processed left to right.  A
+# field that has no slot in the value table yet is also appended to the
+# ordered field list.  Croaks when the argument list does not consist of
+# complete pairs.
+sub set
+{
+   my ($self, @pairs) = @_;
+
+   croak "set: must supply an even number of arguments"
+      if @pairs % 2;
+
+   while (@pairs)
+   {
+      my ($field, $value) = splice (@pairs, 0, 2);
+
+      # brand-new field: record its position before storing the value
+      push (@{$self->{'fields'}}, $field)
+         unless exists $self->{'values'}{$field};
+      $self->{'values'}{$field} = $value;
+   }
+}
+
+# Remove the named fields from the entry: they are filtered out of the
+# ordered field list (which is modified in place, so references handed
+# out by fieldlist stay valid) and dropped from the value table.
+# Returns whatever the hash-slice delete returns.
+sub delete
+{
+   my $self = shift;
+   my @doomed = @_;
+
+   my %is_doomed;
+   @is_doomed{@doomed} = (1) x @doomed;
+
+   @{$self->{'fields'}} = grep { ! $is_doomed{$_} } @{$self->{'fields'}};
+   return delete @{$self->{'values'}}{@doomed};
+}
+
+# Replace the entry's ordered field list with a copy of the list that
+# $fields refers to, keeping the value table in step: names that have no
+# value slot yet get one holding undef, and value slots whose names are
+# not in the new list are removed.  Each such implicit change is
+# reported with carp.  Returns the new field list reference.
+sub set_fieldlist
+{
+   my $self = shift;
+   my $fields = shift;
+
+   # First pass: note every requested name, creating missing slots
+   my %requested;
+   for my $name (@$fields)
+   {
+      $requested{$name} = 1;
+      next if exists $self->{'values'}{$name};
+
+      carp "Implicitly adding undefined field \"$name\"";
+      $self->{'values'}{$name} = undef;
+   }
+
+   # Second pass: drop every stored field the caller did not ask for
+   for my $name (grep { ! $requested{$_} } keys %{$self->{'values'}})
+   {
+      carp "Implicitly deleting field \"$name\"";
+      delete $self->{'values'}{$name};
+   }
+
+   # Install a copy, so later changes to the caller's list don't leak in
+   return $self->{'fields'} = [@$fields];
+}
+
+
+=head2 Entry output methods
+
+=over 4
+
+=item write (BIBFILE)
+
+Prints a BibTeX entry on the filehandle associated with BIBFILE (which
+should be a C<Text::BibTeX::File> object, opened for output). Currently
+the printout is not particularly human-friendly; a highly configurable
+pretty-printer will be developed eventually.
+
+=item print (FILEHANDLE)
+
+Prints a BibTeX entry on FILEHANDLE.
+
+=item print_s ()
+
+Prints a BibTeX entry to a string, which is the return value.
+
+=cut
+
+# Print this entry on the filehandle stored in BIBFILE's 'handle' slot,
+# by way of the print method.
+sub write
+{
+   my $self = shift;
+   my $bibfile = shift;
+
+   my $out = $bibfile->{'handle'};
+   return $self->print ($out);
+}
+
+# Print the entry's text (as produced by print_s) on HANDLE, falling
+# back to STDOUT when no handle is given.
+sub print
+{
+   my $self = shift;
+   my $handle = shift || \*STDOUT;
+
+   print {$handle} $self->print_s;
+}
+
+# Helper for print_s: render one field value as BibTeX source text.  A
+# plain string is wrapped in braces.  A Text::BibTeX::Value object is
+# rendered as its simple values joined with ' # ', with string-typed
+# simple values wrapped in braces and all others emitted verbatim.
+# Anything else that is a reference is a fatal error.  (This is kept as
+# an ordinary package-level sub: named subs do not nest in Perl, so
+# declaring it inside print_s only obscured that.)
+sub value_to_string
+{
+   my $value = shift;
+
+   return "{$value}" unless ref $value;         # just a string
+
+   confess "value is a reference, but not to Text::BibTeX::Value object"
+      unless isa ($value, 'Text::BibTeX::Value');
+
+   my @values = $value->values;
+   foreach (@values)
+   {
+      $_ = $_->type == &BTAST_STRING ? '{' . $_->text . '}' : $_->text;
+   }
+   return join (' # ', @values);
+}
+
+# Return the entry formatted as BibTeX source text.  Regular entries are
+# printed with their key followed by one line per field; macro
+# definition entries likewise but without a key; any other metatype
+# (comment, preamble) is printed on one line from the entry's 'value'
+# slot.  Missing type, metatype, key or field values are reported with
+# carp and printed as empty strings where the code supplies a fallback.
+sub print_s
+{
+   my $self = shift;
+   my $output;
+
+   carp "entry type undefined" unless defined $self->{'type'};
+   carp "entry metatype undefined" unless defined $self->{'metatype'};
+
+   # Regular and macro-def entries have to be treated differently when
+   # printing the first line, because the former have keys and the latter
+   # do not.
+   if ($self->{'metatype'} == &BTE_REGULAR)
+   {
+      carp "entry key undefined" unless defined $self->{'key'};
+      $output = sprintf ("@%s{%s,\n",
+                         $self->{'type'} || '',
+                         $self->{'key'} || '');
+   }
+   elsif ($self->{'metatype'} == &BTE_MACRODEF)
+   {
+      $output = sprintf ("@%s{\n",
+                         $self->{'type'} || '');
+   }
+
+   # Comment and preamble entries are treated the same -- we print out
+   # the entire entry, on one line, right here.
+   else                                         # comment or preamble
+   {
+      return sprintf ("@%s{%s}\n\n",
+                      $self->{'type'},
+                      value_to_string ($self->{'value'}));
+   }
+
+   # Here we print out all the fields/values of a regular or macro-def
+   # entry.  Iterate by index rather than with "while ($field = shift
+   # ...)": the latter tests the field *name* for truth, so a field
+   # called "0" (or "") would end the loop early and silently drop every
+   # remaining field.
+   my @fields = @{$self->{'fields'}};
+   foreach my $i (0 .. $#fields)
+   {
+      my $field = $fields[$i];
+      my $value = $self->{'values'}{$field};
+      if (! defined $value)
+      {
+         carp "field \"$field\" has undefined value\n";
+         $value = '';
+      }
+
+      $output .= " $field = ";
+      $output .= value_to_string ($value);
+
+      $output .= ',' if $i < $#fields;          # more fields yet to come
+      $output .= "\n";
+   }
+
+   # Tack on the last line, and we're done!
+   $output .= "}\n\n";
+   return $output;
+}
+
+=back
+
+=head2 Miscellaneous methods
+
+=over 4
+
+=item warn (WARNING [, FIELD])
+
+Prepends a bit of location information (filename and line number(s)) to
+WARNING, appends a newline, and passes it to Perl's C<warn>. If FIELD is
+supplied, the line number given is just that of the field; otherwise, the
+range of lines for the whole entry is given. (Well, almost -- currently,
+the line number of the last field is used as the last line of the whole
+entry. This is a bug.)
+
+For example, if lines 10-15 of file F<foo.bib> look like this:
+
+ @article{homer97,
+ author = {Homer Simpson and Ned Flanders},
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ year = 1997
+ }
+
+then, after parsing this entry to C<$entry>, the calls
+
+ $entry->warn ('what a silly entry');
+ $entry->warn ('what a silly journal', 'journal');
+
+would result in the following warnings being issued:
+
+ foo.bib, lines 10-14: what a silly entry
+ foo.bib, line 13: what a silly journal
+
+=cut
+
+# Issue WARNING through Perl's builtin warn, prefixed with location
+# information and followed by a newline.  The prefix consists of the
+# filename (only when the entry has a 'file' slot) and either the line
+# recorded for FIELD or, when FIELD is omitted or has no recorded line,
+# the START/STOP line range of the whole entry.
+sub warn
+{
+   my ($self, $warning, $field) = @_;
+
+   my $prefix = $self->{'file'} ? "$self->{'file'}{'filename'}, " : '';
+
+   my $lines = $self->{'lines'};
+   my ($first, $last) = ($lines->{'START'}, $lines->{'STOP'});
+   my $entry_range = $first == $last
+      ? "line $first"
+      : "lines $first-$last";
+
+   my $where;
+   if (! defined $field)
+   {
+      $where = $entry_range;
+   }
+   elsif (exists $lines->{$field})
+   {
+      $where = "line $lines->{$field}";
+   }
+   else
+   {
+      $where = "$entry_range (unknown field \"$field\")";
+   }
+
+   warn "$prefix$where: $warning\n";
+}
+
+
+=item line ([FIELD])
+
+Returns the line number of FIELD. If the entry was parsed from a string,
+this still works--it's just the line number relative to the start of the
+string. If the entry was parsed from a file, this works just as you'd
+expect it to: it returns the absolute line number with respect to the
+whole file. Line numbers are one-based.
+
+If FIELD is not supplied, returns a two-element list containing the line
+numbers of the beginning and end of the whole entry. (Actually, the
+"end" line number is currently inaccurate: it's really the line
+number of the last field in the entry. But it's better than nothing.)
+
+=cut
+
+# With FIELD: the line number recorded for that field.  Without: the
+# (START, STOP) pair of line numbers recorded for the whole entry, as a
+# hash slice.
+sub line
+{
+   my $self = shift;
+   my $field = shift;
+
+   return $self->{'lines'}{$field} if defined $field;
+   return @{$self->{'lines'}}{'START', 'STOP'};
+}
+
+=item filename ()
+
+Returns the name of the file from which the entry was parsed. Only
+works if the file is represented by a C<Text::BibTeX::File> object---if
+you just passed a filename/filehandle pair to C<parse>, you can't get
+the filename back. (Sorry.)
+
+=cut
+
+# Return the 'filename' slot of the entry's associated file object, or
+# undef when the entry has no file object.
+sub filename
+{
+   my $self = shift;
+
+   # Look before dereferencing: reading $self->{'file'}{'filename'}
+   # directly would autovivify an empty 'file' hash on entries that have
+   # no file, after which truth tests of $self->{'file'} elsewhere in
+   # this class (e.g. in warn) would wrongly succeed.
+   my $file = $self->{'file'};
+   return $file ? $file->{'filename'}   # ooh yuck -- poking into File object
+                : undef;
+}
+
+1;
+
+=back
+
+=head1 SEE ALSO
+
+L<Text::BibTeX>, L<Text::BibTeX::File>, L<Text::BibTeX::Structure>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
+
+=cut
diff --git a/BibTeX/File.pm b/BibTeX/File.pm
new file mode 100644
index 0000000..84d6beb
--- /dev/null
+++ b/BibTeX/File.pm
@@ -0,0 +1,195 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/File.pm
+# CLASSES : Text::BibTeX::File
+# RELATIONS :
+# DESCRIPTION: Provides an object-oriented interface to whole BibTeX
+# files.
+# CREATED : March 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: File.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::File;
+
+use strict;
+use Carp;
+use IO::File;
+
+=head1 NAME
+
+Text::BibTeX::File - interface to whole BibTeX files
+
+=head1 SYNOPSIS
+
+ use Text::BibTeX; # this loads Text::BibTeX::File
+
+ $bib = new Text::BibTeX::File "foo.bib" or die "foo.bib: $!\n";
+ # or:
+ $bib = new Text::BibTeX::File;
+ $bib->open ("foo.bib") || die "foo.bib: $!\n";
+
+ $bib->set_structure ($structure_name,
+ $option1 => $value1, ...);
+
+ $at_eof = $bib->eof;
+
+ $bib->close;
+
+=head1 DESCRIPTION
+
+C<Text::BibTeX::File> provides an object-oriented interface to BibTeX
+files. Its most obvious purpose is to keep track of a filename and
+filehandle together for use by the C<Text::BibTeX::Entry> module (which
+is much more interesting). In addition, it allows you to specify
+certain options which are applicable to a whole database (file), rather
+than having to specify them for each entry in the file. Currently, you
+can specify the I<database structure> and some I<structure options>.
+These concepts are fully documented in L<Text::BibTeX::Structure>.
+
+=head1 METHODS
+
+=head2 Object creation, file operations
+
+=over 4
+
+=item new ([FILENAME [,MODE [,PERMS]]])
+
+Creates a new C<Text::BibTeX::File> object. If FILENAME is supplied,
+passes it to the C<open> method (along with MODE and PERMS if they
+are supplied). If the C<open> fails, C<new> fails and returns false; if
+the C<open> succeeds (or if FILENAME isn't supplied), C<new> returns the
+new object reference.
+
+=item open (FILENAME [,MODE [,PERMS]])
+
+Opens the file specified by FILENAME, possibly using MODE and PERMS.
+See L<IO::File> for full semantics; this C<open> is just a front end for
+C<IO::File::open>.
+
+=item close ()
+
+Closes the filehandle associated with the object. If there is no such
+filehandle (i.e., C<open> was never called on the object), does nothing.
+
+=item eof ()
+
+Returns the end-of-file state of the filehandle associated with the
+object: a true value means we are at the end of the file.
+
+=back
+
+=cut
+
+# Constructor; may be called on a class name or on an existing object
+# (whose class is then used).  Any further arguments are handed to the
+# open method; if that fails, undef is returned instead of the object.
+sub new
+{
+   my ($class, @open_args) = @_;       # filename [, mode [, perms]]
+
+   my $self = bless {}, ref ($class) || $class;
+   if (@open_args)
+   {
+      $self->open (@open_args) or return undef;
+   }
+   return $self;
+}
+
+# Record the filename (the first argument) and open it on a fresh
+# IO::File handle stored in the object.  All arguments (filename, maybe
+# mode, maybe perms) are passed through to IO::File's open method, whose
+# result is returned.
+sub open
+{
+   my $self = shift;
+
+   $self->{filename} = $_[0];
+
+   # Explicit class-method call: this package defines its own "new", and
+   # the indirect object form ("new IO::File") leaves it to the parser's
+   # heuristics to decide between a method call and a function call.
+   $self->{handle} = IO::File->new;
+   $self->{handle}->open (@_);          # filename, maybe mode, maybe perms
+}
+
+# Close the object's filehandle, if it has one; otherwise do nothing.
+sub close
+{
+   my ($self) = @_;
+   my $handle = $self->{handle};
+
+   return $handle && $handle->close;
+}
+
+# End-of-file state of the object's filehandle, as reported by Perl's
+# builtin eof.
+sub eof
+{
+   my ($self) = @_;
+   return eof ($self->{handle});
+}
+
+# Destructor: make sure the filehandle is closed when the object goes
+# away.
+sub DESTROY
+{
+   my ($self) = @_;
+   return $self->close;
+}
+
+=head2 Object properties
+
+=over 4
+
+=item set_structure (STRUCTURE [, OPTION =E<gt> VALUE, ...])
+
+Sets the database structure for a BibTeX file. At the simplest level,
+this means that entries from the file are expected to conform to certain
+field requirements as specified by the I<structure module>. It also
+gives you full access to the methods of the particular I<structured
+entry class> for this structure, allowing you to perform operations
+specific to this kind of database. See L<Text::BibTeX::Structure/"CLASS
+INTERACTIONS"> for all the consequences of setting the database
+structure for a C<Text::BibTeX::File> object.
+
+=item structure ()
+
+Returns the name of the database structure associated with the object
+(as set by C<set_structure>).
+
+=cut
+
+# Attach a database structure to this file: loads
+# Text::BibTeX::Structure on demand, builds a structure object from
+# STRUCTURE and the (OPTION => VALUE) pairs, and stores it in the
+# object.  Croaks when the options list does not consist of complete
+# pairs.
+sub set_structure
+{
+   my ($self, $structure, @options) = @_;
+
+   require Text::BibTeX::Structure;
+   croak "Text::BibTeX::File::set_structure: options list must have even " .
+         "number of elements"
+      unless @options % 2 == 0;
+
+   # Explicit class-method call: this package defines its own "new", and
+   # the structure class is only loaded at run time, so the indirect
+   # object form ("new Text::BibTeX::Structure (...)") depends on parser
+   # heuristics to be taken as a method call at all.
+   $self->{structure} = Text::BibTeX::Structure->new ($structure, @options);
+}
+
+# Accessor for whatever set_structure stored in the object.
+sub structure
+{
+   my ($self) = @_;
+   return $self->{structure};
+}
+
+
+=item preserve_values ([PRESERVE])
+
+Sets the "preserve values" flag, to control all future parsing of entries
+from this file. If PRESERVE isn't supplied, returns the current state of
+the flag. See L<Text::BibTeX::Value> for details on parsing in "value
+preservation" mode.
+
+=back
+
+=cut
+
+# Combined getter/setter for the "preserve values" flag: when called
+# with an argument, stores it first; always returns the current flag.
+sub preserve_values
+{
+   my ($self, @new_flag) = @_;
+
+   $self->{'preserve_values'} = $new_flag[0] if @new_flag;
+   return $self->{'preserve_values'};
+}
+
+
+1;
+
+=head1 SEE ALSO
+
+L<Text::BibTeX>, L<Text::BibTeX::Entry>, L<Text::BibTeX::Structure>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/Name.pm b/BibTeX/Name.pm
new file mode 100644
index 0000000..50d0d91
--- /dev/null
+++ b/BibTeX/Name.pm
@@ -0,0 +1,384 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/Name.pm
+# CLASSES : Text::BibTeX::Name
+# RELATIONS :
+# DESCRIPTION: Provides an object-oriented interface to the BibTeX-
+# style author names (parsing them, that is; formatting
+# them is done by the Text::BibTeX::NameFormat class).
+# CREATED : Nov 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: Name.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::Name;
+
+require 5.004;
+
+use strict;
+use Carp;
+
+=head1 NAME
+
+Text::BibTeX::Name - interface to BibTeX-style author names
+
+=head1 SYNOPSIS
+
+ $name = new Text::BibTeX::Name;
+ $name->split('J. Random Hacker');
+ # or:
+ $name = new Text::BibTeX::Name ('J. Random Hacker');
+
+ @firstname_tokens = $name->part ('first');
+ $lastname = join (' ', $name->part ('last'));
+
+ $format = new Text::BibTeX::NameFormat;
+ # ...customize $format...
+ $formatted = $name->format ($format);
+
+=head1 DESCRIPTION
+
+C<Text::BibTeX::Name> provides an abstraction for BibTeX-style names and
+some basic operations on them. A name, in the BibTeX world, consists of
+a list of I<tokens> which are divided amongst four I<parts>: `first',
+`von', `last', and `jr'.
+
+Tokens are separated by whitespace or commas at brace-level zero. Thus
+the name
+
+ van der Graaf, Horace Q.
+
+has five tokens, whereas the name
+
+ {Foo, Bar, and Sons}
+
+consists of a single token. Skip down to L<"EXAMPLES"> for more examples, or
+read on if you want to know the exact details of how names are split into
+tokens and parts.
+
+How tokens are divided into parts depends on the form of the name. If
+the name has no commas at brace-level zero (as in the second example),
+then it is assumed to be in either "first last" or "first von last"
+form. If there are no tokens that start with a lower-case letter, then
+"first last" form is assumed: the final token is the last name, and all
+other tokens form the first name. Otherwise, the earliest contiguous
+sequence of tokens with initial lower-case letters is taken as the `von'
+part; if this sequence includes the final token, then a warning is
+printed and the final token is forced to be the `last' part.
+
+If a name has a single comma, then it is assumed to be in "von last,
+first" form. A leading sequence of tokens with initial lower-case
+letters, if any, forms the `von' part; tokens between the `von' and the
+comma form the `last' part; tokens following the comma form the `first'
+part. Again, if there are no tokens following a leading sequence of
+lowercase tokens, a warning is printed and the token immediately
+preceding the comma is taken to be the `last' part.
+
+If a name has more than two commas, a warning is printed and the name is
+treated as though only the first two commas were present.
+
+Finally, if a name has two commas, it is assumed to be in "von last, jr,
+first" form. (This is the only way to represent a name with a `jr'
+part.) The parsing of the name is the same as for a one-comma name,
+except that tokens between the two commas are taken to be the `jr' part.
+
+=head1 CAVEAT
+
+The C code that does the actual work of splitting up names takes a shortcut
+and makes a few assumptions about whitespace. In particular, there must be
+no leading whitespace, no trailing whitespace, no consecutive whitespace
+characters in the string, and no whitespace characters other than space.
+In other words, all whitespace must consist of lone internal spaces.
+
+=head1 EXAMPLES
+
+The strings C<"John Smith"> and C<"Smith, John"> are different
+representations of the same name, so split into parts and tokens the
+same way, namely as:
+
+ first => ('John')
+ von => ()
+ last => ('Smith')
+ jr => ()
+
+Note that every part is a list of tokens, even if there is only one
+token in that part; empty parts get empty token lists. Every token is
+just a string. Writing this example in actual code is simple:
+
+ $name = new Text::BibTeX::Name ("John Smith"); # or "Smith, John"
+ $name->part ('first'); # returns list ("John")
+ $name->part ('last'); # returns list ("Smith")
+ $name->part ('von'); # returns list ()
+ $name->part ('jr'); # returns list ()
+
+(We'll omit the empty parts in the rest of the examples: just assume
+that any unmentioned part is an empty list.) If more than two tokens
+are included and there's no comma, they'll go to the first name: thus
+C<"John Q. Smith"> splits into
+
+ first => ("John", "Q.")
+ last => ("Smith")
+
+and C<"J. R. R. Tolkein"> into
+
+ first => ("J.", "R.", "R.")
+ last => ("Tolkein")
+
+The ambiguous name C<"Kevin Philips Bong"> splits into
+
+ first => ("Kevin", "Philips")
+ last => ("Bong")
+
+which may or may not be the right thing, depending on the particular
+person. There's no way to know though, so if this fellow's last name is
+"Philips Bong" and not "Bong", the string representation of his name
+must disambiguate. One possibility is C<"Philips Bong, Kevin"> which
+splits into
+
+ first => ("Kevin")
+ last => ("Philips", "Bong")
+
+Alternately, C<"Kevin {Philips Bong}"> takes advantage of the fact that
+tokens are only split on whitespace I<at brace-level zero>, and becomes
+
+ first => ("Kevin")
+ last => ("{Philips Bong}")
+
+which is fine if your names are destined to be processed by TeX, but
+might be problematic in other contexts. Similarly, C<"St John-Mollusc,
+Oliver"> becomes
+
+ first => ("Oliver")
+ last => ("St", "John-Mollusc")
+
+which can also be written as C<"Oliver {St John-Mollusc}">:
+
+ first => ("Oliver")
+ last => ("{St John-Mollusc}")
+
+Since tokens are separated purely by whitespace, hyphenated names will
+work either way: both C<"Nigel Incubator-Jones"> and C<"Incubator-Jones,
+Nigel"> come out as
+
+ first => ("Nigel")
+ last => ("Incubator-Jones")
+
+Multi-token last names with lowercase components -- the "von part" --
+work fine: both C<"Ludwig van Beethoven"> and C<"van Beethoven, Ludwig">
+parse (correctly) into
+
+ first => ("Ludwig")
+ von => ("van")
+ last => ("Beethoven")
+
+This allows these European aristocratic names to sort properly,
+i.e. I<van Beethoven> under I<B> rather than I<v>. Speaking of
+aristocratic European names, C<"Charles Louis Xavier Joseph de la
+Vall{\'e}e Poussin"> is handled just fine, and splits into
+
+ first => ("Charles", "Louis", "Xavier", "Joseph")
+ von => ("de", "la")
+ last => ("Vall{\'e}e", "Poussin")
+
+so could be sorted under I<V> rather than I<d>. (Note that the sorting
+algorithm in L<Text::BibTeX::BibSort> is a slavish imitation of BibTeX
+0.99, and therefore does the wrong thing with these names: the sort key
+starts with the "von" part.)
+
+However, capitalized "von parts" don't work so well: C<"R. J. Van de
+Graaff"> splits into
+
+ first => ("R.", "J.", "Van")
+ von => ("de")
+ last => ("Graaff")
+
+which is clearly wrong. This name should be represented as C<"Van de
+Graaff, R. J.">
+
+ first => ("R.", "J.")
+ last => ("Van", "de", "Graaff")
+
+which is probably right. (This particular Van de Graaff was an
+American, so he probably belongs under I<V> -- which is where my
+(British) dictionary puts him. Other Van de Graaff's mileages may
+vary.)
+
+Finally, many names include a suffix: "Jr.", "III", "fils", and so
+forth. These are handled, but with some limitations. If there's a
+comma before the suffix (the usual U.S. convention for "Jr."), then the
+name should be in I<last, jr, first> form, e.g. C<"Doe, Jr., John">
+comes out (correctly) as
+
+ first => ("John")
+ last => ("Doe")
+ jr => ("Jr.")
+
+but C<"John Doe, Jr."> is ambiguous and is parsed as
+
+ first => ("Jr.")
+ last => ("John", "Doe")
+
+(so don't do it that way). If there's no comma before the suffix -- the
+usual for Roman numerals, and occasionally seen with "Jr." -- then
+you're stuck and have to make the suffix part of the last name. Thus,
+C<"Gates III, William H."> comes out
+
+ first => ("William", "H.")
+ last => ("Gates", "III")
+
+but C<"William H. Gates III"> is ambiguous, and becomes
+
+ first => ("William", "H.", "Gates")
+ last => ("III")
+
+-- not what you want. Again, the curly-brace trick comes in handy, so
+C<"William H. {Gates III}"> splits into
+
+ first => ("William", "H.")
+ last => ("{Gates III}")
+
+There is no way to make a comma-less suffix the C<jr> part. (This is an
+unfortunate consequence of slavishly imitating BibTeX 0.99.)
+
+Finally, names that aren't really names of people but rather are
+organization or company names should be forced into a single token by
+wrapping them in curly braces. For example, "Foo, Bar and Sons" should
+be written C<"{Foo, Bar and Sons}">, which will split as
+
+ last => ("{Foo, Bar and Sons}")
+
+Of course, if this is one name in a BibTeX C<authors> or C<editors>
+list, this name has to be wrapped in braces anyways (because of the C<"
+and ">), but that's another story.
+
+=head1 FORMATTING NAMES
+
+Putting a split-up name back together again in a flexible, customizable
+way is the job of another module: see L<Text::BibTeX::NameFormat>.
+
+=head1 METHODS
+
+=over 4
+
+=item new (CLASS [, NAME [, FILENAME, LINE, NAME_NUM]])
+
+Creates a new C<Text::BibTeX::Name> object. If NAME is supplied, it
+must be a string containing a single name, and it will be passed to
+the C<split> method for further processing. FILENAME, LINE, and
+NAME_NUM, if present, are all also passed to C<split> to allow better
+error messages.
+
+=cut
+
+# Constructor.  Blesses an empty hash into CLASS (or, when called on an
+# existing object, into that object's class).  If NAME is defined it is
+# handed straight to the `split' method together with the optional
+# FILENAME, LINE and NAME_NUM location arguments.  Returns the new object.
+sub new
+{
+   my ($class, $name, $filename, $line, $name_num) = @_;
+
+   my $self = bless {}, (ref ($class) || $class);
+
+   # Without a name there is nothing to split yet; the caller is expected
+   # to invoke `split' later.
+   if (defined $name)
+   {
+      $self->split ($name, $filename, $line, $name_num, 1);
+   }
+
+   return $self;
+}
+
+
+# Destructor: delegates to the object's `free' method, which releases the
+# C structure kept by `split'.
+sub DESTROY
+{
+   my ($self) = @_;
+
+   $self->free;
+}
+
+
+=item split (NAME [, FILENAME, LINE, NAME_NUM])
+
+Splits NAME (a string containing a single name) into tokens and
+subsequently into the four parts of a BibTeX-style name (first, von,
+last, and jr). (Each part is a list of tokens, and tokens are separated
+by whitespace or commas at brace-depth zero. See above for full details
+on how a name is split into its component parts.)
+
+The token-lists that make up each part of the name are then stored in
+the C<Text::BibTeX::Name> object for later retrieval or formatting with
+the C<part> and C<format> methods.
+
+=cut
+
+# Perl-side front end for the `_split' XSUB.  LINE and NAME_NUM default to
+# -1 when the caller leaves them undefined; NAME and FILENAME are passed
+# through untouched.  The trailing 1 is forwarded as-is -- its meaning is
+# defined by the XS code, which is not visible from this file.  Returns
+# whatever `_split' returns.
+sub split
+{
+   my ($self, $name, $filename, $line, $name_num) = @_;
+
+   # Fill in the defaults up front so the XSUB call stays on one line
+   $line = -1 unless defined $line;
+   $name_num = -1 unless defined $name_num;
+
+   $self->_split ($name, $filename, $line, $name_num, 1);
+}
+
+
+=item part (PARTNAME)
+
+Returns the list of tokens in part PARTNAME of a name previously split with
+C<split>. For example, suppose a C<Text::BibTeX::Name> object is created and
+initialized like this:
+
+ $name = new Text::BibTeX::Name;
+ $name->split ('Charles Louis Xavier Joseph de la Vall{\'e}e Poussin');
+
+Then this code:
+
+ $name->part ('von');
+
+would return the list C<('de','la')>.
+
+=cut
+
+# Returns the list of tokens stored under PARTNAME ('first', 'von', 'last'
+# or 'jr') in this object, or the empty list if that part is absent.
+# Croaks with "unknown name part" for any other PARTNAME, including undef.
+sub part
+{
+   my ($self, $partname) = @_;
+
+   # \A...\z rather than ^...$: `$' also matches before a trailing
+   # newline, which used to let "von\n" through validation and silently
+   # return an empty list.  The `defined' test lets an undef PARTNAME
+   # croak cleanly instead of first warning about an uninitialized value.
+   croak "unknown name part"
+      unless defined $partname
+         && $partname =~ /\A(?:first|von|last|jr)\z/;
+
+   exists $self->{$partname} ? @{$self->{$partname}} : ();
+}
+
+
+=item format (FORMAT)
+
+Formats a name according to the specifications encoded in FORMAT, which
+should be a C<Text::BibTeX::NameFormat> (or descendant) object. (In short,
+it must supply a method C<apply> which takes a C<Text::BibTeX::Name>
+object as its only argument.) Returns the formatted name as a string.
+
+See L<Text::BibTeX::NameFormat> for full details on formatting names.
+
+=cut
+
+# Formats this name by calling FORMAT's `apply' method with this object
+# as its argument, and returns whatever `apply' returns.
+sub format
+{
+   my $self = shift;
+   my ($format) = @_;
+
+   return $format->apply ($self);
+}
+
+1;
+
+=back
+
+=head1 SEE ALSO
+
+L<Text::BibTeX::Entry>, L<Text::BibTeX::NameFormat>, L<bt_split_names>.
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/NameFormat.pm b/BibTeX/NameFormat.pm
new file mode 100644
index 0000000..d7c6a91
--- /dev/null
+++ b/BibTeX/NameFormat.pm
@@ -0,0 +1,306 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/NameFormat.pm
+# CLASSES : Text::BibTeX::NameFormat
+# RELATIONS :
+# DESCRIPTION: Provides a way to format already-parsed BibTeX-style
+# author names. (The parsing is done by the
+# Text::BibTeX::Name class.)
+# CREATED : Nov 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: NameFormat.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::NameFormat;
+
+require 5.004;
+
+use strict;
+use Carp;
+
+=head1 NAME
+
+Text::BibTeX::NameFormat - format BibTeX-style author names
+
+=head1 SYNOPSIS
+
+ $format = new Text::BibTeX::NameFormat ($parts, $abbrev_first);
+
+ $format->set_text ($part,
+ $pre_part, $post_part,
+ $pre_token, $post_token);
+
+ $format->set_options ($part, $abbrev, $join_tokens, $join_part);
+
+ $formatted_name = $format->apply ($name);
+
+=head1 DESCRIPTION
+
+After splitting a name into its component parts (represented as a
+C<Text::BibTeX::Name> object), you often want to put it back together
+again as a single string formatted in a consistent way.
+C<Text::BibTeX::NameFormat> provides a very flexible way to do this,
+generally in two stages: first, you create a "name format" which
+describes how to put the tokens and parts of any name back together, and
+then you apply the format to a particular name.
+
+The "name format" is encapsulated in a C<Text::BibTeX::NameFormat>
+object. The constructor (C<new>) includes some clever behind-the-scenes
+trickery that means you can usually get away with calling it alone, and
+not need to do any customization of the format object. If you do need
+to customize the format, though, the C<set_text()> and C<set_options()>
+methods provide that capability.
+
+Note that C<Text::BibTeX::NameFormat> is a fairly direct translation of
+the name-formatting C interface in the B<btparse> library. This manual
+page is meant to provide enough information to use the Perl class, but
+for more details and examples, consult L<bt_format_names>.
+
+=head1 CONSTANTS
+
+Two enumerated types for dealing with names and name formatting have
+been brought from C into Perl. In the B<btparse> documentation, you'll
+see references to C<bt_namepart> and C<bt_joinmethod>. The former lists
+the four "parts" of a BibTeX name: first, von, last, and jr; its values
+(in both C and Perl) are C<BTN_FIRST>, C<BTN_VON>, C<BTN_LAST>, and
+C<BTN_JR>. The latter lists the ways in which C<bt_format_name()> (the
+C function that corresponds to C<Text::BibTeX::NameFormat>'s C<apply>
+method) can join adjacent tokens together: C<BTJ_MAYTIE>, C<BTJ_SPACE>,
+C<BTJ_FORCETIE>, and C<BTJ_NOTHING>. Both sets of values may be
+imported from the C<Text::BibTeX> module, using the import tags
+C<nameparts> and C<joinmethods>. For instance:
+
+ use Text::BibTeX qw(:nameparts :joinmethods);
+ use Text::BibTeX::Name;
+ use Text::BibTeX::NameFormat;
+
+The "name part" constants are used to specify surrounding text or
+formatting options on a per-part basis: for instance, you can supply the
+"pre-token" text, or the "abbreviate" flag, for a single part without
+affecting other parts. The "join methods" are two of the three
+formatting options that you can set for a part: you can control how to
+join the individual tokens of a name (C<"JR Smith">, or C<"J R Smith">,
+or C<"J~R Smith">), and you can control how the final token of one part
+is joined to the next part (C<"la Roche"> versus C<"la~Roche">).
+
+=head1 METHODS
+
+=over 4
+
+=item new (PARTS, ABBREV_FIRST)
+
+Creates a new name format, with the two most common customizations:
+which parts to include (and in what order), and whether to abbreviate
+the first name. PARTS should be a string with at most four characters,
+one representing each part that you want to occur in a formatted name.
+For example, C<"fvlj"> means to format names in "first von last jr"
+order, while C<"vljf"> denotes "von last jr first." ABBREV_FIRST is
+just a boolean value: false to print out the first name in full, and
+true to abbreviate it with periods after each token and discretionary
+ties between tokens. All intra- and inter-token punctuation and spacing
+is independently controllable with the C<set_text> and C<set_options>
+methods, although these will rarely be necessary---sensible defaults are
+chosen for everything, based on the PARTS and ABBREV_FIRST values that
+you supply. See the description of C<bt_create_name_format()> in
+L<bt_format_names> for full details of the choices made.
+
+=cut
+
+# Constructor.  Calls `create' with PARTS and ABBREV_FIRST and stores the
+# result under the `_cstruct' key of a new hash blessed into CLASS (or
+# into the class of an existing object used as the invocant).
+sub new
+{
+   my ($class, $parts, $abbrev_first) = @_;
+
+   # Scalar assignment on purpose: `create' is called in scalar context
+   my $cstruct = create ($parts, $abbrev_first);
+
+   return bless { '_cstruct' => $cstruct }, (ref ($class) || $class);
+}
+
+
+# Destructor: passes the stored `_cstruct' value to `free', unless the
+# object never got one.
+sub DESTROY
+{
+   my ($self) = @_;
+   my $cstruct = $self->{'_cstruct'};
+
+   free ($cstruct) if defined $cstruct;
+}
+
+
+=item set_text (PART, PRE_PART, POST_PART, PRE_TOKEN, POST_TOKEN)
+
+Allows you to customize some or all of the surrounding text for a single
+name part. Every name part has four possible chunks of text that go
+around or within it: before/after the part as a whole, and before/after
+each token in the part. For instance, if you are abbreviating first
+names and wish to control the punctuation after each token in the first
+name, you would set the "post token" text:
+
+ $format->set_text ('first', undef, undef, undef, '');
+
+would set the post-token text to the empty string, resulting in names
+like C<"J R Smith">. (Normally, abbreviated first names will have a
+period after each token: C<"J. R. Smith">.) Note that supplying
+C<undef> for the other three values leaves them unchanged.
+
+See L<bt_format_names> for full information on formatting names.
+
+=cut
+
+# Sets the surrounding text for one name part by forwarding all five
+# arguments to the XS routine `_set_text'.  PRE_PART, POST_PART, PRE_TOKEN
+# and POST_TOKEN may each be undef, which (per the documentation of this
+# method) leaves the corresponding text unchanged.  Always returns 1.
+sub set_text
+{
+   my ($self, $part, $pre_part, $post_part, $pre_token, $post_token) = @_;
+
+   # Engage in a little conspiracy with the XS code (_set_text) and the
+   # underlying C function (bt_set_format_text) here.  In particular,
+   # neither of those functions copy the strings we pass in here -- they
+   # just copy the C pointers.  Ultimately, those point into the Perl
+   # scalars that we're passing in now.  Thus, if those scalars were to
+   # go away (ref count drop to zero), the C code would be left with
+   # dangling pointers to free'd strings -- oops!  The solution is to
+   # keep references to them in the object, so their ref count cannot
+   # drop to zero while the C structure may still point at them.
+   #
+   # The references are kept per part and per text slot.  A slot is only
+   # replaced when a defined value was supplied for it, because an undef
+   # argument leaves the previous text (and hence the previous pointer)
+   # in place on the C side.  Replacing all four references on every
+   # call, as this method used to do, released strings that were still
+   # in use as soon as set_text was called for a second part or with
+   # undef in a previously-set slot.
+
+   _set_text ($self->{'_cstruct'},
+              $part,
+              $pre_part,
+              $post_part,
+              $pre_token,
+              $post_token);
+
+   # Only now, after the C side has switched to the new pointers, let go
+   # of the strings they replace.
+   my @new_refs = (\$pre_part, \$post_part, \$pre_token, \$post_token);
+   my $kept = ($self->{'textrefs'}{$part} ||= []);
+   my $slot;
+   foreach $slot (0 .. $#new_refs)
+   {
+      $kept->[$slot] = $new_refs[$slot]
+         if defined ${$new_refs[$slot]};
+   }
+
+   1;
+}
+
+
+=item set_options (PART, ABBREV, JOIN_TOKENS, JOIN_PART)
+
+Allows further customization of a name format: you can set the
+abbreviation flag and the two token-join methods. Alas, there is no
+mechanism for leaving a value unchanged; you must set everything with
+C<set_options>.
+
+For example, let's say that just dropping periods from abbreviated
+tokens in the first name isn't enough; you I<really> want to save
+space by jamming the abbreviated tokens together: C<"JR Smith"> rather
+than C<"J R Smith">. Assuming the two calls in the above example have
+been done, the following will finish the job:
+
+ $format->set_options (BTN_FIRST,
+ 1, # keep same value for abbrev flag
+ BTJ_NOTHING, # jam tokens together
+ BTJ_SPACE); # space after final token of part
+
+Note that we unfortunately had to know (and supply) the current values
+for the abbreviation flag and post-part join method, even though we were
+only setting the intra-part join method.
+
+=cut
+
+# Forwards PART, ABBREV, JOIN_TOKENS and JOIN_PART, together with this
+# object's `_cstruct' value, to the `_set_options' routine.  All four
+# values are always passed on; nothing is left unchanged.  Always
+# returns 1.
+sub set_options
+{
+   my $self = shift;
+   my ($part, $abbrev, $join_tokens, $join_part) = @_;
+
+   my $cstruct = $self->{'_cstruct'};
+   _set_options ($cstruct, $part, $abbrev, $join_tokens, $join_part);
+
+   return 1;
+}
+
+
+=item apply (NAME)
+
+Once a name format has been created and customized to your heart's
+content, you can use it to format any number of names using the C<apply>
+method. NAME must be a C<Text::BibTeX::Name> object (i.e., a pre-split
+name); C<apply> returns a string containing the parts of the name
+formatted according to the C<Text::BibTeX::NameFormat> structure it is
+called on.
+
+=cut
+
+# Formats NAME according to this format object.  Both objects must carry
+# a true `_cstruct' entry; NAME is checked first, then the format itself,
+# and the method croaks if either is missing.  Returns the result of
+# `format_name' called on the two `_cstruct' values.
+sub apply
+{
+   my ($self, $name) = @_;
+
+   my $name_struct = $name->{'_cstruct'};
+   croak "invalid Name object: no C structure"
+      unless $name_struct;
+
+   my $format_struct = $self->{'_cstruct'};
+   croak "invalid NameFormat object: no C structure"
+      unless $format_struct;
+
+   format_name ($name_struct, $format_struct);
+}
+
+=back
+
+=head1 EXAMPLES
+
+Although the process of splitting and formatting names may sound
+complicated and convoluted from reading the above (along with
+L<Text::BibTeX::Name>), it's actually quite simple. There are really
+only three steps to worry about: split the name (create a
+C<Text::BibTeX::Name> object), create and customize the format
+(C<Text::BibTeX::NameFormat> object), and apply the format to the name.
+
+The first step is covered in L<Text::BibTeX::Name>; here's a brief
+example:
+
+ $orig_name = 'Charles Louis Xavier Joseph de la Vall{\'e}e Poussin';
+ $name = new Text::BibTeX::Name $orig_name;
+
+The various parts of the name can now be accessed through
+C<Text::BibTeX::Name> methods; for instance C<$name-E<gt>part('von')>
+returns the list C<("de","la")>.
+
+Creating the name format is equally simple:
+
+ $format = new Text::BibTeX::NameFormat ('vljf', 1);
+
+creates a format that will print the name in "von last jr first" order,
+with the first name abbreviated. And for no extra charge, you get the
+right punctuation at the right place: a comma before any `jr' or `first'
+tokens, and periods after each `first' token.
+
+For instance, we can perform no further customization on this format,
+and apply it immediately to C<$name>. There are in fact two ways to do
+this, depending on whether you prefer to think of it in terms of
+"Applying the format to a name" or "formatting a name". The first is
+done with C<Text::BibTeX::NameFormat>'s C<apply> method:
+
+ $formatted_name = $format->apply ($name);
+
+while the second uses C<Text::BibTeX::Name>'s C<format> method:
+
+ $formatted_name = $name->format ($format);
+
+which is just a wrapper around C<Text::BibTeX::NameFormat::apply>. In
+either case, the result with the example name and format shown is
+
+ de~la Vall{\'e}e~Poussin, C.~L. X.~J.
+
+Note the strategic insertion of TeX "ties" (non-breakable spaces) at
+sensitive spots in the name. (The exact rules for insertion of
+discretionary ties are given in L<bt_format_names>.)
+
+=head1 SEE ALSO
+
+L<Text::BibTeX::Entry>, L<Text::BibTeX::Name>, L<bt_format_names>.
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
+
+=cut
diff --git a/BibTeX/Structure.pm b/BibTeX/Structure.pm
new file mode 100644
index 0000000..2fcd571
--- /dev/null
+++ b/BibTeX/Structure.pm
@@ -0,0 +1,1201 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/Structure.pm
+# CLASSES : Text::BibTeX::Structure, Text::BibTeX::StructuredEntry
+# RELATIONS :
+# DESCRIPTION: Provides the two base classes needed to implement
+# Text::BibTeX structure modules.
+# CREATED : in original form: Apr 1997
+# completely redone: Oct 1997
+# MODIFIED :
+# VERSION : $Id: Structure.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::Structure;
+
+require 5.004; # for 'isa' and 'can'
+
+use strict;
+use Carp;
+
+use Text::BibTeX ('check_class');
+
+=head1 NAME
+
+Text::BibTeX::Structure - provides base classes for user structure modules
+
+=head1 SYNOPSIS
+
+ # Define a 'Foo' structure for BibTeX databases: first, the
+ # structure class:
+
+ package Text::BibTeX::FooStructure;
+ @ISA = ('Text::BibTeX::Structure');
+
+ sub known_option
+ {
+ my ($self, $option) = @_;
+
+ ...
+ }
+
+ sub default_option
+ {
+ my ($self, $option) = @_;
+
+ ...
+ }
+
+ sub describe_entry
+ {
+ my $self = shift;
+
+ $self->set_fields ($type,
+ \@required_fields,
+ \@optional_fields,
+ [$constraint_1, $constraint_2, ...]);
+ ...
+ }
+
+
+ # Now, the structured entry class
+
+ package Text::BibTeX::FooEntry;
+ @ISA = ('Text::BibTeX::StructuredEntry');
+
+ # define whatever methods you like
+
+=head1 DESCRIPTION
+
+The module C<Text::BibTeX::Structure> provides two classes that form the
+basis of the B<btOOL> "structure module" system. This system is how
+database structures are defined and imposed on BibTeX files, and
+provides an elegant synthesis of object-oriented techniques with
+BibTeX-style database structures. Nothing described here is
+particularly deep or subtle; anyone familiar with object-oriented
+programming should be able to follow it. However, a fair bit of jargon
+is invented and tossed around, so pay attention.
+
+A I<database structure>, in B<btOOL> parlance, is just a set of allowed
+entry types and the rules for fields in each of those entry types.
+Currently, there are three kinds of rules that apply to fields: some
+fields are I<required>, meaning they must be present in every entry for
+a given type; some are I<optional>, meaning they may be present, and
+will be used if they are; other fields are members of I<constraint
+sets>, which are explained in L<"Field lists and constraint sets">
+below.
+
+A B<btOOL> structure is implemented with two classes: the I<structure
+class> and the I<structured entry class>. The former defines everything
+that applies to the structure as a whole (allowed types and field
+rules). The latter provides methods that operate on individual entries
+which conform (or are supposed to conform) to the structure. The two
+classes provided by the C<Text::BibTeX::Structure> module are
+C<Text::BibTeX::Structure> and C<Text::BibTeX::StructuredEntry>; these
+serve as base classes for, respectively, all structure classes and all
+structured entry classes. One canonical structure is provided as an
+example with B<btOOL>: the C<Bib> structure, which (via the
+C<BibStructure> and C<BibEntry> classes) provides the same functionality
+as the standard style files of BibTeX 0.99. It is hoped that other
+programmers will write new bibliography-related structures, possibly
+deriving from the C<Bib> structure, to emulate some of the functionality
+that is available through third-party BibTeX style files.
+
+The purpose of this manual page is to describe the whole "structure
+module" system. It is mainly for programmers wishing to implement a new
+database structure for data files with BibTeX syntax; if you are
+interested in the particular rules for the BibTeX-emulating C<Bib>
+structure, see L<Text::BibTeX::Bib>.
+
+Please note that the C<Text::BibTeX> prefix is dropped from most module
+and class names in this manual page, except where necessary.
+
+=head1 STRUCTURE CLASSES
+
+Structure classes have two roles: to define the list of allowed types
+and field rules, and to handle I<structure options>.
+
+=head2 Field lists and constraint sets
+
+Field lists and constraint sets define the database structure for a
+particular entry type: that is, they specify the rules which an entry
+must follow to conform to the structure (assuming that entry is of an
+allowed type). There are three components to the field rules for each
+entry type: a list of required fields, a list of optional fields, and
+I<field constraints>. Required and optional fields should be obvious to
+anyone with BibTeX experience: all required fields must be present, and
+any optional fields that are present have some meaning to the structure.
+(One could conceive of a "strict" interpretation, where any field not
+mentioned in the official definition is disallowed; this would be
+contrary to the open spirit of BibTeX databases, but could be useful in
+certain applications where a stricter level of control is desired.
+Currently, B<btOOL> does not offer such an option.)
+
+Field constraints capture the "one or the other, but not both" type of
+relationships present for some entry types in the BibTeX standard style
+files. Most BibTeX documentation glosses over the distinction between
+mutually constrained fields and required/optional fields. For instance,
+one of the standard entry types is C<book>, and "C<author> or C<editor>"
+is given in the list of required fields for that type. The meaning of
+this is that an entry of type C<book> must have I<either> the C<author>
+or C<editor> fields, but not both. Likewise, the "C<volume> or
+C<number>" are listed under the "optional fields" heading for C<book>
+entries; it would be more accurate to say that every C<book> entry may
+have one or the other, or neither, of C<volume> or C<number>---but not
+both.
+
+B<btOOL> attempts to clarify this situation by creating a third category
+of fields, those that are mutually constrained. For instance, neither
+C<author> nor C<editor> appears in the list of required fields for
+the C<book> type according to B<btOOL>; rather, a field constraint is
+created to express this relationship:
+
+ [1, 1, ['author', 'editor']]
+
+That is, a field constraint is a reference to a three-element list. The
+last element is a reference to the I<constraint set>, the list of fields
+to which the constraint applies. (Calling this a set is a bit
+inaccurate, as there are conditions in which the order of fields
+matters---see the C<check_field_constraints> method in L<"METHODS 2:
+BASE STRUCTURED ENTRY CLASS">.) The first two elements are the minimum
+and maximum number of fields from the constraint set that must be
+present for an entry to conform to the constraint. This constraint thus
+expresses that there must be exactly one (>= 1 and <= 1) of the fields
+C<author> and C<editor> in a C<book> entry.
+
+The "either one or neither, but not both" constraint that applies to the
+C<volume> and C<number> fields for C<book> entries is expressed slightly
+differently:
+
+ [0, 1, ['volume', 'number']]
+
+That is, either 0 or 1, but not the full 2, of C<volume> and C<number>
+may be present.
+
+It is important to note that checking and enforcing field constraints is
+based purely on counting which fields from a set are actually present;
+this mechanism can't capture "x must be present if y is" relationships.
+
+The requirements imposed on the actual structure class are simple: it
+must provide a method C<describe_entry> which sets up a fancy data
+structure describing the allowed entry types and all the field rules for
+those types. The C<Structure> class provides methods (inherited by a
+particular structure class) to help particular structure classes create
+this data structure in a consistent, controlled way. For instance, the
+C<describe_entry> method in the BibTeX 0.99-emulating
+C<BibStructure> class is quite simple:
+
+ sub describe_entry
+ {
+ my $self = shift;
+
+ # series of 13 calls to $self->set_fields (one for each standard
+ # entry type)
+ }
+
+One of those calls to the C<set_fields> method defines the rules for
+C<book> entries:
+
+ $self->set_fields ('book',
+ [qw(title publisher year)],
+ [qw(series address edition month note)],
+ [1, 1, [qw(author editor)]],
+ [0, 1, [qw(volume number)]]);
+
+The first field list is the list of required fields, and the second is
+the list of optional fields. Any number of field constraints may follow
+the list of optional fields; in this case, there are two, one for each
+of the constraints (C<author>/C<editor> and C<volume>/C<number>)
+described above. At no point is a list of allowed types explicitly
+supplied; rather, each call to C<set_fields> adds one more allowed type.
+
+New structure modules that derive from existing ones will probably use the
+C<add_fields> method (and possibly C<add_constraints>) to augment an
+existing entry type. Adding new types should be done with C<set_fields>,
+though.
+
+=head2 Structure options
+
+The other responsibility of structure classes is to handle I<structure
+options>. These are scalar values that let the user customize the
+behaviour of both the structure class and the structured entry class.
+For instance, one could have an option to enable "extended structure",
+which might add on a bunch of new entry types and new fields. (In this
+case, the C<describe_entry> method would have to pay attention to this
+option and modify its behaviour accordingly.) Or, one could have
+options to control how the structured entry class sorts or formats
+entries (for bibliography structures such as C<Bib>).
+
+The easy way to handle structure options is to provide two methods,
+C<known_option> and C<default_option>. These return, respectively,
+whether a given option is supported, and what its default value is. (If
+your structure doesn't support any options, you can just inherit these
+methods from the C<Structure> class. The default C<known_option>
+returns false for all options, and its companion C<default_option>
+crashes with an "unknown option" error.)
+
+Once C<known_option> and C<default_option> are provided, the structure
+class can sit back and inherit the more visible C<set_options> and
+C<get_options> methods from the C<Structure> class. These are the
+methods actually used to modify/query options, and will be used by
+application programs to customize the structure module's behaviour, and
+by the structure module itself to pay attention to the user's wishes.
+
+Options should generally have pure string values, so that the generic
+C<set_options> method doesn't have to parse user-supplied strings into some
+complicated structure. However, C<set_options> will take any scalar
+value, so if the structure module clearly documents its requirements,
+the application program could supply a structure that meets its needs.
+Keep in mind that this requires cooperation between the application and
+the structure module; the intermediary code in
+C<Text::BibTeX::Structure> knows nothing about the format or syntax of
+your structure's options, and whatever scalar the application passes via
+C<set_options> will be stored for your module to retrieve via
+C<get_options>.
+
+As an example, the C<Bib> structure supports a number of "markup"
+options that allow applications to control the markup language used for
+formatting bibliographic entries. These options are naturally paired,
+as formatting commands in markup languages generally have to be turned
+on and off. The C<Bib> structure thus expects references to two-element
+lists for markup options; to specify LaTeX 2e-style emphasis for book
+titles, an application such as C<btformat> would set the C<btitle_mkup>
+option as follows:
+
+ $structure->set_options (btitle_mkup => ['\emph{', '}']);
+
+Other options for other structures might have a more complicated
+structure, but it's up to the structure class to document and enforce
+this.
+
+=head1 STRUCTURED ENTRY CLASSES
+
+A I<structured entry class> defines the behaviour of individual entries
+under the regime of a particular database structure. This is the
+I<raison d'E<ecirc>tre> for any database structure: the structure class
+merely lays out the rules for entries to conform to the structure, but
+the structured entry class provides the methods that actually operate on
+individual entries. Because this is completely open-ended, the
+requirements of a structured entry class are much less rigid than for a
+structure class. In fact, all of the requirements of a structured entry
+class can be met simply by inheriting from
+C<Text::BibTeX::StructuredEntry>, the other class provided by the
+C<Text::BibTeX::Structure> module. (For the record, those requirements
+are: a structured entry class must provide the entry
+parse/query/manipulate methods of the C<Entry> class, and it must
+provide the C<check>, C<coerce>, and C<silently_coerce> methods of the
+C<StructuredEntry> class. Since C<StructuredEntry> inherits from
+C<Entry>, both of these requirements are met "for free" by structured
+entry classes that inherit from C<Text::BibTeX::StructuredEntry>, so
+naturally this is the recommended course of action!)
+
+There are deliberately no other methods required of structured entry
+classes. A particular application (eg. C<btformat> for bibliography
+structures) will require certain methods, but it's up to the application
+and the structure module to work out the requirements through
+documentation.
+
+=head1 CLASS INTERACTIONS
+
+Imposing a database structure on your entries sets off a chain reaction
+of interactions between various classes in the C<Text::BibTeX> library
+that should be transparent when all goes well. It could prove confusing
+if things go wrong and you have to go wading through several levels of
+application program, core C<Text::BibTeX> classes, and some structure
+module.
+
+The justification for this complicated behaviour is that it allows you
+to write programs that will use a particular structure module without
+knowing the name of the structure when you write the program. Thus, the
+user can supply a database structure, and ultimately the entry objects
+you manipulate will be blessed into a class supplied by the structure
+module. A short example will illustrate this.
+
+Typically, a C<Text::BibTeX>-based program is based around a kernel of
+code like this:
+
+ $bibfile = new Text::BibTeX::File "foo.bib";
+ while ($entry = new Text::BibTeX::Entry $bibfile)
+ {
+ # process $entry
+ }
+
+In this case, nothing fancy is happening behind the scenes: the
+C<$bibfile> object is blessed into the C<Text::BibTeX::File> class, and
+C<$entry> is blessed into C<Text::BibTeX::Entry>. This is the
+conventional behaviour of Perl classes, but it is not the only possible
+behaviour. Let us now suppose that C<$bibfile> is expected to conform
+to a database structure specified by C<$structure> (presumably a
+user-supplied value, and thus unknown at compile-time):
+
+ $bibfile = new Text::BibTeX::File "foo.bib";
+ $bibfile->set_structure ($structure);
+ while ($entry = new Text::BibTeX::Entry $bibfile)
+ {
+ # process $entry
+ }
+
+A lot happens behind the scenes with the call to C<$bibfile>'s
+C<set_structure> method. First, a new structure object is created from
+C<$structure>. The structure name implies the name of a Perl
+module---the structure module---which is C<require>'d by the
+C<Structure> constructor. (The main consequence of this is that any
+compile-time errors in your structure module will not be revealed until
+a C<Text::BibTeX::File::set_structure> or
+C<Text::BibTeX::Structure::new> call attempts to load it.)
+
+Recall that the first responsibility of a structure module is to define
+a structure class. The "structure object" created by the
+C<set_structure> method call is actually an object of this class; this
+is the first bit of trickery---the structure object (buried behind the
+scenes) is blessed into a class whose name is not known until run-time.
+
+Now, the behaviour of the C<Text::BibTeX::Entry::new> constructor
+changes subtly: rather than returning an object blessed into the
+C<Text::BibTeX::Entry> class as you might expect from the code, the
+object is blessed into the structured entry class associated with
+C<$structure>.
+
+For example, if the value of C<$structure> is C<"Foo">, that means the
+user has supplied a module implementing the C<Foo> structure.
+(Ordinarily, this module would be called C<Text::BibTeX::Foo>---but you
+can customize this.) Calling the C<set_structure> method on C<$bibfile>
+will attempt to create a new structure object via the
+C<Text::BibTeX::Structure> constructor, which loads the structure module
+C<Text::BibTeX::Foo>. Once this module is successfully loaded, the new
+object is blessed into its structure class, which will presumably be
+called C<Text::BibTeX::FooStructure> (again, this is customizable). The
+new object is supplied with the user's structure options via the
+C<set_options> method (usually inherited), and then it is asked to
+describe the actual entry layout by calling its C<describe_entry>
+method. This, in turn, will usually call the inherited C<set_fields>
+method for each entry type in the database structure. When the
+C<Structure> constructor is finished, the new structure object is stored
+in the C<File> object (remember, we started all this by calling
+C<set_structure> on a C<File> object) for future reference.
+
+Then, when a new C<Entry> object is created and parsed from that
+particular C<File> object, some more trickery happens. Trivially, the
+structure object stored in the C<File> object is also stored in the
+C<Entry> object. (The idea is that entries could belong to a database
+structure independently of any file, but usually they will just get the
+structure that was assigned to their database file.) More importantly,
+the new C<Entry> object is re-blessed into the structured entry class
+supplied by the structure module---presumably, in this case,
+C<Text::BibTeX::FooEntry> (also customizable).
+
+Once all this sleight-of-hand is accomplished, the application may treat
+its entry objects as objects of the structured entry class for the
+C<Foo> structure---they may call the check/coerce methods inherited from
+C<Text::BibTeX::StructuredEntry>, and they may also call any methods
+specific to entries for this particular database structure. What these
+methods might be is up to the structure implementor to decide and
+document; thus, applications may be specific to one particular database
+structure, or they may work on all structures that supply certain
+methods. The choice is up to the application developer, and the range
+of options open to him depends on which methods structure implementors
+provide.
+
+=head1 EXAMPLE
+
+For example code, please refer to the source of the C<Bib> module and
+the C<btcheck>, C<btsort>, and C<btformat> applications supplied with
+C<Text::BibTeX>.
+
+=head1 METHODS 1: BASE STRUCTURE CLASS
+
+The first class provided by the C<Text::BibTeX::Structure> module is
+C<Text::BibTeX::Structure>. This class is intended to provide methods
+that will be inherited by user-supplied structure classes; such classes
+should not override any of the methods described here (except
+C<known_option> and C<default_option>) without very good reason.
+Furthermore, overriding the C<new> method would be useless, because in
+general applications won't know the name of your structure class---they
+can only call C<Text::BibTeX::Structure::new> (usually via
+C<Text::BibTeX::File::set_structure>).
+
+Finally, there are three methods that structure classes should
+implement: C<known_option>, C<default_option>, and C<describe_entry>.
+The first two are described in L<"Structure options"> above, the latter
+in L<"Field lists and constraint sets">. Note that C<describe_entry>
+depends heavily on the C<set_fields>, C<add_fields>, and
+C<add_constraints> methods described here.
+
+=head2 Constructor/simple query methods
+
+=over 4
+
+=item new (STRUCTURE, [OPTION =E<gt> VALUE, ...])
+
+Constructs a new structure object---I<not> a C<Text::BibTeX::Structure>
+object, but rather an object blessed into the structure class associated
+with STRUCTURE. More precisely:
+
+=over 4
+
+=item *
+
+Loads (with C<require>) the module implementing STRUCTURE. In the
+absence of other information, the module name is derived by appending
+STRUCTURE to C<"Text::BibTeX::">---thus, the module C<Text::BibTeX::Bib>
+implements the C<Bib> structure. Use the pseudo-option C<module> to
+override this module name. For instance, if the structure C<Foo> is
+implemented by the module C<Foo>:
+
+ $structure = new Text::BibTeX::Structure
+ ('Foo', module => 'Foo');
+
+This method C<die>s if there are any errors loading/compiling the
+structure module.
+
+=item *
+
+Verifies that the structure module provides a structure class and a
+structured entry class. The structure class is named by appending
+C<"Structure"> to the name of the module, and the structured entry class
+by appending C<"Entry">. Thus, in the absence of a C<module> option,
+these two classes (for the C<Bib> structure) would be named
+C<Text::BibTeX::BibStructure> and C<Text::BibTeX::BibEntry>. Either or
+both of the default class names may be overridden by having the
+structure module return a reference to a hash (as opposed to the
+traditional C<1> returned by modules). This hash could then supply a
+C<structure_class> element to name the structure class, and an
+C<entry_class> element to name the structured entry class.
+
+Apart from ensuring that the two classes actually exist, C<new> verifies
+that they inherit correctly (from C<Text::BibTeX::Structure> and
+C<Text::BibTeX::StructuredEntry> respectively), and that the structure
+class provides the required C<known_option>, C<default_option>, and
+C<describe_entry> methods.
+
+=item *
+
+Creates the new structure object, and blesses it into the structure
+class. Supplies it with options by passing all (OPTION, VALUE) pairs to
+its C<set_options> method. Calls its C<describe_entry> method, which
+should list the field requirements for all entry types recognized by
+this structure. C<describe_entry> will most likely use some or all of
+the C<set_fields>, C<add_fields>, and C<add_constraints>
+methods---described below---for this.
+
+=back
+
+=cut
+
+# Constructor: loads the module implementing structure $name, verifies the
+# structure class and structured entry class it provides, and returns a
+# new object blessed into the structure class (not into $type).
+#
+#   $type    - invoking class, presumably "Text::BibTeX::Structure"; it is
+#              not used, because the object's class comes from the module
+#   $name    - name of the user-supplied structure
+#   %options - the pseudo-option 'module' (overrides the default module
+#              name "Text::BibTeX::$name") plus structure options, which
+#              are passed on to set_options
+#
+# Dies if the module name is malformed or the module cannot be loaded;
+# check_class is relied upon to reject missing or badly derived classes.
+sub new
+{
+   my ($type, $name, %options) = @_;
+
+   my $module = (delete $options{'module'}) || ('Text::BibTeX::' . $name);
+
+   # The module name is interpolated into a string eval below, so accept
+   # only a plain package name; anything else could smuggle arbitrary
+   # code into the eval.
+   die "Text::BibTeX::Structure: invalid module name \"$module\" for " .
+       "user structure \"$name\"\n"
+      unless $module =~ /\A[A-Za-z_]\w*(?:::\w+)*\z/;
+
+   my $module_info = eval "require $module";
+   die "Text::BibTeX::Structure: unable to load module \"$module\" for " .
+       "user structure \"$name\": $@\n"
+      if $@;
+
+   # A structure module may return a hash ref (rather than the usual 1)
+   # to override either or both of the default class names.
+   my ($structure_class, $entry_class);
+   if (ref $module_info eq 'HASH')
+   {
+      $structure_class = $module_info->{'structure_class'};
+      $entry_class = $module_info->{'entry_class'};
+   }
+   $structure_class ||= $module . 'Structure';
+   $entry_class ||= $module . 'Entry';
+
+   check_class ($structure_class, "user structure class",
+                'Text::BibTeX::Structure',
+                ['known_option', 'default_option', 'describe_entry']);
+   check_class ($entry_class, "user entry class",
+                'Text::BibTeX::StructuredEntry',
+                []);
+
+   my $self = bless {}, $structure_class;
+   $self->{entry_class} = $entry_class;
+   $self->{name} = $name;
+   $self->set_options (%options);       # validates each option through
+                                        # the class's known_option method
+   $self->describe_entry;               # required of the structure class
+   $self;
+}
+
+
+=item name ()
+
+Returns the name of the structure described by the object.
+
+=item entry_class ()
+
+Returns the name of the structured entry class associated with this
+structure.
+
+=back
+
+=cut
+
+# Accessor: the structure name stored in the object's 'name' slot.
+sub name
+{
+   my $self = shift;
+   return $self->{'name'};
+}
+
+# Accessor: the structured entry class name stored in 'entry_class'.
+sub entry_class
+{
+   my $self = shift;
+   return $self->{'entry_class'};
+}
+
+
+=head2 Field structure description methods
+
+=over 4
+
+=item add_constraints (TYPE, CONSTRAINT, ...)
+
+Adds one or more field constraints to the structure. A field constraint
+is specified as a reference to a three-element list; the last element is
+a reference to the list of fields affected, and the first two elements
+are the minimum and maximum number of fields from the constraint set
+allowed in an entry of type TYPE. See L<"Field lists and constraint
+sets"> for a full explanation of field constraints.
+
+=cut
+
+# Appends field constraints for entries of type $type.
+#
+#   $type        - entry type the constraints apply to
+#   @constraints - constraint records, each [MIN, MAX, [FIELD, ...]]
+#
+# Every field named in a constraint is mapped to its constraint record in
+# $self->{fields}{$type}; the records themselves are appended to
+# $self->{fieldgroups}{$type}{'constraints'}.  Croaks on a malformed
+# record or if 0 <= MIN <= MAX <= number of fields does not hold.
+sub add_constraints
+{
+   my ($self, $type, @constraints) = @_;
+
+   foreach my $constraint (@constraints)
+   {
+      # Check the record's shape *before* dereferencing it, so that a
+      # bad record yields this message rather than a strict-refs error.
+      croak "add_constraints: constraint record must be a 3-element " .
+            "list, with the last element a list ref"
+         unless (ref ($constraint) eq 'ARRAY' &&
+                 @$constraint == 3 &&
+                 ref ($constraint->[2]) eq 'ARRAY');
+
+      my ($min, $max, $fields) = @$constraint;
+      croak "add_constraints: constraint record must have 0 <= 'min' " .
+            "<= 'max' <= length of field list"
+         unless ($min >= 0 && $max >= $min && $max <= @$fields);
+
+      foreach my $field (@$fields)
+      {
+         $self->{fields}{$type}{$field} = $constraint;
+      }
+   }
+   push (@{$self->{fieldgroups}{$type}{'constraints'}}, @constraints);
+
+} # add_constraints
+
+
+=item add_fields (TYPE, REQUIRED [, OPTIONAL [, CONSTRAINT, ...]])
+
+Adds fields to the required/optional lists for entries of type TYPE.
+Can also add field constraints, but you can just as easily use
+C<add_constraints> for that.
+
+REQUIRED and OPTIONAL, if defined, should be references to lists of
+fields to add to the respective field lists. The CONSTRAINTs, if given,
+are exactly as described for C<add_constraints> above.
+
+=cut
+
+# Appends to the required/optional field lists for entries of type $type,
+# then hands any constraint records to add_constraints.
+#
+#   $required, $optional - list refs of field names; a false value means
+#                          "nothing to add" for that list
+#   @constraints         - constraint records, as for add_constraints
+#
+# To be really robust and inheritance-friendly, this should also check
+# that no field ends up in more than one list, and let sub-classes delete
+# fields or move them to another group.
+sub add_fields                          # add fields for a particular type
+{
+   my $self = shift;
+   my ($type, $required, $optional, @constraints) = @_;
+
+   my %new_fields = ('required' => $required, 'optional' => $optional);
+   foreach my $status ('required', 'optional')
+   {
+      my $list = $new_fields{$status};
+      next unless $list;
+
+      push (@{$self->{fieldgroups}{$type}{$status}}, @$list);
+      foreach my $field (@$list)
+      {
+         $self->{fields}{$type}{$field} = $status;
+      }
+   }
+
+   $self->add_constraints ($type, @constraints);
+
+} # add_fields
+
+
+=item set_fields (TYPE, REQUIRED [, OPTIONAL [, CONSTRAINTS, ...]])
+
+Sets the lists of required/optional fields for entries of type TYPE.
+Identical to C<add_fields>, except that the field lists and list of
+constraints are set from scratch here, rather than being added to.
+
+=back
+
+=cut
+
+# Replaces the required/optional field lists and the constraint list for
+# entries of type $type (contrast add_fields, which appends).
+#
+#   $required, $optional - list refs of field names; a false value leaves
+#                          that list empty
+#   @constraints         - constraint records, as for add_constraints
+#
+# The field lists are copied rather than stored by reference, so that a
+# later add_fields call cannot modify the caller's arrays.  Both lists
+# are always reset, so nothing survives from an earlier definition of
+# the same type.
+sub set_fields
+{
+   my ($self, $type, $required, $optional, @constraints) = @_;
+
+   %{$self->{fields}{$type}} = ();
+
+   my %new_fields = ('required' => $required, 'optional' => $optional);
+   foreach my $status ('required', 'optional')
+   {
+      my @list = $new_fields{$status} ? @{$new_fields{$status}} : ();
+      $self->{fieldgroups}{$type}{$status} = \@list;
+      foreach my $field (@list)
+      {
+         $self->{fields}{$type}{$field} = $status;
+      }
+   }
+
+   @{$self->{fieldgroups}{$type}{'constraints'}} = ();
+   $self->add_constraints ($type, @constraints);
+
+} # set_fields
+
+
+=head2 Field structure query methods
+
+=over 4
+
+=item types ()
+
+Returns the list of entry types supported by the structure.
+
+=item known_type (TYPE)
+
+Returns true if TYPE is a supported entry type.
+
+=item known_field (TYPE, FIELD)
+
+Returns true if FIELD is in the required list, optional list, or one of
+the constraint sets for entries of type TYPE.
+
+=item required_fields (TYPE)
+
+Returns the list of required fields for entries of type TYPE.
+
+=item optional_fields (TYPE)
+
+Returns the list of optional fields for entries of type TYPE.
+
+=item field_constraints (TYPE)
+
+Returns the list of field constraints (in the format supplied to
+C<add_constraints>) for entries of type TYPE.
+
+=back
+
+=cut
+
+# Returns the entry types that have field groups defined, i.e. the keys
+# of $self->{'fieldgroups'} (in no particular order).
+sub types
+{
+   my ($self) = @_;
+   return keys %{$self->{'fieldgroups'}};
+}
+
+# True if field groups have been defined for entry type $type.
+sub known_type
+{
+   my $self = shift;
+   my $type = shift;
+   return exists $self->{'fieldgroups'}{$type};
+}
+
+# Internal guard used by the field query methods: croaks unless field
+# groups have been defined for entry type $type.
+sub _check_type
+{
+   my $self = shift;
+   my ($type) = @_;
+
+   my $known = exists $self->{'fieldgroups'}{$type};
+   $known or
+      croak "unknown entry type \"$type\" for $self->{'name'} structure";
+}
+
+# Looks up $field for entries of type $type (croaking, via _check_type,
+# if the type is unknown).  The result is whatever was recorded for the
+# field: 'required', 'optional', a constraint record, or undef if the
+# field is not known for this type.
+sub known_field
+{
+   my $self = shift;
+   my ($type, $field) = @_;
+
+   $self->_check_type ($type);
+   return $self->{'fields'}{$type}{$field};
+}
+
+# Returns the list of required fields for entries of type $type (the
+# number of them in scalar context).  Croaks, via _check_type, if the
+# type is unknown.
+sub required_fields
+{
+   my ($self, $type) = @_;
+
+   $self->_check_type ($type);
+
+   # A known type need not have a required list at all (e.g. it was
+   # defined with only optional fields); treat that as an empty list
+   # instead of dying on the dereference of an undefined value.
+   my $required = $self->{'fieldgroups'}{$type}{'required'} || [];
+   @$required;
+}
+
+# Returns the list of optional fields for entries of type $type (the
+# number of them in scalar context).  Croaks, via _check_type, if the
+# type is unknown.
+sub optional_fields
+{
+   my ($self, $type) = @_;
+
+   $self->_check_type ($type);
+
+   # A known type need not have an optional list at all; treat that as
+   # an empty list instead of dying on the dereference of an undefined
+   # value.
+   my $optional = $self->{'fieldgroups'}{$type}{'optional'} || [];
+   @$optional;
+}
+
+# Returns the constraint records stored for entries of type $type, in
+# the format given to add_constraints.  Croaks, via _check_type, if the
+# type is unknown.
+sub field_constraints
+{
+   my $self = shift;
+   my ($type) = @_;
+
+   $self->_check_type ($type);
+   return @{$self->{'fieldgroups'}{$type}{'constraints'}};
+}
+
+
+=head2 Option methods
+
+=over 4
+
+=item known_option (OPTION)
+
+Returns false. This is mainly for the use of derived structures that
+don't have any options, and thus don't need to provide their own
+C<known_option> method. Structures that actually offer options should
+override this method; it should return true if OPTION is a supported
+option.
+
+=cut
+
+# Default for structures that offer no options: reports every OPTION as
+# unsupported.  Structures with options override this.
+sub known_option { 0 }
+
+
+=item default_option (OPTION)
+
+Crashes with an "unknown option" message. Again, this is mainly for use
+by derived structure classes that don't actually offer any options.
+Structures that handle options should override this method; every option
+handled by C<known_option> should have a default value (which might just
+be C<undef>) that is returned by C<default_option>. Your
+C<default_option> method should crash on an unknown option, perhaps by
+calling C<SUPER::default_option> (in order to ensure consistent error
+messages). For example:
+
+ sub default_option
+ {
+ my ($self, $option) = @_;
+ return $default_options{$option}
+ if exists $default_options{$option};
+ $self->SUPER::default_option ($option); # crash
+ }
+
+The default value for an option is returned by C<get_options> when that
+option has not been explicitly set with C<set_options>.
+
+=cut
+
+# Default for structures that offer no options: always croaks with an
+# "unknown option" message naming the option and the structure.
+sub default_option
+{
+   my $self = shift;
+   my $option = shift;
+
+   croak "unknown option \"$option\" for structure \"$self->{'name'}\"";
+}
+
+
+=item set_options (OPTION =E<gt> VALUE, ...)
+
+Sets one or more option values. (You can supply as many
+C<OPTION =E<gt> VALUE> pairs as you like, just so long as there are an even
+number of arguments.) Each OPTION must be handled by the structure
+module (as indicated by the C<known_option> method); if not
+C<set_options> will C<croak>. Each VALUE may be any scalar value; it's
+up to the structure module to validate them.
+
+=cut
+
+# Stores OPTION => VALUE pairs in $self->{'options'}.  Croaks if given an
+# odd number of arguments, or on the first option that the object's
+# known_option method does not accept (pairs before it stay stored).
+sub set_options
+{
+   my ($self, @pairs) = @_;
+
+   croak "must supply an even number of arguments (option/value pairs)"
+      if @pairs % 2;
+
+   while (@pairs)
+   {
+      my ($option, $value) = splice (@pairs, 0, 2);
+      $self->known_option ($option) or
+         croak "unknown option \"$option\" for structure \"$self->{'name'}\"";
+      $self->{'options'}{$option} = $value;
+   }
+}
+
+
+=item get_options (OPTION, ...)
+
+Returns the value(s) of one or more options. Any OPTION that has not
+been set by C<set_options> will return its default value, fetched using
+the C<default_option> method. If OPTION is not supported by the
+structure module, then your program either already crashed (when it
+tried to set it with C<set_options>), or it will crash here (thanks to
+calling C<default_option>).
+
+=back
+
+=cut
+
+# Returns the value of each named option: the stored value if the option
+# has an entry in $self->{'options'}, otherwise whatever the object's
+# default_option method returns for it.  In list context all values are
+# returned; in scalar context only the first.
+sub get_options
+{
+   my ($self, @wanted) = @_;
+
+   my $stored = $self->{'options'};
+   my @values;
+   foreach my $option (@wanted)
+   {
+      my $value;
+      if (exists $stored->{$option})
+      {
+         $value = $stored->{$option};
+      }
+      else
+      {
+         $value = $self->default_option ($option);
+      }
+      push (@values, $value);
+   }
+
+   return wantarray ? @values : $values[0];
+}
+
+
+
+# ----------------------------------------------------------------------
+# Text::BibTeX::StructuredEntry methods dealing with entry structure
+
+package Text::BibTeX::StructuredEntry;
+use strict;
+use vars qw(@ISA);
+use Carp;
+
+@ISA = ('Text::BibTeX::Entry');
+use Text::BibTeX qw(:metatypes display_list);
+
+=head1 METHODS 2: BASE STRUCTURED ENTRY CLASS
+
+The other class provided by the C<Structure> module is
+C<StructuredEntry>, the base class for all structured entry classes.
+This class inherits from C<Entry>, so all of its entry
+query/manipulation methods are available. C<StructuredEntry> adds
+methods for checking that an entry conforms to the database structure
+defined by a structure class.
+
+It only makes sense for C<StructuredEntry> to be used as a base class;
+you would never create standalone C<StructuredEntry> objects. The
+superficial reason for this is that only particular structured-entry
+classes have an actual structure class associated with them;
+C<StructuredEntry> on its own doesn't have any information about allowed
+types, required fields, field constraints, and so on. For a deeper
+understanding, consult L<"CLASS INTERACTIONS"> above.
+
+Since C<StructuredEntry> derives from C<Entry>, it naturally operates on
+BibTeX entries. Hence, the following descriptions refer to "the
+entry"---this is just the object (entry) being operated on. Note that
+these methods are presented in bottom-up order, meaning that the methods
+you're most likely to actually use---C<check>, C<coerce>, and
+C<silently_coerce>---are at the bottom. On a first reading, you'll
+probably want to skip down to them for a quick summary.
+
+=over 4
+
+=item structure ()
+
+Returns the object that defines the structure to which the entry is
+supposed to conform. This will be an instantiation of some structure
+class, and exists mainly so the check/coerce methods can query the
+structure about the types and fields it recognizes. If, for some
+reason, you wanted to query an entry's structure about the validity of
+type C<foo>, you might do this:
+
+ # assume $entry is an object of some structured entry class, i.e.
+ # it inherits from Text::BibTeX::StructuredEntry
+ $structure = $entry->structure;
+ $foo_known = $structure->known_type ('foo');
+
+=cut
+
+# Accessor: the structure object stored in the entry's 'structure' slot.
+sub structure
+{
+   my ($self) = @_;
+   return $self->{'structure'};
+}
+
+
+=item check_type ([WARN])
+
+Returns true if the entry has a valid type according to its structure.
+If WARN is true, then an invalid type results in a warning being
+printed.
+
+=cut
+
+# Returns 1 if the entry's structure recognizes the entry's type, else 0.
+# When $warn is true, an unrecognized type is also reported through the
+# entry's warn method.
+sub check_type
+{
+   my $self = shift;
+   my ($warn) = @_;
+
+   my $type = $self->{'type'};
+   return 1 if $self->{'structure'}->known_type ($type);
+
+   $self->warn ("unknown entry type \"$type\"") if $warn;
+   return 0;
+}
+
+
+=item check_required_fields ([WARN [, COERCE]])
+
+Checks that all required fields are present in the entry. If WARN is
+true, then a warning is printed for every missing field. If COERCE is
+true, then missing fields are set to the empty string.
+
+This isn't generally used by other code; see the C<check> and C<coerce>
+methods below.
+
+=cut
+
+# Checks that every field the structure requires for this entry's type
+# exists in the entry.
+#
+#   $warn   - if true, report each missing field via the warn method
+#   $coerce - if true, set each missing field to the empty string
+#
+# Returns $coerce if it is true (coercion is always successful);
+# otherwise returns true only if no required field was missing.
+sub check_required_fields
+{
+   my ($self, $warn, $coerce) = @_;
+
+   my $num_missing = 0;
+   my @required = $self->{'structure'}->required_fields ($self->type);
+
+   foreach my $field (@required)
+   {
+      next if $self->exists ($field);
+      $num_missing++;
+
+      $self->set ($field, '') if $coerce;
+
+      if ($warn)
+      {
+         my $warning = "required field '$field' not present";
+         $warning .= " (setting to empty string)" if $coerce;
+         $self->warn ($warning);
+      }
+   }
+
+   return $coerce || ($num_missing == 0);
+
+} # check_required_fields
+
+
+=item check_field_constraints ([WARN [, COERCE]])
+
+Checks that the entry conforms to all of the field constraints imposed
+by its structure. Recall that a field constraint consists of a list of
+fields, and a minimum and maximum number of those fields that must be
+present in an entry. For each constraint, C<check_field_constraints>
+simply counts how many fields in the constraint's field set are present.
+If this count falls below the minimum or above the maximum for that
+constraint and WARN is true, a warning is issued. In general, this
+warning is of the form "between x and y of fields foo, bar, and baz must
+be present". The more common cases are handled specially to generate
+more useful and human-friendly warning messages.
+
+If COERCE is true, then the entry is modified to force it into
+conformance with all field constraints. How this is done depends on
+whether the violation is a matter of not enough fields present in the
+entry, or of too many fields present. In the former case, just enough
+fields are added (as empty strings) to meet the requirements of the
+constraint; in the latter case, fields are deleted. Which fields to add
+or delete is controlled by the order of fields in the constraint's field
+list.
+
+An example should clarify this. For instance, a field constraint
+specifying that exactly one of C<author> or C<editor> must appear in an
+entry would look like this:
+
+ [1, 1, ['author', 'editor']]
+
+Suppose the following entry is parsed and expected to conform to this
+structure:
+
+ @inbook{unknown:1997a,
+ title = "An Unattributed Book Chapter",
+ booktitle = "An Unedited Book",
+ publisher = "Foo, Bar \& Company",
+ year = 1997
+ }
+
+If C<check_field_constraints> is called on this entry with COERCE true
+(which is done by any of the C<full_check>, C<coerce>, and
+C<silently_coerce> methods), then the C<author> field is set to the
+empty string. (We go through the list of fields in the constraint's
+field set in order -- since C<author> is the first missing field, we
+supply it; with that done, the entry now conforms to the
+C<author>/C<editor> constraint, so we're done.)
+
+However, if the same structure was applied to this entry:
+
+ @inbook{smith:1997a,
+ author = "John Smith",
+ editor = "Fred Jones",
+ ...
+ }
+
+then the C<editor> field would be deleted. In this case, we allow the
+first field in the constraint's field list---C<author>. Since only one
+field from the set may be present, all fields after the first one are in
+violation, so they are deleted.
+
+Again, this method isn't generally used by other code; rather, it is
+called by C<full_check> and its friends below.
+
+=cut
+
+# Checks the entry against every field constraint its structure defines
+# for the entry's type.  A constraint [MIN, MAX, [FIELD, ...]] is violated
+# when the number of listed fields present in the entry is below MIN or
+# above MAX.
+#
+#   $warn   - if true, report each violated constraint via the warn method
+#   $coerce - if true, modify the entry so that it conforms
+#
+# Coercion follows the order of the constraint's field list: when too few
+# fields are present, the first *missing* fields are set to the empty
+# string; when too many are present, the first MAX *present* fields are
+# kept and the remaining present ones deleted.  Fields that already hold
+# a value are never overwritten.
+#
+# Returns $coerce if it is true (coercion is always successful);
+# otherwise returns true only if no constraint was violated.
+sub check_field_constraints
+{
+   my ($self, $warn, $coerce) = @_;
+
+   my $num_errors = 0;
+
+   foreach my $constraint
+      ($self->{'structure'}->field_constraints ($self->type))
+   {
+      my ($min, $max, $fields) = @$constraint;
+
+      # Partition the constraint's fields, preserving their order, so
+      # that coercion acts on fields by presence rather than by their
+      # position in the list.
+      my (@present, @missing);
+      foreach my $field (@$fields)
+      {
+         if ($self->exists ($field))
+         {
+            push (@present, $field);
+         }
+         else
+         {
+            push (@missing, $field);
+         }
+      }
+      my $num_seen = @present;
+
+      next unless ($num_seen < $min || $num_seen > $max);
+
+      # The common shapes of constraint get friendlier wording than the
+      # general "between x and y" message.
+      my $warning;
+      if ($warn)
+      {
+         if ($min == 0 && $max > 0)
+         {
+            $warning = sprintf ("at most %d of fields %s may be present",
+                                $max, display_list ($fields, 1));
+         }
+         elsif ($min < @$fields && $max == @$fields)
+         {
+            $warning = sprintf ("at least %d of fields %s must be present",
+                                $min, display_list ($fields, 1));
+         }
+         elsif ($min == $max)
+         {
+            $warning = sprintf ("exactly %d of fields %s %s be present",
+                                $min, display_list ($fields, 1),
+                                ($num_seen < $min) ? "must" : "may");
+         }
+         else
+         {
+            $warning = sprintf ("between %d and %d of fields %s " .
+                                "must be present",
+                                $min, $max, display_list ($fields, 1))
+         }
+      }
+
+      if ($coerce)
+      {
+         if ($num_seen < $min)
+         {
+            # supply just enough of the missing fields to reach $min
+            my @blank = @missing[0 .. ($min - $num_seen - 1)];
+            $warning .= sprintf (" (setting %s to empty string)",
+                                 display_list (\@blank, 1))
+               if $warn;
+            $self->set (map (($_, ''), @blank));
+         }
+         elsif ($num_seen > $max)
+         {
+            # keep the first $max present fields, drop the rest
+            my @delete = @present[$max .. $#present];
+            $warning .= sprintf (" (deleting %s)",
+                                 display_list (\@delete, 1))
+               if $warn;
+            $self->delete (@delete);
+         }
+      } # if $coerce
+
+      $self->warn ($warning) if $warn;
+      $num_errors++;
+
+   } # foreach $constraint
+
+   return $coerce || ($num_errors == 0);
+
+} # check_field_constraints
+
+
+=item full_check ([WARN [, COERCE]])
+
+Returns true if an entry's type and fields are all valid. That is, it
+calls C<check_type>, C<check_required_fields>, and
+C<check_field_constraints>; if all of them return true, then so does
+C<full_check>. WARN and COERCE are simply passed on to the three
+C<check_*> methods: the first controls the printing of warnings, and the
+second decides whether we should modify the entry to force it into
+conformance.
+
+=cut
+
+# Runs the type, required-field and field-constraint checks in turn,
+# passing $warn and $coerce through to them.  Entries whose metatype is
+# not BTE_REGULAR are accepted (1) without any checking.  An unknown
+# entry type fails at once (bare return), skipping the field checks;
+# otherwise the result is that of the first field check to fail, or of
+# check_field_constraints if the required-field check passes.
+sub full_check
+{
+   my ($self, $warn, $coerce) = @_;
+
+   if ($self->metatype != &BTE_REGULAR)
+   {
+      return 1;
+   }
+
+   $self->check_type ($warn) or return;
+
+   my $required_ok = $self->check_required_fields ($warn, $coerce);
+   return $required_ok unless $required_ok;
+
+   return $self->check_field_constraints ($warn, $coerce);
+}
+
+
+# Front ends for full_check -- there are actually four possible wrappers,
+# but having both $warn and $coerce false is pointless.
+
+=item check ()
+
+Checks that the entry conforms to the requirements of its associated
+database structure: the type must be known, all required fields must be
+present, and all field constraints must be met. See C<check_type>,
+C<check_required_fields>, and C<check_field_constraints> for details.
+
+Calling C<check> is the same as calling C<full_check> with WARN true and
+COERCE false.
+
+=item coerce ()
+
+Same as C<check>, except entries are coerced into conformance with the
+database structure---that is, it's just like C<full_check> with both
+WARN and COERCE true.
+
+=item silently_coerce ()
+
+Same as C<coerce>, except warnings aren't printed---that is, it's just
+like C<full_check> with WARN false and COERCE true.
+
+=back
+
+=cut
+
+# full_check with warnings on and coercion off.
+sub check
+{
+   my $self = shift;
+   return $self->full_check (1, 0);
+}
+
+# full_check with both warnings and coercion on.
+sub coerce
+{
+   my $self = shift;
+   return $self->full_check (1, 1);
+}
+
+# full_check with warnings off and coercion on.
+sub silently_coerce
+{
+   my $self = shift;
+   return $self->full_check (0, 1);
+}
+
+1;
+
+=head1 SEE ALSO
+
+L<Text::BibTeX>, L<Text::BibTeX::Entry>, L<Text::BibTeX::File>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/BibTeX/Value.pm b/BibTeX/Value.pm
new file mode 100644
index 0000000..8259896
--- /dev/null
+++ b/BibTeX/Value.pm
@@ -0,0 +1,326 @@
+# ----------------------------------------------------------------------
+# NAME : Text::BibTeX::Value
+# CLASSES : Text::BibTeX::Value, Text::BibTeX::SimpleValue
+# RELATIONS :
+# DESCRIPTION: Provides interfaces to BibTeX values (list of simple
+# values) and simple values (string/macro/number).
+# CREATED : 1998/03/12, Greg Ward
+# MODIFIED :
+# VERSION : $Id: Value.pm 3033 2006-09-21 20:07:27Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+
+package Text::BibTeX::Value;
+
+use strict;
+use UNIVERSAL 'isa';
+use Carp;
+
+=head1 NAME
+
+Text::BibTeX::Value - interfaces to BibTeX values and simple values
+
+=head1 SYNOPSIS
+
+ use Text::BibTeX;
+
+ $entry = new Text::BibTeX::Entry;
+
+ # set the 'preserve_values' flag to 1 for this parse
+ $entry->parse ($filename, $filehandle, 1);
+
+ # 'get' method now returns a Text::BibTeX::Value object
+ # rather than a string
+ $value = $entry->get ($field);
+
+ # query the `Value' object (list of SimpleValue objects)
+ @all_values = $value->values;
+ $first_value = $value->value (0);
+ $last_value = $value->value (-1);
+
+ # query the simple value objects -- type will be one of BTAST_STRING,
+ # BTAST_MACRO, or BTAST_NUMBER
+ use Text::BibTeX (':nodetypes'); # import "node type" constants
+ $is_macro = ($first_value->type == BTAST_MACRO);
+ $text = $first_value->text;
+
+=head1 DESCRIPTION
+
+The C<Text::BibTeX::Value> module provides two classes,
+C<Text::BibTeX::Value> and C<Text::BibTeX::SimpleValue>, which respectively
+give you access to BibTeX "compound values" and "simple values". Recall
+that every field value in a BibTeX entry is the concatenation of one or
+more simple values, and that each of those simple values may be a literal
+string, a macro (abbreviation), or a number. Normally with
+C<Text::BibTeX>, field values are "fully processed," so that you only have
+access to the string that results from expanding macros, converting numbers
+to strings, concatenating all sub-strings, and collapsing whitespace in the
+resulting string.
+
+For example, in the following entry:
+
+ @article{homer97,
+ author = "Homer Simpson" # and # "Ned Flanders",
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = jss,
+ year = 1997
+ }
+
+we see the full range of options. The C<author> field consists of three
+simple values: a string, a macro (C<and>), and another string. The
+C<title> field is a single string, and the C<journal> and C<year> fields
+are, respectively, a single macro and a single number. If you parse
+this entry in the usual way:
+
+ $entry = new Text::BibTeX::Entry $entry_text;
+
+then the C<get> method on C<$entry> would return simple strings.
+Assuming that the C<and> macro is defined as C<" and ">, then
+
+ $entry->get ('author')
+
+would return the Perl string C<"Homer Simpson and Ned Flanders">.
+
+However, you can also request that the library preserve the input values
+in your entries, i.e. not lose the information about which values use
+macros, which values are composed of multiple simple values, and so on.
+There are two ways to make this request: per-file and per-entry. For a
+per-file request, use the C<preserve_values> method on your C<File>
+object:
+
+ $bibfile = new Text::BibTeX::File $filename;
+ $bibfile->preserve_values (1);
+
+ $entry = new Text::BibTeX::Entry $bibfile;
+ $entry->get ($field); # returns a Value object
+
+ $bibfile->preserve_values (0);
+ $entry = new Text::BibTeX::Entry $bibfile;
+ $entry->get ($field); # returns a string
+
+If you're not using a C<File> object, or want to control things at a
+finer scale, then you have to pass in the C<preserve_values> flag when
+invoking C<read>, C<parse>, or C<parse_s> on your C<Entry> objects:
+
+ # no File object, parsing from a string
+ $entry = new Text::BibTeX::Entry;
+ $entry->parse_s ($entry_text, 0); # preserve_values=0 (default)
+ $entry->get ($field); # returns a string
+
+ $entry->parse_s ($entry_text, 1);
+ $entry->get ($field); # returns a Value object
+
+ # using a File object, but want finer control
+ $entry->read ($bibfile, 0); # now get will return strings (default)
+ $entry->read ($bibfile, 1); # now get will return Value objects
+
+A compound value, usually just called a value, is simply a list of
+simple values. The C<Text::BibTeX::Value> class (hereinafter
+abbreviated as C<Value>) provides a simple interface to this list; you
+can request the whole list, or an individual member of the list. The
+C<SimpleValue> class gives you access to the innards of each simple
+value, which consist of the I<type> and the I<text>. The type just
+tells you if this simple value is a string, macro, or number; it is
+represented using the Perl translation of the "node type" enumeration
+from C. The possible types are C<BTAST_STRING>, C<BTAST_NUMBER>, and
+C<BTAST_MACRO>. The text is just what appears in the original entry
+text, be it a string, number, or macro.
+
+For example, we could parse the above entry in "preserve values" mode as
+follows:
+
+ $entry->parse_s ($entry_text, 1); # preserve_values is 1
+
+Then, using the C<get> method on C<$entry> would return not a string,
+but a C<Value> object. We can get the list of all simple values using
+the C<values> method, or a single value using C<value>:
+
+ $author = $entry->get ('author'); # now a Text::BibTeX::Value object
+ @all_values = $author->values; # array of Text::BibTeX::SimpleValue
+ $second = $author->value (1); # same as $all_values[1]
+
+The simple values may be queried using the C<Text::BibTeX::SimpleValue>
+methods, C<type> and C<text>:
+
+ $all_values[0]->type; # returns BTAST_STRING
+ $second->type; # returns BTAST_MACRO
+
+ $all_values[0]->text; # "Homer Simpson"
+ $second->text; # "and" (NOT the macro expansion!)
+
+ $entry->get ('year')->value (0)->text; # "1997"
+
+=head1 METHODS
+
+Normally, you won't need to create C<Value> or C<SimpleValue>
+objects---they'll be created for you when an entry is parsed, and
+returned to you by the C<get> method in the C<Entry> class. Thus, the
+query methods (C<values> and C<value> for the C<Value> class, C<type>
+and C<text> for C<SimpleValue>) are probably all you need to worry
+about. If you wish, though, you can create new values and simple values
+using the two classes' respective constructors. You can also put
+newly-created C<Value> objects back into an existing C<Entry> object
+using the C<set> entry method; it doesn't matter how the entry was
+parsed, this is acceptable anytime.
+
+=head2 Text::BibTeX::Value methods
+
+=over 4
+
+=item new (SVAL, ...)
+
+Creates a new C<Value> object from a list of simple values. Each simple
+value, SVAL, may be either a C<SimpleValue> object or a reference to a
+two-element list containing the type and text of the simple value. For
+example, one way to recreate the C<author> field of the example entry in
+L<"DESCRIPTION"> would be:
+
+ $and_macro = new Text::BibTeX::SimpleValue (BTAST_MACRO, 'and');
+ $value = new Text::BibTeX::Value
+ ([BTAST_STRING, 'Homer Simpson'],
+ $and_macro,
+ [BTAST_STRING, 'Ned Flanders']);
+
+The resulting C<Value> object could then be installed into an entry
+using the C<set> method of the C<Entry> class.
+
+=cut
+
+sub new
+{
+    # Constructor: builds a Value -- a blessed array of SimpleValue
+    # objects -- from a list of simple values.  Each argument must be
+    # either a Text::BibTeX::SimpleValue object or a reference to a
+    # two-element array, which is handed to the SimpleValue constructor.
+    # Croaks on any other argument.
+    my ($class, @svals) = @_;
+
+    $class = ref $class || $class;
+    my $self = bless [], $class;
+
+    # Visit every argument.  A "while (my $sval = shift)" loop would stop
+    # at the first false argument (undef, 0, '') and silently drop
+    # everything after it instead of reporting the bad argument.
+    foreach my $sval (@svals)
+    {
+        $sval = Text::BibTeX::SimpleValue->new (@$sval)
+            if ref $sval eq 'ARRAY' && @$sval == 2;
+
+        # Require a reference before asking UNIVERSAL::isa, so that a
+        # plain class-name string is not mistaken for an object; calling
+        # it fully qualified avoids relying on an imported 'isa'.
+        croak "simple value is neither a two-element array ref " .
+              "nor a Text::BibTeX::SimpleValue object"
+            unless ref ($sval) &&
+                   UNIVERSAL::isa ($sval, 'Text::BibTeX::SimpleValue');
+        push (@$self, $sval);
+    }
+
+    return $self;
+}
+
+=item values ()
+
+Returns the list of C<SimpleValue> objects that make up a C<Value> object.
+
+=item value (NUM)
+
+Returns the NUM'th C<SimpleValue> object from the list of C<SimpleValue>
+objects that make up a C<Value> object. This is just like a Perl array
+reference: NUM is zero-based, and negative numbers count from the end of
+the array.
+
+=back
+
+=cut
+
+# A Text::BibTeX::Value object is just an array ref; that array is a list
+# of Text::BibTeX::SimpleValue objects. Most of the real work for Value
+# and SimpleValue is done behind the scenes when an entry is parsed, in
+# BibTeX.xs and btxs_support.c.
+
+# values: the list of SimpleValue objects held in this Value.
+sub values
+{
+    my ($self) = @_;
+    return @$self;
+}
+
+# value: the member of the list at index $num (plain Perl array
+# indexing on the underlying array ref).
+sub value
+{
+    my ($self, $num) = @_;
+    return $self->[$num];
+}
+
+
+package Text::BibTeX::SimpleValue;
+
+use strict;
+use Carp;
+use Text::BibTeX qw(:nodetypes);
+
+=head2 Text::BibTeX::SimpleValue methods
+
+=over
+
+=item new (TYPE, TEXT)
+
+Creates a new C<SimpleValue> object with the specified TYPE and TEXT.
+TYPE must be one of the allowed types for BibTeX simple values,
+i.e. C<BTAST_STRING>, C<BTAST_NUMBER>, or C<BTAST_MACRO>. You'll
+probably want to import these constants from C<Text::BibTeX> using the
+C<nodetypes> export tag:
+
+ use Text::BibTeX qw(:nodetypes);
+
+TEXT may be any string. Note that if TYPE is C<BTAST_NUMBER> and TEXT
+is not a string of digits, the C<SimpleValue> object will be created
+anyways, but a warning will be issued. No warning is issued about
+non-existent macros.
+
+=cut
+
+sub new
+{
+    # Constructor: builds a SimpleValue, a blessed two-element array
+    # holding [TYPE, TEXT].
+    #   TYPE must equal BTAST_STRING, BTAST_NUMBER or BTAST_MACRO
+    #   TEXT must be a defined, non-reference scalar
+    # Croaks on an invalid TYPE or TEXT; merely warns (carp) when TYPE is
+    # BTAST_NUMBER and TEXT is not made up solely of the digits 0-9.
+    my ($class, $type, $text) = @_;
+
+    # Test for an undefined type first so that neither the comparisons
+    # nor the error message operate on undef.
+    my $shown_type = defined $type ? $type : 'undef';
+    croak "invalid simple value type ($shown_type)"
+        unless defined $type &&
+               ($type == &BTAST_STRING ||
+                $type == &BTAST_NUMBER ||
+                $type == &BTAST_MACRO);
+    croak "invalid simple value text (must be a simple string or number)"
+        unless defined $text && ! ref $text;
+
+    # \A...\z rather than ^...$: "$" would also accept a trailing newline,
+    # and [0-9] restricts the match to ASCII digits.
+    carp "warning: creating a 'number' simple value with non-numeric text"
+        if $type == &BTAST_NUMBER && $text !~ /\A[0-9]+\z/;
+
+    $class = ref $class || $class;
+    return bless [$type, $text], $class;
+}
+
+
+=item type ()
+
+Returns the type of a simple value. This will be one of the allowed
+"node types" as described under L</new> above.
+
+=item text ()
+
+Returns the text of a simple value. This is just the text that appears
+in the original entry---unexpanded macro name, or unconverted number.
+(Of course, converting numbers doesn't make any difference from Perl; in
+fact, it's all the same in C too, since the C code just keeps numbers as
+strings of digits. It's simply a matter of whether the string of digits
+is represented as a string or a number, which you might be interested in
+knowing if you want to preserve the structure of the input as much as
+possible.)
+
+=back
+
+=cut
+
+# type: first slot of the underlying array (the simple value's type).
+sub type
+{
+    my ($self) = @_;
+    return $self->[0];
+}
+
+# text: second slot of the underlying array (the simple value's text).
+sub text
+{
+    my ($self) = @_;
+    return $self->[1];
+}
+
+1;
+
+=head1 SEE ALSO
+
+L<Text::BibTeX>, L<Text::BibTeX::File>, L<Text::BibTeX::Entry>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
+
+=cut
diff --git a/CHANGES b/CHANGES
new file mode 100644
index 0000000..61e0fa4
--- /dev/null
+++ b/CHANGES
@@ -0,0 +1,101 @@
+Release 0.38 - 8 March, 2008
+----------------------------
+* fixed problem when creating an empty Entry.
+ Thanks to Frank Wiegand.
+
+Release 0.37 - 21 September, 2006
+---------------------------------
+* added support for 'conference' key (alias for inproceedings)
+
+Release 0.36 - 24 July, 2005
+----------------------------
+* fixed url where btparse should be downloaded from. (stupid bug)
+
+Release 0.35 - 23 July, 2005
+----------------------------
+* changed from 0.33 to 0.35 to maintain relation with btparse.
+* some tests now use Test::More (more tests will change soon)
+* the way the module initializes changed. Now the module can be
+ included to check its version.
+
+Release 0.33 - 22 March, 2000
+-----------------------------
+* fixed long-standing bug with import/inheritance code; should fix the
+ mysterious "method redefined" and "can't locate object method" errors
+ that various people have seen on various platforms over the years.
+ Thanks to Nikolay Pelov <Nikolay.Pelov@cs.kuleuven.ac.be> for
+ fixing the bug!
+* fixed some small bugs spotted by Horst Tellioglu
+ <telliogl@h2hobel.phl.univie.ac.at> that messed up creating a brand-
+ new entry from scratch
+* fixed even smaller bug spotted by Horst Tellioglu in the BibTeX 0.99
+ emulation code
+
+
+Release 0.32 - 28 November, 1999
+--------------------------------
+* made Makefile.PL able to download and configure btparse, instead
+ of leaving it up to the user
+* fixed entry output method (print_s) so strings are wrapped in braces,
+ not quotes -- that way we don't generate bogus BibTeX files if there
+ are quotes at top-level in a string
+
+
+Release 0.31 - 28 October, 1999
+-------------------------------
+* fixed small bug in Text::BibTeX::BibFormat
+* better documentation for Text::BibTeX::Name example
+* better adherence to POD standard
+
+
+Release 0.30 - 12 March, 1999
+-----------------------------
+* the "structure module" system is in place -- lets you write classes
+ analogous to BibTeX style files, but with all the advantages of
+ object-oriented programming in Perl (see Text::BibTeX::Structure
+ man page)
+* the Bib structure, meant to emulate the standard style files of
+ BibTeX 0.99, is partially complete: can generate sort keys for
+ any of the 13 standard entry types, and format 'article' and 'book'
+ entries -- no support for crossrefs or alphabetic labels yet though
+* can now (optionally) access the data in more "raw" form (i.e. with
+ macros not expanded, strings not concatenated, whitespace not collapsed)
+ (see Text::BibTeX::Value man page)
+* moved support for parsing individual names to the Text::BibTeX::Name class
+* added support for name-formatting via Text::BibTeX::NameFormat class
+ (parallels work in btparse -- see also bt_format_names man page)
+* added BibTeX-style string purification
+* added three example programs: btcheck, btsort, and btformat (btformat
+ is *very* preliminary!)
+* lots of documentation added/revised in the existing modules
+
+
+Release 0.21 - 20 October, 1997
+------------------------------
+* companion to btparse 0.21 -- mainly due to bug fixes and one
+ minor interface change (bt_cite_key -> bt_entry_key) in btparse
+* documentation/portability/warning fixes
+* fixed XS code to not make an accidental second "strip quote
+ characters" pass
+* fixed Entry 'print_s' method to handle all entry metatypes
+* changed Entry 'delete' method to handle a list of fields
+* started introducing changes to support the new way of
+ doing 'structure modules' -- nothing documented yet, though
+* deprecated old Structure module
+
+
+Release 0.2 - 8 September, 1997
+-------------------------------
+* fixed a bunch of little memory leaks in the btparse C library
+* rationalized the Text::BibTeX::Entry method names
+* added (incomplete) Text::BibTeX::Structure module
+* completely overhauled the parser and the interface to it;
+ this necessitated many small changes to BibTeX.xs
+* greatly expanded the test suite and fixed a few little bugs
+ found in the process
+* fixed the XS code to handle comment and preamble entries
+
+
+Release 0.1 - 8 March 1997
+-------------------------
+* initial release
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..59d4bb3
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,37 @@
+MANIFEST
+README
+CHANGES
+Makefile.PL
+btool_faq.pod
+typemap
+btcheck
+btsort
+btformat
+BibTeX.pm
+BibTeX.xs
+btxs_support.h
+btxs_support.c
+BibTeX/File.pm
+BibTeX/Entry.pm
+BibTeX/Value.pm
+BibTeX/Structure.pm
+BibTeX/Name.pm
+BibTeX/NameFormat.pm
+BibTeX/Bib.pm
+BibTeX/BibFormat.pm
+BibTeX/BibSort.pm
+t/common.pl
+t/bib.t
+t/macro.t
+t/modify.t
+t/nameformat.t
+t/namelist.t
+t/names.t
+t/output.t
+t/parse.t
+t/parse_f.t
+t/parse_s.t
+t/purify.t
+t/split_names
+examples/append_entries
+META.yml Module meta-data (added by MakeMaker)
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..714ff63
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,12 @@
+--- #YAML:1.0
+name: Text-BibTeX
+version: 0.38
+abstract: ~
+license: ~
+author: ~
+generated_by: ExtUtils::MakeMaker version 6.44
+distribution_type: module
+requires:
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.3.html
+ version: 1.3
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..047610e
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,417 @@
+require 5.005; # to compile BibTeX.xs (I think)
+use strict;
+use ExtUtils::MakeMaker;
+use File::Basename;
+
+# $Id: Makefile.PL 3031 2006-09-21 20:02:34Z ambs $
+
+my @btparse_versions = qw(0.35);
+
+
+# Searches the specified @$search_dirs for either release directories or
+# archive files of any of the btparse versions listed in @$versions;
+# returns the name of whatever it finds (a single string, might be either a
+# directory or file name).
+sub find_btparse
+{
+    # Arguments:
+    #   $versions    -- array ref of acceptable btparse version numbers
+    #   $search_dirs -- array ref of directories to look in, in order
+    # Returns the path of a release directory or archive file; if nothing
+    # suitable is found, returns whatever download_btparse() returns.
+    my ($versions, $search_dirs) = @_;
+
+    # We'll look for things in reverse order by version, so be sure
+    # we have a version list sorted that way.
+    my @versions = sort { $b <=> $a } @$versions;
+
+    my (@archive_files, @release_dirs);
+    local *DIR;
+    DIR:
+    for my $dir (@$search_dirs)
+    {
+        opendir (DIR, $dir) || die "Couldn't open $dir: $!\n";
+        my @all_files = map ("$dir/$_", readdir (DIR));
+        closedir (DIR);
+
+        for my $version (@versions)
+        {
+            # \Q..\E keeps the dots of the version number literal.  A bare
+            # ".tar" is accepted as well, since unpack_btparse() knows how
+            # to unpack one.
+            my $release = "btparse-$version";
+            @release_dirs = grep (-d && m{/\Q$release\E\z}, @all_files);
+            @archive_files =
+                grep (-f && m{/\Q$release\E\.(tar(\.(g?z|Z))?|tgz|zip)\z},
+                      @all_files);
+
+            # stop at the first directory/version that yields anything
+            last DIR if @release_dirs || @archive_files;
+        }
+    }
+
+    my $extra_versions = join (", ", @versions[1..$#versions]);
+    my $wanted_dirs = "btparse-$versions[0]";
+    $wanted_dirs .= " or $extra_versions" if $extra_versions;
+
+    # Printable form of the search path.  This is kept in its own
+    # variable: overwriting $search_dirs with the joined string and then
+    # interpolating "@$search_dirs" dies under 'use strict'.
+    my $searched = join (" ", @$search_dirs);
+
+    # If no release directories were found, try looking for archive files.
+
+    if (@release_dirs == 0)
+    {
+        # If no archive files were found either, fall back to downloading
+        if (@archive_files == 0) # found nothing applicable
+        {
+            warn <<MSG;
+I couldn't find an acceptable version of btparse nearby, either
+as a release directory ($wanted_dirs)
+or archive file. I'll try to download it from CPAN now.
+
+MSG
+
+            return download_btparse (\@versions);
+        }
+
+        # Multiple archive files found -- warn and pick the latest one
+        # (according to file mtime)
+        elsif (@archive_files > 1)
+        {
+            my $file = latest_file (@archive_files);
+            warn <<WARN;
+Found multiple archive files in $searched
+-- using $file because it's the latest
+WARN
+            return $file;
+        }
+
+        # Good, exactly one archive file found -- return it
+        else
+        {
+            return $archive_files[0];
+        }
+    }
+
+    # Multiple directories found -- warn and pick the latest one
+    elsif (@release_dirs > 1)
+    {
+        my $dir = latest_file (@release_dirs);
+        warn <<WARN;
+Found multiple release directories
+-- using $dir because it's the latest
+WARN
+        return $dir;
+    }
+
+    # Exactly one directory found -- return it
+    else
+    {
+        return $release_dirs[0];
+    }
+
+} # find_btparse()
+
+
+# Returns whichever of the given paths has the most recent mtime.
+sub latest_file
+{
+    my @files = @_;
+    my %mtime = map (($_ => (stat $_)[9]), @files);
+    return (sort { $mtime{$a} <=> $mtime{$b} } @files)[-1];
+}
+
+
+sub download_http
+{
+    # $trials is an array ref of [filename, url, save_to] triples; each
+    # URL is fetched in turn with LWP::Simple::getstore.  Returns the
+    # save_to path of the first successful download, and dies once every
+    # URL has failed (or if the LWP modules cannot be loaded).
+    my ($trials) = @_;
+
+    eval { require LWP::Simple; require HTTP::Status; };
+    if ($@)
+    {
+        my $url_list = join (" \n", map ($_->[1], @$trials));
+        die "I couldn't load the LWP::Simple and HTTP::Status modules, so I can't\n" .
+            "download btparse. I would have tried to download it from:\n" .
+            " " . $url_list . "\n";
+    }
+
+    my ($last_error, @failed_urls);
+    for my $attempt (@$trials)
+    {
+        my ($url, $save_to) = @{$attempt}[1, 2];
+
+        print "Trying to download btparse from $url...\n";
+        my $status = LWP::Simple::getstore ($url, $save_to);
+        return $save_to if LWP::Simple::is_success ($status);
+
+        # remember why this one failed, then move on to the next URL
+        $last_error = sprintf ("%d: %s",
+                               $status,
+                               HTTP::Status::status_message ($status));
+        print "warning: download failed ($last_error), trying another one\n";
+        push (@failed_urls, $url);
+    }
+
+    # only get here if all failed
+    die "I tried to download btparse from the following URLs:\n " .
+        join (" \n", @failed_urls) .
+        "but they all failed. The last HTTP response was:\n" .
+        "$last_error\n";
+}
+
+
+sub download_ftp
+{
+    # $trials is an array ref of [filename, url, save_to] triples; each
+    # ftp:// URL is tried in turn with Net::FTP.  Returns the save_to
+    # path of the first successful download, and dies once every URL has
+    # failed (or if Net::FTP cannot be loaded).
+    my ($trials) = @_;
+
+    my ($last_error, @failed_urls);
+    for my $attempt (@$trials)
+    {
+        my ($filename, $url, $save_to) = @$attempt;
+
+        # Pull host, remote directory and base filename out of the URL;
+        # the filename must agree with the one recorded in the trial.
+        my ($host, $dir, $check_filename) =
+            ($url =~ m|^ftp://([^/]+)(/.*)/([^/]+)|);
+        if ($filename ne $check_filename)
+        {
+            die "this should not happen: '$filename' ne '$check_filename'\n";
+        }
+
+        eval { require Net::FTP; };
+        if ($@)
+        {
+            my $url_list = join (" \n", map ($_->[1], @$trials));
+            die "I couldn't load the Net::FTP module, so I can't download\n" .
+                "btparse. I would have tried to download it from:\n" .
+                " " . $url_list . "\n";
+        }
+
+        # Any failing step dies with a short message, which is caught
+        # below and reported before moving on to the next URL.
+        eval
+        {
+            print "Connecting to $host...\n";
+            my $ftp = Net::FTP->new ($host);
+            die "$@\n" unless $ftp;
+            die "login failed\n" unless $ftp->login;
+            die "couldn't set binary mode\n" unless $ftp->binary;
+            die "couldn't chdir to $dir\n" unless $ftp->cwd ($dir);
+
+            print "Downloading $dir/$filename...\n";
+            die "download failed (file not there?)\n"
+                unless $ftp->get ($filename, $save_to);
+        };
+
+        return $save_to unless $@;     # success!
+
+        # oh dear, we blew up
+        $last_error = $@;
+        chop $last_error;
+        print "warning: download failed ($last_error), trying another one\n";
+        push (@failed_urls, $url);
+    }
+
+    # if we get here, all attempts failed
+    die "I tried to download btparse from the following URLs:\n " .
+        join (" \n", @failed_urls) .
+        "but they all failed. The last error message was:\n" .
+        "$last_error\n";
+} # download_ftp ()
+
+
+sub download_btparse
+{
+    # Downloads a btparse archive for one of the versions in @$versions
+    # (tried in the order given) from the first mirror in the CPAN.pm
+    # configuration, or from http://www.cpan.org/ if none is configured.
+    # The archive is saved in the parent directory.  Returns the path of
+    # the saved archive; dies if nothing could be downloaded.
+    my ($versions) = @_;
+
+    # Try to find a preferred CPAN mirror from the CPAN.pm config.
+    my $cpan_url;
+    eval { require CPAN::Config; $cpan_url = $CPAN::Config->{'urllist'}[0]; };
+    if ($@ || !$cpan_url)
+    {
+        $cpan_url = 'http://www.cpan.org/';
+        print "CPAN module not fully configured -- " .
+              "using default CPAN site at $cpan_url\n";
+    }
+    else
+    {
+        $cpan_url .= '/' unless substr($cpan_url,-1,1) eq '/';
+        print "Using your preferred CPAN mirror at $cpan_url\n";
+    }
+
+    # build list of [base-filename, whole-url, where-to-save-it] lists
+    my $cpan_dir = 'modules/by-authors/id/A/AM/AMBS/btparse/';
+    my @download_trials = ();
+    foreach my $ver (@$versions)
+    {
+        my $archive_file = "btparse-${ver}.tar.gz";
+        push (@download_trials, [$archive_file,
+                                 $cpan_url . $cpan_dir . $archive_file,
+                                 "../$archive_file"]);
+    }
+
+    my $saved_archive;
+    eval
+    {
+        # https mirrors go through the same LWP-based downloader as http;
+        # previously they were rejected as an unknown kind of URL.
+        if ($cpan_url =~ /^https?:/)
+        {
+            $saved_archive = download_http (\@download_trials);
+        }
+        elsif ($cpan_url =~ m|^ftp://|)
+        {
+            $saved_archive = download_ftp (\@download_trials);
+        }
+        else
+        {
+            die <<MSG;
+I can't download btparse, because I don't know how to deal with the URL
+$cpan_url
+MSG
+        }
+    };
+
+    # pass any failure on, with advice on what to do instead
+    if ($@)
+    {
+        die $@ . <<MSG;
+
+You'll have to download it yourself and put it in the current
+directory or its parent.
+MSG
+    }
+
+    if ($saved_archive)
+    {
+        print "Successfully downloaded btparse to $saved_archive\n";
+        return $saved_archive;
+    }
+
+    die "this should not happen: can't get here!\n";
+
+} # download_btparse()
+
+
+# Takes the directory or filename returned by 'find_btparse()' and
+# ensures that it is unpacked and ready to build.
+sub unpack_btparse
+{
+    # Makes sure a btparse source tree is reachable as ./btparse.
+    #   $versions, $search_dirs -- passed straight to find_btparse()
+    # If ./btparse already exists as a directory nothing is done.
+    # Otherwise the release found by find_btparse() is unpacked (when it
+    # is an archive file) and a 'btparse' symlink to it is created.
+    # Dies if unpacking or symlinking fails.
+    my ($versions, $search_dirs) = @_;
+
+    if (-d "btparse")
+    {
+        print "btparse already unpacked to 'btparse' -- " .
+              "no further detective work needed\n";
+        return;
+    }
+
+    my $btparse_release = find_btparse ($versions, $search_dirs);
+    print "Found btparse release in $btparse_release\n";
+
+    if (-f $btparse_release)
+    {
+        $| = 1;
+        my $cmd;
+        # .tgz is a gzipped tarball as well
+        if ($btparse_release =~ /\.(tar\.(g?z|Z)|tgz)$/)
+        { $cmd = "gzip -dc $btparse_release | tar xf -"; }
+        elsif ($btparse_release =~ /\.tar$/)
+        { $cmd = "tar xf $btparse_release"; }
+        # unzip's -d option names the extraction directory, so it must
+        # not be put in front of the archive name
+        elsif ($btparse_release =~ /\.zip$/)
+        { $cmd = "unzip $btparse_release"; }
+        else
+        { die "don't know how to unpack $btparse_release\n"; }
+
+        print "Unpacking $btparse_release: $cmd\n";
+        system $cmd;
+        die "unpacking failed\n" if $?;
+
+        # strip off directory and extension(s) -- this should just give
+        # us the name of the directory we just unpacked to
+        $btparse_release =
+            fileparse ($btparse_release, '\.tar.*', '\.tgz', '\.zip');
+        if (! -d $btparse_release)
+        {
+            die <<ERR
+Directory "$btparse_release" doesn't exist, but it should have
+been created when we unpacked the archive file.
+ERR
+        }
+    }
+
+    print "Creating symlink: btparse -> $btparse_release\n";
+    symlink ($btparse_release, "btparse")
+        || die "symlink failed: $!\n";
+
+} # unpack_btparse ()
+
+
+sub configure_btparse
+{
+    # Runs "sh ./configure" inside $dir unless config.status and
+    # config.cache both exist there and are newer than the configure
+    # script.  Dies if a chdir or the configure run fails.  On success
+    # the working directory is the same as it was on entry.
+    my ($dir) = @_;
+
+    # Remember where we started.  $dir may be a symlink to a directory
+    # somewhere else, in which case "chdir '..'" from inside it leads to
+    # the parent of the real directory rather than back here.
+    require Cwd;
+    my $start_dir = Cwd::cwd ();
+
+    chdir $dir or die "couldn't chdir to $dir: $!\n";
+    unless (-f "config.status" && -f "config.cache" &&
+            -M "config.status" < -M "configure" &&
+            -M "config.cache" < -M "configure")
+    {
+        print "btparse distribution in '$dir' is not configured; doing so now:\n";
+        my @cmd = ('sh', './configure');
+        print "@cmd\n";
+        system @cmd;
+        die "configure failed\n" unless $? == 0;
+    }
+
+    chdir $start_dir or die "couldn't chdir to $start_dir: $!\n";
+    print "btparse distribution in '$dir' is configured and ready to build\n";
+}
+
+# -- BEGIN main --------------------------------------------------------
+
+
+# This stuff is arranged so that I do not need to keep a complete copy of
+# the btparse distribution around for the development copy of Text::BibTeX,
+# but can still have Makefile.PL take care of unpacking btparse for
+# building anywhere else.
+
+# Make sure a btparse source tree is reachable as ./btparse before the
+# Makefile is generated.
+unpack_btparse (\@btparse_versions, ['.', '..']);
+
+my @support_files = ('btxs_support$(OBJ_EXT)');
+my $support_objects = join (' ', @support_files);
+
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+my @makefile_args =
+    (NAME         => 'Text::BibTeX',
+     VERSION_FROM => 'BibTeX.pm',
+     XSPROTOARG   => '-prototypes',
+     LIBS         => [''],              # e.g., '-lm'
+     DEFINE       => '',                # e.g., '-DHAVE_SOMETHING'
+     INC          => '-Ibtparse/src',
+     MYEXTLIB     => 'btparse/src/.libs/libbtparse$(LIB_EXT)',
+     OBJECT       => 'BibTeX$(OBJ_EXT) ' . $support_objects,
+     dynamic_lib  => { INST_DYNAMIC_DEP => $support_objects },
+     dist         => { COMPRESS => "gzip", SUFFIX => "gz" },
+    );
+WriteMakefile (@makefile_args);
+
+# Configure the btparse tree once the Makefile has been written.
+configure_btparse ('btparse');
+
+# -- END main ----------------------------------------------------------
+
+
+# -- Overrides ---------------------------------------------------------
+package MY;
+
+sub dist
+{
+    # Takes the 'dist' text produced by the parent class, replaces the
+    # first "CI = ..." assignment with "CI = true", and rewrites the
+    # first "-Nv" as "-sRel -N$(NAME_SYM)_v".
+    # NOTE(review): any further arguments in @_ are not forwarded to
+    # SUPER::dist (manifypods does forward them) -- confirm intended.
+    my $self = shift;
+    my $section = $self->SUPER::dist;
+
+    $section =~ s/CI \s* = \s* .*/CI = true/mx;
+    $section =~ s/-Nv/-sRel -N\$(NAME_SYM)_v/m;
+    return $section;
+}
+
+sub postamble
+{
+    # Extra Makefile text: a rule that builds the $(MYEXTLIB) target by
+    # running $(MAKE) in the btparse directory.  The recipe line has to
+    # begin with a tab, so the tab is written as an explicit "\t" rather
+    # than as literal whitespace inside a quoted string, which is easily
+    # turned into spaces.
+    return "\n" .
+           "\$(MYEXTLIB):\n" .
+           "\tcd btparse && \$(MAKE)\n";
+}
+
+sub manifypods
+{
+    # Takes the 'manifypods' text produced by the parent class (called
+    # with the same arguments) and renames the first "Text::btool_faq"
+    # in it to plain "btool_faq".
+    my ($self, @args) = @_;
+    my $section = $self->SUPER::manifypods (@args);
+
+    $section =~ s/Text::btool_faq/btool_faq/;
+    return $section;
+}
diff --git a/README b/README
new file mode 100644
index 0000000..6f67d8e
--- /dev/null
+++ b/README
@@ -0,0 +1,168 @@
+ Text::BibTeX
+ version 0.34
+ Greg Ward (gward@python.net)
+ 22 December, 2000
+
+INTRODUCTION
+------------
+
+Text::BibTeX is a Perl library for reading, parsing, and processing
+BibTeX files. It is the Perl half of btOOL, a pair of libraries for
+dealing with BibTeX data; the C half, btparse, is needed to compile
+Text::BibTeX (see below).
+
+Text::BibTeX gives you access to the data at many different levels: you
+may work with BibTeX entries as simple "field -> string" mappings, or
+get at the original form of the data as a list of simple values
+(strings, macros, or numbers) pasted together. You can choose not to
+impose any restrictions on the allowed/expected entry types or fields,
+or you can use the structure defined by BibTeX 0.99's standard style
+files, or you can invent your own.
+
+The library is copiously documented. After installing the module, see
+the Text::BibTeX man page for a comprehensive introduction to the
+system. If you wish to dive straight in without regards for niceties
+such as splitting and formatting names, defining or imposing database
+structures, getting access to unprocessed field values, and other nifty
+features, then you can probably get away with just reading the
+Text::BibTeX::Entry man page. Comments/criticism of the documentation
+are welcome.
+
+In addition to the man pages embedded in each module (and available
+after installation), I have written a technical report describing btOOL,
+with the btparse and Text::BibTeX documentation included as appendices.
+The whole report is just over 100 pages, around 45 of which make up the
+Text::BibTeX documentation (the btparse documentation is a further 30
+pages). You can find it at the btOOL home page:
+
+ http://starship.python.net/~gward/btOOL/
+
+I may also make the btparse and Text::BibTeX manuals available as
+separate PostScript files, but they aren't there as of this writing.
+
+
+DEPENDENCIES
+------------
+
+Text::BibTeX requires Perl 5.004 or later and an ANSI-compliant C
+compiler.
+
+You must also have the btparse distribution, the C library underlying
+Text::BibTeX. If you didn't download btparse with Text::BibTeX, don't
+sweat it; the Makefile.PL is smart enough to go out and find it for you
+(using your preferred CPAN mirror if you have set one up,
+http://www.cpan.org/ otherwise). If you did download btparse, just put
+it in the same directory as the Text::BibTeX tarball. Text::BibTeX's
+Makefile.PL will unpack and configure it for you.
+
+
+BUILDING
+--------
+
+Start by generating the Makefile and other preparatory steps (most notably,
+preparing the btparse package for building).
+
+ perl Makefile.PL
+
+This goes to some lengths to ensure that btparse, the companion C library,
+is found and available. It will configure btparse for you, so you
+shouldn't have to do anything in the 'btparse' directory unless things go
+wrong there.
+
+Note that if Perl was built with a non-ANSI C compiler, you will need to
+specify an ANSI compiler when running Makefile.PL. For instance, if you're
+running SunOS and Perl was built with 'cc' (non-ANSI), you will need to
+build Text::BibTeX (and btparse as well, for that matter) with an ANSI
+compiler such as 'gcc' or Sun's 'acc'. This can be done for Text::BibTeX
+as follows:
+
+ perl Makefile.PL CC=gcc # or acc, or whatever works on your system
+
+However, this is fraught with danger and not recommended. The only safe
+thing to do is to build Perl with an ANSI-compliant compiler. This will
+probably save you trouble with other extensions that require an ANSI C
+compiler.
+
+Now, build everything:
+
+ make
+
+This takes care of building btparse as well as the Text::BibTeX modules, so
+again you don't have to go grubbing around in the btparse directory unless
+things go wrong.
+
+Optionally, you can test and install btparse:
+
+ cd btparse
+ make test
+ make install
+ cd ..
+
+(You do not have to install btparse to use Text::BibTeX; that's only needed
+if you plan to do C programming using btparse. See btparse/README for
+details.)
+
+Finally, test and install the Text::BibTeX Perl modules:
+
+ make test
+ make install
+
+Note that you will need to be root on most systems in order to install
+either btparse or Text::BibTeX. Consult the 'perlmodinstall' man page (as
+of Perl 5.005 or later) for more information on building and installing
+Perl module distributions.
+
+Please let me know if anything goes wrong with building either btparse
+or Text::BibTeX.
+
+
+BUGS AND LIMITATIONS
+--------------------
+
+See the btparse(3) and Text::BibTeX(3) man pages for details. Executive
+summary:
+
+ * Text::BibTeX is not thread-safe, due to a heavy dependence on
+ global variables in the parser and lexical scanner components
+ of btparse
+
+ * for the same reason, you can't have multiple files open and
+ being parsed at the same time; attempting to do so is an
+ invalid use of btparse
+
+ * Text::BibTeX cannot be used with an sfio-based Perl, because
+ the Perl I/O API is apparently not yet up to the task of replacing
+ stdio in third-party C libraries
+
+
+AVAILABILITY
+------------
+
+You can find the latest version of Text::BibTeX, as well as
+documentation, information on known bugs, etc. at the btOOL web site:
+
+ http://starship.python.net/~gward/btOOL/
+
+Here you will find HTML versions of the documentation and technical
+report describing the project, links to download the code, and whatever
+other goodies I can come up with over time.
+
+
+CREDITS
+-------
+
+For spotting bugs and sometimes even providing patches:
+
+ Dirk Vleugels <vleugels@do.isst.fhg.de>
+ Kjetil Kjernsmo <kjetil.kjernsmo@astro.uio.no>
+ Andrew Cassin <acassin@cs.mu.oz.au>
+ Thomas Kamphusmann <thomas@delphi.ping.de>
+ Horst Tellioglu <telliogl@h2hobel.phl.univie.ac.at>
+ Nikolay Pelov <Nikolay.Pelov@cs.kuleuven.ac.be>
+
+...and probably a few others. Please send me email if you feel you
+belong on this list and I forgot to include you.
+
+Also, see the btparse README file for contributors to the C library.
+
+$Id: README 3031 2006-09-21 20:02:34Z ambs $
diff --git a/btcheck b/btcheck
new file mode 100755
index 0000000..544415c
--- /dev/null
+++ b/btcheck
@@ -0,0 +1,31 @@
+#!/usr/local/bin/perl5 -w
+
+#
+# btcheck
+#
+# Check the syntax and structure of a single BibTeX database file.
+# The database structure may be named as an optional second argument;
+# when omitted it defaults to "Bib", which implements exactly the
+# structure of BibTeX 0.99.
+#
+# $Id: btcheck 3032 2006-09-21 20:05:55Z ambs $
+#
+
+use strict;
+use Text::BibTeX (':metatypes');
+
+# One or two arguments: the file to check and, optionally, the name of
+# the structure its entries are expected to conform to.
+die "usage: btcheck file [structure]\n" unless @ARGV == 1 || @ARGV == 2;
+my ($filename, $structure) = @ARGV;
+$structure = 'Bib' unless $structure;
+
+my $bibfile = Text::BibTeX::File->new ($filename) or die "$filename: $!\n";
+$bibfile->set_structure ($structure);
+
+# Visit every entry in the file.  Only regular entries that parsed
+# cleanly are examined: each one is checked for a repeated citation key
+# and then handed to the structure's own 'check' method.
+my %seen_key;
+while (my $entry = Text::BibTeX::Entry->new ($bibfile))
+{
+    next unless $entry->parse_ok;
+    next unless $entry->metatype == BTE_REGULAR;
+
+    my $key = $entry->key;
+    $entry->warn ("repeated entry key \"$key\"") if $seen_key{$key};
+    $seen_key{$key} = 1;
+    $entry->check;
+}
diff --git a/btformat b/btformat
new file mode 100755
index 0000000..6038664
--- /dev/null
+++ b/btformat
@@ -0,0 +1,128 @@
+#!/usr/local/bin/perl -w
+
+#
+# btformat
+#
+# Formats the regular entries of a single BibTeX file using the "Bib"
+# structure and prints them to standard output, decorated with LaTeX
+# 2.09, LaTeX 2e, or HTML markup.  Entries may be restricted to a list
+# of citation keys with -select.
+#
+
+use strict;
+use Text::BibTeX;
+use Getopt::Tabular;
+
+# ----------------------------------------------------------------------
+# Command-line options and option table
+
+my @select;                             # list of citation keys
+my $markup = 'latex';
+my $open_bib = 0;
+
+# Default markups -- should be customizable
+#
+# Note that the HTML 'inter_block' separator depends on -openbib, which
+# is not known until the command line has been parsed; the value here
+# is the "closed" default and is overridden after GetOptions() below.
+my %markup =
+   (pre_entry   => { latex   => '\bibitem{%KEY%}' . "\n",
+                     latex2e => '\bibitem{%KEY%}' . "\n",
+                     html    => '"[%LABEL%]"' },
+    inter_block => { latex   => "\n\\newblock ",
+                     latex2e => "\n\\newblock ",
+                     html    => " " },
+    atitle      => { latex   => ['{\em ', '}'],
+                     latex2e => ['\emph{', '}'],
+                     html    => ['<em>', '</em>'] },
+    btitle      => { latex   => ['{\em ', '}'],
+                     latex2e => ['\emph{', '}'],
+                     html    => ['<em>', '</em>'] },
+    journal     => { latex   => ['{\em ', '}'],
+                     latex2e => ['\emph{', '}'],
+                     html    => ['<em>', '</em>'] },
+   );
+
+# NOTE(review): get_list_arg is not defined anywhere in this script;
+# presumably it is expected to be supplied from elsewhere -- confirm
+# before relying on -select.
+my @opt_table =
+   (['-select', 'call', undef, sub { &get_list_arg (@_, \@select) },
+     'list of entries to format (selected by citation key)',
+     'key1 ...'],
+    ['-latex', 'const', 'latex', \$markup,
+     'add LaTeX 2.09 markup to the bibliography entries'],
+    ['-latex2e', 'const', 'latex2e', \$markup,
+     'add LaTeX 2e markup to the bibliography entries'],
+    ['-html', 'const', 'html', \$markup,
+     'add HTML markup to the bibliography entries'],
+    ['-openbib|-closedbib', 'boolean', 0, \$open_bib,
+     'use "open" bibliography format'],
+   );
+
+
+
+# ----------------------------------------------------------------------
+# Main program
+
+# First, parse the command line and make sure there's exactly one
+# argument (the .bib file to format) left.
+
+my $usage = "usage: btformat [options] bibfile\n";
+Getopt::Tabular::SetHelp ($usage, undef);
+GetOptions (\@opt_table, \@ARGV) || exit 1;
+
+die "$usage\nIncorrect number of arguments\n" unless (@ARGV == 1);
+
+# Only now do we know whether an "open" bibliography was requested, so
+# only now can the HTML block separator be chosen.
+$markup{inter_block}{html} = "<br>\n" if $open_bib;
+
+
+# OK, we're happy with the command-line -- let's start working for real
+my $filename = shift;
+my $bibfile = Text::BibTeX::File->new ($filename) or die "$filename: $!\n";
+$bibfile->set_structure ('Bib', namestyle => 'nopunct', nameorder => 'first');
+
+# Lookup table of the citation keys requested with -select (empty if
+# every entry is to be formatted).
+my %select = map { ($_ => 1) } @select;
+
+my $entry_num = 0;
+while (my $entry = Text::BibTeX::Entry->new ($bibfile))
+{
+    next unless $entry->parse_ok && $entry->metatype == BTE_REGULAR;
+    next if (@select && ! $select{$entry->key});
+    $entry_num++;
+
+#   printf "formatting entry >%s<\n", $entry->key;
+
+    # Each block returned by 'format' is a list of sentences, and each
+    # sentence is either a plain string or a list of clauses.  Flatten
+    # that into one string per non-empty block.
+    my @blocks = grep { $_ } $entry->format;    # strip empty blocks
+    my @formatted;
+
+    BLOCK:
+    for my $block (@blocks)
+    {
+        my @sentences;
+
+        SENTENCE:
+        for my $sentence (@$block)
+        {
+            my $text = $sentence;
+
+            # If sentence has multiple clauses, process them: first, strip
+            # out empties, and jump to the next sentence if it turns out
+            # this one is empty (ie. just a bunch of empty clauses).  Then
+            # join the left-over clauses with commas.
+            if (ref $sentence eq 'ARRAY')
+            {
+                my @clauses = grep { $_ } @$sentence;
+                next SENTENCE unless @clauses;
+                $text = join (', ', @clauses);
+            }
+
+            next SENTENCE unless defined $text && $text ne '';
+
+            # finish sentence with a period if it's not already punctuated
+            $text .= '.' unless $text =~ /[.!?]$/;
+            push (@sentences, $text);
+        }
+
+        # A block whose sentences were all empty contributes nothing (it
+        # must not be left behind as an array reference, or it would be
+        # printed as "ARRAY(0x...)").
+        next BLOCK unless @sentences;
+        push (@formatted, join (' ', @sentences));
+    }
+
+    next unless @formatted;
+
+    my $key = $entry->key;
+    my $label = $entry_num;             # for now!
+    my $header = $markup{pre_entry}{$markup};
+    $header =~ s/%KEY%/$key/g;
+    $header =~ s/%LABEL%/$label/g;
+
+    print $header;
+    print join ($markup{inter_block}{$markup}, @formatted);
+    print "\n\n";
+}
diff --git a/btool_faq.pod b/btool_faq.pod
new file mode 100644
index 0000000..bf22ebd
--- /dev/null
+++ b/btool_faq.pod
@@ -0,0 +1,121 @@
+=head1 NAME
+
+btool_faq - Frequently-Asked Questions about btparse and Text::BibTeX
+
+=head1 DESCRIPTION
+
+This document attempts to address questions that I have been asked
+several times, and are easy to answer -- but not by perusing the
+documentation. For various reasons, the answers tend to be thinly
+distributed across several man pages, making it difficult to figure out
+what's going on. Hence, this man page will attempt to tie together
+various strands of thought, providing quick, focused, "How do I do X?"
+answers as opposed to lengthy descriptions of the capabilities and
+conventions of the btOOL libraries.
+
+=head1 PERL LIBRARY
+
+This section covers questions that users of C<Text::BibTeX>, the Perl
+component of B<btOOL>, have asked.
+
+=head2 Why aren't the BibTeX "month" macros defined?
+
+Because they're bibliography-specific, and C<Text::BibTeX> by default
+doesn't impose any assumptions about a particular type of database or
+data-processing domain on your entries. The problem arises when you
+parse entries from a file, say F<foo.bib>, that quite sensibly use the
+month macros (C<jan>, C<feb>, etc.) provided by the BibTeX standard
+style files:
+
+ $bibfile = new Text::BibTeX::File 'foo.bib' # open file
+ or die "foo.bib: $!\n";
+ $entry = new Text::BibTeX::Entry $bibfile; # parse first entry
+
+Using this code, you might get an "undefined macro" warning for every
+entry parsed from F<foo.bib>. Apart from the superficial annoyance of
+all those warning messages, the undefined macros are expanded as empty
+strings, meaning you lose any information about them---not good.
+
+You could always kludge it and forcibly define the month macros
+yourself. Prior to release 0.30, this had to be done by parsing a set
+of fake entries, but now C<Text::BibTeX> provides a direct interface to
+the underlying macro table. You I<could> just do this before parsing any
+entries:
+
+ use Text::BibTeX qw(:macrosubs);
+ # ...
+ my %month = (jan => 'January', feb => 'February', ... );
+ add_macro_text ($macro, $value)
+ while (($macro, $value) = each %month);
+
+But there's a better way that's more in keeping with how things are done
+under BibTeX (where default macros are defined in the style file): use
+C<Text::BibTeX>'s object-oriented analogue to style files, called
+structure modules. C<Text::BibTeX> provides a structure module,
+C<Text::BibTeX::Bib>, that (partially) emulates the standard style files
+of BibTeX 0.99, including the definition of month macros. Structure
+modules are specified on a per-file basis by using the C<set_structure>
+method on a C<Text::BibTeX::File> object. It's quite simple to tell
+C<Text::BibTeX> that entries from C<$bibfile> are expected to conform to
+the C<Bib> structure (which is implemented by the C<Text::BibTeX::Bib>
+module, but you don't really need to know that):
+
+ $bibfile = new Text::BibTeX::File 'foo.bib'
+ or die "foo.bib: $!\n";
+ $bibfile->set_structure ('Bib');
+
+You probably shouldn't hardcode the name of a particular structure in
+your programs, though, as there will eventually be a multitude of
+structure modules to choose from (just as there are a multitude of
+BibTeX style files to choose from). My preferred approach is to make
+the structure a command-line option which defaults to C<Bib> (since
+that's the only structure actually implemented as of this writing).
+
+=head2 How do I append to a BibTeX file?
+
+Just open it in append mode, and write entries to it as usual.
+Remember, a C<Text::BibTeX::File> object is mainly a wrapper around an
+C<IO::File> object, and the C<Text::BibTeX::File::open> method (and thus
+C<new> as well) is just a front-end to C<IO::File::open>.
+C<IO::File::open>, in turn, is a front-end either to Perl's builtin
+C<open> (if called with one argument) or C<sysopen> (two or three
+arguments). To save you the trouble of going off and reading all those
+man pages, here's the trick: if you pass just a filename to
+C<Text::BibTeX::File>'s C<new> method, then it's treated just like a
+filename passed to Perl's builtin C<open>:
+
+ my $append_file = new Text::BibTeX::File ">>$filename"
+ or die "couldn't open $filename for appending: $!\n";
+
+opens C<$filename> for appending. If, later on, you have an entry from
+another file (say C<$entry>), then you can append it to C<$append_file>
+by just writing it as usual:
+
+ $entry->write ($append_file);
+
+See C<append_entries> in the F<examples/> subdirectory of the
+C<Text::BibTeX> distribution for a complete example.
+
+=head1 C LIBRARY
+
+This section covers frequently-asked questions about B<btparse>, the C
+component of B<btOOL>.
+
+=head2 Is there a Python binding for B<btparse> yet?
+
+Not that I know of. I haven't written one. If you do so, please let me
+know about it.
+
+=head1 SEE ALSO
+
+L<btparse>, L<Text::BibTeX>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
diff --git a/btsort b/btsort
new file mode 100755
index 0000000..803007b
--- /dev/null
+++ b/btsort
@@ -0,0 +1,33 @@
+#!/usr/local/bin/perl -w
+
+#
+# btsort
+#
+# Reads an entire BibTeX file, sorts the entries, and spits them back out
+# again.  The database structure (default "Bib") and any options for it
+# may be given after the filename.
+#
+# $Id: btsort 3031 2006-09-21 20:02:34Z ambs $
+#
+
+use strict;
+use Text::BibTeX (':metatypes');
+
+die "usage: btsort file [structure [options]]\n" unless @ARGV >= 1;
+my ($filename, $structure, @options) = @ARGV;
+$structure ||= 'Bib';
+
+my $bibfile = new Text::BibTeX::File $filename or die "$filename: $!\n";
+$bibfile->set_structure ($structure, @options);
+
+# Collect every cleanly-parsed regular entry together with its sort key.
+# The key is fetched in scalar context, once per entry, so the sort
+# below never has to call back into the entry.
+my @keyed_entries;
+while (my $entry = new Text::BibTeX::Entry $bibfile)
+{
+    next unless $entry->parse_ok && $entry->metatype == BTE_REGULAR;
+    $entry->check;
+    my $sort_key = $entry->sort_key;
+    push (@keyed_entries, [$sort_key, $entry]);
+}
+$bibfile->close;
+
+# Print the entries in order of their sort keys.
+for my $pair (sort { $a->[0] cmp $b->[0] } @keyed_entries)
+{
+    $pair->[1]->print;
+}
diff --git a/btxs_support.c b/btxs_support.c
new file mode 100644
index 0000000..53b0e3d
--- /dev/null
+++ b/btxs_support.c
@@ -0,0 +1,485 @@
+/* ------------------------------------------------------------------------
+@NAME : btxs_support.c
+@DESCRIPTION: Support functions needed by the XSUBs in BibTeX.xs.
+@GLOBALS :
+@CREATED : 1997/11/16, Greg Ward (from code in BibTeX.xs)
+@MODIFIED :
+@VERSION : $Id: btxs_support.c 3032 2006-09-21 20:05:55Z ambs $
+@COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved.
+-------------------------------------------------------------------------- */
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#define BT_DEBUG 0
+
+#include "btparse.h"
+#include "btxs_support.h"
+
+
+static char *nodetype_names[] =
+{
+ "entry", "macrodef", "text", "key", "field", "string", "number", "macro"
+};
+
+
+/* ----------------------------------------------------------------------
+ * Miscellaneous stuff
+ */
+
+/*
+ * constant()
+ *
+ * Looks up the numeric value of one of the btparse constants by name.
+ * `name' must start with "BT" (anything else croaks); the third
+ * character selects the family of constants to search.  On a match the
+ * value is written to `*arg' and TRUE is returned; otherwise `*arg' is
+ * left untouched and FALSE is returned.
+ */
+int
+constant (char * name, IV * arg)
+{
+   int   found = FALSE;
+
+   DBG_ACTION (1, printf ("constant: name=%s\n", name));
+
+   if (name[0] != 'B' || name[1] != 'T')     /* should not happen! */
+      croak ("Illegal constant name \"%s\"", name);
+
+   switch (name[2])
+   {
+      case 'E':                         /* entry metatypes */
+         if      (strEQ (name, "BTE_UNKNOWN"))  { *arg = BTE_UNKNOWN;  found = TRUE; }
+         else if (strEQ (name, "BTE_REGULAR"))  { *arg = BTE_REGULAR;  found = TRUE; }
+         else if (strEQ (name, "BTE_COMMENT"))  { *arg = BTE_COMMENT;  found = TRUE; }
+         else if (strEQ (name, "BTE_PREAMBLE")) { *arg = BTE_PREAMBLE; found = TRUE; }
+         else if (strEQ (name, "BTE_MACRODEF")) { *arg = BTE_MACRODEF; found = TRUE; }
+         break;
+
+      case 'A':                         /* AST nodetypes (not all of them) */
+         if      (strEQ (name, "BTAST_STRING")) { *arg = BTAST_STRING; found = TRUE; }
+         else if (strEQ (name, "BTAST_NUMBER")) { *arg = BTAST_NUMBER; found = TRUE; }
+         else if (strEQ (name, "BTAST_MACRO"))  { *arg = BTAST_MACRO;  found = TRUE; }
+         break;
+
+      case 'N':                         /* name parts */
+         if      (strEQ (name, "BTN_FIRST"))    { *arg = BTN_FIRST;    found = TRUE; }
+         else if (strEQ (name, "BTN_VON"))      { *arg = BTN_VON;      found = TRUE; }
+         else if (strEQ (name, "BTN_LAST"))     { *arg = BTN_LAST;     found = TRUE; }
+         else if (strEQ (name, "BTN_JR"))       { *arg = BTN_JR;       found = TRUE; }
+         else if (strEQ (name, "BTN_NONE"))     { *arg = BTN_NONE;     found = TRUE; }
+         break;
+
+      case 'J':                         /* token join methods */
+         if      (strEQ (name, "BTJ_MAYTIE"))   { *arg = BTJ_MAYTIE;   found = TRUE; }
+         else if (strEQ (name, "BTJ_SPACE"))    { *arg = BTJ_SPACE;    found = TRUE; }
+         else if (strEQ (name, "BTJ_FORCETIE")) { *arg = BTJ_FORCETIE; found = TRUE; }
+         else if (strEQ (name, "BTJ_NOTHING"))  { *arg = BTJ_NOTHING;  found = TRUE; }
+         break;
+
+      default:                          /* unknown family: not found */
+         break;
+   }
+
+   return found;
+}
+
+
+/* ----------------------------------------------------------------------
+ * Stuff for converting a btparse entry AST to a Perl structure:
+ * convert_value() [private]
+ * convert_assigned_entry() [private]
+ * convert_value_entry() [private]
+ * ast_to_hash()
+ */
+
+/*
+ * convert_value()
+ *
+ * Converts the value of one field (or of a whole "value" entry) to a
+ * Perl scalar that the caller will store in a hash.
+ *
+ *   field_name  name of the field, used only in diagnostics; may be
+ *               NULL (convert_value_entry() passes NULL)
+ *   field       AST node whose list of simple values is converted
+ *   preserve    if true, build a Text::BibTeX::Value object holding one
+ *               Text::BibTeX::SimpleValue (type, text) per simple value;
+ *               if false, the value must already be a single string and
+ *               is returned as a plain string SV
+ *
+ * Always returns a freshly created SV owned by the caller (an undefined
+ * one if there is no text), so it is safe to hand straight to hv_store().
+ * Croaks if `preserve' is false and the value is not a simple string.
+ */
+static SV *
+convert_value (char * field_name, AST * field, boolean preserve)
+{
+   AST *  value;
+   bt_nodetype
+          nodetype;
+   char * text = NULL;
+   SV *   sv_field_value;
+
+   value = bt_next_value (field, NULL, &nodetype, &text);
+   if (preserve)
+   {
+      HV * val_stash;                   /* stash for Text::BibTeX::Value pkg */
+      HV * sval_stash;                  /* and for Text::BibTeX::SimpleValue */
+      AV * compound_value;              /* list of simple values */
+      SV * sval_contents[2];            /* type and text */
+      AV * simple_value;                /* list of (type, text) */
+      SV * simple_value_ref;            /* ref to simple_value */
+
+      /*
+       * Get the stashes for the two classes into which we'll be
+       * blessing things.
+       */
+      val_stash = gv_stashpv ("Text::BibTeX::Value", TRUE);
+      sval_stash = gv_stashpv ("Text::BibTeX::SimpleValue", TRUE);
+      if (val_stash == NULL || sval_stash == NULL)
+      {
+         croak ("unable to get stash for one or both of "
+                "Text::BibTeX::Value or Text::BibTeX::SimpleValue");
+      }
+
+      /* Start the compound value as an empty list */
+      compound_value = newAV ();
+
+      /* Walk the list of simple values */
+      while (value)
+      {
+         /*
+          * Convert the nodetype and text to SVs and save them in what will
+          * soon become a Text::BibTeX::SimpleValue object.
+          */
+         sval_contents[0] = newSViv ((IV) nodetype);
+         sval_contents[1] = newSVpv (text, 0);
+         simple_value = av_make (2, sval_contents);
+
+         /*
+          * We're done with these two SVs (they're saved in the
+          * simple_value AV), so decrement them out of existence
+          */
+         SvREFCNT_dec (sval_contents[0]);
+         SvREFCNT_dec (sval_contents[1]);
+
+         /* Create the SimpleValue object by blessing a reference */
+         simple_value_ref = newRV_noinc ((SV *) simple_value);
+         sv_bless (simple_value_ref, sval_stash);
+
+         /* Push this SimpleValue object onto the main list */
+         av_push (compound_value, simple_value_ref);
+
+         /* And find the next simple value in this field */
+         value = bt_next_value (field, value, &nodetype, &text);
+      }
+
+      /* Make a Text::BibTeX::Value object from our list of SimpleValues */
+      sv_field_value = newRV_noinc ((SV *) compound_value);
+      sv_bless (sv_field_value, val_stash);
+   }
+   else
+   {
+      if (value &&
+          (nodetype != BTAST_STRING ||
+           bt_next_value (field, value, NULL, NULL) != NULL))
+      {
+         croak ("BibTeX.xs: internal error in entry post-processing--"
+                "value for field %s is not a simple string",
+                field_name ? field_name : "(unnamed)");
+      }
+
+      DBG_ACTION (2, printf ("  field=%s, value=\"%s\"\n",
+                             field_name ? field_name : "(unnamed)",
+                             text ? text : "(null)"));
+
+      /*
+       * The result ends up in a hash, so an absent value must be a new
+       * undefined SV: storing the shared &PL_sv_undef in an HV is not
+       * safe, and the hash would end up owning a reference it was never
+       * given.
+       */
+      sv_field_value = text ? newSVpv (text, 0) : newSV (0);
+   }
+
+   return sv_field_value;
+} /* convert_value () */
+
+
+/*
+ * convert_assigned_entry()
+ *
+ * Fills in the Perl-side representation of an entry made of
+ * "field = value" assignments (ast_to_hash() calls this for regular
+ * and macro-definition entries).  Three things are stored in `entry':
+ *
+ *   "fields"  ref to the list of field names, in order of appearance
+ *   "values"  ref to a hash mapping field name to value (a string, or
+ *             a Text::BibTeX::Value object if `preserve' is true)
+ *   "lines"   ref to a hash of line numbers: one per field, plus
+ *             "START" and "STOP" for the entry as a whole
+ */
+static void
+convert_assigned_entry (AST *top, HV *entry, boolean preserve)
+{
+   AV *   flist;                 /* the field list -- put into entry */
+   HV *   values;                /* the field values -- put into entry */
+   HV *   lines;                 /* line numbers of entry and its fields */
+   AST *  field;
+   char * field_name;
+   int    prev_line;
+
+   /*
+    * Start the line number hash.  It will contain (num_fields)+2 elements;
+    * one for each field (keyed on the field name), and the `start' and
+    * `stop' lines for the entry as a whole.  (Currently, the `stop' line
+    * number is the same as the line number of the last field.  This isn't
+    * strictly correct, but by the time we get our hands on the AST, that
+    * closing brace or parenthesis is long lost -- so this is the best we
+    * get.  I just want to put this redundant line number in in case some
+    * day I get ambitious and keep track of its true value.)
+    */
+
+   lines = newHV ();
+   hv_store (lines, "START", 5, newSViv (top->line), 0);
+
+   /*
+    * If the entry has no fields at all, the best available `stop' line
+    * is the line the entry starts on.
+    */
+   prev_line = top->line;
+
+   /*
+    * Now loop over all fields in the entry.  As we loop, we build
+    * three structures: the list of field names, the hash relating
+    * field names to (fully expanded) values, and the list of line
+    * numbers.
+    */
+
+   DBG_ACTION (2, printf ("  creating field list, value hash\n"));
+   flist = newAV ();
+   values = newHV ();
+
+   DBG_ACTION (2, printf ("  getting fields and values\n"));
+   for (field = bt_next_field (top, NULL, &field_name);
+        field != NULL;
+        field = bt_next_field (top, field, &field_name))
+   {
+      SV *   sv_field_name;
+      SV *   sv_field_value;
+
+      /*
+       * A field without a name shouldn't happen -- but if it does,
+       * skipping the field seems reasonable.  (The loop header still
+       * advances to the next field, so this cannot spin forever.)
+       */
+      if (!field_name)
+         continue;
+
+      /* Convert the field name to an SV (for storing in the entry hash) */
+      sv_field_name = newSVpv (field_name, 0);
+
+      /*
+       * Convert the field value to an SV; this might be just a string, or
+       * it might be a reference to a Text::BibTeX::Value object (if
+       * 'preserve' is true).
+       */
+      sv_field_value = convert_value (field_name, field, preserve);
+
+      /*
+       * Push the field name onto the field list, add the field value to
+       * the values hash, and add the line number onto the line number
+       * hash.
+       */
+      av_push (flist, sv_field_name);
+      hv_store (values, field_name, strlen (field_name), sv_field_value, 0);
+      hv_store (lines, field_name, strlen (field_name),
+                newSViv (field->line), 0);
+      prev_line = field->line;      /* so we can duplicate last line no. */
+
+      DBG_ACTION (2, printf ("  stored field/value for %s\n", field_name));
+   }
+
+
+   /*
+    * Duplicate the last element of `lines' (kludge until we keep track of
+    * the true end-of-entry line number).
+    */
+   hv_store (lines, "STOP", 4, newSViv (prev_line), 0);
+
+
+   /*
+    * Put refs to field list, value hash, and line list into the main
+    * hash.  The containers were created here with a reference count of
+    * one, so the references must take that count over (newRV_noinc)
+    * rather than add to it, or the containers could never be freed.
+    */
+
+   DBG_ACTION (2, printf ("  got all fields; storing list/hash refs\n"));
+   hv_store (entry, "fields", 6, newRV_noinc ((SV *) flist), 0);
+   hv_store (entry, "values", 6, newRV_noinc ((SV *) values), 0);
+   hv_store (entry, "lines", 5, newRV_noinc ((SV *) lines), 0);
+
+} /* convert_assigned_entry () */
+
+
+/*
+ * convert_value_entry()
+ *
+ * Fills in the Perl-side representation of an entry that consists of a
+ * single value rather than a list of fields (ast_to_hash() calls this
+ * for comment and preamble entries).  Two things are stored in `entry':
+ *
+ *   "lines"  ref to a hash holding the "START" and "STOP" line numbers
+ *   "value"  the entry's value: a string, or a Text::BibTeX::Value
+ *            object if `preserve' is true
+ */
+static void
+convert_value_entry (AST *top, HV *entry, boolean preserve)
+{
+   HV *   lines;                 /* line numbers of entry and its fields */
+   AST *  item;
+   int    last_line;
+   char * value;
+   SV *   sv_value;
+
+   /*
+    * Start the line number hash.  For "value" entries, it's a bit simpler --
+    * just a `start' and `stop' line number.  Again, the `stop' line is
+    * inaccurate; it's just the line number of the last value in the
+    * entry.
+    */
+   lines = newHV ();
+   hv_store (lines, "START", 5, newSViv (top->line), 0);
+
+   /*
+    * Walk the list of values to find the last one (for its line number).
+    * If the entry has no values at all, fall back on the line the entry
+    * starts on rather than dereferencing a node that was never found.
+    */
+   last_line = top->line;
+   item = NULL;
+   while ((item = bt_next_value (top, item, NULL, NULL)) != NULL)
+      last_line = item->line;
+   hv_store (lines, "STOP", 4, newSViv (last_line), 0);
+
+   /*
+    * Store the line number hash in the entry hash.  `lines' was created
+    * here with a reference count of one, which the new reference takes
+    * over (newRV_noinc) so that the hash can be freed with the entry.
+    */
+   hv_store (entry, "lines", 5, newRV_noinc ((SV *) lines), 0);
+
+   /* And get the value of the entry as a single string (fully processed) */
+
+   if (preserve)
+   {
+      sv_value = convert_value (NULL, top, TRUE);
+   }
+   else
+   {
+      value = bt_get_text (top);
+
+      /*
+       * An absent value is stored as a new undefined SV; the shared
+       * &PL_sv_undef must not be put into a hash.
+       */
+      sv_value = value ? newSVpv (value, 0) : newSV (0);
+   }
+   hv_store (entry, "value", 5, sv_value, 0);
+
+} /* convert_value_entry () */
+
+
+/*
+ * ast_to_hash()
+ *
+ * Converts a parsed btparse entry to its Perl representation, storing
+ * the result in the hash that `entry_ref' refers to, and then frees the
+ * AST.
+ *
+ *   entry_ref     reference to the hash to fill in; croaks if it is not
+ *                 a hash reference
+ *   top           the entry's AST; always freed by this function, on the
+ *                 error paths as well as on success
+ *   parse_status  stored in the hash under "status"
+ *   preserve      if true, values are post-processed minimally and kept
+ *                 as Text::BibTeX::Value objects; otherwise they are
+ *                 fully processed into plain strings
+ *
+ * The keys "type", "metatype" and "status" are always set; "key" is set
+ * when the entry has one; the remaining keys depend on the metatype
+ * (see convert_assigned_entry() and convert_value_entry()).
+ */
+void
+ast_to_hash (SV *    entry_ref,
+             AST *   top,
+             boolean parse_status,
+             boolean preserve)
+{
+   char *  type;
+   char *  key;
+   bt_metatype
+           metatype;
+   ushort  options;                     /* post-processing options */
+   HV *    entry;                       /* the main hash -- build and return */
+
+   DBG_ACTION (1, printf ("ast_to_hash: entry\n"));
+
+   /* printf ("checking that entry_ref is a ref and a hash ref\n"); */
+   if (! (SvROK (entry_ref) && (SvTYPE (SvRV (entry_ref)) == SVt_PVHV)))
+   {
+      bt_free_ast (top);                /* croak() won't return to do it */
+      croak ("entry_ref must be a hash ref");
+   }
+   entry = (HV *) SvRV (entry_ref);
+
+   /*
+    * Clear out all hash values that might not be replaced in this
+    * conversion (in case the user parses into an existing
+    * Text::BibTeX::Entry object).  (We don't blow the hash away with
+    * hv_clear() in case higher-up code has put interesting stuff into it.)
+    */
+
+   hv_delete (entry, "key",    3, G_DISCARD);
+   hv_delete (entry, "fields", 6, G_DISCARD);
+   hv_delete (entry, "lines",  5, G_DISCARD);
+   hv_delete (entry, "values", 6, G_DISCARD);
+   hv_delete (entry, "value",  5, G_DISCARD);
+
+   /*
+    * Perform entry post-processing.  How exactly we post-process depends on
+    * 1) the entry type, and 2) the 'preserve' flag.
+    */
+
+   metatype = bt_entry_metatype (top);
+   if (preserve)                        /* if true, then entry type */
+   {                                    /* doesn't matter */
+      options = BTO_MINIMAL;
+   }
+   else
+   {
+      if (metatype == BTE_MACRODEF)
+         options = BTO_MACRO;
+      else
+         options = BTO_FULL;
+   }
+
+   /*
+    * Postprocess the entry, with the string-processing options we just
+    * determined plus "no store macros" turned on.  (That's because
+    * macros will already have been stored by the postprocessing done
+    * by bt_parse*; we don't want to do it again and generate spurious
+    * warnings!)
+    */
+   bt_postprocess_entry (top, options | BTO_NOSTORE);
+
+
+   /*
+    * Start filling in the hash; all entries have a type and metatype,
+    * and we'll do the key here (even though it's not in all entries)
+    * for good measure.
+    */
+
+   type = bt_entry_type (top);
+   key = bt_entry_key (top);
+   DBG_ACTION (2, printf ("  inserting type (%s), metatype (%d)\n",
+                          type ? type : "*none*", (int) metatype));
+   DBG_ACTION (2, printf ("        ... key (%s) status (%d)\n",
+                          key ? key : "*none*", parse_status));
+
+   if (!type)
+   {
+      bt_free_ast (top);                /* don't leak the AST on error */
+      croak ("entry has no type");
+   }
+   hv_store (entry, "type", 4, newSVpv (type, 0), 0);
+   hv_store (entry, "metatype", 8, newSViv ((IV) metatype), 0);
+
+   if (key)
+      hv_store (entry, "key", 3, newSVpv (key, 0), 0);
+
+   hv_store (entry, "status", 6, newSViv ((IV) parse_status), 0);
+
+
+   switch (metatype)
+   {
+      case BTE_MACRODEF:
+      case BTE_REGULAR:
+         convert_assigned_entry (top, entry, preserve);
+         break;
+
+      case BTE_COMMENT:
+      case BTE_PREAMBLE:
+         convert_value_entry (top, entry, preserve);
+         break;
+
+      default:                          /* this should never happen! */
+         bt_free_ast (top);             /* `metatype' was saved above */
+         croak ("unknown entry metatype (%d)\n", (int) metatype);
+   }
+
+   /*
+    * If 'preserve' was true, then the user is going to need the
+    * Text::BibTeX::Value module!
+    *
+    * XXX this doesn't work!  Why?!?!
+    */
+/*
+   if (preserve)
+   {
+      printf ("requiring Text::BibTeX::Value...\n");
+      perl_require_pv ("Text::BibTeX::Value");
+   }
+*/
+
+   /* And finally, free up the AST */
+
+   bt_free_ast (top);
+
+/*   hv_store (entry, "ast", 3, newSViv ((IV) top), 0); */
+
+   DBG_ACTION (1, printf ("ast_to_hash: exit\n"));
+} /* ast_to_hash () */
+
+
+/* ----------------------------------------------------------------------
+ * Stuff for converting a list of C strings to Perl
+ * convert_stringlist() [private]
+ * store_stringlist()
+ */
+
+/*
+ * convert_stringlist()
+ *
+ * Builds a Perl list from the first `num_strings' C strings in `list'
+ * and returns a new reference to it.  The returned reference is the
+ * sole owner of the list, so the list is freed when the reference is.
+ */
+static SV *
+convert_stringlist (char **list, int num_strings)
+{
+   int    i;
+   AV *   perl_list;
+   SV *   sv_string;
+
+   perl_list = newAV ();
+   for (i = 0; i < num_strings; i++)
+   {
+      sv_string = newSVpv (list[i], 0);
+      av_push (perl_list, sv_string);
+   }
+
+   /*
+    * newAV() already gave the list a reference count of one; hand that
+    * count over to the reference instead of adding a second one that
+    * nothing would ever release.
+    */
+   return newRV_noinc ((SV *) perl_list);
+
+} /* convert_stringlist() */
+
+
+/*
+ * store_stringlist()
+ *
+ * Stores a list of C strings in `hash' under `key', as a reference to a
+ * Perl list.  If `list' is NULL, any existing value under `key' is
+ * deleted from the hash instead.
+ */
+void
+store_stringlist (HV *hash, char *key, char **list, int num_strings)
+{
+   SV *  list_ref;
+
+   /* No list at all: make sure the key is absent and we're done */
+   if (list == NULL)
+   {
+      DBG_ACTION (2, printf ("store_stringlist(): hash=%p, key=%s: deleting\n",
+                             hash, key))
+      hv_delete (hash, key, strlen (key), G_DISCARD);
+      return;
+   }
+
+   DBG_ACTION (2,
+   {
+      int i;
+
+      printf ("store_stringlist(): hash=%p, key=%s, list=(",
+              hash, key);
+      for (i = 0; i < num_strings; i++)
+         printf ("%s%c", list[i], (i == num_strings-1) ? ')' : ',');
+      printf ("\n");
+   })
+
+   /* Convert the strings to a Perl list and file it under `key' */
+   list_ref = convert_stringlist (list, num_strings);
+   hv_store (hash, key, strlen (key), list_ref, 0);
+
+} /* store_stringlist() */
diff --git a/btxs_support.h b/btxs_support.h
new file mode 100644
index 0000000..0092e9b
--- /dev/null
+++ b/btxs_support.h
@@ -0,0 +1,45 @@
+/* ------------------------------------------------------------------------
+@NAME : btxs_support.h
+@DESCRIPTION: Macros, prototypes, and whatnot needed by both btxs_support.c
+ and BibTeX.xs.
+@GLOBALS :
+@CREATED : 1997/11/16, Greg Ward
+@MODIFIED :
+@VERSION : $Id: btxs_support.h 3032 2006-09-21 20:05:55Z ambs $
+@COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved.
+-------------------------------------------------------------------------- */
+
+#ifndef BTXS_SUPPORT_H
+#define BTXS_SUPPORT_H
+
+#ifndef BT_DEBUG
+# define BT_DEBUG 0
+#endif
+
+/*
+ * DBG_ACTION(level, action): when BT_DEBUG is non-zero, executes `action'
+ * if BT_DEBUG is at least `level'; when BT_DEBUG is zero, expands to
+ * nothing at all.
+ *
+ * The debugging form is a bare `if' statement with a braced body, and
+ * some call sites in btxs_support.c invoke the macro without a trailing
+ * semicolon -- so the expansion must remain a complete statement by
+ * itself.  Avoid using it as the unbraced body of an `if' that has an
+ * `else'.
+ */
+#if BT_DEBUG
+# define DBG_ACTION(level,action) if (BT_DEBUG >= level) { action; }
+#else
+# define DBG_ACTION(level,action)
+#endif
+
+/* Portability hacks go here... */
+
+/*
+ * First, on SGIs, <string.h> doesn't prototype strdup() if _POSIX_SOURCE
+ * is defined -- and it usually is for Perl, because that's the default.
+ * So we workaround this by putting a prototype here. Yuck.
+ */
+#if defined(__sgi) && defined(_POSIX_SOURCE)
+extern char *strdup(const char *);
+#endif
+
+
+/* Prototypes */
+void store_stringlist (HV *hash, char *key, char **list, int num_strings);
+void ast_to_hash (SV * entry_ref,
+ AST * top,
+ boolean parse_status,
+ boolean preserve);
+int constant (char * name, IV * arg);
+
+#endif /* BTXS_SUPPORT_H */
diff --git a/examples/append_entries b/examples/append_entries
new file mode 100755
index 0000000..50b771c
--- /dev/null
+++ b/examples/append_entries
@@ -0,0 +1,78 @@
+#!/usr/local/bin/perl5 -w
+
+#
+# append_entries
+#
+# Appends entries from a source file to a destination file. Only regular
+# entries are copied; macro definitions, preambles, and comments are
+# dropped. User may supply a regex which the entry keys must match to be
+# appended.
+#
+# Note that a "real" entry appender/database merger would be a lot more
+# complicated than this! Some things that would have to be handled:
+# * enforcing the structure of input entries (eg. making sure they
+# conform to the rules of some database structure such as 'Bib')
+# * doing any other checks particular to your database, such as ensuring
+# that journal or conference names come from an approved list of
+# "known" journals and conferences (to enforce consistent naming
+# across a large database)
+# * detecting and resolving key collisions
+# * adding any preambles in the source file to the destination file
+# * checking for duplicate macro definitions
+# * ensuring that macros used in the source file are defined in
+# the destination file
+#
+# by Greg Ward, 1998/04/04
+#
+# $Id: append_entries 3032 2006-09-21 20:05:55Z ambs $
+#
+
+# Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+# is part of the Text::BibTeX library. This library is free software; you
+# may redistribute it and/or modify it under the same terms as Perl itself.
+
+use strict;
+use Text::BibTeX;
+
+my $usage = <<USAGE;
+usage: $0 dest_file source_file [key_pattern]
+ appends regular entries from <source_file> whose keys match
+ <key_pattern> to <dest_file>; if <key_pattern> not supplied, all
+ entries from <source_file> are taken
+USAGE
+
+die $usage unless @ARGV == 2 || @ARGV == 3;
+my ($dest_filename, $source_filename, $key_pattern) = @ARGV;
+
+# Open the two files: dest_file in append mode (ultimately just using
+# perl's builtin 'open'), and source_file in regular read-only mode.
+my $dest_file = new Text::BibTeX::File ">>$dest_filename"
+   or die "couldn't open $dest_filename for appending: $!\n";
+my $source_file = new Text::BibTeX::File $source_filename
+   or die "couldn't open $source_filename: $!\n";
+
+# Turn on 'value preservation' mode for the input file.  This is mainly so
+# we don't lose the fact that macros are macros and numbers are numbers,
+# but it also frees us from having to worry about predefined macros
+# (such as the month names).
+$source_file->preserve_values (1);
+
+# And loop over all entries in the source file, optionally appending
+# each one to the destination file.
+
+while (my $entry = new Text::BibTeX::Entry $source_file)
+{
+   # Skip this entry if it failed to parse (there is nothing sensible
+   # to append), or if it's not a regular entry -- that is, we just
+   # drop '@string', '@comment', and '@preamble' entries, probably
+   # unacceptable in the real world.
+   next unless $entry->parse_ok && $entry->metatype == BTE_REGULAR;
+
+   # Skip this entry if the user supplied a regex that keys must match
+   # and this entry's key doesn't match.
+   next if defined $key_pattern && $entry->key !~ /$key_pattern/o;
+
+   # Otherwise, write this entry to the destination file.  Since $dest_file
+   # was opened in append mode, $entry will be appended to the end of
+   # $dest_file.
+   $entry->write ($dest_file);
+} # while $source_file
+
+# Close both files explicitly rather than leaving it to program exit.
+$source_file->close;
+$dest_file->close;
diff --git a/t/bib.t b/t/bib.t
new file mode 100644
index 0000000..1220125
--- /dev/null
+++ b/t/bib.t
@@ -0,0 +1,148 @@
+use strict;
+use vars qw($DEBUG);
+use IO::Handle;
+use POSIX qw(tmpnam);
+
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..21\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+use Text::BibTeX::Bib;
+$loaded = 1;
+print "ok 1\n";
+
+$DEBUG = 1;
+
+setup_stderr;
+
+# Basic test of the BibEntry classes (really, its base classes
+# BibFormat and BibSort)
+
+my $entries = <<'ENTRIES';
+@article{homer97,
+ author = {Simpson, Homer J. and Andr{\'e} de la Poobah},
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ volume = 4,
+ pages = "125--130",
+ year = 1997
+}
+
+@book{george98,
+ author = "George Simpson",
+ title = "How to Found a Big Department Store",
+ year = 1998
+}
+ENTRIES
+
+# (Currently) we have to go through a Text::BibTeX::File object to get
+# Entry objects blessed into a structured entry class, so start
+# by creating the file to parse.
+my $fn = tmpnam . ".bib";
+open (F, ">$fn") || die "couldn't create $fn: $!\n";
+print F $entries;
+close (F);
+
+# Open it as a Text::BibTeX::File object, set the structure class (which
+# controls the structured entry class of all entries parsed from that
+# file), and get the structure class (so we can set options on it).
+my $file = new Text::BibTeX::File ($fn);
+$file->set_structure ('Bib');
+my $structure = $file->structure;
+
+# Read the two entries
+my $entry1 = new Text::BibTeX::BibEntry $file;
+my $entry2 = new Text::BibTeX::BibEntry $file;
+
+$file->close;
+unlink ($fn) || warn "couldn't delete temporary file $fn: $!\n";
+
+# The default options of BibStructure are:
+# namestyle => 'full'
+# nameorder => 'first'
+# atitle_lower => 1 (true)
+# sortby => 'name'
+# Let's make sure these are respected.
+
+my @blocks = $entry1->format;
+test (@blocks == 4 && # 4 blocks:
+ defined $blocks[0] && # author
+ defined $blocks[1] && # title
+ defined $blocks[2] && # journal
+ !defined $blocks[3]); # note (there is no note!)
+test (ref $blocks[0] eq 'ARRAY' && # 1 sentence, 1 clauses (2 authors)
+ @{$blocks[0]} == 1);
+test ($blocks[0][0] eq "Homer~J. Simpson and Andr{\\'e} de~la Poobah");
+test (ref $blocks[1] eq 'ARRAY' && # 1 sentence, 1 clause for title
+ @{$blocks[1]} == 1 &&
+ $blocks[1][0] eq "Territorial imperatives in modern suburbia");
+test (ref $blocks[2] eq 'ARRAY' && # 1 sentence for journal
+ @{$blocks[2]} == 1);
+test (ref $blocks[2][0] eq 'ARRAY' && # 3 clauses in that 1 sentence
+ @{$blocks[2][0]} == 3);
+test ($blocks[2][0][0] eq 'Journal of Suburban Studies' &&
+ $blocks[2][0][1] eq '4:125--130' &&
+ $blocks[2][0][2] eq '1997');
+
+# Tweak options, one at a time, testing the result of each tweak
+$structure->set_options (nameorder => 'last');
+@blocks = $entry1->format;
+test ($blocks[0][0] eq "Simpson, Homer~J. and de~la Poobah, Andr{\\'e}");
+
+$structure->set_options (namestyle => 'abbrev',
+ nameorder => 'first');
+@blocks = $entry1->format;
+test ($blocks[0][0] eq "H.~J. Simpson and A. de~la Poobah");
+
+$structure->set_options (nameorder => 'last');
+@blocks = $entry1->format;
+test ($blocks[0][0] eq "Simpson, H.~J. and de~la Poobah, A.");
+
+$structure->set_options (namestyle => 'nopunct');
+@blocks = $entry1->format;
+test ($blocks[0][0] eq "Simpson, H~J and de~la Poobah, A");
+
+$structure->set_options (namestyle => 'nospace');
+@blocks = $entry1->format;
+test ($blocks[0][0] eq "Simpson, HJ and de~la Poobah, A");
+
+$structure->set_options (atitle_lower => 0);
+@blocks = $entry1->format;
+test ($blocks[1][0] eq "Territorial Imperatives in Modern Suburbia");
+
+# Now some formatting tests on the second entry (a book). Note that the
+# two entries share a structure object, so the last-set options apply
+# here!
+
+@blocks = $entry2->format;
+test (@blocks == 4 && # again, 4 blocks:
+ defined $blocks[0] && # name (authors or editors)
+ defined $blocks[1] && # title (and volume no.)
+ defined $blocks[2] && # no/series/publisher/date
+ ! defined $blocks[3]); # note (again none)
+test ($blocks[0][0] eq "Simpson, G");
+test ($blocks[1][0][0] eq "How to Found a Big Department Store" &&
+ ! $blocks[1][0][1]); # no volume number
+test (! $blocks[2][0] && # no number/series
+ ! $blocks[2][1][0] && # no publisher
+ ! $blocks[2][1][1] && # no publisher address
+ ! $blocks[2][1][2] && # no edition
+ $blocks[2][1][3] eq '1998'); # but we do at least have a date!
+
+# fiddle a bit more with name-generation options just to make sure
+# everything's in working order
+$structure->set_options (namestyle => 'full',
+ nameorder => 'first');
+@blocks = $entry2->format;
+test ($blocks[0][0] eq "George Simpson");
+
+# Now test sorting: by default, the book (G. Simpson 1998) should come
+# before the article (H. J. Simpson 1997) because the default sort
+# order is (name, year).
+test ($entry2->sort_key lt $entry1->sort_key);
+
+# But if we change to sort by year, the article comes first
+$structure->set_options (sortby => 'year');
+test ($entry1->sort_key lt $entry2->sort_key);
diff --git a/t/common.pl b/t/common.pl
new file mode 100644
index 0000000..c8249bb
--- /dev/null
+++ b/t/common.pl
@@ -0,0 +1,95 @@
+use Carp;
+
+my $err_file = 't/errors';
+
+END { unlink $err_file }
+
+
+# setup_stderr: send everything written to STDERR into the scratch file
+# named by $err_file, so that test scripts can later read back the
+# messages a library call produced.  A duplicate of the real stderr is
+# kept in SAVE_STDERR, and a __DIE__ hook points STDERR back at it before
+# re-raising, so fatal errors are not hidden in the scratch file.
+sub setup_stderr
+{
+   # Remember the real stderr first, then redirect.
+   open (SAVE_STDERR, ">&STDERR")
+      or die "couldn't save stderr: $!\n";
+   open (STDERR, ">$err_file")
+      or die "couldn't redirect stderr to $err_file: $!\n";
+   STDERR->autoflush (1);
+
+#   $SIG{'__WARN__'} = sub { print SAVE_STDERR @_ };
+
+   # On any die: restore the original stderr, then die with the same
+   # arguments so the message is reported there.
+   my $restore_then_die = sub
+   {
+      open (STDERR, '>&=' . fileno (SAVE_STDERR));
+      die @_;
+   };
+   $SIG{'__DIE__'} = $restore_then_die;
+}
+
+# warnings: return the lines captured in $err_file since the last call
+# (newlines removed), then truncate the file by re-opening STDERR onto it.
+# In scalar context the result is the number of captured lines, which is
+# why callers can write "test (! warnings)".  When $DEBUG is true the
+# captured messages are also echoed to stdout.
+sub warnings
+{
+   # Slurp whatever has been written to the capture file so far.
+   open (ERR, $err_file) or die "couldn't open $err_file: $!\n";
+   my @messages = <ERR>;
+   chomp (@messages);
+   close (ERR);
+
+   # Re-open STDERR for writing; this empties the capture file so the
+   # next call only sees new messages.
+   open (STDERR, ">$err_file")
+      or die "couldn't redirect stderr to $err_file: $!\n";
+   STDERR->autoflush (1);
+
+   if ($DEBUG)
+   {
+      printf "caught %d messages on stderr:\n", scalar @messages;
+      print join ("\n", @messages) . "\n";
+   }
+
+   return @messages;
+}
+
+# list_equal: compare two array references element by element.
+#   $eq       - code ref called as $eq->(x, y); any true result means
+#               "these two elements are equal"
+#   $a_list,
+#   $b_list   - the array references to compare
+# Returns 1 if both arrays have the same length and every pair of
+# elements compares equal, 0 otherwise.  Croaks if either argument is not
+# an array reference.
+sub list_equal
+{
+   my ($eq, $a_list, $b_list) = @_;
+
+   croak "list_equal: \$a and \$b not lists"
+      unless ref $a_list eq 'ARRAY' && ref $b_list eq 'ARRAY';
+
+   return 0 unless @$a_list == @$b_list;     # compare lengths
+
+   # Judge the comparator's result by truth, not by "== 1", so that a
+   # comparator returning some other true value still counts as equal.
+   my $idx;
+   for $idx (0 .. $#$a_list)
+   {
+      return 0 unless &$eq ($a_list->[$idx], $b_list->[$idx]);
+   }
+
+   # Explicit success value (also covers two empty lists).
+   return 1;
+}
+
+# slist_equal: compare two array references of strings via list_equal.
+# Two elements match when both are defined and string-equal, or when
+# both are undefined; a defined value never matches an undefined one.
+sub slist_equal
+{
+   my ($left, $right) = @_;
+
+   my $same_string = sub
+   {
+      my ($x, $y) = @_;
+
+      # If at least one side is undef, they match only if both are.
+      return (! defined $x && ! defined $y)
+         unless defined $x && defined $y;
+      return $x eq $y;
+   };
+
+   list_equal ($same_string, $left, $right);
+}
+
+# Number of the most recently reported test.  It starts at 1 because each
+# test script prints "ok 1" itself once the module has loaded.
+my $i = 1;
+
+# test: report one test result on stdout as "ok N" or "not ok N", where
+# N is the next test number and the verdict is the truth of the argument.
+sub test
+{
+   my $passed = shift;
+
+   $i++;
+   my $verdict = $passed ? "ok" : "not ok";
+   printf "%s %d\n", $verdict, $i;
+}
+
+# test_entry: run the standard battery of checks on a parsed entry.
+#   $entry  - entry object under test
+#   $type   - expected entry type
+#   $key    - expected key, or undef if the entry should have no key
+#   $fields - array ref of expected field names, in order
+#   $values - array ref of expected field values, parallel to $fields
+# Emits 5 + (number of fields) results through test().  Croaks if
+# $fields and $values differ in length.
+sub test_entry
+{
+   my ($entry, $type, $key, $fields, $values) = @_;
+   my ($idx, @vals);
+
+   croak "test_entry: num fields != num values"
+      unless $#$fields == $#$values;
+   test ($entry->parse_ok);
+   test ($entry->type eq $type);
+   test (defined $key ? $entry->key eq $key : !defined $entry->key);
+   test (slist_equal ([$entry->fieldlist], $fields));
+
+   # Check each field individually.  Only an undefined value is mapped
+   # to the empty string; a real value that happens to be false (such as
+   # "0") must be compared as it is.
+   for $idx (0 .. $#$fields)
+   {
+      my $val = $entry->get ($fields->[$idx]);
+      $val = '' unless defined $val;
+      test ($entry->exists ($fields->[$idx]) &&
+            $val eq $values->[$idx]);
+   }
+
+   # Same check again, fetching all fields with a single get() call.
+   @vals = map { defined $_ ? $_ : '' } $entry->get (@$fields);
+   test (slist_equal (\@vals, $values));
+}
+
+1;
diff --git a/t/macro.t b/t/macro.t
new file mode 100644
index 0000000..3ce9fb1
--- /dev/null
+++ b/t/macro.t
@@ -0,0 +1,121 @@
+use strict;
+use vars ('$DEBUG');
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..36\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX qw(:macrosubs);
+$loaded = 1;
+print "ok 1\n";
+
+$DEBUG = 1;
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# test macro parsing and expansion
+
+my ($macrodef, $regular, $entry, @warnings);
+
+$macrodef = <<'TEXT';
+@string ( foo = " The Foo
+ Journal",
+ sons = " \& Sons",
+ bar
+= {Bar } # sons,
+
+)
+TEXT
+
+$regular = <<'TEXT';
+@article { my_article,
+ author = { Us and Them },
+ journal = foo,
+ publisher = "Fu" # bar
+ }
+TEXT
+
+# Direct access to macro table, part 1: make sure the macros we're going to
+# define aren't defined yet
+
+print "testing that none of our macros are defined yet\n" if $DEBUG;
+test (macro_length ('foo') == 0 &&
+ macro_length ('sons') == 0 &&
+ macro_length ('bar') == 0);
+
+test (! defined macro_text ('foo') &&
+ ! defined macro_text ('sons') &&
+ ! defined macro_text ('bar'));
+@warnings = warnings;
+test (@warnings == 3 &&
+ $warnings[0] =~ /undefined macro "foo"/ &&
+ $warnings[1] =~ /undefined macro "sons"/ &&
+ $warnings[2] =~ /undefined macro "bar"/);
+
+
+# Now parse the macro-definition entry; this should put the three
+# macros we're interested in into the macro table so we can
+# successfully parse the regular entry
+print "parsing macro-definition entry to define 3 macros\n" if $DEBUG;
+$entry = new Text::BibTeX::Entry;
+$entry->parse_s ($macrodef);
+test (! warnings);
+test_entry ($entry, 'string', undef,
+ [qw(foo sons bar)],
+ [' The Foo Journal', ' \& Sons', 'Bar \& Sons']);
+
+# Direct access to macro table, part 2: make sure the macros we've just
+# defined now have the correct values
+print "checking macro table to ensure that the macros were properly defined\n"
+ if $DEBUG;
+test (macro_length ('foo') == 19 &&
+ macro_length ('sons') == 8 &&
+ macro_length ('bar') == 14);
+
+test (macro_text ('foo') eq ' The Foo Journal' &&
+ macro_text ('sons') eq ' \& Sons' &&
+ macro_text ('bar') eq 'Bar \& Sons');
+test (! warnings);
+
+
+# Parse the regular entry -- there should be no warnings, because
+# we've just defined the 'foo' and 'bar' macros on which it depends
+
+# calling a parse or read method on an existing object isn't documented
+# as an "ok thing to do", but it is (at least as the XS code currently
+# is!) -- hence I can leave the "new" commented out
+# $entry = new Text::BibTeX::Entry;
+print "parsing the regular entry which uses those 2 of those macros\n"
+ if $DEBUG;
+$entry->parse_s ($regular);
+test (! warnings);
+test_entry ($entry, 'article', 'my_article',
+ [qw(author journal publisher)],
+ ['Us and Them', 'The Foo Journal', 'FuBar \& Sons']);
+
+
+# Delete the 'bar' macro and change 'foo' -- this should result in
+# one warning about the macro value being overridden
+delete_macro ('bar');
+test (macro_length ('bar') == 0 &&
+ ! defined macro_text ('bar') &&
+ (@warnings = warnings) == 1 &&
+ $warnings[0] =~ /undefined macro "bar"/);
+
+add_macro_text ('foo', 'The Journal of Fooology');
+test ((@warnings = warnings) == 1 &&
+ $warnings[0] =~ /overriding existing definition of macro "foo"/);
+
+
+# Now re-parse our regular entry; we should get a warning about the deleted
+# "bar" macro, and the "journal" field (which relies on "foo") should have
+# a different value
+
+$entry->parse_s ($regular);
+test ((@warnings = warnings) == 1 &&
+ $warnings[0] =~ /undefined macro "bar"/);
+test_entry ($entry, 'article', 'my_article',
+ [qw(author journal publisher)],
+ ['Us and Them', 'The Journal of Fooology', 'Fu']);
diff --git a/t/modify.t b/t/modify.t
new file mode 100644
index 0000000..ebf1a90
--- /dev/null
+++ b/t/modify.t
@@ -0,0 +1,82 @@
+use strict;
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..22\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# entry modification methods
+
+my ($text, $entry, @warnings, @fieldlist);
+
+$text = <<'TEXT';
+@article{homer97,
+ author = {Homer Simpson and Ned Flanders},
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ year = 1997
+}
+TEXT
+
+test ($entry = new Text::BibTeX::Entry);
+test ($entry->parse_s ($text));
+
+test ($entry->type eq 'article');
+$entry->set_type ('book');
+test ($entry->type eq 'book');
+
+test ($entry->key eq 'homer97');
+$entry->set_key ($entry->key . 'a');
+test ($entry->key eq 'homer97a');
+
+my @names = $entry->names ('author');
+$names[0] = $names[0]->{'last'}[0] . ', ' . $names[0]->{'first'}[0];
+$names[1] = $names[1]->{'last'}[0] . ', ' . $names[1]->{'first'}[0];
+$entry->set ('author', join (' and ', @names));
+
+my $author = $entry->get ('author');
+test ($author eq 'Simpson, Homer and Flanders, Ned');
+test (! warnings);
+
+$entry->set (author => 'Foo Bar {and} Co.',
+ title => 'This is a new title');
+test ($entry->get ('author') eq 'Foo Bar {and} Co.');
+test ($entry->get ('title') eq 'This is a new title');
+test (slist_equal ([$entry->get ('author', 'title')],
+ ['Foo Bar {and} Co.', 'This is a new title']));
+test (! warnings);
+
+test (slist_equal ([$entry->fieldlist], [qw(author title journal year)]));
+test ($entry->exists ('journal'));
+
+$entry->delete ('journal');
+@fieldlist = $entry->fieldlist;
+test (! $entry->exists ('journal') &&
+ slist_equal (\@fieldlist, [qw(author title year)]));
+test (! warnings);
+
+$entry->set_fieldlist ([qw(author title journal year)]);
+@warnings = warnings;
+test (@warnings == 1 &&
+ $warnings[0] =~ /implicitly adding undefined field \"journal\"/i);
+
+@fieldlist = $entry->fieldlist;
+test ($entry->exists ('journal') &&
+ ! defined $entry->get ('journal') &&
+ slist_equal (\@fieldlist, [qw(author title journal year)]));
+test (! warnings);
+
+$entry->delete ('journal', 'author', 'year');
+@fieldlist = $entry->fieldlist;
+test (! $entry->exists ('journal') &&
+ ! $entry->exists ('author') &&
+ ! $entry->exists ('year') &&
+ @fieldlist == 1 && $fieldlist[0] eq 'title');
+test (! warnings);
diff --git a/t/nameformat.t b/t/nameformat.t
new file mode 100644
index 0000000..3d02821
--- /dev/null
+++ b/t/nameformat.t
@@ -0,0 +1,59 @@
+# -*- cperl -*-
+use strict;
+use vars qw($DEBUG);
+use IO::Handle;
+use Test::More tests=>11;
+
+require "t/common.pl";
+
+use Text::BibTeX qw(:nameparts :joinmethods);
+use Text::BibTeX::Name;
+use Text::BibTeX::NameFormat;
+
+$DEBUG = 1;
+
+#setup_stderr;
+
+# Get a name to work with (and just a quick check that the Name class
+# is in working order)
+my $name = new Text::BibTeX::Name
+ "Charles Louis Xavier Joseph de la Vall{\'e}e Poussin";
+my @first = $name->part ('first');
+my @von = $name->part ('von');
+my @last = $name->part ('last');
+is_deeply(\@first, [qw(Charles Louis Xavier Joseph)]);
+is_deeply(\@von, [qw(de la)]);
+is_deeply(\@last, ['Vall{\'e}e', 'Poussin']);
+
+
+# Start with a basic "von last, jr, first" formatter
+my $format = new Text::BibTeX::NameFormat ('vljf', 1);
+is ($format->apply ($name), "de~la Vall{\'e}e~Poussin, C.~L. X.~J.");
+is ($format->apply ($name), $name->format ($format));
+
+# Tweak options: force ties between tokens of the first name
+$format->set_options (BTN_FIRST, 1, BTJ_FORCETIE, BTJ_NOTHING);
+is ($format->apply ($name), "de~la Vall{\'e}e~Poussin, C.~L.~X.~J.");
+
+# And no ties in the "von" part
+$format->set_options (BTN_VON, 0, BTJ_SPACE, BTJ_SPACE);
+is ($format->apply ($name), "de la Vall{\'e}e~Poussin, C.~L.~X.~J.");
+
+# No punctuation in the first name
+$format->set_text (BTN_FIRST, undef, undef, undef, '');
+is ($format->apply ($name), "de la Vall{\'e}e~Poussin, C~L~X~J");
+
+# And drop the first name inter-token separation entirely
+$format->set_options (BTN_FIRST, 1, BTJ_NOTHING, BTJ_NOTHING);
+is ($format->apply ($name), "de la Vall{\'e}e~Poussin, CLXJ");
+
+# Now we get silly: keep the first name tokens jammed together, but
+# don't abbreviate them any more
+$format->set_options (BTN_FIRST, 0, BTJ_NOTHING, BTJ_NOTHING);
+is ($format->apply ($name),
+ "de la Vall{\'e}e~Poussin, CharlesLouisXavierJoseph");
+
+# OK, put spaces back into the first name
+$format->set_options (BTN_FIRST, 0, BTJ_SPACE, BTJ_NOTHING);
+is ($format->apply ($name),
+ "de la Vall{\'e}e~Poussin, Charles Louis Xavier Joseph");
diff --git a/t/namelist.t b/t/namelist.t
new file mode 100644
index 0000000..3d0e647
--- /dev/null
+++ b/t/namelist.t
@@ -0,0 +1,50 @@
+use strict;
+use vars qw($DEBUG);
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..12\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+$DEBUG = 0;
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# make sure we can split up lists of names
+
+my (@names);
+
+@names =
+ ('J. Smith and N. D. Andrews' => ['J. Smith', 'N. D. Andrews'],
+ 'J. Smith and A. Jones' => ['J. Smith', 'A. Jones'],
+ 'J. Smith and A. Jones and J. Random' => ['J. Smith', 'A. Jones', 'J. Random'],
+ 'A. Smith and J. Jones' => ['A. Smith', 'J. Jones'],
+ 'A. Smith and A. Jones' => ['A. Smith', 'A. Jones'],
+ 'Amy Smith and Andrew Jones' => ['Amy Smith', 'Andrew Jones'],
+ 'Amy Smith and And y Jones' => ['Amy Smith', undef, 'y Jones'],
+ 'K. Herterich and S. Determann and B. Grieger and I. Hansen and P. Helbig and S. Lorenz and A. Manschke' => ['K. Herterich', 'S. Determann', 'B. Grieger', 'I. Hansen', 'P. Helbig', 'S. Lorenz', 'A. Manschke'],
+ 'A. Manschke and M. Matthies and A. Paul and R. Schlotte and U. Wyputta' => ['A. Manschke', 'M. Matthies', 'A. Paul', 'R. Schlotte', 'U. Wyputta'],
+ 'S. Lorenz and A. Manschke and M. Matthies' => ['S. Lorenz', 'A. Manschke', 'M. Matthies'],
+ 'K. Herterich and S. Determann and B. Grieger and I. Hansen and P. Helbig and S. Lorenz and A. Manschke and M. Matthies and A. Paul and R. Schlotte and U. Wyputta' => ['K. Herterich', 'S. Determann', 'B. Grieger', 'I. Hansen', 'P. Helbig', 'S. Lorenz', 'A. Manschke', 'M. Matthies', 'A. Paul', 'R. Schlotte', 'U. Wyputta'],
+ );
+
+while (@names)
+{
+ my ($name, $should_split) = (shift @names, shift @names);
+ my $actual_split = [Text::BibTeX::split_list ($name, 'and')];
+
+ if ($DEBUG)
+ {
+ printf "name = >%s<\n", $name;
+ print "should split to:\n ";
+ print join ("\n ", @$should_split) . "\n";
+ print "actually split to:\n ";
+ print join ("\n ", @$actual_split) . "\n";
+ }
+
+ test (slist_equal ($should_split, $actual_split));
+}
diff --git a/t/names.t b/t/names.t
new file mode 100644
index 0000000..6e7e94a
--- /dev/null
+++ b/t/names.t
@@ -0,0 +1,107 @@
+# -*- cperl -*-
+use strict;
+use vars qw($DEBUG);
+use IO::Handle;
+use Test::More tests => 51;
+BEGIN {
+ use_ok("Text::BibTeX");
+ require "t/common.pl";
+}
+
+$DEBUG = 0;
+
+#setup_stderr;
+
+# test_name: check the parts of a name object against expectations and
+# report a single result through ok().
+#   $name  - name object; must provide a part() method and be a hash ref
+#   $parts - array ref parallel to (first, von, last, jr); each slot is
+#            either an array ref of expected tokens or undef, meaning
+#            that part must be absent
+# The check passes only if every part matches and the name hash holds at
+# most four keys.
+sub test_name
+{
+   my ($name, $parts) = @_;
+   my @partnames = qw(first von last jr);
+   my $ok = 1;
+
+   foreach my $idx (0 .. $#partnames)
+   {
+      my $part = $partnames[$idx];
+      my $expected = $parts->[$idx];
+      my $matches;
+
+      if (defined $expected)
+      {
+         # Part must be present and equal, token for token.
+         $matches = ($name->part ($part))
+            && slist_equal ($expected, [$name->part ($part)]);
+      }
+      else
+      {
+         # Part must be absent.
+         $matches = ! $name->part ($part);
+      }
+
+      # Keep checking the remaining parts even after a mismatch.
+      $ok = 0 unless $matches;
+   }
+
+   ok (keys %$name <= 4 && $ok);
+}
+
+
+# ----------------------------------------------------------------------
+# processing of author names
+
+my (@names, %names, @orig_namelist, $namelist, @namelist);
+my ($text, $entry);
+
+# first just a big ol' list of names, not attached to any entry
+%names =
+ ('van der Graaf' => '|van+der|Graaf|',
+ 'Jones' => '||Jones|',
+ 'van' => '||van|',
+ 'John Smith' => 'John||Smith|',
+ 'John van Smith' => 'John|van|Smith|',
+ 'John van Smith Jr.' => 'John|van|Smith+Jr.|',
+ 'John Smith Jr.' => 'John+Smith||Jr.|',
+ 'John van' => 'John||van|',
+ 'John van der' => 'John|van|der|',
+ 'John van der Graaf' => 'John|van+der|Graaf|',
+ 'John van der Graaf foo' => 'John|van+der|Graaf+foo|',
+ 'foo Foo foo' => '|foo|Foo+foo|',
+ 'Foo foo' => 'Foo||foo|',
+ 'foo Foo' => '|foo|Foo|'
+ );
+
+@orig_namelist = keys %names;
+$namelist = join (' and ', @orig_namelist);
+@namelist = Text::BibTeX::split_list
+ ($namelist, 'and', 'test', 0, 'name');
+is_deeply(\@orig_namelist, \@namelist);
+
+my $i;
+foreach $i (0 .. $#namelist)
+{
+ is($namelist[$i], $orig_namelist[$i]);
+ my %parts;
+ Text::BibTeX::Name::_split (\%parts, $namelist[$i], 'test', 0, $i, 0);
+ ok (keys %parts <= 4);
+
+ my @name = map { join ('+', ref $_ ? @$_ : ()) }
+ @parts{'first','von','last','jr'};
+ is (join ('|', @name), $names{$orig_namelist[$i]});
+}
+
+# now an entry with some names in it
+
+$text = <<'TEXT';
+@article{homer97,
+ author = { Homer Simpson and
+ Flanders, Jr., Ned Q. and
+ {Foo Bar and Co.}},
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ year = 1997
+}
+TEXT
+
+ok ($entry = new Text::BibTeX::Entry $text);
+my $author = $entry->get ('author');
+is ($author, 'Homer Simpson and Flanders, Jr., Ned Q. and {Foo Bar and Co.}');
+@names = $entry->split ('author');
+ok (@names == 3 &&
+ $names[0] eq 'Homer Simpson' &&
+ $names[1] eq 'Flanders, Jr., Ned Q.' &&
+ $names[2] eq '{Foo Bar and Co.}');
+@names = $entry->names ('author');
+ok (@names == 3);
+test_name ($names[0], [['Homer'], undef, ['Simpson'], undef]);
+test_name ($names[1], [['Ned', 'Q.'], undef, ['Flanders'], ['Jr.']]);
+test_name ($names[2], [undef, undef, ['{Foo Bar and Co.}']]);
diff --git a/t/output.t b/t/output.t
new file mode 100644
index 0000000..1fc84cd
--- /dev/null
+++ b/t/output.t
@@ -0,0 +1,88 @@
+use strict;
+use IO::File;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..12\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# entry output methods
+
+my ($text, $entry, @warnings, @fields);
+my ($new_text, $new_entry);
+
+$text = <<'TEXT';
+@article{homer97,
+ author = "H{\"o}mer Simpson" # { \"und } # "Ned Flanders",
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ year = 1997
+}
+TEXT
+
+my $quote_warning = 'found \" (at brace-depth zero )?in string';
+
+test ($entry = new Text::BibTeX::Entry $text);
+test ($entry->parse_ok);
+@warnings = warnings;
+test (@warnings == 1 &&
+ $warnings[0] =~ /$quote_warning/);
+
+$new_text = $entry->print_s;
+
+test ($new_text =~ /^\@article\{homer97,$/m &&
+ $new_text =~ /^\s*author\s*=\s*{H{\\"o}mer Simpson \\"und Ned Flanders},$/m &&
+ $new_text =~ /^\s*title\s*=\s*[{"]Territorial[^}"]*Suburbia[}"],$/m &&
+ $new_text =~ /^\s*journal\s*=\s*[{"]Journal[^\}]*Studies[}"],$/m &&
+ $new_text =~ /^\s*year\s*=\s*[{"]1997[}"],?$/m);
+
+$new_entry = new Text::BibTeX::Entry $new_text;
+
+test ($entry->parse_ok);
+@warnings = warnings;
+test (@warnings == 1 &&
+ $warnings[0] =~ /$quote_warning/);
+test ($entry->type eq $new_entry->type);
+test ($entry->key eq $new_entry->key);
+test (slist_equal (scalar $entry->fieldlist, scalar $new_entry->fieldlist));
+
+@fields = $entry->fieldlist;
+test (slist_equal ([$entry->get (@fields)], [$new_entry->get (@fields)]));
+
+my @test = map { "t/test$_.bib" } 1..3;
+my ($bib);
+
+END { unlink @test }
+
+open (BIB, ">$test[0]") || die "couldn't create $test[0]: $!\n";
+$entry->print (\*BIB);
+close (BIB);
+
+$bib = new IO::File $test[1], O_CREAT|O_WRONLY
+ or die "couldn't create $test[1]: $!\n";
+$entry->print ($bib);
+$bib->close;
+
+$bib = new Text::BibTeX::File $test[2], O_CREAT|O_WRONLY
+ or die "couldn't create $test[2]: $!\n";
+$entry->write ($bib);
+$bib->close;
+
+my (@contents, $i);
+for $i (0 .. 2)
+{
+ open (BIB, $test[$i]) || die "couldn't open $test[$i]: $!\n";
+ $contents[$i] = join ('', <BIB>);
+ close (BIB);
+}
+
+test ($new_text eq $contents[0] &&
+ $new_text eq $contents[1] &&
+ $new_text eq $contents[2]);
+
diff --git a/t/parse.t b/t/parse.t
new file mode 100644
index 0000000..34ef95e
--- /dev/null
+++ b/t/parse.t
@@ -0,0 +1,50 @@
+use strict;
+use vars ('$DEBUG');
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..26\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+$DEBUG = 0;
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# entry creation and parsing from a Text::BibTeX::File object
+
+my ($bibfile, $entry);
+my $multiple_file = 'btparse/tests/data/simple.bib';
+
+test ($bibfile = new Text::BibTeX::File $multiple_file);
+test ($entry = new Text::BibTeX::Entry $bibfile);
+test (slist_equal
+ ([warnings],
+ [$multiple_file . ', line 5, warning: undefined macro "junk"']));
+test_entry ($entry, 'book', 'abook',
+ [qw(title editor publisher year)],
+ ['A Book', 'John Q. Random', 'Foo Bar \& Sons', '1922']);
+
+test ($entry->read ($bibfile));
+test_entry ($entry, 'string', undef,
+ ['macro', 'foo'],
+ ['macro text ', 'blah blah ding dong ']);
+
+
+test ($entry->read ($bibfile));
+test ($entry->parse_ok &&
+ $entry->type eq 'comment' &&
+ $entry->metatype == BTE_COMMENT &&
+ $entry->value eq 'this is a comment entry, anything at all can go in it (as long as parentheses are balanced), even {braces}');
+
+test ($entry->read ($bibfile));
+test ($entry->parse_ok &&
+ $entry->type eq 'preamble' &&
+ $entry->metatype == BTE_PREAMBLE &&
+ $entry->value eq 'This is a preamble---the concatenation of several strings');
+
+test (! $entry->read ($bibfile));
diff --git a/t/parse_f.t b/t/parse_f.t
new file mode 100644
index 0000000..2bacc1e
--- /dev/null
+++ b/t/parse_f.t
@@ -0,0 +1,83 @@
+use strict;
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..56\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# entry creation and parsing from files
+
+my ($fh, $entry);
+
+my $regular_file = 'btparse/tests/data/regular.bib';
+
+# first, from a regular ol' Perl filehandle, with 'new' and 'parse'
+# bundled into one call
+open (BIB, $regular_file) || die "couldn't open $regular_file: $!\n";
+test ($entry = new Text::BibTeX::Entry $regular_file, \*BIB);
+test (slist_equal
+ ([warnings],
+ [$regular_file . ', line 5, warning: undefined macro "junk"']));
+test_entry ($entry, 'book', 'abook',
+ [qw(title editor publisher year)],
+ ['A Book', 'John Q. Random', 'Foo Bar \& Sons', '1922']);
+test (! new Text::BibTeX::Entry $regular_file, \*BIB);
+
+
+# An interesting note: if I forget the 'seek' here, a bug is exposed in
+# btparse -- it crashes with an internal error if it hits eof twice in a
+# row. Should add a test for that bug to the official suite, once
+# it's fixed of course. ;-)
+
+seek (BIB, 0, 0);
+
+# now the same, separating the 'new' and 'parse' calls -- also a test
+# to see if we can pass undef for filename and get no filename in the
+# error message (and suffer no other consequences!)
+test ($entry->parse (undef, \*BIB));
+test (slist_equal
+ ([warnings],
+ ['line 5, warning: undefined macro "junk"']));
+test_entry ($entry, 'book', 'abook',
+ [qw(title editor publisher year)],
+ ['A Book', 'John Q. Random', 'Foo Bar \& Sons', '1922']);
+test (! $entry->parse (undef, \*BIB));
+
+close (BIB);
+
+# this is so I can stop checking the damned 'undefined macro' warning
+# -- guess I really do need a "set macro value" interface at some level...
+# (problem is that there's just one macro table for the whole process)
+
+test ($entry->parse_s ('@string(junk={, III})'));
+test_entry ($entry, 'string', undef, ['junk'], [', III']);
+
+# Now open that same file using IO::File, and pass in the resulting object
+# instead of a glob ref; everything else here is just the same
+
+$fh = new IO::File $regular_file
+ or die "couldn't open $regular_file: $!\n";
+test ($entry = new Text::BibTeX::Entry $regular_file, $fh);
+test (! warnings);
+test_entry ($entry, 'book', 'abook',
+ [qw(title editor publisher year)],
+ ['A Book', 'John Q. Random, III', 'Foo Bar \& Sons', '1922']);
+test (! new Text::BibTeX::Entry $regular_file, $fh);
+$fh->seek (0, 0);
+
+# and again, with unbundled 'parse' call
+test ($entry->parse ($regular_file, $fh));
+test (! warnings);
+test_entry ($entry, 'book', 'abook',
+ [qw(title editor publisher year)],
+ ['A Book', 'John Q. Random, III', 'Foo Bar \& Sons', '1922']);
+test (! new Text::BibTeX::Entry $regular_file, $fh);
+
+$fh->close;
diff --git a/t/parse_s.t b/t/parse_s.t
new file mode 100644
index 0000000..2a4c51d
--- /dev/null
+++ b/t/parse_s.t
@@ -0,0 +1,89 @@
+use strict;
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..36\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX;
+$loaded = 1;
+print "ok 1\n";
+
+setup_stderr;
+
+# ----------------------------------------------------------------------
+# entry creation and parsing from a string
+
+my ($text, $entry, @warnings, $result);
+
+$text = <<'TEXT';
+@foo { mykey,
+ f1 = {hello } # { there},
+ f2 = "fancy " # "that!" # foo # 1991,
+ f3 = foo
+ }
+TEXT
+
+test ($entry = new Text::BibTeX::Entry);
+test ($entry->parse_s ($text));
+@warnings = warnings;
+test (@warnings == 2 &&
+ $warnings[0] eq 'line 3, warning: undefined macro "foo"' &&
+ $warnings[1] eq 'line 4, warning: undefined macro "foo"');
+
+# First, low-level tests: make sure the data structure itself looks right
+test ($entry->{'status'});
+test ($entry->{'type'} eq 'foo');
+test ($entry->{'key'} eq 'mykey');
+test (scalar @{$entry->{fields}} == 3);
+test ($entry->{fields}[0] eq 'f1' &&
+ $entry->{fields}[1] eq 'f2' &&
+ $entry->{fields}[2] eq 'f3');
+test (scalar keys %{$entry->{'values'}} == 3);
+test ($entry->{'values'}{f1} eq 'hello there');
+
+# Now the same tests again, but using the object's methods
+test_entry ($entry, 'foo', 'mykey',
+ ['f1', 'f2', 'f3'],
+ ['hello there', 'fancy that!1991', '']);
+
+# Repeat with "bundled" form (new and parse_s in one go)
+test ($entry = new Text::BibTeX::Entry $text);
+@warnings = warnings;
+test (@warnings == 2 &&
+ $warnings[0] eq 'line 3, warning: undefined macro "foo"' &&
+ $warnings[1] eq 'line 4, warning: undefined macro "foo"');
+
+# Repeat tests of entry contents
+test_entry ($entry, 'foo', 'mykey',
+ ['f1', 'f2', 'f3'],
+ ['hello there', 'fancy that!1991', '']);
+
+# Make sure parsing an empty string, or string with no entry in it,
+# just returns false... nope, doesn't work right now. Need to
+# look into how btparse responds to bt_parse_s() on an empty string
+# before I know how Text::BibTeX should do it!
+
+# $entry = new Text::BibTeX::Entry;
+# $result = $entry->parse_s ('');
+# test (! warnings && ! $result);
+
+# $result = $entry->parse_s ('top-level junk that is not caught');
+# test (! warnings && ! $result);
+
+
+# Test the "proper noun at both ends" bug (the bt_get_text() call in
+# BibTeX.xs stripped off the leading and trailing braces; has since
+# been changed to bt_next_value(), under the assumption that compound
+# values will have been collapsed to a single simple value)
+
+# (thanks to Reiner Schotte for reporting this bug)
+
+$text = <<'TEXT';
+@foo{key, title = "{System}- und {Signaltheorie}"}
+TEXT
+
+$entry = new Text::BibTeX::Entry $text;
+test (! warnings && $entry->parse_ok);
+test_entry ($entry, 'foo', 'key',
+ ['title'], ['{System}- und {Signaltheorie}']);
diff --git a/t/purify.t b/t/purify.t
new file mode 100644
index 0000000..5eac685
--- /dev/null
+++ b/t/purify.t
@@ -0,0 +1,134 @@
+#
+# purify.t
+#
+# Text::BibTeX test program -- compare my purify routine with known
+# results from BibTeX 0.99.
+#
+# $Id: purify.t 3030 2006-09-21 20:01:18Z ambs $
+#
+
+use strict;
+use vars qw($DEBUG);
+use IO::Handle;
+BEGIN { require "t/common.pl"; }
+
+my $loaded;
+BEGIN { $| = 1; print "1..56\n"; }
+END {print "not ok 1\n" unless $loaded;}
+use Text::BibTeX qw(purify_string);
+$loaded = 1;
+print "ok 1\n";
+
+$DEBUG = 1;
+
+# purify_string is expected to return a fresh string and leave its
+# argument alone (at least while it's *supposed* to act this way!)
+my $in1 = 'f{\"o}o';
+my $in2 = $in1;                  # copy that is passed to purify_string
+my $out = 'clobber me';          # placeholder that must get overwritten
+$out = purify_string ($in2);
+test ($out eq 'foo' && $in2 eq $in1);
+test (length ($in1) == 7 &&
+      length ($in2) == 7 && length ($out) == 3);
+
+# Two borderline inputs that do *not* come from BibTeX: the empty
+# string and undef
+test ('' eq purify_string (''));
+test (! defined (purify_string (undef)));
+
+
+# The "expected" results here are all taken directly from BibTeX, using
+# a special .bst file of my own devising. One problem is that BibTeX
+# strips trailing spaces from each line on output, which means that
+# "purified" strings ending with a space are not delivered exactly as
+# I expect them. However, BibTeX's text.length$ function does give the
+# correct length (including those trailing spaces), so at least I can
+# indirectly check that things are as I expect them to be.
+#
+# The upshot of all this is that the "expected purified strings" in the
+# table below are shorn of trailing spaces, but have accurate lengths.
+# My reasoning for doing things this way is that although it is (apparently)
+# BibTeX's output routines that does the space-stripping, there is no
+# way to get data out of BibTeX other than through its output routines.
+# Thus, if I'm going to compare my results with BibTeX's, I'd better be
+# prepared to deal with the stripped-spaces problem...so I am!
+
+my @tests =    # flat list of pairs: input string => [expected length, expected purified string]
+ (q[Bl{\"o}w, Jo{\'{e}} Q. and J.~R. R. Tolk{\u e}in and {Fo{\'o} Bar ~ {\aa}nd {\SS}on{\v{s}}, Ltd.}] =>
+ [58, 'Blow Joe Q and J R R Tolkein and Foo Bar   aand SSonvs Ltd'], # "Bar ~ {\aa}nd": space, tilde, space => three spaces
+ q[] => [0, ''],
+ q[G{\"o}del] => [5, 'Godel'],
+ q[G{\" o}del] => [5, 'Godel'],
+ q[G{\" o }del] => [5, 'Godel'],
+ q[G{\"o }del] => [5, 'Godel'],
+ q[G{\"{o}}del] => [5, 'Godel'],
+ q[G{\" {o}}del] => [5, 'Godel'],
+ q[G{\" { o}}del] => [5, 'Godel'],
+ q[G{\" {o }}del] => [5, 'Godel'],
+ q[G{\" { o }}del] => [5, 'Godel'],
+ q[G{\" { o } }del] => [5, 'Godel'],
+ q[G{\"{o} }del] => [5, 'Godel'],
+ q[G{\" {o} }del] => [5, 'Godel'],
+ q[G{\"o foo}del] => [8, 'Gofoodel'],
+ q[G{\"foo}del] => [7, 'Gfoodel'],
+ q[G{\"{foo}}del] => [7, 'Gfoodel'],
+ q[{G\"odel}] => [5, 'Godel'],
+ q[G{\"o}del] => [5, 'Godel'],
+ q[G{\"{o}}del] => [5, 'Godel'],
+ q[{\ss}uper-duper] => [12, 'ssuper duper'],
+ q[{\ss }uper-duper] => [12, 'ssuper duper'],
+ q[{ \ss}uper-duper] => [13, ' ssuper duper'],
+ q[{\ss{}}uper-duper] => [12, 'ssuper duper'],
+ q[{\ss foo}uper-duper] => [15, 'ssfoouper duper'],
+ q[{\ss { }}uper-duper] => [12, 'ssuper duper'],
+ q[{\ss {foo}}uper-duper] => [15, 'ssfoouper duper'],
+ q[{\ss{foo}}uper-duper] => [15, 'ssfoouper duper'],
+ q[Tom{\`a}{\v s}] => [5, 'Tomas'],
+ q[Tom{\`a}{\v{s}}] => [5, 'Tomas'],
+ q[Tom{\`a}{{\v s}}] => [7, 'Tomav s'],
+ q[{Tom{\`a}{\v s}}] => [7, 'Tomav s'],
+ q[{Tom{\`a}{\v{s}}}] => [6, 'Tomavs'],
+ q[{Tom{\`a}{\v{ s}}}] => [7, 'Tomav s'],
+ q[{Tom{\`a}{\v{ s }}}] => [8, 'Tomav s'],
+ q[{\v s}] => [1, 's'],
+ q[{\x s}] => [1, 's'],
+ q[{\r s}] => [1, 's'],
+ q[{\foo s}] => [1, 's'],
+ q[{\oe}] => [2, 'oe'],
+ q[{\ae}] => [2, 'ae'],
+
+ # Handling of \aa is a bit problematic -- BibTeX 0.99 converts this
+ # special char. to "a", but my understanding of the Nordic languages
+ # leads me to believe it ought to be converted to "aa". (E.g.
+ # \AArhus is usually written "Aarhus" in English, not "Arhus".)
+ # Neither way will result in proper sorting (at least for Danish,
+ # where \aa comes at the end of the alphabet), but at least my way
+ # is consistent with the normal English rendering of \aa.
+# q[{\aa}] => [1, 'a'], # BibTeX 0.99's behaviour
+ q[{\aa}] => [2, 'aa'], # btparse's behaviour
+ q[{\AA}] => [2, 'Aa'],
+ q[{\o}] => [1, 'o'],
+ q[{\l}] => [1, 'l'],
+ q[{\ss}] => [2, 'ss'],
+ q[{\ae s}] => [3, 'aes'],
+ q[\TeX] => [3, 'TeX'],
+ q[{\TeX}] => [0, ''],
+ q[{{\TeX}}] => [3, 'TeX'],
+ q[{\foobar}] => [0, '']
+ );
+
+while (my ($input, $expected) = splice (@tests, 0, 2))
+{
+    my ($want_length, $want_text) = @$expected;
+    my $got_text = purify_string ($input);
+    my $got_length = length ($got_text);      # measured before trimming
+
+    if ($DEBUG)
+    {
+        printf "[%s] -> [%s] (length %d) (expected [%s], length %d)\n",
+               $input, $got_text, $got_length, $want_text, $want_length;
+    }
+    $got_text =~ s/ +$//;                      # drop trailing spaces before comparing text
+    test ($got_text eq $want_text && $got_length == $want_length);
+}
+
diff --git a/t/split_names b/t/split_names
new file mode 100644
index 0000000..187de93
--- /dev/null
+++ b/t/split_names
@@ -0,0 +1,28 @@
+# not a real test suite! just an interactive "you give me da
+# name, I show ya how it splits up" driver.
+
+use strict;
+use Term::ReadLine;
+use Text::BibTeX;
+use Text::BibTeX::Name;
+
+sub show_name
+{
+    # Build a Text::BibTeX::Name from the given string and print, for each
+    # of the parts first/last/von/jr, that part's tokens in double quotes.
+    my ($name_string) = @_;
+    my $parsed = new Text::BibTeX::Name $name_string;
+
+    foreach my $part_name (qw(first last von jr))
+    {
+        my @quoted = map (qq["$_"], $parsed->part ($part_name));
+        my $token_list = join (", ", @quoted);
+        printf " %-5s => (%s)\n", $part_name, $token_list;
+    }
+}
+
+my $term = new Term::ReadLine 'BibTeX name splitter';
+while (defined (my $line = $term->readline (">> ")))
+{
+    show_name ($line);
+}
diff --git a/typemap b/typemap
new file mode 100644
index 0000000..88d7b4a
--- /dev/null
+++ b/typemap
@@ -0,0 +1,30 @@
+bt_name * T_NAME
+bt_name_format * T_NAME_FORMAT
+bt_namepart T_IV
+bt_joinmethod T_IV
+boolean T_BOOL
+
+# ----------------------------------------------------------------------
+INPUT
+
+# this is needed so we can pass 'undef' for the filename to
+# Text::BibTeX::Entry::parse and have it wind up as NULL
+# in bt_parse_entry()
+T_PV
+ $var = (SvOK ($arg)) ? ($type) SvPV ($arg,PL_na) : NULL
+# T_NAME: take the scalar's integer value and cast it to a bt_name pointer
+T_NAME
+ $var = (bt_name *) SvIV ($arg)
+# T_NAME_FORMAT: same integer-to-pointer cast, for bt_name_format
+T_NAME_FORMAT
+ $var = (bt_name_format *) SvIV ($arg)
+# T_BOOL: an undefined scalar becomes 0; otherwise use its integer value
+T_BOOL
+ $var = (SvOK ($arg)) ? (int) SvIV ($arg) : 0
+
+
+# ----------------------------------------------------------------------
+OUTPUT
+
+#T_NAME_FORMAT
+# $arg = (IV) $var