summaryrefslogtreecommitdiff
path: root/BibTeX/Entry.pm
diff options
context:
space:
mode:
Diffstat (limited to 'BibTeX/Entry.pm')
-rw-r--r--BibTeX/Entry.pm967
1 files changed, 967 insertions, 0 deletions
diff --git a/BibTeX/Entry.pm b/BibTeX/Entry.pm
new file mode 100644
index 0000000..ff8c10d
--- /dev/null
+++ b/BibTeX/Entry.pm
@@ -0,0 +1,967 @@
+# ----------------------------------------------------------------------
+# NAME : BibTeX/Entry.pm
+# CLASSES : Text::BibTeX::Entry
+# RELATIONS : base class for Text::BibTeX::StructuredEntry, and
+# ultimately for all user-supplied structured entry classes
+# DESCRIPTION: Provides an object-oriented interface to BibTeX entries.
+# CREATED : March 1997, Greg Ward
+# MODIFIED :
+# VERSION : $Id: Entry.pm 6325 2008-10-08 12:35:41Z ambs $
+# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights
+# reserved.
+#
+# This file is part of the Text::BibTeX library. This
+# library is free software; you may redistribute it and/or
+# modify it under the same terms as Perl itself.
+# ----------------------------------------------------------------------
+package Text::BibTeX::Entry;
+
+require 5.004; # for isa, and delete on a slice
+
+use strict;
+use UNIVERSAL 'isa';
+use Carp;
+use Text::BibTeX qw(:metatypes :nodetypes);
+
+=head1 NAME
+
+Text::BibTeX::Entry - read and parse BibTeX files
+
+=head1 SYNOPSIS
+
+ use Text::BibTeX; # do not use Text::BibTeX::Entry alone!
+
+ # ...assuming that $bibfile and $newbib are both objects of class
+ # Text::BibTeX::File, opened for reading and writing (respectively):
+
+ # Entry creation/parsing methods:
+ $entry = new Text::BibTeX::Entry;
+ $entry->read ($bibfile);
+ $entry->parse ($filename, $filehandle);
+ $entry->parse_s ($entry_text);
+
+ # or:
+ $entry = new Text::BibTeX::Entry $bibfile;
+ $entry = new Text::BibTeX::Entry $filename, $filehandle;
+ $entry = new Text::BibTeX::Entry $entry_text;
+
+ # Entry query methods
+ warn "error in input" unless $entry->parse_ok;
+ $metatype = $entry->metatype;
+ $type = $entry->type;
+
+ # if metatype is BTE_REGULAR or BTE_MACRODEF:
+ $key = $entry->key; # only for BTE_REGULAR metatype
+ $num_fields = $entry->num_fields;
+ @fieldlist = $entry->fieldlist;
+ $has_title = $entry->exists ('title');
+ $title = $entry->get ('title');
+ # or:
+ ($val1,$val2,...$valn) = $entry->get ($field1, $field2, ..., $fieldn);
+
+ # if metatype is BTE_COMMENT or BTE_PREAMBLE:
+ $value = $entry->value;
+
+ # Author name methods
+ @authors = $entry->split ('author');
+ ($first_author) = $entry->names ('author');
+
+ # Entry modification methods
+ $entry->set_type ($new_type);
+ $entry->set_key ($new_key);
+ $entry->set ('title', $new_title);
+ # or:
+ $entry->set ($field1, $val1, $field2, $val2, ..., $fieldn, $valn);
+ $entry->delete (@fields);
+ $entry->set_fieldlist (\@fieldlist);
+
+ # Entry output methods
+ $entry->write ($newbib);
+ $entry->print ($filehandle);
+ $entry_text = $entry->print_s;
+
+ # Miscellaneous methods
+ $entry->warn ($entry_warning);
+ # or:
+ $entry->warn ($field_warning, $field);
+
+=head1 DESCRIPTION
+
+C<Text::BibTeX::Entry> does all the real work of reading and parsing
+BibTeX files. (Well, actually it just provides an object-oriented Perl
+front-end to a C library that does all that. But that's not important
+right now.)
+
+BibTeX entries can be read either from C<Text::BibTeX::File> objects (using
+the C<read> method), or directly from a filehandle (using the C<parse>
+method), or from a string (using C<parse_s>). The first is preferable,
+since you don't have to worry about supplying the filename, and because of
+the extra functionality provided by the C<Text::BibTeX::File> class.
+Currently, this means that you may specify the I<database structure> to
+which entries are expected to conform via the C<File> class. This lets you
+ensure that entries follow the rules for required fields and mutually
+constrained fields for a particular type of database, and also gives you
+access to all the methods of the I<structured entry class> for this
+database structure. See L<Text::BibTeX::Structure> for details on database
+structures.
+
+Once you have the entry, you can query it or change it in a variety of
+ways. The query methods are C<parse_ok>, C<type>, C<key>, C<num_fields>,
+C<fieldlist>, C<exists>, and C<get>. Methods for changing the entry are
+C<set_type>, C<set_key>, C<set_fieldlist>, C<delete>, and C<set>.
+
+Finally, you can output BibTeX entries, again either to an open
+C<Text::BibTeX::File> object, a filehandle or a string. (A filehandle or
+C<File> object must, of course, have been opened in write mode.) Output to
+a C<File> object is done with the C<write> method, to a filehandle via
+C<print>, and to a string with C<print_s>. Using the C<File> class is
+recommended for future extensibility, although it currently doesn't offer
+anything extra.
+
+=head1 METHODS
+
+=head2 Entry creation/parsing methods
+
+=over 4
+
+=item new ([SOURCE])
+
+Creates a new C<Text::BibTeX::Entry> object. If the SOURCE parameter is
+supplied, it must be one of the following: a C<Text::BibTeX::File> (or
+descendant class) object, a filename/filehandle pair, or a string. Calls
+C<read> to read from a C<Text::BibTeX::File> object, C<parse> to read from
+a filehandle, and C<parse_s> to read from a string.
+
+A filehandle can be specified as a GLOB reference, or as an
+C<IO::Handle> (or descendants) object, or as a C<FileHandle> (or
+descendants) object. (But there's really no point in using
+C<FileHandle> objects, since C<Text::BibTeX> requires Perl 5.004, which
+always includes the C<IO> modules.) You can I<not> pass in the name of
+a filehandle as a string, though, because C<Text::BibTeX::Entry>
+conforms to the C<use strict> pragma (which disallows such symbolic
+references).
+
+The corresponding filename should be supplied in order to allow for
+accurate error messages; if you simply don't have the filename, you can
+pass C<undef> and you'll get error messages without a filename. (It's
+probably better to rearrange your code so that the filename is
+available, though.)
+
+Thus, the following are equivalent to read from a file named by
+C<$filename> (error handling ignored):
+
+ # good ol' fashioned filehandle and GLOB ref
+ open (BIBFILE, $filename);
+ $entry = new Text::BibTeX::Entry ($filename, \*BIBFILE);
+
+ # newfangled IO::File thingy
+ $file = new IO::File $filename;
+ $entry = new Text::BibTeX::Entry ($filename, $file);
+
+But using a C<Text::BibTeX::File> object is simpler and preferred:
+
+ $file = new Text::BibTeX::File $filename;
+ $entry = new Text::BibTeX::Entry $file;
+
+Returns the new object, unless SOURCE is supplied and reading/parsing
+the entry fails (e.g., due to end of file) -- then it returns false.
+
+=cut
+
+sub new
+{
+ my ($class, @source) = @_;
+
+ $class = ref ($class) || $class;
+ my $self = {'file' => undef,
+ 'type' => undef,
+ 'key' => undef,
+ 'status' => undef,
+ 'metatype' => undef,
+ 'fields' => [],
+ 'values' => {}};
+
+ bless $self, $class;
+ if (@source)
+ {
+ my $status;
+
+ if (@source == 1 && isa ($source[0], 'Text::BibTeX::File'))
+ {
+ my $file = $source[0];
+ $status = $self->read ($file);
+ if (my $structure = $file->structure)
+ {
+ $self->{structure} = $structure;
+ bless $self, $structure->entry_class;
+ }
+ }
+ elsif (@source == 2 && defined $source[0] && ! ref $source[0] && fileno ($source[1]))
+ { $status = $self->parse ($source[0], $source[1]) }
+ elsif (@source == 1 && defined $source[0] && ! ref $source[0])
+ { $status = $self->parse_s ($source[0]) }
+ else
+ { croak "new: source argument must be either a Text::BibTeX::File " .
+ "(or descendant) object, filename/filehandle pair, or " .
+ "a string"; }
+
+ return $status unless $status; # parse failed -- tell our caller
+ }
+ $self;
+}
+
+=item read (BIBFILE)
+
+Reads and parses an entry from BIBFILE, which must be a
+C<Text::BibTeX::File> object (or descendant). The next entry will be read
+from the file associated with that object.
+
+Returns the same as C<parse> (or C<parse_s>): false if no entry found
+(e.g., at end-of-file), true otherwise. To see if the parse itself failed
+(due to errors in the input), call the C<parse_ok> method.
+
+=cut
+
+sub read
+{
+ my ($self, $source, $preserve) = @_;
+ croak "`source' argument must be ref to open Text::BibTeX::File " .
+ "(or descendant) object"
+ unless (isa ($source, 'Text::BibTeX::File'));
+
+ my $fn = $source->{'filename'};
+ my $fh = $source->{'handle'};
+ $self->{'file'} = $source; # store File object for later use
+ return $self->parse ($fn, $fh, $preserve);
+}
+
+
+=item parse (FILENAME, FILEHANDLE)
+
+Reads and parses the next entry from FILEHANDLE. (That is, it scans the
+input until an '@' sign is seen, and then slurps up to the next '@'
+sign. Everything between the two '@' signs [including the first one,
+but not the second one -- it's pushed back onto the input stream for the
+next entry] is parsed as a BibTeX entry, with the simultaneous
+construction of an abstract syntax tree [AST]. The AST is traversed to
+ferret out the most interesting information, and this is stuffed into a
+Perl hash, which coincidentally is the C<Text::BibTeX::Entry> object
+you've been tossing around. But you don't need to know any of that -- I
+just figured if you've read this far, you might want to know something
+about the inner workings of this module.)
+
+The success of the parse is stored internally so that you can later
+query it with the C<parse_ok> method. Even in the presence of syntax
+errors, you'll usually get something resembling your input, but it's
+usually not wise to try to do anything with it. Just call C<parse_ok>,
+and if it returns false then silently skip to the next entry. (The
+error messages printed out by the parser should be quite adequate for
+the user to figure out what's wrong. And no, there's currently no way
+for you to capture or redirect those error messages -- they're always
+printed to C<stderr> by the underlying C code. That should change in
+future releases.)
+
+If no '@' signs are seen on the input before reaching end-of-file, then
+we've exhausted all the entries in the file, and C<parse> returns a
+false value. Otherwise, it returns a true value -- even if there were
+syntax errors. Hence, it's important to check C<parse_ok>.
+
+The FILENAME parameter is only used for generating error messages, but
+anybody using your program will certainly appreciate your setting it
+correctly!
+
+=item parse_s (TEXT)
+
+Parses a BibTeX entry (using the above rules) from the string TEXT. The
+string is not modified; repeatedly calling C<parse_s> with the same string
+will give you the same results each time. Thus, there's no point in
+putting multiple entries in one string.
+
+=back
+
+=cut
+
+sub _preserve
+{
+ my ($self, $preserve) = @_;
+
+ $preserve = $self->{'file'}->preserve_values
+ if ! defined $preserve &&
+ defined $self->{'file'} &&
+ isa ($self->{'file'}, 'Text::BibTeX::File');
+ require Text::BibTeX::Value if $preserve;
+ $preserve;
+}
+
+sub parse
+{
+ my ($self, $filename, $filehandle, $preserve) = @_;
+
+ $preserve = $self->_preserve ($preserve);
+ _parse ($self, $filename, $filehandle, $preserve);
+}
+
+
+sub parse_s
+{
+ my ($self, $text, $preserve) = @_;
+
+ $preserve = $self->_preserve ($preserve);
+ _parse_s ($self, $text, $preserve);
+}
+
+
+=head2 Entry query methods
+
+=over 4
+
+=item parse_ok ()
+
+Returns false if there were any serious errors encountered while parsing
+the entry. (A "serious" error is a lexical or syntax error; currently,
+warnings such as "undefined macro" result in an error message being
+printed to C<stderr> for the user's edification, but no notice is
+available to the calling code.)
+
+=item type ()
+
+Returns the type of the entry. (The `type' is the word that follows the
+'@' sign; e.g. `article', `book', `inproceedings', etc. for the standard
+BibTeX styles.)
+
+=item metatype ()
+
+Returns the metatype of the entry. (The `metatype' is a numeric value used
+to classify entry types into four groups: comment, preamble, macro
+definition (C<@string> entries), and regular (all other entry types).
+C<Text::BibTeX> exports four constants for these metatypes: C<BTE_COMMENT>,
+C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>.)
+
+=item key ()
+
+Returns the key of the entry. (The key is the token immediately
+following the opening `{' or `(' in "regular" entries. Returns C<undef>
+for entries that don't have a key, such as macro definition (C<@string>)
+entries.)
+
+=item num_fields ()
+
+Returns the number of fields in the entry. (Note that, currently, this is
+I<not> equivalent to putting C<scalar> in front of a call to C<fieldlist>.
+See below for the consequences of calling C<fieldlist> in a scalar
+context.)
+
+=item fieldlist ()
+
+Returns the list of fields in the entry. In a scalar context, returns a
+reference to the object's own list of fields. That way, you can change or
+reorder the field list with minimal interference from the class. I'm not
+entirely sure if this is a good idea, so don't rely on it existing in the
+future; feel free to play around with it and let me know if you get bitten
+in dangerous ways or find this enormously useful.
+
+=cut
+
+sub parse_ok { shift->{'status'}; }
+
+sub metatype { shift->{'metatype'}; }
+
+sub type { shift->{'type'}; }
+
+sub key { shift->{'key'}; }
+
+sub num_fields { scalar @{shift->{'fields'}}; }
+
+sub fieldlist { wantarray ? @{shift->{'fields'}} : shift->{'fields'}; }
+
+=item exists (FIELD)
+
+Returns true if a field named FIELD is present in the entry, false
+otherwise.
+
+=item get (FIELD, ...)
+
+Returns the value of one or more FIELDs, as a list of values. For example:
+
+ $author = $entry->get ('author');
+ ($author, $editor) = $entry->get ('author', 'editor');
+
+If a FIELD is not present in the entry, C<undef> will be returned at its
+place in the return list. However, you can't completely trust this as a
+test for presence or absence of a field; it is possible for a field to be
+present but undefined. Currently this can only happen due to certain
+syntax errors in the input, or if you pass an undefined value to C<set>, or
+if you create a new field with C<set_fieldlist> (the new field's value is
+implicitly set to C<undef>).
+
+Normally, the field value is what the input looks like after "maximal
+processing"--quote characters are removed, whitespace is collapsed (the
+same way that BibTeX itself does it), macros are expanded, and multiple
+tokens are pasted together. (See L<bt_postprocess> for details on the
+post-processing performed by B<btparse>.)
+
+For example, if your input file has the following:
+
+ @string{of = "of"}
+ @string{foobars = "Foobars"}
+
+ @article{foobar,
+ title = { The Mating Habits } # of # " Adult " # foobars
+ }
+
+then using C<get> to query the value of the C<title> field from the
+C<foobar> entry would give the string "The Mating Habits of Adult Foobars".
+
+However, in certain circumstances you may wish to preserve the values as
+they appear in the input. This is done by setting a C<preserve_values>
+flag at some point; then, C<get> will return not strings but
+C<Text::BibTeX::Value> objects. Each C<Value> object is a list of
+C<Text::BibTeX::SimpleValue> objects, which in turn consists of a simple
+value type (string, macro, or number) and the text of the simple value.
+Various ways to set the C<preserve_values> flag and the interface to
+both C<Value> and C<SimpleValue> objects are described in
+L<Text::BibTeX::Value>.
+
+=item value ()
+
+Retuns the single string associated with C<@comment> and C<@preamble>
+entries. For instance, the entry
+
+ @preamble{" This is a preamble" #
+ {---the concatenation of several strings}}
+
+would return a value of "This is a preamble---the concatenation of
+several strings".
+
+If this entry was parsed in "value preservation" mode, then C<value>
+acts like C<get>, and returns a C<Value> object rather than a simple
+string.
+
+=back
+
+=cut
+
+sub exists
+{
+ my ($self, $field) = @_;
+
+ exists $self->{'values'}{$field};
+}
+
+sub get
+{
+ my ($self, @fields) = @_;
+
+ @{$self->{'values'}}{@fields};
+}
+
+sub value { shift->{'value'} }
+
+
+=head2 Author name methods
+
+This is the only part of the module that makes any assumption about the
+nature of the data, namely that certain fields are lists delimited by a
+simple word such as "and", and that the delimited sub-strings are human
+names of the "First von Last" or "von Last, Jr., First" style used by
+BibTeX. If you are using this module for anything other than
+bibliographic data, you can most likely forget about these two methods.
+However, if you are in fact hacking on BibTeX-style bibliographic data,
+these could come in very handy -- the name-parsing done by BibTeX is not
+trivial, and the list-splitting would also be a pain to implement in
+Perl because you have to pay attention to brace-depth. (Not that it
+wasn't a pain to implement in C -- it's just a lot more efficient than a
+Perl implementation would be.)
+
+Incidentally, both of these methods assume that the strings being split
+have already been "collapsed" in the BibTeX way, i.e. all leading and
+trailing whitespace removed and internal whitespace reduced to single
+spaces. This should always be the case when using these two methods on
+a C<Text::BibTeX::Entry> object, but these are actually just front ends
+to more general functions in C<Text::BibTeX>. (More general in that you
+supply the string to be parsed, rather than supplying the name of an
+entry field.) Should you ever use those more general functions
+directly, you might have to worry about collapsing whitespace; see
+L<Text::BibTeX> (the C<split_list> and C<split_name> functions in
+particular) for more information.
+
+Please note that the interface to author name parsing is experimental,
+subject to change, and open to discussion. Please let me know if you
+have problems with it, think it's just perfect, or whatever.
+
+=over 4
+
+=item split (FIELD [, DELIM [, DESC]])
+
+Splits the value of FIELD on DELIM (default: `and'). Don't assume that
+this works the same as Perl's builtin C<split> just because the names are
+the same: in particular, DELIM must be a simple string (no regexps), and
+delimiters that are at the beginning or end of the string, or at non-zero
+brace depth, or not surrounded by whitespace, are ignored. Some examples
+might illuminate matters:
+
+ if field F is... then split (F) returns...
+ 'Name1 and Name2' ('Name1', 'Name2')
+ 'Name1 and and Name2' ('Name1', undef, 'Name2')
+ 'Name1 and' ('Name1 and')
+ 'and Name2' ('and Name2')
+ 'Name1 {and} Name2 and Name3' ('Name1 {and} Name2', 'Name3')
+ '{Name1 and Name2} and Name3' ('{Name1 and Name2}', 'Name3')
+
+Note that a warning will be issued for empty names (as in the second
+example above). A warning ought to be issued for delimiters at the
+beginning or end of a string, but currently this isn't done. (Hmmm.)
+
+DESC is a one-word description of the substrings; it defaults to 'name'.
+It is only used for generating warning messages.
+
+=item names (FIELD)
+
+Splits FIELD as described above, and further splits each name into four
+components: first, von, last, and jr.
+
+Returns a list of C<Text::BibTeX::Name> objects, each of which represents
+one name. Use the C<part> method to query these objects; see
+L<Text::BibTeX::Name> for details on the interface to name objects (and on
+name-parsing as well).
+
+For example if this entry:
+
+ @article{foo,
+ author = {John Smith and
+ Hacker, J. Random and
+ Ludwig van Beethoven and
+ {Foo, Bar and Company}}}
+
+has been parsed into a C<Text::BibTeX::Entry> object C<$entry>, then
+
+ @names = $entry->names ('author');
+
+will put a list of C<Text::BibTeX::Name> objects in C<@names>. These can
+be queried individually as described in L<Text::BibTeX::Name>; for instance,
+
+ @last = $names[0]->part ('last');
+
+would put the list of tokens comprising the last name of the first author
+into the C<@last> array: C<('Smith')>.
+
+=cut
+
+sub split
+{
+ my ($self, $field, $delim, $desc) = @_;
+
+ return unless $self->exists ($field);
+ $delim ||= 'and';
+ $desc ||= 'name';
+
+ my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
+ my $line = $self->{'lines'}{$field};
+
+# local $^W = 0 # suppress spurious warning from
+# unless defined $filename; # undefined $filename
+ Text::BibTeX::split_list ($self->{'values'}{$field}, $delim,
+ $filename, $line, $desc);
+}
+
+sub names
+{
+ require Text::BibTeX::Name;
+
+ my ($self, $field) = @_;
+ my (@names, $i);
+
+ my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
+ my $line = $self->{'lines'}{$field};
+
+ @names = $self->split ($field);
+# local $^W = 0 # suppress spurious warning from
+# unless defined $filename; # undefined $filename
+ for $i (0 .. $#names)
+ {
+ $names[$i] = new Text::BibTeX::Name ($names[$i], $filename, $line, $i);
+ }
+ @names;
+}
+
+=back
+
+=head2 Entry modification methods
+
+=over 4
+
+=item set_type (TYPE)
+
+Sets the entry's type.
+
+=item set_metatype (METATYPE)
+
+Sets the entry's metatype (must be one of the four constants
+C<BTE_COMMENT>, C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>, which
+are all optionally exported from C<Text::BibTeX>).
+
+=item set_key (KEY)
+
+Sets the entry's key.
+
+=item set (FIELD, VALUE, ...)
+
+Sets the value of field FIELD. (VALUE might be C<undef> or unsupplied,
+in which case FIELD will simply be set to C<undef> -- this is where the
+difference between the C<exists> method and testing the definedness of
+field values becomes clear.)
+
+Multiple (FIELD, VALUE) pairs may be supplied; they will be processed in
+order (i.e. the input is treated like a list, not a hash). For example:
+
+ $entry->set ('author', $author);
+ $entry->set ('author', $author, 'editor', $editor);
+
+VALUE can be either a simple string or a C<Text::BibTeX::Value> object;
+it doesn't matter if the entry was parsed in "full post-processing" or
+"preserve input values" mode.
+
+=item delete (FIELD)
+
+Deletes field FIELD from an entry.
+
+=item set_fieldlist (FIELDLIST)
+
+Sets the entry's list of fields to FIELDLIST, which must be a list
+reference. If any of the field names supplied in FIELDLIST are not
+currently present in the entry, they are created with the value C<undef>
+and a warning is printed. Conversely, if any of the fields currently
+present in the entry are not named in the list of fields supplied to
+C<set_fields>, they are deleted from the entry and another warning is
+printed.
+
+=back
+
+=cut
+
+sub set_type
+{
+ my ($self, $type) = @_;
+
+ $self->{'type'} = $type;
+}
+
+sub set_metatype
+{
+ my ($self, $metatype) = @_;
+
+ $self->{'metatype'} = $metatype;
+}
+
+sub set_key
+{
+ my ($self, $key) = @_;
+
+ $self->{'key'} = $key;
+}
+
+sub set
+{
+ my $self = shift;
+ croak "set: must supply an even number of arguments"
+ unless (@_ % 2 == 0);
+ my ($field, $value);
+
+ while (@_)
+ {
+ ($field,$value) = (shift,shift);
+ push (@{$self->{'fields'}}, $field)
+ unless exists $self->{'values'}{$field};
+ $self->{'values'}{$field} = $value;
+ }
+}
+
+sub delete
+{
+ my ($self, @fields) = @_;
+ my (%gone);
+
+ %gone = map {$_, 1} @fields;
+ @{$self->{'fields'}} = grep (! $gone{$_}, @{$self->{'fields'}});
+ delete @{$self->{'values'}}{@fields};
+}
+
+sub set_fieldlist
+{
+ my ($self, $fields) = @_;
+
+ # Warn if any of the caller's fields aren't already present in the entry
+
+ my ($field, %in_list);
+ foreach $field (@$fields)
+ {
+ $in_list{$field} = 1;
+ unless (exists $self->{'values'}{$field})
+ {
+ carp "Implicitly adding undefined field \"$field\"";
+ $self->{'values'}{$field} = undef;
+ }
+ }
+
+ # And see if there are any fields in the entry that aren't in the user's
+ # list; delete them from the entry if so
+
+ foreach $field (keys %{$self->{'values'}})
+ {
+ unless ($in_list{$field})
+ {
+ carp "Implicitly deleting field \"$field\"";
+ delete $self->{'values'}{$field};
+ }
+ }
+
+ # Now we can install (a copy of) the caller's desired field list;
+
+ $self->{'fields'} = [@$fields];
+}
+
+
+=head2 Entry output methods
+
+=over 4
+
+=item write (BIBFILE)
+
+Prints a BibTeX entry on the filehandle associated with BIBFILE (which
+should be a C<Text::BibTeX::File> object, opened for output). Currently
+the printout is not particularly human-friendly; a highly configurable
+pretty-printer will be developed eventually.
+
+=item print (FILEHANDLE)
+
+Prints a BibTeX entry on FILEHANDLE.
+
+=item print_s ()
+
+Prints a BibTeX entry to a string, which is the return value.
+
+=cut
+
+sub write
+{
+ my ($self, $bibfile) = @_;
+
+ my $fh = $bibfile->{'handle'};
+ $self->print ($fh);
+}
+
+sub print
+{
+ my ($self, $handle) = @_;
+
+ $handle ||= \*STDOUT;
+ print $handle $self->print_s;
+}
+
+sub print_s
+{
+ my $self = shift;
+ my ($field, $output);
+
+ sub value_to_string
+ {
+ my $value = shift;
+
+ if (! ref $value) # just a string
+ {
+ return "{$value}";
+ }
+ else # a Text::BibTeX::Value object
+ {
+ confess "value is a reference, but not to Text::BibTeX::Value object"
+ unless isa ($value, 'Text::BibTeX::Value');
+ my @values = $value->values;
+ foreach (@values)
+ {
+ $_ = $_->type == &BTAST_STRING ? '{' . $_->text . '}' : $_->text;
+ }
+ return join (' # ', @values);
+ }
+ }
+
+ carp "entry type undefined" unless defined $self->{'type'};
+ carp "entry metatype undefined" unless defined $self->{'metatype'};
+
+ # Regular and macro-def entries have to be treated differently when
+ # printing the first line, because the former have keys and the latter
+ # do not.
+ if ($self->{'metatype'} == &BTE_REGULAR)
+ {
+ carp "entry key undefined" unless defined $self->{'key'};
+ $output = sprintf ("@%s{%s,\n",
+ $self->{'type'} || '',
+ $self->{'key'} || '');
+ }
+ elsif ($self->{'metatype'} == &BTE_MACRODEF)
+ {
+ $output = sprintf ("@%s{\n",
+ $self->{'type'} || '');
+ }
+
+ # Comment and preamble entries are treated the same -- we print out
+ # the entire entry, on one line, right here.
+ else # comment or preamble
+ {
+ return sprintf ("@%s{%s}\n\n",
+ $self->{'type'},
+ value_to_string ($self->{'value'}));
+ }
+
+ # Here we print out all the fields/values of a regular or macro-def entry
+ my @fields = @{$self->{'fields'}};
+ while ($field = shift @fields)
+ {
+ my $value = $self->{'values'}{$field};
+ if (! defined $value)
+ {
+ carp "field \"$field\" has undefined value\n";
+ $value = '';
+ }
+
+ $output .= " $field = ";
+ $output .= value_to_string ($value);
+
+ $output .= ',' if @fields; # more fields yet to come
+ $output .= "\n";
+ }
+
+ # Tack on the last line, and we're done!
+ $output .= "}\n\n";
+ $output;
+}
+
+=back
+
+=head2 Miscellaneous methods
+
+=over 4
+
+=item warn (WARNING [, FIELD])
+
+Prepends a bit of location information (filename and line number(s)) to
+WARNING, appends a newline, and passes it to Perl's C<warn>. If FIELD is
+supplied, the line number given is just that of the field; otherwise, the
+range of lines for the whole entry is given. (Well, almost -- currently,
+the line number of the last field is used as the last line of the whole
+entry. This is a bug.)
+
+For example, if lines 10-15 of file F<foo.bib> look like this:
+
+ @article{homer97,
+ author = {Homer Simpson and Ned Flanders},
+ title = {Territorial Imperatives in Modern Suburbia},
+ journal = {Journal of Suburban Studies},
+ year = 1997
+ }
+
+then, after parsing this entry to C<$entry>, the calls
+
+ $entry->warn ('what a silly entry');
+ $entry->warn ('what a silly journal', 'journal');
+
+would result in the following warnings being issued:
+
+ foo.bib, lines 10-14: what a silly entry
+ foo.bib, line 13: what a silly journal
+
+=cut
+
+sub warn
+{
+ my ($self, $warning, $field) = @_;
+
+ my $location = '';
+ if ($self->{'file'})
+ {
+ $location = $self->{'file'}{'filename'} . ", ";
+ }
+
+ my $lines = $self->{'lines'};
+ my $entry_range = ($lines->{'START'} == $lines->{'STOP'})
+ ? "line $lines->{'START'}"
+ : "lines $lines->{'START'}-$lines->{'STOP'}";
+
+ if (defined $field)
+ {
+ $location .= (exists $lines->{$field})
+ ? "line $lines->{$field}: "
+ : "$entry_range (unknown field \"$field\"): ";
+ }
+ else
+ {
+ $location .= "$entry_range: ";
+ }
+
+ warn "$location$warning\n";
+}
+
+
+=item line ([FIELD])
+
+Returns the line number of FIELD. If the entry was parsed from a string,
+this still works--it's just the line number relative to the start of the
+string. If the entry was parsed from a file, this works just as you'd
+expect it to: it returns the absolute line number with respect to the
+whole file. Line numbers are one-based.
+
+If FIELD is not supplied, returns a two-element list containing the line
+numbers of the beginning and end of the whole entry. (Actually, the
+"end" line number is currently inaccurate: it's really the the line
+number of the last field in the entry. But it's better than nothing.)
+
+=cut
+
+sub line
+{
+ my ($self, $field) = @_;
+
+ if (defined $field)
+ {
+ return $self->{'lines'}{$field};
+ }
+ else
+ {
+ return @{$self->{'lines'}}{'START','STOP'};
+ }
+}
+
+=item filename ()
+
+Returns the name of the file from which the entry was parsed. Only
+works if the file is represented by a C<Text::BibTeX::File> object---if
+you just passed a filename/filehandle pair to C<parse>, you can't get
+the filename back. (Sorry.)
+
+=cut
+
+sub filename
+{
+ my $self = shift;
+
+ $self->{'file'}{'filename'}; # ooh yuck -- poking into File object
+}
+
+1;
+
+=back
+
+=head1 SEE ALSO
+
+L<Text::BibTeX>, L<Text::BibTeX::File>, L<Text::BibTeX::Structure>
+
+=head1 AUTHOR
+
+Greg Ward <gward@python.net>
+
+=head1 COPYRIGHT
+
+Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
+is part of the Text::BibTeX library. This library is free software; you
+may redistribute it and/or modify it under the same terms as Perl itself.
+
+=cut