summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@debian.org>2010-02-05 13:42:32 -0800
committerJoey Hess <joeyh@debian.org>2010-02-05 13:42:32 -0800
commit474dd0dbd852f84e5b7507e01636df0c6e90c412 (patch)
tree293ccce7717ad8bbb62c932d58fcde03f749b851
liblingua-en-words2nums-perl (0.18) unstable; urgency=low
* Remove the PM_FILTER to support Windows. (rt.cpan.org #38101) # imported from the archive
-rw-r--r--MANIFEST12
-rwxr-xr-xMakefile.PL22
-rw-r--r--README11
-rw-r--r--TODO7
-rw-r--r--Words2Nums.pm280
-rw-r--r--debian/changelog128
-rw-r--r--debian/compat1
-rw-r--r--debian/control17
-rw-r--r--debian/copyright10
-rw-r--r--debian/docs1
-rw-r--r--debian/examples1
-rwxr-xr-xdebian/rules7
-rw-r--r--samples78
-rw-r--r--t/samples.t27
-rwxr-xr-xtestnum14
15 files changed, 616 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..4a4c1ae
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,12 @@
+MANIFEST
+Makefile.PL
+README
+TODO
+debian/changelog
+debian/control
+debian/copyright
+debian/rules
+samples
+t/samples.t
+testnum
+Words2Nums.pm
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100755
index 0000000..97778b2
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+use ExtUtils::MakeMaker;
+
+WriteMakefile( # ExtUtils::MakeMaker entry point: writes the Makefile
+ NAME => 'Lingua::EN::Words2Nums',
+ VERSION => getversion(), # read from debian/changelog, see getversion below
+ $] < 5.005 ? () : ( # these two keys are only passed on perl 5.005 or newer
+ AUTHOR => 'Joey Hess <joey@kitenet.net>',
+ ABSTRACT_FROM => 'Words2Nums.pm',
+ )
+);
+
+# The version is pulled from the debian changelog, to avoid having to keep
+# two copies synced. Returns the parenthesised text on its first line, or dies.
+sub getversion {
+ open(my $dch, '<', 'debian/changelog') or die "debian/changelog: $!";
+ my $first = <$dch>; # lexical variable, so the caller's $_ is left untouched
+ my ($vers) = defined $first ? $first =~ m/\((.+)\)/ : (); # empty file: no match
+ die "no version" unless defined $vers && length $vers;
+ return $vers; # $dch goes out of scope here, which closes the file
+}
+
diff --git a/README b/README
new file mode 100644
index 0000000..bcca2bb
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+Lingua::EN::Words2Nums parses English representations of numbers like
+"two thousand and one". Some samples of the range of inputs it can handle
+can be found in the file "samples", which is used by the regression test
+suite.
+
+Web site is http://kitenet.net/~joey/code/words2nums/
+
+Copyright 2001, 2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..973cd47
--- /dev/null
+++ b/TODO
@@ -0,0 +1,7 @@
+* spelled out numbers, ie, "nine one one" => 911,
+ (currently, it returns 9 + 1 + 1)
+ (getting this right is a pre-req for decimals..)
+* fractions and decimals
+* support strings that start with a number and end with non-numbers,
+ returning the remaining part. HARD -- numbers are currently parsed
+ backwards!
diff --git a/Words2Nums.pm b/Words2Nums.pm
new file mode 100644
index 0000000..1c7f13b
--- /dev/null
+++ b/Words2Nums.pm
@@ -0,0 +1,280 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+Lingua::EN::Words2Nums - convert English text to numbers
+
+=cut
+
+package Lingua::EN::Words2Nums;
+use warnings;
+use strict;
+require Exporter;
+our @ISA=qw(Exporter);
+our @EXPORT=qw(&words2nums);
+
+=head1 SYNOPSIS
+
+ use Lingua::EN::Words2Nums;
+ $num=words2nums("two thousand and one");
+ $num=words2nums("twenty-second");
+ $num=words2nums("15 billion, 6 million, and ninteen");
+
+=head1 DESCRIPTION
+
+This module converts English text into numbers. It supports both ordinal and
+cardinal numbers, negative numbers, and very large numbers.
+
+The main subroutine, which is exported by default, is words2nums(). This
+subroutine, when fed a string, will attempt to convert it into a number.
+If it succeeds, the number will be returned. If it fails, it returns undef.
+
+=head1 VARIABLES
+
+There are a number of variables that can be used to tweak the behavior of this
+module. For example, debugging can be enabled by setting
+$Lingua::EN::Words2Nums::debug=1
+
+=over 4
+
+=cut
+
+# Public global variables.
+our $debug = 0;
+our $billion = 10 ** 9;
+
+=item $Lingua::EN::Words2Nums::debug
+
+Default: 0. If set to a true value, outputs on standard error some useful
+messages if parsing fails for some reason.
+
+=item $Lingua::EN::Words2Nums::billion
+
+Default: 10 ** 9. This is the number that will be returned for "one billion".
+It defaults to the American version; the English will want to set it to
+10 ** 12. Setting this number automatically changes all the larger numbers
+(trillion, quadrillion, etc) to match.
+
+=back
+
+=head1 NOTES
+
+It does not understand decimals or fractions, yet.
+
+Scores are supported, eg: "four score and ten". So are dozens. So is a baker's
+dozen. And a gross.
+
+Various misspellings of numbers are understood.
+
+While it handles googol correctly, googolplex is too large to fit in perl's
+standard scalar type, and "inf" will be returned.
+
+=cut
+
+our %nametosub = ( # maps each recognised word to [ handler sub, optional argument ]
+ naught => [ \&num, 0 ], # Cardinal numbers and their irregular ordinals,
+ nought => [ \&num, 0 ],
+ zero => [ \&num, 0 ], # leaving out the ordinals that just add "th".
+ one => [ \&num, 1 ], first => [ \&num, 1 ],
+ two => [ \&num, 2 ], second => [ \&num, 2 ],
+ three => [ \&num, 3 ], third => [ \&num, 3 ],
+ four => [ \&num, 4 ], fourth => [ \&num, 4 ],
+ five => [ \&num, 5 ], fifth => [ \&num, 5 ],
+ six => [ \&num, 6 ],
+ seven => [ \&num, 7 ], seven => [ \&num, 7 ], # NOTE(review): "seven" is listed twice; "seventh" still parses via the "th" entry
+ eight => [ \&num, 8 ], eighth => [ \&num, 8 ],
+ nine => [ \&num, 9 ], ninth => [ \&num, 9 ],
+ ten => [ \&num, 10 ],
+ eleven => [ \&num, 11 ],
+ twelve => [ \&num, 12 ], twelfth => [ \&num, 12 ],
+ thirteen => [ \&num, 13 ],
+ fifteen => [ \&num, 15 ],
+ eighteen => [ \&num, 18 ],
+ ninteen => [ \&num, 19 ], # common(?) misspelling
+ teen => [ \&suffix, 10 ], # takes care of the regular teens
+ twenty => [ \&num, 20 ], twentieth => [ \&num, 20 ],
+ thirty => [ \&num, 30 ], thirtieth => [ \&num, 30 ],
+ forty => [ \&num, 40 ], fortieth => [ \&num, 40 ],
+ fourty => [ \&num, 40 ], fourtieth => [ \&num, 40 ], # at least I misspell it like this
+ fifty => [ \&num, 50 ], fiftieth => [ \&num, 50 ],
+ sixty => [ \&num, 60 ], sixtieth => [ \&num, 60 ],
+ seventy => [ \&num, 70 ], seventieth => [ \&num, 70 ],
+ eighty => [ \&num, 80 ], eightieth => [ \&num, 80 ],
+ ninety => [ \&num, 90 ], ninetieth => [ \&num, 90 ],
+ ninty => [ \&num, 90 ], # common misspelling
+ hundred => [ \&prefix, 100 ],
+ thousand => [ \&prefix, 1000 ],
+ million => [ \&prefix, 10 ** 6 ],
+ milion => [ \&prefix, 10 ** 6 ], # common(?) misspelling
+ milliard => [ \&prefix, 10 ** 9 ],
+ billion => [ \&powprefix, 2 ], # These vary depending on country.
+ billiard => [ \&prefix, 10 ** 15 ],
+ trillion => [ \&powprefix, 3 ],
+ trilliard => [ \&prefix, 10 ** 21 ],
+ quadrillion => [ \&powprefix, 4 ],
+ quadrilliard => [ \&prefix, 10 ** 27 ],
+ quintillion => [ \&powprefix, 5 ],
+ quintilliard => [ \&prefix, 10 ** 33 ],
+ sextillion => [ \&powprefix, 6 ],
+ sextilliard => [ \&prefix, 10 ** 39 ],
+ septillion => [ \&powprefix, 7 ],
+ septilliard => [ \&prefix, 10 ** 45 ],
+ octillion => [ \&powprefix, 8 ],
+ octilliard => [ \&prefix, 10 ** 51 ],
+ nonillion => [ \&powprefix, 9 ],
+ nonilliard => [ \&prefix, 10 ** 57 ],
+ decillion => [ \&powprefix, 10 ],
+ decilliard => [ \&prefix, 10 ** 63 ],
+ undecillion => [ \&powprefix, 11 ],
+ undecilliard => [ \&prefix, 10 ** 69 ],
+ duodecillion => [ \&powprefix, 12 ],
+ duodecilliard => [ \&prefix, 10 ** 75 ],
+ tredecillion => [ \&powprefix, 13 ],
+ tredecilliard => [ \&prefix, 10 ** 81 ],
+ quattuordecillion => [ \&powprefix, 14 ],
+ quattuordecilliard => [ \&prefix, 10 ** 87 ],
+ quindecillion => [ \&powprefix, 15 ],
+ quindecilliard => [ \&prefix, 10 ** 93 ],
+ sexdecillion => [ \&powprefix, 16 ],
+ septendecillion => [ \&powprefix, 17 ],
+ octodecillion => [ \&powprefix, 18 ],
+ novemdecillion => [ \&powprefix, 19 ],
+ vigintillion => [ \&powprefix, 20 ],
+ unvigintillion => [ \&powprefix, 21 ],
+ duovigintillion => [ \&powprefix, 22 ],
+ duvigintillion => [ \&powprefix, 22 ], # some use this spelling
+ trevigintillion => [ \&powprefix, 23 ],
+ quattuorvigintillion => [ \&powprefix, 24 ],
+ quinvigintillion => [ \&powprefix, 25 ],
+ sexvigintillion => [ \&powprefix, 26 ],
+ septenvigintillion => [ \&powprefix, 27 ],
+ octovigintillion => [ \&powprefix, 28 ],
+ novemvigintillion => [ \&powprefix, 29 ],
+ trigintillion => [ \&powprefix, 30 ],
+ # This process can be continued indefinitely, but one has to stop
+ # somewhere. -- A Dictionary of Units of Measurement
+ centillion => [ \&powprefix, 100 ],
+ googol => [ \&googol ],
+ googolplex => [ \&googolplex ],
+ negative => [ \&invert ],
+ minus => [ \&invert ],
+ score => [ \&prefix, 20 ],
+ gross => [ \&prefix, 12 * 12 ],
+ dozen => [ \&prefix, 12 ],
+ bakersdozen => [ \&prefix, 13 ],
+ bakerdozen => [ \&prefix, 13 ],
+ eleventyone => [ \&num, 111 ], # This program was written on the day
+ eleventyfirst =>[ \&num, 111 ], # FOTR was released.
+ s => [ sub {} ], # ignore 's', at the end of a word,
+ # easy pluralization of dozens, etc.
+ es => [ sub {} ], # same for 'es'; for googolplexes, etc.
+ th => [ sub {} ], # ignore 'th', so regular ordinals parse as their cardinal
+);
+
+# Note the ordering, so that eg, ninety has a chance to match before nine.
+my $numregexp = join("|", reverse sort keys %nametosub); # alternation of every known word
+$numregexp=qr/($numregexp)/; # single capture group, so the matched word lands in $1
+
+my ($total, $mult, $oldpre, $newmult, $suffix, $val); # parser state shared by the handler subs; words2nums() resets all but $val on each call
+
+sub num ($) { # handler for a plain number, given as a word or as digits
+ ($val) = @_;
+ # A pending suffix (from "teen", seen first because the string is
+ # parsed backwards) is folded into this number and used up.
+ ($val, $suffix) = ($val + $suffix, 0) if $suffix;
+ # Scale by the multiplier built up by prefix() before adding it in.
+ $total = $total + $val * $mult;
+ $newmult = 0;
+}
+
+sub prefix ($) { # handler for multiplier words ("hundred", "score", ...)
+ my ($factor) = @_;
+ if ($oldpre < $factor) { # a larger multiplier closes the current chain
+ $total += $mult if $newmult; # chain had no number: lone "thousand", etc.
+ $mult = 1;
+ }
+ # Extend the chain with this multiplier and remember it for next time.
+ ($mult, $oldpre) = ($mult * $factor, $factor);
+ $newmult = 1; # stays set until num() uses the multiplier
+}
+
+sub powprefix { # handler for the "-illion" words; their size depends on $billion
+ my ($n) = @_;
+ # With the default (American) billion of 10 ** 9, the n-th "-illion"
+ # is 10 ** (3 * (n + 1)).
+ return prefix(10 ** (($n + 1) * 3)) if $billion == 10 ** 9;
+ # With a billion of 10 ** 12, the n-th "-illion" is 10 ** (6 * n).
+ return prefix(10 ** ($n * 6)) if $billion == 10 ** 12;
+ # Anything else is unsupported: report it when debugging is on and
+ # leave the running totals alone. Note that this does not abort the
+ # parse; words2nums() carries on with the next word.
+ return failure("\$billion is set to odd value: $billion");
+}
+
+
+sub suffix ($) { # handler for "teen": stash its value for the next num() call
+ $suffix = $_[0];
+}
+
+sub invert () { # handler for "negative" and "minus": flip the sign of the total
+ $total = -$total;
+}
+
+sub googol () { # handler for "googol": a multiplier of 10 ** 100
+ return prefix(10 ** 100);
+}
+
+sub googolplex () { # handler for "googolplex"; too large for a perl scalar (see NOTES)
+ return prefix(10 ** (10 ** 100));
+}
+
+sub failure ($) { # report a parse problem on STDERR when $debug is set
+ if ($debug) { print STDERR "$_[0]\n" }
+ return; # bare return, so callers get undef for "could not parse"
+}
+
+sub words2nums ($) { # returns the number for an English phrase, or undef
+ local $_=lc(shift);
+ chomp $_;
+
+ s/,//g; # ignore every comma, even if it's in a plain number (1,234,567.5)
+ return $_ if /^[-+]?[.0-9\s]+$/; # short circuit for plain number
+
+ if (/^[-+0-9.]+$/) { # all digits, dots and signs, yet not a plain number
+ return failure("+ or - not at beginning") if length $_;
+ }
+
+ s/\b(and|a|of)\b//g; # ignore some common words
+ s/[^A-Za-z0-9.]//g; # ignore spaces and punctuation, except period.
+ return failure("not a number") unless length $_;
+
+ $total=$oldpre=$suffix=$newmult=0; # fresh parser state for this call
+ $mult=1;
+
+ # Work backwards up the string: peel known words, then digits, off the end.
+ while (length $_) {
+ $nametosub{$1}[0]->($nametosub{$1}[1]) while s/$numregexp$//;
+ if (length $_) {
+ if (s/(\d+)(?:st|nd|rd|th)?$//) { # digits, with an optional ordinal ending
+ num($1);
+ }
+ else {
+ last; # neither a known word nor digits: stop and report below
+ }
+ }
+ }
+ return failure("error at $_") if length $_; # unparsed text is left over
+ $total += $mult if $newmult; # special case for lone "thousand", etc.
+ return $total;
+}
+
+=head1 AUTHOR
+
+Copyright 2001-2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+1
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..44ca0b5
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,128 @@
+liblingua-en-words2nums-perl (0.18) unstable; urgency=low
+
+ * Remove the PM_FILTER to support Windows. (rt.cpan.org #38101)
+
+ -- Joey Hess <joeyh@debian.org> Fri, 05 Feb 2010 16:42:32 -0500
+
+liblingua-en-words2nums-perl (0.17) unstable; urgency=low
+
+ * Use debhelper v7; rules file minimisation.
+
+ -- Joey Hess <joeyh@debian.org> Tue, 22 Jul 2008 00:29:12 -0400
+
+liblingua-en-words2nums-perl (0.16) unstable; urgency=low
+
+ * The repository has moved from svn to git.
+ * Minor improvement to debian/rules clean.
+
+ -- Joey Hess <joeyh@debian.org> Fri, 19 Oct 2007 22:21:04 -0400
+
+liblingua-en-words2nums-perl (0.15) unstable; urgency=low
+
+ * Update url to the web site.
+ * Current standards-version (no real changes).
+ * Fix unicode error in man page.
+ * Fix lintian warning about rules file.
+
+ -- Joey Hess <joeyh@debian.org> Mon, 04 Jun 2007 16:49:19 -0400
+
+liblingua-en-words2nums-perl (0.14) unstable; urgency=low
+
+ * Remove the tests that involve exponents, as they may not work on 64 bit
+ machines, and will cause false test failures. Closes: #250610
+
+ -- Joey Hess <joeyh@debian.org> Fri, 4 Jun 2004 15:09:13 -0300
+
+liblingua-en-words2nums-perl (0.13) unstable; urgency=low
+
+ * Remove quoting in Makefile.PL so it will build under 5.8.1 (this breaks
+ building under earlier versions of perl though). Closes: #213928
+
+ -- Joey Hess <joeyh@debian.org> Mon, 6 Oct 2003 19:49:19 -0400
+
+liblingua-en-words2nums-perl (0.12) unstable; urgency=low
+
+ * Move from build-depends-indep to build-depends to meet current policy.
+
+ -- Joey Hess <joeyh@debian.org> Wed, 3 Sep 2003 12:14:45 -0400
+
+liblingua-en-words2nums-perl (0.11) unstable; urgency=low
+
+ * Do not pass through things of the form "10-11", since they're not really
+ numbers.
+ * Fix testnum to work with library in same directory.
+
+ -- Joey Hess <joeyh@debian.org> Mon, 26 May 2003 15:48:37 -0400
+
+liblingua-en-words2nums-perl (0.10) unstable; urgency=low
+
+ * Add proper spelling of "forty", and alternate "nought" spelling.
+ * Make regression test work on win32, with exponents with leading zeroes.
+
+ -- Joey Hess <joeyh@debian.org> Wed, 7 May 2003 01:34:11 -0400
+
+liblingua-en-words2nums-perl (0.09) unstable; urgency=low
+
+ * Corrected parsing of "fourth". Oops!
+ * Moved pm file out of deep directory in source tarball, which was
+ unnecessary for such a small package.
+ * Added AUTHOR and ABSTRACT_FROM to Makefile.PL.
+ * Accept douvigintillion, as well as dovigintillion; I don't know which is
+ right.
+ * Thanks to Erick Calder for his help.
+
+ -- Joey Hess <joeyh@debian.org> Mon, 3 Feb 2003 12:16:02 -0500
+
+liblingua-en-words2nums-perl (0.08) unstable; urgency=low
+
+ * Localize $_.
+
+ -- Joey Hess <joeyh@debian.org> Fri, 18 Oct 2002 16:09:59 -0400
+
+liblingua-en-words2nums-perl (0.07) unstable; urgency=low
+
+ * Use debhelper v4.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 1 Jun 2002 18:15:26 -0400
+
+liblingua-en-words2nums-perl (0.06) unstable; urgency=low
+
+ * Don't try to test for inf, since it seems "Infinity" is the string on some
+ platforms.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 1 Jun 2002 10:01:09 -0400
+
+liblingua-en-words2nums-perl (0.05) unstable; urgency=low
+
+ * Added big numbers between undecillion and trigintillion. Also
+ centillion, and billiard through quindecilliard.
+
+ -- Joey Hess <joeyh@debian.org> Tue, 26 Feb 2002 23:09:29 -0500
+
+liblingua-en-words2nums-perl (0.04) unstable; urgency=low
+
+ * Corrected parsing of otherwise plain numbers that have commas in them
+ (123,456.789)
+
+ -- Joey Hess <joeyh@debian.org> Sat, 12 Jan 2002 17:33:22 -0500
+
+liblingua-en-words2nums-perl (0.03) unstable; urgency=low
+
+ * If the entire string is ignorables ("and", "a", punctuation), don't
+ return 0, but undef.
+
+ -- Joey Hess <joeyh@debian.org> Sat, 12 Jan 2002 14:17:16 -0500
+
+liblingua-en-words2nums-perl (0.02) unstable; urgency=low
+
+ * Added support for trillion through googolplex, and added localization
+ code for the different billions and other numbers.
+ * Lots of bugfixes, including getting the ordinals right (I hope).
+
+ -- Joey Hess <joeyh@debian.org> Wed, 19 Dec 2001 23:08:03 -0500
+
+liblingua-en-words2nums-perl (0.01) unstable; urgency=low
+
+ * First release.
+
+ -- Joey Hess <joeyh@debian.org> Wed, 19 Dec 2001 14:23:03 -0500
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7f8f011
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+7
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..67adea7
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,17 @@
+Source: liblingua-en-words2nums-perl
+Section: perl
+Priority: optional
+Build-Depends: debhelper (>= 7), perl5, dpkg-dev (>= 1.9.0)
+Maintainer: Joey Hess <joeyh@debian.org>
+Standards-Version: 3.8.4
+Vcs-Git: git://git.kitenet.net/words2nums
+Homepage: http://kitenet.net/~joey/code/words2nums/
+
+Package: liblingua-en-words2nums-perl
+Architecture: all
+Depends: ${perl:Depends}, ${misc:Depends}
+Description: convert English text to numbers
+ A perl module that can parse a wide variety of English text
+ and deduce the number it represents. For example, it can convert
+ "five million, one thousand and sixteen" to 5001016, and
+ "twenty-seventh" to 27.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..b08d379
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,10 @@
+Lingua::EN::Words2Nums is a Debian native package.
+
+Copyright 2001-2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+That means it's dual licensed under the GPL
+(/usr/share/common-licenses/GPL) and Artistic
+(/usr/share/common-licenses/Artistic) licenses.
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..2a6769e
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1 @@
+README TODO
diff --git a/debian/examples b/debian/examples
new file mode 100644
index 0000000..81154dd
--- /dev/null
+++ b/debian/examples
@@ -0,0 +1 @@
+samples
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..f6db6c6
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,7 @@
+#!/usr/bin/make -f
+%:
+ dh $@
+
+# Not intended for use by anyone except the author.
+announcedir:
+ @echo ${HOME}/src/joeywiki/code/words2nums/news
diff --git a/samples b/samples
new file mode 100644
index 0000000..5325934
--- /dev/null
+++ b/samples
@@ -0,0 +1,78 @@
+# Sample conversions. The result is on the left, and an input is on the
+# right. This file is used for regression testing. Note that this file
+# assumes that a billion is 10^9, but you can configure it otherwise when
+# you use the module.
+2001 two thousand one
+3424 three thousand four hundred twenty four
+3424 3 thousand 4 hundred 24
+798681 seven hundred ninety eight thousand six hundred eighty-one
+798000 798 thousand
+306172 three hundred six thousand, one hundred seventy two
+306172 3 hundred and six thousand, one hundred and seventy-2
+42524 fourty-two thousand five hundred twenty-four
+0 zero
+1 one
+1 first
+2 second
+3 three
+3 third
+9 ninth
+59 fifty-ninth
+1000 thousand
+1000 one thousand
+16 sixteen
+1000524 1000,524
+999.3333333 999.3333333
+30303.30303 30303.30303
+65569565609 65569565609
+-1 -1
+-12211.1133 -12,211.1133
+153 one hundred fifty three
+88 eighty-eight
+42 fourtytwo
+1000000 millionth
+3424 thirty-four hundred twenty-four
+11059 eleven thousand and fifty-nine
+9622000 nine million, six hundred and twenty-two thousand
+5600000 fifty-six hundred thousand
+167 one hundred and sixty-seventh
+25300 two hundred and fifty three hundred
+65065065065 sixty-five thousand sixty-five million sixty-five thousand and sixty-five
+11011011011 eleven billion eleven million eleven thousand eleven
+90 four score and ten
+501000000 five hundred and one million
+12 dozen
+48 four dozen
+13 baker's dozen
+13 bakers dozen
+13 baker dozen
+39 three baker's dozens
+4000 four thousands
+1001 thousand one
+4603 four thousand six hundred and three
+4103 four thousand, hundred and three
+288 two gross
+288 two grosses
+1000000 a million
+1000000 million
+1000000000 billion
+1000000000000 trillion
+1 1st
+2 2nd
+3 3rd
+4 4th
+4023 4023rd
+# Perl's numbers don't go this high.
+# Also, the return for infinity varies with platforms, so this is not a
+# good test.
+#inf googolplex
+#inf seven googolplexes
+# Maybe when we get Bignum support..
+#10314424798490535546171949056 Ten octillion, three hundred fourteen septillion, four hundred twenty-four sextillion, seven hundred ninety-eight quintillion, four hundred ninety quadrillion, five hundred thirty-five trillion, five hundred forty-six billion, one hundred seventy-one million, nine hundred forty-nine thousand, and fifty six
+# Some things that should not parse to a number:
+undef and
+undef ,
+undef
+undef now is the time for all good men to come to the aid of their country
+undef gazillion
+undef hexillion
diff --git a/t/samples.t b/t/samples.t
new file mode 100644
index 0000000..a049ec8
--- /dev/null
+++ b/t/samples.t
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+use strict;
+use Test;
+
+our @samples;
+BEGIN { # read the samples at compile time, so the plan can be declared up front
+ open(SAMPLES, "samples") || die "samples: $!";
+ @samples=grep { ! /^#/ } <SAMPLES>; # every non-comment line is one test
+ plan tests => (scalar @samples);
+}
+
+use Lingua::EN::Words2Nums;
+
+foreach (@samples) { # each line is "<expected> <input text>"
+ chomp $_;
+ my ($num, $text)=split(' ', $_, 2);
+ if ($num eq 'undef') { # inputs that must be rejected
+ ok(! defined words2nums($text));
+ }
+ else {
+ my $w2n = words2nums($text);
+ # On win32 platform, exponents seem to have a leading zero.
+ # Strip it ("e+020" becomes "e+20") so this works either way.
+ $w2n =~ s/e\+0(\d+)/e+$1/;
+ ok($w2n, $num);
+ }
+}
diff --git a/testnum b/testnum
new file mode 100755
index 0000000..9374a26
--- /dev/null
+++ b/testnum
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+use blib; # work on uninstalled package
+use Words2Nums;
+import Lingua::EN::Words2Nums;
+
+$Lingua::EN::Words2Nums::debug=1; # have the module explain parse failures on STDERR
+if (@ARGV) { # convert the first command-line argument
+ print "$ARGV[0] => ".words2nums(shift)."\n";
+}
+else { # otherwise convert each line of standard input
+ while (<>) {
+ chomp; print "$_ => ".words2nums($_)."\n"; # chomp keeps input and result on one line
+ }
+}