liblingua-en-words2nums-perl (0.18) unstable; urgency=low

* Remove the PM_FILTER to support Windows. (rt.cpan.org #38101) # imported from the archive
author: Joey Hess <joeyh@debian.org> 2010-02-05 13:42:32 -0800
committer: Joey Hess <joeyh@debian.org> 2010-02-05 13:42:32 -0800
commit: 474dd0dbd852f84e5b7507e01636df0c6e90c412 (patch)
tree: 293ccce7717ad8bbb62c932d58fcde03f749b851
15 files changed, 616 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..4a4c1ae
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,12 @@
+MANIFEST
+Makefile.PL
+README
+TODO
+debian/changelog
+debian/control
+debian/copyright
+debian/rules
+samples
+t/samples.t
+testnum
+Words2Nums.pm
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100755
index 0000000..97778b2
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+use ExtUtils::MakeMaker;
+
+WriteMakefile(
+	NAME      => 'Lingua::EN::Words2Nums',
+	VERSION   => getversion(),
+	$] < 5.005 ? () : (
+		AUTHOR => 'Joey Hess <joey@kitenet.net>',
+		ABSTRACT_FROM => 'Words2Nums.pm',
+	)
+);
+
+# The version is pulled from the debian changelog, to avoid having to keep
+# two copies synced.
+sub getversion {
+	open(my $dch, '<', 'debian/changelog') or die "debian/changelog: $!";
+	my $first = <$dch>; # the newest entry's header line carries "(version)"
+	my ($vers) = defined $first ? $first =~ m/\((.+)\)/ : ();
+	die "no version" unless defined($vers) && length($vers);
+	return $vers; # $dch is lexical, so it is closed on return
+}
+
diff --git a/README b/README
new file mode 100644
index 0000000..bcca2bb
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+Lingua::EN::Words2Nums parses English representations of numbers like
+"two thousand and one". Some samples of the range of inputs it can handle
+can be found in the file "samples", which is used by the regression test
+suite.
+
+Web site is http://kitenet.net/~joey/code/words2nums/
+
+Copyright 2001, 2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..973cd47
--- /dev/null
+++ b/TODO
@@ -0,0 +1,7 @@
+* spelled out numbers, ie, "nine one one" => 911,
+	(currently, it returns 9 + 1 + 1)
+	(getting this right is a pre-req for decimals..)
+* fractions and decimals
+* support strings that start with a number and end with non-numbers,
+  returning the remaining part. HARD -- numbers are currently parsed
+  backwards!
diff --git a/Words2Nums.pm b/Words2Nums.pm
new file mode 100644
index 0000000..1c7f13b
--- /dev/null
+++ b/Words2Nums.pm
@@ -0,0 +1,280 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+Lingua::EN::Words2Nums - convert English text to numbers
+
+=cut
+
+package Lingua::EN::Words2Nums;
+use warnings;
+use strict;
+require Exporter;
+our @ISA=qw(Exporter);
+our @EXPORT=qw(&words2nums);
+
+=head1 SYNOPSIS
+
+ use Lingua::EN::Words2Nums;
+ $num=words2nums("two thousand and one");
+ $num=words2nums("twenty-second");
+ $num=words2nums("15 billion, 6 million, and ninteen");
+
+=head1 DESCRIPTION
+
+This module converts English text into numbers. It supports both ordinal and
+cardinal numbers, negative numbers, and very large numbers.
+
+The main subroutine, which is exported by default, is words2nums(). This
+subroutine, when fed a string, will attempt to convert it into a number.
+If it succeeds, the number will be returned. If it fails, it returns undef.
+
+=head1 VARIABLES
+
+There are a number of variables that can be used to tweak the behavior of this
+module. For example, debugging can be enabled by setting
+$Lingua::EN::Words2Nums::debug=1
+
+=over 4
+
+=cut
+
+# Public global variables.
+our $debug = 0;
+our $billion = 10 ** 9;
+
+=item $Lingua::EN::Words2Nums::debug
+
+Default: 0. If set to a true value, outputs on standard error some useful
+messages if parsing fails for some reason.
+
+=item $Lingua::EN::Words2Nums::billion
+
+Default: 10 ** 9. This is the number that will be returned for "one billion".
+It defaults to the American version; the English will want to set it to
+10 ** 12. Setting this number automatically changes all the larger numbers
+(trillion, quadrillion, etc) to match.
+
+=back
+
+=head1 NOTES
+
+It does not understand decimals or fractions, yet.
+
+Scores are supported, eg: "four score and ten". So are dozens. So is a baker's
+dozen. And a gross.
+
+Various misspellings of numbers are understood.
+
+While it handles googol correctly, googolplex is too large to fit in perl's
+standard scalar type, and "inf" will be returned.
+
+=cut
+ 
+our %nametosub = (
+	naught =>	[ \&num, 0 ],	# Cardinal numbers, paired with their
+	nought =>	[ \&num, 0 ],	# ordinals, leaving out the ordinals
+	zero =>		[ \&num, 0 ],	# that just add "th".
+	one =>		[ \&num, 1 ],	first =>	[ \&num, 1 ],
+	two =>		[ \&num, 2 ],	second =>	[ \&num, 2 ],
+	three =>	[ \&num, 3 ],	third =>	[ \&num, 3 ],
+	four =>		[ \&num, 4 ],	fourth =>	[ \&num, 4 ],
+	five =>		[ \&num, 5 ],	fifth =>	[ \&num, 5 ],
+	six =>		[ \&num, 6 ],
+	seven =>	[ \&num, 7 ],	seventh =>	[ \&num, 7 ],
+	eight =>	[ \&num, 8 ],	eighth =>	[ \&num, 8 ],
+	nine =>		[ \&num, 9 ],	ninth =>	[ \&num, 9 ],
+	ten =>		[ \&num, 10 ],
+	eleven =>	[ \&num, 11 ],
+	twelve =>	[ \&num, 12 ],	twelfth =>	[ \&num, 12 ],
+	thirteen =>	[ \&num, 13 ],
+	fifteen =>	[ \&num, 15 ],
+	eighteen =>	[ \&num, 18 ],
+	ninteen =>	[ \&num, 19 ], # common(?) misspelling
+	teen =>		[ \&suffix, 10 ], # takes care of the regular teens
+	twenty =>	[ \&num, 20 ],	twentieth =>	[ \&num, 20 ],
+	thirty =>	[ \&num, 30 ],	thirtieth =>	[ \&num, 30 ],
+	forty =>	[ \&num, 40 ],	fortieth =>	[ \&num, 40 ],
+	fourty =>	[ \&num, 40 ],	fourtieth =>	[ \&num, 40 ], # at least I misspell it like this
+	fifty =>	[ \&num, 50 ],	fiftieth =>	[ \&num, 50 ],
+	sixty =>	[ \&num, 60 ],	sixtieth =>	[ \&num, 60 ],
+	seventy =>	[ \&num, 70 ],	seventieth =>	[ \&num, 70 ],
+	eighty =>	[ \&num, 80 ],	eightieth =>	[ \&num, 80 ],
+	ninety =>	[ \&num, 90 ],	ninetieth =>	[ \&num, 90 ],
+	ninty =>	[ \&num, 90 ], # common misspelling
+	hundred =>	[ \&prefix, 100 ],
+	thousand =>	[ \&prefix, 1000 ],
+	million =>	[ \&prefix, 10 ** 6 ],
+	milion =>	[ \&prefix, 10 ** 6 ], # common(?) misspelling
+	milliard =>	[ \&prefix, 10 ** 9 ],
+	billion =>	[ \&powprefix, 2 ], # These vary depending on country.
+	billiard =>	[ \&prefix, 10 ** 15 ],
+	trillion =>	[ \&powprefix, 3 ],
+	trilliard =>	[ \&prefix, 10 ** 21 ],
+	quadrillion =>	[ \&powprefix, 4 ],
+	quadrilliard =>	[ \&prefix, 10 ** 27 ],
+	quintillion =>	[ \&powprefix, 5 ],
+	quintilliard =>	[ \&prefix, 10 ** 33 ],
+	sextillion =>	[ \&powprefix, 6 ],
+	sextilliard =>	[ \&prefix, 10 ** 39 ],
+	septillion =>	[ \&powprefix, 7 ],
+	septilliard =>	[ \&prefix, 10 ** 45 ],
+	octillion =>	[ \&powprefix, 8 ],
+	octilliard =>	[ \&prefix, 10 ** 51 ],
+	nonillion =>	[ \&powprefix, 9 ],
+	nonilliard =>	[ \&prefix, 10 ** 57 ],
+	decillion =>	[ \&powprefix, 10 ],
+	decilliard =>	[ \&prefix, 10 ** 63 ],
+	undecillion =>	[ \&powprefix, 11 ],
+	undecilliard =>	[ \&prefix, 10 ** 69 ],
+	duodecillion =>	[ \&powprefix, 12 ],
+	duodecilliard =>	[ \&prefix, 10 ** 75 ],
+	tredecillion =>		[ \&powprefix, 13 ],
+	tredecilliard =>	[ \&prefix, 10 ** 81 ],
+	quattuordecillion =>	[ \&powprefix, 14 ],
+	quattuordecilliard =>	[ \&prefix, 10 ** 87 ],
+	quindecillion =>	[ \&powprefix, 15 ],
+	quindecilliard =>	[ \&prefix, 10 ** 93 ],
+	sexdecillion =>		[ \&powprefix, 16 ],
+	septendecillion =>	[ \&powprefix, 17 ],
+	octodecillion =>	[ \&powprefix, 18 ],
+	novemdecillion =>	[ \&powprefix, 19 ],
+	vigintillion =>		[ \&powprefix, 20 ],
+	unvigintillion =>	[ \&powprefix, 21 ],
+	duovigintillion =>	[ \&powprefix, 22 ],
+	duvigintillion =>	[ \&powprefix, 22 ], # some use this spelling
+	trevigintillion =>	[ \&powprefix, 23 ],
+	quattuorvigintillion =>	[ \&powprefix, 24 ],
+	quinvigintillion =>	[ \&powprefix, 25 ],
+	sexvigintillion =>	[ \&powprefix, 26 ],
+	septenvigintillion =>	[ \&powprefix, 27 ],
+	octovigintillion =>	[ \&powprefix, 28 ],
+	novemvigintillion =>	[ \&powprefix, 29 ],
+	trigintillion =>	[ \&powprefix, 30 ],
+	# This process can be continued indefinitely, but one has to stop
+	# somewhere. -- A Dictionary of Units of Measurement
+	centillion =>	[ \&powprefix, 100 ],
+	googol =>	[ \&googol ],
+	googolplex =>	[ \&googolplex ],
+	negative =>	[ \&invert ],
+	minus =>	[ \&invert ],
+	score =>	[ \&prefix, 20 ],
+	gross =>	[ \&prefix, 12 * 12 ],
+	dozen =>	[ \&prefix, 12 ],
+	bakersdozen =>	[ \&prefix, 13 ],
+	bakerdozen =>	[ \&prefix, 13 ],
+	eleventyone =>	[ \&num, 111 ], # This program written on the day
+	eleventyfirst =>[ \&num, 111 ], # FOTR released.
+	s =>		[ sub {} ], # ignore 's', at the end of a word,
+	                            # easy pluralization of dozens, etc.
+	es =>		[ sub {} ], # same for 'es'; for googolplexes, etc.
+	th =>		[ sub {} ], # ignore 'th', so plain ordinals (sixth) parse
+);
+
+# Reverse sorting puts a longer name ahead of any name it starts with, so
+# that eg, ninety has a chance to match before nine.
+my $numregexp = do { my $names = join("|", reverse sort keys %nametosub); qr/($names)/ };
+
+my ($total, $mult, $oldpre, $newmult, $suffix, $val);
+
+sub num ($) {
+	# $val stays a file-level variable, as in the rest of the parser.
+	$val = $_[0];
+	# A pending "teen" suffix (10) is folded into this word, then cleared.
+	($val, $suffix) = ($val + $suffix, 0) if $suffix;
+	# Scale by the multiplier built up from the words to the right.
+	$total += $mult * $val;
+	$newmult = 0; # the multiplier now has a number attached to it
+}
+
+sub prefix ($) {
+	my ($scale) = @_;
+	if ($oldpre < $scale) { # a bigger scale word ends the prefix chain
+		# No number claimed the old chain (lone "thousand"): count it once.
+		$total += $mult if $newmult;
+		$mult = 1;
+	}
+	($mult, $oldpre) = ($mult * $scale, $scale);
+	$newmult = 1; # cleared again by num() once a number claims this chain
+}
+
+# Scale words whose size depends on $billion: "billion" arrives with
+# $power 2, "trillion" with 3, and so on.
+sub powprefix {
+	my ($power) = @_;
+	# American usage: each step up is another factor of a thousand.
+	return prefix(10 ** (($power + 1) * 3)) if $billion == 10 ** 9;
+	# British usage: each step up is another factor of a million.
+	return prefix(10 ** ($power * 6)) if $billion == 10 ** 12;
+	# Nothing else is understood; the word is skipped and, with $debug
+	# set, a message is printed.
+	return failure("\$billion is set to odd value: $billion");
+}
+
+
+sub suffix ($) {
+	$suffix = $_[0]; # held until the next num(), which adds it in and clears it
+}
+
+sub invert () { # a lone multiplier ("minus thousand") is added before negating
+	($total, $newmult) = ($total + $mult, 0) if $newmult; $total *= -1;
+}
+
+sub googol () {
+	return prefix(10 ** 100); # behaves like any other scale word
+}
+
+sub googolplex () {
+	return prefix(10 ** (10 ** 100)); # too big for a perl scalar: comes out as "inf"
+}
+
+sub failure ($) {
+	print STDERR "$_[0]\n" if $debug; # diagnostics only when $debug is set
+	return; # undef in scalar context; words2nums() hands this to its caller
+}
+
+sub words2nums ($) { # English text in; number out, or undef if it will not parse
+	local $_=lc(shift);
+	chomp $_;
+
+	# Drop every comma, not only the first: "1,234,567.89" is a plain number.
+	s/,//g;
+	return $_ if /^[-+]?[.0-9\s]+$/; # short circuit for plain number
+
+	# Digits, signs and periods only, yet not a plain number (eg, "10-11").
+	return failure("+ or - not at beginning") if /^[-+0-9.]+$/;
+
+	s/\b(and|a|of)\b//g; # ignore some common words
+	s/[^A-Za-z0-9.]//g; # ignore spaces and punctuation, except period.
+	return failure("not a number") unless length $_;
+
+	$total=$oldpre=$suffix=$newmult=0; # reset the state shared with num() etc.
+	$mult=1;
+
+	# Work backwards up the string.
+	while (length $_) {
+		$nametosub{$1}[0]->($nametosub{$1}[1]) while s/$numregexp$//;
+		if (length $_) {
+			if (s/(\d+)(?:st|nd|rd|th)?$//) {
+				num($1);
+			}
+			else {
+				last;
+			}
+		}
+	}
+	return failure("error at $_") if length $_;
+	$total += $mult if $newmult; # special case for lone "thousand", etc.
+	return $total;
+}
+
+=head1 AUTHOR
+
+Copyright 2001-2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
+1
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..44ca0b5
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,128 @@
+liblingua-en-words2nums-perl (0.18) unstable; urgency=low
+
+  * Remove the PM_FILTER to support Windows. (rt.cpan.org #38101)
+
+ -- Joey Hess <joeyh@debian.org>  Fri, 05 Feb 2010 16:42:32 -0500
+
+liblingua-en-words2nums-perl (0.17) unstable; urgency=low
+
+  * Use debhelper v7; rules file minimisation.
+
+ -- Joey Hess <joeyh@debian.org>  Tue, 22 Jul 2008 00:29:12 -0400
+
+liblingua-en-words2nums-perl (0.16) unstable; urgency=low
+
+  * The repository has moved from svn to git.
+  * Minor improvement to debian/rules clean.
+
+ -- Joey Hess <joeyh@debian.org>  Fri, 19 Oct 2007 22:21:04 -0400
+
+liblingua-en-words2nums-perl (0.15) unstable; urgency=low
+
+  * Update url to the web site.
+  * Current standards-version (no real changes).
+  * Fix unicode error in man page.
+  * Fix lintian warning about rules file.
+
+ -- Joey Hess <joeyh@debian.org>  Mon, 04 Jun 2007 16:49:19 -0400
+
+liblingua-en-words2nums-perl (0.14) unstable; urgency=low
+
+  * Remove the tests that involve exponents, as they may not work on 64 bit
+    machines, and will cause false test failures. Closes: #250610
+
+ -- Joey Hess <joeyh@debian.org>  Fri,  4 Jun 2004 15:09:13 -0300
+
+liblingua-en-words2nums-perl (0.13) unstable; urgency=low
+
+  * Remove quoting in Makefile.PL so it will build under 5.8.1 (this breaks
+    building under earlier versions of perl though). Closes: #213928
+
+ -- Joey Hess <joeyh@debian.org>  Mon,  6 Oct 2003 19:49:19 -0400
+
+liblingua-en-words2nums-perl (0.12) unstable; urgency=low
+
+  * Move from build-depends-indep to build-depends to meet current policy.
+
+ -- Joey Hess <joeyh@debian.org>  Wed,  3 Sep 2003 12:14:45 -0400
+
+liblingua-en-words2nums-perl (0.11) unstable; urgency=low
+
+  * Do not pass through things of the form "10-11", since they're not really
+    numbers.
+  * Fix testnum to work with library in same directory.
+
+ -- Joey Hess <joeyh@debian.org>  Mon, 26 May 2003 15:48:37 -0400
+
+liblingua-en-words2nums-perl (0.10) unstable; urgency=low
+
+  * Add proper spelling of "forty", and alternate "nought" spelling.
+  * Make regression test work on win32, with exponents with leading zeroes.
+
+ -- Joey Hess <joeyh@debian.org>  Wed,  7 May 2003 01:34:11 -0400
+
+liblingua-en-words2nums-perl (0.09) unstable; urgency=low
+
+  * Corrected parsing of "fourth". Oops!
+  * Moved pm file out of deep directory in source tarball, which was
+    unnecessary for such a small package.
+  * Added AUTHOR and ABSTRACT_FROM to Makefile.PL.
+  * Accept douvigintillion, as well as dovigintillion; I don't know which is
+    right.
+  * Thanks to Erick Calder for his help.
+
+ -- Joey Hess <joeyh@debian.org>  Mon,  3 Feb 2003 12:16:02 -0500
+
+liblingua-en-words2nums-perl (0.08) unstable; urgency=low
+
+  * Localize $_. 
+
+ -- Joey Hess <joeyh@debian.org>  Fri, 18 Oct 2002 16:09:59 -0400
+
+liblingua-en-words2nums-perl (0.07) unstable; urgency=low
+
+  * Use debhelper v4. 
+
+ -- Joey Hess <joeyh@debian.org>  Sat,  1 Jun 2002 18:15:26 -0400
+
+liblingua-en-words2nums-perl (0.06) unstable; urgency=low
+
+  * Don't try to test for inf, since it seems "Infinity" is the string on some
+    platforms.
+
+ -- Joey Hess <joeyh@debian.org>  Sat,  1 Jun 2002 10:01:09 -0400
+
+liblingua-en-words2nums-perl (0.05) unstable; urgency=low
+
+  * Added big numbers between undecillion and trigintillion. Also
+    centillion, and billiard through quindecilliard.
+
+ -- Joey Hess <joeyh@debian.org>  Tue, 26 Feb 2002 23:09:29 -0500
+
+liblingua-en-words2nums-perl (0.04) unstable; urgency=low
+
+  * Corrected parsing of otherwise plain numbers that have commas in them
+    (123,456.789)
+
+ -- Joey Hess <joeyh@debian.org>  Sat, 12 Jan 2002 17:33:22 -0500
+
+liblingua-en-words2nums-perl (0.03) unstable; urgency=low
+
+  * If the entire string is ignorables ("and", "a", punctuation), don't
+    return 0, but undef.
+
+ -- Joey Hess <joeyh@debian.org>  Sat, 12 Jan 2002 14:17:16 -0500
+
+liblingua-en-words2nums-perl (0.02) unstable; urgency=low
+
+  * Added support for trillion through googolplex, and added localization
+    code for the different billions and other numbers.
+  * Lots of bugfixes, including getting the ordinals right (I hope).
+
+ -- Joey Hess <joeyh@debian.org>  Wed, 19 Dec 2001 23:08:03 -0500
+
+liblingua-en-words2nums-perl (0.01) unstable; urgency=low
+
+  * First release.
+
+ -- Joey Hess <joeyh@debian.org>  Wed, 19 Dec 2001 14:23:03 -0500
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..7f8f011
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+7
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..67adea7
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,17 @@
+Source: liblingua-en-words2nums-perl
+Section: perl
+Priority: optional
+Build-Depends: debhelper (>= 7), perl5, dpkg-dev (>= 1.9.0)
+Maintainer: Joey Hess <joeyh@debian.org>
+Standards-Version: 3.8.4
+Vcs-Git: git://git.kitenet.net/words2nums
+Homepage: http://kitenet.net/~joey/code/words2nums/
+
+Package: liblingua-en-words2nums-perl
+Architecture: all
+Depends: ${perl:Depends}, ${misc:Depends}
+Description: convert English text to numbers
+ A perl module that can parse a wide variety of English text
+ and deduce the number it represents. For example, it can convert
+ "five million, one thousand and sixteen" to 5001016, and 
+ "twenty-seventh" to 27.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..b08d379
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,10 @@
+Lingua::EN::Words2Nums is a Debian native package. 
+
+Copyright 2001-2003 Joey Hess <joey@kitenet.net>
+
+This module is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+That means it's dual licensed under the GPL
+(/usr/share/common-licenses/GPL) and Artistic
+(/usr/share/common-licenses/Artistic) licenses.
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..2a6769e
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1 @@
+README TODO
diff --git a/debian/examples b/debian/examples
new file mode 100644
index 0000000..81154dd
--- /dev/null
+++ b/debian/examples
@@ -0,0 +1 @@
+samples
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..f6db6c6
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,7 @@
+#!/usr/bin/make -f
+%:
+	dh $@
+
+# Not intended for use by anyone except the author.
+announcedir:
+	@echo ${HOME}/src/joeywiki/code/words2nums/news
diff --git a/samples b/samples
new file mode 100644
index 0000000..5325934
--- /dev/null
+++ b/samples
@@ -0,0 +1,78 @@
+# Sample conversions. The result is on the left, and an input is on the
+# right. This file is used for regression testing. Note that this file
+# assumes that a billion is 10^9, but you can configure it otherwise when
+# you use the module.
+2001		two thousand one
+3424		three thousand four hundred twenty four
+3424		3 thousand 4 hundred 24
+798681		seven hundred ninety eight thousand six hundred eighty-one
+798000		798 thousand
+306172		three hundred six thousand, one hundred seventy two
+306172		3 hundred and six thousand, one hundred and seventy-2
+42524		fourty-two thousand five hundred twenty-four
+0		zero
+1		one
+1		first
+2		second
+3		three
+3		third
+9		ninth
+59		fifty-ninth
+1000		thousand
+1000		one thousand
+16		sixteen
+1000524		1000,524
+999.3333333	999.3333333
+30303.30303	30303.30303
+65569565609	65569565609
+-1		-1
+-12211.1133	-12,211.1133
+153		one hundred fifty three
+88		eighty-eight
+42		fourtytwo
+1000000		millionth
+3424		thirty-four hundred twenty-four
+11059		eleven thousand and fifty-nine
+9622000		nine million, six hundred and twenty-two thousand
+5600000		fifty-six hundred thousand
+167		one hundred and sixty-seventh
+25300		two hundred and fifty three hundred
+65065065065	sixty-five thousand sixty-five million sixty-five thousand and sixty-five
+11011011011	eleven billion eleven million eleven thousand eleven
+90		four score and ten
+501000000	five hundred and one million
+12		dozen
+48		four dozen
+13		baker's dozen
+13		bakers dozen
+13		baker dozen
+39		three baker's dozens
+4000		four thousands
+1001		thousand one
+4603		four thousand six hundred and three
+4103		four thousand, hundred and three
+288		two gross
+288		two grosses
+1000000		a million
+1000000		million
+1000000000	billion
+1000000000000	trillion
+1		1st
+2		2nd
+3		3rd
+4		4th
+4023		4023rd
+# Perl's numbers don't go this high.
+# Also, the return for infinity varies with platforms, so this is not a
+# good test.
+#inf		googolplex
+#inf		seven googolplexes
+# Maybe when we get Bignum support..
+#10314424798490535546171949056 Ten octillion, three hundred fourteen septillion, four hundred twenty-four sextillion, seven hundred ninety-eight quintillion, four hundred ninety quadrillion, five hundred thirty-five trillion, five hundred forty-six billion, one hundred seventy-one million, nine hundred forty-nine thousand, and fifty six
+# Some things that should not parse to a number:
+undef		and
+undef		,
+undef		
+undef		now is the time for all good men to come to the aid of their country
+undef		gazillion
+undef		hexillion
diff --git a/t/samples.t b/t/samples.t
new file mode 100644
index 0000000..a049ec8
--- /dev/null
+++ b/t/samples.t
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+use strict;
+use Test;
+
+our @samples;
+BEGIN {
+	open(SAMPLES, "samples") or die "samples: $!";
+	@samples = grep { $_ !~ /^#/ } <SAMPLES>; # comment lines are not tests
+	plan tests => scalar(@samples); # one test per remaining sample line
+}
+
+use Lingua::EN::Words2Nums;
+
+foreach (@samples) {
+	chomp $_;
+	my ($num, $text)=split(' ', $_, 2);
+	if ($num eq 'undef') {
+		ok(! defined words2nums($text));
+	}
+	else {
+		my $w2n = words2nums($text);
+		# On win32 platform, exponents seem to have a leading zero
+		# (1e+020); strip it. The "+" is escaped to match literally.
+		$w2n =~ s/e\+0(\d+)/e+$1/;
+		ok($w2n, $num);
+	}
+}
diff --git a/testnum b/testnum
new file mode 100755
index 0000000..9374a26
--- /dev/null
+++ b/testnum
@@ -0,0 +1,14 @@
+#!/usr/bin/perl
+use blib; # work on uninstalled package
+use Words2Nums;
+import Lingua::EN::Words2Nums;
+
+$Lingua::EN::Words2Nums::debug=1;
+if (@ARGV) { # convert the first command-line argument
+	print "$ARGV[0] => ".words2nums(shift)."\n";
+}
+else { # otherwise convert each input line in turn
+	while (<>) {
+		chomp; print "$_ => ".words2nums($_)."\n"; # chomp keeps the echo on one line
+	}
+}
author	Joey Hess <joeyh@debian.org>	2010-02-05 13:42:32 -0800
committer	Joey Hess <joeyh@debian.org>	2010-02-05 13:42:32 -0800
commit	474dd0dbd852f84e5b7507e01636df0c6e90c412 (patch)
tree	293ccce7717ad8bbb62c932d58fcde03f749b851