diff options
author | Florian Schlichting <fsfs@debian.org> | 2021-08-24 11:45:00 +0200 |
---|---|---|
committer | Florian Schlichting <fsfs@debian.org> | 2021-08-24 11:45:00 +0200 |
commit | bd2c34d5b9db136a29d7d8045ffdb3add4a7dcfb (patch) | |
tree | d937516fcd90fdd906eb55733fd3a2d24805ddc0 |
Import libwww-mechanize-perl_2.04.orig.tar.gz
[dgit import orig libwww-mechanize-perl_2.04.orig.tar.gz]
122 files changed, 17113 insertions, 0 deletions
diff --git a/CONTRIBUTORS b/CONTRIBUTORS new file mode 100644 index 0000000..ce3e417 --- /dev/null +++ b/CONTRIBUTORS @@ -0,0 +1,60 @@ + +# WWW-MECHANIZE CONTRIBUTORS # + +This is the (likely incomplete) list of people who have helped +make this distribution what it is, either via code contributions, +patches, bug reports, help with troubleshooting, etc. A huge +'thank you' to all of them. + + * Alexandr Ciornii + * Andrew Grangaard + * Andy Lester + * Bernhard Wagner + * Chase Whitener + * dakkar + * Dave Doyle + * David Precious + * David Steinbrunner + * Desmond Daignault + * Ed Avis + * Evan Zacks + * Ferenc Erki + * Flavio Poletti + * Florian Schlichting + * Gabor Szabo + * gjtunley@gmail.com + * gregor herrmann + * Grigor Karavardanyan + * James Raspass + * Jason May + * Jesse Vincent + * John Beppu + * Jozef Kutej + * Karen Etheridge + * Kirrily 'Skud' Robert + * Kivanc Yazan + * Lars Dɪᴇᴄᴋᴏᴡ 迪拉斯 + * Mark Stosberg + * Martin H. Sluka + * Matthew Chae + * Matt S Trout + * Max Maischein + * Mohammad S Anwar + * Neil Bowers + * Nik LaBelle + * Olaf Alders + * Philippe Bruhat (BooK) + * Ricardo Signes + * Schuyler Langdon + * Sergey Romanov + * Shoichi Kaji + * simbabque + * Steve Scaffidi + * Stuart Johnston + * sunnavy + * Varadinsky + * Ville Skyttä + * Zefram + * 積丹尼 Dan Jacobson + + @@ -0,0 +1,1640 @@ +Revision history for WWW::Mechanize + +2.04 2021-08-06 12:28:31Z + [ENHANCEMENTS] + - Add a head() method to allow relative URLs (GH#321) (Julien Fiegehenn) + + [DOCUMENTATION] + - Document that form_id() accepts an id, not a name (GH#319) (Olaf Alders) + +2.03 2020-11-10 14:47:04Z + [FIXED] + - Validate exclusive button selectors (GH#314) (Ferenc Erki) + +2.02 2020-10-13 13:50:28Z + [ENHANCEMENTS] + - Add redirects() as a short to HTTP::Response->redirects (GH#116) (Julien Fiegehenn) + - click_button( value => $foo) now also works for button tags and image buttons + (GH#131) (Spencer Christensen) and (Julien Fiegehenn) + + [DOCUMENTATION] + - Document that click_button() will die when it cannot find a button (GH#136) (Julien Fiegehenn) + - Document that content() returns undef before a request was made (GH#134) (Julien Fiegehenn) + + [TESTS] + - Add a test for dump_forms() with multiselect (GH#133) (Julien Fiegehenn) + - Add tests for select multiple (GH#132) (Michael G. Schwern) and (Julien Fiegehenn) + - Remove use of discouraged vars pragma (James Raspass) + - Add tests for dump_forms() and field() with empty attributes (GH#125) (Julien Fiegehenn) + +2.01 2020-09-18 17:51:10Z + - Add rel filter to find_link() (GH#305) (Julien Fiegehenn) + - Fix typos (GH#304) (Ferenc Erki) + +2.00 2020-06-09 19:09:53Z + - Require LWP::UserAgent 6.45 (GH#302) (Shoichi Kaji) + +1.99 2020-06-08 15:35:04Z + - Bump HTTP::Daemon test dependency to 6.12 (GH#300) (Olaf Alders) + +1.98 2020-05-25 17:06:47Z (TRIAL RELEASE) + - Don't make assumptions about port 80 in test (GH#299) (Olaf Alders) + +1.97 2020-05-14 00:46:53Z + - Respect CDATA[[ sections when parsing HTML (GH#298) (Max Maischein) + - extract image links also from css (GH#12) (Jozef Kutej) and (GH#297) + (Julien Fiegehenn) + - s/parm/param/ in documentation (GH#295) (Olaf Alders) + +1.96 2020-02-21 02:23:40Z + [FIXED] + - HTML::Form::find_input() has a 1-based index (GH#293) (Olaf Alders) + - Fix invocation of dump_forms in mech-dump (GH#288) (積丹尼 Dan Jacobson) + - make xt/author/eol.t pass (GH#291) (Shoichi Kaji) + [DOCUMENTATION] + - Fix documentation of use of undef in form_with() and all_forms_with() + (GH#289) (積丹尼 Dan Jacobson) + +1.95 2019-10-28 13:07:45Z + [FIXED] + - die if submit_form() called with invalid form_id (GH#287) (Olaf Alders) + +1.94 2019-10-10 13:12:28Z + [FIXED] + - Issue #182: Don't autocheck for mech-dump so basic auth works (GH#285) + (Julien Fiegehenn) + + [DOCUMENTATION] + - Fix pod error reported by CPANTS. (GH#284) (Mohammad S Anwar) + +1.93 2019-10-04 21:06:49Z + [FIXED] + - Allow images to not have a src attribute (GH#282) (Julien Fiegehenn) + + [DOCUMENTATION] + - Pod fixes. (GH#283) (Mohammad S Anwar) + +1.92 2019-08-24 01:00:35Z + [FIXED] + - Test requires HTTP::Daemon 6.05+ and uses 127.0.0.1 or [::1] according to + server's sockdomain (GH#280) (Shoichi Kaji) + - Install LWP::Protocol::https and fix xt/author/live/encoding.t (GH#277) + (Shoichi Kaji) + - Set dist trusty for old Perls on Travis (GH#279) (Shoichi Kaji) + - Fixed pod errors as reported by CPANTS. (GH#273) (Mohammad S Anwar) + + [DOCUMENTATION] + - Document that follow_link will die on failure with autocheck enabled (GH#271) (Olaf Alders) + + [TESTS] + - Add a test for finding a link in a meta refresh tag (GH#275) (Olaf Alders) + +1.91 2019-01-10 18:44:33Z + [ENHANCEMENTS] + - Don't install Perl::Critic and Perl::Tidy to run user tests (GH#268) + (Julien Fiegehenn) + - Remove redundant PodSyntaxTests (GH#265) (Andrew Grangaard) + - Add test dependency for Perl::Tidy (GH#263) (Julien Fiegehenn) + +1.90 2018-11-12 18:02:03Z + [DOCUMENTATION] + - Pod fixes (GH#261) (Julien Fiegehenn) + - Fixed pod error as reported by CPANTS. (GH#264) (Mohammad S Anwar) + + [ENHANCEMENTS] + - Upgrade to HTML::TreeBuilder version 5 to get support for weak references in + HTML::Element (GH#251) (Julien Fiegehenn) + +1.89 2018-10-18 19:13:34Z + [ENHANCEMENTS] + - Add support to find_image() and find_all_images() via 'id' + and 'class' (GH#242) (Julien Fiegehenn) + - Pass strict/verbose constructor args to HTML::Form (GH#256) (Julien Fiegehenn) + - Add ability to clear history and tests for history (GH#259) (mschae94) + +1.88 2018-03-23 15:37:25Z +======================================== + [FIXED] + - tick() now dies if checkbox is not found (GH#248) (Olaf Alders) + + [DOCUMENTATION] + - Clarify behaviour of submit_form when with_fields is supplied as an arg (GH#247) (Olaf Alders) + - Document some "Best Practices" (GH#246) (Olaf Alders) + - Update links in Pod. Suggest LWP::ConsoleLogger rather than LWP::Debug (GH#244) (Olaf Alders) + +1.87 2018-02-07 22:04:16Z +======================================== + [FIXED] + - Fix typo in contributor name (GH#241) (Philippe Bruhat (BooK)) + - Fix link to Michael Schilli's article in Linux magazine (GH#240) (Bernhard Wagner) + - Fix some section links (GH#238) (Evan Zacks) + - Override _agent() method. (GH#236) (Сергей Романов) + - Link to appropriate section of HTML::Form (GH#237) (Evan Zacks) + - Make version consistent in .pm files (GH#231) (Olaf Alders) + + [ENHANCEMENTS] + - Return form number in list context. (GH#235) (Сергей Романов) + - Overload 'post' in order to set 'base'. (GH#111) (Stuart A Johnston) + - Allow multiple file paths/uris in mech-dump; fixes issue 72 (GH#113) (Nik LaBelle) + - Add docs for the output of dump_forms (GH#112) (John Beppu) + +1.86 2017-07-04 15:48:46Z +======================================== + [FIXED] + - use 127.0.0.1 instead of 'localhost' in a test script to avoid the test + hanging due to ipv6 issues (GH#31, see also changes in 1.85) + +1.85 2017-06-28 22:06:00Z +======================================== + [FIXED] + - use 127.0.0.1 instead of 'localhost' in a test to avoid the test hanging + due to ipv6 issues (GH#31) + - Remove private logic for taint checking (Dave Doyle) + - Fix Pod (simbabque) + - Bump Test::More prereq to get working subtest support (Karen Etheridge) + - Fix intermittent failures of taint.t (GH#108) (Kivanc Yazan) + - Fix kwalitee issues (GH#107) (Kivanc Yazan) + + [ENHANCEMENTS] + - Print section titles if mech-dump --all is invoked (GH#81) (Сергей + Романов) + - Add cookbook docs on dumping a req without sending it (#115) (Grigor + Karavardanyan) + - Document that submit only submits current form (GH#114) (nawglan) + - Add Travis testing on Perl 5.26 (Karen Etheridge) + - Remove obsolete and unincremented $VERSIONs in test modules (Karen + Etheridge) + +1.84 2017-03-07 13:34:57-05:00 America/Toronto +======================================== +[ENHANCEMENTS] +- Parse url (href attribute) for js window.open GH#11 + +[FIXED] +- Set STDOUT to be utf8 in mech-dump. Fixes issue GH#36 +- Added --version option to mech-dump +- Don't die on uri() when there has been no request. Fixes issue GH#60 +- Remove old information from the cookbook. Fixes issue GH#28 +- Documentation correction. Fixes issue GH#65 and GH#59 +- Work around Test::More prior to 1.001004. Fixes GH#74 +- Fix hostname in test. Fixes GH#73 + +1.83 2016-10-14 16:45:30-04:00 America/Toronto +======================================== +[FIXED] +- Moved live tests to be author tests. Run using dzil test --author. (Steve + Scaffidi) + +1.82 2016-10-06 23:00:30-04:00 America/Toronto +======================================== +[ENHANCEMENTS] +- Added strict_forms flag to submit_form() which sets the HTML::Form strict flag (Gareth Tunley) + +[FIXED] +- Fixed tests which tried to access HTTPS urls when LWP::Protocol::https wasn't + installed (Olaf Alders). Reported by Slaven Rezić. See + https://github.com/libwww-perl/WWW-Mechanize/issues/54 + +1.81 2016-10-06 08:52:44-04:00 America/Toronto +======================================== +[FIXED] +- Work around bug in HTTP::Cookies that is triggered on reload(). See + https://rt.cpan.org/Public/Bug/Display.html?id=75897 (Gianni Ceccarelli) + +1.80 2016-09-24 22:38:27-04:00 America/Toronto +======================================== +[FIXED] +- Fixes behaviour of submit_form() when multiple filters have been supplied (Ed + Avis) + +1.79 2016-09-16 23:53:48-04:00 America/Toronto +======================================== +[ENHANCEMENTS] +- Added form_with() method. (Martin Sluka) + +1.78 2016-08-08 09:18:59-04:00 America/Toronto +======================================== +[OTHER CHANGES] +- No changes specific to this version. First non-develepment release in about a year. + +1.77 2016-08-05 12:50:12-04:00 America/Toronto (TRIAL RELEASE) +======================================== +[TESTS] +- Skip Wikipedia tests if LWP::Protocol::https is not installed. + +1.76 2016-07-29 12:17:25-04:00 America/Toronto (TRIAL RELEASE) +======================================== +[ENHANCEMENTS] +- Added history() and history_count() methods. (Ricardo Signes) +- click_button() now accepts ids. (Olaf Alders) +- Add a more descriptive error message when ->request is called without a + parameter. (Max Maischein) + +[DOCUMENTATION] +- Document that form_id warns in addition to returning undef when a form cannot + be found. (Olaf Alders) +- Document use of a proxy with bin/mech-dump. (Florian Schlichting) + +[OTHER CHANGES] +- New releases for this distribution are now generated by Dist::Zilla + +1.75 2015-06-03 +======================================== +[OTHER CHANGES] + +- WWW::Mechanize::Image and WWW::Mechanize::Link now have a defined $VERSION +- fixed warning about the use of the encoding pragma (new in 5.22) (RT#91971) +- fixed warning about the use of CGI::param in list context (RT#103096) + + +1.74 2015-01-23 +======================================== +[OTHER CHANGES] + +- updated repository link in metadata + + +1.73 2013-08-24 +======================================== +[TESTS] + +- Update t/local/back.t to use LocalServer for 404 checking to avoid fails +on win32. Fix by Matt S Trout, patient diagnostics and testing provided +by jayefuu of freenode #perl + +- Blow away more proxy env vars in LocalServer, and do it on load so that +the LWP env checking doesn't happen before we've done it. + +[OTHER CHANGES] + +- Better error when passing only one parameter to follow_link + + +1.72 Thu Feb 2 18:37:28 EST 2012 +======================================== +[DEPENDENCIES] +Bumped the HTML::Form dependency to fix failures on CentOS 5 + + + +1.71 Tue Nov 14 13:50:41 EDT 2011 +======================================== +[ENHANCEMENTS] +Recognise application/xhtml+xml as HTML. + +[DOCUMENTATION] +Improved docs about support of JavaScript +Typo fixes. + +[TESTS] +Updated tests as oops-music.com is in utf-8 now + +1.70 Fri Aug 26 13:46:30 EDT 2011 +======================================== +[ENHANCEMENTS] +Mech now defaults to _not_ running live tests by default. +You can still enable them by running "perl Makefile.PL --live" +Thanks to RJBS for the suggestion + +1.69_01 +======================================== +[INTERNALS] +The test suite for the local tests was updated + +1.68 Fri Apr 22 01:10:40 EST 2011 +======================================== +No changes from 1.67_01 + +1.67_01 +======================================== +[ANNOUNCE] +As of this release, Jesse Vincent has taken over maintenance of +WWW-Mechanize. The project's repository can be found at: + + https://github.com/bestpractical/www-mechanize + + +[FIXED] +Added prereq for HTML::TreeBuilder. + + +1.66 Fri Sep 10 16:25:44 CDT 2010 +======================================== +[FIXED] +Fixed prerequisites on HTTP::Server::Simple on Windows. + +DNS checks in t/autocheck.t and t/local/failure.t improved. Thanks, +Schwern. + +[ENHANCEMENTS] +New $mech->text method returns the text from your HTML page. The +exact rendering of this text is simply removing all the HTML +tags, but this will change. It's pretty ugly. If anyone wants to +work on a better-looking text dump, I'd love to see it. + +Added mech-dump --text. + +[DOCUMENTATION] +Improvements to the docs explaining explicitly about the subclassed +methods we inherit from LWP::UserAgent. Thanks, Lyle Hopkins! + + +1.64 Thu Jul 1 10:41:00 CDT 2010 +======================================== +[THINGS THAT MAY BREAK YOUR CODE] +If you've been accessing $mech->{forms} or $mech->{form} values +directly, instead of going through the $mech->forms or $mech->current_form +accessors, respectively, then this version of Mech will break your +code. + +[ENHANCEMENTS] +Parsing of forms has been delayed until they're actually needed. +If don't use forms on a page, you'll no longer waste time and memory +parsing them. + +$mech->title now caches the title of the page after parsing the +page to find it. + +mech-dump now takes a --cookie-file parameter for keeping cookies +between calls. Thanks, Damien Clark. + + +[DOCUMENTATION] +Typo fixes. + + +1.62 Sat Apr 10 23:10:07 CDT 2010 +======================================== +[FIXED] +Fixed a declaration in the Movable Type example in +WWW::Mechanize::Examples. + +Quiet warnings if %ENV has undef values. + +$mech->follow_link() no longer dies with an inappropriate error if +the link is not found. + +$mech->click_button() now checks to see if a form is selected. + +[INCOMPATIBILITIES] +$mech->form_name() and $mech->form_number() no longer throw warnings +if they can't find the form specified. They still return undef, +though. + +[DOCUMENTATION] +More additions to the FAQ. + + +1.60 Mon Aug 17 00:41:39 CDT 2009 +======================================== +No new features. Exists only to skip tests that always fail on +Windows. + +Fixed up some minor documentation problems. + + +1.58 Mon Jul 13 22:32:23 CDT 2009 +======================================== +No new features. If you have 1.56 installed OK, you do NOT need +to install 1.58. + +[FIXES] +Removed prereq of HTTP::Response::Encoding, even though it was never +used. Thanks for the catch, Gisle. + + + +1.56 Thu Jul 9 00:36:54 CDT 2009 +======================================== +[THINGS THAT MAY BREAK YOUR CODE] +For a while, Mech used HTTP::Response::Encoding to try to suss out +the proper encoding of the page it receives. Now, it lets +LWP::UserAgent do the work, and no longer requires +HTTP::Response::Encoding. + +[ENHANCEMENTS] +Added a new dump_headers() method to dump the HTTP response headers. + +Added --headers flag to mech-dump to dump the HTTP response headers. + +[FIXES] +Now requires LWP version 5.829 because HTTP::Response has memory +cycle bugs. + +[DOCUMENTATION] +Added a few notes to the FAQ, and fixed some incorrect docs. + + +1.55_01 Mon Jul 6 12:17:10 CDT 2009 +======================================== +This is mostly a bug fix release. There will be a number of other +bug fix releases in the next few days. + +[FIXED] +New test server now randomizes the port it runs on. + +t/cookies.t should not hang on Windows any more. + +META.yml has been updated so the search.cpan.org links should be +correct. + +Passing no_proxy would make LWP::UserAgent barf. Thanks to Mike +Schilli for the fix. + +Cookies test would fail under Windows. Fixed, thanks to many people +reporting it. + +[ENHANCEMENTS] +$mech->submit_form() now can specify the form by ID using the form_id +parameter. + +[DOCUMENTATION] +The docs used to say that ->stack_depth(0) was an infinite stack +size. This is wrong. Zero will tell Mech not to keep any history. + + + +1.54 Mon Jan 12 00:36:08 CST 2009 +======================================== +[FIXED] +Removed the computers4sure test that was failing. + + +1.52 Tue Nov 25 09:52:30 CST 2008 +======================================== +[FIXED] +Improved some error messages in $mech->submit_form(). Thanks to +Norbert Buchmuller. + + +1.51_03 Thu Nov 20 11:05:49 CST 2008 +======================================== +[FIXED] +The $mech->clone() method was not passing the cookie jar to its +clone properly. Thanks to David Sainty. + +The $mech->back() can fail if there's nothing on the stack to go +back to. Thanks to Dave Page. + +$mech->follow_link() did not complain if a link could not be found, +even with autocheck on. Now it does. Thanks, Flavio Poletti. + +[ENHANCEMENTS] +Added a $mech->form_id() method so you can look up forms by ID. + +Added $mech->content_type(), because $mech->ct() is too cryptic. + + +1.51_02 Tue Nov 18 01:30:54 CST 2008 +======================================== +[STILL BROKEN] +t/local/click_button.t is still failing its tests for calling ->click +on an HTML::Form object. I suspect this is an LWP change, but I +haven't dug into it enough yet. + +[FIXES] +Fixed the bad credentials API that stomped on LWP::UserAgent's +credentials() method. Thanks to Max Maschien and Matt Lawrence. + +The $mech->links method now finds <link href="..."> links. Thanks +to H.Merijn Brand. + +Makefile.PL explicitly requires Perl 5.8.0. + +URI.pm has to be version 1.36 or else URIs don't get encoded +correctly. + +LWP has to be 5.819 or we have encoding problems. + + +1.51_01 Thu Nov 6 15:13:03 CST 2008 +======================================== +[FIXES] +Page history is now working much better. The $mech->back() method +should behave more like a browser now. Most notably, it no longer +restores the cookie state, just like your browser doesn't restore +cookie state when you page back. It also should use much less +memory. + + +1.50 Sun Sun Oct 26 22:42:46 CDT 2008 +======================================== +[THINGS THAT MAY BREAK YOUR CODE] +WWW::Mechanize now requires version 5.815 of LWP. This in itself +may cause problems for you because of changes in how LWP does +authentication. + + +1.49_01 Sat Sep 27 23:50:04 CDT 2008 +======================================== +[THINGS THAT MAY BREAK YOUR CODE] +The autocheck argument to the constructor is now ON by default, +unless WWW::Mechanize is being subclassed. There are so many new +programmers whose ->get() calls fail unchecked that I'm now putting +on the seat belts for them. + +[FIXES] +I do believe we are on the way to having all the encoding problems +ironed out. This version incorporates a patch from here: + + http://code.google.com/p/www-mechanize/issues/detail?id=61 + +and tests from Miyagawa's WWW::Mechanize::DecodedContent + + http://search.cpan.org/dist/WWW-Mechanize-DecodedContent/ + +to finally fix this. + +[ENHANCEMENTS] +You can now specify not to set up the proxy, if there is one. The +proxy causes problems for Crypt::SSLeay. For details see: +http://code.google.com/p/www-mechanize/issues/detail?id=39 + +[DOCUMENTATION] +Fixed internal links. + +[INTERNALS] +Lots of refactoring based on Schwern's "Skimmable Code" talk. + +http://use.perl.org/~schwern/journal/36704 +http://schwern.org/~schwern/talks/Skimmable%20Code%20-%20YAPC-NA-2008.pdf + + +1.34 Mon Dec 10 00:30:39 CST 2007 +======================================== +[FIXES] +Many fixes to make the test suite more portable. + + +1.32 Tue Oct 30 12:02:17 CDT 2007 +======================================== +[ENHANCEMENTS] +Added dump methods to mirror mech-dump: +* $mech->dump_images() +* $mech->dump_links() +* $mech->dump_forms() +* $mech->dump_all() + +Sanity checks in the WWW::Mechanize::Image constructor. Every Image +must have a "url" and "tag" field passed in to it. + + +1.31_02 Thu Oct 25 11:48:29 CDT 2007 +======================================== +[ENHANCEMENTS] +Added class, class_regex, id and id_regex limiters to find_link() +and find_all_links(). Thanks to Adriano Ferreira. + + +1.31_01 Mon Sep 17 23:38:03 CDT 2007 +======================================== +[FIXES] +Mech tests now pass even if your DNS server gives A records for +anything (like OpenDNS). Thanks, Miyagawa! + +Searching for the <base href> is now case-inensitive. A better +solution would be to actually parse the HTML. + +[ENHANCEMENTS] +mech-dump now handles --user and --password arguments for sites +that require authentication. + + +1.30 Thu May 24 21:31:10 CDT 2007 +======================================== +[DOCUMENTATION] +Minor doc fixes. Thanks David Steinbrunner. + + +1.29_01 Tue May 22 14:02:55 CDT 2007 +======================================== +Kevin Falcone and I ask for your assistance in figuring out how to +handle the warnings thrown by the tests, other than hiding them. + +[FIXES] +Overhauled how tainting was done. Stole code directly from +Test::Taint. + +Have LWP only handle decoding of Content-Encoding, not charset. + +[DOCUMENTATION] +Fixed the docs for $mech->submit_form()'s with_fields arg. +Thanks, Peteris Krumins. + + +1.26 Wed May 16 14:21:29 CDT 2007 +======================================== +[FIXES] +Re-reversed the content decoding. This is critical for reading from +sites with gzip on the fly, like Wikipedia. + +Content is now properly tainted. + +[ENHANCEMENTS] +mech-dump can now pass --agent and --agent-alias flags so you can +fetch from sites like Wikipedia that block LWP user agents. + +[INSTALLATION] +The mech-dump program is now always installed. It no longer is +presented as an option. + + +1.24 Fri May 11 15:57:56 CDT 2007 +======================================== +NOTE: Version 1.24 will NOT automatically decode gzipped content for +you any more. Consider it a "do not use" release. + +[FIXES] + +* Fixed failures in "make test" with some versions of HTTP::Server::Simple +* RT #26593: Improved handling of charsets. Thanks Kevin Falcone. +* RT #24354: find_link now handles http-equivs with quoted URLs. +* Reverses the change in 1.21_01 where it decodes the content. + +[ENHANCEMENTS] +* Added find_all_inputs() and find_all_submits() methods. Thanks, + Mike O'Regan. +* Test::LongString is no longer needed, so has been removed as a + requirement. + +[TESTS] +* Added a test for save_content() + + +1.22 Fri Mar 2 00:05:57 CST 2007 +======================================== +[INTERNALS] +Added new tests. + +Added Perl::Critic changes and a perlcriticrc file. + + +1.21_04 Sat Oct 7 21:35:42 CDT 2006 +======================================== +[FIXES] +* $mech->content( type => 'text' ) was not freeing memory. Thanks to + Cat Okita for finding it. + +[INTERNALS] +* Made the order of params to $mech->content() not relevant. + + +1.21_03 Sat Oct 7 01:21:46 CDT 2006 +======================================== +[THINGS THAT MAY BREAK YOUR CODE] +* The methods $mech->form() and $mech->follow() have been removed. + They've been deprecated since 1.10, which was released in Feb 2005. + +[ENHANCEMENTS] +* I'm trying to nail down what seems to be a memory leak on long-running + Mech programs. I'm stringifying URI::URL objects wherever I can. + +[INTERNALS] +* No longer uses UNIVERSAL. + + +1.21_02 Wed Oct 4 13:14:30 CDT 2006 +======================================== +[ENHANCEMENTS THAT MAY BREAK YOUR CODE] + +* The $mech->stack_depth() setting had no way to say "don't cache any + pages at all". How silly! + + Now, if you set $mech->stack_depth(0), no history of pages will be kept. + In the past, it would mean "Keep all pages." This means that if you want + to set it to keep all pages, set it to some ridiculously large number. + +[DOCUMENTATION] +* The docs previously refered to Compress::Gzip instead of Compress::Zlib. + + +1.21_01 Mon Sep 18 17:18:43 CDT 2006 +======================================== +[ENHANCEMENTS] +* If Compress::Zlib is installed, gzipped content is now + accepted and transparently decoded. No additional syntax needed! + This should save time and bandwidth in a number of cases. + (Mark Stosberg) + +* Added a put() method. It also calls a subfunction called + _SUPER_put that will be removed once LWP::UserAgent supports put(). + + + +1.20 Sat Aug 19 09:09:08 EDT 2006 + + [ENHANCEMENTS] + * Added new two-argument form of credentials() method. + $mech->credentials($username, $password); + That provides simpler visiting of password-protected + resources in the vast majority of cases and still + allows the other cases to be supported. (Peter Scott) + + [BUG FIXES] + * autocheck no longer is triggered when informational + responses are returned. (Mark Stosberg) + + [INTERNALS] + * test suite no longer fails when Test::Warn is missing. + (CPAN testers, Mark Stosberg) + * Removed all the testing against live sites. The networking + code is not actually in Mech anway, and they were prone to + breaking, as the live sites changed. (Mark Stosberg) + + +1.19_02 Mon Aug 7 23:57:56 CDT 2006 + + [ENHANCEMENTS] + * Add new Do-What-I-Mean submit_form() option. + $mech->submit_form( + with_fields => \%data + ); + That expresses that you want to select the first form contains all + fields in \%data, and then submit the data to that form. See the docs + for form_with_fields() and submit_form() for details. + (Mark Stosberg, inspired by RT#6100) + + [BUG FIXES] + * The behavior of clone() now copies over the cookie jar, which + is probably what you expected it did in the first place. + This fixes bug RT#13541 filed against Test::WWW::Mechanize, + which was using clone() internally. (Mark Stosberg) + + * The correct URL is returned after redirecting. This a regression + from 1.04 and was reported as RT#9059, RT#12882, and RT#12786. + The documentation about this has also been clarified that we + return a URI object, but that it stringifies to the URI itself. + + [DOCUMENTATION] + * Fixed a misleading param in the constructor. + * Document the return value of set_visible (RT#6071, MJD, + Mark Stosberg) + * Document that form_name and form_number return an HTML::Form + object (Mark Stosberg) + + [INTERNALS] + * Made lots of little cleanups based on Perl::Critic + * Fix Taint-mode warnings with Perl 5.6.1 (RT#16945) + +1.18 Thu Feb 2 00:11:26 CST 2006 + [TESTS] + * Makefile.PL now takes four new params: + * --live/nolive turns on/off the live tests + * --local/nolocal turns on/off the local tests + * --mech-dump/nomech-dump installs/doesn't the mech-dump program + * --all turns on all tests and installs mech-dump + + * Fixed some failures in tests. Non-existent URLs now have a + "." postpended to them, so if someone's got a search domain + with a wildcard (i.e. ignore.us) it'll ignore that. Also, + Google's second link is now a https:// link, which some Mechs + can't handle. Added a 'url_regex' which now makes it look at + the second non-https link. Thanks to Pete Krawczyk. + +1.16 Fri Oct 28 17:34:20 CDT 2005 + [ENHANCEMENTS] + * Sped up Mech significantly (~20% in some cases). Images and + links are extracted from the HTML, and objects are created, + only when they're actually needed. This will be a speedup for + pages where you're only following links, or vice versa. + + [THINGS THAT MAY BREAK YOUR CODE] + * If you've been relying on the $mech->{images} and $mech->{links} + fields being populated so that you can bypass the $mech->images() + and $mech->links() accessors, your code will break. That's OK, + because you should have been using the accessors all along. + +1.14 Tue Aug 30 17:17:40 CDT 2005 + [DOCUMENTATION] + * Added lots of new FAQs. Thanks to Peter Stevens. + + [INTERNALS] + * Now requires Test::LongString. That's not too odious. + + [FIXES] + * Tests now pass with the shuffling around that Google did. + +1.13_01 Tue Apr 12 14:11:18 CDT 2005 + [ENHANCEMENTS] + * Now dies if you call submit_form() with a non-existing + form_number or form_name. Before, it would just warn. + + [DOCUMENTATION] + * Added an example of using credentials() in the cookbook. + +1.12 Thu Feb 24 23:38:44 CST 2005 + [FIXES] + * Fixed RT #9026: hang in t/local/back.t under Windows XP. + Thanks Andrew Savige. It also should no longer complain + about being unable to clean up a temp file. + +1.11_01 Mon Feb 14 00:12:48 CST 2005 + [THINGS THAT MAY BREAK YOUR CODE] + * Removed deprecated _parse_html() method. + + [FIXES] + * Was incorrectly looking for INPUT tags TYPE="SUBMIT" as images. + Thanks to Abe Timmerman. + + [ENHANCEMENTS] + * Calling $mech->set_fields() with no current form now dies. + Thanks to Julien Beasley. + + +1.10 Tue Jan 31 11:30pm-ish + [FIXES] + * Fixed bug where images inside of links would not be found. + + * Fixed test failures because of Google changes. Thanks to + Offer Kaye and others who sent in patches. + + [DOCUMENTATION] + * More samples in the FAQ. Thanks to Joshua Gatcomb. + + [INTERNALS] + + * Added explanation of running live tests against Google in + Makefile.PL. + + +1.08 Fri Dec 24 01:01:06 CST 2004 + [ENHANCEMENTS] + * Added find_image() and find_all_images(). + +1.06 Wed Dec 8 14:58:39 CST 2004 + [INTERNALS] + * Now uses the base pragma instead of setting @ISA. + +1.05_04 Fri Nov 5 23:35:38 CST 2004 + [ENHANCEMENTS] + * Added WWW::Mechanize::Image object for representing images. + * Improved the regex on the URL for META tags. + * Added --images flag to mech-dump. + + [FIXES] + * When parsing urls out of meta refresh tags, "url" may now be + uppercase (RT#8230) + + * Behavior of back() fixed in a number of cases (RT#8109 reported + by Josh Purinton, patched by Dominique Quatravaux) + + [INTERNALS] + * Mark figured out to how to prevent his text editor from + putting tabs into the code. Andy's blood pressure dropped + slightly. + +1.05_03 Sun Oct 31 20:54:33 CST 2004 + + [ENHANCEMENTS] + * click_button() has a new input option for HTML::Form::SubmitInput + objects (DOMQ) + + * content() has new options to return the page formatted + as text, with a <base href> added. (RT#8087, patch by + Dominique Quatravaux) + + * update_html() method has been added, which can be used + to modify the HTML that Mech parses. It should be sub-classed + instead of _parse_html(), which has been deprecated. + (RT#8087, patch by Dominique Quatravaux) + + * select() has new option to select an option by number + (RT#5789, Scott Lanning) + + * WWW::Mechanize::Link now has support providing all the + attributes of the link through a new attrs() method, which + returns them as a hashref. This is a replacement for the + alt() method, added in 1.05_01. It's not backwards + compatible with that, but, hey, that's what developer + releases are for. (RT#8092, Rob Casey and Mark Stosberg) + + [FIXES] + * Upload <input type="file" ... > does not use the default + value to prevent attacks, patch by Jan Pazdziora (RT #7843). + + [INTERNALS] + * Improved tests and documentation for select() (RT#5789, + Scott Lanning) + + * Improve taint-safeness on Perl 5.6.1 (RT#8042, patch by + Dominique Quatravaux) + + * Added tests for click_button() (RT#8061, by Dominique + Quatravaux) + + * Require URI 1.25, fixing bug which exposed itself in + WWW::Mechanize (RT#3048) + + * Move select() to better location in docs. Document and + test the return values. The return value is now "1" on + success instead of the undocumented behavior of returning + a form value. (RT#6138, spotted by MJD, patched by Mark + Stosberg) + + * Possible matching tags for the find_link() 'tag_regex' + attribute are now documented. (RT#2989, by Mark Stosberg) + + * refactored find_link() to avoid use of eval(). This should + improve performance a bit and avoid potential security + issues. (Mark Stosberg) + +1.05_02 Sat Oct 2 16:55:59 CDT 2004 + [ENHANCEMENTS] + * Added the $mech->save_content( $filename ) function, so you + can dump stuff to files easily. + +1.05_01 Thu Sep 30 21:04:44 CDT 2004 + + [FIXES] + * set_visible() doesn't stop setting values when it finds a zero. + + [ENHANCEMENTS] + * WWW::Mechanize::Link has a new, easier to remember constructor + interface. The old one is still supported. Support for including + an 'alt' attribute was added, which is useful for <area> links. + (RT #3317). Thanks to Mark Stosberg. + + * When links are extracted from <area> tags, the ALT attribute will + be captured and become part of the WWW::Mechanize::Link object. + (RT #3317). Patch by Mark Stosberg. + + [INTERNALS] + * t/mech-dump.t is now more portable (RT #7690) + + * t/local/follow.t has new tests to confirm that 'follow*' functions + work with characters like o-umlaut, even when the o-umlaut is + encoded in the HTML, but not in the call to follow(). (RT #2416) + By Mark Stosberg. + + + +1.04 Wed Sep 15 23:27:53 CDT 2004 + + [ENHANCEMENTS] + * $mech->get() now accepts a WWW::Mechanize::Link object. + + * $mech->stack_depth(n) lets you set the depth of the mech + object's page stack. This way, if you have a Mech that does + lots of stuff and never/rarely goes back(), you won't be eating + up memory. Thanks to BooK and Chi-Fung. (RT #5362) + + [FIXES] + * Fixed tests that fail under LWP >= 5.800. + + * Added a workaround for LWP::UserAgent->clone() when ->{proxy} + is undef. (RT #6443) + + * The Referer was getting passed as a URI object sometimes, + and that caused sadness. Eugene Haimov supplied a workaround. + (RT #6372) + + [DOCUMENTATION] + * Added Ian Langworth's listmod and John Beppu's photobucket + uploader programs to WWW::Mechanize::Examples. + + * Minor doc tweak for find_link() + + * Finally added a value() func. Thanks to Spoon, + who even now, months after his passing, is still contributing + to Mechanize. + + +1.02 Tue Apr 13 22:45:10 CDT 2004 + + No reason to install if you have 1.00. Fixes are only in tests. + + [FIXES] + * t/referer.t didn't cope with spaces in $FindBin::Bin. Plus, + it now forces its URL to localhost. + + +1.00 Sat Apr 10 00:35:51 CDT 2004 + + I figure it's about time we hit 1.00, and this version seems + like a good place to do it, because of the potential breakage + described below... + + [THINGS THAT WILL BREAK YOUR CODE] + * Header handling has changed. There is no more package variable + %headers that holds all the headers to be added. They are + now added on a per-object basis. + + If you were adding a header with add_header(), and the code + relied on that header still being set later on in a later + instance of the class, that code will now break, because the + later instance won't have the header set. + + [ENHANCEMENTS] + * You can now prevent a header from being sent by adding it with + an undef value, as in: + + $mech->add_header( Referer => undef ); + + [FIXES] + * Now correctly adds Accept-Encoding to all requests that need it. + + [INTERNALS] + * Added new $mech->_modify_request($req) method to do all the + HTTP header modification before the actual request gets + sent off. Subclasses are able to override it if they want. + + * Removed the unused Compress::Zlib stuff. + + +0.76 Wed Apr 7 22:01:43 CDT 2004 + + [ENHANCEMENTS] + * Added update_html() to let you update the HTML for the page + you're on. + + [FIXES] + * Test files account for new Google layout. + + [INTERNALS] + * Rearranged the local tests into their own t/local/ directory. + + * Made the standalone tests show what server they're hitting. + + * Checked that it runs under LWP 5.78. + + +0.74 Mon Mar 22 23:36:46 CST 2004 + + [ENHANCEMENTS] + * WWW::Mechanize now sends an Accept-Encoding header of "identity" + to always enforce plaintext responses. Preliminary support for + Compress::Zlib is also there, but is disabled by default. + + * Added click_button() and select() methods. The field() method + can now take an arrayref of values, if appropriate. Thanks, + Linda Lee Julien. + + * Added url_abs and url_abs_regex params to find_all_links(). + + * URLs in META REFRESH tags are now treated as links. + + * t/taint.t makes sure that things that should be tainted are. + + [FIXES] + * Still more fixes if the machine you're on doesn't have + DNS pointing to it. + + * The local changes use localhost as the local host name, instead + of whatever host name that might be on the box, but not in DNS. + Thanks to David Wheeler for letting me play on his box. + + * The http_proxy and HTTP_PROXY environment variables get + deleted during the tests that access the dummy local server. + This should let your tests pass, and clear up a lot of RT + tickets. + +0.72 Mon Jan 26 21:07:20 CST 2004 + [ENHANCEMENTS] + * Added the set_visible() method, thanks to Peter Scott. + + [DOCUMENTATION] + * Started the Cookbook at WWW::Mechanize::Cookbook.pod. + + [INTERNALS] + * Made the globbing in Makefile.PL a little less command-line + intensive. Also fixed the missing files in MANIFEST. + * Added t/pod-coverage.t for testing POD coverage. + +0.71_02 Mon Dec 22 14:29:13 CST 2003 + [THINGS THAT MAY BREAK YOUR CODE] + * Added a 5th, optional parameter to WWW::Mechanize::Link's + constructor. In 0.71_01, it was at the beginning of the + argument list and was required. Now it's at the end and is + optional. If, in the 15 hours since 0.71_01 came out, you + went and changed all your WWW::Mechanize::Link constructors, + you'll have to change them around again. Otherwise, you can + just ignore this change. + +0.71_01 Sun Dec 21 23:48:12 CST 2003 + [THINGS THAT MAY BREAK YOUR CODE] + * WWW::Mechanize::Link's constructor has a new argument + that needs to be passed in, at the start of the argument + list. + + [ENHANCEMENTS] + * WWW::Mechanize::Link object now takes a $base URL, and will + return absolute URLs with the url_abs() method. Thanks to + Ashley Pond. + * Added another script to WWW::Mechanize::Examples. It's a + script that didn't make it into Spidering Hacks. + + [INSTALL & TESTS] + * Heavy use of the new Test::Memory::Cycle module. + * Fixed Makefile.PL so that the tests are selected under Win32. + * Changed t/mech-dump.t so that the test succeeds under Win32. + * Updated t/referer.t and t/mech-dump.t so they run under VMS. + Thanks to Peter Prymmer. + + +0.70 Sun Nov 30 23:45:27 CST 2003 + [THINGS THAT MAY BREAK YOUR CODE] + * Redirects are now handled better by LWP, so the code that + changes POSTs to GETs on redirects has been removed. + + [FIXES] + * Fixed redirect_ok(), which had its API changed out from under + it in LWP 5.76. + + [ENHANCEMENTS] + * New warnings in find_link() for strings that are space padded, + and for text matches that are passed a regex. Thanks to + Jim Cromie. + + [DOCUMENTATION] + * Patches from Mark Stosberg and Jim Cromie. + + [INTERNALS] + * Removed all the checking for Carp. I don't know why I + was thinking that Carp wasn't core. RT #4523. + + Also, a big bump in requirements on LWP: We need 5.76. + + +0.66 Thu Nov 13 14:35:31 CST 2003 + + No new functionality. Fixed up some install bugs and made a + few documentation tweaks, mostly to plug Spidering Hacks. + + +0.65 Mon Nov 10 00:11:06 CST 2003 + [ENHANCEMENTS] + * Made a _parse_html() method that you can override or call + manually, per request from Gavin Estey. + + [FIXES] + * Made some path naming use File::Spec->catfile so that + they work correctly under Windows. + * "make clean" cleans up temp flag files. + + [INTERNALS] + * Uses the new Test::Pod 1.00 for simplicity. + + +0.64 October 23, 2003 11:15pm + [ENHANCEMENTS] + * Many new tests, based on the excellent coverage reporting + created by Paul Johnson's Devel::Cover module. + + * The start of JavaScript support, sort of! + + If you have an <A> tag that does an onClick that opens a + window, Mech will find the URL from that and make that + be the link for the tag. This is for things like Movable + Type that pop little windows to rebuild indexes. + + This is subject to change in the future. I don't know + if it will, but I'm not making promises. It might be so + buggy I just yank the whole thing. + + * Big jump in requirements, since we'll soon be using Gisle's + new HTML::Form stuff. Also, older versions of HTML::Form + don't give output I'm expecting. + + [FIXES] + * Fixed the t/mech-dump.t failure. + +0.63 October 13, 2003 2:56pm + [ENHANCEMENTS] + * mech-dump defaults to dumping forms. + * Added name, name_regex, tag and tag_regex options to find_link() + and follow_link(). + * Added tests from Jim Brandt. + + +0.62 October 7, 2003 8:46pm + [THINGS THAT MIGHT BREAK YOUR CODE] + * The params for find_link()'s url_regex and text_regex must now + be actual regex objects, as in qr// objects. They can't just + be little text strings. If this is a big bummer, let me know. + + [ENHANCEMENTS] + * Added autocheck param, to tell your Mech object to die on + any error. This saves you from having to check yourself. + This closes RT #3056. + * Renamed the internal _carp() method as warn(). + * Added a die() method. + * Can now override the warn() and die() handlers in the + constructor. + * find_link() now complains if it gets a *_regex param that isn't + actually a regex. See RT #3032. + + [FIXES] + * mech-dump.t no longer runs if you're not installing mech-dump. + See RT #3724. + + [DOCUMENTATION] + * More FAQs. Thanks to Gavin Estey. + + +0.61 October 6, 2003 6:30pm + No new functionality here. It's mostly to get the new tests + into the pipeline so the CPAN testers can run 'em. + + [FIXES] + * Missing dependency on File::Temp. Thanks, Ask. + + [ENHANCEMENTS] + * Added the test case for the form processing problem as a .t + file, since I spent so long getting it down to a simple case. + * Internal code uses accessors instead of direct hash entries. + Prepare for deprecation of existing hash entries! + + [DOCUMENTATION] + * The FAQ is now its own document at WWW::Mechanize::FAQ. + + +0.60 September 22, 2003 10:00pm + [FIXES] + * Changed how t/failure.t tries to fail. It used to hit + a bogus hostname in .com, but with Verisign doing its + SiteFinder crap, even bogus addresses in .com succeed. + + [ENHANCEMENTS] + * Added _make_request() to let WWW::Mechanize::Cached easily + hook into the request chain. + +0.59 September 3, 2003 11:56pm + [FIXES] + * Squelched a warning in follow() where it tries to do a regex + match against an undef value. + * The page stack functionality, including the back() button, + was entirely broken. Now it works. Thanks to the mighty + Iain Truskett for help. + + [ENHANCEMENTS] + * Added the mech-dump script, which replaces mech-forms. + It will dump forms and lists of links. Eventually it will + do lists of images, too, but not yet. + +0.58 August 14, 2003 11:30pm + [THINGS THAT MIGHT BREAK YOUR CODE] + * $mech->uri() now returns a plain string, not a URI object. + The automatic stringification of the URI object was + causing problems on Win32 and/or threaded Perls, and I + didn't feel like figuring out why. If the non-objectness + of the uri() method is a problem, let me know. + * form(), form_name() and form_number() now return the + HTML::Form object of the form that was chosen. They used + to return a 1 or 0. This means that if you're explicitly + checking for 1 or 0, instead of evaluating the return + code in a boolean context, your code will break. + + [FIXES] + * The <AREA>-handling in extract_links() was incorrectly + building the text. + * uri() now returns a string, not a URI object. + * form(), form_name() and form_number() now return the + HTML::Form object of the form that was chosen. + + [INTERNALS] + * Determination of live vs. local tests is now done in + Makefile.PL, and we don't have to set those silly semaphore + files any more. + * Made other cleanups in Makefile.PL, like using + ExtUtils::Command instead of rolling my own touch(). + * Moved all the *-live.t tests into t/live/*.t, and renamed + the *-local.t files to not be -local. + * Added more tests for <AREA> tags. + +0.57 July 31, 2003 11:21pm + [ENHANCEMENTS] + * Added <AREA HREF=...> tags to those that are links per + find_links(). + +0.56 July 24, 2003 12:15pm + [THINGS THAT MIGHT BREAK YOUR CODE] + * Created agent_alias() method to do the browser string + translation. Passing "Windows IE 6" to agent() will get + you back exactly that string as the agent. You have to call + $a->agent_alias( "Windows IE 6" ) to get the translation. + + Fortunately, unless you used the new functionality of agent() in + the past two days since I released 0.55, it won't be a problem. + + [ENHANCEMENTS] + * Removed the dependencies on Carp and Test::Builder. There still + is a dependency on Test::Builder for Test::More, but it's no + longer explicit in the Makefile.PL. Mech will use Carp if + possible, but it's no longer a requirement. + + [INTERNALS] + * Added _carp method for handling conditional warnings, rather + than checking quiet() all the time. + +0.55 July 22, 2003 12:10pm + [ENHANCEMENTS] + * Added WWW::Mechanize::Link object to encapsulate what used to + be an array reference of stuff from find_link(). This replaces + having to know that $link->[0] was URL and so on. However, + since WWW::Mechanize::Link is a blessed arrayref, it's backwards + compatible with existing code. + + * The WWW::Mechanize::Link object now tracks what tag the link + came from (<A>, <FRAME> or <IFRAME>). + + * No longer loads Carp unless and until it's necessary. + + * submit_form() now uses the currently specified form if a + form_name or form_number param is not specified. (RT #2768) + + * Added a translate table of handy browser strings that the + agent() method recognizes. These strings may be one of the + following: + + * Windows IE 6 + * Windows Mozilla + * Mac Safari + * Mac Mozilla + * Linux Mozilla + * Linux Konqueror + + For example, "Windows IE 6" gets replaced with + "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)". + Thanks to Ed Silva for the list. + + [DOCUMENTATION] + * Moved the deprecated methods to their own section. + + * Removed the TODO list, since it's all in RT. + +0.54 July 20, 2003 12:47am + [THINGS THAT MIGHT BREAK YOUR CODE] + * See the enhancements on extract_links() and failures. + Also, see the note about #2811 below. + + [ENHANCEMENTS] + * find_all_links() and find_link() can specify multiple + text or url parameters, which will be anded together. + Before, there was a pecking order in which param took + precedence, and any extras were ignored. To find the + first link with text of "news" and with "cnn.com" in the + URL, use: + + $a->find_link( text => "news", url_regex => qr/cnn\.com/ ); + + Thanks to Greg Davies for the idea. (RT #2973) + + * extract_links() is now an internal-only method, named + _extract_links(). If you're using it, don't. Use + find_all_links() instead, which is more flexible. + + * Now, when there is a failure of some kind, certain fields + like title, forms and links are cleared out. Before, you'd + have leftover links from the previous page. I suspect that + at least one person out there has been mistakenly relying + on this behavior, and his code will now break, but that's + a good thing. See t/failure.t for more. + + [FIXES] + * No longer tries to tech mech-forms if you're not installing + it. + * Fixed #2811: The definition of the link text contents + did not match the documentation. + + [DOCUMENTATION] + * Added a sample CPAN search from Ed Silva. + * Added link to the Perl Advent Calendar. + + +0.53 July 16, 2003 + This version ONLY fixes some test-file problems. There are no + functionality enhancements or bug fixes in WWW::Mechanize itself. + + [FIXES] + * Explicitly stringifies URI::file objects in the *.t files, + in hopes of getting the thread bummers fixed. See RT + #2874 and #2880. + +0.52 July 7, 2003 + [ENHANCEMENTS] + * mech-forms now handles local files. + * Added t/mech-forms.t to test the app + * Added some param-checking to submit_form() + * All the warns are now carps. Thanks to MJD for pointing + it out during his "Tricks Of The Wizards" talk. + + [FIXES] + * Bumped up the Test::Builder requirement to 0.17, which + I hope will get rid of the thread problems with t/tick.t. + Thanks to Autrijus Tang for pointing out the note in + Builder.pm. + +0.51 June 29, 2003 + [ENHANCEMENTS] + * Added the "mech-forms" command to list all the forms on + a given page. + + [FIXES] + * Made the tick.t test not rely on 2shortplanks.com. + +0.50 June 24, 2003 + [ENHANCEMENTS] + * Mech now does what most browsers do when they handle a + redirect: It changes the POST to a GET. This doesn't match the + RFC, but it's what browsers do. Thanks to Stuart Children. + +0.49 June 23, 2003 + [FIXES] + * Added a fix a while ago that tracks redirect history. That fix + had the bad side effect of always allowing a redirect. + That has now been fixed. + +0.48 June 22, 2003 + [ENHANCEMENTS] + * Added find_all_links() as the counterpart to find_link(). + + [FIXES] + * get() wasn't passing through its params to LWP::UserAgent::get(), + so you couldn't use the :content_file param. Now it + behaves like a good little subclass. + +0.47 June 21, 2003 + [ENHANCEMENTS] + * find_link() now lets you pass n=>"all" to get back a list + of all links that match the other criteria. NOTE: Outdated! See + v0.48 for the preferred way. + * find_link() and follow_link() now complain if you pass + invalid params to them. + + [FIXES] + * Tracks URI history properly after redirects. + +0.46 June 20, 2003 + [ENHANCEMENTS] + * Added tick() and untick() functions for handling checkboxes. + Thanks to Mark Fowler for this patch. + * The uri() will only be the URI of the requested page if it worked. + * Oh yes, I forgot to mention: We're on Sourceforge now. + +0.45 June 11, 2003 + [ENHANCEMENTS] + * Added response() method, which is identical to res(). + * Added a convenience method success(), which is just a wrapper + around $agent->res->is_success(). + * Passes along a Referer: header. It can still be overridden + with the %Headers hash, though. Thanks, Corion. + * We now have a set of localized tests that run for machines that + aren't connected to the Net. There's now a t/lib/ directory for + those helper files, and the previous Utils.pm has been dropped. + Again, Corion to the rescue. + + [DOCUMENTATION] + * Added to the FAQ + * Rearranged all the functions in the file so they are now + logically grouped. + +0.44 June 5, 2003 + [ENHANCEMENTS] + * Now follows redirects after a POST. By default, LWP::UserAgent + does NOT follow redirects after a POST. This matches the + official RFC. However, since WWW::Mechanize is meant to be a + browser clone, and browsers follow the redirects after a POST, + I've changed this behavior. If this causes bummers, let me + know and maybe I'll throw in a switch. + + [DOCUMENTATION] + * The package now includes the WWW::Mechanize logo in the etc/ + directory. I have no idea what I'll do with it, but thanks + to Meng Wong for making it! + +0.43 May 29, 2003 + [ENHANCEMENTS] + * Now uses request() instead of send_request(). This should + solve all of our redirection and cookie bummers. + * Added reload() method. Thanks, Corion. + +0.42 May 26, 2003 + [ENHANCEMENTS] + * Renamed _do_request() to send_request(), so it's now a + proper overload of LWP::UserAgent. Thanks to Philippe Bruhat. + Recorded in RT ticket #1708. + * Removed the req() method, and tests that used it. + * extract_links() now checks for <IFRAME> tags along with the + existing <A> and <FRAME>. + * Now explicitly requires HTML::HeadParser. + * Doesn't load Carp unless it's actually needed. + +0.41 May 22, 2003 + [ENHANCEMENTS] + * There are no enhancements in functionality. + * Testing suite getting more network-independent. + + [DOCUMENTATION] + * Started a FAQ section. + * New SYNOPSIS section. Thanks to Uri Guttmann. + * Documentation fixes. Thanks to Mark Stosberg, Mike Fragassi. + +0.40 April 18, 2003 + [ENHANCEMENTS] + * Three new methods. + * find_link() for finding a specified link + * follow_link() for following a specified link + * submit_form() for all-in-one form submission. + All three owe a great deal to Uri Guttmann. Thanks, Uri! + * Split out the examples into WWW::Mechanize::Examples.pod + * Added t/back.t + * Documentation fixes from Abigail + +0.39 April 1, 2003 + [ENHANCEMENTS] + * No functional enhancements. Everything is docs. + + * Added a new example from Dan Rinzel, for posting to Movable Type + + * Started playing with having the test suite not have to have + live net access. + +0.38 March 24, 2003 + [DEPRECATIONS] + * The %WWW::Mechanize::Headers hash is officially deprecated. + It will be removed pretty soon. I'm not sure what the + replacement will be, but it won't be a package-level hash. + + [ENHANCEMENTS] + * The submit() method is no longer an alias for click("submit"), + because some forms don't have a button called "submit". + In fact, some may not have any buttons at all if they're + JavaScript-controlled. + + [FIXES] + * RT #2056: $agent->field() wasn't returning proper input field, + because of an incorrect call to the underlying HTML::Form. + Thanks to Prakash Kailasa. + +0.37 March 4, 2003 + [ENHANCEMENTS] + * Added an is_html() method + * Added a title() method + * No longer requires the Clone module. + * No longer requires the Test::Pod module, although it's nice + to have. + +0.36 February 4, 2003 + [ENHANCEMENTS] + * Added form lookup by name. Thanks to Jan Ivar Pladsen and + Iain Truskett for their patches. + * Added new functions form_name() and form_number(). Existing + form() function calls each of these as appropriate. + * Explicitly requires LWP 5.69 because of the form naming. + * Added new POD section "Examples" for user-submitted sample code + of how to use WWW::Mechanize. + * Quieted the warnings in the test suite, so that you don't get + expected, but disconcerting, warnings to the screen. + +0.35 January 22, 2003 + [ENHANCEMENTS] + * Now creates an internal cookie_jar by default. + * Beefing up the test suite + * Don't forget, direct access to internal members is deprecated. + Use the accessors starting now. + +0.33 January 15, 2003 + [ENHANCEMENTS] + * Added accessor methods + * Deprecated the direct accessing of object contents. You can + no longer rely on the names of any of the hash elements. + * Added a quiet() method to suppress warnings to the screen. + + [FIXES] + * More documentation fixes. (Thanks to Briac Pilpré) + +0.32 September 23, 2002 + [FIXES] + * Now correctly calls the LWP::UserAgent constructor. (Thanks to + Philippe "BooK" Bruhat) + + * Fixed doc on what get() method returns. (Again thanks to BooK) + + * Now uses http://www.google.com/intl/en/ as the basis for + testing, since non-US users apparently get redirected to + country-specific sites when they hit http://www.google.com/ + +0.31 September 13, 2002 + [ENHANCEMENTS] + * get() now returns the HTTP::Response object. + * Prepended push_page_stack(), pop_page_stack() and + do_request() with underscores, to emphasize that they + are for internal use only. + +0.30 September 10, 2002 + [ENHANCEMENTS] + * Fixed the stack popping problem. + + [FIXES] + * Put all the inline tests into t/*.t + + * get() doesn't work w/relative URLs + http://rt.cpan.org/NoAuth/Bug.html?id=1492 + + * The arrays returned by extract_links() + now have a third element, which contains + the name attribute of the link (undef + if the link has no name attribute). + + +Code before this point is forked off from WWW::Automate 0.20b. @@ -0,0 +1,75 @@ +This is the Perl distribution WWW-Mechanize. + +Installing WWW-Mechanize is straightforward. + +## Installation with cpanm + +If you have cpanm, you only need one line: + + % cpanm WWW::Mechanize + +If it does not have permission to install modules to the current perl, cpanm +will automatically set up and install to a local::lib in your home directory. +See the local::lib documentation (https://metacpan.org/pod/local::lib) for +details on enabling it in your environment. + +## Installing with the CPAN shell + +Alternatively, if your CPAN shell is set up, you should just be able to do: + + % cpan WWW::Mechanize + +## Manual installation + +As a last resort, you can manually install it. If you have not already +downloaded the release tarball, you can find the download link on the module's +MetaCPAN page: https://metacpan.org/pod/WWW::Mechanize + +Untar the tarball, install configure prerequisites (see below), then build it: + + % perl Makefile.PL + % make && make test + +Then install it: + + % make install + +On Windows platforms, you should use `dmake` or `nmake`, instead of `make`. + +If your perl is system-managed, you can create a local::lib in your home +directory to install modules to. For details, see the local::lib documentation: +https://metacpan.org/pod/local::lib + +The prerequisites of this distribution will also have to be installed manually. The +prerequisites are listed in one of the files: `MYMETA.yml` or `MYMETA.json` generated +by running the manual build process described above. + +## Configure Prerequisites + +This distribution requires other modules to be installed before this +distribution's installer can be run. They can be found under the +"configure_requires" key of META.yml or the +"{prereqs}{configure}{requires}" key of META.json. + +## Other Prerequisites + +This distribution may require additional modules to be installed after running +Makefile.PL. +Look for prerequisites in the following phases: + +* to run make, PHASE = build +* to use the module code itself, PHASE = runtime +* to run tests, PHASE = test + +They can all be found in the "PHASE_requires" key of MYMETA.yml or the +"{prereqs}{PHASE}{requires}" key of MYMETA.json. + +## Documentation + +WWW-Mechanize documentation is available as POD. +You can run `perldoc` from a shell to read the documentation: + + % perldoc WWW::Mechanize + +For more information on installing Perl modules via CPAN, please see: +https://www.cpan.org/modules/INSTALL.html @@ -0,0 +1,379 @@ +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 2004 by Andy Lester. + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 2004 by Andy Lester. + +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +The End + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..d89bbf0 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,123 @@ +# This file was automatically generated by Dist::Zilla::Plugin::Manifest v6.024. +CONTRIBUTORS +Changes +INSTALL +LICENSE +MANIFEST +META.json +META.yml +Makefile.PL +README.md +cpanfile +dist.ini +etc/www-mechanize-logo.png +lib/WWW/Mechanize.pm +lib/WWW/Mechanize/Cookbook.pod +lib/WWW/Mechanize/Examples.pod +lib/WWW/Mechanize/FAQ.pod +lib/WWW/Mechanize/Image.pm +lib/WWW/Mechanize/Link.pm +perlcriticrc +perltidyrc +script/mech-dump +t/00-load.t +t/00-report-prereqs.dd +t/00-report-prereqs.t +t/TestServer.pm +t/Tools.pm +t/add_header.t +t/aliases.t +t/area_link.html +t/area_link.t +t/autocheck.t +t/bad-request.t +t/clone.t +t/content.t +t/cookies.t +t/credentials-api.t +t/credentials.t +t/die.t +t/dump.t +t/field.html +t/field.t +t/find_frame.html +t/find_frame.t +t/find_image.t +t/find_inputs.html +t/find_inputs.t +t/find_link-warnings.t +t/find_link.html +t/find_link.t +t/find_link_id.html +t/find_link_id.t +t/find_link_xhtml.html +t/find_link_xhtml.t +t/form-parsing.t +t/form_133_regression.html +t/form_with_fields.html +t/form_with_fields.t +t/form_with_fields_passthrough_params.t +t/form_with_fields_verbose.html +t/frames.html +t/frames.t +t/google.html +t/history.t +t/history_1.html +t/history_2.html +t/history_3.html +t/image-new.t +t/image-parse.css +t/image-parse.html +t/image-parse.t +t/link-base.t +t/link-relative.t +t/link.t +t/local/LocalServer.pm +t/local/back.t +t/local/click.t +t/local/click_button.t +t/local/content.t +t/local/encoding.t +t/local/failure.t +t/local/follow.t +t/local/form.t +t/local/get.t +t/local/head.t +t/local/log-server +t/local/nonascii.html +t/local/nonascii.t +t/local/overload.t +t/local/page_stack.t +t/local/post.t +t/local/referer-server +t/local/referer.t +t/local/reload.t +t/local/select_multiple.t +t/local/submit.t +t/mech-dump/mech-dump.t +t/new.t +t/refresh.html +t/regex-error.t +t/save_content.html +t/save_content.t +t/select.html +t/select.t +t/submit_form.t +t/taint.t +t/tick.html +t/tick.t +t/untaint.t +t/upload.html +t/upload.t +t/uri.t +t/warn.t +t/warnings.t +tidyall.ini +xt/author/eol.t +xt/author/live/wikipedia.t +xt/author/mojibake.t +xt/author/pod-coverage.t +xt/author/pod-syntax.t +xt/author/portability.t +xt/author/test-version.t +xt/author/tidyall.t diff --git a/META.json b/META.json new file mode 100644 index 0000000..4f4e90a --- /dev/null +++ b/META.json @@ -0,0 +1,960 @@ +{ + "abstract" : "Handy web browsing in a Perl object", + "author" : [ + "Andy Lester <andy at petdance.com>" + ], + "dynamic_config" : 0, + "generated_by" : "Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : 2 + }, + "name" : "WWW-Mechanize", + "no_index" : { + "directory" : [ + "examples", + "t", + "xt" + ] + }, + "prereqs" : { + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + }, + "suggests" : { + "JSON::PP" : "2.27300" + } + }, + "develop" : { + "recommends" : { + "Dist::Zilla::PluginBundle::Git::VersionManager" : "0.007" + }, + "requires" : { + "Code::TidyAll" : "0.71", + "Code::TidyAll::Plugin::SortLines::Naturally" : "0.000003", + "Code::TidyAll::Plugin::Test::Vars" : "0.04", + "Code::TidyAll::Plugin::UniqueLines" : "0.000003", + "LWP::Protocol::https" : "6.07", + "Parallel::ForkManager" : "1.19", + "Perl::Critic" : "1.132", + "Perl::Tidy" : "20180220", + "Pod::Coverage::TrustPod" : "0", + "Test::Code::TidyAll" : "0.50", + "Test::EOL" : "0", + "Test::Mojibake" : "0", + "Test::More" : "0.88", + "Test::Needs" : "0", + "Test::Pod" : "1.41", + "Test::Pod::Coverage" : "1.08", + "Test::Portability::Files" : "0", + "Test::RequiresInternet" : "0", + "Test::Vars" : "0.014", + "Test::Version" : "1", + "constant" : "0", + "lib" : "0" + } + }, + "runtime" : { + "recommends" : { + "Compress::Zlib" : "0" + }, + "requires" : { + "Carp" : "0", + "Getopt::Long" : "0", + "HTML::Form" : "1.00", + "HTML::HeadParser" : "0", + "HTML::TokeParser" : "0", + "HTML::TreeBuilder" : "5", + "HTTP::Cookies" : "0", + "HTTP::Request" : "1.30", + "HTTP::Request::Common" : "0", + "LWP::UserAgent" : "6.45", + "Pod::Usage" : "0", + "Scalar::Util" : "1.14", + "Tie::RefHash" : "0", + "URI::URL" : "0", + "URI::file" : "0", + "base" : "0", + "perl" : "5.006", + "strict" : "0", + "warnings" : "0" + } + }, + "test" : { + "recommends" : { + "CPAN::Meta" : "2.120900" + }, + "requires" : { + "CGI" : "4.32", + "Exporter" : "0", + "ExtUtils::MakeMaker" : "0", + "File::Spec" : "0", + "File::Temp" : "0", + "FindBin" : "0", + "HTTP::Daemon" : "6.12", + "HTTP::Response" : "0", + "HTTP::Server::Simple::CGI" : "0", + "LWP" : "0", + "LWP::Simple" : "0", + "Path::Tiny" : "0", + "Test::Deep" : "0", + "Test::Exception" : "0", + "Test::Fatal" : "0", + "Test::Memory::Cycle" : "1.06", + "Test::More" : "0.96", + "Test::NoWarnings" : "1.04", + "Test::Output" : "0", + "Test::Taint" : "1.08", + "Test::Warn" : "0", + "Test::Warnings" : "0", + "URI" : "0", + "URI::Escape" : "0", + "bytes" : "0", + "lib" : "0" + } + } + }, + "provides" : { + "WWW::Mechanize" : { + "file" : "lib/WWW/Mechanize.pm", + "version" : "2.04" + }, + "WWW::Mechanize::Image" : { + "file" : "lib/WWW/Mechanize/Image.pm", + "version" : "2.04" + }, + "WWW::Mechanize::Link" : { + "file" : "lib/WWW/Mechanize/Link.pm", + "version" : "2.04" + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "web" : "https://github.com/libwww-perl/WWW-Mechanize/issues" + }, + "homepage" : "https://github.com/libwww-perl/WWW-Mechanize", + "repository" : { + "type" : "git", + "url" : "https://github.com/libwww-perl/WWW-Mechanize.git", + "web" : "https://github.com/libwww-perl/WWW-Mechanize" + } + }, + "version" : "2.04", + "x_Dist_Zilla" : { + "perl" : { + "version" : "5.030002" + }, + "plugins" : [ + { + "class" : "Dist::Zilla::Plugin::PromptIfStale", + "config" : { + "Dist::Zilla::Plugin::PromptIfStale" : { + "check_all_plugins" : 0, + "check_all_prereqs" : 0, + "modules" : [ + "Dist::Zilla::PluginBundle::Author::OALDERS" + ], + "phase" : "build", + "run_under_travis" : 0, + "skip" : [] + } + }, + "name" : "@Author::OALDERS/stale modules, build", + "version" : "0.057" + }, + { + "class" : "Dist::Zilla::Plugin::PromptIfStale", + "config" : { + "Dist::Zilla::Plugin::PromptIfStale" : { + "check_all_plugins" : 1, + "check_all_prereqs" : 1, + "modules" : [], + "phase" : "release", + "run_under_travis" : 0, + "skip" : [] + } + }, + "name" : "@Author::OALDERS/stale modules, release", + "version" : "0.057" + }, + { + "class" : "Dist::Zilla::Plugin::OALDERS::TidyAll", + "name" : "@Author::OALDERS/OALDERS::TidyAll", + "version" : "0.000029" + }, + { + "class" : "Dist::Zilla::Plugin::AutoPrereqs", + "name" : "@Author::OALDERS/AutoPrereqs", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MakeMaker", + "config" : { + "Dist::Zilla::Role::TestRunner" : { + "default_jobs" : "4" + } + }, + "name" : "@Author::OALDERS/MakeMaker", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::CPANFile", + "name" : "@Author::OALDERS/CPANFile", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::ContributorsFile", + "name" : "@Author::OALDERS/ContributorsFile", + "version" : "0.3.0" + }, + { + "class" : "Dist::Zilla::Plugin::MetaJSON", + "name" : "@Author::OALDERS/MetaJSON", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MetaYAML", + "name" : "@Author::OALDERS/MetaYAML", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Manifest", + "name" : "@Author::OALDERS/Manifest", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MetaNoIndex", + "name" : "@Author::OALDERS/MetaNoIndex", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MetaConfig", + "name" : "@Author::OALDERS/MetaConfig", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MetaResources", + "name" : "@Author::OALDERS/MetaResources", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::License", + "name" : "@Author::OALDERS/License", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::InstallGuide", + "config" : { + "Dist::Zilla::Role::ModuleMetadata" : { + "Module::Metadata" : "1.000036", + "version" : "0.006" + } + }, + "name" : "@Author::OALDERS/InstallGuide", + "version" : "1.200014" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs", + "config" : { + "Dist::Zilla::Plugin::Prereqs" : { + "phase" : "develop", + "type" : "requires" + } + }, + "name" : "@Author::OALDERS/Modules for use with tidyall", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::ExecDir", + "name" : "@Author::OALDERS/ExecDir", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::MojibakeTests", + "name" : "@Author::OALDERS/MojibakeTests", + "version" : "0.8" + }, + { + "class" : "Dist::Zilla::Plugin::PodSyntaxTests", + "name" : "@Author::OALDERS/PodSyntaxTests", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Test::EOL", + "config" : { + "Dist::Zilla::Plugin::Test::EOL" : { + "filename" : "xt/author/eol.t", + "finder" : [ + ":ExecFiles", + ":InstallModules", + ":TestFiles" + ], + "trailing_whitespace" : 1 + } + }, + "name" : "@Author::OALDERS/Test::EOL", + "version" : "0.19" + }, + { + "class" : "Dist::Zilla::Plugin::Test::Pod::Coverage::Configurable", + "name" : "@Author::OALDERS/Test::Pod::Coverage::Configurable", + "version" : "0.07" + }, + { + "class" : "Dist::Zilla::Plugin::Test::Portability", + "config" : { + "Dist::Zilla::Plugin::Test::Portability" : { + "options" : "" + } + }, + "name" : "@Author::OALDERS/Test::Portability", + "version" : "2.001000" + }, + { + "class" : "Dist::Zilla::Plugin::TestRelease", + "name" : "@Author::OALDERS/TestRelease", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Test::ReportPrereqs", + "name" : "@Author::OALDERS/Test::ReportPrereqs", + "version" : "0.028" + }, + { + "class" : "Dist::Zilla::Plugin::Test::TidyAll", + "name" : "@Author::OALDERS/Test::TidyAll", + "version" : "0.04" + }, + { + "class" : "Dist::Zilla::Plugin::Test::Version", + "name" : "@Author::OALDERS/Test::Version", + "version" : "1.09" + }, + { + "class" : "Dist::Zilla::Plugin::RunExtraTests", + "config" : { + "Dist::Zilla::Role::TestRunner" : { + "default_jobs" : "4" + } + }, + "name" : "@Author::OALDERS/RunExtraTests", + "version" : "0.029" + }, + { + "class" : "Dist::Zilla::Plugin::PodWeaver", + "config" : { + "Dist::Zilla::Plugin::PodWeaver" : { + "finder" : [ + ":InstallModules", + ":ExecFiles" + ], + "plugins" : [ + { + "class" : "Pod::Weaver::Plugin::EnsurePod5", + "name" : "@CorePrep/EnsurePod5", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Plugin::H1Nester", + "name" : "@CorePrep/H1Nester", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Plugin::SingleEncoding", + "name" : "@Default/SingleEncoding", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Name", + "name" : "@Default/Name", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Version", + "name" : "@Default/Version", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Region", + "name" : "@Default/prelude", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Generic", + "name" : "SYNOPSIS", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Generic", + "name" : "DESCRIPTION", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Generic", + "name" : "OVERVIEW", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Collect", + "name" : "ATTRIBUTES", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Collect", + "name" : "METHODS", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Collect", + "name" : "FUNCTIONS", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Leftovers", + "name" : "@Default/Leftovers", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Region", + "name" : "@Default/postlude", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Authors", + "name" : "@Default/Authors", + "version" : "4.015" + }, + { + "class" : "Pod::Weaver::Section::Legal", + "name" : "@Default/Legal", + "version" : "4.015" + } + ] + } + }, + "name" : "@Author::OALDERS/PodWeaver", + "version" : "4.009" + }, + { + "class" : "Dist::Zilla::Plugin::PruneCruft", + "name" : "@Author::OALDERS/PruneCruft", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::CopyFilesFromBuild", + "name" : "@Author::OALDERS/CopyFilesFromBuild", + "version" : "0.170880" + }, + { + "class" : "Dist::Zilla::Plugin::GithubMeta", + "name" : "@Author::OALDERS/GithubMeta", + "version" : "0.58" + }, + { + "class" : "Dist::Zilla::Plugin::Git::GatherDir", + "config" : { + "Dist::Zilla::Plugin::GatherDir" : { + "exclude_filename" : [ + "Install", + "LICENSE", + "META.json", + "Makefile.PL", + "README.md", + "cpanfile" + ], + "exclude_match" : [], + "follow_symlinks" : 0, + "include_dotfiles" : 0, + "prefix" : "", + "prune_directory" : [], + "root" : "." + }, + "Dist::Zilla::Plugin::Git::GatherDir" : { + "include_untracked" : 0 + } + }, + "name" : "@Author::OALDERS/Git::GatherDir", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::CopyFilesFromRelease", + "config" : { + "Dist::Zilla::Plugin::CopyFilesFromRelease" : { + "filename" : [ + "Install" + ], + "match" : [] + } + }, + "name" : "@Author::OALDERS/CopyFilesFromRelease", + "version" : "0.007" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Check", + "config" : { + "Dist::Zilla::Plugin::Git::Check" : { + "untracked_files" : "die" + }, + "Dist::Zilla::Role::Git::DirtyFiles" : { + "allow_dirty" : [ + "Changes", + "Install", + "LICENSE", + "META.json", + "Makefile.PL", + "README.md", + "cpanfile", + "dist.ini" + ], + "allow_dirty_match" : [], + "changelog" : "Changes" + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.32.0", + "repo_root" : "." + } + }, + "name" : "@Author::OALDERS/Git::Check", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Contributors", + "config" : { + "Dist::Zilla::Plugin::Git::Contributors" : { + "git_version" : "2.32.0", + "include_authors" : 0, + "include_releaser" : 1, + "order_by" : "name", + "paths" : [] + } + }, + "name" : "@Author::OALDERS/Git::Contributors", + "version" : "0.036" + }, + { + "class" : "Dist::Zilla::Plugin::ReadmeAnyFromPod", + "config" : { + "Dist::Zilla::Role::FileWatcher" : { + "version" : "0.006" + } + }, + "name" : "@Author::OALDERS/ReadmeMdInBuild", + "version" : "0.163250" + }, + { + "class" : "Dist::Zilla::Plugin::StaticInstall", + "config" : { + "Dist::Zilla::Plugin::StaticInstall" : { + "dry_run" : 0, + "mode" : "on" + } + }, + "name" : "@Author::OALDERS/StaticInstall", + "version" : "0.012" + }, + { + "class" : "Dist::Zilla::Plugin::ShareDir", + "name" : "@Author::OALDERS/ShareDir", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::CheckIssues", + "name" : "@Author::OALDERS/CheckIssues", + "version" : "0.011" + }, + { + "class" : "Dist::Zilla::Plugin::ConfirmRelease", + "name" : "@Author::OALDERS/ConfirmRelease", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::UploadToCPAN", + "name" : "@Author::OALDERS/UploadToCPAN", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs", + "config" : { + "Dist::Zilla::Plugin::Prereqs" : { + "phase" : "develop", + "type" : "recommends" + } + }, + "name" : "@Author::OALDERS/@Git::VersionManager/pluginbundle version", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::RewriteVersion::Transitional", + "config" : { + "Dist::Zilla::Plugin::RewriteVersion" : { + "add_tarball_name" : 0, + "finders" : [ + ":ExecFiles", + ":InstallModules" + ], + "global" : 0, + "skip_version_provider" : 0 + }, + "Dist::Zilla::Plugin::RewriteVersion::Transitional" : {} + }, + "name" : "@Author::OALDERS/@Git::VersionManager/RewriteVersion::Transitional", + "version" : "0.009" + }, + { + "class" : "Dist::Zilla::Plugin::MetaProvides::Update", + "name" : "@Author::OALDERS/@Git::VersionManager/MetaProvides::Update", + "version" : "0.007" + }, + { + "class" : "Dist::Zilla::Plugin::CopyFilesFromRelease", + "config" : { + "Dist::Zilla::Plugin::CopyFilesFromRelease" : { + "filename" : [ + "Changes" + ], + "match" : [] + } + }, + "name" : "@Author::OALDERS/@Git::VersionManager/CopyFilesFromRelease", + "version" : "0.007" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Commit", + "config" : { + "Dist::Zilla::Plugin::Git::Commit" : { + "add_files_in" : [], + "commit_msg" : "v%V%n%n%c", + "signoff" : 0 + }, + "Dist::Zilla::Role::Git::DirtyFiles" : { + "allow_dirty" : [ + "Changes", + "Install", + "LICENSE", + "META.json", + "Makefile.PL", + "README.md", + "cpanfile", + "dist.ini" + ], + "allow_dirty_match" : [], + "changelog" : "Changes" + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.32.0", + "repo_root" : "." + }, + "Dist::Zilla::Role::Git::StringFormatter" : { + "time_zone" : "local" + } + }, + "name" : "@Author::OALDERS/@Git::VersionManager/release snapshot", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Tag", + "config" : { + "Dist::Zilla::Plugin::Git::Tag" : { + "branch" : null, + "changelog" : "Changes", + "signed" : 0, + "tag" : "v2.04", + "tag_format" : "v%V", + "tag_message" : "v%V" + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.32.0", + "repo_root" : "." + }, + "Dist::Zilla::Role::Git::StringFormatter" : { + "time_zone" : "local" + } + }, + "name" : "@Author::OALDERS/@Git::VersionManager/Git::Tag", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::BumpVersionAfterRelease::Transitional", + "config" : { + "Dist::Zilla::Plugin::BumpVersionAfterRelease" : { + "finders" : [ + ":ExecFiles", + ":InstallModules" + ], + "global" : 0, + "munge_makefile_pl" : 1 + }, + "Dist::Zilla::Plugin::BumpVersionAfterRelease::Transitional" : {} + }, + "name" : "@Author::OALDERS/@Git::VersionManager/BumpVersionAfterRelease::Transitional", + "version" : "0.009" + }, + { + "class" : "Dist::Zilla::Plugin::NextRelease", + "name" : "@Author::OALDERS/@Git::VersionManager/NextRelease", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Commit", + "config" : { + "Dist::Zilla::Plugin::Git::Commit" : { + "add_files_in" : [], + "commit_msg" : "increment $VERSION after %v release", + "signoff" : 0 + }, + "Dist::Zilla::Role::Git::DirtyFiles" : { + "allow_dirty" : [ + "Build.PL", + "Changes", + "Makefile.PL" + ], + "allow_dirty_match" : [ + "(?^:^lib/.*\\.pm$)" + ], + "changelog" : "Changes" + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.32.0", + "repo_root" : "." + }, + "Dist::Zilla::Role::Git::StringFormatter" : { + "time_zone" : "local" + } + }, + "name" : "@Author::OALDERS/@Git::VersionManager/post-release commit", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::Git::Push", + "config" : { + "Dist::Zilla::Plugin::Git::Push" : { + "push_to" : [ + "origin" + ], + "remotes_must_exist" : 1 + }, + "Dist::Zilla::Role::Git::Repo" : { + "git_version" : "2.32.0", + "repo_root" : "." + } + }, + "name" : "@Author::OALDERS/Git::Push", + "version" : "2.048" + }, + { + "class" : "Dist::Zilla::Plugin::ExecDir", + "name" : "ExecDir", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::RunExtraTests", + "config" : { + "Dist::Zilla::Role::TestRunner" : { + "default_jobs" : "4" + } + }, + "name" : "RunExtraTests", + "version" : "0.029" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs", + "config" : { + "Dist::Zilla::Plugin::Prereqs" : { + "phase" : "runtime", + "type" : "requires" + } + }, + "name" : "RuntimeRequires", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs", + "config" : { + "Dist::Zilla::Plugin::Prereqs" : { + "phase" : "test", + "type" : "requires" + } + }, + "name" : "TestRequires", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs", + "config" : { + "Dist::Zilla::Plugin::Prereqs" : { + "phase" : "develop", + "type" : "requires" + } + }, + "name" : "DevelopRequires", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::Prereqs::Soften", + "config" : { + "Dist::Zilla::Plugin::Prereqs::Soften" : { + "copy_to" : [], + "modules" : [ + "Compress::Zlib" + ], + "modules_from_features" : null, + "to_relationship" : "recommends" + } + }, + "name" : "Prereqs::Soften", + "version" : "0.006003" + }, + { + "class" : "Dist::Zilla::Plugin::MetaProvides::Package", + "config" : { + "Dist::Zilla::Plugin::MetaProvides::Package" : { + "finder_objects" : [ + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : "MetaProvides::Package/AUTOVIV/:InstallModulesPM", + "version" : "6.024" + } + ], + "include_underscores" : 0 + }, + "Dist::Zilla::Role::MetaProvider::Provider" : { + "$Dist::Zilla::Role::MetaProvider::Provider::VERSION" : "2.002004", + "inherit_missing" : 1, + "inherit_version" : 1, + "meta_noindex" : 1 + }, + "Dist::Zilla::Role::ModuleMetadata" : { + "Module::Metadata" : "1.000036", + "version" : "0.006" + } + }, + "name" : "MetaProvides::Package", + "version" : "2.004003" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":InstallModules", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":IncModules", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":TestFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":ExtraTestFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":ExecFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":PerlExecFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":ShareFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":MainModule", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":AllFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : ":NoFiles", + "version" : "6.024" + }, + { + "class" : "Dist::Zilla::Plugin::FinderCode", + "name" : "MetaProvides::Package/AUTOVIV/:InstallModulesPM", + "version" : "6.024" + } + ], + "zilla" : { + "class" : "Dist::Zilla::Dist::Builder", + "config" : { + "is_trial" : 0 + }, + "version" : "6.024" + } + }, + "x_contributors" : [ + "Alexandr Ciornii <alexchorny@gmail.com>", + "Andrew Grangaard <granny-github@ofb.net>", + "Andy Lester <andy@petdance.com>", + "Bernhard Wagner <gitcommit@bernhardwagner.net>", + "Chase Whitener <capoeirab@cpan.org>", + "dakkar <dakkar@thenautilus.net>", + "Dave Doyle <dave.s.doyle@gmail.com>", + "David Precious <davidp@preshweb.co.uk>", + "David Steinbrunner <dsteinbrunner@pobox.com>", + "Desmond Daignault <nawglan@gmail.com>", + "Ed Avis <eda@waniasset.com>", + "Evan Zacks <zackse@gmail.com>", + "Ferenc Erki <erkiferenc@gmail.com>", + "Flavio Poletti <flavio@polettix.it>", + "Florian Schlichting <fsfs@debian.org>", + "Gabor Szabo <szabgab@gmail.com>", + "gjtunley@gmail.com <gjtunley@gmail.com>", + "gregor herrmann <gregoa@debian.org>", + "Grigor Karavardanyan <k.grigor@yahoo.com>", + "James Raspass <jraspass@gmail.com>", + "Jason May <jasonmay@bestpractical.com>", + "Jesse Vincent <jesse@bestpractical.com>", + "John Beppu <john.beppu@gmail.com>", + "Jozef Kutej <jozef@kutej.net>", + "Karen Etheridge <ether@cpan.org>", + "Kirrily 'Skud' Robert <skud@infotrope.net>", + "Kivanc Yazan <kivancyazan@gmail.com>", + "Lars D\u026a\u1d07\u1d04\u1d0b\u1d0f\u1d21 \u8fea\u62c9\u65af <daxim@cpan.org>", + "Mark Stosberg <mark@summersault.com>", + "Martin H. Sluka <martin@sluka.de>", + "Matthew Chae <mschae@cpan.org>", + "Matt S Trout <mst@shadowcat.co.uk>", + "Max Maischein <corion@cpan.org>", + "Mohammad S Anwar <mohammad.anwar@yahoo.com>", + "Neil Bowers <neil@bowers.com>", + "Nik LaBelle <nalabelle@gmail.com>", + "Olaf Alders <olaf@wundersolutions.com>", + "Philippe Bruhat (BooK) <book@cpan.org>", + "Ricardo Signes <rjbs@cpan.org>", + "Schuyler Langdon <schuyler@velargo.com>", + "Sergey Romanov <sromanov-dev@yandex.ru>", + "Shoichi Kaji <skaji@cpan.org>", + "simbabque <simbabque@cpan.org>", + "Steve Scaffidi <stephen@scaffidi.net>", + "Stuart Johnston <saj_git@thecommune.net>", + "sunnavy <sunnavy@bestpractical.com>", + "Varadinsky <varadinsky@yahoo.com>", + "Ville Skytt\u00e4 <ville.skytta@iki.fi>", + "Zefram <zefram@fysh.org>", + "\u7a4d\u4e39\u5c3c Dan Jacobson <jidanni@jidanni.org>" + ], + "x_generated_by_perl" : "v5.30.2", + "x_serialization_backend" : "Cpanel::JSON::XS version 4.26", + "x_spdx_expression" : "Artistic-1.0-Perl OR GPL-1.0-or-later", + "x_static_install" : 1 +} + diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..37894dc --- /dev/null +++ b/META.yml @@ -0,0 +1,713 @@ +--- +abstract: 'Handy web browsing in a Perl object' +author: + - 'Andy Lester <andy at petdance.com>' +build_requires: + CGI: '4.32' + Exporter: '0' + ExtUtils::MakeMaker: '0' + File::Spec: '0' + File::Temp: '0' + FindBin: '0' + HTTP::Daemon: '6.12' + HTTP::Response: '0' + HTTP::Server::Simple::CGI: '0' + LWP: '0' + LWP::Simple: '0' + Path::Tiny: '0' + Test::Deep: '0' + Test::Exception: '0' + Test::Fatal: '0' + Test::Memory::Cycle: '1.06' + Test::More: '0.96' + Test::NoWarnings: '1.04' + Test::Output: '0' + Test::Taint: '1.08' + Test::Warn: '0' + Test::Warnings: '0' + URI: '0' + URI::Escape: '0' + bytes: '0' + lib: '0' +configure_requires: + ExtUtils::MakeMaker: '0' +dynamic_config: 0 +generated_by: 'Dist::Zilla version 6.024, CPAN::Meta::Converter version 2.150010' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: '1.4' +name: WWW-Mechanize +no_index: + directory: + - examples + - t + - xt +provides: + WWW::Mechanize: + file: lib/WWW/Mechanize.pm + version: '2.04' + WWW::Mechanize::Image: + file: lib/WWW/Mechanize/Image.pm + version: '2.04' + WWW::Mechanize::Link: + file: lib/WWW/Mechanize/Link.pm + version: '2.04' +recommends: + Compress::Zlib: '0' +requires: + Carp: '0' + Getopt::Long: '0' + HTML::Form: '1.00' + HTML::HeadParser: '0' + HTML::TokeParser: '0' + HTML::TreeBuilder: '5' + HTTP::Cookies: '0' + HTTP::Request: '1.30' + HTTP::Request::Common: '0' + LWP::UserAgent: '6.45' + Pod::Usage: '0' + Scalar::Util: '1.14' + Tie::RefHash: '0' + URI::URL: '0' + URI::file: '0' + base: '0' + perl: '5.006' + strict: '0' + warnings: '0' +resources: + bugtracker: https://github.com/libwww-perl/WWW-Mechanize/issues + homepage: https://github.com/libwww-perl/WWW-Mechanize + repository: https://github.com/libwww-perl/WWW-Mechanize.git +version: '2.04' +x_Dist_Zilla: + perl: + version: '5.030002' + plugins: + - + class: Dist::Zilla::Plugin::PromptIfStale + config: + Dist::Zilla::Plugin::PromptIfStale: + check_all_plugins: 0 + check_all_prereqs: 0 + modules: + - Dist::Zilla::PluginBundle::Author::OALDERS + phase: build + run_under_travis: 0 + skip: [] + name: '@Author::OALDERS/stale modules, build' + version: '0.057' + - + class: Dist::Zilla::Plugin::PromptIfStale + config: + Dist::Zilla::Plugin::PromptIfStale: + check_all_plugins: 1 + check_all_prereqs: 1 + modules: [] + phase: release + run_under_travis: 0 + skip: [] + name: '@Author::OALDERS/stale modules, release' + version: '0.057' + - + class: Dist::Zilla::Plugin::OALDERS::TidyAll + name: '@Author::OALDERS/OALDERS::TidyAll' + version: '0.000029' + - + class: Dist::Zilla::Plugin::AutoPrereqs + name: '@Author::OALDERS/AutoPrereqs' + version: '6.024' + - + class: Dist::Zilla::Plugin::MakeMaker + config: + Dist::Zilla::Role::TestRunner: + default_jobs: '4' + name: '@Author::OALDERS/MakeMaker' + version: '6.024' + - + class: Dist::Zilla::Plugin::CPANFile + name: '@Author::OALDERS/CPANFile' + version: '6.024' + - + class: Dist::Zilla::Plugin::ContributorsFile + name: '@Author::OALDERS/ContributorsFile' + version: 0.3.0 + - + class: Dist::Zilla::Plugin::MetaJSON + name: '@Author::OALDERS/MetaJSON' + version: '6.024' + - + class: Dist::Zilla::Plugin::MetaYAML + name: '@Author::OALDERS/MetaYAML' + version: '6.024' + - + class: Dist::Zilla::Plugin::Manifest + name: '@Author::OALDERS/Manifest' + version: '6.024' + - + class: Dist::Zilla::Plugin::MetaNoIndex + name: '@Author::OALDERS/MetaNoIndex' + version: '6.024' + - + class: Dist::Zilla::Plugin::MetaConfig + name: '@Author::OALDERS/MetaConfig' + version: '6.024' + - + class: Dist::Zilla::Plugin::MetaResources + name: '@Author::OALDERS/MetaResources' + version: '6.024' + - + class: Dist::Zilla::Plugin::License + name: '@Author::OALDERS/License' + version: '6.024' + - + class: Dist::Zilla::Plugin::InstallGuide + config: + Dist::Zilla::Role::ModuleMetadata: + Module::Metadata: '1.000036' + version: '0.006' + name: '@Author::OALDERS/InstallGuide' + version: '1.200014' + - + class: Dist::Zilla::Plugin::Prereqs + config: + Dist::Zilla::Plugin::Prereqs: + phase: develop + type: requires + name: '@Author::OALDERS/Modules for use with tidyall' + version: '6.024' + - + class: Dist::Zilla::Plugin::ExecDir + name: '@Author::OALDERS/ExecDir' + version: '6.024' + - + class: Dist::Zilla::Plugin::MojibakeTests + name: '@Author::OALDERS/MojibakeTests' + version: '0.8' + - + class: Dist::Zilla::Plugin::PodSyntaxTests + name: '@Author::OALDERS/PodSyntaxTests' + version: '6.024' + - + class: Dist::Zilla::Plugin::Test::EOL + config: + Dist::Zilla::Plugin::Test::EOL: + filename: xt/author/eol.t + finder: + - ':ExecFiles' + - ':InstallModules' + - ':TestFiles' + trailing_whitespace: 1 + name: '@Author::OALDERS/Test::EOL' + version: '0.19' + - + class: Dist::Zilla::Plugin::Test::Pod::Coverage::Configurable + name: '@Author::OALDERS/Test::Pod::Coverage::Configurable' + version: '0.07' + - + class: Dist::Zilla::Plugin::Test::Portability + config: + Dist::Zilla::Plugin::Test::Portability: + options: '' + name: '@Author::OALDERS/Test::Portability' + version: '2.001000' + - + class: Dist::Zilla::Plugin::TestRelease + name: '@Author::OALDERS/TestRelease' + version: '6.024' + - + class: Dist::Zilla::Plugin::Test::ReportPrereqs + name: '@Author::OALDERS/Test::ReportPrereqs' + version: '0.028' + - + class: Dist::Zilla::Plugin::Test::TidyAll + name: '@Author::OALDERS/Test::TidyAll' + version: '0.04' + - + class: Dist::Zilla::Plugin::Test::Version + name: '@Author::OALDERS/Test::Version' + version: '1.09' + - + class: Dist::Zilla::Plugin::RunExtraTests + config: + Dist::Zilla::Role::TestRunner: + default_jobs: '4' + name: '@Author::OALDERS/RunExtraTests' + version: '0.029' + - + class: Dist::Zilla::Plugin::PodWeaver + config: + Dist::Zilla::Plugin::PodWeaver: + finder: + - ':InstallModules' + - ':ExecFiles' + plugins: + - + class: Pod::Weaver::Plugin::EnsurePod5 + name: '@CorePrep/EnsurePod5' + version: '4.015' + - + class: Pod::Weaver::Plugin::H1Nester + name: '@CorePrep/H1Nester' + version: '4.015' + - + class: Pod::Weaver::Plugin::SingleEncoding + name: '@Default/SingleEncoding' + version: '4.015' + - + class: Pod::Weaver::Section::Name + name: '@Default/Name' + version: '4.015' + - + class: Pod::Weaver::Section::Version + name: '@Default/Version' + version: '4.015' + - + class: Pod::Weaver::Section::Region + name: '@Default/prelude' + version: '4.015' + - + class: Pod::Weaver::Section::Generic + name: SYNOPSIS + version: '4.015' + - + class: Pod::Weaver::Section::Generic + name: DESCRIPTION + version: '4.015' + - + class: Pod::Weaver::Section::Generic + name: OVERVIEW + version: '4.015' + - + class: Pod::Weaver::Section::Collect + name: ATTRIBUTES + version: '4.015' + - + class: Pod::Weaver::Section::Collect + name: METHODS + version: '4.015' + - + class: Pod::Weaver::Section::Collect + name: FUNCTIONS + version: '4.015' + - + class: Pod::Weaver::Section::Leftovers + name: '@Default/Leftovers' + version: '4.015' + - + class: Pod::Weaver::Section::Region + name: '@Default/postlude' + version: '4.015' + - + class: Pod::Weaver::Section::Authors + name: '@Default/Authors' + version: '4.015' + - + class: Pod::Weaver::Section::Legal + name: '@Default/Legal' + version: '4.015' + name: '@Author::OALDERS/PodWeaver' + version: '4.009' + - + class: Dist::Zilla::Plugin::PruneCruft + name: '@Author::OALDERS/PruneCruft' + version: '6.024' + - + class: Dist::Zilla::Plugin::CopyFilesFromBuild + name: '@Author::OALDERS/CopyFilesFromBuild' + version: '0.170880' + - + class: Dist::Zilla::Plugin::GithubMeta + name: '@Author::OALDERS/GithubMeta' + version: '0.58' + - + class: Dist::Zilla::Plugin::Git::GatherDir + config: + Dist::Zilla::Plugin::GatherDir: + exclude_filename: + - Install + - LICENSE + - META.json + - Makefile.PL + - README.md + - cpanfile + exclude_match: [] + follow_symlinks: 0 + include_dotfiles: 0 + prefix: '' + prune_directory: [] + root: . + Dist::Zilla::Plugin::Git::GatherDir: + include_untracked: 0 + name: '@Author::OALDERS/Git::GatherDir' + version: '2.048' + - + class: Dist::Zilla::Plugin::CopyFilesFromRelease + config: + Dist::Zilla::Plugin::CopyFilesFromRelease: + filename: + - Install + match: [] + name: '@Author::OALDERS/CopyFilesFromRelease' + version: '0.007' + - + class: Dist::Zilla::Plugin::Git::Check + config: + Dist::Zilla::Plugin::Git::Check: + untracked_files: die + Dist::Zilla::Role::Git::DirtyFiles: + allow_dirty: + - Changes + - Install + - LICENSE + - META.json + - Makefile.PL + - README.md + - cpanfile + - dist.ini + allow_dirty_match: [] + changelog: Changes + Dist::Zilla::Role::Git::Repo: + git_version: 2.32.0 + repo_root: . + name: '@Author::OALDERS/Git::Check' + version: '2.048' + - + class: Dist::Zilla::Plugin::Git::Contributors + config: + Dist::Zilla::Plugin::Git::Contributors: + git_version: 2.32.0 + include_authors: 0 + include_releaser: 1 + order_by: name + paths: [] + name: '@Author::OALDERS/Git::Contributors' + version: '0.036' + - + class: Dist::Zilla::Plugin::ReadmeAnyFromPod + config: + Dist::Zilla::Role::FileWatcher: + version: '0.006' + name: '@Author::OALDERS/ReadmeMdInBuild' + version: '0.163250' + - + class: Dist::Zilla::Plugin::StaticInstall + config: + Dist::Zilla::Plugin::StaticInstall: + dry_run: 0 + mode: on + name: '@Author::OALDERS/StaticInstall' + version: '0.012' + - + class: Dist::Zilla::Plugin::ShareDir + name: '@Author::OALDERS/ShareDir' + version: '6.024' + - + class: Dist::Zilla::Plugin::CheckIssues + name: '@Author::OALDERS/CheckIssues' + version: '0.011' + - + class: Dist::Zilla::Plugin::ConfirmRelease + name: '@Author::OALDERS/ConfirmRelease' + version: '6.024' + - + class: Dist::Zilla::Plugin::UploadToCPAN + name: '@Author::OALDERS/UploadToCPAN' + version: '6.024' + - + class: Dist::Zilla::Plugin::Prereqs + config: + Dist::Zilla::Plugin::Prereqs: + phase: develop + type: recommends + name: '@Author::OALDERS/@Git::VersionManager/pluginbundle version' + version: '6.024' + - + class: Dist::Zilla::Plugin::RewriteVersion::Transitional + config: + Dist::Zilla::Plugin::RewriteVersion: + add_tarball_name: 0 + finders: + - ':ExecFiles' + - ':InstallModules' + global: 0 + skip_version_provider: 0 + Dist::Zilla::Plugin::RewriteVersion::Transitional: {} + name: '@Author::OALDERS/@Git::VersionManager/RewriteVersion::Transitional' + version: '0.009' + - + class: Dist::Zilla::Plugin::MetaProvides::Update + name: '@Author::OALDERS/@Git::VersionManager/MetaProvides::Update' + version: '0.007' + - + class: Dist::Zilla::Plugin::CopyFilesFromRelease + config: + Dist::Zilla::Plugin::CopyFilesFromRelease: + filename: + - Changes + match: [] + name: '@Author::OALDERS/@Git::VersionManager/CopyFilesFromRelease' + version: '0.007' + - + class: Dist::Zilla::Plugin::Git::Commit + config: + Dist::Zilla::Plugin::Git::Commit: + add_files_in: [] + commit_msg: v%V%n%n%c + signoff: 0 + Dist::Zilla::Role::Git::DirtyFiles: + allow_dirty: + - Changes + - Install + - LICENSE + - META.json + - Makefile.PL + - README.md + - cpanfile + - dist.ini + allow_dirty_match: [] + changelog: Changes + Dist::Zilla::Role::Git::Repo: + git_version: 2.32.0 + repo_root: . + Dist::Zilla::Role::Git::StringFormatter: + time_zone: local + name: '@Author::OALDERS/@Git::VersionManager/release snapshot' + version: '2.048' + - + class: Dist::Zilla::Plugin::Git::Tag + config: + Dist::Zilla::Plugin::Git::Tag: + branch: ~ + changelog: Changes + signed: 0 + tag: v2.04 + tag_format: v%V + tag_message: v%V + Dist::Zilla::Role::Git::Repo: + git_version: 2.32.0 + repo_root: . + Dist::Zilla::Role::Git::StringFormatter: + time_zone: local + name: '@Author::OALDERS/@Git::VersionManager/Git::Tag' + version: '2.048' + - + class: Dist::Zilla::Plugin::BumpVersionAfterRelease::Transitional + config: + Dist::Zilla::Plugin::BumpVersionAfterRelease: + finders: + - ':ExecFiles' + - ':InstallModules' + global: 0 + munge_makefile_pl: 1 + Dist::Zilla::Plugin::BumpVersionAfterRelease::Transitional: {} + name: '@Author::OALDERS/@Git::VersionManager/BumpVersionAfterRelease::Transitional' + version: '0.009' + - + class: Dist::Zilla::Plugin::NextRelease + name: '@Author::OALDERS/@Git::VersionManager/NextRelease' + version: '6.024' + - + class: Dist::Zilla::Plugin::Git::Commit + config: + Dist::Zilla::Plugin::Git::Commit: + add_files_in: [] + commit_msg: 'increment $VERSION after %v release' + signoff: 0 + Dist::Zilla::Role::Git::DirtyFiles: + allow_dirty: + - Build.PL + - Changes + - Makefile.PL + allow_dirty_match: + - (?^:^lib/.*\.pm$) + changelog: Changes + Dist::Zilla::Role::Git::Repo: + git_version: 2.32.0 + repo_root: . + Dist::Zilla::Role::Git::StringFormatter: + time_zone: local + name: '@Author::OALDERS/@Git::VersionManager/post-release commit' + version: '2.048' + - + class: Dist::Zilla::Plugin::Git::Push + config: + Dist::Zilla::Plugin::Git::Push: + push_to: + - origin + remotes_must_exist: 1 + Dist::Zilla::Role::Git::Repo: + git_version: 2.32.0 + repo_root: . + name: '@Author::OALDERS/Git::Push' + version: '2.048' + - + class: Dist::Zilla::Plugin::ExecDir + name: ExecDir + version: '6.024' + - + class: Dist::Zilla::Plugin::RunExtraTests + config: + Dist::Zilla::Role::TestRunner: + default_jobs: '4' + name: RunExtraTests + version: '0.029' + - + class: Dist::Zilla::Plugin::Prereqs + config: + Dist::Zilla::Plugin::Prereqs: + phase: runtime + type: requires + name: RuntimeRequires + version: '6.024' + - + class: Dist::Zilla::Plugin::Prereqs + config: + Dist::Zilla::Plugin::Prereqs: + phase: test + type: requires + name: TestRequires + version: '6.024' + - + class: Dist::Zilla::Plugin::Prereqs + config: + Dist::Zilla::Plugin::Prereqs: + phase: develop + type: requires + name: DevelopRequires + version: '6.024' + - + class: Dist::Zilla::Plugin::Prereqs::Soften + config: + Dist::Zilla::Plugin::Prereqs::Soften: + copy_to: [] + modules: + - Compress::Zlib + modules_from_features: ~ + to_relationship: recommends + name: Prereqs::Soften + version: '0.006003' + - + class: Dist::Zilla::Plugin::MetaProvides::Package + config: + Dist::Zilla::Plugin::MetaProvides::Package: + finder_objects: + - + class: Dist::Zilla::Plugin::FinderCode + name: MetaProvides::Package/AUTOVIV/:InstallModulesPM + version: '6.024' + include_underscores: 0 + Dist::Zilla::Role::MetaProvider::Provider: + $Dist::Zilla::Role::MetaProvider::Provider::VERSION: '2.002004' + inherit_missing: '1' + inherit_version: '1' + meta_noindex: '1' + Dist::Zilla::Role::ModuleMetadata: + Module::Metadata: '1.000036' + version: '0.006' + name: MetaProvides::Package + version: '2.004003' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':InstallModules' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':IncModules' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':TestFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':ExtraTestFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':ExecFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':PerlExecFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':ShareFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':MainModule' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':AllFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: ':NoFiles' + version: '6.024' + - + class: Dist::Zilla::Plugin::FinderCode + name: MetaProvides::Package/AUTOVIV/:InstallModulesPM + version: '6.024' + zilla: + class: Dist::Zilla::Dist::Builder + config: + is_trial: '0' + version: '6.024' +x_contributors: + - 'Alexandr Ciornii <alexchorny@gmail.com>' + - 'Andrew Grangaard <granny-github@ofb.net>' + - 'Andy Lester <andy@petdance.com>' + - 'Bernhard Wagner <gitcommit@bernhardwagner.net>' + - 'Chase Whitener <capoeirab@cpan.org>' + - 'dakkar <dakkar@thenautilus.net>' + - 'Dave Doyle <dave.s.doyle@gmail.com>' + - 'David Precious <davidp@preshweb.co.uk>' + - 'David Steinbrunner <dsteinbrunner@pobox.com>' + - 'Desmond Daignault <nawglan@gmail.com>' + - 'Ed Avis <eda@waniasset.com>' + - 'Evan Zacks <zackse@gmail.com>' + - 'Ferenc Erki <erkiferenc@gmail.com>' + - 'Flavio Poletti <flavio@polettix.it>' + - 'Florian Schlichting <fsfs@debian.org>' + - 'Gabor Szabo <szabgab@gmail.com>' + - 'gjtunley@gmail.com <gjtunley@gmail.com>' + - 'gregor herrmann <gregoa@debian.org>' + - 'Grigor Karavardanyan <k.grigor@yahoo.com>' + - 'James Raspass <jraspass@gmail.com>' + - 'Jason May <jasonmay@bestpractical.com>' + - 'Jesse Vincent <jesse@bestpractical.com>' + - 'John Beppu <john.beppu@gmail.com>' + - 'Jozef Kutej <jozef@kutej.net>' + - 'Karen Etheridge <ether@cpan.org>' + - "Kirrily 'Skud' Robert <skud@infotrope.net>" + - 'Kivanc Yazan <kivancyazan@gmail.com>' + - 'Lars Dɪᴇᴄᴋᴏᴡ 迪拉斯 <daxim@cpan.org>' + - 'Mark Stosberg <mark@summersault.com>' + - 'Martin H. Sluka <martin@sluka.de>' + - 'Matthew Chae <mschae@cpan.org>' + - 'Matt S Trout <mst@shadowcat.co.uk>' + - 'Max Maischein <corion@cpan.org>' + - 'Mohammad S Anwar <mohammad.anwar@yahoo.com>' + - 'Neil Bowers <neil@bowers.com>' + - 'Nik LaBelle <nalabelle@gmail.com>' + - 'Olaf Alders <olaf@wundersolutions.com>' + - 'Philippe Bruhat (BooK) <book@cpan.org>' + - 'Ricardo Signes <rjbs@cpan.org>' + - 'Schuyler Langdon <schuyler@velargo.com>' + - 'Sergey Romanov <sromanov-dev@yandex.ru>' + - 'Shoichi Kaji <skaji@cpan.org>' + - 'simbabque <simbabque@cpan.org>' + - 'Steve Scaffidi <stephen@scaffidi.net>' + - 'Stuart Johnston <saj_git@thecommune.net>' + - 'sunnavy <sunnavy@bestpractical.com>' + - 'Varadinsky <varadinsky@yahoo.com>' + - 'Ville Skyttä <ville.skytta@iki.fi>' + - 'Zefram <zefram@fysh.org>' + - '積丹尼 Dan Jacobson <jidanni@jidanni.org>' +x_generated_by_perl: v5.30.2 +x_serialization_backend: 'YAML::Tiny version 1.73' +x_spdx_expression: 'Artistic-1.0-Perl OR GPL-1.0-or-later' +x_static_install: 1 diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..2015537 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,134 @@ +# This file was automatically generated by Dist::Zilla::Plugin::MakeMaker v6.024. +use strict; +use warnings; + +use 5.006; + +use ExtUtils::MakeMaker; + +my %WriteMakefileArgs = ( + "ABSTRACT" => "Handy web browsing in a Perl object", + "AUTHOR" => "Andy Lester <andy at petdance.com>", + "CONFIGURE_REQUIRES" => { + "ExtUtils::MakeMaker" => 0 + }, + "DISTNAME" => "WWW-Mechanize", + "EXE_FILES" => [ + "script/mech-dump" + ], + "LICENSE" => "perl", + "MIN_PERL_VERSION" => "5.006", + "NAME" => "WWW::Mechanize", + "PREREQ_PM" => { + "Carp" => 0, + "Getopt::Long" => 0, + "HTML::Form" => "1.00", + "HTML::HeadParser" => 0, + "HTML::TokeParser" => 0, + "HTML::TreeBuilder" => 5, + "HTTP::Cookies" => 0, + "HTTP::Request" => "1.30", + "HTTP::Request::Common" => 0, + "LWP::UserAgent" => "6.45", + "Pod::Usage" => 0, + "Scalar::Util" => "1.14", + "Tie::RefHash" => 0, + "URI::URL" => 0, + "URI::file" => 0, + "base" => 0, + "strict" => 0, + "warnings" => 0 + }, + "TEST_REQUIRES" => { + "CGI" => "4.32", + "Exporter" => 0, + "ExtUtils::MakeMaker" => 0, + "File::Spec" => 0, + "File::Temp" => 0, + "FindBin" => 0, + "HTTP::Daemon" => "6.12", + "HTTP::Response" => 0, + "HTTP::Server::Simple::CGI" => 0, + "LWP" => 0, + "LWP::Simple" => 0, + "Path::Tiny" => 0, + "Test::Deep" => 0, + "Test::Exception" => 0, + "Test::Fatal" => 0, + "Test::Memory::Cycle" => "1.06", + "Test::More" => "0.96", + "Test::NoWarnings" => "1.04", + "Test::Output" => 0, + "Test::Taint" => "1.08", + "Test::Warn" => 0, + "Test::Warnings" => 0, + "URI" => 0, + "URI::Escape" => 0, + "bytes" => 0, + "lib" => 0 + }, + "VERSION" => "2.04", + "test" => { + "TESTS" => "t/*.t t/local/*.t t/mech-dump/*.t" + } +); + + +my %FallbackPrereqs = ( + "CGI" => "4.32", + "Carp" => 0, + "Exporter" => 0, + "ExtUtils::MakeMaker" => 0, + "File::Spec" => 0, + "File::Temp" => 0, + "FindBin" => 0, + "Getopt::Long" => 0, + "HTML::Form" => "1.00", + "HTML::HeadParser" => 0, + "HTML::TokeParser" => 0, + "HTML::TreeBuilder" => 5, + "HTTP::Cookies" => 0, + "HTTP::Daemon" => "6.12", + "HTTP::Request" => "1.30", + "HTTP::Request::Common" => 0, + "HTTP::Response" => 0, + "HTTP::Server::Simple::CGI" => 0, + "LWP" => 0, + "LWP::Simple" => 0, + "LWP::UserAgent" => "6.45", + "Path::Tiny" => 0, + "Pod::Usage" => 0, + "Scalar::Util" => "1.14", + "Test::Deep" => 0, + "Test::Exception" => 0, + "Test::Fatal" => 0, + "Test::Memory::Cycle" => "1.06", + "Test::More" => "0.96", + "Test::NoWarnings" => "1.04", + "Test::Output" => 0, + "Test::Taint" => "1.08", + "Test::Warn" => 0, + "Test::Warnings" => 0, + "Tie::RefHash" => 0, + "URI" => 0, + "URI::Escape" => 0, + "URI::URL" => 0, + "URI::file" => 0, + "base" => 0, + "bytes" => 0, + "lib" => 0, + "strict" => 0, + "warnings" => 0 +); + + +unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { + delete $WriteMakefileArgs{TEST_REQUIRES}; + delete $WriteMakefileArgs{BUILD_REQUIRES}; + $WriteMakefileArgs{PREREQ_PM} = \%FallbackPrereqs; +} + +delete $WriteMakefileArgs{CONFIGURE_REQUIRES} + unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; + +WriteMakefile(%WriteMakefileArgs); diff --git a/README.md b/README.md new file mode 100644 index 0000000..a5042d0 --- /dev/null +++ b/README.md @@ -0,0 +1,1699 @@ +# NAME + +WWW::Mechanize - Handy web browsing in a Perl object + +# VERSION + +version 2.04 + +# SYNOPSIS + +WWW::Mechanize supports performing a sequence of page fetches including +following links and submitting forms. Each fetched page is parsed +and its links and forms are extracted. A link or a form can be +selected, form fields can be filled and the next page can be fetched. +Mech also stores a history of the URLs you've visited, which can +be queried and revisited. + + use WWW::Mechanize (); + my $mech = WWW::Mechanize->new(); + + $mech->get( $url ); + + $mech->follow_link( n => 3 ); + $mech->follow_link( text_regex => qr/download this/i ); + $mech->follow_link( url => 'http://host.com/index.html' ); + + $mech->submit_form( + form_number => 3, + fields => { + username => 'mungo', + password => 'lost-and-alone', + } + ); + + $mech->submit_form( + form_name => 'search', + fields => { query => 'pot of gold', }, + button => 'Search Now' + ); + + # Enable strict form processing to catch typos and non-existant form fields. + my $strict_mech = WWW::Mechanize->new( strict_forms => 1); + + $strict_mech->get( $url ); + + # This method call will die, saving you lots of time looking for the bug. + $strict_mech->submit_form( + form_number => 3, + fields => { + usernaem => 'mungo', # typo in field name + password => 'lost-and-alone', + extra_field => 123, # field does not exist + } + ); + +# DESCRIPTION + +`WWW::Mechanize`, or Mech for short, is a Perl module for stateful +programmatic web browsing, used for automating interaction with +websites. + +Features include: + +- All HTTP methods +- High-level hyperlink and HTML form support, without having to parse HTML yourself +- SSL support +- Automatic cookies +- Custom HTTP headers +- Automatic handling of redirections +- Proxies +- HTTP authentication + +Mech is well suited for use in testing web applications. If you use +one of the Test::\*, like [Test::HTML::Lint](https://metacpan.org/pod/Test%3A%3AHTML%3A%3ALint) modules, you can check the +fetched content and use that as input to a test call. + + use Test::More; + like( $mech->content(), qr/$expected/, "Got expected content" ); + +Each page fetch stores its URL in a history stack which you can +traverse. + + $mech->back(); + +If you want finer control over your page fetching, you can use +these methods. `follow_link` and `submit_form` are just high +level wrappers around them. + + $mech->find_link( n => $number ); + $mech->form_number( $number ); + $mech->form_name( $name ); + $mech->field( $name, $value ); + $mech->set_fields( %field_values ); + $mech->set_visible( @criteria ); + $mech->click( $button ); + +[WWW::Mechanize](https://metacpan.org/pod/WWW%3A%3AMechanize) is a proper subclass of [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent) and +you can also use any of [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent)'s methods. + + $mech->add_header($name => $value); + +Please note that Mech does NOT support JavaScript, you need additional software +for that. Please check ["JavaScript" in WWW::Mechanize::FAQ](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AFAQ#JavaScript) for more. + +# IMPORTANT LINKS + +- [https://github.com/libwww-perl/WWW-Mechanize/issues](https://github.com/libwww-perl/WWW-Mechanize/issues) + + The queue for bugs & enhancements in WWW::Mechanize. Please note that the + queue at [http://rt.cpan.org](http://rt.cpan.org) is no longer maintained. + +- [https://metacpan.org/pod/WWW::Mechanize](https://metacpan.org/pod/WWW::Mechanize) + + The CPAN documentation page for Mechanize. + +- [https://metacpan.org/pod/distribution/WWW-Mechanize/lib/WWW/Mechanize/FAQ.pod](https://metacpan.org/pod/distribution/WWW-Mechanize/lib/WWW/Mechanize/FAQ.pod) + + Frequently asked questions. Make sure you read here FIRST. + +# CONSTRUCTOR AND STARTUP + +## new() + +Creates and returns a new WWW::Mechanize object, hereafter referred to as +the "agent". + + my $mech = WWW::Mechanize->new() + +The constructor for WWW::Mechanize overrides two of the params to the +LWP::UserAgent constructor: + + agent => 'WWW-Mechanize/#.##' + cookie_jar => {} # an empty, memory-only HTTP::Cookies object + +You can override these overrides by passing params to the constructor, +as in: + + my $mech = WWW::Mechanize->new( agent => 'wonderbot 1.01' ); + +If you want none of the overhead of a cookie jar, or don't want your +bot accepting cookies, you have to explicitly disallow it, like so: + + my $mech = WWW::Mechanize->new( cookie_jar => undef ); + +Here are the params that WWW::Mechanize recognizes. These do not include +params that [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent) recognizes. + +- `autocheck => [0|1]` + + Checks each request made to see if it was successful. This saves + you the trouble of manually checking yourself. Any errors found + are errors, not warnings. + + The default value is ON, unless it's being subclassed, in which + case it is OFF. This means that standalone [WWW::Mechanize](https://metacpan.org/pod/WWW%3A%3AMechanize) instances + have autocheck turned on, which is protective for the vast majority + of Mech users who don't bother checking the return value of get() + and post() and can't figure why their code fails. However, if + [WWW::Mechanize](https://metacpan.org/pod/WWW%3A%3AMechanize) is subclassed, such as for [Test::WWW::Mechanize](https://metacpan.org/pod/Test%3A%3AWWW%3A%3AMechanize) + or [Test::WWW::Mechanize::Catalyst](https://metacpan.org/pod/Test%3A%3AWWW%3A%3AMechanize%3A%3ACatalyst), this may not be an appropriate + default, so it's off. + +- `noproxy => [0|1]` + + Turn off the automatic call to the [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent) `env_proxy` function. + + This needs to be explicitly turned off if you're using [Crypt::SSLeay](https://metacpan.org/pod/Crypt%3A%3ASSLeay) to + access a https site via a proxy server. Note: you still need to set your + HTTPS\_PROXY environment variable as appropriate. + +- `onwarn => \&func` + + Reference to a `warn`-compatible function, such as `[Carp](https://metacpan.org/pod/Carp)::carp`, + that is called when a warning needs to be shown. + + If this is set to `undef`, no warnings will ever be shown. However, + it's probably better to use the `quiet` method to control that behavior. + + If this value is not passed, Mech uses `Carp::carp` if [Carp](https://metacpan.org/pod/Carp) is + installed, or `CORE::warn` if not. + +- `onerror => \&func` + + Reference to a `die`-compatible function, such as `[Carp](https://metacpan.org/pod/Carp)::croak`, + that is called when there's a fatal error. + + If this is set to `undef`, no errors will ever be shown. + + If this value is not passed, Mech uses `Carp::croak` if [Carp](https://metacpan.org/pod/Carp) is + installed, or `CORE::die` if not. + +- `quiet => [0|1]` + + Don't complain on warnings. Setting `quiet => 1` is the same as + calling `$mech->quiet(1)`. Default is off. + +- `stack_depth => $value` + + Sets the depth of the page stack that keeps track of all the + downloaded pages. Default is effectively infinite stack size. If + the stack is eating up your memory, then set this to a smaller + number, say 5 or 10. Setting this to zero means Mech will keep no + history. + +In addition, WWW::Mechanize also allows you to globally enable +strict and verbose mode for form handling, which is done with [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm). + +- `strict_forms => [0|1]` + + Globally sets the HTML::Form strict flag which causes form submission to + croak if any of the passed fields don't exist in the form, and/or a value + doesn't exist in a select element. This can still be disabled in individual + calls to `[submit_form()](#mech-submit_form)`. + + Default is off. + +- `verbose_forms => [0|1]` + + Globally sets the HTML::Form verbose flag which causes form submission to + warn about any bad HTML form constructs found. This cannot be disabled + later. + + Default is off. + +- `marked_sections => [0|1]` + + Globally sets the HTML::Parser marked sections flag which causes HTML + `CDATA[[` sections to be honoured. This cannot be disabled + later. + + Default is on. + +To support forms, WWW::Mechanize's constructor pushes POST +on to the agent's `requests_redirectable` list (see also +[LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent).) + +## $mech->agent\_alias( $alias ) + +Sets the user agent string to the expanded version from a table of actual user strings. +_$alias_ can be one of the following: + +- Windows IE 6 +- Windows Mozilla +- Mac Safari +- Mac Mozilla +- Linux Mozilla +- Linux Konqueror + +then it will be replaced with a more interesting one. For instance, + + $mech->agent_alias( 'Windows IE 6' ); + +sets your User-Agent to + + Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) + +The list of valid aliases can be returned from `known_agent_aliases()`. The current list is: + +- Windows IE 6 +- Windows Mozilla +- Mac Safari +- Mac Mozilla +- Linux Mozilla +- Linux Konqueror + +## known\_agent\_aliases() + +Returns a list of all the agent aliases that Mech knows about. + +# PAGE-FETCHING METHODS + +## $mech->get( $uri ) + +Given a URL/URI, fetches it. Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. +_$uri_ can be a well-formed URL string, a [URI](https://metacpan.org/pod/URI) object, or a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + +The results are stored internally in the agent object, but you don't +know that. Just use the accessors listed below. Poking at the +internals is deprecated and subject to change in the future. + +`get()` is a well-behaved overloaded version of the method in +[LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). This lets you do things like + + $mech->get( $uri, ':content_file' => $filename ); + +and you can rest assured that the params will get filtered down +appropriately. See ["get" in LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent#get) for more details. + +**NOTE:** Because `:content_file` causes the page contents to be +stored in a file instead of the response object, some Mech functions +that expect it to be there won't work as expected. Use with caution. + +## $mech->post( $uri, content => $content ) + +POSTs _$content_ to _$uri_. Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. +_$uri_ can be a well-formed URI string, a [URI](https://metacpan.org/pod/URI) object, or a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + +## $mech->put( $uri, content => $content ) + +PUTs _$content_ to _$uri_. Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. +_$uri_ can be a well-formed URI string, a [URI](https://metacpan.org/pod/URI) object, or a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + + my $res = $mech->head( $uri ); + my $res = $mech->head( $uri , $field_name => $value, ... ); + +## $mech->head ($uri ) + +Performs a HEAD request to _$uri_. Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. +_$uri_ can be a well-formed URI string, a [URI](https://metacpan.org/pod/URI) object, or a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + +## $mech->reload() + +Acts like the reload button in a browser: repeats the current +request. The history (as per the [back()](#mech-back) method) is not altered. + +Returns the [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object from the reload, or `undef` +if there's no current request. + +## $mech->back() + +The equivalent of hitting the "back" button in a browser. Returns to +the previous page. Won't go back past the first page. (Really, what +would it do if it could?) + +Returns true if it could go back, or false if not. + +## $mech->clear\_history() + +This deletes all the history entries and returns true. + +## $mech->history\_count() + +This returns the number of items in the browser history. This number _does_ +include the most recently made request. + +## $mech->history($n) + +This returns the _n_th item in history. The 0th item is the most recent +request and response, which would be acted on by methods like +`[find_link()](#mech-find_link)`. +The 1st item is the state you'd return to if you called +`[back()](#mech-back)`. + +The maximum useful value for `$n` is `$mech->history_count - 1`. +Requests beyond that bound will return `undef`. + +History items are returned as hash references, in the form: + + { req => $http_request, res => $http_response } + +# STATUS METHODS + +## $mech->success() + +Returns a boolean telling whether the last request was successful. +If there hasn't been an operation yet, returns false. + +This is a convenience function that wraps `$mech->res->is_success`. + +## $mech->uri() + +Returns the current URI as a [URI](https://metacpan.org/pod/URI) object. This object stringifies +to the URI itself. + +## $mech->response() / $mech->res() + +Return the current response as an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. + +Synonym for `$mech->response()` + +## $mech->status() + +Returns the HTTP status code of the response. This is a 3-digit +number like 200 for OK, 404 for not found, and so on. + +## $mech->ct() / $mech->content\_type() + +Returns the content type of the response. + +## $mech->base() + +Returns the base URI for the current response + +## $mech->forms() + +When called in a list context, returns a list of the forms found in +the last fetched page. In a scalar context, returns a reference to +an array with those forms. The forms returned are all [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) +objects. + +## $mech->current\_form() + +Returns the current form as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object. + +## $mech->links() + +When called in a list context, returns a list of the links found in the +last fetched page. In a scalar context it returns a reference to an array +with those links. Each link is a [WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + +## $mech->is\_html() + +Returns true/false on whether our content is HTML, according to the +HTTP headers. + +## $mech->title() + +Returns the contents of the `<TITLE>` tag, as parsed by +[HTML::HeadParser](https://metacpan.org/pod/HTML%3A%3AHeadParser). Returns undef if the content is not HTML. + +## $mech->redirects() + +Convenience method to get the [redirects](https://metacpan.org/pod/HTTP%3A%3AResponse#r-redirects) from the most recent [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse). + +Note that you can also use [is\_redirect](https://metacpan.org/pod/HTTP%3A%3AResponse#r-is_redirect) to see if the most recent response was a redirect like this. + + $mech->get($url); + do_stuff() if $mech->res->is_redirect; + +# CONTENT-HANDLING METHODS + +## $mech->content(...) + +Returns the content that the mech uses internally for the last page +fetched. Ordinarily this is the same as +`$mech->response()->decoded_content()`, +but this may differ for HTML documents if [update\_html](#mech-update_html-html) is +overloaded (in which case the value passed to the base-class +implementation of same will be returned), and/or extra named arguments +are passed to _content()_: + +- _$mech->content( format => 'text' )_ + + Returns a text-only version of the page, with all HTML markup + stripped. This feature requires _HTML::TreeBuilder_ version 5 or higher + to be installed, or a fatal error will be thrown. This works only if + the contents are HTML. + +- _$mech->content( base\_href => \[$base\_href|undef\] )_ + + Returns the HTML document, modified to contain a + `<base href="$base_href">` mark-up in the header. + _$base\_href_ is `$mech->base()` if not specified. This is + handy to pass the HTML to e.g. [HTML::Display](https://metacpan.org/pod/HTML%3A%3ADisplay). This works only if + the contents are HTML. + +- _$mech->content( raw => 1 )_ + + Returns `$self->response()->content()`, i.e. the raw contents from the + response. + +- _$mech->content( decoded\_by\_headers => 1 )_ + + Returns the content after applying all `Content-Encoding` headers but + with not additional mangling. + +- _$mech->content( charset => $charset )_ + + Returns `$self->response()->decoded_content(charset => $charset)` + (see [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) for details). + +To preserve backwards compatibility, additional parameters will be +ignored unless none of `raw | decoded_by_headers | charset` is +specified and the text is HTML, in which case an error will be triggered. + +A fresh instance of WWW::Mechanize will return `undef` when `$mech->content()` +is called, because no content is present before a request has been made. + +## $mech->text() + +Returns the text of the current HTML content. If the content isn't +HTML, `$mech` will die. + +The text is extracted by parsing the content, and then the extracted +text is cached, so don't worry about performance of calling this +repeatedly. + +# LINK METHODS + +## $mech->links() + +Lists all the links on the current page. Each link is a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. In list context, returns a list of all +links. In scalar context, returns an array reference of all links. + +## $mech->follow\_link(...) + +Follows a specified link on the page. You specify the match to be +found using the same params that `[find_link()](#mech-find_link)` uses. + +Here some examples: + +- 3rd link called "download" + + $mech->follow_link( text => 'download', n => 3 ); + +- first link where the URL has "download" in it, regardless of case: + + $mech->follow_link( url_regex => qr/download/i ); + + or + + $mech->follow_link( url_regex => qr/(?i:download)/ ); + +- 3rd link on the page + + $mech->follow_link( n => 3 ); + +- the link with the url + + $mech->follow_link( url => '/other/page' ); + + or + + $mech->follow_link( url => 'http://example.com/page' ); + +Returns the result of the `GET` method (an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object) if a link +was found. + +If the page has no links, or the specified link couldn't be found, returns +`undef`. If `autocheck` is enabled an exception will be thrown instead. + +## $mech->find\_link( ... ) + +Finds a link in the currently fetched page. It returns a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object which describes the link. (You'll +probably be most interested in the `url()` property.) If it fails +to find a link it returns undef. + +You can take the URL part and pass it to the `get()` method. If +that's your plan, you might as well use the `follow_link()` method +directly, since it does the `get()` for you automatically. + +Note that `<FRAME SRC="...">` tags are parsed out of the HTML and +treated as links so this method works with them. + +You can select which link to find by passing in one or more of these +key/value pairs: + +- `text => 'string',` and `text_regex => qr/regex/,` + + `text` matches the text of the link against _string_, which must be an + exact match. To select a link with text that is exactly "download", use + + $mech->find_link( text => 'download' ); + + `text_regex` matches the text of the link against _regex_. To select a + link with text that has "download" anywhere in it, regardless of case, use + + $mech->find_link( text_regex => qr/download/i ); + + Note that the text extracted from the page's links are trimmed. For + example, `<a> foo </a>` is stored as 'foo', and searching for + leading or trailing spaces will fail. + +- `url => 'string',` and `url_regex => qr/regex/,` + + Matches the URL of the link against _string_ or _regex_, as appropriate. + The URL may be a relative URL, like `foo/bar.html`, depending on how + it's coded on the page. + +- `url_abs => string` and `url_abs_regex => regex` + + Matches the absolute URL of the link against _string_ or _regex_, + as appropriate. The URL will be an absolute URL, even if it's relative + in the page. + +- `name => string` and `name_regex => regex` + + Matches the name of the link against _string_ or _regex_, as appropriate. + +- `rel => string` and `rel_regex => regex` + + Matches the rel of the link against _string_ or _regex_, as appropriate. + This can be used to find stylesheets, favicons, or links the author of the + page does not want bots to follow. + +- `id => string` and `id_regex => regex` + + Matches the attribute 'id' of the link against _string_ or + _regex_, as appropriate. + +- `class => string` and `class_regex => regex` + + Matches the attribute 'class' of the link against _string_ or + _regex_, as appropriate. + +- `tag => string` and `tag_regex => regex` + + Matches the tag that the link came from against _string_ or _regex_, + as appropriate. The `tag_regex` is probably most useful to check for + more than one tag, as in: + + $mech->find_link( tag_regex => qr/^(a|frame)$/ ); + + The tags and attributes looked at are defined below. + +If `n` is not specified, it defaults to 1. Therefore, if you don't +specify any params, this method defaults to finding the first link on the +page. + +Note that you can specify multiple text or URL parameters, which +will be ANDed together. For example, to find the first link with +text of "News" and with "cnn.com" in the URL, use: + + $mech->find_link( text => 'News', url_regex => qr/cnn\.com/ ); + +The return value is a reference to an array containing a +[WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object for every link in `$self->content`. + +The links come from the following: + +- `<a href=...>` +- `<area href=...>` +- `<frame src=...>` +- `<iframe src=...>` +- `<link href=...>` +- `<meta content=...>` + +## $mech->find\_all\_links( ... ) + +Returns all the links on the current page that match the criteria. The +method for specifying link criteria is the same as in +`[find_link()](#mech-find_link)`. +Each of the links returned is a [WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) object. + +In list context, `find_all_links()` returns a list of the links. +Otherwise, it returns a reference to the list of links. + +`find_all_links()` with no parameters returns all links in the +page. + +## $mech->find\_all\_inputs( ... criteria ... ) + +find\_all\_inputs() returns an array of all the input controls in the +current form whose properties match all of the regexes passed in. +The controls returned are all descended from HTML::Form::Input. +See ["INPUTS" in HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm#INPUTS) for details. + +If no criteria are passed, all inputs will be returned. + +If there is no current page, there is no form on the current +page, or there are no submit controls in the current form +then the return will be an empty array. + +You may use a regex or a literal string: + + # get all textarea controls whose names begin with "customer" + my @customer_text_inputs = $mech->find_all_inputs( + type => 'textarea', + name_regex => qr/^customer/, + ); + + # get all text or textarea controls called "customer" + my @customer_text_inputs = $mech->find_all_inputs( + type_regex => qr/^(text|textarea)$/, + name => 'customer', + ); + +## $mech->find\_all\_submits( ... criteria ... ) + +`find_all_submits()` does the same thing as `find_all_inputs()` +except that it only returns controls that are submit controls, +ignoring other types of input controls like text and checkboxes. + +# IMAGE METHODS + +## $mech->images + +Lists all the images on the current page. Each image is a +[WWW::Mechanize::Image](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AImage) object. In list context, returns a list of all +images. In scalar context, returns an array reference of all images. + +## $mech->find\_image() + +Finds an image in the current page. It returns a +[WWW::Mechanize::Image](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AImage) object which describes the image. If it fails +to find an image it returns undef. + +You can select which image to find by passing in one or more of these +key/value pairs: + +- `alt => 'string'` and `alt_regex => qr/regex/` + + `alt` matches the ALT attribute of the image against _string_, which must be an + exact match. To select a image with an ALT tag that is exactly "download", use + + $mech->find_image( alt => 'download' ); + + `alt_regex` matches the ALT attribute of the image against a regular + expression. To select an image with an ALT attribute that has "download" + anywhere in it, regardless of case, use + + $mech->find_image( alt_regex => qr/download/i ); + +- `url => 'string'` and `url_regex => qr/regex/` + + Matches the URL of the image against _string_ or _regex_, as appropriate. + The URL may be a relative URL, like `foo/bar.html`, depending on how + it's coded on the page. + +- `url_abs => string` and `url_abs_regex => regex` + + Matches the absolute URL of the image against _string_ or _regex_, + as appropriate. The URL will be an absolute URL, even if it's relative + in the page. + +- `tag => string` and `tag_regex => regex` + + Matches the tag that the image came from against _string_ or _regex_, + as appropriate. The `tag_regex` is probably most useful to check for + more than one tag, as in: + + $mech->find_image( tag_regex => qr/^(img|input)$/ ); + + The tags supported are `<img>` and `<input>`. + +- `id => string` and `id_regex => regex` + + `id` matches the id attribute of the image against _string_, which must + be an exact match. To select an image with the exact id "download-image", use + + $mech->find_image( id => 'download-image' ); + + `id_regex` matches the id attribute of the image against a regular + expression. To select the first image with an id that contains "download" + anywhere in it, use + + $mech->find_image( id_regex => qr/download/ ); + +- `classs => string` and `class_regex => regex` + + `class` matches the class attribute of the image against _string_, which must + be an exact match. To select an image with the exact class "img-fuid", use + + $mech->find_image( class => 'img-fluid' ); + + To select an image with the class attribute "rounded float-left", use + + $mech->find_image( class => 'rounded float-left' ); + + Note that the classes have to be matched as a complete string, in the exact + order they appear in the website's source code. + + `class_regex` matches the class attribute of the image against a regular + expression. Use this if you want a partial class name, or if an image has + several classes, but you only care about one. + + To select the first image with the class "rounded", where there are multiple + images that might also have either class "float-left" or "float-right", use + + $mech->find_image( class_regex => qr/\brounded\b/ ); + + Selecting an image with multiple classes where you do not care about the + order they appear in the website's source code is not currently supported. + +If `n` is not specified, it defaults to 1. Therefore, if you don't +specify any params, this method defaults to finding the first image on the +page. + +Note that you can specify multiple ALT or URL parameters, which +will be ANDed together. For example, to find the first image with +ALT text of "News" and with "cnn.com" in the URL, use: + + $mech->find_image( image => 'News', url_regex => qr/cnn\.com/ ); + +The return value is a reference to an array containing a +[WWW::Mechanize::Image](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AImage) object for every image in `$self->content`. + +## $mech->find\_all\_images( ... ) + +Returns all the images on the current page that match the criteria. The +method for specifying image criteria is the same as in +`[find_image()](#mech-find_image)`. +Each of the images returned is a [WWW::Mechanize::Image](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AImage) object. + +In list context, `find_all_images()` returns a list of the images. +Otherwise, it returns a reference to the list of images. + +`find_all_images()` with no parameters returns all images in the page. + +# FORM METHODS + +These methods let you work with the forms on a page. The idea is +to choose a form that you'll later work with using the field methods +below. + +## $mech->forms + +Lists all the forms on the current page. Each form is an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) +object. In list context, returns a list of all forms. In scalar +context, returns an array reference of all forms. + +## $mech->form\_number($number) + +Selects the _number_th form on the page as the target for subsequent +calls to `[field()](#mech-field-name-value-number)` +and `[click()](#mech-click-button-x-y)`. +Also returns the form that was selected. + +If it is found, the form is returned as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object and set internally +for later use with Mech's form methods such as +`[field()](#mech-field-name-value-number)` and +`[click()](#mech-click-button-x-y)`. +When called in a list context, the number of the found form is also returned as +a second value. + +Emits a warning and returns undef if no form is found. + +The first form is number 1, not zero. + +## $mech->form\_name( $name ) + +Selects a form by name. If there is more than one form on the page +with that name, then the first one is used, and a warning is +generated. + +If it is found, the form is returned as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object and +set internally for later use with Mech's form methods such as +`[field()](#mech-field-name-value-number)` and +`[click()](#mech-click-button-x-y)`. + +Returns undef if no form is found. + +## $mech->form\_id( $id ) + +Selects a form by ID. If there is more than one form on the page +with that ID, then the first one is used, and a warning is generated. + +If it is found, the form is returned as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object and +set internally for later use with Mech's form methods such as +`[field()](#mech-field-name-value-number)` and +`[click()](#mech-click-button-x-y)`. + +If no form is found it returns `undef`. This will also trigger a warning, +unless `quiet` is enabled. + +## $mech->all\_forms\_with\_fields( @fields ) + +Selects a form by passing in a list of field names it must contain. All matching forms (perhaps none) are returned as a list of [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) objects. + +## $mech->form\_with\_fields( @fields ) + +Selects a form by passing in a list of field names it must contain. If there +is more than one form on the page with that matches, then the first one is used, +and a warning is generated. + +If it is found, the form is returned as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object and set internally +for later used with Mech's form methods such as +`[field()](#mech-field-name-value-number)` and +`[click()](#mech-click-button-x-y)`. + +Returns undef and emits a warning if no form is found. + +Note that this functionality requires libwww-perl 5.69 or higher. + +## $mech->all\_forms\_with( $attr1 => $value1, $attr2 => $value2, ... ) + +Searches for forms with arbitrary attribute/value pairs within the <form> +tag. +(Currently does not work for attribute `action` due to implementation details +of [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm).) +When given more than one pair, all criteria must match. +Using `undef` as value means that the attribute in question must not be present. + +All matching forms (perhaps none) are returned as a list of [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) objects. + +## $mech->form\_with( $attr1 => $value1, $attr2 => $value2, ... ) + +Searches for forms with arbitrary attribute/value pairs within the <form> +tag. +(Currently does not work for attribute `action` due to implementation details +of [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm).) +When given more than one pair, all criteria must match. +Using `undef` as value means that the attribute in question must not be present. + +If it is found, the form is returned as an [HTML::Form](https://metacpan.org/pod/HTML%3A%3AForm) object and set internally +for later used with Mech's form methods such as +`[field()](#mech-field-name-value-number)` and +`[click()](#mech-click-button-x-y)`. + +Returns undef if no form is found. + +# FIELD METHODS + +These methods allow you to set the values of fields in a given form. + +## $mech->field( $name, $value, $number ) + +## $mech->field( $name, \\@values, $number ) + +Given the name of a field, set its value to the value specified. +This applies to the current form (as set by the +`[form_name()](#mech-form_name-name)` or +`[form_number()](#mech-form_number-number)` +method or defaulting to the first form on the page). + +The optional _$number_ parameter is used to distinguish between two fields +with the same name. The fields are numbered from 1. + +## $mech->select($name, $value) + +## $mech->select($name, \\@values) + +Given the name of a `select` field, set its value to the value +specified. If the field is not `<select multiple>` and the +`$value` is an array, only the **first** value will be set. \[Note: +the documentation previously claimed that only the last value would +be set, but this was incorrect.\] Passing `$value` as a hash with +an `n` key selects an item by number (e.g. +`{n => 3}` or `{n => [2,4]}`). +The numbering starts at 1. This applies to the current form. + +If you have a field with `<select multiple>` and you pass a single +`$value`, then `$value` will be added to the list of fields selected, +without clearing the others. However, if you pass an array reference, +then all previously selected values will be cleared. + +Returns true on successfully setting the value. On failure, returns +false and calls `$self->warn()` with an error message. + +## $mech->set\_fields( $name => $value ... ) + +This method sets multiple fields of the current form. It takes a list +of field name and value pairs. If there is more than one field with +the same name, the first one found is set. If you want to select which +of the duplicate field to set, use a value which is an anonymous array +which has the field value and its number as the 2 elements. + + # set the second foo field + $mech->set_fields( $name => [ 'foo', 2 ] ); + +The fields are numbered from 1. + +This applies to the current form. + +## $mech->set\_visible( @criteria ) + +This method sets fields of the current form without having to know +their names. So if you have a login screen that wants a username and +password, you do not have to fetch the form and inspect the source (or +use the `mech-dump` utility, installed with WWW::Mechanize) to see +what the field names are; you can just say + + $mech->set_visible( $username, $password ); + +and the first and second fields will be set accordingly. The method +is called set\__visible_ because it acts only on visible fields; +hidden form inputs are not considered. The order of the fields is +the order in which they appear in the HTML source which is nearly +always the order anyone viewing the page would think they are in, +but some creative work with tables could change that; caveat user. + +Each element in `@criteria` is either a field value or a field +specifier. A field value is a scalar. A field specifier allows +you to specify the _type_ of input field you want to set and is +denoted with an arrayref containing two elements. So you could +specify the first radio button with + + $mech->set_visible( [ radio => 'KCRW' ] ); + +Field values and specifiers can be intermixed, hence + + $mech->set_visible( 'fred', 'secret', [ option => 'Checking' ] ); + +would set the first two fields to "fred" and "secret", and the _next_ +`OPTION` menu field to "Checking". + +The possible field specifier types are: "text", "password", "hidden", +"textarea", "file", "image", "submit", "radio", "checkbox" and "option". + +`set_visible` returns the number of values set. + +## $mech->tick( $name, $value \[, $set\] ) + +"Ticks" the first checkbox that has both the name and value associated +with it on the current form. Dies if there is no named check box for +that value. Passing in a false value as the third optional argument +will cause the checkbox to be unticked. + +## $mech->untick($name, $value) + +Causes the checkbox to be unticked. Shorthand for +`tick($name,$value,undef)` + +## $mech->value( $name \[, $number\] ) + +Given the name of a field, return its value. This applies to the current +form. + +The optional _$number_ parameter is used to distinguish between two fields +with the same name. The fields are numbered from 1. + +If the field is of type file (file upload field), the value is always +cleared to prevent remote sites from downloading your local files. +To upload a file, specify its file name explicitly. + +## $mech->click( $button \[, $x, $y\] ) + +Has the effect of clicking a button on the current form. The first +argument is the name of the button to be clicked. The second and +third arguments (optional) allow you to specify the (x,y) coordinates +of the click. + +If there is only one button on the form, `$mech->click()` with +no arguments simply clicks that one button. + +Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. + +## $mech->click\_button( ... ) + +Has the effect of clicking a button on the current form by specifying +its attributes. The arguments are a list of key/value pairs. Only one +of name, id, number, input or value must be specified in the keys. + +Dies if no button is found. + +- `name => name` + + Clicks the button named _name_ in the current form. + +- `id => id` + + Clicks the button with the id _id_ in the current form. + +- `number => n` + + Clicks the _n_th button with type _submit_ in the current form. + Numbering starts at 1. + +- `value => value` + + Clicks the button with the value _value_ in the current form. + +- `input => $inputobject` + + Clicks on the button referenced by $inputobject, an instance of + [HTML::Form::SubmitInput](https://metacpan.org/pod/HTML%3A%3AForm%3A%3ASubmitInput) obtained e.g. from + + $mech->current_form()->find_input( undef, 'submit' ) + + `$inputobject` must belong to the current form. + +- `x => x` +- `y => y` + + These arguments (optional) allow you to specify the (x,y) coordinates + of the click. + +## $mech->submit() + +Submits the current form, without specifying a button to click. Actually, +no button is clicked at all. + +Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. + +This used to be a synonym for `$mech->click( 'submit' )`, but is no +longer so. + +## $mech->submit\_form( ... ) + +This method lets you select a form from the previously fetched page, +fill in its fields, and submit it. It combines the `form_number`/`form_name`, +`set_fields` and `click` methods into one higher level call. Its arguments +are a list of key/value pairs, all of which are optional. + +- `fields => \%fields` + + Specifies the fields to be filled in the current form. + +- `with_fields => \%fields` + + Probably all you need for the common case. It combines a smart form selector + and data setting in one operation. It selects the first form that contains all + fields mentioned in `\%fields`. This is nice because you don't need to know + the name or number of the form to do this. + + (calls `[form_with_fields()](#mech-form_with_fields-fields)` and + `[set_fields()](#mech-set_fields-name-value)`). + + If you choose `with_fields`, the `fields` option will be ignored. The + `form_number`, `form_name` and `form_id` options will still be used. An + exception will be thrown unless exactly one form matches all of the provided + criteria. + +- `form_number => n` + + Selects the _n_th form (calls + `[form_number()](#mech-form_number-number)`. If this param is not + specified, the currently-selected form is used. + +- `form_name => name` + + Selects the form named _name_ (calls + `[form_name()](#mech-form_name-name)`) + +- `form_id => ID` + + Selects the form with ID _ID_ (calls + `[form_id()](#mech-form_id-name)`) + +- `button => button` + + Clicks on button _button_ (calls `[click()](#mech-click-button-x-y)`) + +- `x => x, y => y` + + Sets the x or y values for `[click()](#mech-click-button-x-y)` + +- `strict_forms => bool` + + Sets the HTML::Form strict flag which causes form submission to croak if any of the passed + fields don't exist on the page, and/or a value doesn't exist in a select element. + By default HTML::Form sets this value to false. + + This behavior can also be turned on globally by passing `strict_forms => 1` to + `WWW::Mechanize->new`. If you do that, you can still disable it for individual calls + by passing `strict_forms => 0` here. + +If no form is selected, the first form found is used. + +If _button_ is not passed, then the `[submit()](#mech-submit)` +method is used instead. + +If you want to submit a file and get its content from a scalar rather +than a file in the filesystem, you can use: + + $mech->submit_form(with_fields => { logfile => [ [ undef, 'whatever', Content => $content ], 1 ] } ); + +Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. + +# MISCELLANEOUS METHODS + +## $mech->add\_header( name => $value \[, name => $value... \] ) + +Sets HTTP headers for the agent to add or remove from the HTTP request. + + $mech->add_header( Encoding => 'text/klingon' ); + +If a _value_ is `undef`, then that header will be removed from any +future requests. For example, to never send a Referer header: + + $mech->add_header( Referer => undef ); + +If you want to delete a header, use `delete_header`. + +Returns the number of name/value pairs added. + +**NOTE**: This method was very different in WWW::Mechanize before 1.00. +Back then, the headers were stored in a package hash, not as a member of +the object instance. Calling `add_header()` would modify the headers +for every WWW::Mechanize object, even after your object no longer existed. + +## $mech->delete\_header( name \[, name ... \] ) + +Removes HTTP headers from the agent's list of special headers. For +instance, you might need to do something like: + + # Don't send a Referer for this URL + $mech->add_header( Referer => undef ); + + # Get the URL + $mech->get( $url ); + + # Back to the default behavior + $mech->delete_header( 'Referer' ); + +## $mech->quiet(true/false) + +Allows you to suppress warnings to the screen. + + $mech->quiet(0); # turns on warnings (the default) + $mech->quiet(1); # turns off warnings + $mech->quiet(); # returns the current quietness status + +## $mech->stack\_depth( $max\_depth ) + +Get or set the page stack depth. Use this if you're doing a lot of page +scraping and running out of memory. + +A value of 0 means "no history at all." By default, the max stack depth +is humongously large, effectively keeping all history. + +## $mech->save\_content( $filename, %opts ) + +Dumps the contents of `$mech->content` into _$filename_. +_$filename_ will be overwritten. Dies if there are any errors. + +If the content type does not begin with "text/", then the content +is saved in binary mode (i.e. `binmode()` is set on the output +filehandle). + +Additional arguments can be passed as _key_/_value_ pairs: + +- _$mech->save\_content( $filename, binary => 1 )_ + + Filehandle is set with `binmode` to `:raw` and contents are taken + calling `$self->content(decoded_by_headers => 1)`. Same as calling: + + $mech->save_content( $filename, binmode => ':raw', + decoded_by_headers => 1 ); + + This _should_ be the safest way to save contents verbatim. + +- _$mech->save\_content( $filename, binmode => $binmode )_ + + Filehandle is set to binary mode. If `$binmode` begins with ':', it is + passed as a parameter to `binmode`: + + binmode $fh, $binmode; + + otherwise the filehandle is set to binary mode if `$binmode` is true: + + binmode $fh; + +- _all other arguments_ + + are passed as-is to `$mech->content(%opts)`. In particular, + `decoded_by_headers` might come handy if you want to revert the effect + of line compression performed by the web server but without further + interpreting the contents (e.g. decoding it according to the charset). + +## $mech->dump\_headers( \[$fh\] ) + +Prints a dump of the HTTP response headers for the most recent +response. If _$fh_ is not specified or is undef, it dumps to +STDOUT. + +Unlike the rest of the dump\_\* methods, $fh can be a scalar. It +will be used as a file name. + +## $mech->dump\_links( \[\[$fh\], $absolute\] ) + +Prints a dump of the links on the current page to _$fh_. If _$fh_ +is not specified or is undef, it dumps to STDOUT. + +If _$absolute_ is true, links displayed are absolute, not relative. + +## $mech->dump\_images( \[\[$fh\], $absolute\] ) + +Prints a dump of the images on the current page to _$fh_. If _$fh_ +is not specified or is undef, it dumps to STDOUT. + +If _$absolute_ is true, links displayed are absolute, not relative. + +The output will include empty lines for images that have no `src` attribute +and therefore no `<-`url>>. + +## $mech->dump\_forms( \[$fh\] ) + +Prints a dump of the forms on the current page to _$fh_. If _$fh_ +is not specified or is undef, it dumps to STDOUT. Running the following: + + my $mech = WWW::Mechanize->new(); + $mech->get("https://www.google.com/"); + $mech->dump_forms; + +will print: + + GET https://www.google.com/search [f] + ie=ISO-8859-1 (hidden readonly) + hl=en (hidden readonly) + source=hp (hidden readonly) + biw= (hidden readonly) + bih= (hidden readonly) + q= (text) + btnG=Google Search (submit) + btnI=I'm Feeling Lucky (submit) + gbv=1 (hidden readonly) + +## $mech->dump\_text( \[$fh\] ) + +Prints a dump of the text on the current page to _$fh_. If _$fh_ +is not specified or is undef, it dumps to STDOUT. + +# OVERRIDDEN LWP::UserAgent METHODS + +## $mech->clone() + +Clone the mech object. The clone will be using the same cookie jar +as the original mech. + +## $mech->redirect\_ok() + +An overloaded version of `redirect_ok()` in [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). +This method is used to determine whether a redirection in the request +should be followed. + +Note that WWW::Mechanize's constructor pushes POST on to the agent's +`requests_redirectable` list. + +## $mech->request( $request \[, $arg \[, $size\]\]) + +Overloaded version of `request()` in [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). Performs +the actual request. Normally, if you're using WWW::Mechanize, it's +because you don't want to deal with this level of stuff anyway. + +Note that `$request` will be modified. + +Returns an [HTTP::Response](https://metacpan.org/pod/HTTP%3A%3AResponse) object. + +## $mech->update\_html( $html ) + +Allows you to replace the HTML that the mech has found. Updates the +forms and links parse-trees that the mech uses internally. + +Say you have a page that you know has malformed output, and you want to +update it so the links come out correctly: + + my $html = $mech->content; + $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; + $mech->update_html( $html ); + +This method is also used internally by the mech itself to update its +own HTML content when loading a page. This means that if you would +like to _systematically_ perform the above HTML substitution, you +would overload _update\_html_ in a subclass thusly: + + package MyMech; + use base 'WWW::Mechanize'; + + sub update_html { + my ($self, $html) = @_; + $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; + $self->WWW::Mechanize::update_html( $html ); + } + +If you do this, then the mech will use the tidied-up HTML instead of +the original both when parsing for its own needs, and for returning to +you through `[content()](#mech-content)`. + +Overloading this method is also the recommended way of implementing +extra validation steps (e.g. link checkers) for every HTML page +received. ["warn"](#warn) and ["die"](#die) would then come in handy to signal +validation errors. + +## $mech->credentials( $username, $password ) + +Provide credentials to be used for HTTP Basic authentication for +all sites and realms until further notice. + +The four argument form described in [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent) is still +supported. + +## $mech->get\_basic\_credentials( $realm, $uri, $isproxy ) + +Returns the credentials for the realm and URI. + +## $mech->clear\_credentials() + +Remove any credentials set up with `credentials()`. + +# INHERITED UNCHANGED LWP::UserAgent METHODS + +As a subclass of [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent), WWW::Mechanize inherits all of +[LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent)'s methods. Many of which are overridden or +extended. The following methods are inherited unchanged. View the +[LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent) documentation for their implementation descriptions. + +This is not meant to be an inclusive list. LWP::UA may have added +others. + +## $mech->head() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +## $mech->mirror() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +## $mech->simple\_request() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +## $mech->is\_protocol\_supported() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +## $mech->prepare\_request() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +## $mech->progress() + +Inherited from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). + +# INTERNAL-ONLY METHODS + +These methods are only used internally. You probably don't need to +know about them. + +## $mech->\_update\_page($request, $response) + +Updates all internal variables in $mech as if $request was just +performed, and returns $response. The page stack is **not** altered by +this method, it is up to caller (e.g. +`[request](#mech-request-request-arg-size)`) +to do that. + +## $mech->\_modify\_request( $req ) + +Modifies a [HTTP::Request](https://metacpan.org/pod/HTTP%3A%3ARequest) before the request is sent out, +for both GET and POST requests. + +We add a `Referer` header, as well as header to note that we can accept gzip +encoded content, if [Compress::Zlib](https://metacpan.org/pod/Compress%3A%3AZlib) is installed. + +## $mech->\_make\_request() + +Convenience method to make it easier for subclasses like +[WWW::Mechanize::Cached](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ACached) to intercept the request. + +## $mech->\_reset\_page() + +Resets the internal fields that track page parsed stuff. + +## $mech->\_extract\_links() + +Extracts links from the content of a webpage, and populates the `{links}` +property with [WWW::Mechanize::Link](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ALink) objects. + +## $mech->\_push\_page\_stack() + +The agent keeps a stack of visited pages, which it can pop when it needs +to go BACK and so on. + +The current page needs to be pushed onto the stack before we get a new +page, and the stack needs to be popped when BACK occurs. + +Neither of these take any arguments, they just operate on the $mech +object. + +## warn( @messages ) + +Centralized warning method, for diagnostics and non-fatal problems. +Defaults to calling `CORE::warn`, but may be overridden by setting +`onwarn` in the constructor. + +## die( @messages ) + +Centralized error method. Defaults to calling `CORE::die`, but +may be overridden by setting `onerror` in the constructor. + +# BEST PRACTICES + +The default settings can get you up and running quickly, but there are settings +you can change in order to make your life easier. + +- autocheck + + `autocheck` can save you the overhead of checking status codes for success. + You may outgrow it as your needs get more sophisticated, but it's a safe option + to start with. + + my $agent = WWW::Mechanize->new( autocheck => 1 ); + +- cookie\_jar + + You are encouraged to install [Mozilla::PublicSuffix](https://metacpan.org/pod/Mozilla%3A%3APublicSuffix) and use + [HTTP::CookieJar::LWP](https://metacpan.org/pod/HTTP%3A%3ACookieJar%3A%3ALWP) as your cookie jar. [HTTP::CookieJar::LWP](https://metacpan.org/pod/HTTP%3A%3ACookieJar%3A%3ALWP) provides a + better security model matching that of current Web browsers when + [Mozilla::PublicSuffix](https://metacpan.org/pod/Mozilla%3A%3APublicSuffix) is installed. + + use HTTP::CookieJar::LWP (); + + my $jar = HTTP::CookieJar::LWP->new; + my $agent = WWW::Mechanize->new( cookie_jar => $jar ); + +- protocols\_allowed + + This option is inherited directly from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). It allows you to + whitelist the protocols you're willing to allow. + + my $agent = WWW::Mechanize->new( + protocols_allowed => [ 'http', 'https' ] + ); + + This will prevent you from inadvertently following URLs like + `file:///etc/passwd` + +- protocols\_forbidden + + This option is also inherited directly from [LWP::UserAgent](https://metacpan.org/pod/LWP%3A%3AUserAgent). It allows you to + blacklist the protocols you're unwilling to allow. + + my $agent = WWW::Mechanize->new( + protocols_forbidden => [ 'file', 'mailto', 'ssh', ] + ); + + This will prevent you from inadvertently following URLs like + `file:///etc/passwd` + +- strict\_forms + + Consider turning on the `strict_forms` option when you create a new Mech. + This will perform a helpful sanity check on form fields every time you are + submitting a form, which can save you a lot of debugging time. + + my $agent = WWW::Mechanize->new( strict_forms => 1 ); + + If you do not want to have this option globally, you can still turn it on for + individual forms. + + $agent->submit_form( fields => { foo => 'bar' } , strict_forms => 1 ); + +# WWW::MECHANIZE'S GIT REPOSITORY + +WWW::Mechanize is hosted at GitHub. + +Repository: [https://github.com/libwww-perl/WWW-Mechanize](https://github.com/libwww-perl/WWW-Mechanize). +Bugs: [https://github.com/libwww-perl/WWW-Mechanize/issues](https://github.com/libwww-perl/WWW-Mechanize/issues). + +# OTHER DOCUMENTATION + +## _Spidering Hacks_, by Kevin Hemenway and Tara Calishain + +_Spidering Hacks_ from O'Reilly +([http://www.oreilly.com/catalog/spiderhks/](http://www.oreilly.com/catalog/spiderhks/)) is a great book for anyone +wanting to know more about screen-scraping and spidering. + +There are six hacks that use Mech or a Mech derivative: + +- #21 WWW::Mechanize 101 +- #22 Scraping with WWW::Mechanize +- #36 Downloading Images from Webshots +- #44 Archiving Yahoo! Groups Messages with WWW::Yahoo::Groups +- #64 Super Author Searching +- #73 Scraping TV Listings + +The book was also positively reviewed on Slashdot: +[http://books.slashdot.org/article.pl?sid=03/12/11/2126256](http://books.slashdot.org/article.pl?sid=03/12/11/2126256) + +# ONLINE RESOURCES AND SUPPORT + +- WWW::Mechanize mailing list + + The Mech mailing list is at + [http://groups.google.com/group/www-mechanize-users](http://groups.google.com/group/www-mechanize-users) and is specific + to Mechanize, unlike the LWP mailing list below. Although it is a + users list, all development discussion takes place here, too. + +- LWP mailing list + + The LWP mailing list is at + [http://lists.perl.org/showlist.cgi?name=libwww](http://lists.perl.org/showlist.cgi?name=libwww), and is more + user-oriented and well-populated than the WWW::Mechanize list. + +- Perlmonks + + [http://perlmonks.org](http://perlmonks.org) is an excellent community of support, and + many questions about Mech have already been answered there. + +- [WWW::Mechanize::Examples](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AExamples) + + A random array of examples submitted by users, included with the + Mechanize distribution. + +# ARTICLES ABOUT WWW::MECHANIZE + +- [http://www.ibm.com/developerworks/linux/library/wa-perlsecure/](http://www.ibm.com/developerworks/linux/library/wa-perlsecure/) + + IBM article "Secure Web site access with Perl" + +- [http://www.oreilly.com/catalog/googlehks2/chapter/hack84.pdf](http://www.oreilly.com/catalog/googlehks2/chapter/hack84.pdf) + + Leland Johnson's hack #84 in _Google Hacks, 2nd Edition_ is + an example of a production script that uses WWW::Mechanize and + HTML::TableContentParser. It takes in keywords and returns the estimated + price of these keywords on Google's AdWords program. + +- [http://www.perl.com/pub/a/2004/06/04/recorder.html](http://www.perl.com/pub/a/2004/06/04/recorder.html) + + Linda Julien writes about using HTTP::Recorder to create WWW::Mechanize + scripts. + +- [http://www.developer.com/lang/other/article.php/3454041](http://www.developer.com/lang/other/article.php/3454041) + + Jason Gilmore's article on using WWW::Mechanize for scraping sales + information from Amazon and eBay. + +- [http://www.perl.com/pub/a/2003/01/22/mechanize.html](http://www.perl.com/pub/a/2003/01/22/mechanize.html) + + Chris Ball's article about using WWW::Mechanize for scraping TV + listings. + +- [http://www.stonehenge.com/merlyn/LinuxMag/col47.html](http://www.stonehenge.com/merlyn/LinuxMag/col47.html) + + Randal Schwartz's article on scraping Yahoo News for images. It's + already out of date: He manually walks the list of links hunting + for matches, which wouldn't have been necessary if the + `[find_link()](#mech-find_link)` method existed at press time. + +- [http://www.perladvent.org/2002/16th/](http://www.perladvent.org/2002/16th/) + + WWW::Mechanize on the Perl Advent Calendar, by Mark Fowler. + +- [http://www.linux-magazin.de/ausgaben/2004/03/datenruessel/](http://www.linux-magazin.de/ausgaben/2004/03/datenruessel/) + + Michael Schilli's article on Mech and [WWW::Mechanize::Shell](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AShell) for the + German magazine _Linux Magazin_. + +## Other modules that use Mechanize + +Here are modules that use or subclass Mechanize. Let me know of any others: + +- [Finance::Bank::LloydsTSB](https://metacpan.org/pod/Finance%3A%3ABank%3A%3ALloydsTSB) +- [HTTP::Recorder](https://metacpan.org/pod/HTTP%3A%3ARecorder) + + Acts as a proxy for web interaction, and then generates WWW::Mechanize scripts. + +- [Win32::IE::Mechanize](https://metacpan.org/pod/Win32%3A%3AIE%3A%3AMechanize) + + Just like Mech, but using Microsoft Internet Explorer to do the work. + +- [WWW::Bugzilla](https://metacpan.org/pod/WWW%3A%3ABugzilla) +- [WWW::Google::Groups](https://metacpan.org/pod/WWW%3A%3AGoogle%3A%3AGroups) +- [WWW::Hotmail](https://metacpan.org/pod/WWW%3A%3AHotmail) +- [WWW::Mechanize::Cached](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ACached) +- [WWW::Mechanize::Cached::GZip](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ACached%3A%3AGZip) +- [WWW::Mechanize::FormFiller](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AFormFiller) +- [WWW::Mechanize::Shell](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3AShell) +- [WWW::Mechanize::Sleepy](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ASleepy) +- [WWW::Mechanize::SpamCop](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ASpamCop) +- [WWW::Mechanize::Timed](https://metacpan.org/pod/WWW%3A%3AMechanize%3A%3ATimed) +- [WWW::SourceForge](https://metacpan.org/pod/WWW%3A%3ASourceForge) +- [WWW::Yahoo::Groups](https://metacpan.org/pod/WWW%3A%3AYahoo%3A%3AGroups) +- [WWW::Scripter](https://metacpan.org/pod/WWW%3A%3AScripter) + +# ACKNOWLEDGEMENTS + +Thanks to the numerous people who have helped out on WWW::Mechanize in +one way or another, including +Kirrily Robert for the original `WWW::Automate`, +Lyle Hopkins, +Damien Clark, +Ansgar Burchardt, +Gisle Aas, +Jeremy Ary, +Hilary Holz, +Rafael Kitover, +Norbert Buchmuller, +Dave Page, +David Sainty, +H.Merijn Brand, +Matt Lawrence, +Michael Schwern, +Adriano Ferreira, +Miyagawa, +Peteris Krumins, +Rafael Kitover, +David Steinbrunner, +Kevin Falcone, +Mike O'Regan, +Mark Stosberg, +Uri Guttman, +Peter Scott, +Philippe Bruhat, +Ian Langworth, +John Beppu, +Gavin Estey, +Jim Brandt, +Ask Bjoern Hansen, +Greg Davies, +Ed Silva, +Mark-Jason Dominus, +Autrijus Tang, +Mark Fowler, +Stuart Children, +Max Maischein, +Meng Wong, +Prakash Kailasa, +Abigail, +Jan Pazdziora, +Dominique Quatravaux, +Scott Lanning, +Rob Casey, +Leland Johnson, +Joshua Gatcomb, +Julien Beasley, +Abe Timmerman, +Peter Stevens, +Pete Krawczyk, +Tad McClellan, +and the late great Iain Truskett. + +# AUTHOR + +Andy Lester <andy at petdance.com> + +# COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. diff --git a/cpanfile b/cpanfile new file mode 100644 index 0000000..390f9f0 --- /dev/null +++ b/cpanfile @@ -0,0 +1,93 @@ +# This file is generated by Dist::Zilla::Plugin::CPANFile v6.024 +# Do not edit this file directly. To change prereqs, edit the `dist.ini` file. + +requires "Carp" => "0"; +requires "Getopt::Long" => "0"; +requires "HTML::Form" => "1.00"; +requires "HTML::HeadParser" => "0"; +requires "HTML::TokeParser" => "0"; +requires "HTML::TreeBuilder" => "5"; +requires "HTTP::Cookies" => "0"; +requires "HTTP::Request" => "1.30"; +requires "HTTP::Request::Common" => "0"; +requires "LWP::UserAgent" => "6.45"; +requires "Pod::Usage" => "0"; +requires "Scalar::Util" => "1.14"; +requires "Tie::RefHash" => "0"; +requires "URI::URL" => "0"; +requires "URI::file" => "0"; +requires "base" => "0"; +requires "perl" => "5.006"; +requires "strict" => "0"; +requires "warnings" => "0"; +recommends "Compress::Zlib" => "0"; + +on 'test' => sub { + requires "CGI" => "4.32"; + requires "Exporter" => "0"; + requires "ExtUtils::MakeMaker" => "0"; + requires "File::Spec" => "0"; + requires "File::Temp" => "0"; + requires "FindBin" => "0"; + requires "HTTP::Daemon" => "6.12"; + requires "HTTP::Response" => "0"; + requires "HTTP::Server::Simple::CGI" => "0"; + requires "LWP" => "0"; + requires "LWP::Simple" => "0"; + requires "Path::Tiny" => "0"; + requires "Test::Deep" => "0"; + requires "Test::Exception" => "0"; + requires "Test::Fatal" => "0"; + requires "Test::Memory::Cycle" => "1.06"; + requires "Test::More" => "0.96"; + requires "Test::NoWarnings" => "1.04"; + requires "Test::Output" => "0"; + requires "Test::Taint" => "1.08"; + requires "Test::Warn" => "0"; + requires "Test::Warnings" => "0"; + requires "URI" => "0"; + requires "URI::Escape" => "0"; + requires "bytes" => "0"; + requires "lib" => "0"; +}; + +on 'test' => sub { + recommends "CPAN::Meta" => "2.120900"; +}; + +on 'configure' => sub { + requires "ExtUtils::MakeMaker" => "0"; +}; + +on 'configure' => sub { + suggests "JSON::PP" => "2.27300"; +}; + +on 'develop' => sub { + requires "Code::TidyAll" => "0.71"; + requires "Code::TidyAll::Plugin::SortLines::Naturally" => "0.000003"; + requires "Code::TidyAll::Plugin::Test::Vars" => "0.04"; + requires "Code::TidyAll::Plugin::UniqueLines" => "0.000003"; + requires "LWP::Protocol::https" => "6.07"; + requires "Parallel::ForkManager" => "1.19"; + requires "Perl::Critic" => "1.132"; + requires "Perl::Tidy" => "20180220"; + requires "Pod::Coverage::TrustPod" => "0"; + requires "Test::Code::TidyAll" => "0.50"; + requires "Test::EOL" => "0"; + requires "Test::Mojibake" => "0"; + requires "Test::More" => "0.88"; + requires "Test::Needs" => "0"; + requires "Test::Pod" => "1.41"; + requires "Test::Pod::Coverage" => "1.08"; + requires "Test::Portability::Files" => "0"; + requires "Test::RequiresInternet" => "0"; + requires "Test::Vars" => "0.014"; + requires "Test::Version" => "1"; + requires "constant" => "0"; + requires "lib" => "0"; +}; + +on 'develop' => sub { + recommends "Dist::Zilla::PluginBundle::Git::VersionManager" => "0.007"; +}; diff --git a/dist.ini b/dist.ini new file mode 100644 index 0000000..54acb4b --- /dev/null +++ b/dist.ini @@ -0,0 +1,44 @@ +name = WWW-Mechanize +author = Andy Lester <andy at petdance.com> +license = Perl_5 +main_module = lib/WWW/Mechanize.pm +copyright_holder = Andy Lester +copyright_year = 2004 + +[@Author::OALDERS] +-remove = CheckChangesHasContent +-remove = MinimumPerl +-remove = Test::CPAN::Changes ; has invalid release dates +-remove = Test::Perl::Critic +-remove = Test::PodSpelling +-remove = Test::Synopsis +-remove = ExtraTests +StaticInstall.mode = on +StaticInstall.dry_run = 0 + +[ExecDir] +dir = script + +[RunExtraTests] + +[Prereqs / RuntimeRequires] +perl = 5.006 +Scalar::Util = 1.14 + +[Prereqs / TestRequires] +CGI = 4.32 +HTTP::Daemon = 6.12 +Test::Memory::Cycle = 1.06 +Test::NoWarnings = 1.04 +Test::Taint = 1.08 + +[Prereqs / DevelopRequires] +LWP::Protocol::https = 6.07 +Perl::Critic = 0 +Perl::Tidy = 0 + +[Prereqs::Soften] +module = Compress::Zlib +to_relationship = recommends + +[MetaProvides::Package] diff --git a/etc/www-mechanize-logo.png b/etc/www-mechanize-logo.png Binary files differnew file mode 100644 index 0000000..ce5da29 --- /dev/null +++ b/etc/www-mechanize-logo.png diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm new file mode 100644 index 0000000..0bd4ec4 --- /dev/null +++ b/lib/WWW/Mechanize.pm @@ -0,0 +1,3614 @@ +package WWW::Mechanize; + +#ABSTRACT: Handy web browsing in a Perl object + + +use strict; +use warnings; + +our $VERSION = '2.04'; + +use Tie::RefHash; +use HTTP::Request 1.30; +use LWP::UserAgent 6.45; +use HTML::Form 1.00; +use HTML::TokeParser (); +use Scalar::Util qw(tainted); + +use base 'LWP::UserAgent'; + +our $HAS_ZLIB; +BEGIN { + $HAS_ZLIB = eval {require Compress::Zlib; 1;}; +} + + +sub new { + my $class = shift; + + my %parent_params = ( + agent => "WWW-Mechanize/$VERSION", + cookie_jar => {}, + ); + + my %mech_params = ( + autocheck => ($class eq 'WWW::Mechanize' ? 1 : 0), + onwarn => \&WWW::Mechanize::_warn, + onerror => \&WWW::Mechanize::_die, + quiet => 0, + stack_depth => 8675309, # Arbitrarily humongous stack + headers => {}, + noproxy => 0, + strict_forms => 0, # pass-through to HTML::Form + verbose_forms => 0, # pass-through to HTML::Form + marked_sections => 1, + ); + + my %passed_params = @_; + + # Keep the mech-specific params before creating the object. + while ( my($key,$value) = each %passed_params ) { + if ( exists $mech_params{$key} ) { + $mech_params{$key} = $value; + } + else { + $parent_params{$key} = $value; + } + } + + my $self = $class->SUPER::new( %parent_params ); + bless $self, $class; + + # Use the mech params now that we have a mech object. + for my $param ( keys %mech_params ) { + $self->{$param} = $mech_params{$param}; + } + $self->{page_stack} = []; + $self->env_proxy() unless $mech_params{noproxy}; + + # libwww-perl 5.800 (and before, I assume) has a problem where + # $ua->{proxy} can be undef and clone() doesn't handle it. + $self->{proxy} = {} unless defined $self->{proxy}; + push( @{$self->requests_redirectable}, 'POST' ); + + $self->_reset_page(); + + return $self; +} + +# overriding LWP::UA's static method +sub _agent { "WWW-Mechanize/$VERSION" } + + +my %known_agents = ( + 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', + 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6', + 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/85 (KHTML, like Gecko) Safari/85', + 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401', + 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624', + 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)', +); + +sub agent_alias { + my $self = shift; + my $alias = shift; + + if ( defined $known_agents{$alias} ) { + return $self->agent( $known_agents{$alias} ); + } + else { + $self->warn( qq{Unknown agent alias "$alias"} ); + return $self->agent(); + } +} + + +sub known_agent_aliases { + my @aliases = sort keys %known_agents; + return @aliases; +} + + +sub get { + my $self = shift; + my $uri = shift; + + $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; + + $uri = $self->base + ? URI->new_abs( $uri, $self->base ) + : URI->new( $uri ); + + # It appears we are returning a super-class method, + # but it in turn calls the request() method here in Mechanize + return $self->SUPER::get( $uri->as_string, @_ ); +} + + +sub post { + my $self = shift; + my $uri = shift; + + $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; + + $uri = $self->base + ? URI->new_abs( $uri, $self->base ) + : URI->new( $uri ); + + # It appears we are returning a super-class method, + # but it in turn calls the request() method here in Mechanize + return $self->SUPER::post( $uri->as_string, @_ ); +} + + +sub put { + my $self = shift; + my $uri = shift; + + $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; + + $uri = $self->base + ? URI->new_abs( $uri, $self->base ) + : URI->new( $uri ); + + # It appears we are returning a super-class method, + # but it in turn calls the request() method here in Mechanize + return $self->_SUPER_put( $uri->as_string, @_ ); +} + + +# Added until LWP::UserAgent has it. +sub _SUPER_put { + require HTTP::Request::Common; + my($self, @parameters) = @_; + my @suff = $self->_process_colonic_headers(\@parameters,1); + return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff ); +} + + +sub head { + my $self = shift; + my $uri = shift; + + $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link'; + + $uri = $self->base + ? URI->new_abs( $uri, $self->base ) + : URI->new( $uri ); + + # It appears we are returning a super-class method, + # but it in turn calls the request() method here in Mechanize + return $self->SUPER::head( $uri->as_string, @_ ); +} + + +sub reload { + my $self = shift; + + return unless my $req = $self->{req}; + + # LWP::UserAgent sets up a request_prepare handler that calls + # $self->cookie_jar->add_cookie_header($req) + # + # HTTP::Cookies::add_cookie_header always preserves existing + # cookies in a request object + # + # we pass an existing request to _make_request + # + # result: cookies will get repeated every time someone calls + # ->reload, sooner or later leading to a "request too big" from + # the server + # + # until https://rt.cpan.org/Public/Bug/Display.html?id=75897 is + # fixed, let's clear the cookies from the existing request + $req->remove_header('Cookie'); + + return $self->_update_page( $req, $self->_make_request( $req, @_ ) ); +} + + +sub back { + my $self = shift; + + my $stack = $self->{page_stack}; + return unless $stack && @{$stack}; + + my $popped = pop @{$self->{page_stack}}; + my $req = $popped->{req}; + my $res = $popped->{res}; + + $self->_update_page( $req, $res ); + + return 1; +} + + +sub clear_history { + my $self = shift; + + delete $self->{page_stack}; + + return 1; +} + + +sub history_count { + my $self = shift; + + # If we don't have a "current" page, we certainly don't have any previous + # ones. + return 0 unless $self->{req} && $self->{res}; + + my $stack = $self->{page_stack}; + + return 1 unless $stack; + + return 1 + @$stack; +} + + +sub history { + my $self = shift; + my $n = shift; + + return undef unless $self->{req} && $self->{res}; + + if ($n == 0) { + return { req => $self->{req}, res => $self->{res} }; + } + + my $stack = $self->{page_stack}; + return undef unless $stack && @$stack >= $n; + + return { req => $stack->[-$n]{req}, res => $stack->[-$n]{res} }; +} + + +sub success { + my $self = shift; + + return $self->res && $self->res->is_success; +} + + + +sub uri { + my $self = shift; + return $self->response ? $self->response->request->uri : undef; +} + +sub res { my $self = shift; return $self->{res}; } +sub response { my $self = shift; return $self->{res}; } +sub status { my $self = shift; return $self->{status}; } +sub ct { my $self = shift; return $self->{ct}; } +sub content_type { my $self = shift; return $self->{ct}; } +sub base { my $self = shift; return $self->{base}; } +sub is_html { + my $self = shift; + return defined $self->ct && + ($self->ct eq 'text/html' || $self->ct eq 'application/xhtml+xml'); +} + + +sub title { + my $self = shift; + + return unless $self->is_html; + + if ( not defined $self->{title} ) { + require HTML::HeadParser; + my $p = HTML::HeadParser->new; + $p->parse($self->content); + $self->{title} = $p->header('Title'); + } + return $self->{title}; +} + + +sub redirects { + my $self = shift; + + return $self->response->redirects; +} + + +sub content { + my $self = shift; + my %params = @_; + + my $content = $self->{content}; + if (delete $params{raw}) { + $content = $self->response()->content(); + } + elsif (delete $params{decoded_by_headers}) { + $content = $self->response()->decoded_content(charset => 'none'); + } + elsif (my $charset = delete $params{charset}) { + $content = $self->response()->decoded_content(charset => $charset); + } + elsif ( $self->is_html ) { + if ( exists $params{base_href} ) { + my $base_href = (delete $params{base_href}) || $self->base; + $content=~s/<head>/<head>\n<base href="$base_href">/i; + } + + if ( my $format = delete $params{format} ) { + if ( $format eq 'text' ) { + $content = $self->text; + } + else { + $self->die( qq{Unknown "format" parameter "$format"} ); + } + } + + $self->_check_unhandled_params( %params ); + } + + return $content; +} + + +sub text { + my $self = shift; + + if ( not defined $self->{text} ) { + unless ( exists $INC{'HTML::TreeBuilder'} ) { + require HTML::TreeBuilder; + HTML::TreeBuilder->VERSION(5); + HTML::TreeBuilder->import('-weak'); + } + my $tree = HTML::TreeBuilder->new(); + $tree->parse( $self->content ); + $tree->eof(); + $tree->elementify(); # just for safety + $self->{text} = $tree->as_text(); + } + + return $self->{text}; +} + +sub _check_unhandled_params { + my $self = shift; + my %params = @_; + + for my $cmd ( sort keys %params ) { + $self->die( qq{Unknown named argument "$cmd"} ); + } +} + + +sub links { + my $self = shift; + + $self->_extract_links() unless $self->{links}; + + return @{$self->{links}} if wantarray; + return $self->{links}; +} + + +sub follow_link { + my $self = shift; + $self->die( qq{Needs to get key-value pairs of parameters.} ) if @_ % 2; + my %params = ( n=>1, @_ ); + + if ( $params{n} eq 'all' ) { + delete $params{n}; + $self->warn( q{follow_link(n=>"all") is not valid} ); + } + + my $link = $self->find_link(%params); + if ( $link ) { + return $self->get( $link->url ); + } + + if ( $self->{autocheck} ) { + $self->die( 'Link not found' ); + } + + return; +} + + +sub find_link { + my $self = shift; + my %params = ( n=>1, @_ ); + + my $wantall = ( $params{n} eq 'all' ); + + $self->_clean_keys( \%params, qr/^(n|(text|url|url_abs|name|tag|id|class|rel)(_regex)?)$/ ); + + my @links = $self->links or return; + + my $nmatches = 0; + my @matches; + for my $link ( @links ) { + if ( _match_any_link_params($link,\%params) ) { + if ( $wantall ) { + push( @matches, $link ); + } + else { + ++$nmatches; + return $link if $nmatches >= $params{n}; + } + } + } # for @links + + if ( $wantall ) { + return @matches if wantarray; + return \@matches; + } + + return; +} # find_link + +# Used by find_links to check for matches +# The logic is such that ALL param criteria that are given must match +sub _match_any_link_params { + my $link = shift; + my $p = shift; + + # No conditions, anything matches + return 1 unless keys %$p; + + return if defined $p->{url} && !($link->url eq $p->{url} ); + return if defined $p->{url_regex} && !($link->url =~ $p->{url_regex} ); + return if defined $p->{url_abs} && !($link->url_abs eq $p->{url_abs} ); + return if defined $p->{url_abs_regex} && !($link->url_abs =~ $p->{url_abs_regex} ); + return if defined $p->{text} && !(defined($link->text) && $link->text eq $p->{text} ); + return if defined $p->{text_regex} && !(defined($link->text) && $link->text =~ $p->{text_regex} ); + return if defined $p->{name} && !(defined($link->name) && $link->name eq $p->{name} ); + return if defined $p->{name_regex} && !(defined($link->name) && $link->name =~ $p->{name_regex} ); + return if defined $p->{tag} && !($link->tag && $link->tag eq $p->{tag} ); + return if defined $p->{tag_regex} && !($link->tag && $link->tag =~ $p->{tag_regex} ); + + return if defined $p->{id} && !($link->attrs->{id} && $link->attrs->{id} eq $p->{id} ); + return if defined $p->{id_regex} && !($link->attrs->{id} && $link->attrs->{id} =~ $p->{id_regex} ); + return if defined $p->{class} && !($link->attrs->{class} && $link->attrs->{class} eq $p->{class} ); + return if defined $p->{class_regex} && !($link->attrs->{class} && $link->attrs->{class} =~ $p->{class_regex} ); + + return if defined $p->{rel} && !($link->attrs->{rel} && $link->attrs->{rel} eq $p->{rel} ); + return if defined $p->{rel_regex} && !($link->attrs->{rel} && $link->attrs->{rel} =~ $p->{rel_regex} ); + + # Success: everything that was defined passed. + return 1; + +} + +# Cleans the %params parameter for the find_link and find_image methods. +sub _clean_keys { + my $self = shift; + my $params = shift; + my $rx_keyname = shift; + + for my $key ( keys %$params ) { + my $val = $params->{$key}; + if ( $key !~ qr/$rx_keyname/ ) { + $self->warn( qq{Unknown link-finding parameter "$key"} ); + delete $params->{$key}; + next; + } + + my $key_regex = ( $key =~ /_regex$/ ); + my $val_regex = ( ref($val) eq 'Regexp' ); + + if ( $key_regex ) { + if ( !$val_regex ) { + $self->warn( qq{$val passed as $key is not a regex} ); + delete $params->{$key}; + next; + } + } + else { + if ( $val_regex ) { + $self->warn( qq{$val passed as '$key' is a regex} ); + delete $params->{$key}; + next; + } + if ( $val =~ /^\s|\s$/ ) { + $self->warn( qq{'$val' is space-padded and cannot succeed} ); + delete $params->{$key}; + next; + } + } + } # for keys %params + + return; +} # _clean_keys() + + + +sub find_all_links { + my $self = shift; + return $self->find_link( @_, n=>'all' ); +} + + +sub find_all_inputs { + my $self = shift; + my %criteria = @_; + + my $form = $self->current_form() or return; + + my @found; + foreach my $input ( $form->inputs ) { # check every pattern for a match on the current hash + my $matched = 1; + foreach my $criterion ( sort keys %criteria ) { # Sort so we're deterministic + my $field = $criterion; + my $is_regex = ( $field =~ s/(?:_regex)$// ); + my $what = $input->{$field}; + $matched = defined($what) && ( + $is_regex + ? ( $what =~ $criteria{$criterion} ) + : ( $what eq $criteria{$criterion} ) + ); + last if !$matched; + } + push @found, $input if $matched; + } + return @found; +} + + +sub find_all_submits { + my $self = shift; + + return $self->find_all_inputs( @_, type_regex => qr/^(submit|image)$/ ); +} + + + +sub images { + my $self = shift; + + $self->_extract_images() unless $self->{images}; + + return @{$self->{images}} if wantarray; + return $self->{images}; +} + + +sub find_image { + my $self = shift; + my %params = ( n=>1, @_ ); + + my $wantall = ( $params{n} eq 'all' ); + + $self->_clean_keys( \%params, qr/^(?:n|(?:alt|url|url_abs|tag|id|class)(?:_regex)?)$/ ); + + my @images = $self->images or return; + + my $nmatches = 0; + my @matches; + for my $image ( @images ) { + if ( _match_any_image_params($image,\%params) ) { + if ( $wantall ) { + push( @matches, $image ); + } + else { + ++$nmatches; + return $image if $nmatches >= $params{n}; + } + } + } # for @images + + if ( $wantall ) { + return @matches if wantarray; + return \@matches; + } + + return; +} + +# Used by find_images to check for matches +# The logic is such that ALL param criteria that are given must match +sub _match_any_image_params { + my $image = shift; + my $p = shift; + + # No conditions, anything matches + return 1 unless keys %$p; + + return if defined $p->{url} && !(defined($image->url) && $image->url eq $p->{url} ); + return if defined $p->{url_regex} && !(defined($image->url) && $image->url =~ $p->{url_regex} ); + return if defined $p->{url_abs} && !(defined($image->url_abs) && $image->url_abs eq $p->{url_abs} ); + return if defined $p->{url_abs_regex} && !(defined($image->url_abs) && $image->url_abs =~ $p->{url_abs_regex} ); + return if defined $p->{alt} && !(defined($image->alt) && $image->alt eq $p->{alt} ); + return if defined $p->{alt_regex} && !(defined($image->alt) && $image->alt =~ $p->{alt_regex} ); + return if defined $p->{tag} && !($image->tag && $image->tag eq $p->{tag} ); + return if defined $p->{tag_regex} && !($image->tag && $image->tag =~ $p->{tag_regex} ); + return if defined $p->{id} && !($image->attrs && $image->attrs->{id} && $image->attrs->{id} eq $p->{id} ); + return if defined $p->{id_regex} && !($image->attrs && $image->attrs->{id} && $image->attrs->{id} =~ $p->{id_regex} ); + return if defined $p->{class} && !($image->attrs && $image->attrs->{class} && $image->attrs->{class} eq $p->{class} ); + return if defined $p->{class_regex} && !($image->attrs && $image->attrs->{class} && $image->attrs->{class} =~ $p->{class_regex} ); + + # Success: everything that was defined passed. + return 1; +} + + + +sub find_all_images { + my $self = shift; + return $self->find_image( @_, n=>'all' ); +} + + +sub forms { + my $self = shift; + + $self->_extract_forms() unless $self->{forms}; + + return @{$self->{forms}} if wantarray; + return $self->{forms}; +} + +sub current_form { + my $self = shift; + + if ( !$self->{current_form} ) { + $self->form_number(1); + } + + return $self->{current_form}; +} + + +sub form_number { + my ($self, $form) = @_; + # XXX Should we die if no $form is defined? Same question for form_name() + + my $forms = $self->forms; + if ( $forms->[$form-1] ) { + $self->{current_form} = $forms->[$form-1]; + return wantarray + ? ($self->{current_form}, $form) + : $self->{current_form}; + } + + return wantarray ? () : undef; +} + + +sub form_name { + my ($self, $form) = @_; + return $self->form_with( name => $form ); +} + + +sub form_id { + my ($self, $formid) = @_; + defined( my $form = $self->form_with( id => $formid ) ) + or $self->warn(qq{ There is no form with ID "$formid"}); + return $form; +} + + + +sub all_forms_with_fields { + my ($self, @fields) = @_; + die 'no fields provided' unless scalar @fields; + + my @matches; + FORMS: for my $form (@{ $self->forms }) { + my @fields_in_form = $form->param(); + for my $field (@fields) { + next FORMS unless grep { $_ eq $field } @fields_in_form; + } + push @matches, $form; + } + return @matches; +} + + + +sub form_with_fields { + my ($self, @fields) = @_; + die 'no fields provided' unless scalar @fields; + + my @matches = $self->all_forms_with_fields(@fields); + my $nmatches = @matches; + if ( $nmatches > 0 ) { + if ( $nmatches > 1 ) { + $self->warn( "There are $nmatches forms with the named fields. The first one was used." ) + } + return $self->{current_form} = $matches[0]; + } + else { + $self->warn( qq{There is no form with the requested fields} ); + return undef; + } +} + + + +sub all_forms_with { + my ( $self, %spec ) = @_; + + my @forms = $self->forms; + foreach my $attr ( keys %spec ) { + @forms = grep _equal( $spec{$attr}, $_->attr($attr) ), @forms or return; + } + return @forms; +} + + +sub form_with { + my ( $self, %spec ) = @_; + + return if not $self->forms; + my @forms = $self->all_forms_with(%spec); + if ( @forms > 1 ) { # Warn if several forms matched. + # For ->form_with( method => 'POST', action => '', id => undef ) we get: + # >>There are 2 forms with empty action and no id and method "POST". + # The first one was used.<< + + $self->warn( + 'There are ' . @forms . ' forms ' . ( + keys %spec # explain search criteria if there were any + ? 'with ' . join( + ' and ', # "with ... and ... and ..." + map { + unless ( defined $spec{$_} ) { # case $attr => undef + qq{no $_}; + } + elsif ( $spec{$_} eq '' ) { # case $attr=> '' + qq{empty $_}; + } + else { # case $attr => $value + qq{$_ "$spec{$_}"}; + } + } # case $attr => undef + sort keys %spec # sort keys to get deterministic messages + ) + : '' + ) + . '. The first one was used.' + ); + } + + return $self->{current_form} = $forms[0]; +} + +# NOT an object method! +# Expects two values and returns true only when either +# both are defined and eq(ual) or when both are not defined. +sub _equal { + my ( $x, $y ) = @_; + defined $x ? defined $y && $x eq $y : !defined $y; +} + + + +sub field { + my ($self, $name, $value, $number) = @_; + $number ||= 1; + + my $form = $self->current_form(); + if ($number > 1) { + $form->find_input($name, undef, $number)->value($value); + } + else { + if ( ref($value) eq 'ARRAY' ) { + $form->param($name, $value); + } + else { + $form->value($name => $value); + } + } +} + + +sub select { + my ($self, $name, $value) = @_; + + my $form = $self->current_form(); + + my $input = $form->find_input($name); + if (!$input) { + $self->warn( qq{Input "$name" not found} ); + return; + } + + if ($input->type ne 'option') { + $self->warn( qq{Input "$name" is not type "select"} ); + return; + } + + # For $mech->select($name, {n => 3}) or $mech->select($name, {n => [2,4]}), + # transform the 'n' number(s) into value(s) and put it in $value. + if (ref($value) eq 'HASH') { + for (keys %$value) { + $self->warn(qq{Unknown select value parameter "$_"}) + unless $_ eq 'n'; + } + + if (defined($value->{n})) { + my @inputs = $form->find_input($name, 'option'); + my @values = (); + # distinguish between multiple and non-multiple selects + # (see INPUTS section of `perldoc HTML::Form`) + if (@inputs == 1) { + @values = $inputs[0]->possible_values(); + } + else { + foreach my $input (@inputs) { + my @possible = $input->possible_values(); + push @values, pop @possible; + } + } + + my $n = $value->{n}; + if (ref($n) eq 'ARRAY') { + $value = []; + for (@$n) { + unless (/^\d+$/) { + $self->warn(qq{"n" value "$_" is not a positive integer}); + return; + } + push @$value, $values[$_ - 1]; # might be undef + } + } + elsif (!ref($n) && $n =~ /^\d+$/) { + $value = $values[$n - 1]; # might be undef + } + else { + $self->warn('"n" value is not a positive integer or an array ref'); + return; + } + } + else { + $self->warn('Hash value is invalid'); + return; + } + } # hashref + + if (ref($value) eq 'ARRAY') { + $form->param($name, $value); + return 1; + } + + $form->value($name => $value); + return 1; +} + + +sub set_fields { + my $self = shift; + my %fields = @_; + + my $form = $self->current_form or $self->die( 'No form defined' ); + + while ( my ( $field, $value ) = each %fields ) { + if ( ref $value eq 'ARRAY' ) { + $form->find_input( $field, undef, + $value->[1])->value($value->[0] ); + } + else { + $form->value($field => $value); + } + } # while +} # set_fields() + + +sub set_visible { + my $self = shift; + + my $form = $self->current_form; + my @inputs = $form->inputs; + + my $num_set = 0; + for my $value ( @_ ) { + # Handle type/value pairs an arrayref + if ( ref $value eq 'ARRAY' ) { + my ( $type, $value ) = @$value; + while ( my $input = shift @inputs ) { + next if $input->type eq 'hidden'; + if ( $input->type eq $type ) { + $input->value( $value ); + $num_set++; + last; + } + } # while + } + # by default, it's a value + else { + while ( my $input = shift @inputs ) { + next if $input->type eq 'hidden'; + $input->value( $value ); + $num_set++; + last; + } # while + } + } # for + + return $num_set; +} # set_visible() + + +sub tick { + my $self = shift; + my $name = shift; + my $value = shift; + my $set = @_ ? shift : 1; # default to 1 if not passed + + # loop though all the inputs + my $index = 1; + while ( my $input = $self->current_form->find_input( $name, 'checkbox', $index ) ) { + # Can't guarantee that the first element will be undef and the second + # element will be the right name + foreach my $val ($input->possible_values()) { + next unless defined $val; + if ($val eq $value) { + $input->value($set ? $value : undef); + return; + } + } + + # move onto the next input + $index++; + } # while + + # got this far? Didn't find anything + $self->die( qq{No checkbox "$name" for value "$value" in form} ); +} # tick() + + +sub untick { + shift->tick(shift,shift,undef); +} + + +sub value { + my $self = shift; + my $name = shift; + my $number = shift || 1; + + my $form = $self->current_form; + if ( $number > 1 ) { + return $form->find_input( $name, undef, $number )->value(); + } + else { + return $form->value( $name ); + } +} # value + + +sub click { + my ($self, $button, $x, $y) = @_; + for ($x, $y) { $_ = 1 unless defined; } + my $request = $self->current_form->click($button, $x, $y); + return $self->request( $request ); +} + + +sub click_button { + my $self = shift; + my %args = @_; + + for ( keys %args ) { + if ( !/^(number|name|value|id|input|x|y)$/ ) { + $self->warn( qq{Unknown click_button parameter "$_"} ); + } + } + + my %exclusive_options = ( + id => 1, + input => 1, + name => 1, + number => 1, + value => 1, + ); + + my @present_exclusive_options = @exclusive_options{ keys %args }; + + if ( scalar @present_exclusive_options > 1 ) { + $self->die( 'click_button: More than one button selector has been used' ); + } + + for ($args{x}, $args{y}) { + $_ = 1 unless defined; + } + + my $form = $self->current_form or $self->die( 'click_button: No form has been selected' ); + + my $request; + if ( $args{name} ) { + $request = $form->click( $args{name}, $args{x}, $args{y} ); + } + # 0 is a valid id in HTML5 + elsif ( defined $args{id} ) { + # HTML::Form expects ids to be prefixed with '#' + my $input = $form->find_input('#' . $args{id}); + $request = $input->click( $form, $args{x}, $args{y} ); + } + elsif ( $args{number} ) { + # changing this 'submit' to qw/submit button image/ will probably break people's code + my $input = $form->find_input( undef, 'submit', $args{number} ); + $request = $input->click( $form, $args{x}, $args{y} ); + } + elsif ( $args{input} ) { + $request = $args{input}->click( $form, $args{x}, $args{y} ); + } + elsif ( $args{value} ) { + my @inputs = map { $form->find_input(undef, $_) } qw/submit button image/; + foreach my $input ( @inputs ) { + if ( $input->value && ($args{value} eq $input->value) ) { + $request = $input->click( $form, $args{x}, $args{y} ); + last; + } + } # foreach + } # $args{value} + + return $self->request( $request ); +} + + +sub submit { + my $self = shift; + + my $request = $self->current_form->make_request; + return $self->request( $request ); +} + + +sub submit_form { + my( $self, %args ) = @_; + + for ( keys %args ) { + if ( !/^(form_(number|name|fields|id)|(with_)?fields|button|x|y|strict_forms)$/ ) { + $self->die( qq{Unknown submit_form parameter "$_"} ); + } + } + + my $fields; + for (qw/with_fields fields/) { + if ($args{$_}) { + if ( ref $args{$_} eq 'HASH' ) { + $fields = $args{$_}; + } + else { + $self->die("$_ arg to submit_form must be a hashref"); + } + last; + } + } + + my @filtered_sets; + if ( $args{with_fields} ) { + $fields || die q{must submit some 'fields' with with_fields}; + my @got = $self->all_forms_with_fields(keys %{$fields}); + $self->die("There is no form with the requested fields") if not @got; + push @filtered_sets, \@got; + } + if ( my $form_number = $args{form_number} ) { + my $got = $self->form_number( $form_number ); + $self->die("There is no form numbered $form_number") if not $got; + push @filtered_sets, [ $got ]; + } + if ( my $form_name = $args{form_name} ) { + my @got = $self->all_forms_with( name => $form_name ); + $self->die(qq{There is no form named "$form_name"}) if not @got; + push @filtered_sets, \@got; + } + if ( my $form_id = $args{form_id} ) { + my @got = $self->all_forms_with( id => $form_id ); + $self->die(qq{There is no form with ID "$form_id"}) if not @got; + push @filtered_sets, \@got; + } + + if (not @filtered_sets) { + # No form selector was used. + # Maybe a form was set separately, or we'll default to the first form. + } + else { + # Need to intersect to apply all the various filters. + # Assume that each filtered set only has a given form object once. + # So we can count occurrences. + # + tie my %c, 'Tie::RefHash' or die; + foreach (@filtered_sets) { + foreach (@$_) { + ++$c{$_}; + } + } + my $expected_count = scalar @filtered_sets; + my @matched = grep { $c{$_} == $expected_count } keys %c; + if (not @matched) { + $self->die('There is no form that satisfies all the criteria'); + } + if (@matched > 1) { + $self->die('More than one form satisfies all the criteria'); + } + $self->{current_form} = $matched[0]; + } + + if (defined($args{strict_forms})) { + # Strict argument has been passed, set the flag as appropriate + # this must be done prior to attempting to set the fields + $self->current_form->strict($args{strict_forms}); + } + + $self->set_fields( %{$fields} ) if $fields; + + my $response; + if ( $args{button} ) { + $response = $self->click( $args{button}, $args{x} || 0, $args{y} || 0 ); + } + else { + $response = $self->submit(); + } + + return $response; +} + + +sub add_header { + my $self = shift; + my $npairs = 0; + + while ( @_ ) { + my $key = shift; + my $value = shift; + ++$npairs; + + $self->{headers}{$key} = $value; + } + + return $npairs; +} + + +sub delete_header { + my $self = shift; + + while ( @_ ) { + my $key = shift; + + delete $self->{headers}{$key}; + } + + return; +} + + + +sub quiet { + my $self = shift; + + $self->{quiet} = $_[0] if @_; + + return $self->{quiet}; +} + + +sub stack_depth { + my $self = shift; + $self->{stack_depth} = shift if @_; + return $self->{stack_depth}; +} + + +sub save_content { + my $self = shift; + my $filename = shift; + my %opts = @_; + if (delete $opts{binary}) { + $opts{binmode} = ':raw'; + $opts{decoded_by_headers} = 1; + } + + open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" ); + if ((my $binmode = delete($opts{binmode}) || '') || ($self->content_type() !~ m{^text/})) { + if (length($binmode) && (substr($binmode, 0, 1) eq ':')) { + binmode $fh, $binmode; + } + else { + binmode $fh; + } + } + print {$fh} $self->content(%opts) or $self->die( "Unable to write to $filename: $!" ); + close $fh or $self->die( "Unable to close $filename: $!" ); + + return; +} + + + +sub _get_fh_default_stdout { + my $self = shift; + my $p = shift || ''; + if ( !$p ) { + return \*STDOUT; + } elsif ( !ref($p) ) { + open my $fh, '>', $p or $self->die( "Unable to write to $p: $!" );; + return $fh; + } else { + return $p; + } +} + +sub dump_headers { + my $self = shift; + my $fh = $self->_get_fh_default_stdout(shift); + + print {$fh} $self->response->headers_as_string; + + return; +} + + + +sub dump_links { + my $self = shift; + my $fh = shift || \*STDOUT; + my $absolute = shift; + + for my $link ( $self->links ) { + my $url = $absolute ? $link->url_abs : $link->url; + $url = '' if not defined $url; + print {$fh} $url, "\n"; + } + return; +} + + +sub dump_images { + my $self = shift; + my $fh = shift || \*STDOUT; + my $absolute = shift; + + for my $image ( $self->images ) { + my $url = $absolute ? $image->url_abs : $image->url; + $url = '' if not defined $url; + print {$fh} $url, "\n"; + } + return; +} + + +sub dump_forms { + my $self = shift; + my $fh = shift || \*STDOUT; + + for my $form ( $self->forms ) { + print {$fh} $form->dump, "\n"; + } + return; +} + + +sub dump_text { + my $self = shift; + my $fh = shift || \*STDOUT; + my $absolute = shift; + + print {$fh} $self->text, "\n"; + + return; +} + + + +sub clone { + my $self = shift; + my $clone = $self->SUPER::clone(); + + $clone->cookie_jar( $self->cookie_jar ); + $clone->{headers} = { %{$self->{headers}} }; + + return $clone; +} + + + +sub redirect_ok { + my $self = shift; + my $prospective_request = shift; + my $response = shift; + + my $ok = $self->SUPER::redirect_ok( $prospective_request, $response ); + if ( $ok ) { + $self->{redirected_uri} = $prospective_request->uri; + } + + return $ok; +} + + + +sub request { + my $self = shift; + my $request = shift; + + _die( '->request was called without a request parameter' ) + unless $request; + + $request = $self->_modify_request( $request ); + + if ( $request->method eq 'GET' || $request->method eq 'POST' ) { + $self->_push_page_stack(); + } + + return $self->_update_page($request, $self->_make_request( $request, @_ )); +} + + +sub update_html { + my $self = shift; + my $html = shift; + + $self->_reset_page; + $self->{ct} = 'text/html'; + $self->{content} = $html; + + return; +} + + +sub credentials { + my $self = shift; + + # The latest LWP::UserAgent also supports 2 arguments, + # in which case the first is host:port + if (@_ == 4 || (@_ == 2 && $_[0] =~ /:\d+$/)) { + return $self->SUPER::credentials(@_); + } + + @_ == 2 + or $self->die( 'Invalid # of args for overridden credentials()' ); + + return @$self{qw( __username __password )} = @_; +} + + +sub get_basic_credentials { + my $self = shift; + my @cred = grep { defined } @$self{qw( __username __password )}; + return @cred if @cred == 2; + return $self->SUPER::get_basic_credentials(@_); +} + + +sub clear_credentials { + my $self = shift; + delete @$self{qw( __username __password )}; +} + + +sub _update_page { + my ($self, $request, $res) = @_; + + $self->{req} = $request; + $self->{redirected_uri} = $request->uri->as_string; + + $self->{res} = $res; + + $self->{status} = $res->code; + $self->{base} = $res->base; + $self->{ct} = $res->content_type || ''; + + if ( $res->is_success ) { + $self->{uri} = $self->{redirected_uri}; + $self->{last_uri} = $self->{uri}; + } + + if ( $res->is_error ) { + if ( $self->{autocheck} ) { + $self->die( 'Error ', $request->method, 'ing ', $request->uri, ': ', $res->message ); + } + } + + $self->_reset_page; + + # Try to decode the content. Undef will be returned if there's nothing to decompress. + # See docs in HTTP::Message for details. Do we need to expose the options there? + my $content = $res->decoded_content(); + $content = $res->content if (not defined $content); + + $content .= _taintedness(); + + if ($self->is_html) { + $self->update_html($content); + } + else { + $self->{content} = $content; + } + + return $res; +} # _update_page + +our $_taintbrush; + +# This is lifted wholesale from Test::Taint +sub _taintedness { + return $_taintbrush if defined $_taintbrush; + + # Somehow we need to get some taintedness into our $_taintbrush. + # Let's try the easy way first. Either of these should be + # tainted, unless somebody has untainted them, so this + # will almost always work on the first try. + # (Unless, of course, taint checking has been turned off!) + $_taintbrush = substr("$0$^X", 0, 0); + return $_taintbrush if tainted( $_taintbrush ); + + # Let's try again. Maybe somebody cleaned those. + $_taintbrush = substr(join('', grep { defined } @ARGV, %ENV), 0, 0); + return $_taintbrush if tainted( $_taintbrush ); + + # If those don't work, go try to open some file from some unsafe + # source and get data from them. That data is tainted. + # (Yes, even reading from /dev/null works!) + for my $filename ( qw(/dev/null / . ..), values %INC, $0, $^X ) { + if ( open my $fh, '<', $filename ) { + my $data; + if ( defined sysread $fh, $data, 1 ) { + $_taintbrush = substr( $data, 0, 0 ); + last if tainted( $_taintbrush ); + } + } + } + + # Sanity check + die "Our taintbrush should have zero length!" if length $_taintbrush; + + return $_taintbrush; +} + + + +sub _modify_request { + my $self = shift; + my $req = shift; + + # add correct Accept-Encoding header to restore compliance with + # http://www.freesoft.org/CIE/RFC/2068/158.htm + # http://use.perl.org/~rhesa/journal/25952 + if (not $req->header( 'Accept-Encoding' ) ) { + # "identity" means "please! unencoded content only!" + $req->header( 'Accept-Encoding', $HAS_ZLIB ? 'gzip' : 'identity' ); + } + + my $last = $self->{last_uri}; + if ( $last ) { + $last = $last->as_string if ref($last); + $req->header( Referer => $last ); + } + while ( my($key,$value) = each %{$self->{headers}} ) { + if ( defined $value ) { + $req->header( $key => $value ); + } + else { + $req->remove_header( $key ); + } + } + + return $req; +} + + + +sub _make_request { + my $self = shift; + return $self->SUPER::request(@_); +} + + +sub _reset_page { + my $self = shift; + + $self->{links} = undef; + $self->{images} = undef; + $self->{forms} = undef; + $self->{current_form} = undef; + $self->{title} = undef; + $self->{text} = undef; + + return; +} + + +my %link_tags = ( + a => 'href', + area => 'href', + frame => 'src', + iframe => 'src', + link => 'href', + meta => 'content', +); + +sub _new_parser { + my $self = shift; + my $content_ref = shift; + + my $parser = HTML::TokeParser->new($content_ref); + $parser->marked_sections( $self->{marked_sections}); + $parser->xml_mode( $$content_ref=~/^\s*<\?xml/ ); # NOT GENERALLY RELIABLE + + return $parser; +} + +sub _extract_links { + my $self = shift; + + + $self->{links} = []; + if ( defined $self->{content} ) { + my $parser = $self->_new_parser(\$self->{content}); + while ( my $token = $parser->get_tag( keys %link_tags ) ) { + my $link = $self->_link_from_token( $token, $parser ); + push( @{$self->{links}}, $link ) if $link; + } # while + } + + return; +} + + +my %image_tags = ( + img => 'src', + input => 'src', +); + +sub _extract_images { + my $self = shift; + + $self->{images} = []; + + if ( defined $self->{content} ) { + if ($self->content_type eq 'text/css') { + push( @{$self->{images}}, $self->_images_from_css($self->{content}) ); + } + else { + my $parser = $self->_new_parser(\$self->{content}); + while ( my $token = $parser->get_tag() ) { + my ($tag_name, $attrs) = @{$token}; + next if $tag_name =~ m{^/}; + + if ($image_tags{$tag_name}) { + my $image = $self->_image_from_token( $token, $parser ); + push( @{$self->{images}}, $image ) if $image; + } + elsif ($tag_name eq 'style') { + push( @{$self->{images}}, $self->_images_from_css($parser->get_text) ); + } + + if ($attrs->{style}) { + push( @{$self->{images}}, $self->_images_from_css($attrs->{style}) ); + } + } # while + } + } + + return; +} + +sub _image_from_token { + my $self = shift; + my $token = shift; + my $parser = shift; + + my $tag = $token->[0]; + my $attrs = $token->[1]; + + if ( $tag eq 'input' ) { + my $type = $attrs->{type} or return; + return unless $type eq 'image'; + } + + require WWW::Mechanize::Image; + return + WWW::Mechanize::Image->new({ + tag => $tag, + base => $self->base, + url => $attrs->{src}, + name => $attrs->{name}, + height => $attrs->{height}, + width => $attrs->{width}, + alt => $attrs->{alt}, + attrs => $attrs, + }); +} + +my $STYLE_URL_REGEXP = qr{ + # ex. "url('/site.css')" + ( # capture non url path of the string + url # url + \s* # + \( # ( + \s* # + (['"]?) # opening ' or " + ) + ( # the rest is url + .+? # non greedy "everything" + ) + ( + \2 # closing ' or " + \s* # + \) # ) + ) +}xmsi; + +sub _images_from_css { + my $self = shift; + my $css = shift; + + my @images; + while ($css =~ m/$STYLE_URL_REGEXP/g) { + my $url = $3; + require WWW::Mechanize::Image; + push( + @images, + WWW::Mechanize::Image->new({ + tag => 'css', + base => $self->base, + url => $url, + name => undef, + height => undef, + width => undef, + alt => undef, + }) + ); + } + + return @images; +} + +sub _link_from_token { + my $self = shift; + my $token = shift; + my $parser = shift; + + my $tag = $token->[0]; + my $attrs = $token->[1]; + my $url = $attrs->{$link_tags{$tag}}; + + my $text; + my $name; + if ( $tag eq 'a' ) { + $text = $parser->get_trimmed_text("/$tag"); + $text = '' unless defined $text; + + my $onClick = $attrs->{onclick}; + if ( $onClick && ($onClick =~ /^window\.open\(\s*'([^']+)'/) ) { + $url = $1; + } + elsif( $url && $url =~ /^javascript\:\s*(?:void\(\s*)?window\.open\(\s*'([^']+)'/s ){ + $url = $1; + } + } # a + + # Of the tags we extract from, only 'AREA' has an alt tag + # The rest should have a 'name' attribute. + # ... but we don't do anything with that bit of wisdom now. + + $name = $attrs->{name}; + + if ( $tag eq 'meta' ) { + my $equiv = $attrs->{'http-equiv'}; + my $content = $attrs->{'content'}; + return unless $equiv && (lc $equiv eq 'refresh') && defined $content; + + if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) { + $url = $1; + $url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/; + } + else { + undef $url; + } + } # meta + + return unless defined $url; # probably just a name link or <AREA NOHREF...> + + require WWW::Mechanize::Link; + return + WWW::Mechanize::Link->new({ + url => $url, + text => $text, + name => $name, + tag => $tag, + base => $self->base, + attrs => $attrs, + }); +} # _link_from_token + + +sub _extract_forms { + my $self = shift; + + my @forms = HTML::Form->parse( + $self->content, + base => $self->base, + strict => $self->{strict_forms}, + verbose => $self->{verbose_forms}, + ); + $self->{forms} = \@forms; + for my $form ( @forms ) { + for my $input ($form->inputs) { + if ($input->type eq 'file') { + $input->value( undef ); + } + } + } + + return; +} + + +sub _push_page_stack { + my $self = shift; + + my $req = $self->{req}; + my $res = $self->{res}; + + return unless $req && $res && $self->stack_depth; + + # Don't push anything if it's a virgin object + my $stack = $self->{page_stack} ||= []; + if ( @{$stack} >= $self->stack_depth ) { + shift @{$stack}; + } + push( @{$stack}, { req => $req, res => $res } ); + + return 1; +} + + +sub warn { + my $self = shift; + + return unless my $handler = $self->{onwarn}; + + return if $self->quiet; + + return $handler->(@_); +} + + +sub die { + my $self = shift; + + return unless my $handler = $self->{onerror}; + + return $handler->(@_); +} + + +# NOT an object method! +sub _warn { + require Carp; + return &Carp::carp; ## no critic +} + +# NOT an object method! +sub _die { + require Carp; + return &Carp::croak; ## no critic +} + +1; # End of module + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize - Handy web browsing in a Perl object + +=head1 VERSION + +version 2.04 + +=head1 SYNOPSIS + +WWW::Mechanize supports performing a sequence of page fetches including +following links and submitting forms. Each fetched page is parsed +and its links and forms are extracted. A link or a form can be +selected, form fields can be filled and the next page can be fetched. +Mech also stores a history of the URLs you've visited, which can +be queried and revisited. + + use WWW::Mechanize (); + my $mech = WWW::Mechanize->new(); + + $mech->get( $url ); + + $mech->follow_link( n => 3 ); + $mech->follow_link( text_regex => qr/download this/i ); + $mech->follow_link( url => 'http://host.com/index.html' ); + + $mech->submit_form( + form_number => 3, + fields => { + username => 'mungo', + password => 'lost-and-alone', + } + ); + + $mech->submit_form( + form_name => 'search', + fields => { query => 'pot of gold', }, + button => 'Search Now' + ); + + # Enable strict form processing to catch typos and non-existant form fields. + my $strict_mech = WWW::Mechanize->new( strict_forms => 1); + + $strict_mech->get( $url ); + + # This method call will die, saving you lots of time looking for the bug. + $strict_mech->submit_form( + form_number => 3, + fields => { + usernaem => 'mungo', # typo in field name + password => 'lost-and-alone', + extra_field => 123, # field does not exist + } + ); + +=head1 DESCRIPTION + +C<WWW::Mechanize>, or Mech for short, is a Perl module for stateful +programmatic web browsing, used for automating interaction with +websites. + +Features include: + +=over 4 + +=item * All HTTP methods + +=item * High-level hyperlink and HTML form support, without having to parse HTML yourself + +=item * SSL support + +=item * Automatic cookies + +=item * Custom HTTP headers + +=item * Automatic handling of redirections + +=item * Proxies + +=item * HTTP authentication + +=back + +Mech is well suited for use in testing web applications. If you use +one of the Test::*, like L<Test::HTML::Lint> modules, you can check the +fetched content and use that as input to a test call. + + use Test::More; + like( $mech->content(), qr/$expected/, "Got expected content" ); + +Each page fetch stores its URL in a history stack which you can +traverse. + + $mech->back(); + +If you want finer control over your page fetching, you can use +these methods. C<follow_link> and C<submit_form> are just high +level wrappers around them. + + $mech->find_link( n => $number ); + $mech->form_number( $number ); + $mech->form_name( $name ); + $mech->field( $name, $value ); + $mech->set_fields( %field_values ); + $mech->set_visible( @criteria ); + $mech->click( $button ); + +L<WWW::Mechanize> is a proper subclass of L<LWP::UserAgent> and +you can also use any of L<LWP::UserAgent>'s methods. + + $mech->add_header($name => $value); + +Please note that Mech does NOT support JavaScript, you need additional software +for that. Please check L<WWW::Mechanize::FAQ/"JavaScript"> for more. + +=head1 IMPORTANT LINKS + +=over 4 + +=item * L<https://github.com/libwww-perl/WWW-Mechanize/issues> + +The queue for bugs & enhancements in WWW::Mechanize. Please note that the +queue at L<http://rt.cpan.org> is no longer maintained. + +=item * L<https://metacpan.org/pod/WWW::Mechanize> + +The CPAN documentation page for Mechanize. + +=item * L<https://metacpan.org/pod/distribution/WWW-Mechanize/lib/WWW/Mechanize/FAQ.pod> + +Frequently asked questions. Make sure you read here FIRST. + +=back + +=head1 CONSTRUCTOR AND STARTUP + +=head2 new() + +Creates and returns a new WWW::Mechanize object, hereafter referred to as +the "agent". + + my $mech = WWW::Mechanize->new() + +The constructor for WWW::Mechanize overrides two of the params to the +LWP::UserAgent constructor: + + agent => 'WWW-Mechanize/#.##' + cookie_jar => {} # an empty, memory-only HTTP::Cookies object + +You can override these overrides by passing params to the constructor, +as in: + + my $mech = WWW::Mechanize->new( agent => 'wonderbot 1.01' ); + +If you want none of the overhead of a cookie jar, or don't want your +bot accepting cookies, you have to explicitly disallow it, like so: + + my $mech = WWW::Mechanize->new( cookie_jar => undef ); + +Here are the params that WWW::Mechanize recognizes. These do not include +params that L<LWP::UserAgent> recognizes. + +=over 4 + +=item * C<< autocheck => [0|1] >> + +Checks each request made to see if it was successful. This saves +you the trouble of manually checking yourself. Any errors found +are errors, not warnings. + +The default value is ON, unless it's being subclassed, in which +case it is OFF. This means that standalone L<WWW::Mechanize> instances +have autocheck turned on, which is protective for the vast majority +of Mech users who don't bother checking the return value of get() +and post() and can't figure why their code fails. However, if +L<WWW::Mechanize> is subclassed, such as for L<Test::WWW::Mechanize> +or L<Test::WWW::Mechanize::Catalyst>, this may not be an appropriate +default, so it's off. + +=item * C<< noproxy => [0|1] >> + +Turn off the automatic call to the L<LWP::UserAgent> C<env_proxy> function. + +This needs to be explicitly turned off if you're using L<Crypt::SSLeay> to +access a https site via a proxy server. Note: you still need to set your +HTTPS_PROXY environment variable as appropriate. + +=item * C<< onwarn => \&func >> + +Reference to a C<warn>-compatible function, such as C<< L<Carp>::carp >>, +that is called when a warning needs to be shown. + +If this is set to C<undef>, no warnings will ever be shown. However, +it's probably better to use the C<quiet> method to control that behavior. + +If this value is not passed, Mech uses C<Carp::carp> if L<Carp> is +installed, or C<CORE::warn> if not. + +=item * C<< onerror => \&func >> + +Reference to a C<die>-compatible function, such as C<< L<Carp>::croak >>, +that is called when there's a fatal error. + +If this is set to C<undef>, no errors will ever be shown. + +If this value is not passed, Mech uses C<Carp::croak> if L<Carp> is +installed, or C<CORE::die> if not. + +=item * C<< quiet => [0|1] >> + +Don't complain on warnings. Setting C<< quiet => 1 >> is the same as +calling C<< $mech->quiet(1) >>. Default is off. + +=item * C<< stack_depth => $value >> + +Sets the depth of the page stack that keeps track of all the +downloaded pages. Default is effectively infinite stack size. If +the stack is eating up your memory, then set this to a smaller +number, say 5 or 10. Setting this to zero means Mech will keep no +history. + +=back + +In addition, WWW::Mechanize also allows you to globally enable +strict and verbose mode for form handling, which is done with L<HTML::Form>. + +=over 4 + +=item * C<< strict_forms => [0|1] >> + +Globally sets the HTML::Form strict flag which causes form submission to +croak if any of the passed fields don't exist in the form, and/or a value +doesn't exist in a select element. This can still be disabled in individual +calls to C<L<< submit_form()|"$mech->submit_form( ... )" >>>. + +Default is off. + +=item * C<< verbose_forms => [0|1] >> + +Globally sets the HTML::Form verbose flag which causes form submission to +warn about any bad HTML form constructs found. This cannot be disabled +later. + +Default is off. + +=item * C<< marked_sections => [0|1] >> + +Globally sets the HTML::Parser marked sections flag which causes HTML +C<< CDATA[[ >> sections to be honoured. This cannot be disabled +later. + +Default is on. + +=back + +To support forms, WWW::Mechanize's constructor pushes POST +on to the agent's C<requests_redirectable> list (see also +L<LWP::UserAgent>.) + +=head2 $mech->agent_alias( $alias ) + +Sets the user agent string to the expanded version from a table of actual user strings. +I<$alias> can be one of the following: + +=over 4 + +=item * Windows IE 6 + +=item * Windows Mozilla + +=item * Mac Safari + +=item * Mac Mozilla + +=item * Linux Mozilla + +=item * Linux Konqueror + +=back + +then it will be replaced with a more interesting one. For instance, + + $mech->agent_alias( 'Windows IE 6' ); + +sets your User-Agent to + + Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) + +The list of valid aliases can be returned from C<known_agent_aliases()>. The current list is: + +=over + +=item * Windows IE 6 + +=item * Windows Mozilla + +=item * Mac Safari + +=item * Mac Mozilla + +=item * Linux Mozilla + +=item * Linux Konqueror + +=back + +=head2 known_agent_aliases() + +Returns a list of all the agent aliases that Mech knows about. + +=head1 PAGE-FETCHING METHODS + +=head2 $mech->get( $uri ) + +Given a URL/URI, fetches it. Returns an L<HTTP::Response> object. +I<$uri> can be a well-formed URL string, a L<URI> object, or a +L<WWW::Mechanize::Link> object. + +The results are stored internally in the agent object, but you don't +know that. Just use the accessors listed below. Poking at the +internals is deprecated and subject to change in the future. + +C<get()> is a well-behaved overloaded version of the method in +L<LWP::UserAgent>. This lets you do things like + + $mech->get( $uri, ':content_file' => $filename ); + +and you can rest assured that the params will get filtered down +appropriately. See L<LWP::UserAgent/get> for more details. + +B<NOTE:> Because C<:content_file> causes the page contents to be +stored in a file instead of the response object, some Mech functions +that expect it to be there won't work as expected. Use with caution. + +=head2 $mech->post( $uri, content => $content ) + +POSTs I<$content> to I<$uri>. Returns an L<HTTP::Response> object. +I<$uri> can be a well-formed URI string, a L<URI> object, or a +L<WWW::Mechanize::Link> object. + +=head2 $mech->put( $uri, content => $content ) + +PUTs I<$content> to I<$uri>. Returns an L<HTTP::Response> object. +I<$uri> can be a well-formed URI string, a L<URI> object, or a +L<WWW::Mechanize::Link> object. + + my $res = $mech->head( $uri ); + my $res = $mech->head( $uri , $field_name => $value, ... ); + +=head2 $mech->head ($uri ) + +Performs a HEAD request to I<$uri>. Returns an L<HTTP::Response> object. +I<$uri> can be a well-formed URI string, a L<URI> object, or a +L<WWW::Mechanize::Link> object. + +=head2 $mech->reload() + +Acts like the reload button in a browser: repeats the current +request. The history (as per the L<< back()|/$mech->back() >> method) is not altered. + +Returns the L<HTTP::Response> object from the reload, or C<undef> +if there's no current request. + +=head2 $mech->back() + +The equivalent of hitting the "back" button in a browser. Returns to +the previous page. Won't go back past the first page. (Really, what +would it do if it could?) + +Returns true if it could go back, or false if not. + +=head2 $mech->clear_history() + +This deletes all the history entries and returns true. + +=head2 $mech->history_count() + +This returns the number of items in the browser history. This number I<does> +include the most recently made request. + +=head2 $mech->history($n) + +This returns the I<n>th item in history. The 0th item is the most recent +request and response, which would be acted on by methods like +C<L<< find_link()|"$mech->find_link( ... )" >>>. +The 1st item is the state you'd return to if you called +C<L<< back()|/$mech->back() >>>. + +The maximum useful value for C<$n> is C<< $mech->history_count - 1 >>. +Requests beyond that bound will return C<undef>. + +History items are returned as hash references, in the form: + + { req => $http_request, res => $http_response } + +=head1 STATUS METHODS + +=head2 $mech->success() + +Returns a boolean telling whether the last request was successful. +If there hasn't been an operation yet, returns false. + +This is a convenience function that wraps C<< $mech->res->is_success >>. + +=head2 $mech->uri() + +Returns the current URI as a L<URI> object. This object stringifies +to the URI itself. + +=head2 $mech->response() / $mech->res() + +Return the current response as an L<HTTP::Response> object. + +Synonym for C<< $mech->response() >> + +=head2 $mech->status() + +Returns the HTTP status code of the response. This is a 3-digit +number like 200 for OK, 404 for not found, and so on. + +=head2 $mech->ct() / $mech->content_type() + +Returns the content type of the response. + +=head2 $mech->base() + +Returns the base URI for the current response + +=head2 $mech->forms() + +When called in a list context, returns a list of the forms found in +the last fetched page. In a scalar context, returns a reference to +an array with those forms. The forms returned are all L<HTML::Form> +objects. + +=head2 $mech->current_form() + +Returns the current form as an L<HTML::Form> object. + +=head2 $mech->links() + +When called in a list context, returns a list of the links found in the +last fetched page. In a scalar context it returns a reference to an array +with those links. Each link is a L<WWW::Mechanize::Link> object. + +=head2 $mech->is_html() + +Returns true/false on whether our content is HTML, according to the +HTTP headers. + +=head2 $mech->title() + +Returns the contents of the C<< <TITLE> >> tag, as parsed by +L<HTML::HeadParser>. Returns undef if the content is not HTML. + +=head2 $mech->redirects() + +Convenience method to get the L<< redirects|HTTP::Response/$r->redirects >> from the most recent L<HTTP::Response>. + +Note that you can also use L<< is_redirect|HTTP::Response/$r->is_redirect >> to see if the most recent response was a redirect like this. + + $mech->get($url); + do_stuff() if $mech->res->is_redirect; + +=head1 CONTENT-HANDLING METHODS + +=head2 $mech->content(...) + +Returns the content that the mech uses internally for the last page +fetched. Ordinarily this is the same as +C<< $mech->response()->decoded_content() >>, +but this may differ for HTML documents if L<< update_html|/$mech->update_html( $html ) >> is +overloaded (in which case the value passed to the base-class +implementation of same will be returned), and/or extra named arguments +are passed to I<content()>: + +=over 2 + +=item I<< $mech->content( format => 'text' ) >> + +Returns a text-only version of the page, with all HTML markup +stripped. This feature requires I<HTML::TreeBuilder> version 5 or higher +to be installed, or a fatal error will be thrown. This works only if +the contents are HTML. + +=item I<< $mech->content( base_href => [$base_href|undef] ) >> + +Returns the HTML document, modified to contain a +C<< <base href="$base_href"> >> mark-up in the header. +I<$base_href> is C<< $mech->base() >> if not specified. This is +handy to pass the HTML to e.g. L<HTML::Display>. This works only if +the contents are HTML. + +=item I<< $mech->content( raw => 1 ) >> + +Returns C<< $self->response()->content() >>, i.e. the raw contents from the +response. + +=item I<< $mech->content( decoded_by_headers => 1 ) >> + +Returns the content after applying all C<Content-Encoding> headers but +with not additional mangling. + +=item I<< $mech->content( charset => $charset ) >> + +Returns C<< $self->response()->decoded_content(charset => $charset) >> +(see L<HTTP::Response> for details). + +=back + +To preserve backwards compatibility, additional parameters will be +ignored unless none of C<< raw | decoded_by_headers | charset >> is +specified and the text is HTML, in which case an error will be triggered. + +A fresh instance of WWW::Mechanize will return C<undef> when C<< $mech->content() >> +is called, because no content is present before a request has been made. + +=head2 $mech->text() + +Returns the text of the current HTML content. If the content isn't +HTML, C<$mech> will die. + +The text is extracted by parsing the content, and then the extracted +text is cached, so don't worry about performance of calling this +repeatedly. + +=head1 LINK METHODS + +=head2 $mech->links() + +Lists all the links on the current page. Each link is a +L<WWW::Mechanize::Link> object. In list context, returns a list of all +links. In scalar context, returns an array reference of all links. + +=head2 $mech->follow_link(...) + +Follows a specified link on the page. You specify the match to be +found using the same params that C<L<< find_link()|"$mech->find_link( ... )" >>> uses. + +Here some examples: + +=over 4 + +=item * 3rd link called "download" + + $mech->follow_link( text => 'download', n => 3 ); + +=item * first link where the URL has "download" in it, regardless of case: + + $mech->follow_link( url_regex => qr/download/i ); + +or + + $mech->follow_link( url_regex => qr/(?i:download)/ ); + +=item * 3rd link on the page + + $mech->follow_link( n => 3 ); + +=item * the link with the url + + $mech->follow_link( url => '/other/page' ); + +or + + $mech->follow_link( url => 'http://example.com/page' ); + +=back + +Returns the result of the C<GET> method (an L<HTTP::Response> object) if a link +was found. + +If the page has no links, or the specified link couldn't be found, returns +C<undef>. If C<autocheck> is enabled an exception will be thrown instead. + +=head2 $mech->find_link( ... ) + +Finds a link in the currently fetched page. It returns a +L<WWW::Mechanize::Link> object which describes the link. (You'll +probably be most interested in the C<url()> property.) If it fails +to find a link it returns undef. + +You can take the URL part and pass it to the C<get()> method. If +that's your plan, you might as well use the C<follow_link()> method +directly, since it does the C<get()> for you automatically. + +Note that C<< <FRAME SRC="..."> >> tags are parsed out of the HTML and +treated as links so this method works with them. + +You can select which link to find by passing in one or more of these +key/value pairs: + +=over 4 + +=item * C<< text => 'string', >> and C<< text_regex => qr/regex/, >> + +C<text> matches the text of the link against I<string>, which must be an +exact match. To select a link with text that is exactly "download", use + + $mech->find_link( text => 'download' ); + +C<text_regex> matches the text of the link against I<regex>. To select a +link with text that has "download" anywhere in it, regardless of case, use + + $mech->find_link( text_regex => qr/download/i ); + +Note that the text extracted from the page's links are trimmed. For +example, C<< <a> foo </a> >> is stored as 'foo', and searching for +leading or trailing spaces will fail. + +=item * C<< url => 'string', >> and C<< url_regex => qr/regex/, >> + +Matches the URL of the link against I<string> or I<regex>, as appropriate. +The URL may be a relative URL, like F<foo/bar.html>, depending on how +it's coded on the page. + +=item * C<< url_abs => string >> and C<< url_abs_regex => regex >> + +Matches the absolute URL of the link against I<string> or I<regex>, +as appropriate. The URL will be an absolute URL, even if it's relative +in the page. + +=item * C<< name => string >> and C<< name_regex => regex >> + +Matches the name of the link against I<string> or I<regex>, as appropriate. + +=item * C<< rel => string >> and C<< rel_regex => regex >> + +Matches the rel of the link against I<string> or I<regex>, as appropriate. +This can be used to find stylesheets, favicons, or links the author of the +page does not want bots to follow. + +=item * C<< id => string >> and C<< id_regex => regex >> + +Matches the attribute 'id' of the link against I<string> or +I<regex>, as appropriate. + +=item * C<< class => string >> and C<< class_regex => regex >> + +Matches the attribute 'class' of the link against I<string> or +I<regex>, as appropriate. + +=item * C<< tag => string >> and C<< tag_regex => regex >> + +Matches the tag that the link came from against I<string> or I<regex>, +as appropriate. The C<tag_regex> is probably most useful to check for +more than one tag, as in: + + $mech->find_link( tag_regex => qr/^(a|frame)$/ ); + +The tags and attributes looked at are defined below. + +=back + +If C<n> is not specified, it defaults to 1. Therefore, if you don't +specify any params, this method defaults to finding the first link on the +page. + +Note that you can specify multiple text or URL parameters, which +will be ANDed together. For example, to find the first link with +text of "News" and with "cnn.com" in the URL, use: + + $mech->find_link( text => 'News', url_regex => qr/cnn\.com/ ); + +The return value is a reference to an array containing a +L<WWW::Mechanize::Link> object for every link in C<< $self->content >>. + +The links come from the following: + +=over 4 + +=item C<< <a href=...> >> + +=item C<< <area href=...> >> + +=item C<< <frame src=...> >> + +=item C<< <iframe src=...> >> + +=item C<< <link href=...> >> + +=item C<< <meta content=...> >> + +=back + +=head2 $mech->find_all_links( ... ) + +Returns all the links on the current page that match the criteria. The +method for specifying link criteria is the same as in +C<L<< find_link()|"$mech->find_link( ... )" >>>. +Each of the links returned is a L<WWW::Mechanize::Link> object. + +In list context, C<find_all_links()> returns a list of the links. +Otherwise, it returns a reference to the list of links. + +C<find_all_links()> with no parameters returns all links in the +page. + +=head2 $mech->find_all_inputs( ... criteria ... ) + +find_all_inputs() returns an array of all the input controls in the +current form whose properties match all of the regexes passed in. +The controls returned are all descended from HTML::Form::Input. +See L<HTML::Form/INPUTS> for details. + +If no criteria are passed, all inputs will be returned. + +If there is no current page, there is no form on the current +page, or there are no submit controls in the current form +then the return will be an empty array. + +You may use a regex or a literal string: + + # get all textarea controls whose names begin with "customer" + my @customer_text_inputs = $mech->find_all_inputs( + type => 'textarea', + name_regex => qr/^customer/, + ); + + # get all text or textarea controls called "customer" + my @customer_text_inputs = $mech->find_all_inputs( + type_regex => qr/^(text|textarea)$/, + name => 'customer', + ); + +=head2 $mech->find_all_submits( ... criteria ... ) + +C<find_all_submits()> does the same thing as C<find_all_inputs()> +except that it only returns controls that are submit controls, +ignoring other types of input controls like text and checkboxes. + +=head1 IMAGE METHODS + +=head2 $mech->images + +Lists all the images on the current page. Each image is a +L<WWW::Mechanize::Image> object. In list context, returns a list of all +images. In scalar context, returns an array reference of all images. + +=head2 $mech->find_image() + +Finds an image in the current page. It returns a +L<WWW::Mechanize::Image> object which describes the image. If it fails +to find an image it returns undef. + +You can select which image to find by passing in one or more of these +key/value pairs: + +=over 4 + +=item * C<< alt => 'string' >> and C<< alt_regex => qr/regex/ >> + +C<alt> matches the ALT attribute of the image against I<string>, which must be an +exact match. To select a image with an ALT tag that is exactly "download", use + + $mech->find_image( alt => 'download' ); + +C<alt_regex> matches the ALT attribute of the image against a regular +expression. To select an image with an ALT attribute that has "download" +anywhere in it, regardless of case, use + + $mech->find_image( alt_regex => qr/download/i ); + +=item * C<< url => 'string' >> and C<< url_regex => qr/regex/ >> + +Matches the URL of the image against I<string> or I<regex>, as appropriate. +The URL may be a relative URL, like F<foo/bar.html>, depending on how +it's coded on the page. + +=item * C<< url_abs => string >> and C<< url_abs_regex => regex >> + +Matches the absolute URL of the image against I<string> or I<regex>, +as appropriate. The URL will be an absolute URL, even if it's relative +in the page. + +=item * C<< tag => string >> and C<< tag_regex => regex >> + +Matches the tag that the image came from against I<string> or I<regex>, +as appropriate. The C<tag_regex> is probably most useful to check for +more than one tag, as in: + + $mech->find_image( tag_regex => qr/^(img|input)$/ ); + +The tags supported are C<< <img> >> and C<< <input> >>. + +=item * C<< id => string >> and C<< id_regex => regex >> + +C<id> matches the id attribute of the image against I<string>, which must +be an exact match. To select an image with the exact id "download-image", use + + $mech->find_image( id => 'download-image' ); + +C<id_regex> matches the id attribute of the image against a regular +expression. To select the first image with an id that contains "download" +anywhere in it, use + + $mech->find_image( id_regex => qr/download/ ); + +=item * C<< classs => string >> and C<< class_regex => regex >> + +C<class> matches the class attribute of the image against I<string>, which must +be an exact match. To select an image with the exact class "img-fuid", use + + $mech->find_image( class => 'img-fluid' ); + +To select an image with the class attribute "rounded float-left", use + + $mech->find_image( class => 'rounded float-left' ); + +Note that the classes have to be matched as a complete string, in the exact +order they appear in the website's source code. + +C<class_regex> matches the class attribute of the image against a regular +expression. Use this if you want a partial class name, or if an image has +several classes, but you only care about one. + +To select the first image with the class "rounded", where there are multiple +images that might also have either class "float-left" or "float-right", use + + $mech->find_image( class_regex => qr/\brounded\b/ ); + +Selecting an image with multiple classes where you do not care about the +order they appear in the website's source code is not currently supported. + +=back + +If C<n> is not specified, it defaults to 1. Therefore, if you don't +specify any params, this method defaults to finding the first image on the +page. + +Note that you can specify multiple ALT or URL parameters, which +will be ANDed together. For example, to find the first image with +ALT text of "News" and with "cnn.com" in the URL, use: + + $mech->find_image( image => 'News', url_regex => qr/cnn\.com/ ); + +The return value is a reference to an array containing a +L<WWW::Mechanize::Image> object for every image in C<< $self->content >>. + +=head2 $mech->find_all_images( ... ) + +Returns all the images on the current page that match the criteria. The +method for specifying image criteria is the same as in +C<L<< find_image()|"$mech->find_image()" >>>. +Each of the images returned is a L<WWW::Mechanize::Image> object. + +In list context, C<find_all_images()> returns a list of the images. +Otherwise, it returns a reference to the list of images. + +C<find_all_images()> with no parameters returns all images in the page. + +=head1 FORM METHODS + +These methods let you work with the forms on a page. The idea is +to choose a form that you'll later work with using the field methods +below. + +=head2 $mech->forms + +Lists all the forms on the current page. Each form is an L<HTML::Form> +object. In list context, returns a list of all forms. In scalar +context, returns an array reference of all forms. + +=head2 $mech->form_number($number) + +Selects the I<number>th form on the page as the target for subsequent +calls to C<L<< field()|"$mech->field( $name, $value, $number )" >>> +and C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. +Also returns the form that was selected. + +If it is found, the form is returned as an L<HTML::Form> object and set internally +for later use with Mech's form methods such as +C<L<< field()|"$mech->field( $name, $value, $number )" >>> and +C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. +When called in a list context, the number of the found form is also returned as +a second value. + +Emits a warning and returns undef if no form is found. + +The first form is number 1, not zero. + +=head2 $mech->form_name( $name ) + +Selects a form by name. If there is more than one form on the page +with that name, then the first one is used, and a warning is +generated. + +If it is found, the form is returned as an L<HTML::Form> object and +set internally for later use with Mech's form methods such as +C<L<< field()|"$mech->field( $name, $value, $number )" >>> and +C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. + +Returns undef if no form is found. + +=head2 $mech->form_id( $id ) + +Selects a form by ID. If there is more than one form on the page +with that ID, then the first one is used, and a warning is generated. + +If it is found, the form is returned as an L<HTML::Form> object and +set internally for later use with Mech's form methods such as +C<L<< field()|"$mech->field( $name, $value, $number )" >>> and +C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. + +If no form is found it returns C<undef>. This will also trigger a warning, +unless C<quiet> is enabled. + +=head2 $mech->all_forms_with_fields( @fields ) + +Selects a form by passing in a list of field names it must contain. All matching forms (perhaps none) are returned as a list of L<HTML::Form> objects. + +=head2 $mech->form_with_fields( @fields ) + +Selects a form by passing in a list of field names it must contain. If there +is more than one form on the page with that matches, then the first one is used, +and a warning is generated. + +If it is found, the form is returned as an L<HTML::Form> object and set internally +for later used with Mech's form methods such as +C<L<< field()|"$mech->field( $name, $value, $number )" >>> and +C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. + +Returns undef and emits a warning if no form is found. + +Note that this functionality requires libwww-perl 5.69 or higher. + +=head2 $mech->all_forms_with( $attr1 => $value1, $attr2 => $value2, ... ) + +Searches for forms with arbitrary attribute/value pairs within the E<lt>formE<gt> +tag. +(Currently does not work for attribute C<action> due to implementation details +of L<HTML::Form>.) +When given more than one pair, all criteria must match. +Using C<undef> as value means that the attribute in question must not be present. + +All matching forms (perhaps none) are returned as a list of L<HTML::Form> objects. + +=head2 $mech->form_with( $attr1 => $value1, $attr2 => $value2, ... ) + +Searches for forms with arbitrary attribute/value pairs within the E<lt>formE<gt> +tag. +(Currently does not work for attribute C<action> due to implementation details +of L<HTML::Form>.) +When given more than one pair, all criteria must match. +Using C<undef> as value means that the attribute in question must not be present. + +If it is found, the form is returned as an L<HTML::Form> object and set internally +for later used with Mech's form methods such as +C<L<< field()|"$mech->field( $name, $value, $number )" >>> and +C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>. + +Returns undef if no form is found. + +=head1 FIELD METHODS + +These methods allow you to set the values of fields in a given form. + +=head2 $mech->field( $name, $value, $number ) + +=head2 $mech->field( $name, \@values, $number ) + +Given the name of a field, set its value to the value specified. +This applies to the current form (as set by the +C<L<< form_name()|"$mech->form_name( $name )" >>> or +C<L<< form_number()|"$mech->form_number($number)" >>> +method or defaulting to the first form on the page). + +The optional I<$number> parameter is used to distinguish between two fields +with the same name. The fields are numbered from 1. + +=head2 $mech->select($name, $value) + +=head2 $mech->select($name, \@values) + +Given the name of a C<select> field, set its value to the value +specified. If the field is not C<< <select multiple> >> and the +C<$value> is an array, only the B<first> value will be set. [Note: +the documentation previously claimed that only the last value would +be set, but this was incorrect.] Passing C<$value> as a hash with +an C<n> key selects an item by number (e.g. +C<< {n => 3} >> or C<< {n => [2,4]} >>). +The numbering starts at 1. This applies to the current form. + +If you have a field with C<< <select multiple> >> and you pass a single +C<$value>, then C<$value> will be added to the list of fields selected, +without clearing the others. However, if you pass an array reference, +then all previously selected values will be cleared. + +Returns true on successfully setting the value. On failure, returns +false and calls C<< $self->warn() >> with an error message. + +=head2 $mech->set_fields( $name => $value ... ) + +This method sets multiple fields of the current form. It takes a list +of field name and value pairs. If there is more than one field with +the same name, the first one found is set. If you want to select which +of the duplicate field to set, use a value which is an anonymous array +which has the field value and its number as the 2 elements. + + # set the second foo field + $mech->set_fields( $name => [ 'foo', 2 ] ); + +The fields are numbered from 1. + +This applies to the current form. + +=head2 $mech->set_visible( @criteria ) + +This method sets fields of the current form without having to know +their names. So if you have a login screen that wants a username and +password, you do not have to fetch the form and inspect the source (or +use the F<mech-dump> utility, installed with WWW::Mechanize) to see +what the field names are; you can just say + + $mech->set_visible( $username, $password ); + +and the first and second fields will be set accordingly. The method +is called set_I<visible> because it acts only on visible fields; +hidden form inputs are not considered. The order of the fields is +the order in which they appear in the HTML source which is nearly +always the order anyone viewing the page would think they are in, +but some creative work with tables could change that; caveat user. + +Each element in C<@criteria> is either a field value or a field +specifier. A field value is a scalar. A field specifier allows +you to specify the I<type> of input field you want to set and is +denoted with an arrayref containing two elements. So you could +specify the first radio button with + + $mech->set_visible( [ radio => 'KCRW' ] ); + +Field values and specifiers can be intermixed, hence + + $mech->set_visible( 'fred', 'secret', [ option => 'Checking' ] ); + +would set the first two fields to "fred" and "secret", and the I<next> +C<OPTION> menu field to "Checking". + +The possible field specifier types are: "text", "password", "hidden", +"textarea", "file", "image", "submit", "radio", "checkbox" and "option". + +C<set_visible> returns the number of values set. + +=head2 $mech->tick( $name, $value [, $set] ) + +"Ticks" the first checkbox that has both the name and value associated +with it on the current form. Dies if there is no named check box for +that value. Passing in a false value as the third optional argument +will cause the checkbox to be unticked. + +=head2 $mech->untick($name, $value) + +Causes the checkbox to be unticked. Shorthand for +C<tick($name,$value,undef)> + +=head2 $mech->value( $name [, $number] ) + +Given the name of a field, return its value. This applies to the current +form. + +The optional I<$number> parameter is used to distinguish between two fields +with the same name. The fields are numbered from 1. + +If the field is of type file (file upload field), the value is always +cleared to prevent remote sites from downloading your local files. +To upload a file, specify its file name explicitly. + +=head2 $mech->click( $button [, $x, $y] ) + +Has the effect of clicking a button on the current form. The first +argument is the name of the button to be clicked. The second and +third arguments (optional) allow you to specify the (x,y) coordinates +of the click. + +If there is only one button on the form, C<< $mech->click() >> with +no arguments simply clicks that one button. + +Returns an L<HTTP::Response> object. + +=head2 $mech->click_button( ... ) + +Has the effect of clicking a button on the current form by specifying +its attributes. The arguments are a list of key/value pairs. Only one +of name, id, number, input or value must be specified in the keys. + +Dies if no button is found. + +=over 4 + +=item * C<< name => name >> + +Clicks the button named I<name> in the current form. + +=item * C<< id => id >> + +Clicks the button with the id I<id> in the current form. + +=item * C<< number => n >> + +Clicks the I<n>th button with type I<submit> in the current form. +Numbering starts at 1. + +=item * C<< value => value >> + +Clicks the button with the value I<value> in the current form. + +=item * C<< input => $inputobject >> + +Clicks on the button referenced by $inputobject, an instance of +L<HTML::Form::SubmitInput> obtained e.g. from + + $mech->current_form()->find_input( undef, 'submit' ) + +C<$inputobject> must belong to the current form. + +=item * C<< x => x >> + +=item * C<< y => y >> + +These arguments (optional) allow you to specify the (x,y) coordinates +of the click. + +=back + +=head2 $mech->submit() + +Submits the current form, without specifying a button to click. Actually, +no button is clicked at all. + +Returns an L<HTTP::Response> object. + +This used to be a synonym for C<< $mech->click( 'submit' ) >>, but is no +longer so. + +=head2 $mech->submit_form( ... ) + +This method lets you select a form from the previously fetched page, +fill in its fields, and submit it. It combines the C<form_number>/C<form_name>, +C<set_fields> and C<click> methods into one higher level call. Its arguments +are a list of key/value pairs, all of which are optional. + +=over 4 + +=item * C<< fields => \%fields >> + +Specifies the fields to be filled in the current form. + +=item * C<< with_fields => \%fields >> + +Probably all you need for the common case. It combines a smart form selector +and data setting in one operation. It selects the first form that contains all +fields mentioned in C<\%fields>. This is nice because you don't need to know +the name or number of the form to do this. + +(calls C<L<< form_with_fields()|"$mech->form_with_fields( @fields )" >>> and + C<L<< set_fields()|"$mech->set_fields( $name => $value ... )" >>>). + +If you choose C<with_fields>, the C<fields> option will be ignored. The +C<form_number>, C<form_name> and C<form_id> options will still be used. An +exception will be thrown unless exactly one form matches all of the provided +criteria. + +=item * C<< form_number => n >> + +Selects the I<n>th form (calls +C<L<< form_number()|"$mech->form_number($number)" >>>. If this param is not +specified, the currently-selected form is used. + +=item * C<< form_name => name >> + +Selects the form named I<name> (calls +C<L<< form_name()|"$mech->form_name( $name )" >>>) + +=item * C<< form_id => ID >> + +Selects the form with ID I<ID> (calls +C<L<< form_id()|"$mech->form_id( $name )" >>>) + +=item * C<< button => button >> + +Clicks on button I<button> (calls C<L<< click()|"$mech->click( $button [, $x, $y] )" >>>) + +=item * C<< x => x, y => y >> + +Sets the x or y values for C<L<< click()|"$mech->click( $button [, $x, $y] )" >>> + +=item * C<< strict_forms => bool >> + +Sets the HTML::Form strict flag which causes form submission to croak if any of the passed +fields don't exist on the page, and/or a value doesn't exist in a select element. +By default HTML::Form sets this value to false. + +This behavior can also be turned on globally by passing C<< strict_forms => 1 >> to +C<< WWW::Mechanize->new >>. If you do that, you can still disable it for individual calls +by passing C<< strict_forms => 0 >> here. + +=back + +If no form is selected, the first form found is used. + +If I<button> is not passed, then the C<L<< submit()|"$mech->submit()" >>> +method is used instead. + +If you want to submit a file and get its content from a scalar rather +than a file in the filesystem, you can use: + + $mech->submit_form(with_fields => { logfile => [ [ undef, 'whatever', Content => $content ], 1 ] } ); + +Returns an L<HTTP::Response> object. + +=head1 MISCELLANEOUS METHODS + +=head2 $mech->add_header( name => $value [, name => $value... ] ) + +Sets HTTP headers for the agent to add or remove from the HTTP request. + + $mech->add_header( Encoding => 'text/klingon' ); + +If a I<value> is C<undef>, then that header will be removed from any +future requests. For example, to never send a Referer header: + + $mech->add_header( Referer => undef ); + +If you want to delete a header, use C<delete_header>. + +Returns the number of name/value pairs added. + +B<NOTE>: This method was very different in WWW::Mechanize before 1.00. +Back then, the headers were stored in a package hash, not as a member of +the object instance. Calling C<add_header()> would modify the headers +for every WWW::Mechanize object, even after your object no longer existed. + +=head2 $mech->delete_header( name [, name ... ] ) + +Removes HTTP headers from the agent's list of special headers. For +instance, you might need to do something like: + + # Don't send a Referer for this URL + $mech->add_header( Referer => undef ); + + # Get the URL + $mech->get( $url ); + + # Back to the default behavior + $mech->delete_header( 'Referer' ); + +=head2 $mech->quiet(true/false) + +Allows you to suppress warnings to the screen. + + $mech->quiet(0); # turns on warnings (the default) + $mech->quiet(1); # turns off warnings + $mech->quiet(); # returns the current quietness status + +=head2 $mech->stack_depth( $max_depth ) + +Get or set the page stack depth. Use this if you're doing a lot of page +scraping and running out of memory. + +A value of 0 means "no history at all." By default, the max stack depth +is humongously large, effectively keeping all history. + +=head2 $mech->save_content( $filename, %opts ) + +Dumps the contents of C<< $mech->content >> into I<$filename>. +I<$filename> will be overwritten. Dies if there are any errors. + +If the content type does not begin with "text/", then the content +is saved in binary mode (i.e. C<binmode()> is set on the output +filehandle). + +Additional arguments can be passed as I<key>/I<value> pairs: + +=over + +=item I<< $mech->save_content( $filename, binary => 1 ) >> + +Filehandle is set with C<binmode> to C<:raw> and contents are taken +calling C<< $self->content(decoded_by_headers => 1) >>. Same as calling: + + $mech->save_content( $filename, binmode => ':raw', + decoded_by_headers => 1 ); + +This I<should> be the safest way to save contents verbatim. + +=item I<< $mech->save_content( $filename, binmode => $binmode ) >> + +Filehandle is set to binary mode. If C<$binmode> begins with ':', it is +passed as a parameter to C<binmode>: + + binmode $fh, $binmode; + +otherwise the filehandle is set to binary mode if C<$binmode> is true: + + binmode $fh; + +=item I<all other arguments> + +are passed as-is to C<< $mech->content(%opts) >>. In particular, +C<decoded_by_headers> might come handy if you want to revert the effect +of line compression performed by the web server but without further +interpreting the contents (e.g. decoding it according to the charset). + +=back + +=head2 $mech->dump_headers( [$fh] ) + +Prints a dump of the HTTP response headers for the most recent +response. If I<$fh> is not specified or is undef, it dumps to +STDOUT. + +Unlike the rest of the dump_* methods, $fh can be a scalar. It +will be used as a file name. + +=head2 $mech->dump_links( [[$fh], $absolute] ) + +Prints a dump of the links on the current page to I<$fh>. If I<$fh> +is not specified or is undef, it dumps to STDOUT. + +If I<$absolute> is true, links displayed are absolute, not relative. + +=head2 $mech->dump_images( [[$fh], $absolute] ) + +Prints a dump of the images on the current page to I<$fh>. If I<$fh> +is not specified or is undef, it dumps to STDOUT. + +If I<$absolute> is true, links displayed are absolute, not relative. + +The output will include empty lines for images that have no C<src> attribute +and therefore no C<<->url>>. + +=head2 $mech->dump_forms( [$fh] ) + +Prints a dump of the forms on the current page to I<$fh>. If I<$fh> +is not specified or is undef, it dumps to STDOUT. Running the following: + + my $mech = WWW::Mechanize->new(); + $mech->get("https://www.google.com/"); + $mech->dump_forms; + +will print: + + GET https://www.google.com/search [f] + ie=ISO-8859-1 (hidden readonly) + hl=en (hidden readonly) + source=hp (hidden readonly) + biw= (hidden readonly) + bih= (hidden readonly) + q= (text) + btnG=Google Search (submit) + btnI=I'm Feeling Lucky (submit) + gbv=1 (hidden readonly) + +=head2 $mech->dump_text( [$fh] ) + +Prints a dump of the text on the current page to I<$fh>. If I<$fh> +is not specified or is undef, it dumps to STDOUT. + +=head1 OVERRIDDEN LWP::UserAgent METHODS + +=head2 $mech->clone() + +Clone the mech object. The clone will be using the same cookie jar +as the original mech. + +=head2 $mech->redirect_ok() + +An overloaded version of C<redirect_ok()> in L<LWP::UserAgent>. +This method is used to determine whether a redirection in the request +should be followed. + +Note that WWW::Mechanize's constructor pushes POST on to the agent's +C<requests_redirectable> list. + +=head2 $mech->request( $request [, $arg [, $size]]) + +Overloaded version of C<request()> in L<LWP::UserAgent>. Performs +the actual request. Normally, if you're using WWW::Mechanize, it's +because you don't want to deal with this level of stuff anyway. + +Note that C<$request> will be modified. + +Returns an L<HTTP::Response> object. + +=head2 $mech->update_html( $html ) + +Allows you to replace the HTML that the mech has found. Updates the +forms and links parse-trees that the mech uses internally. + +Say you have a page that you know has malformed output, and you want to +update it so the links come out correctly: + + my $html = $mech->content; + $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; + $mech->update_html( $html ); + +This method is also used internally by the mech itself to update its +own HTML content when loading a page. This means that if you would +like to I<systematically> perform the above HTML substitution, you +would overload I<update_html> in a subclass thusly: + + package MyMech; + use base 'WWW::Mechanize'; + + sub update_html { + my ($self, $html) = @_; + $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg; + $self->WWW::Mechanize::update_html( $html ); + } + +If you do this, then the mech will use the tidied-up HTML instead of +the original both when parsing for its own needs, and for returning to +you through C<L<< content()|"$mech->content(...)" >>>. + +Overloading this method is also the recommended way of implementing +extra validation steps (e.g. link checkers) for every HTML page +received. L</warn> and L</die> would then come in handy to signal +validation errors. + +=head2 $mech->credentials( $username, $password ) + +Provide credentials to be used for HTTP Basic authentication for +all sites and realms until further notice. + +The four argument form described in L<LWP::UserAgent> is still +supported. + +=head2 $mech->get_basic_credentials( $realm, $uri, $isproxy ) + +Returns the credentials for the realm and URI. + +=head2 $mech->clear_credentials() + +Remove any credentials set up with C<credentials()>. + +=head1 INHERITED UNCHANGED LWP::UserAgent METHODS + +As a subclass of L<LWP::UserAgent>, WWW::Mechanize inherits all of +L<LWP::UserAgent>'s methods. Many of which are overridden or +extended. The following methods are inherited unchanged. View the +L<LWP::UserAgent> documentation for their implementation descriptions. + +This is not meant to be an inclusive list. LWP::UA may have added +others. + +=head2 $mech->head() + +Inherited from L<LWP::UserAgent>. + +=head2 $mech->mirror() + +Inherited from L<LWP::UserAgent>. + +=head2 $mech->simple_request() + +Inherited from L<LWP::UserAgent>. + +=head2 $mech->is_protocol_supported() + +Inherited from L<LWP::UserAgent>. + +=head2 $mech->prepare_request() + +Inherited from L<LWP::UserAgent>. + +=head2 $mech->progress() + +Inherited from L<LWP::UserAgent>. + +=head1 INTERNAL-ONLY METHODS + +These methods are only used internally. You probably don't need to +know about them. + +=head2 $mech->_update_page($request, $response) + +Updates all internal variables in $mech as if $request was just +performed, and returns $response. The page stack is B<not> altered by +this method, it is up to caller (e.g. +C<L<< request|"$mech->request( $request [, $arg [, $size]])" >>>) +to do that. + +=head2 $mech->_modify_request( $req ) + +Modifies a L<HTTP::Request> before the request is sent out, +for both GET and POST requests. + +We add a C<Referer> header, as well as header to note that we can accept gzip +encoded content, if L<Compress::Zlib> is installed. + +=head2 $mech->_make_request() + +Convenience method to make it easier for subclasses like +L<WWW::Mechanize::Cached> to intercept the request. + +=head2 $mech->_reset_page() + +Resets the internal fields that track page parsed stuff. + +=head2 $mech->_extract_links() + +Extracts links from the content of a webpage, and populates the C<{links}> +property with L<WWW::Mechanize::Link> objects. + +=head2 $mech->_push_page_stack() + +The agent keeps a stack of visited pages, which it can pop when it needs +to go BACK and so on. + +The current page needs to be pushed onto the stack before we get a new +page, and the stack needs to be popped when BACK occurs. + +Neither of these take any arguments, they just operate on the $mech +object. + +=head2 warn( @messages ) + +Centralized warning method, for diagnostics and non-fatal problems. +Defaults to calling C<CORE::warn>, but may be overridden by setting +C<onwarn> in the constructor. + +=head2 die( @messages ) + +Centralized error method. Defaults to calling C<CORE::die>, but +may be overridden by setting C<onerror> in the constructor. + +=head1 BEST PRACTICES + +The default settings can get you up and running quickly, but there are settings +you can change in order to make your life easier. + +=over 4 + +=item autocheck + +C<autocheck> can save you the overhead of checking status codes for success. +You may outgrow it as your needs get more sophisticated, but it's a safe option +to start with. + + my $agent = WWW::Mechanize->new( autocheck => 1 ); + +=item cookie_jar + +You are encouraged to install L<Mozilla::PublicSuffix> and use +L<HTTP::CookieJar::LWP> as your cookie jar. L<HTTP::CookieJar::LWP> provides a +better security model matching that of current Web browsers when +L<Mozilla::PublicSuffix> is installed. + + use HTTP::CookieJar::LWP (); + + my $jar = HTTP::CookieJar::LWP->new; + my $agent = WWW::Mechanize->new( cookie_jar => $jar ); + +=item protocols_allowed + +This option is inherited directly from L<LWP::UserAgent>. It allows you to +whitelist the protocols you're willing to allow. + + my $agent = WWW::Mechanize->new( + protocols_allowed => [ 'http', 'https' ] + ); + +This will prevent you from inadvertently following URLs like +C<file:///etc/passwd> + +=item protocols_forbidden + +This option is also inherited directly from L<LWP::UserAgent>. It allows you to +blacklist the protocols you're unwilling to allow. + + my $agent = WWW::Mechanize->new( + protocols_forbidden => [ 'file', 'mailto', 'ssh', ] + ); + +This will prevent you from inadvertently following URLs like +C<file:///etc/passwd> + +=item strict_forms + +Consider turning on the C<strict_forms> option when you create a new Mech. +This will perform a helpful sanity check on form fields every time you are +submitting a form, which can save you a lot of debugging time. + + my $agent = WWW::Mechanize->new( strict_forms => 1 ); + +If you do not want to have this option globally, you can still turn it on for +individual forms. + + $agent->submit_form( fields => { foo => 'bar' } , strict_forms => 1 ); + +=back + +=head1 WWW::MECHANIZE'S GIT REPOSITORY + +WWW::Mechanize is hosted at GitHub. + +Repository: L<https://github.com/libwww-perl/WWW-Mechanize>. +Bugs: L<https://github.com/libwww-perl/WWW-Mechanize/issues>. + +=head1 OTHER DOCUMENTATION + +=head2 I<Spidering Hacks>, by Kevin Hemenway and Tara Calishain + +I<Spidering Hacks> from O'Reilly +(L<http://www.oreilly.com/catalog/spiderhks/>) is a great book for anyone +wanting to know more about screen-scraping and spidering. + +There are six hacks that use Mech or a Mech derivative: + +=over 4 + +=item #21 WWW::Mechanize 101 + +=item #22 Scraping with WWW::Mechanize + +=item #36 Downloading Images from Webshots + +=item #44 Archiving Yahoo! Groups Messages with WWW::Yahoo::Groups + +=item #64 Super Author Searching + +=item #73 Scraping TV Listings + +=back + +The book was also positively reviewed on Slashdot: +L<http://books.slashdot.org/article.pl?sid=03/12/11/2126256> + +=head1 ONLINE RESOURCES AND SUPPORT + +=over 4 + +=item * WWW::Mechanize mailing list + +The Mech mailing list is at +L<http://groups.google.com/group/www-mechanize-users> and is specific +to Mechanize, unlike the LWP mailing list below. Although it is a +users list, all development discussion takes place here, too. + +=item * LWP mailing list + +The LWP mailing list is at +L<http://lists.perl.org/showlist.cgi?name=libwww>, and is more +user-oriented and well-populated than the WWW::Mechanize list. + +=item * Perlmonks + +L<http://perlmonks.org> is an excellent community of support, and +many questions about Mech have already been answered there. + +=item * L<WWW::Mechanize::Examples> + +A random array of examples submitted by users, included with the +Mechanize distribution. + +=back + +=head1 ARTICLES ABOUT WWW::MECHANIZE + +=over 4 + +=item * L<http://www.ibm.com/developerworks/linux/library/wa-perlsecure/> + +IBM article "Secure Web site access with Perl" + +=item * L<http://www.oreilly.com/catalog/googlehks2/chapter/hack84.pdf> + +Leland Johnson's hack #84 in I<Google Hacks, 2nd Edition> is +an example of a production script that uses WWW::Mechanize and +HTML::TableContentParser. It takes in keywords and returns the estimated +price of these keywords on Google's AdWords program. + +=item * L<http://www.perl.com/pub/a/2004/06/04/recorder.html> + +Linda Julien writes about using HTTP::Recorder to create WWW::Mechanize +scripts. + +=item * L<http://www.developer.com/lang/other/article.php/3454041> + +Jason Gilmore's article on using WWW::Mechanize for scraping sales +information from Amazon and eBay. + +=item * L<http://www.perl.com/pub/a/2003/01/22/mechanize.html> + +Chris Ball's article about using WWW::Mechanize for scraping TV +listings. + +=item * L<http://www.stonehenge.com/merlyn/LinuxMag/col47.html> + +Randal Schwartz's article on scraping Yahoo News for images. It's +already out of date: He manually walks the list of links hunting +for matches, which wouldn't have been necessary if the +C<L<< find_link()|"$mech->find_link( ... )" >>> method existed at press time. + +=item * L<http://www.perladvent.org/2002/16th/> + +WWW::Mechanize on the Perl Advent Calendar, by Mark Fowler. + +=item * L<http://www.linux-magazin.de/ausgaben/2004/03/datenruessel/> + +Michael Schilli's article on Mech and L<WWW::Mechanize::Shell> for the +German magazine I<Linux Magazin>. + +=back + +=head2 Other modules that use Mechanize + +Here are modules that use or subclass Mechanize. Let me know of any others: + +=over 4 + +=item * L<Finance::Bank::LloydsTSB> + +=item * L<HTTP::Recorder> + +Acts as a proxy for web interaction, and then generates WWW::Mechanize scripts. + +=item * L<Win32::IE::Mechanize> + +Just like Mech, but using Microsoft Internet Explorer to do the work. + +=item * L<WWW::Bugzilla> + +=item * L<WWW::Google::Groups> + +=item * L<WWW::Hotmail> + +=item * L<WWW::Mechanize::Cached> + +=item * L<WWW::Mechanize::Cached::GZip> + +=item * L<WWW::Mechanize::FormFiller> + +=item * L<WWW::Mechanize::Shell> + +=item * L<WWW::Mechanize::Sleepy> + +=item * L<WWW::Mechanize::SpamCop> + +=item * L<WWW::Mechanize::Timed> + +=item * L<WWW::SourceForge> + +=item * L<WWW::Yahoo::Groups> + +=item * L<WWW::Scripter> + +=back + +=head1 ACKNOWLEDGEMENTS + +Thanks to the numerous people who have helped out on WWW::Mechanize in +one way or another, including +Kirrily Robert for the original C<WWW::Automate>, +Lyle Hopkins, +Damien Clark, +Ansgar Burchardt, +Gisle Aas, +Jeremy Ary, +Hilary Holz, +Rafael Kitover, +Norbert Buchmuller, +Dave Page, +David Sainty, +H.Merijn Brand, +Matt Lawrence, +Michael Schwern, +Adriano Ferreira, +Miyagawa, +Peteris Krumins, +Rafael Kitover, +David Steinbrunner, +Kevin Falcone, +Mike O'Regan, +Mark Stosberg, +Uri Guttman, +Peter Scott, +Philippe Bruhat, +Ian Langworth, +John Beppu, +Gavin Estey, +Jim Brandt, +Ask Bjoern Hansen, +Greg Davies, +Ed Silva, +Mark-Jason Dominus, +Autrijus Tang, +Mark Fowler, +Stuart Children, +Max Maischein, +Meng Wong, +Prakash Kailasa, +Abigail, +Jan Pazdziora, +Dominique Quatravaux, +Scott Lanning, +Rob Casey, +Leland Johnson, +Joshua Gatcomb, +Julien Beasley, +Abe Timmerman, +Peter Stevens, +Pete Krawczyk, +Tad McClellan, +and the late great Iain Truskett. + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/lib/WWW/Mechanize/Cookbook.pod b/lib/WWW/Mechanize/Cookbook.pod new file mode 100644 index 0000000..687aa3f --- /dev/null +++ b/lib/WWW/Mechanize/Cookbook.pod @@ -0,0 +1,103 @@ +# PODNAME: WWW::Mechanize::Cookbook +# ABSTRACT: Recipes for using WWW::Mechanize + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize::Cookbook - Recipes for using WWW::Mechanize + +=head1 VERSION + +version 2.04 + +=head1 INTRODUCTION + +First, please note that many of these are possible just using +L<LWP::UserAgent>. Since C<WWW::Mechanize> is a subclass of +L<LWP::UserAgent>, whatever works on C<LWP::UserAgent> should work +on C<WWW::Mechanize>. See the L<lwpcook> man page included with +the L<libwww-perl> distribution. + +=head1 BASICS + +=head2 Launch the WWW::Mechanize browser + + use WWW::Mechanize; + + my $mech = WWW::Mechanize->new( autocheck => 1 ); + +The C<< autocheck => 1 >> tells Mechanize to die if any IO fails, +so you don't have to manually check. It's easier that way. If you +want to do your own error checking, leave it out. + +=head2 Fetch a page + + $mech->get( "http://search.cpan.org" ); + print $mech->content; + +C<< $mech->content >> contains the raw HTML from the web page. It +is not parsed or handled in any way, at least through the C<content> +method. + +=head2 Fetch a page into a file + +Sometimes you want to dump your results directly into a file. For +example, there's no reason to read a JPEG into memory if you're +only going to write it out immediately. This can also help with +memory issues on large files. + + $mech->get( "http://www.cpan.org/src/stable.tar.gz", + ":content_file" => "stable.tar.gz" ); + +=head2 Fetch a password-protected page + +Generally, just call C<credentials> before fetching the page. + + $mech->credentials( 'admin' => 'password' ); + $mech->get( 'http://10.11.12.13/password.html' ); + print $mech->content(); + +=head1 LINKS + +=head2 Find all image links + +Find all links that point to a JPEG, GIF or PNG. + + my @links = $mech->find_all_links( + tag => "a", url_regex => qr/\.(jpe?g|gif|png)$/i ); + +=head2 Find all download links + +Find all links that have the word "download" in them. + + my @links = $mech->find_all_links( + tag => "a", text_regex => qr/\bdownload\b/i ); + +=head1 ADVANCED + +=head2 See what will be sent without actually sending anything + + $mech->add_handler("request_send", sub { shift->dump; exit; }); + $mech->get("http://www.example.com"); + +=head1 SEE ALSO + +L<WWW::Mechanize> + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/lib/WWW/Mechanize/Examples.pod b/lib/WWW/Mechanize/Examples.pod new file mode 100644 index 0000000..355ed79 --- /dev/null +++ b/lib/WWW/Mechanize/Examples.pod @@ -0,0 +1,583 @@ +# PODNAME: WWW::Mechanize::Examples +# ABSTRACT: Sample programs that use WWW::Mechanize + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize + +=head1 VERSION + +version 2.04 + +=head1 SYNOPSIS + +Plenty of people have learned WWW::Mechanize, and now, you can too! + +Following are user-supplied samples of WWW::Mechanize in action. +If you have samples you'd like to contribute, please send 'em to +C<< <andy@petdance.com> >>. + +You can also look at the F<t/*.t> files in the distribution. + +Please note that these examples are not intended to do any specific task. +For all I know, they're no longer functional because the sites they +hit have changed. They're here to give examples of how people have +used WWW::Mechanize. + +Note that the examples are in reverse order of my having received them, +so the freshest examples are always at the top. + +=head2 Starbucks Density Calculator, by Nat Torkington + +Here's a pair of programs from Nat Torkington, editor for O'Reilly Media +and co-author of the I<Perl Cookbook>. + +=over 4 + +Rael [Dornfest] discovered that you can easily find out how many Starbucks +there are in an area by searching for "Starbucks". So I wrote a silly +scraper for some old census data and came up with some Starbucks density +figures. There's no meaning to these numbers thanks to errors from using +old census data coupled with false positives in Yahoo search (e.g., +"Dodie Starbuck-Your Style Desgn" in Portland OR). But it was fun to +waste a night on. + +Here are the top twenty cities in descending order of population, +with the amount of territory each Starbucks has. E.g., A New York NY +Starbucks covers 1.7 square miles of ground. + + New York, NY 1.7 + Los Angeles, CA 1.2 + Chicago, IL 1.0 + Houston, TX 4.6 + Philadelphia, PA 6.8 + San Diego, CA 2.7 + Detroit, MI 19.9 + Dallas, TX 2.7 + Phoenix, AZ 4.1 + San Antonio, TX 12.3 + San Jose, CA 1.1 + Baltimore, MD 3.9 + Indianapolis, IN 12.1 + San Francisco, CA 0.5 + Jacksonville, FL 39.9 + Columbus, OH 7.3 + Milwaukee, WI 5.1 + Memphis, TN 15.1 + Washington, DC 1.4 + Boston, MA 0.5 + +=back + +C<get_pop_data> + + #!/usr/bin/perl -w + + use WWW::Mechanize; + use Storable; + + $url = 'http://www.census.gov/population/www/documentation/twps0027.html'; + $m = WWW::Mechanize->new(); + $m->get($url); + + $c = $m->content; + + $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s + or die "Can't find the population table\n"; + $t = $1; + @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs; + shift @outer; + foreach $r (@outer) { + @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs; + for ($x = 0; $x < @bits; $x++) { + $b = $bits[$x]; + @v = split /\s*<BR>\s*/, $b; + foreach (@v) { s/^\s+//; s/\s+$// } + push @{$data[$x]}, @v; + } + } + + for ($y = 0; $y < @{$data[0]}; $y++) { + $data{$data[1][$y]} = { + NAME => $data[1][$y], + RANK => $data[0][$y], + POP => comma_free($data[2][$y]), + AREA => comma_free($data[3][$y]), + DENS => comma_free($data[4][$y]), + }; + } + + store(\%data, "cities.dat"); + + sub comma_free { + my $n = shift; + $n =~ s/,//; + return $n; + } + +C<plague_of_coffee> + + #!/usr/bin/perl -w + + use WWW::Mechanize; + use strict; + use Storable; + + $SIG{__WARN__} = sub {} ; # ssssssh + + my $Cities = retrieve("cities.dat"); + + my $m = WWW::Mechanize->new(); + $m->get("http://local.yahoo.com/"); + + my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities; + foreach my $c ( @cities ) { + my $fields = { + 'stx' => "starbucks", + 'csz' => $c, + }; + + my $r = $m->submit_form(form_number => 2, + fields => $fields); + die "Couldn't submit form" unless $r->is_success; + + my $hits = number_of_hits($r); + # my $ppl = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits); + # print "$c has $hits Starbucks. That's one for every $ppl people.\n"; + my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits); + print "$c : $density\n"; + } + + sub number_of_hits { + my $r = shift; + my $c = $r->content; + if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) { + return $1; + } + if ($c =~ m{Sorry, no .*? found in or near}) { + return 0; + } + if ($c =~ m{Your search matched multiple cities}) { + warn "Your search matched multiple cities\n"; + return 0; + } + if ($c =~ m{Sorry we couldn.t find that location}) { + warn "No cities\n"; + return 0; + } + if ($c =~ m{Could not find.*?, showing results for}) { + warn "No matches\n"; + return 0; + } + die "Unknown response\n$c\n"; + } + +=head2 pb-upload, by John Beppu + +This program takes filenames of images from the command line and +uploads them to a www.photobucket.com folder. John Beppu, the author, says: + +=over 4 + +I had 92 pictures I wanted to upload, and doing it through a browser +would've been torture. But thanks to mech, all I had to do was +`./pb.upload *.jpg` and watch it do its thing. It felt good. +If I had more time, I'd implement WWW::Photobucket on top of +WWW::Mechanize. + +=back + + #!/usr/bin/perl -w -T + + use strict; + use WWW::Mechanize; + + my $login = "login_name"; + my $password = "password"; + my $folder = "folder"; + + my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/"; + + # login to your photobucket.com account + my $mech = WWW::Mechanize->new(); + $mech->get($url); + $mech->submit_form( + form_number => 1, + fields => { password => $password }, + ); + die unless ($mech->success); + + # upload image files specified on command line + foreach (@ARGV) { + print "$_\n"; + $mech->form_number(2); + $mech->field('the_file[]' => $_); + $mech->submit(); + } + +=head2 listmod, by Ian Langworth + +Ian Langworth contributes this little gem that will bring joy to +beleaguered mailing list admins. It discards spam messages through +mailman's web interface. + + #!/arch/unix/bin/perl + use strict; + use warnings; + # + # listmod - fast alternative to mailman list interface + # + # usage: listmod crew XXXXXXXX + # + + die "usage: $0 <listname> <password>\n" unless @ARGV == 2; + my ($listname, $password) = @ARGV; + + use CGI qw(unescape); + + use WWW::Mechanize; + my $m = WWW::Mechanize->new( autocheck => 1 ); + + use Term::ReadLine; + my $term = Term::ReadLine->new($0); + + # submit the form, get the cookie, go to the list admin page + $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname"); + $m->set_visible( $password ); + $m->click; + + # exit if nothing to do + print "There are no pending requests.\n" and exit + if $m->content =~ /There are no pending requests/; + + # select the first form and examine its contents + $m->form_number(1); + my $f = $m->current_form or die "Couldn't get first form!\n"; + + # get me the base form element for each email item + my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param + or die "Couldn't get items in first form!\n"; + + # iterate through items, prompt user, commit actions + foreach my $item (@items) { + + # show item info + my $sender = unescape($item); + my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1] + =~ /Subject:\s+(.+?)\s+Size:/g; + + # prompt user + my $choice = ''; + while ( $choice !~ /^[DAX]$/ ) { + print "$sender\: '$subject'\n"; + $choice = uc $term->readline("Action: defer/accept/discard [dax]: "); + print "\n\n"; + } + + # set button + $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice}); + } + + # submit actions + $m->click; + +=head2 ccdl, by Andy Lester + +Steve McConnell, author of the landmark I<Code Complete> has put +up the chapters for the 2nd edition in PDF format on his website. +I needed to download them to take to Kinko's to have printed. This +little program did it for me. + + #!/usr/bin/perl -w + + use strict; + use WWW::Mechanize; + + my $start = "http://www.stevemcconnell.com/cc2/cc.htm"; + + my $mech = WWW::Mechanize->new( autocheck => 1 ); + $mech->get( $start ); + + my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ ); + + for my $link ( @links ) { + my $url = $link->url_abs; + my $filename = $url; + $filename =~ s[^.+/][]; + + print "Fetching $url"; + $mech->get( $url, ':content_file' => $filename ); + + print " ", -s $filename, " bytes\n"; + } + +=head2 quotes.pl, by Andy Lester + +This was a program that was going to get a hack in I<Spidering Hacks>, +but got cut at the last minute, probably because it's against IMDB's TOS +to scrape from it. I present it here as an example, not a suggestion +that you break their TOS. + +Last I checked, it didn't work because their HTML didn't match, but it's +still good as sample code. + + #!/usr/bin/perl -w + + use strict; + + use WWW::Mechanize; + use Getopt::Long; + use Text::Wrap; + + my $match = undef; + my $random = undef; + GetOptions( + "match=s" => \$match, + "random" => \$random, + ) or exit 1; + + my $movie = shift @ARGV or die "Must specify a movie\n"; + + my $quotes_page = get_quotes_page( $movie ); + my @quotes = extract_quotes( $quotes_page ); + + if ( $match ) { + $match = quotemeta($match); + @quotes = grep /$match/i, @quotes; + } + + if ( $random ) { + print $quotes[rand @quotes]; + } + else { + print join( "\n", @quotes ); + } + + + sub get_quotes_page { + my $movie = shift; + + my $mech = WWW::Mechanize->new; + $mech->get( "http://www.imdb.com/search" ); + $mech->success or die "Can't get the search page"; + + $mech->submit_form( + form_number => 2, + fields => { + title => $movie, + restrict => "Movies only", + }, + ); + + my @links = $mech->find_all_links( url_regex => qr[^/Title] ) + or die "No matches for \"$movie\" were found.\n"; + + # Use the first link + my ( $url, $title ) = @{$links[0]}; + + warn "Checking $title...\n"; + + $mech->get( $url ); + my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i ) + or die qq{"$title" has no quotes in IMDB!\n}; + + warn "Fetching quotes...\n\n"; + $mech->get( $link->[0] ); + + return $mech->content; + } + + + sub extract_quotes { + my $page = shift; + + # Nibble away at the unwanted HTML at the beginnning... + $page =~ s/.+Memorable Quotes//si; + $page =~ s/.+?(<a name)/$1/si; + + # ... and the end of the page + $page =~ s/Browse titles in the movie quotes.+$//si; + $page =~ s/<p.+$//g; + + # Quotes separated by an <HR> tag + my @quotes = split( /<hr.+?>/, $page ); + + for my $quote ( @quotes ) { + my @lines = split( /<br>/, $quote ); + for ( @lines ) { + s/<[^>]+>//g; # Strip HTML tags + s/\s+/ /g; # Squash whitespace + s/^ //; # Strip leading space + s/ $//; # Strip trailing space + s/"/"/g; # Replace HTML entity quotes + + # Word-wrap to fit in 72 columns + $Text::Wrap::columns = 72; + $_ = wrap( '', ' ', $_ ); + } + $quote = join( "\n", @lines ); + } + + return @quotes; + } + +=head2 cpansearch.pl, by Ed Silva + +A quick little utility to search the CPAN and fire up a browser +with a results page. + + #!/usr/bin/perl + + # turn on perl's safety features + use strict; + use warnings; + + # work out the name of the module we're looking for + my $module_name = $ARGV[0] + or die "Must specify module name on command line"; + + # create a new browser + use WWW::Mechanize; + my $browser = WWW::Mechanize->new(); + + # tell it to get the main page + $browser->get("http://search.cpan.org/"); + + # okay, fill in the box with the name of the + # module we want to look up + $browser->form_number(1); + $browser->field("query", $module_name); + $browser->click(); + + # click on the link that matches the module name + $browser->follow_link( text_regex => $module_name ); + + my $url = $browser->uri; + + # launch a browser... + system('galeon', $url); + + exit(0); + +=head2 lj_friends.cgi, by Matt Cashner + + #!/usr/bin/perl + + # Provides an rss feed of a paid user's LiveJournal friends list + # Full entries, protected entries, etc. + # Add to your favorite rss reader as + # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD + + use warnings; + use strict; + + use WWW::Mechanize; + use CGI; + + my $cgi = CGI->new(); + my $form = $cgi->Vars; + + my $agent = WWW::Mechanize->new(); + + $agent->get('http://www.livejournal.com/login.bml'); + $agent->form_number('3'); + $agent->field('user',$form->{user}); + $agent->field('password',$form->{password}); + $agent->submit(); + $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1'); + print "Content-type: text/plain\n\n"; + print $agent->content(); + +=head2 Hacking Movable Type, by Dan Rinzel + + use strict; + use WWW::Mechanize; + + # a tool to automatically post entries to a moveable type weblog, and set arbitrary creation dates + + my $mech = WWW::Mechanize->new(); + my $entry; + $entry->{title} = "Test AutoEntry Title"; + $entry->{btext} = "Test AutoEntry Body"; + $entry->{date} = '2002-04-15 14:18:00'; + my $start = qq|http://my.blog.site/mt.cgi|; + + $mech->get($start); + $mech->field('username','und3f1n3d'); + $mech->field('password','obscur3d'); + $mech->submit(); # to get login cookie + $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|); + $mech->form_name('entry_form'); + $mech->field('title',$entry->{title}); + $mech->field('category_id',1); # adjust as needed + $mech->field('text',$entry->{btext}); + $mech->field('status',2); # publish, or 1 = draft + $results = $mech->submit(); + + # if we're ok with this entry being datestamped "NOW" (no {date} in %entry) + # we're done. Otherwise, time to be tricksy + # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler + # which takes the user to an editable version of the form where the create date can be edited + # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out + + if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) { + # travel the redirect + $results = $mech->get($results->{_headers}->{location}); + $results->{_content} =~ /<body onLoad="([^\"]+)"/is; + my $js = $1; + $js =~ /\'([^']+)\'/; + $results = $mech->get($start.$1); + $mech->form_name('entry_form'); + $mech->field('created_on_manual',$entry->{date}); + $mech->submit(); + } + +=head2 get-despair, by Randal Schwartz + +Randal submitted this bot that walks the despair.com site sucking down +all the pictures. + + use strict; + $|++; + + use WWW::Mechanize; + use File::Basename; + + my $m = WWW::Mechanize->new; + + $m->get("http://www.despair.com/indem.html"); + + my @top_links = @{$m->links}; + + for my $top_link_num (0..$#top_links) { + next unless $top_links[$top_link_num][0] =~ /^http:/; + + $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num"; + + print $m->uri, "\n"; + for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) { + my $local = basename $image; + print " $image...", $m->mirror($image, $local)->message, "\n" + } + + $m->back or die "can't go back"; + } + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/lib/WWW/Mechanize/FAQ.pod b/lib/WWW/Mechanize/FAQ.pod new file mode 100644 index 0000000..b50e892 --- /dev/null +++ b/lib/WWW/Mechanize/FAQ.pod @@ -0,0 +1,464 @@ +# PODNAME: WWW::Mechanize::FAQ +# ABSTRACT: Frequently Asked Questions about WWW::Mechanize + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize::FAQ - Frequently Asked Questions about WWW::Mechanize + +=head1 VERSION + +version 2.04 + +=head1 How to get help with WWW::Mechanize + +If your question isn't answered here in the FAQ, please turn to the +communities at: + +=over + +=item * StackOverflow L<https://stackoverflow.com/questions/tagged/www-mechanize> + +=item * #lwp on irc.perl.org + +=item * L<http://perlmonks.org> + +=item * The libwww-perl mailing list at L<http://lists.perl.org> + +=back + +=head1 JavaScript + +=head2 I have this web page that has JavaScript on it, and my Mech program doesn't work. + +That's because WWW::Mechanize doesn't operate on the JavaScript. It only +understands the HTML parts of the page. + +=head2 I thought Mech was supposed to work like a web browser. + +It does pretty much, but it doesn't support JavaScript. + +I added some basic attempts at picking up URLs in C<window.open()> +calls and return them in C<< $mech->links >>. They work sometimes. + +Since Javascript is completely visible to the client, it cannot be used +to prevent a scraper from following links. But it can make life difficult. If +you want to scrape specific pages, then a solution is always possible. + +One typical use of Javascript is to perform argument checking before +posting to the server. The URL you want is probably just buried in the +Javascript function. Do a regular expression match on +C<< $mech->content() >> +to find the link that you want and C<< $mech->get >> it directly (this +assumes that you know what you are looking for in advance). + +In more difficult cases, the Javascript is used for URL mangling to +satisfy the needs of some middleware. In this case you need to figure +out what the Javascript is doing (why are these URLs always really +long?). There is probably some function with one or more arguments which +calculates the new URL. Step one: using your favorite browser, get the +before and after URLs and save them to files. Edit each file, converting +the argument separators ('?', '&' or ';') into newlines. Now it is +easy to use diff or comm to find out what Javascript did to the URL. +Step 2 - find the function call which created the URL - you will need +to parse and interpret its argument list. The Javascript Debugger in the +Firebug extension for Firefox helps with the analysis. At this point, it is +fairly trivial to write your own function which emulates the Javascript +for the pages you want to process. + +Here's another approach that answers the question, "It works in Firefox, +but why not Mech?" Everything the web server knows about the client is +present in the HTTP request. If two requests are identical, the results +should be identical. So the real question is "What is different between +the mech request and the Firefox request?" + +The Firefox extension "Tamper Data" is an effective tool for examining +the headers of the requests to the server. Compare that with what LWP +is sending. Once the two are identical, the action of the server should +be the same as well. + +I say "should", because this is an oversimplification - some values +are naturally unique, e.g. a SessionID, but if a SessionID is present, +that is probably sufficient, even though the value will be different +between the LWP request and the Firefox request. The server could use +the session to store information which is troublesome, but that's not +the first place to look (and highly unlikely to be relevant when you +are requesting the login page of your site). + +Generally the problem is to be found in missing or incorrect POSTDATA +arguments, Cookies, User-Agents, Accepts, etc. If you are using mech, +then redirects and cookies should not be a problem, but are listed here +for completeness. If you are missing headers, C<< $mech->add_header >> +can be used to add the headers that you need. + +=head2 Which modules work like Mechanize and have JavaScript support? + +In no particular order: L<Gtk2::WebKit::Mechanize>, L<Win32::IE::Mechanize>, +L<WWW::Mechanize::Firefox>, L<WWW::Scripter>, L<WWW::Selenium> + +=head1 How do I do X? + +=head2 Can I do [such-and-such] with WWW::Mechanize? + +If it's possible with LWP::UserAgent, then yes. WWW::Mechanize is +a subclass of L<LWP::UserAgent>, so all the wondrous magic of that +class is inherited. + +=head2 How do I use WWW::Mechanize through a proxy server? + +See the docs in L<LWP::UserAgent> on how to use the proxy. Short version: + + $mech->proxy(['http', 'ftp'], 'http://proxy.example.com:8000/'); + +or get the specs from the environment: + + $mech->env_proxy(); + + # Environment set like so: + gopher_proxy=http://proxy.my.place/ + wais_proxy=http://proxy.my.place/ + no_proxy="localhost,my.domain" + export gopher_proxy wais_proxy no_proxy + +=head2 How can I see what fields are on the forms? + +Use the mech-dump utility, optionally installed with Mechanize. + + $ mech-dump --forms http://search.cpan.org + Dumping forms + GET http://search.cpan.org/search + query= + mode=all (option) [*all|module|dist|author] + <NONAME>=CPAN Search (submit) + +=head2 How do I get Mech to handle authentication? + + use MIME::Base64; + + my $agent = WWW::Mechanize->new(); + my @args = ( + Authorization => "Basic " . + MIME::Base64::encode( USER . ':' . PASS ) + ); + + $agent->credentials( ADDRESS, REALM, USER, PASS ); + $agent->get( URL, @args ); + +If you want to use the credentials for all future requests, you can +also use the L<LWP::UserAgent> C<default_header()> method instead +of the extra arguments to C<get()> + + $mech->default_header( + Authorization => 'Basic ' . encode_base64( USER . ':' . PASSWORD ) ); + +=head2 How can I get WWW::Mechanize to execute this JavaScript? + +You can't. JavaScript is entirely client-based, and WWW::Mechanize +is a client that doesn't understand JavaScript. See the top part +of this FAQ. + +=head2 How do I check a checkbox that doesn't have a value defined? + +Set it to the value of "on". + + $mech->field( my_checkbox => 'on' ); + +=head2 How do I handle frames? + +You don't deal with them as frames, per se, but as links. Extract +them with + + my @frame_links = $mech->find_link( tag => "frame" ); + +=head2 How do I get a list of HTTP headers and their values? + +All L<HTTP::Headers> methods work on a L<HTTP::Response> object which is +returned by the I<get()>, I<reload()>, I<response()/res()>, I<click()>, +I<submit_form()>, and I<request()> methods. + + my $mech = WWW::Mechanize->new( autocheck => 1 ); + $mech->get( 'http://my.site.com' ); + my $response = $mech->response(); + for my $key ( $response->header_field_names() ) { + print $key, " : ", $response->header( $key ), "\n"; + } + +=head2 How do I enable keep-alive? + +Since L<WWW::Mechanize> is a subclass of L<LWP::UserAgent>, you can +use the same mechanism to enable keep-alive: + + use LWP::ConnCache; + ... + $mech->conn_cache(LWP::ConnCache->new); + +=head2 How can I change/specify the action parameter of an HTML form? + +You can access the action of the form by utilizing the L<HTML::Form> +object returned from one of the specifying form methods. + +Using C<< $mech->form_number($number) >>: + + my $mech = WWW::mechanize->new; + $mech->get('http://someurlhere.com'); + # Access the form using its Zero-Based Index by DOM order + $mech->form_number(0)->action('http://newAction'); #ABS URL + +Using C<< $mech->form_name($number) >>: + + my $mech = WWW::mechanize->new; + $mech->get('http://someurlhere.com'); + #Access the form using its Zero-Based Index by DOM order + $mech->form_name('trgForm')->action('http://newAction'); #ABS URL + +=head2 How do I save an image? How do I save a large tarball? + +An image is just content. You get the image and save it. + + $mech->get( 'photo.jpg' ); + $mech->save_content( '/path/to/my/directory/photo.jpg' ); + +You can also save any content directly to disk using the C<:content_file> +flag to C<get()>, which is part of L<LWP::UserAgent>. + + $mech->get( 'http://www.cpan.org/src/stable.tar.gz', + ':content_file' => 'stable.tar.gz' ); + +=head2 How do I pick a specific value from a C<< <select> >> list? + +Find the C<HTML::Form::ListInput> in the page. + + my ($listbox) = $mech->find_all_inputs( name => 'listbox' ); + +Then create a hash for the lookup: + + my %name_lookup; + @name_lookup{ $listbox->value_names } = $listbox->possible_values; + my $value = $name_lookup{ 'Name I want' }; + +If you have duplicate names, this method won't work, and you'll +have to loop over C<< $listbox->value_names >> and +C<< $listbox->possible_values >> in parallel until you find a +matching name. + +=head2 How do I get Mech to not follow redirects? + +You use functionality in LWP::UserAgent, not Mech itself. + + $mech->requests_redirectable( [] ); + +Or you can set C<max_redirect>: + + $mech->max_redirect( 0 ); + +Both these options can also be set in the constructor. Mech doesn't +understand them, so will pass them through to the LWP::UserAgent +constructor. + +=head1 Why doesn't this work: Debugging your Mechanize program + +=head2 My Mech program doesn't work, but it works in the browser. + +Mechanize acts like a browser, but apparently something you're doing +is not matching the browser's behavior. Maybe it's expecting a +certain web client, or maybe you've not handling a field properly. +For some reason, your Mech problem isn't doing exactly what the +browser is doing, and when you find that, you'll have the answer. + +=head2 My Mech program gets these 500 errors. + +A 500 error from the web server says that the program on the server +side died. Probably the web server program was expecting certain +inputs that you didn't supply, and instead of handling it nicely, +the program died. + +Whatever the cause of the 500 error, if it works in the browser, +but not in your Mech program, you're not acting like the browser. +See the previous question. + +=head2 Why doesn't my program handle this form correctly? + +Run F<mech-dump> on your page and see what it says. + +F<mech-dump> is a marvelous diagnostic tool for figuring out what forms +and fields are on the page. Say you're scraping CNN.com, you'd get this: + + $ mech-dump http://www.cnn.com/ + GET http://search.cnn.com/cnn/search + source=cnn (hidden readonly) + invocationType=search/top (hidden readonly) + sites=web (radio) [*web/The Web ??|cnn/CNN.com ??] + query= (text) + <NONAME>=Search (submit) + + POST http://cgi.money.cnn.com/servlets/quote_redirect + query= (text) + <NONAME>=GET (submit) + + POST http://polls.cnn.com/poll + poll_id=2112 (hidden readonly) + question_1=<UNDEF> (radio) [1/Simplistic option|2/VIEW RESULTS] + <NONAME>=VOTE (submit) + + GET http://search.cnn.com/cnn/search + source=cnn (hidden readonly) + invocationType=search/bottom (hidden readonly) + sites=web (radio) [*web/??CNN.com|cnn/??] + query= (text) + <NONAME>=Search (submit) + +Four forms, including the first one duplicated at the end. All the +fields, all their defaults, lovingly generated by HTML::Form's C<dump> +method. + +If you want to run F<mech-dump> on something that doesn't lend itself +to a quick URL fetch, then use the C<save_content()> method to write +the HTML to a file, and run F<mech-dump> on the file. + +=head2 Why don't https:// URLs work? + +You need either L<IO::Socket::SSL> or L<Crypt::SSLeay> installed. + +=head2 Why do I get "Input 'fieldname' is readonly"? + +You're trying to change the value of a hidden field and you have +warnings on. + +First, make sure that you actually mean to change the field that you're +changing, and that you don't have a typo. Usually, hidden variables are +set by the site you're working on for a reason. If you change the value, +you might be breaking some functionality by faking it out. + +If you really do want to change a hidden value, make the changes in a +scope that has warnings turned off: + + { + local $^W = 0; + $agent->field( name => $value ); + } + +=head2 I tried to [such-and-such] and I got this weird error. + +Are you checking your errors? + +Are you sure? + +Are you checking that your action succeeded after every action? + +Are you sure? + +For example, if you try this: + + $mech->get( "http://my.site.com" ); + $mech->follow_link( "foo" ); + +and the C<get> call fails for some reason, then the Mech internals +will be unusable for the C<follow_link> and you'll get a weird +error. You B<must>, after every action that GETs or POSTs a page, +check that Mech succeeded, or all bets are off. + + $mech->get( "http://my.site.com" ); + die "Can't even get the home page: ", $mech->response->status_line + unless $mech->success; + + $mech->follow_link( "foo" ); + die "Foo link failed: ", $mech->response->status_line + unless $mech->success; + +=head2 How do I figure out why C<< $mech->get($url) >> doesn't work? + +There are many reasons why a C<< get() >> can fail. The server can take +you to someplace you didn't expect. It can generate redirects which are +not properly handled. You can get time-outs. Servers are down more often +than you think! etc, etc, etc. A couple of places to start: + +=over 4 + +=item 1 Check C<< $mech->status() >> after each call + +=item 2 Check the URL with C<< $mech->uri() >> to see where you ended up + +=item 3 Try debugging with C<< LWP::ConsoleLogger >>. + +=back + +If things are really strange, turn on debugging with +C<< use LWP::ConsoleLogger::Everywhere; >> +Just put this in the main program. This causes LWP to print out a trace +of the HTTP traffic between client and server and can be used to figure +out what is happening at the protocol level. + +It is also useful to set many traps to verify that processing is +proceeding as expected. A Mech program should always have an "I didn't +expect to get here" or "I don't recognize the page that I am processing" +case and bail out. + +Since errors can be transient, by the time you notice that the error +has occurred, it might not be possible to reproduce it manually. So +for automated processing it is useful to email yourself the following +information: + +=over 4 + +=item * where processing is taking place + +=item * An Error Message + +=item * $mech->uri + +=item * $mech->content + +=back + +You can also save the content of the page with C<< $mech->save_content( 'filename.html' ); >> + +=head2 I submitted a form, but the server ignored everything! I got an empty form back! + +The post is handled by application software. It is common for PHP +programmers to use the same file both to display a form and to process +the arguments returned. So the first task of the application programmer +is to decide whether there are arguments to processes. The program can +check whether a particular parameter has been set, whether a hidden +parameter has been set, or whether the submit button has been clicked. +(There are probably other ways that I haven't thought of). + +In any case, if your form is not setting the parameter (e.g. the submit +button) which the web application is keying on (and as an outsider there +is no way to know what it is keying on), it will not notice that the form +has been submitted. Try using C<< $mech->click() >> instead of +C<< $mech->submit() >> or vice-versa. + +=head2 I've logged in to the server, but I get 500 errors when I try to get to protected content. + +Some web sites use distributed databases for their processing. It +can take a few seconds for the login/session information to percolate +through to all the servers. For human users with their slow reaction +times, this is not a problem, but a Perl script can outrun the server. +So try adding a C<sleep(5)> between logging in and actually doing anything +(the optimal delay must be determined experimentally). + +=head2 Mech is a big memory pig! I'm running out of RAM! + +Mech keeps a history of every page, and the state it was in. It actually +keeps a clone of the full Mech object at every step along the way. + +You can limit this stack size with the C<stack_depth> param in the C<new()> +constructor. If you set stack_size to 0, Mech will not keep any history. + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/lib/WWW/Mechanize/Image.pm b/lib/WWW/Mechanize/Image.pm new file mode 100644 index 0000000..028a7e6 --- /dev/null +++ b/lib/WWW/Mechanize/Image.pm @@ -0,0 +1,154 @@ +package WWW::Mechanize::Image; + +use strict; +use warnings; + +our $VERSION = '2.04'; + +#ABSTRACT: Image object for WWW::Mechanize + + +sub new { + my $class = shift; + my $params = shift || {}; + + my $self = bless {}, $class; + + for my $param ( qw( url base tag height width alt name attrs ) ) { + # Check for what we passed in, not whether it's defined + $self->{$param} = $params->{$param} if exists $params->{$param}; + } + + # url and tag are always required + for ( qw( url tag ) ) { + exists $self->{$_} or die "WWW::Mechanize::Image->new must have a $_ argument"; + } + + return $self; +} + + +sub url { return ($_[0])->{url}; } +sub base { return ($_[0])->{base}; } +sub name { return ($_[0])->{name}; } +sub tag { return ($_[0])->{tag}; } +sub height { return ($_[0])->{height}; } +sub width { return ($_[0])->{width}; } +sub alt { return ($_[0])->{alt}; } +sub attrs { return ($_[0])->{attrs}; } + + +sub URI { + my $self = shift; + + require URI::URL; + my $URI = URI::URL->new( $self->url, $self->base ); + + return $URI; +} + + +sub url_abs { + my $self = shift; + + return $self->URI->abs; +} + + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize::Image - Image object for WWW::Mechanize + +=head1 VERSION + +version 2.04 + +=head1 SYNOPSIS + +Image object to encapsulate all the stuff that Mech needs + +=head1 Constructor + +=head2 new() + +Creates and returns a new C<WWW::Mechanize::Image> object. + + my $image = WWW::Mechanize::Image->new( { + url => $url, + base => $base, + tag => $tag, + name => $name, # From the INPUT tag + height => $height, # optional + width => $width, # optional + alt => $alt, # optional + attrs => $attr_ref, # optional + } ); + +=head1 Accessors + +=head2 $image->url() + +Image URL from the C<src> attribute of the source tag. + +May be C<undef> if source tag has no C<src> attribute. + +=head2 $image->base() + +Base URL to which the links are relative. + +=head2 $image->name() + +Name for the field from the NAME attribute, if any. + +=head2 $image->tag() + +Tag name (either "image" or "input") + +=head2 $image->height() + +Image height + +=head2 $image->width() + +Image width + +=head2 $image->alt() + +ALT attribute from the source tag, if any. + +=head2 $image->attrs() + +Hash ref of all the attributes and attribute values in the tag. + +=head2 $image->URI() + +Returns the URL as a L<URI::URL> object. + +=head2 $image->url_abs() + +Returns the URL as an absolute URL string. + +=head1 SEE ALSO + +L<WWW::Mechanize> and L<WWW::Mechanize::Link> + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/lib/WWW/Mechanize/Link.pm b/lib/WWW/Mechanize/Link.pm new file mode 100644 index 0000000..693a6f9 --- /dev/null +++ b/lib/WWW/Mechanize/Link.pm @@ -0,0 +1,143 @@ +package WWW::Mechanize::Link; + +use strict; +use warnings; + +our $VERSION = '2.04'; + +#ABSTRACT: Link object for WWW::Mechanize + + +sub new { + my $class = shift; + + my $self; + + # The order of the first four must stay as they are for + # compatibility with older code. + if ( ref $_[0] eq 'HASH' ) { + $self = [ @{$_[0]}{ qw( url text name tag base attrs ) } ]; + } + else { + $self = [ @_ ]; + } + + return bless $self, $class; +} + + +sub url { return ($_[0])->[0]; } +sub text { return ($_[0])->[1]; } +sub name { return ($_[0])->[2]; } +sub tag { return ($_[0])->[3]; } +sub base { return ($_[0])->[4]; } +sub attrs { return ($_[0])->[5]; } + + +sub URI { + my $self = shift; + + require URI::URL; + my $URI = URI::URL->new( $self->url, $self->base ); + + return $URI; +} + + +sub url_abs { + my $self = shift; + + return $self->URI->abs; +} + + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +WWW::Mechanize::Link - Link object for WWW::Mechanize + +=head1 VERSION + +version 2.04 + +=head1 SYNOPSIS + +Link object to encapsulate all the stuff that Mech needs but nobody +wants to deal with as an array. + +=head1 Constructor + +=head2 new() + + my $link = WWW::Mechanize::Link->new( { + url => $url, + text => $text, + name => $name, + tag => $tag, + base => $base, + attr => $attr_href, + } ); + +For compatibility, this older interface is also supported: + + new( $url, $text, $name, $tag, $base, $attr_href ) + +Creates and returns a new C<WWW::Mechanize::Link> object. + +=head1 Accessors + +=head2 $link->url() + +URL from the link + +=head2 $link->text() + +Text of the link + +=head2 $link->name() + +NAME attribute from the source tag, if any. + +=head2 $link->tag() + +Tag name (one of: "a", "area", "frame", "iframe" or "meta"). + +=head2 $link->base() + +Base URL to which the links are relative. + +=head2 $link->attrs() + +Returns hash ref of all the attributes and attribute values in the tag. + +=head2 $link->URI() + +Returns the URL as a L<URI::URL> object. + +=head2 $link->url_abs() + +Returns a L<URI::URL> object for the absolute form of the string. + +=head1 SEE ALSO + +L<WWW::Mechanize> and L<WWW::Mechanize::Image> + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/perlcriticrc b/perlcriticrc new file mode 100644 index 0000000..f6100a2 --- /dev/null +++ b/perlcriticrc @@ -0,0 +1,28 @@ +[-CodeLayout::ProhibitParensWithBuiltins] +[CodeLayout::ProhibitHardTabs] +allow_leading_tabs = 0 + +[-CodeLayout::RequireTidyCode] + +[-ControlStructures::ProhibitPostfixControls] + +[-Documentation::RequirePodAtEnd] +[-Documentation::RequirePodSections] + +[-Editor::RequireEmacsFileVariables] +[-ErrorHandling::RequireCarping] + +[-InputOutput::ProhibitInteractiveTest] +[-InputOutput::ProhibitBacktickOperators] + +[-Miscellanea::RequireRcsKeywords] + +[-Modules::RequireVersionVar] + +[-RegularExpressions::RequireExtendedFormatting] +[-RegularExpressions::RequireLineBoundaryMatching] + +[-ValuesAndExpressions::ProhibitConstantPragma] +[-ValuesAndExpressions::ProhibitEmptyQuotes] + +[-Variables::ProhibitPunctuationVars] diff --git a/perltidyrc b/perltidyrc new file mode 100644 index 0000000..b7ed624 --- /dev/null +++ b/perltidyrc @@ -0,0 +1,12 @@ +--blank-lines-before-packages=0 +--iterations=2 +--no-outdent-long-comments +-b +-bar +-boc +-ci=4 +-i=4 +-l=78 +-nolq +-se +-wbb="% + - * / x != == >= <= =~ !~ < > | & >= < = **= += *= &= <<= &&= -= /= |= >>= ||= .= %= ^= x=" diff --git a/script/mech-dump b/script/mech-dump new file mode 100755 index 0000000..2338c4a --- /dev/null +++ b/script/mech-dump @@ -0,0 +1,189 @@ +#!/usr/bin/perl + +# PODNAME: mech-dump +# ABSTRACT: Dumps information about a web page + +use warnings; +use strict; +use WWW::Mechanize (); +use Getopt::Long; +use Pod::Usage; + +use HTTP::Cookies; +my @actions; +my $absolute; +my $all; + +my $user; +my $pass; +my $agent; +my $agent_alias; +my $cookie_filename; + +GetOptions( + 'user=s' => \$user, + 'password=s' => \$pass, + headers => sub { push( @actions, \&dump_headers ) }, + forms => sub { push( @actions, \&dump_forms ) }, + links => sub { push( @actions, \&dump_links ) }, + images => sub { push( @actions, \&dump_images ) }, + all => sub { $all++; push( @actions, \&dump_headers, \&dump_forms, \&dump_links, \&dump_images ) }, + text => sub { push( @actions, \&dump_text ) }, + absolute => \$absolute, + 'agent=s' => \$agent, + 'agent-alias=s' => \$agent_alias, + 'cookie-file=s' => \$cookie_filename, + help => sub { pod2usage(1); }, + version => sub { print STDERR $WWW::Mechanize::VERSION, "\n"; exit 0; }, +) or pod2usage(2); + + +my @uris = @ARGV or die "Must specify a URL or file to check. See --help for details.\n"; + +@actions = (\&dump_forms) unless @actions; + +binmode(STDOUT, ':utf8'); + +my $mech = WWW::Mechanize->new( autocheck => 0 ); +if ( defined $agent ) { + $mech->agent( $agent ); +} +elsif ( defined $agent_alias ) { + $mech->agent_alias( $agent_alias ); +} +if ( defined $cookie_filename ) { + my $cookies = HTTP::Cookies->new( file => $cookie_filename, autosave => 1, ignore_discard => 1 ); + $cookies->load() ; + $mech->cookie_jar($cookies); +} +else { + $mech->cookie_jar(undef) ; +} + +$mech->env_proxy(); +foreach my $uri (@uris) { + if ( -e $uri ) { + require URI::file; + $uri = URI::file->new_abs( $uri )->as_string; + } + + my $response = $mech->get( $uri ); + if (!$response->is_success and defined ($response->www_authenticate)) { + if (!defined $user or !defined $pass) { + die("Page requires username and password, but none specified.\n"); + } + $mech->credentials($user,$pass); + $response = $mech->get( $uri ); + $response->is_success or die "Can't fetch $uri with username and password\n", $response->status_line, "\n"; + } + $mech->is_html or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n}; + + foreach my $action (@actions ) { + $action->( $mech ); + print "\n" if @actions; + } +} + + +sub dump_headers { + my $mech = shift; + print "--> Headers:\n" if $all; + $mech->dump_headers( undef ); + return; +} + +sub dump_forms { + my $mech = shift; + print "--> Forms:\n" if $all; + $mech->dump_forms( undef ); + return; +} + +sub dump_links { + my $mech = shift; + print "--> Links:\n" if $all; + $mech->dump_links( undef, $absolute ); + return; +} + +sub dump_images { + my $mech = shift; + print "--> Images:\n" if $all; + $mech->dump_images( undef, $absolute ); + return; +} + +sub dump_text { + my $mech = shift; + $mech->dump_text(); + return; +} + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +mech-dump - Dumps information about a web page + +=head1 VERSION + +version 2.04 + +=head1 SYNOPSIS + +mech-dump [options] [file|url] + +Options: + + --headers Dump HTTP response headers + --forms Dump table of forms (default action) + --links Dump table of links + --images Dump table of images + --all Dump all four of the above, in that order + + --text Dumps the textual part of the web page + + --user=user Set the username + --password=pass Set the password + --cookie-file=filename Set the filename to use for persistent cookies + + --agent=agent Specify the UserAgent to pass + --agent-alias=alias + Specify the alias for the UserAgent to pass. + Pick one of: + * Windows IE 6 + * Windows Mozilla + * Mac Safari + * Mac Mozilla + * Linux Mozilla + * Linux Konqueror + + --absolute Show URLs as absolute, even if relative in the page + --help Show this message + +The order of the options specified is relevant. Repeated options +get repeated dumps. + +Proxy settings are specified through the environment (e.g. C<http_proxy=http://proxy.my.place/>). +See LWP::UserAgent for details. + +=head1 SEE ALSO + +L<WWW::Mechanize> + +=head1 AUTHOR + +Andy Lester <andy at petdance.com> + +=head1 COPYRIGHT AND LICENSE + +This software is copyright (c) 2004 by Andy Lester. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +=cut diff --git a/t/00-load.t b/t/00-load.t new file mode 100644 index 0000000..7020c1f --- /dev/null +++ b/t/00-load.t @@ -0,0 +1,18 @@ +#!perl -T + +use warnings; +use strict; +use lib 't'; +use Test::More tests => 2; +use Tools; + +use_ok( 'WWW::Mechanize' ); +use_ok( 'WWW::Mechanize::Link' ); + +diag( "Testing WWW::Mechanize $WWW::Mechanize::VERSION, with LWP $LWP::VERSION, Perl $], $^X" ); +if ( $canTMC ) { + diag( "Test::Memory::Cycle $Test::Memory::Cycle::VERSION is installed." ); +} +else { + diag( 'Test::Memory::Cycle is not installed.' ); +} diff --git a/t/00-report-prereqs.dd b/t/00-report-prereqs.dd new file mode 100644 index 0000000..1af28f0 --- /dev/null +++ b/t/00-report-prereqs.dd @@ -0,0 +1,100 @@ +do { my $x = { + 'configure' => { + 'requires' => { + 'ExtUtils::MakeMaker' => '0' + }, + 'suggests' => { + 'JSON::PP' => '2.27300' + } + }, + 'develop' => { + 'recommends' => { + 'Dist::Zilla::PluginBundle::Git::VersionManager' => '0.007' + }, + 'requires' => { + 'Code::TidyAll' => '0.71', + 'Code::TidyAll::Plugin::SortLines::Naturally' => '0.000003', + 'Code::TidyAll::Plugin::Test::Vars' => '0.04', + 'Code::TidyAll::Plugin::UniqueLines' => '0.000003', + 'LWP::Protocol::https' => '6.07', + 'Parallel::ForkManager' => '1.19', + 'Perl::Critic' => '1.132', + 'Perl::Tidy' => '20180220', + 'Pod::Coverage::TrustPod' => '0', + 'Test::Code::TidyAll' => '0.50', + 'Test::EOL' => '0', + 'Test::Mojibake' => '0', + 'Test::More' => '0.88', + 'Test::Needs' => '0', + 'Test::Pod' => '1.41', + 'Test::Pod::Coverage' => '1.08', + 'Test::Portability::Files' => '0', + 'Test::RequiresInternet' => '0', + 'Test::Vars' => '0.014', + 'Test::Version' => '1', + 'constant' => '0', + 'lib' => '0' + } + }, + 'runtime' => { + 'recommends' => { + 'Compress::Zlib' => '0' + }, + 'requires' => { + 'Carp' => '0', + 'Getopt::Long' => '0', + 'HTML::Form' => '1.00', + 'HTML::HeadParser' => '0', + 'HTML::TokeParser' => '0', + 'HTML::TreeBuilder' => '5', + 'HTTP::Cookies' => '0', + 'HTTP::Request' => '1.30', + 'HTTP::Request::Common' => '0', + 'LWP::UserAgent' => '6.45', + 'Pod::Usage' => '0', + 'Scalar::Util' => '1.14', + 'Tie::RefHash' => '0', + 'URI::URL' => '0', + 'URI::file' => '0', + 'base' => '0', + 'perl' => '5.006', + 'strict' => '0', + 'warnings' => '0' + } + }, + 'test' => { + 'recommends' => { + 'CPAN::Meta' => '2.120900' + }, + 'requires' => { + 'CGI' => '4.32', + 'Exporter' => '0', + 'ExtUtils::MakeMaker' => '0', + 'File::Spec' => '0', + 'File::Temp' => '0', + 'FindBin' => '0', + 'HTTP::Daemon' => '6.12', + 'HTTP::Response' => '0', + 'HTTP::Server::Simple::CGI' => '0', + 'LWP' => '0', + 'LWP::Simple' => '0', + 'Path::Tiny' => '0', + 'Test::Deep' => '0', + 'Test::Exception' => '0', + 'Test::Fatal' => '0', + 'Test::Memory::Cycle' => '1.06', + 'Test::More' => '0.96', + 'Test::NoWarnings' => '1.04', + 'Test::Output' => '0', + 'Test::Taint' => '1.08', + 'Test::Warn' => '0', + 'Test::Warnings' => '0', + 'URI' => '0', + 'URI::Escape' => '0', + 'bytes' => '0', + 'lib' => '0' + } + } + }; + $x; + }
\ No newline at end of file diff --git a/t/00-report-prereqs.t b/t/00-report-prereqs.t new file mode 100644 index 0000000..c3a94ca --- /dev/null +++ b/t/00-report-prereqs.t @@ -0,0 +1,193 @@ +#!perl + +use strict; +use warnings; + +# This test was generated by Dist::Zilla::Plugin::Test::ReportPrereqs 0.028 + +use Test::More tests => 1; + +use ExtUtils::MakeMaker; +use File::Spec; + +# from $version::LAX +my $lax_version_re = + qr/(?: undef | (?: (?:[0-9]+) (?: \. | (?:\.[0-9]+) (?:_[0-9]+)? )? + | + (?:\.[0-9]+) (?:_[0-9]+)? + ) | (?: + v (?:[0-9]+) (?: (?:\.[0-9]+)+ (?:_[0-9]+)? )? + | + (?:[0-9]+)? (?:\.[0-9]+){2,} (?:_[0-9]+)? + ) + )/x; + +# hide optional CPAN::Meta modules from prereq scanner +# and check if they are available +my $cpan_meta = "CPAN::Meta"; +my $cpan_meta_pre = "CPAN::Meta::Prereqs"; +my $HAS_CPAN_META = eval "require $cpan_meta; $cpan_meta->VERSION('2.120900')" && eval "require $cpan_meta_pre"; ## no critic + +# Verify requirements? +my $DO_VERIFY_PREREQS = 1; + +sub _max { + my $max = shift; + $max = ( $_ > $max ) ? $_ : $max for @_; + return $max; +} + +sub _merge_prereqs { + my ($collector, $prereqs) = @_; + + # CPAN::Meta::Prereqs object + if (ref $collector eq $cpan_meta_pre) { + return $collector->with_merged_prereqs( + CPAN::Meta::Prereqs->new( $prereqs ) + ); + } + + # Raw hashrefs + for my $phase ( keys %$prereqs ) { + for my $type ( keys %{ $prereqs->{$phase} } ) { + for my $module ( keys %{ $prereqs->{$phase}{$type} } ) { + $collector->{$phase}{$type}{$module} = $prereqs->{$phase}{$type}{$module}; + } + } + } + + return $collector; +} + +my @include = qw( + +); + +my @exclude = qw( + +); + +# Add static prereqs to the included modules list +my $static_prereqs = do './t/00-report-prereqs.dd'; + +# Merge all prereqs (either with ::Prereqs or a hashref) +my $full_prereqs = _merge_prereqs( + ( $HAS_CPAN_META ? $cpan_meta_pre->new : {} ), + $static_prereqs +); + +# Add dynamic prereqs to the included modules list (if we can) +my ($source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; +my $cpan_meta_error; +if ( $source && $HAS_CPAN_META + && (my $meta = eval { CPAN::Meta->load_file($source) } ) +) { + $full_prereqs = _merge_prereqs($full_prereqs, $meta->prereqs); +} +else { + $cpan_meta_error = $@; # capture error from CPAN::Meta->load_file($source) + $source = 'static metadata'; +} + +my @full_reports; +my @dep_errors; +my $req_hash = $HAS_CPAN_META ? $full_prereqs->as_string_hash : $full_prereqs; + +# Add static includes into a fake section +for my $mod (@include) { + $req_hash->{other}{modules}{$mod} = 0; +} + +for my $phase ( qw(configure build test runtime develop other) ) { + next unless $req_hash->{$phase}; + next if ($phase eq 'develop' and not $ENV{AUTHOR_TESTING}); + + for my $type ( qw(requires recommends suggests conflicts modules) ) { + next unless $req_hash->{$phase}{$type}; + + my $title = ucfirst($phase).' '.ucfirst($type); + my @reports = [qw/Module Want Have/]; + + for my $mod ( sort keys %{ $req_hash->{$phase}{$type} } ) { + next if $mod eq 'perl'; + next if grep { $_ eq $mod } @exclude; + + my $file = $mod; + $file =~ s{::}{/}g; + $file .= ".pm"; + my ($prefix) = grep { -e File::Spec->catfile($_, $file) } @INC; + + my $want = $req_hash->{$phase}{$type}{$mod}; + $want = "undef" unless defined $want; + $want = "any" if !$want && $want == 0; + + my $req_string = $want eq 'any' ? 'any version required' : "version '$want' required"; + + if ($prefix) { + my $have = MM->parse_version( File::Spec->catfile($prefix, $file) ); + $have = "undef" unless defined $have; + push @reports, [$mod, $want, $have]; + + if ( $DO_VERIFY_PREREQS && $HAS_CPAN_META && $type eq 'requires' ) { + if ( $have !~ /\A$lax_version_re\z/ ) { + push @dep_errors, "$mod version '$have' cannot be parsed ($req_string)"; + } + elsif ( ! $full_prereqs->requirements_for( $phase, $type )->accepts_module( $mod => $have ) ) { + push @dep_errors, "$mod version '$have' is not in required range '$want'"; + } + } + } + else { + push @reports, [$mod, $want, "missing"]; + + if ( $DO_VERIFY_PREREQS && $type eq 'requires' ) { + push @dep_errors, "$mod is not installed ($req_string)"; + } + } + } + + if ( @reports ) { + push @full_reports, "=== $title ===\n\n"; + + my $ml = _max( map { length $_->[0] } @reports ); + my $wl = _max( map { length $_->[1] } @reports ); + my $hl = _max( map { length $_->[2] } @reports ); + + if ($type eq 'modules') { + splice @reports, 1, 0, ["-" x $ml, "", "-" x $hl]; + push @full_reports, map { sprintf(" %*s %*s\n", -$ml, $_->[0], $hl, $_->[2]) } @reports; + } + else { + splice @reports, 1, 0, ["-" x $ml, "-" x $wl, "-" x $hl]; + push @full_reports, map { sprintf(" %*s %*s %*s\n", -$ml, $_->[0], $wl, $_->[1], $hl, $_->[2]) } @reports; + } + + push @full_reports, "\n"; + } + } +} + +if ( @full_reports ) { + diag "\nVersions for all modules listed in $source (including optional ones):\n\n", @full_reports; +} + +if ( $cpan_meta_error || @dep_errors ) { + diag "\n*** WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING ***\n"; +} + +if ( $cpan_meta_error ) { + my ($orig_source) = grep { -f } 'MYMETA.json', 'MYMETA.yml'; + diag "\nCPAN::Meta->load_file('$orig_source') failed with: $cpan_meta_error\n"; +} + +if ( @dep_errors ) { + diag join("\n", + "\nThe following REQUIRED prerequisites were not satisfied:\n", + @dep_errors, + "\n" + ); +} + +pass('Reported prereqs'); + +# vim: ts=4 sts=4 sw=4 et: diff --git a/t/TestServer.pm b/t/TestServer.pm new file mode 100644 index 0000000..dba6dbe --- /dev/null +++ b/t/TestServer.pm @@ -0,0 +1,131 @@ +package TestServer; + +use warnings; +use strict; + +use Test::More; +use HTTP::Server::Simple::CGI; +use base qw( HTTP::Server::Simple::CGI ); + +my $dispatch_table = {}; + +=head1 OVERLOADED METHODS + +=cut + +our $pid; + +sub new { + die 'An instance of TestServer has already been started.' if $pid; + + my $class = shift; + my $port = shift; + + if ( !$port ) { + $port = int(rand(20000)) + 20000; + } + my $self = $class->SUPER::new( $port ); + + my $root = $self->root; + + return $self; +} + +sub run { + my $self = shift; + + $pid = $self->SUPER::run(@_); + + $SIG{__DIE__} = \&stop; + + return $pid; +} + +sub handle_request { + my $self = shift; + my $cgi = shift; + + my $path = $cgi->path_info(); + my $handler = $dispatch_table->{$path}; + + if (ref($handler) eq "CODE") { + print "HTTP/1.0 200 OK\r\n"; + $handler->($cgi); + } + else { + my $file = $path; + if ( $file =~ m{/$} ) { + $file .= 'index.html'; + } + $file =~ s/\s+//g; + + my $filename = "t/html/$file"; + if ( -r $filename ) { + if (my $response=do { local (@ARGV, $/) = $filename; <> }) { + print "HTTP/1.0 200 OK\r\n"; + print "Content-Type: text/html\r\nContent-Length: ", length($response), "\r\n\r\n", $response; + return; + } + } + else { + print "HTTP/1.0 404 Not found\r\n"; + print + $cgi->header, + $cgi->start_html('Not found'), + $cgi->h1('Not found'), + $cgi->end_html; + } + } +} + +=head1 METHODS UNIQUE TO TestServer + +=cut + +sub set_dispatch { + my $self = shift; + $dispatch_table = shift; + + return; +} + +sub background { + my $self = shift; + + $pid = $self->SUPER::background() + or Carp::confess( q{Can't start the test server} ); + + sleep 1; # background() may come back prematurely, so give it a second to fire up + + my $root = $self->root; + + diag( "Test server $root as PID $pid" ); + + return $pid; +} + + +sub hostname { + my $self = shift; + + return '127.0.0.1'; +} + +sub root { + my $self = shift; + my $port = $self->port; + my $hostname = $self->hostname; + + return "http://$hostname:$port"; +} + +sub stop { + if ( $pid ) { + kill( 9, $pid ) unless $^S; + undef $pid; + } + + return; +} + +1; diff --git a/t/Tools.pm b/t/Tools.pm new file mode 100644 index 0000000..46043db --- /dev/null +++ b/t/Tools.pm @@ -0,0 +1,20 @@ +package Tools; + +use base 'Exporter'; + +our @EXPORT_OK = qw( $canTMC memory_cycle_ok ); +our @EXPORT = @EXPORT_OK; + +our $canTMC; + +sub import { + delete @ENV{ qw( http_proxy HTTP_PROXY PATH IFS CDPATH ENV BASH_ENV) }; + + eval 'use Test::Memory::Cycle'; + $canTMC = !$@; + + Tools->export_to_level(1, @_); +} + + +1; diff --git a/t/add_header.t b/t/add_header.t new file mode 100644 index 0000000..96f98fd --- /dev/null +++ b/t/add_header.t @@ -0,0 +1,23 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 4; +use HTTP::Request::Common qw( GET ); + +BEGIN { + delete @ENV{qw( PATH IFS CDPATH ENV BASH_ENV )}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created agent' ); + +$agent->add_header( Referer => 'x' ); +my $req = GET( 'http://www.google.com/' ); +$req = $agent->_modify_request( $req ); +like( $req->as_string, qr/Referer/, q{Referer's in there} ); + +$agent->add_header( Referer => undef ); +$req = $agent->_modify_request( $req ); +unlike( $req->as_string, qr/Referer/, q{Referer's not there} ); diff --git a/t/aliases.t b/t/aliases.t new file mode 100644 index 0000000..34b46f7 --- /dev/null +++ b/t/aliases.t @@ -0,0 +1,16 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 8; + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my @aliases = WWW::Mechanize::known_agent_aliases(); +is( scalar @aliases, 6, 'All aliases accounted for' ); + +for my $alias ( @aliases ) { + like( $alias, qr/^(Mac|Windows|Linux) /, 'We only know Mac, Windows or Linux' ); +} diff --git a/t/area_link.html b/t/area_link.html new file mode 100644 index 0000000..0f80068 --- /dev/null +++ b/t/area_link.html @@ -0,0 +1,20 @@ +<html> + <head> + <TITLE>Testing AREA tag handling</TITLE> + </head> + <body> + <MAP NAME="SOME_MAP"> + <AREA HREF="http://www.msnbc.com/area" COORDS="1,2,3,4"></AREA> + <AREA HREF="http://www.cnn.com/area" COORDS="5,6,7,8"> + <AREA HREF="http://www.cpan.org/area" COORDS="10,11,12,13" /> + </MAP> + <MAP NAME="OTHER_MAP"> + <AREA NOHREF COORDS="1,2,3,4"> + <AREA HREF="http://www.slashdot.org"> + <AREA HREF="http://mark.stosberg.com" ALT="Mark Stosberg's homepage"> + </MAP> + <IMG SRC="SOME_IMAGE" USEMAP="#SOME_MAP"> + <IMG SRC="SOME_IMAGE" USEMAP="#OTHER_MAP"> + </body> +</html> + diff --git a/t/area_link.t b/t/area_link.t new file mode 100644 index 0000000..5e7ed06 --- /dev/null +++ b/t/area_link.t @@ -0,0 +1,77 @@ +#!perl -T +# WWW::Mechanize tests for <area> tags + +use warnings; +use strict; +use Test::More tests => 9; + +use lib 't'; + +BEGIN { + use Tools; +} + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +use URI::file; + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/area_link.html' ); +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + + +AREA_CHECKS: { + my @wanted_links = ( + [ 'http://www.msnbc.com/area', undef, undef, 'area', { + coords => '1,2,3,4', + href => 'http://www.msnbc.com/area' + } ], + [ 'http://www.cnn.com/area', undef, undef, 'area', { + coords => '5,6,7,8', + href => 'http://www.cnn.com/area' + } ], + [ 'http://www.cpan.org/area', undef, undef, 'area', { + '/' => '/', + coords => '10,11,12,13', + href => 'http://www.cpan.org/area' + } ], + [ 'http://www.slashdot.org', undef, undef, 'area', { + href => 'http://www.slashdot.org' + } ], + [ 'http://mark.stosberg.com', undef, undef, 'area', { + alt => q{Mark Stosberg's homepage}, + href => 'http://mark.stosberg.com' + } ], + ); + my @links = $mech->find_all_links(); + + # Skip the 'base' field for now + for (@links) { + my $attrs = $_->[5]; + @{$_} = @{$_}[0..3]; + push @{$_}, $attrs; + } + + is_deeply( \@links, \@wanted_links, 'Correct links came back' ); + + my $linkref = $mech->find_all_links(); + is_deeply( $linkref, \@wanted_links, 'Correct links came back' ); + + SKIP: { + skip 'Test::Memory::Cycle not installed', 2 unless $canTMC; + memory_cycle_ok( \@links, 'Link list: no cycles' ); + memory_cycle_ok( $linkref, 'Single link: no cycles' ); + } +} + +SKIP: { + skip 'Test::Memory::Cycle not installed', 2 unless $canTMC; + + memory_cycle_ok( $uri, 'URI: no cycles' ); + memory_cycle_ok( $mech, 'Mech: no cycles' ); +} diff --git a/t/autocheck.t b/t/autocheck.t new file mode 100644 index 0000000..de1b0ed --- /dev/null +++ b/t/autocheck.t @@ -0,0 +1,26 @@ +#!perl -T + +use warnings; +use strict; + +use Test::Fatal qw( exception ); +use Test::More; +use WWW::Mechanize (); + +my $bad_url = "file:///foo.foo.xx.random"; + +AUTOCHECK_OFF: { + my $mech = WWW::Mechanize->new( autocheck => 0 ); + $mech->get( $bad_url ); + ok( !$mech->success, qq{Didn't fetch $bad_url, but didn't die, either} ); +} + +AUTOCHECK_ON: { + like( + exception { WWW::Mechanize->new->get($bad_url) }, + qr/Error GETing/, + qq{Couldn't fetch $bad_url, and died as a result} + ); +} + +done_testing(); diff --git a/t/bad-request.t b/t/bad-request.t new file mode 100644 index 0000000..8215ded --- /dev/null +++ b/t/bad-request.t @@ -0,0 +1,35 @@ +#!perl + +use warnings; +use strict; +use Test::More tests => 2; + +=head1 NAME + +bad-request.t + +=head1 SYNOPSIS + +Tests the detection of bad API usage. + + ->request() + +Checks for behaviour of calls to C<< ->request() >> without the required +parameter. + +=cut + +use WWW::Mechanize; + +my $mech = WWW::Mechanize->new(); + +my $lives= eval { +#line 1 + $mech->request(); + 1 +}; +my $err= $@; +ok !$lives, "->request wants at least one parameter"; +like $err, qr/->request was called without a request parameter/, + "We carp with a descriptive error message"; + diff --git a/t/clone.t b/t/clone.t new file mode 100644 index 0000000..2e8c83e --- /dev/null +++ b/t/clone.t @@ -0,0 +1,42 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 6; + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize' ); +my $clone; + +INITIAL_CLONE: { + $mech->cookie_jar->set_cookie( 1, 2, 3, '/4', '5', 6, '7', 8, 9, 10 ); + my $old_cookies = $mech->cookie_jar->as_string; + + $clone = $mech->clone(); + isa_ok( $clone, 'WWW::Mechanize' ); + my $new_cookies = $clone->cookie_jar->as_string; + + is( $old_cookies, $new_cookies, 'Cookie jar contents are the same' ); +} + +COOKIE_SHARING: { + # Now see if we're still working on the same jar + $clone->cookie_jar->set_cookie( 10, 20, 30, '/40', '50', 60, '70', 80, 90, 10 ); + my $old_cookies = $mech->cookie_jar->as_string; + my $new_cookies = $clone->cookie_jar->as_string; + + is( $old_cookies, $new_cookies, 'Adding cookies adds to both jars' ); +} + +HEADERS_NOT_SHARING: { + # headers should be independent + $clone->add_header(foo=>'bar'); + ok( + not($mech->{headers}{foo}), + 'Adding headers does not add to both agents', + ); +} diff --git a/t/content.t b/t/content.t new file mode 100644 index 0000000..54fcaba --- /dev/null +++ b/t/content.t @@ -0,0 +1,98 @@ +use warnings; +use strict; +use Test::More tests => 9; + +=head1 NAME + +content.t + +=head1 SYNOPSIS + +Tests the transforming forms of $mech->content(). + +=cut + +BEGIN { delete @ENV{ qw( http_proxy HTTP_PROXY ) }; } +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my $html = <<'HTML'; +<html> +<head> +<title>Howdy?</title> +</head> +<body> +Fine, thx! +</body> +</html> +HTML + + +my $mech = WWW::Mechanize->new(); +# Well actually there is no base (and therefore it does not belong to us +# :-), so let's kludge a bit. +$mech->{base} = 'http://example.com/'; + +is($mech->content, undef, 'content starts out as undef'); + +$mech->update_html($html); + +=head2 $mech->content(format => "text") + +=cut + +SKIP: { + eval 'use HTML::TreeBuilder 5'; + skip 'HTML::TreeBuilder version 5 not installed', 2 if $@; + + my $text = $mech->content(format => 'text'); + like( $text, qr/Fine/, 'Found Fine' ); + unlike( $text, qr/html/i, 'Could not find "html"' ); +} + +=head2 $mech->content(base_href => undef) + +=head2 $mech->content(base_href => $basehref) + +=cut + +my $content = $mech->content(base_href => 'foo'); +like($content, qr/base href="foo"/, 'Found the base href'); + + +$content = $mech->content(base_href => undef); +like($content, qr[base href="http://example.com/"], 'Found the new base href'); + +$mech->{res} = Test::MockResponse->new( + raw_content => 'this is the raw content', + charset_none => 'this is a slightly decoded content', + charset_whatever => 'this is charset whatever', +); + +$content = $mech->content(raw => 1); +is($content, 'this is the raw content', 'raw => 1'); + +$content = $mech->content(decoded_by_headers => 1); +is($content, 'this is a slightly decoded content', 'decoded_by_headers => 1'); + +$content = $mech->content(charset => 'whatever'); +is($content, 'this is charset whatever', 'charset => ...'); + +package Test::MockResponse; + +sub new { + my $package = shift; + return bless { @_ }, $package; +} + +sub content { + my ($self) = @_; + return $self->{raw_content}; +} + +sub decoded_content { + my ($self, %opts) = @_; + return $self->{decoded_content} unless exists $opts{charset}; + return $self->{"charset_$opts{charset}"}; +} diff --git a/t/cookies.t b/t/cookies.t new file mode 100644 index 0000000..f196f30 --- /dev/null +++ b/t/cookies.t @@ -0,0 +1,129 @@ +# XXX add cookie reading on the server side to the test + +BEGIN { delete @ENV{ qw( http_proxy HTTP_PROXY ) }; } + +use warnings; +use strict; +use Test::More; + +if ( $^O =~ /Win32/ ) { + plan skip_all => 'HTTP::Server::Simple does not support Windows yet.'; +} +else { + plan tests => 14; +} + +use WWW::Mechanize; +use URI::Escape qw( uri_unescape ); + +use lib 't/'; +use TestServer; + +my $ncookies = 0; + +sub send_cookies { + my $cgi = shift; + return if !ref $cgi; + + ++$ncookies; + + print + $cgi->header( + -cookie => $cgi->cookie( + -name => 'my_cookie', + -value => "Cookie #$ncookies", + -domain => '127.0.0.1', + -path => '/', + -expires => '+1h', + -secure => 0, + ) + ), + $cgi->start_html( -title => "Home of Cookie #$ncookies" ), + $cgi->h1( "Here is Cookie #$ncookies" ), + $cgi->end_html; +} + +sub nosend_cookies { + my $cgi = shift; + return if !ref $cgi; + + print + $cgi->header(), + $cgi->start_html( -title => 'No cookies sent' ), + $cgi->h1( 'No cookies sent' ), + $cgi->end_html; +} + +my $server = TestServer->new(); +$server->set_dispatch( { + '/feedme' => \&send_cookies, + '/nocookie' => \&nosend_cookies, +} ); +my $pid = $server->background(); + +my $root = $server->root; + +my $cookiepage_url = "$root/feedme"; +my $nocookiepage_url = "$root/nocookie"; + +my $mech = WWW::Mechanize->new( autocheck => 0 ); +isa_ok( $mech, 'WWW::Mechanize' ); + +FIRST_COOKIE: { + $mech->get( $cookiepage_url ); + is( $mech->status, 200, 'First fetch works' ); + + my $cookieval = cookieval( $mech ); + + is( $cookieval, 'Cookie #1', 'First cookie matches' ); + is( $mech->title, 'Home of Cookie #1', 'Right title' ); +} + +SECOND_COOKIE: { + $mech->get( $cookiepage_url ); + is( $mech->status, 200, 'Second fetch works' ); + + my $cookieval = cookieval( $mech ); + + is( $cookieval, 'Cookie #2', 'Second cookie matches' ); + is( $mech->title, 'Home of Cookie #2', 'Right title' ); +} + +BACK_TO_FIRST_PAGE: { + $mech->back(); + + my $cookieval = cookieval( $mech ); + + is( $cookieval, 'Cookie #2', 'Cookie did not change...' ); + is( $mech->title, 'Home of Cookie #1', '... but back to the first page title' ); +} + +FORWARD_TO_NONCOOKIE_PAGE: { + $mech->get( $nocookiepage_url ); + + my $cookieval = cookieval( $mech ); + + is( $cookieval, 'Cookie #2', 'Cookie did not change...' ); + is( $mech->title, 'No cookies sent', 'On the proper 3rd page' ); +} + +GET_A_THIRD_COOKIE: { + $mech->get( $cookiepage_url ); + + my $cookieval = cookieval( $mech ); + + is( $cookieval, 'Cookie #3', 'Got the third cookie' ); + is( $mech->title, 'Home of Cookie #3', 'Title is correct' ); +} + + +my $signal = ($^O eq 'MSWin32') ? 9 : 15; +my $nprocesses = kill $signal, $pid; +is( $nprocesses, 1, 'Signaled the child process' ); + + +sub cookieval { + my $mech = shift; + + return uri_unescape( $mech->cookie_jar->{COOKIES}{'127.0.0.1'}{'/'}{'my_cookie'}[1] ); +} diff --git a/t/credentials-api.t b/t/credentials-api.t new file mode 100644 index 0000000..064819d --- /dev/null +++ b/t/credentials-api.t @@ -0,0 +1,34 @@ +use strict; +use warnings; + +use Test::More tests => 4; +use LWP::UserAgent; +use WWW::Mechanize; +use URI; + +=pod + +The monkeypatch introduced since at least WWW::Mechanize 1.34 only +ever allows one instance of every LWP::UserAgent descendant to have +credentials. This test checks that this buggy behaviour is gone. + +=cut + +my $uri = URI->new( 'http://localhost' ); +my $realm = 'myrealm'; + +my $ua = LWP::UserAgent->new(); +$ua->credentials($uri, $realm, 'user', 'pass'); + +my $mech1 = WWW::Mechanize->new(); +my $mech2 = WWW::Mechanize->new(); +my $mech3 = WWW::Mechanize->new(); + +$mech1->credentials('mech1','mech1'); +$mech2->credentials('mech2','mech2'); + +is_deeply( [$ua->credentials($uri, $realm)], ['user', 'pass'], 'LWP::UserAgent instance retains its old credentials' ); + +is_deeply( [$mech1->get_basic_credentials( $realm, $uri )], ['mech1', 'mech1'], 'First instance retains its credentials' ); +is_deeply( [$mech2->get_basic_credentials( $realm, $uri )], ['mech2', 'mech2'], 'Second instance retains its credentials' ); +is_deeply( [$mech3->get_basic_credentials( $realm, $uri )], [], 'Untouched instance retains its credentials' ); diff --git a/t/credentials.t b/t/credentials.t new file mode 100644 index 0000000..a15ff8e --- /dev/null +++ b/t/credentials.t @@ -0,0 +1,54 @@ +#!perl -T + +use warnings; +use strict; + +use WWW::Mechanize; +use Test::More tests => 14; + +my $mech = WWW::Mechanize->new; +isa_ok( $mech, 'WWW::Mechanize' ); + +my ($user, $pass); + +my $uri = URI->new( 'http://localhost' ); + +($user, $pass) = $mech->get_basic_credentials('myrealm', $uri, 0); +is $user, undef, 'default username is undefined at first'; +is $pass, undef, 'default password is undefined at first'; + + +$mech->credentials("username", "password"); + +($user, $pass) = $mech->get_basic_credentials('myrealm', $uri, 0); +is $user, 'username', + 'calling credentials sets username for get_basic_credentials'; +is $pass, 'password', + 'calling credentials sets password for get_basic_credentials'; + +my $mech2 = $mech->clone; + +($user, $pass) = $mech2->get_basic_credentials('myrealm', $uri, 0); +is $user, 'username', + 'cloned object has username for get_basic_credentials'; +is $pass, 'password', + 'cloned object has password for get_basic_credentials'; + +my $mech3 = WWW::Mechanize->new; +isa_ok( $mech3, 'WWW::Mechanize' ); + +($user, $pass) = $mech3->get_basic_credentials('myrealm', $uri, 0); +is $user, undef, 'new object has no username for get_basic_credentials'; +is $pass, undef, 'new object has no password for get_basic_credentials'; + +$mech->clear_credentials; + +($user, $pass) = $mech->get_basic_credentials('myrealm', $uri, 0); +is $user, undef, 'username is undefined after clear_credentials'; +is $pass, undef, 'password is undefined after clear_credentials'; + +($user, $pass) = $mech2->get_basic_credentials('myrealm', $uri, 0); +is $user, 'username', + 'cloned object still has username for get_basic_credentials'; +is $pass, 'password', + 'cloned object still has password for get_basic_credentials'; @@ -0,0 +1,20 @@ +#!perl -T + +use warnings; +use strict; + +use Test::Exception; +use Test::More; +use WWW::Mechanize (); + +dies_ok { + WWW::Mechanize->new->die('OH NO! ERROR!'); +} +'Expecting to die'; + +lives_ok { + WWW::Mechanize->new( onerror => undef )->die('OH NO! ERROR!'); +} +'Not expecting to die'; + +done_testing(); diff --git a/t/dump.t b/t/dump.t new file mode 100644 index 0000000..79e94cd --- /dev/null +++ b/t/dump.t @@ -0,0 +1,187 @@ +#!perl + +use warnings; +use strict; +use Test::More 0.96 tests => 7; +use Test::Output; +use URI::file; +use File::Temp qw/tempdir/; +use File::Spec; + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my $dir = tempdir( CLEANUP => 1 ); + +subtest "dump_headers", sub { + plan tests => 5; + my $mech = create_mech('t/find_inputs.html'); + my $tmp_name = File::Spec->catfile($dir, 'headers.tmp'); + + $mech->dump_headers($tmp_name); + ok( -e $tmp_name, 'Dump file created'); + + fh_test($mech, 'dump_headers', qr/Content-Length/); +}; + + +subtest "dump_links test", sub { + dump_tests('dump_links', 't/find_link.html', <<'EXPECTED'); +http://www.drphil.com/ +HTTP://WWW.UPCASE.COM/ +styles.css +foo.png +http://blargle.com/ +http://a.cpan.org/ +http://b.cpan.org/ +foo.html +bar.html +http://c.cpan.org/ +http://d.cpan.org/ +http://www.msnbc.com/ +http://www.oreilly.com/ +http://www.cnn.com/ +http://www.bbc.co.uk/ +http://www.msnbc.com/ +http://www.cnn.com/ +http://www.bbc.co.uk/ +/cgi-bin/MT/mt.cgi +http://www.msnbc.com/area +http://www.cnn.com/area +http://www.cpan.org/area +http://nowhere.org/ +http://nowhere.org/padded +blongo.html +http://www.yahoo.com/ +EXPECTED +}; + +subtest "dump_images test", sub { + dump_tests('dump_images', 't/image-parse.html', <<'EXPECTED'); +/Images/bg-gradient.png +wango.jpg +bongo.gif +linked.gif +hacktober.jpg +hacktober.jpg +hacktober.jpg +http://example.org/abs.tif + +images/logo.png +inner.jpg +outer.jpg +EXPECTED +}; + +subtest "dump_forms test", sub { + dump_tests('dump_forms', 't/form_with_fields.html', <<'EXPECTED'); +POST http://localhost/ (multipart/form-data) [1st_form] + 1a= (text) + 1b= (text) + submit=Submit (submit) + +POST http://localhost/ [2nd_form] + opt[2]= (text) + 1b= (text) + submit=Submit (submit) + +POST http://localhost/ (multipart/form-data) [3rd_form_ambiguous] + 3a= (text) + 3b= (text) + submit=Submit (submit) + +POST http://localhost/ (multipart/form-data) [3rd_form_ambiguous] + 3c= (text) + 3d= (text) + x= (text) + submit=Submit (submit) + +POST http://localhost/ (multipart/form-data) [4th_form_1] + 4a= (text) + 4b= (text) + x= (text) + submit=Submit (submit) + +POST http://localhost/ (multipart/form-data) [4th_form_2] + 4a= (text) + 4b= (text) + x= (text) + submit=Submit (submit) + +POST https://localhost + 5a= (hidden readonly) + 5b=value (hidden readonly) + 5c= (hidden readonly) + 5d=foo (hidden readonly) + 5e=value (hidden readonly) + +EXPECTED +}; + +subtest "dump_forms multiselect", sub { + dump_tests('dump_forms', 't/form_133_regression.html', <<'EXPECTED'); +GET http://localhost/ + select1=1 (option) [*1|2|3|4] + select2=1 (option) [*1|2|3|4] + select3=1 (option) [*1|2|3|4] + select4=1 (option) [*1|2|3|4] + multiselect1=<UNDEF> (option) [*<UNDEF>/off|1] + multiselect1=<UNDEF> (option) [*<UNDEF>/off|2] + multiselect1=<UNDEF> (option) [*<UNDEF>/off|3] + multiselect1=<UNDEF> (option) [*<UNDEF>/off|4] + multiselect2=<UNDEF> (option) [*<UNDEF>/off|1] + multiselect2=<UNDEF> (option) [*<UNDEF>/off|2] + multiselect2=<UNDEF> (option) [*<UNDEF>/off|3] + multiselect2=<UNDEF> (option) [*<UNDEF>/off|4] + +EXPECTED +}; + +subtest "dump_text test", sub { + dump_tests('dump_text', 't/image-parse.html', <<'EXPECTED'); +Testing image extractionblargle And now, the dreaded wango CNN BBC Blongo!Logo +EXPECTED +}; + +sub dump_tests { + my ($method, $fp, $expected) = @_; + my $mech = create_mech($fp); + + fh_test($mech, $method, $expected); +}; + +sub create_mech { + my $filepath = shift; + my $mech = WWW::Mechanize->new( cookie_jar => undef ); + isa_ok( $mech, 'WWW::Mechanize' ); + my $uri = URI::file->new($filepath)->abs(URI::file->cwd)->as_string; + + $mech->get( $uri ); + ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + + return $mech; +} + + +sub fh_test { + my ($mech, $method, $expected) = @_; + unless($method && $expected) { + diag("No method/expected value found"); + return; + } + my ($content); + open my $fh, '>', \$content or die ($!); + + $mech->$method( $fh ); + + close $fh; + + if (ref $expected eq 'Regexp') { + like( $content, $expected, 'Dump has valid values'); + stdout_like( sub {$mech->$method()}, $expected, 'Valid STDOUT'); + } else { + is( $content, $expected, 'Dump has valid values'); + stdout_is ( sub {$mech->$method()}, $expected, 'Valid STDOUT'); + } +} diff --git a/t/field.html b/t/field.html new file mode 100644 index 0000000..3a53857 --- /dev/null +++ b/t/field.html @@ -0,0 +1,18 @@ +<HTML> +<HEAD> + Like a hole +</HEAD> +<BODY BGCOLOR="puce"> +<FORM ACTION="/shake-some/"> +<INPUT TYPE="text" NAME="dingo" VALUE="dingo1"> +<INPUT TYPE="text" NAME="bongo" VALUE="bongo!"> +<INPUT TYPE="radio" NAME="wango" VALUE="wango!"> +<INPUT TYPE="radio" NAME="wango" VALUE="wongo!"> +<INPUT TYPE="text" NAME="dingo" VALUE="dingo2"> +<input type="hidden" name="__no_value"> +<input type="hidden" name="__value" value> +<input type="hidden" name="__value_empty" value=""> +<input type="hidden" name="__value_full" value="full"> +</FORM> +</BODY> +</HTML> diff --git a/t/field.t b/t/field.t new file mode 100644 index 0000000..7dcfe4b --- /dev/null +++ b/t/field.t @@ -0,0 +1,45 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 14; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/field.html' )->as_string; + +my $response = $mech->get( $uri ); +ok( $response->is_success, "Fetched $uri" ); + +$mech->field( 'dingo', 'Modified!' ); +is( $mech->value( 'dingo' ), 'Modified!', 'dingo got changed' ); + +$mech->set_visible('bingo', 'bango'); +is( $mech->value( 'dingo' ), 'bingo', 'dingo changed' ); +is( $mech->value( 'bongo' ), 'bango', 'bongo changed' ); + +$mech->set_visible( [ radio => 'wongo!' ], 'boingo' ); +is( $mech->value( 'wango' ), 'wongo!', 'wango changed' ); +is( $mech->value( 'dingo', 2 ), 'boingo', 'dingo changed' ); + +for my $name (qw/__no_value __value_empty/) { + ok( ! $mech->value( $name ), "$name is empty" ) or diag $mech->field($name); + $mech->field( $name, 'foo'); + is( $mech->value( $name ), 'foo', "$name changed" ); +} + +for my $name (qw/__value/) { + TODO: { + local $TODO = 'HTML::TokeParser does not understand how to parse this and returns a value where it should not have one'; + ok( ! $mech->value( $name ), "$name is empty" ) or diag $mech->field($name); + } + $mech->field( $name, 'foo'); + is( $mech->value( $name ), 'foo', "$name changed" ); +}
\ No newline at end of file diff --git a/t/find_frame.html b/t/find_frame.html new file mode 100644 index 0000000..883d572 --- /dev/null +++ b/t/find_frame.html @@ -0,0 +1,9 @@ +<html> + <head> + <title>find some frames</title> + </head> + <frameset rows="77,*" BORDER="0" frameborder="0"> + <frame id="top" name="top" src="bastro.html"> + <frame id="bottom" name="bottom" src="slint.html"> + </frameset> +</html> diff --git a/t/find_frame.t b/t/find_frame.t new file mode 100644 index 0000000..e4bdf78 --- /dev/null +++ b/t/find_frame.t @@ -0,0 +1,24 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 5; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_frame.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +my $x; +$x = $mech->find_link(); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->url, 'bastro.html', 'First link sequentially' ); diff --git a/t/find_image.t b/t/find_image.t new file mode 100644 index 0000000..dfb7042 --- /dev/null +++ b/t/find_image.t @@ -0,0 +1,397 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use Test::Fatal; +use Test::Warnings ':all'; +use Test::Deep; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/image-parse.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +{ + my @images; + is( + exception { + @images = $mech->find_all_images + }, + undef, + 'find_all_images in the page' + ); + + cmp_deeply( + [map { $_->url } @images], + [ qw( + /Images/bg-gradient.png + wango.jpg + bongo.gif + linked.gif + hacktober.jpg + hacktober.jpg + hacktober.jpg + http://example.org/abs.tif + ), + undef, + qw( + images/logo.png + inner.jpg + outer.jpg + ), + ], + '... and all ten are in the right order' + ); + + cmp_deeply( + \@images, + [ $mech->images ], + 'images() and find_all_images() return the same thing in list context' + ); + + my $images = $mech->images; + my $all_images = $mech->find_all_images; + cmp_deeply( + $images, + $all_images, + 'images() and find_all_images() return the same thing in scalar context' + ); +} + +# The following data structure describes sets of tests for find_image +# and find_all_images. Each test-case is as follows: +# +# { +# name => 'Name of the test case', +# args => [ +# arg_name => 'value', +# another_arg_name => 'value, +# ], +# expected_single => [ 'WWW::Mechanize::Image method' => 'expected value' ], +# expected_all => [ +# # first image +# [ +# 'WWW::Mechanize::Image method' => 'expected value', +# 'another WWW::Mechanize::Image method' => 'expected value', +# ], +# # second image +# [ 'WWW::Mechanize::Image method' => 'expected value' ] +# ], +# }, +# +# We use Test::Deep to run these tests. The args are key/value pairs +# that will be passed to both find_image() and find_all_images(). This +# allows us to add more complex tests with a combination of different +# arguments easily. +# +# The expected_single and expected_all keys each contain +# a list of methods being called on the resulting WWW::Mechanize::Image +# objects, and the value expected to be returned. For expected_all, +# there is one dedicated list for every image found. +# +# It's possible to use Test::Deep's special functions like re() in the +# value side of the expected data. +# +# This data structure does not cover cases that return no match. See +# further below for those. +# +# To make things easier, these numbered $image variables provide +# shortcuts for all six images in the website. They can be used instead +# of each array reference. + +my $image0 = [ url => '/Images/bg-gradient.png', tag => 'css' ]; # this is the body background from the style tag +my $image1 = [ url => 'wango.jpg', alt => re('world of') ]; +my $image2 = [ url => 'bongo.gif', tag => 'input', height => 142 ]; +my $image3 = [ url => 'linked.gif', tag => 'img' ]; +my $image4 = [ url => 'hacktober.jpg', attrs => superhashof( { id => 'first-hacktober-image' } ) ]; +my $image5 = [ url => 'hacktober.jpg', attrs => superhashof( { class => re('my-class-2') } ) ]; +my $image6 = [ url => 'hacktober.jpg', attrs => superhashof( { class => re('my-class-3') } ) ]; +my $image7 = [ url => 'http://example.org/abs.tif', attrs => superhashof( { id => 'absolute' } ) ]; +my $image8 = [ url => undef, tag => 'img', attrs => superhashof( { 'data-image' => "hacktober.jpg", id => "no-src-regression-269" } ) ]; +my $image9 = [ url => 'images/logo.png', tag => 'css' ]; +my $image10 = [ url => 'inner.jpg', tag => 'img' ]; +my $image11 = [ url => 'outer.jpg', tag => 'css' ]; + +my $tests = [ + { + name => 'CSS', + args => [ + tag => 'css', + ], + expected_single => $image0, + expected_all => [ + $image0, + $image9, + $image11, + ], + }, + { + name => 'alt', + args => [ + alt => 'The world of the wango', + ], + expected_single => $image1, + expected_all => [ + $image1, + ], + }, + { + name => 'alt_regex', + args => [ + alt_regex => qr/world/, + ], + expected_single => $image1, + expected_all => [ + $image1, + ], + }, + { + name => 'url', + args => [ + url => 'hacktober.jpg', + ], + expected_single => $image4, + expected_all => [ + $image4, + $image5, + $image6, + ], + }, + { + name => 'url_regex', + args => [ + url_regex => qr/gif$/, + ], + expected_single => $image2, + expected_all => [ + $image2, + $image3, + ], + }, + { + name => 'url_abs', + args => [ + url_abs => 'http://example.org/abs.tif', + ], + expected_single => $image7, + expected_all => [ + $image7, + ], + }, { + name => 'url_abs_regex', + args => [ + url_abs_regex => qr/hacktober/, + ], + expected_single => $image4, + expected_all => [ + $image4, + $image5, + $image6, + ], + }, + { + name => 'tag (img)', + args => [ + tag => 'img', + ], + expected_single => $image1, + expected_all => [ + $image1, + $image3, + $image4, + $image5, + $image6, + $image7, + $image8, + $image10, + ], + }, + { + name => 'tag (input)', + args => [ + tag => 'input', + ], + expected_single => $image2, + expected_all => [ + $image2, + ], + }, + { + name => 'tag_regex', + args => [ + tag_regex => qr/img|input/, + ], + expected_single => $image1, + expected_all => [ + $image1, + $image2, + $image3, + $image4, + $image5, + $image6, + $image7, + $image8, + $image10, + ], + }, + { + name => 'id', + args => [ + id => 'first-hacktober-image', + ], + expected_single => $image4, + expected_all => [ + $image4, + ], + }, + { + name => 'id_regex', + args => [ + id_regex => qr/-/, + ], + expected_single => $image4, + expected_all => [ + $image4, + $image8, + ], + }, + { + name => 'class', + args => [ + class => 'my-class-1', + ], + expected_single => $image4, + expected_all => [ + $image4, + ], + }, + { + name => 'class_regex', + args => [ + class_regex => qr/foo/, + ], + expected_single => $image5, + expected_all => [ + $image5, + $image6, + ], + }, + { + name => 'class_regex and url', + args => [ + class_regex => qr/foo/, + url => 'hacktober.jpg' + ], + expected_single => $image5, + expected_all => [ + $image5, + $image6, + ], + }, + { + name => '2nd instance of an image', + args => [ + url => 'hacktober.jpg', + n => 2, + ], + expected_single => $image5, + }, + { + name => 'inline style background image', + args => [ + url_regex => qr/logo/, + ], + expected_single => $image9, + }, +]; + +foreach my $test ( @{ $tests } ) { + # verify we find the correct first image with a given set of criteria + cmp_deeply( + $mech->find_image( @{ $test->{args} } ), + all( + isa('WWW::Mechanize::Image'), + methods( @{ $test->{expected_single} } ), + ), + 'find_image: ' . $test->{name} + ); + + if (exists $test->{expected_all}) { + # verify we find all the correct images with a given set of criteria + cmp_deeply( + [ $mech->find_all_images( @{ $test->{args} } ) ], + [ + map { + all( + isa('WWW::Mechanize::Image'), + methods( @{ $_ } ), + ) + } + @{ $test->{expected_all} } + ], + 'find_all_images: ' . $test->{name} + ); + } +} + +foreach my $arg (qw/alt url url_abs tag id class/) { + cmp_deeply( + [ $mech->find_image( $arg => 'does not exist' ) ], + [], + "find_image with $arg that does not exist returns an empty list" + ); + + cmp_deeply( + [ $mech->find_image( $arg . '_regex' => qr/does not exist/ ) ], + [], + "find_image with ${arg}_regex that does not exist returns an empty list" + ); +} + +# all of these will find the "wrong" image +{ + my $image; + like( + warning { + $image = $mech->find_image( url => qr/tif$/ ) + }, + qr/is a regex/, + 'find_image warns when it sees an unexpected regex' + ); + unlike $image->url, qr/tif$/, '... and ignores this argument'; +} +{ + my $image; + like( + warning { + $image = $mech->find_image( url_regex => 'tif' ) + }, + qr/is not a regex/, + 'find_image warns when it expects a regex and sees a string' + ); + unlike $image->url, qr/tif$/, '... and ignores this argument'; +} +{ + my $image; + like( + warning { + $image = $mech->find_image( id => q{ absolute } ) + }, + qr/space-padded and cannot succeed/, + 'find_image warns about space-padding' + ); + is $image->attrs, undef, '... and ignores this argument'; +} + +done_testing; diff --git a/t/find_inputs.html b/t/find_inputs.html new file mode 100644 index 0000000..c4b1692 --- /dev/null +++ b/t/find_inputs.html @@ -0,0 +1,26 @@ +<html> +<body> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="1st_form"> + <input type="text" name="1a" /> + <input type="IMAGE" name="submit1" value="Submit" label="Submit" /> + <button type="submit" name="submit2" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" name="2nd_form"> + <input type="text" name="YourMom" /> + <input type="text" name="opt[2]" /> Like in PHP! + <input type="text" name="1b" /> + <input type="SubMit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" name="3rd_form"> + <input type="text" name="YourMom" /> + <input type="text" name="YourDad" /> + <input type="text" name="YourSister" /> + <input type="text" name="YourSister" /> + <input type="SubMit" name="submit" value="Submit" label="Submit" /> +</form> + +</body> +</html> diff --git a/t/find_inputs.t b/t/find_inputs.t new file mode 100644 index 0000000..e148cff --- /dev/null +++ b/t/find_inputs.t @@ -0,0 +1,52 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 11; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_inputs.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +FIRST_FORM: { + my @inputs = $mech->find_all_inputs(); + is( scalar @inputs, 3, 'Exactly three inputs' ); + + my @submits = $mech->find_all_submits(); + is( scalar @submits, 2, 'Exactly two submits' ); +} + +SECOND_FORM: { + $mech->form_number(2); + my @inputs = $mech->find_all_inputs(); + is( scalar @inputs, 4, 'Exactly four inputs' ); + + my @submits = $mech->find_all_submits(); + is( scalar @submits, 1, 'Exactly one submit' ); +} + +THIRD_FORM: { + $mech->form_number(3); + my @inputs = $mech->find_all_inputs(); + is( scalar @inputs, 5, 'Exactly five inputs' ); + + my @relatives = $mech->find_all_inputs( name_regex => qr/^Your/ ); + is( scalar @relatives, 4, 'Found four relatives' ); + + my @sisters = $mech->find_all_inputs( name => 'YourSister' ); + is( scalar @sisters, 2, 'Found two sisters' ); + + my @submit_sisters = $mech->find_all_inputs( name => 'YourSister' ); + is( scalar @submit_sisters, 2, 'But no sisters are submits' ); +} diff --git a/t/find_link-warnings.t b/t/find_link-warnings.t new file mode 100644 index 0000000..a182795 --- /dev/null +++ b/t/find_link-warnings.t @@ -0,0 +1,56 @@ +use warnings; +use strict; + +use Test::More; +use Test::Warn qw( warning_like ); +use URI::file (); +use WWW::Mechanize (); + +BEGIN { delete @ENV{ qw( http_proxy HTTP_PROXY PATH IFS CDPATH ENV BASH_ENV) }; } + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_link.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +REGEX_USAGE: { + for my $tname (qw( TEXT NAME URL TAG )) { + warning_like( + sub { $mech->find_link( $tname => 'expect error' ) }, + qr/Unknown link-finding parameter/, "detected usage error: $tname => 'string'" + ); + } +} + +REGEX_STRING: { + for my $tn (qw( text name url tag )) { + my $tname = $tn.'_regex'; + warning_like( + sub { $mech->find_link( $tname => 'expect error' ) }, + qr/passed as $tname is not a regex/, "detected usage error: $tname => 'string'" + ); + } +} + +NON_REGEX_STRING: { + for my $tname (qw( text name url tag )) { + warning_like( + sub { $mech->find_link( $tname => qr/foo/ ) }, + qr/passed as '$tname' is a regex/, "detected usage error: $tname => Regex" + ); + } +} + +SPACE_PADDED: { + for my $tname (qw( text name url tag )) { + warning_like( + sub { $mech->find_link( $tname => ' a padded astring ' ) }, + qr/is space-padded and cannot succeed/, "detected usage error: $tname => padded-string" + ); + } +} + +done_testing(); diff --git a/t/find_link.html b/t/find_link.html new file mode 100644 index 0000000..a0d4a21 --- /dev/null +++ b/t/find_link.html @@ -0,0 +1,43 @@ +<html> + <head> + <meta http_equiv="Refresh" content="0; url=http://www.incorrect.com"> + <meta http-equiv="Rfresh" content="0; url=http://www.also-wrong.com"> + <meta http-equiv="Refresh" content="0; url='http://www.drphil.com/'"> + <META HTTP-EQUIV="REFRESH" CONTENT="0; URL=HTTP://WWW.UPCASE.COM/"> + <link rel="stylesheet" type="text/css" href="styles.css" /> + <link rel="icon" type="image/png" href="foo.png" /> + <TITLE>Testing the links</TITLE> + </head> + <body> + <A HREF="http://blargle.com/">blargle</A> + <A HREF="http://a.cpan.org/">CPAN A</A> + <A HREF="http://b.cpan.org/">CPAN B</A> + <FRAME SRC="foo.html"> + <FRAME SRC="bar.html"> + <A HREF="http://c.cpan.org/" NAME="bongo">CPAN C</A> + <A HREF="http://d.cpan.org/">CPAN D</A> + + <A HREF="http://www.msnbc.com/">MSNBC</A> + <FRAME SRC="http://www.oreilly.com/" NAME="wongo"> + <A HREF="http://www.cnn.com/">CNN</A> + <A HREF="http://www.bbc.co.uk/" NAME="Wilma">BBC</A> + <A HREF="http://www.msnbc.com/">News</A> + <A HREF="http://www.cnn.com/" NAME="Fred">News</A> + <A HREF="http://www.bbc.co.uk/">News</A> + <A onmouseover="window.status='Rebuild Files'; return true" href="#" onClick="window.open( '/cgi-bin/MT/mt.cgi', 'rebuild', 'width=400,height=200,resizable=yes')">Rebuild Index</A> + + <MAP NAME="SOME_MAP"> + <AREA HREF="http://www.msnbc.com/area" COORDS="1,2,3,4"></AREA> + <AREA HREF="http://www.cnn.com/area" COORDS="5,6,7,8" NAME="Marty"> + <AREA HREF="http://www.cpan.org/area" COORDS="10,11,12,13" /> + </MAP> + <IMG SRC="SOME_IMAGE" USEMAP="#SOME_MAP"> + + <!-- new stuff --> + <A HREF="http://nowhere.org/" Name="Here">NoWhere</A> + <A HREF="http://nowhere.org/padded" Name=" Here "> NoWhere </A> + <A HREF="blongo.html">Blongo!</A> + <A HREF="javascript: window.open( 'http://www.yahoo.com/', 'new', 'width=400,height=200,resizable=yes');" onClick="return confirm('Are your sure?');">Click Here</A> + </body> +</html> + diff --git a/t/find_link.t b/t/find_link.t new file mode 100644 index 0000000..d1bda95 --- /dev/null +++ b/t/find_link.t @@ -0,0 +1,171 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_link.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +my $x; +$x = $mech->find_link(); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://www.drphil.com/', 'First link on the page' ); +is( $x->url, 'http://www.drphil.com/', 'First link on the page' ); + +$x = $mech->find_link( n => 3 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'styles.css', 'Third link should be the CSS' ); +is( $x->url, 'styles.css', 'Third link should be the CSS' ); + +$x = $mech->find_link( url_regex => qr/upcase/i ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +like( $x->url, qr/\Qupcase.com/i, 'found link in uppercase meta tag' ); + +$x = $mech->find_link( text => 'CPAN A' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://a.cpan.org/', 'First CPAN link' ); +is( $x->url, 'http://a.cpan.org/', 'First CPAN link' ); + +$x = $mech->find_link( url => 'CPAN' ); +ok( !defined $x, 'No url matching CPAN' ); + +$x = $mech->find_link( text_regex => qr/CPAN/, n=>3 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://c.cpan.org/', '3rd CPAN text' ); +is( $x->url, 'http://c.cpan.org/', '3rd CPAN text' ); + +$x = $mech->find_link( text => 'CPAN', n=>34 ); +ok( !defined $x, 'No 34th CPAN text' ); + +$x = $mech->find_link( text_regex => qr/(?i:cpan)/ ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' ); +is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' ); + +$x = $mech->find_link( text_regex => qr/cpan/i ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://a.cpan.org/', 'Got 1st cpan via regex' ); +is( $x->url, 'http://a.cpan.org/', 'Got 1st cpan via regex' ); + +$x = $mech->find_link( text_regex => qr/cpan/i, n=>153 ); +ok( !defined $x, 'No 153rd cpan link' ); + +$x = $mech->find_link( url => 'http://b.cpan.org/' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://b.cpan.org/', 'Got b.cpan.org' ); +is( $x->url, 'http://b.cpan.org/', 'Got b.cpan.org' ); + +$x = $mech->find_link( url => 'http://b.cpan.org', n=>2 ); +ok( !defined $x, 'Not a second b.cpan.org' ); + +$x = $mech->find_link( url_regex => qr/[b-d]\.cpan\.org/, n=>2 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://c.cpan.org/', 'Got c.cpan.org' ); +is( $x->url, 'http://c.cpan.org/', 'Got c.cpan.org' ); + +my @wanted_links= ( + [ 'http://a.cpan.org/', 'CPAN A', undef, 'a' ], + [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ], + [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ], + [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ], +); +my @links = $mech->find_all_links( text_regex => qr/CPAN/ ); +@{$_} = @{$_}[0..3] for @links; +is_deeply( \@links, \@wanted_links, 'Correct links came back' ); + +my $linkref = $mech->find_all_links( text_regex => qr/CPAN/ ); +is_deeply( $linkref, \@wanted_links, 'Correct links came back' ); + +# Check combinations of links +$x = $mech->find_link( text => 'News' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://www.msnbc.com/', 'First News is MSNBC' ); +is( $x->url, 'http://www.msnbc.com/', 'First News is MSNBC' ); + +$x = $mech->find_link( text => 'News', url_regex => qr/bbc/ ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://www.bbc.co.uk/', 'First BBC news link' ); +is( $x->url, 'http://www.bbc.co.uk/', 'First BBC news link' ); +is( $x->[1], 'News', 'First BBC news text' ); +is( $x->text, 'News', 'First BBC news text' ); + +$x = $mech->find_link( text => 'News', url_regex => qr/cnn/ ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://www.cnn.com/', 'First CNN news link' ); +is( $x->url, 'http://www.cnn.com/', 'First CNN news link' ); +is( $x->[1], 'News', 'First CNN news text' ); +is( $x->text, 'News', 'First CNN news text' ); + +AREA_CHECKS: { + my @wanted_links = ( + [ 'http://www.cnn.com/', 'CNN', undef, 'a' ], + [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ], + # Can someone confirm that I just fixed a bug here, and + # area tags /should/ have names? -mls + [ 'http://www.cnn.com/area', undef, 'Marty', 'area' ], + ); + my @links = $mech->find_all_links( url_regex => qr/cnn\.com/ ); + @{$_} = @{$_}[0..3] for @links; + is_deeply( \@links, \@wanted_links, 'Correct links came back' ); +} + +$x = $mech->find_link( name => 'bongo' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is_deeply( $x, [ 'http://c.cpan.org/', 'CPAN C', 'bongo', 'a' ], 'Got the CPAN C link' ); + +$x = $mech->find_link( name_regex => qr/^[A-Z]/, n => 2 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is_deeply( $x, [ 'http://www.cnn.com/', 'News', 'Fred', 'a' ], 'Got 2nd link that begins with a capital' ); + +$x = $mech->find_link( tag => 'a', n => 3 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is_deeply( $x, [ 'http://b.cpan.org/', 'CPAN B', undef, 'a' ], 'Got 3rd <A> tag' ); + +$x = $mech->find_link( tag_regex => qr/^(a|frame)$/, n => 7 ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is_deeply( $x, [ 'http://d.cpan.org/', 'CPAN D', undef, 'a' ], 'Got 7th <A> or <FRAME> tag' ); + +$x = $mech->find_link( text => 'Rebuild Index' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is_deeply( [@{$x}[0..3]], [ '/cgi-bin/MT/mt.cgi', 'Rebuild Index', undef, 'a' ], 'Got the JavaScript link' ); + +$x = $mech->find_link( url => 'blongo.html' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); + +$x = $mech->find_link( url_abs => 'blongo.html' ); +ok( !defined $x, 'No match' ); + +$x = $mech->find_link( url_abs_regex => qr[t/blongo\.html$] ); +isa_ok( $x, 'WWW::Mechanize::Link' ); + +$x = $mech->find_link( text_regex => qr/click/i); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'http://www.yahoo.com/', 'Got js url link' ); +is( $x->url, 'http://www.yahoo.com/', 'Got js url link' ); + +$x = $mech->find_link( rel => 'icon' ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'foo.png', 'Got icon url link' ); + +$x = $mech->find_link( rel_regex => qr/sheet/i ); +isa_ok( $x, 'WWW::Mechanize::Link' ); +is( $x->[0], 'styles.css', 'Got stylesheet url link' ); + +$mech->get( URI::file->new_abs('t/refresh.html') ); +my $link = $mech->find_link( tag => 'meta' ); +is( $link->url, 'http://www.mysite.com/', 'got link from meta tag via tag search' ); + +done_testing(); diff --git a/t/find_link_id.html b/t/find_link_id.html new file mode 100644 index 0000000..77e5fc6 --- /dev/null +++ b/t/find_link_id.html @@ -0,0 +1,32 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" id="facebook"> + +<head> +<title>Examples for find_link</title> +<meta http-equiv="Content-type" content="text/html; charset=utf-8" /> +</head> + +<body class="barframe"> +<div id="ad"> +<iframe src='http://ads.whatever.com/'></iframe> +</div> + +<div id="widebar" class="clearfix"> +<div id="app_content_23422222" class="app_content_23422222"><div> +<iframe src="http://boo.xyz.com/boo_app" smartsize="true" frameborder="0" class="smart_iframe"> +</iframe> + +<script type="text/javascript"> +smartSizingFrameAdded(); +</script> + +</div> +</div> + +<a href="signature2.html">Fake Signature</a> +<a href="signature.html" id='signature'>Signature</a> +<a href="signature3.html">Fake Signature</a> + +</body> +</html> diff --git a/t/find_link_id.t b/t/find_link_id.t new file mode 100644 index 0000000..e0b2c9c --- /dev/null +++ b/t/find_link_id.t @@ -0,0 +1,43 @@ +#!perl -T + +use warnings; +use strict; +use Test::More 'no_plan'; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_link_id.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +FIND_BY_ID: { + my $x = $mech->find_link( id => 'signature' ); + isa_ok( $x, 'WWW::Mechanize::Link' ); + is( $x->url, 'signature.html', 'found link with given ID' ); +} + +FIND_BY_CLASS: { + my $x = $mech->find_link( tag => 'iframe', class => 'smart_iframe' ); + isa_ok( $x, 'WWW::Mechanize::Link' ); + is( $x->url, 'http://boo.xyz.com/boo_app', 'found link within "iframe" with given class' ); +} + +FIND_ID_BY_REGEX: { + my $x = $mech->find_link( id_regex => qr/^sig/ ); + isa_ok( $x, 'WWW::Mechanize::Link' ); + is( $x->url, 'signature.html', 'found link with ID matching a regex' ); +} + +FIND_CLASS_BY_REGEX: { + my $x = $mech->find_link( tag => 'iframe', class_regex => qr/IFRAME$/i ); + isa_ok( $x, 'WWW::Mechanize::Link' ); + is( $x->url, 'http://boo.xyz.com/boo_app', 'found link with class matching a regex' ); +} diff --git a/t/find_link_xhtml.html b/t/find_link_xhtml.html new file mode 100644 index 0000000..0661db4 --- /dev/null +++ b/t/find_link_xhtml.html @@ -0,0 +1,49 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"[ +<!ATTLIST html + xmlns:xsi CDATA #FIXED "http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation CDATA #IMPLIED > ]> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://www.w3.org/1999/xhtml + http://www.w3.org/2002/08/xhtml/xhtml1-strict.xsd"> +<head> + <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> + <title>Hello, World!</title> +</head> +<body> +<p id="thelinks"> + +<a +href += +"http://www.example.com/1" +> +One +</a +> +<a id="Two" title="href="></a> +<!-- +<a href="http://www.example.com/3">Three</a> +--> +<a title=' href="http://www.example.com/4">Four' +href="http://www.example.com/5">Five</a> +<!--BEGIN--> +<script type="text/javascript">/*<![CDATA[ +</script> +*/ console.log(' <a href="http://www.example.com/6">Six</a> '); /* +<!-- +]]>*/</script> +<a href="http://www.example.com/7"><![CDATA[Se]]><span +>ve</span>n</a> +<script type="text/javascript">/*<![CDATA[ +--> +]]>*/</script> +<![CDATA[ +<a href="http://www.example.com/8">Eight</a> +]]> + +<!--END--></p> +</body> +</html>
\ No newline at end of file diff --git a/t/find_link_xhtml.t b/t/find_link_xhtml.t new file mode 100644 index 0000000..2fb62eb --- /dev/null +++ b/t/find_link_xhtml.t @@ -0,0 +1,69 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/find_link_xhtml.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +my @links = map {[ $_->text, $_->url ]} $mech->links(); +my @expected = ( +['One','http://www.example.com/1'], +['Five','http://www.example.com/5'], +['Seven','http://www.example.com/7'], +); + +is_deeply \@links, \@expected, "We find exactly the valid links"; + +# now, test with explicit marked_sections => 1 + +$mech = WWW::Mechanize->new( cookie_jar => undef, marked_sections => 1 ); +isa_ok( $mech, 'WWW::Mechanize' ); + +$uri = URI::file->new_abs( 't/find_link_xhtml.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +@links = map {[ $_->text, $_->url ]} $mech->links(); +@expected = ( +['One','http://www.example.com/1'], +['Five','http://www.example.com/5'], +['Seven','http://www.example.com/7'], +); + +is_deeply \@links, \@expected, "We find exactly the valid links, explicitly"; + +# now, test with marked_sections => 0, giving us legacy results + +$mech = WWW::Mechanize->new( cookie_jar => undef, marked_sections => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +$uri = URI::file->new_abs( 't/find_link_xhtml.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +@links = map {[ $_->text, $_->url ]} $mech->links(); +@expected = ( +['One','http://www.example.com/1'], +['Five','http://www.example.com/5'], +['Six','http://www.example.com/6'], # yeah... +); + +is_deeply \@links, \@expected, "We can enable the legacy behaviour"; + +done_testing(); diff --git a/t/form-parsing.t b/t/form-parsing.t new file mode 100644 index 0000000..8880913 --- /dev/null +++ b/t/form-parsing.t @@ -0,0 +1,25 @@ +#!perl -T + +use strict; +use warnings; +use Test::More tests=>1; +use HTML::Form; + +my $base = 'http://localhost/'; +my $content = do { local $/ = undef; <DATA> }; + +my $forms = [ HTML::Form->parse( $content, $base ) ]; +is( scalar @{$forms}, 1, 'Find one form, please' ); + +__DATA__ +<html> +<head> +<title>WWW::Mechanize::Shell test page</title> +</head> +<body> + <form name="f" action="/formsubmit"> + <input type="checkbox" name="cat" value="cat_baz" /> + </form> +</body> +</html> + diff --git a/t/form_133_regression.html b/t/form_133_regression.html new file mode 100644 index 0000000..9ec2b13 --- /dev/null +++ b/t/form_133_regression.html @@ -0,0 +1,45 @@ +<html> + <head> + <title>Test Page</title> + </head> + <body> + <form action="http://localhost/" method="GET"> + <select type="select" name="select1"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + <select type="select" name="select2"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + <select type="select" name="select3"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + <select type="select" name="select4"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + <select type="select" name="multiselect1" multiple="multiple"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + <select type="select" name="multiselect2" multiple="multiple"> + <option value="1"></option> + <option value="2"></option> + <option value="3"></option> + <option value="4"></option> + </select> + </form> + </body> +</html>
\ No newline at end of file diff --git a/t/form_with_fields.html b/t/form_with_fields.html new file mode 100644 index 0000000..cacae71 --- /dev/null +++ b/t/form_with_fields.html @@ -0,0 +1,51 @@ +<html> +<body> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="1st_form"> + <input type="text" name="1a" /> + <input type="text" name="1b" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" name="2nd_form"> + <input type="text" name="opt[2]" /> Like in PHP! + <input type="text" name="1b" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="3rd_form_ambiguous"> + <input type="text" name="3a" /> + <input type="text" name="3b" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="3rd_form_ambiguous"> + <input type="text" name="3c" /> + <input type="text" name="3d" /> + <input type="text" name="x" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="4th_form_1"> + <input type="text" name="4a" /> + <input type="text" name="4b" /> + <input type="text" name="x" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="4th_form_2"> + <input type="text" name="4a" /> + <input type="text" name="4b" /> + <input type="text" name="x" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +<form action="https://localhost" method="post"> + <input type="hidden" name="5a" /> + <input type="hidden" name="5b" value /> + <input type="hidden" name="5c" value="" /> + <input type="hidden" name="5d" value="foo" /> + <input type="hidden" name="5e" value="value" /> +</form> + +</body> diff --git a/t/form_with_fields.t b/t/form_with_fields.t new file mode 100644 index 0000000..e934870 --- /dev/null +++ b/t/form_with_fields.t @@ -0,0 +1,199 @@ +#!perl -T + +use warnings; +use strict; +use Test::More 'no_plan'; +use Test::Fatal; +use Test::Warnings ':all'; +use Test::Deep; +use URI::file (); + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); +isa_ok( $mech, 'WWW::Mechanize' ); +my $uri = URI::file->new_abs( 't/form_with_fields.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +{ + my $test = 'dies with no input'; + like( + exception { my $form = $mech->form_with_fields(); }, + qr/no fields provided/, + $test, + ); +} + +{ + my $form; + cmp_deeply( + [ warnings { $form = $mech->form_with_fields(qw/1b/) } ], + [ re(qr/There are 2 forms with the named fields. The first one was used./) ], + 'warning on ambiguous match (1)', + ); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '1st_form', 'first form matches'); +} + +{ + my $form = $mech->form_with_fields('1b', 'opt[2]'); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '2nd_form', 'second form matches'); +} + +{ + my $form; + cmp_deeply( + [ warnings { $form = $mech->form_with_fields('4a', '4b') } ], + [ re(qr/There are 2 forms with the named fields. The first one was used./) ], + 'warning on ambiguous match (2)', + ); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '4th_form_1', 'fourth form matches'); +} + +{ + my @forms = $mech->all_forms_with( name => '3rd_form_ambiguous' ); + is( scalar @forms, 2 ); + isa_ok( $forms[0], 'HTML::Form' ); + isa_ok( $forms[1], 'HTML::Form' ); + is($forms[0]->attr('name'), '3rd_form_ambiguous', 'first result of 3rd_form_ambiguous'); + is($forms[1]->attr('name'), '3rd_form_ambiguous', 'second result of 3rd_form_ambiguous'); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + with_fields => { 'xx' => '' }, + ); + }, + qr/There is no form with the requested fields/, + 'submit_form with no match (1)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + with_fields => { '1a' => '' }, + form_number => 2, + ); + }, + qr/There is no form that satisfies all the criteria/, + 'submit_form with no match (2)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_number => 2, + form_name => '3rd_form_ambiguous', + ); + }, + qr/There is no form that satisfies all the criteria/, + 'submit_form with no match (3)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '3rd_form_ambiguous', + ); + }, + qr/More than one form satisfies all the criteria/, + 'submit_form with more than one match', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + with_fields => { 'x' => '' }, + form_name => '3rd_form_ambiguous', + ); + }, + undef, + 'submit_form with intersection of two criteria', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + with_fields => { '1b' => '', 'opt[2]' => '' }, + ); + }, + undef, + ' submit_form( with_fields => %data ) ', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + ); + }, + undef, + 'submit_form with invalid field and without strict_forms option succeeds', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + strict_forms => 1, + ); + }, + qr/^No such field '1c'/, + 'submit_form with invalid field and strict_forms option fails', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1a' => 'value1', + '1b' => 'value2', + }, + strict_forms => 1, + ); + }, + undef, + 'submit_form with valid fields and strict_forms option succeeds', + ); +}
\ No newline at end of file diff --git a/t/form_with_fields_passthrough_params.t b/t/form_with_fields_passthrough_params.t new file mode 100644 index 0000000..5bd2175 --- /dev/null +++ b/t/form_with_fields_passthrough_params.t @@ -0,0 +1,246 @@ +#!perl -T + +use warnings; +use strict; +use Test::More 'no_plan'; +use Test::Fatal; +use Test::Warnings ':all'; +use Test::Deep; +use URI::file (); + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0, strict_forms => 1, verbose_forms => 1 ); +isa_ok( $mech, 'WWW::Mechanize' ); +my $uri = URI::file->new_abs( 't/form_with_fields.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +{ + my $test = 'dies with no input'; + like( + exception { my $form = $mech->form_with_fields(); }, + qr/no fields provided/, + $test, + ); +} + +{ + my $form; + cmp_deeply( + [ warnings { $form = $mech->form_with_fields(qw/1b/) } ], + [ re(qr/There are 2 forms with the named fields. The first one was used./) ], + 'warning on ambiguous match (1)', + ); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '1st_form', 'first form matches'); +} + +{ + my $form = $mech->form_with_fields('1b', 'opt[2]'); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '2nd_form', 'second form matches'); +} + +{ + my $form; + cmp_deeply( + [ warnings { $form = $mech->form_with_fields('4a', '4b') } ], + [ re(qr/There are 2 forms with the named fields. The first one was used./) ], + 'warning on ambiguous match (2)', + ); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '4th_form_1', 'fourth form matches'); +} + +{ + my @forms = $mech->all_forms_with( name => '3rd_form_ambiguous' ); + is( scalar @forms, 2 ); + isa_ok( $forms[0], 'HTML::Form' ); + isa_ok( $forms[1], 'HTML::Form' ); + is($forms[0]->attr('name'), '3rd_form_ambiguous', 'first result of 3rd_form_ambiguous'); + is($forms[1]->attr('name'), '3rd_form_ambiguous', 'second result of 3rd_form_ambiguous'); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + with_fields => { 'xx' => '' }, + ); + }, + qr/There is no form with the requested fields/, + 'submit_form with no match (1)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + with_fields => { '1a' => '' }, + form_number => 2, + ); + }, + qr/There is no form that satisfies all the criteria/, + 'submit_form with no match (2)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_number => 2, + form_name => '3rd_form_ambiguous', + ); + }, + qr/There is no form that satisfies all the criteria/, + 'submit_form with no match (3)', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '3rd_form_ambiguous', + ); + }, + qr/More than one form satisfies all the criteria/, + 'submit_form with more than one match', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + with_fields => { 'x' => '' }, + form_name => '3rd_form_ambiguous', + ); + }, + undef, + 'submit_form with intersection of two criteria', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + with_fields => { '1b' => '', 'opt[2]' => '' }, + ); + }, + undef, + ' submit_form( with_fields => %data ) ', + ); +} + + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + ); + }, + qr/^No such field '1c'/, + 'submit_form with invalid field and with global strict_forms and without implicit strict_forms fails', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + strict_forms => 1, + ); + }, + qr/^No such field '1c'/, + 'submit_form with invalid field and with global strict_forms and with implicit strict_forms fails', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + strict_forms => 1, + ); + }, + qr/^No such field '1c'/, + 'submit_form with invalid field and strict_forms option fails', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1a' => 'value1', + '1b' => 'value2', + }, + strict_forms => 1, + ); + }, + undef, + 'submit_form with valid fields and strict_forms option succeeds', + ); +} + +{ + $mech->get($uri); + is( + exception { + $mech->submit_form( + form_name => '1st_form', + fields => { + '1c' => 'madeup_field', + }, + strict_forms => 0, + ); + }, + undef, + 'submit_form with invalid field and with global strict_forms and with implicit disabled strict_forms succeeds', + ); +} + +{ + $mech->get(URI::file->new_abs( 't/form_with_fields_verbose.html' )->as_string); + my $form; + cmp_deeply( + [ warnings { $form = $mech->form_with_fields(qw/1a/) } ], + [ re(qr/foobar/) ], + 'verbose_fields catches wrong HTML', + ); + isa_ok( $form, 'HTML::Form' ); + is($form->attr('name'), '1st_form', '... and form matches'); +}
\ No newline at end of file diff --git a/t/form_with_fields_verbose.html b/t/form_with_fields_verbose.html new file mode 100644 index 0000000..dd180c6 --- /dev/null +++ b/t/form_with_fields_verbose.html @@ -0,0 +1,11 @@ +<html> +<body> + +<form action="http://localhost/" method="post" enctype="multipart/form-data" name="1st_form"> + <input type="foobar" name="1a" /> + <input type="text" name="1b" /> + <input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> + +</body> +</html> diff --git a/t/frames.html b/t/frames.html new file mode 100644 index 0000000..1e5f3a5 --- /dev/null +++ b/t/frames.html @@ -0,0 +1,13 @@ +<html> + <head> + <title></title> + </head> + + <frameset rows="*,*" frameborder="1" framespacing="0" border="1"> + <frame name="top" src="find_link.html" marginwidth="8" +marginheight="8" scrolling="auto" frameborder="no"> + <frame name="bottom" src="google.html" marginwidth="0" +marginheight="0" scrolling="no" frameborder="no" noresize> + </frameset> + +</html> diff --git a/t/frames.t b/t/frames.t new file mode 100644 index 0000000..19743b4 --- /dev/null +++ b/t/frames.t @@ -0,0 +1,29 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 7; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/frames.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +my $link = $mech->find_link(); +isa_ok( $link, 'WWW::Mechanize::Link' ); + +my @links = $mech->find_all_links(); +is( scalar @links, 2, 'Only two links' ); + +is_deeply( [@{$links[0]}[0..3]], [ 'find_link.html', undef, 'top', 'frame' ], 'First frame OK' ); + +is_deeply( [@{$links[1]}[0..3]], [ 'google.html', undef, 'bottom', 'frame' ], 'Second frame OK' ); diff --git a/t/google.html b/t/google.html new file mode 100644 index 0000000..1d8653e --- /dev/null +++ b/t/google.html @@ -0,0 +1,14 @@ +<html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style><!-- +body,td,a,p,.h{font-family:arial,sans-serif;} +.h{font-size: 20px;} +.q{text-decoration:none; color:#0000cc;} +//--> +</style> +<script> +<!-- +function sf(){document.f.q.focus();} +// --> +</script> +</head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onLoad=sf()><center><table border=0 cellspacing=0 cellpadding=0><tr><td><img src="/images/logo.gif" width=276 height=110 alt="Google"></td></tr></table><br> +<table border=0 cellspacing=0 cellpadding=0><tr><td width=15> </td><td id=0 bgcolor=#3366cc align=center width=95 nowrap><font color=#ffffff size=-1><b>Web</b></font></td><td width=15> </td><td id=1 bgcolor=#efefef align=center width=95 nowrap onClick="" style=cursor:pointer;cursor:hand;><a id=1a class=q href="/imghp?hl=en&tab=wi&ie=UTF-8"><font size=-1>Images</font></a></td><td width=15> </td><td id=2 bgcolor=#efefef align=center width=95 nowrap onClick="" style=cursor:pointer;cursor:hand;><a id=2a class=q href="/grphp?hl=en&tab=wg&ie=UTF-8"><font size=-1>Groups</font></a></td><td width=15> </td><td id=3 bgcolor=#efefef align=center width=95 nowrap onClick="" style=cursor:pointer;cursor:hand;><a id=3a class=q href="/dirhp?hl=en&tab=wd&ie=UTF-8"><font size=-1>Directory</font></a></td><td width=15> </td><td id=4 bgcolor=#efefef align=center width=95 nowrap onClick="" style=cursor:pointer;cursor:hand;><a id=4a class=q href="/nwshp?hl=en&tab=wn&ie=UTF-8"><font size=-1>News</font></a></td><td width=15> </td></tr><tr><td colspan=12 bgcolor=#3366cc><img width=1 height=1 alt=""></td></tr></table><br><form action="/target-page" name="bob-the-form"><table cellspacing=0 cellpadding=0><tr><td width=75> </td><td align=center><input type=hidden name=hl value=en><span id=hf></span><input type=hidden name=ie value="ISO-8859-1"><input type=hidden name=notgoogle value=""><input maxLength=256 size=55 name=q value=""><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap><font size=-2> • <a href=/advanced_search?hl=en>Advanced Search</a><br> • <a href=/preferences?hl=en>Preferences</a><br> • <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><p><font size=-1>Want more from Google? Try these <a href="/tour/services/query.html">expert search tips</a></font><p> +<br><font size=-1><a href="/ads/">Advertise with Us</a> - <a href="/services/">Business Solutions</a> - <a href="/options/">Services & Tools</a> - <a href=/about.html>Jobs, Press, & Help</a></font><p><font size=-2>©2003 Google - Searching 3,083,324,652 web pages</font></p></center></body></html> diff --git a/t/history.t b/t/history.t new file mode 100644 index 0000000..26c6a60 --- /dev/null +++ b/t/history.t @@ -0,0 +1,167 @@ +#!perl + +use warnings; +use strict; + +use lib qw( t/local ); + +use LocalServer (); +use Path::Tiny qw( path ); +use Test::Deep; +use Test::Fatal; +use Test::More; +use URI::file (); +use WWW::Mechanize (); + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)} + ; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok('WWW::Mechanize'); +} + +{ + my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); + isa_ok( $mech, 'WWW::Mechanize' ); + + my $uri = URI::file->new_abs('t/history_1.html')->as_string; + + $mech->get($uri); + ok( $mech->success, "Fetch test page" ) or die q{Can't get test page}; + + is( $mech->history_count, 1, "... and it was recorded in the history" ); + cmp_deeply( + $mech->history(0), + { + req => isa('HTTP::Request'), + res => all( + isa('HTTP::Response'), + methods( 'content' => re(qr/Testing the history_1/) ), + ), + }, + "... and the first history item is of the correct format" + ); + $mech->follow_link( n => 1 ); + + is( $mech->history_count, 2, "... and it was recorded in the history" ); + cmp_deeply( + $mech->history(0), + { + req => isa('HTTP::Request'), + res => all( + isa('HTTP::Response'), + methods( 'content' => re(qr/Testing the history_2/) ), + ), + }, + "... and the second history item is of the correct format" + ); + + ok( + $mech->submit_form( form_name => "get_form" ), + "Submit form using 'get' method" + ); + + is( $mech->history_count, 3, "... and it was recorded in the history" ); + cmp_deeply( + $mech->history(0), + { + req => isa('HTTP::Request'), + res => all( + isa('HTTP::Response'), + methods( 'content' => re(qr/Testing the history_3/) ), + ), + }, + "... and the third history item is of the correct format" + ); + + is( + exception { + $mech->clear_history; + }, + undef, + "Clear the history" + ); + + is( + $mech->history_count, 1, + "... and the history contains only one item" + ); + + my $history_item_after_clearing = $mech->history(0); + cmp_deeply( + $history_item_after_clearing, + { + req => isa('HTTP::Request'), + res => all( + isa('HTTP::Response'), + methods( 'content' => re(qr/Testing the history_3/) ), + ), + }, + "... and the latest history item is of the correct format" + ); + + cmp_deeply( + $mech->res, + $history_item_after_clearing->{res}, + "... and we are still 'displaying' the page we were on when we cleared the history" + ); + ok( !$mech->back, "... and we cannot go back in the history" ); + + $mech->follow_link( n => 1 ); + ok( $mech->success, "Click a link in the page we are 'displaying'" ) + or die q{Can't get test page}; + is( $mech->history_count, 2, "... and it was recorded in the history" ); + like( + $mech->res->content, qr/Testing the history_1/, + "... and we are 'displaying' a different page" + ); + + ok( $mech->back, "We can go back in history" ); + cmp_deeply( + $mech->res, + $history_item_after_clearing->{res}, + "... and we are 'displaying' the page we were on when we cleared the history again" + ); +} + +{ + my $html = path('t/history_2.html')->slurp; + my $server = LocalServer->spawn( html => $html ); + my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); + $mech->get( $server->url ); + + ok( + $mech->submit_form( form_name => "post_form" ), + "Submit form using 'post' method" + ); + is( $mech->history_count, 2, "... and it was recorded in the history" ); + is( + $mech->history(0)->{req}->uri, $server->url, + "... and the correct request was saved" + ); +} + +{ + my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); + isa_ok( $mech, 'WWW::Mechanize' ); + + my $uri = URI::file->new_abs('t/history_1.html')->as_string; + $mech->stack_depth(0); + is( $mech->stack_depth(), 0, "stack_depth can be changed" ); + + $mech->get($uri) for 1, 2, 3; + is( + $mech->history_count(), 1, + "No history saved when history is turned off" + ); + + $mech->stack_depth(1); + + $mech->get($uri) for 1, 2, 3; + is( + $mech->history_count(), 2, + "Limited history is saved when stack_depth is explicitly set" + ); + +} + +done_testing(); diff --git a/t/history_1.html b/t/history_1.html new file mode 100644 index 0000000..d479f3c --- /dev/null +++ b/t/history_1.html @@ -0,0 +1,8 @@ +<html> + <head> + <title>Testing the history_1</title> + </head> + <body> + <a href="history_2.html">To second page</a> + </body> +</html> diff --git a/t/history_2.html b/t/history_2.html new file mode 100644 index 0000000..06e86bc --- /dev/null +++ b/t/history_2.html @@ -0,0 +1,13 @@ +<html> + <head> + <title>Testing the history_2</title> + </head> + <body> + <form action="history_3.html" method="get" name="get_form"> + <input type="Submit" name="submit"/> + </form> + <form action="" method="post" name="post_form"> + <input type="Submit" name="submit"/> + </form> + </body> +</html> diff --git a/t/history_3.html b/t/history_3.html new file mode 100644 index 0000000..c7579e5 --- /dev/null +++ b/t/history_3.html @@ -0,0 +1,8 @@ +<html> + <head> + <title>Testing the history_3</title> + </head> + <body> + <a href="history_1.html">To first page</a> + </body> +</html> diff --git a/t/image-new.t b/t/image-new.t new file mode 100644 index 0000000..caa8e36 --- /dev/null +++ b/t/image-new.t @@ -0,0 +1,46 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 15; + +BEGIN { + use_ok( 'WWW::Mechanize::Image' ); +} + +# test new style API +my $img = WWW::Mechanize::Image->new( { + url => 'url.html', + base => 'http://base.example.com/', + name => 'name', + alt => 'alt', + tag => 'a', + height => 2112, + width => 5150, + attrs => { id => 'id', class => 'foo bar' }, +} ); + +is( $img->url, 'url.html', 'url() works' ); +is( $img->base, 'http://base.example.com/', 'base() works' ); +is( $img->name, 'name', 'name() works' ); +is( $img->alt, 'alt', 'alt() works' ); +is( $img->tag, 'a', 'tag() works' ); +is( $img->height, 2112, 'height works' ); +is( $img->width, 5150, 'width works' ); +is( $img->attrs->{id}, 'id', 'attrs/id works' ); +is( $img->attrs->{class}, 'foo bar', 'attrs/class works' ); +is( $img->url_abs, 'http://base.example.com/url.html', 'url_abs works' ); +isa_ok( $img->URI, 'URI::URL', 'Returns an object' ); + +my $img_no_src = WWW::Mechanize::Image->new( { + url => undef, + base => 'http://base.example.com/', + tag => 'img', + height => 123, + width => 321, +} ); + +isa_ok($img_no_src, 'WWW::Mechanize::Image'); +is( $img_no_src->url, undef, 'url() without url is undef'); +isa_ok( $img_no_src->URI, 'URI::URL', 'Returns an object' ); diff --git a/t/image-parse.css b/t/image-parse.css new file mode 100644 index 0000000..6a0a489 --- /dev/null +++ b/t/image-parse.css @@ -0,0 +1,9 @@ +body { + background-color:white; + background-image:url(/Images/bg-gradient.png); +} + +.logo { + background: url("images/logo.png") no-repeat; + background-size: 275px 95px; +} diff --git a/t/image-parse.html b/t/image-parse.html new file mode 100644 index 0000000..9d7dc42 --- /dev/null +++ b/t/image-parse.html @@ -0,0 +1,25 @@ +<html> + <head> + <TITLE>Testing image extraction</TITLE> + <style> + background-color:white; + background-image:url(/Images/bg-gradient.png); + </style> + </head> + <body> + <A HREF="http://blargle.com/">blargle</A> + <FORM> + And now, the dreaded wango + <IMG SRC="wango.jpg" ALT="The world of the wango"> + <A HREF="http://www.cnn.com/">CNN</A> + <INPUT NAME="boobala" TYPE="TEXT"> + <INPUT TYPE="image" SRC="bongo.gif" HEIGHT=142 WIDTH=43> + <a href="link.htm"><img src="linked.gif"></a> + </FORM> + <A HREF="http://www.bbc.co.uk/" NAME="Wilma">BBC</A> + <A HREF="blongo.html">Blongo!</A><img src="hacktober.jpg" id="first-hacktober-image" class="my-class-1"><img src="hacktober.jpg" class="my-class-2 foo"><img src="hacktober.jpg" class="my-class-3 foo bar"><img src="http://example.org/abs.tif" id="absolute"><img data-image="hacktober.jpg" id="no-src-regression-269"> + <div style="background:url(images/logo.png) no-repeat;background-size:275px 95px;" id="logo">Logo</div> + <img src="inner.jpg" style="background-image: url(outer.jpg); padding: 5em" id="weird_background_style_edge_case"> + </body> +</html> + diff --git a/t/image-parse.t b/t/image-parse.t new file mode 100644 index 0000000..7733432 --- /dev/null +++ b/t/image-parse.t @@ -0,0 +1,96 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 47; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/image-parse.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die 'Can\'t get test page'; + +my @images = $mech->images; +is( scalar @images, 12, 'Exactly twelve images' ); + +my $first = $images[0]; +is( $first->url, '/Images/bg-gradient.png', 'Got the background style image' ); +is( $first->tag, 'css', 'css tag' ); +is( $first->alt, undef, 'alt' ); + +my $second = $images[1]; +is( $second->tag, 'img', 'img tag' ); +is( $second->url, 'wango.jpg', 'URL matches' ); +is( $second->alt, 'The world of the wango', 'alt matches' ); + +my $third = $images[2]; +is( $third->tag, 'input', 'input tag' ); +is( $third->url, 'bongo.gif', 'URL matches' ); +is( $third->alt, undef, 'alt matches' ); +is( $third->height, 142, 'height' ); +is( $third->width, 43, 'width' ); + +my $fourth = $images[3]; +is( $fourth->url, 'linked.gif', 'Got the fourth image' ); +is( $fourth->tag, 'img', 'input tag' ); +is( $fourth->alt, undef, 'alt' ); + +my $fifth = $images[4]; +is( $fifth->url, 'hacktober.jpg', 'Got the fifth image' ); +is( $fifth->tag, 'img', 'input tag' ); +is( $fifth->alt, undef, 'alt' ); +is( $fifth->attrs->{id}, 'first-hacktober-image', 'id' ); +is( $fifth->attrs->{class}, 'my-class-1', 'class' ); + +my $sixth = $images[5]; +is( $sixth->url, 'hacktober.jpg', 'Got the sixth image' ); +is( $sixth->tag, 'img', 'input tag' ); +is( $sixth->alt, undef, 'alt' ); +is( $sixth->attrs->{id}, undef, 'id' ); +is( $sixth->attrs->{class}, 'my-class-2 foo', 'class' ); + +my $seventh = $images[6]; +is( $seventh->url, 'hacktober.jpg', 'Got the seventh image' ); +is( $seventh->tag, 'img', 'input tag' ); +is( $seventh->alt, undef, 'alt' ); +is( $seventh->attrs->{id}, undef, 'id' ); +is( $seventh->attrs->{class}, 'my-class-3 foo bar', 'class' ); + +# regression github #269 +my $eighth = $images[8]; +is( $eighth->attrs->{id}, 'no-src-regression-269', 'Got the eighth image'); +is( $eighth->url, undef, 'it has no URL'); +is( $eighth->attrs->{'data-image'}, 'hacktober.jpg', 'it has an extra attribute'); + +my $ninth = $images[9]; +is( $ninth->url, 'images/logo.png', 'Got the fifth image' ); +is( $ninth->tag, 'css', 'css tag' ); +is( $ninth->alt, undef, 'alt' ); + +# find image in css +$uri = URI::file->new_abs( 't/image-parse.css' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +eval { @images = $mech->find_all_images(); }; +is($@,'','survived eval'); +is( scalar @images, 2, 'Exactly two images' ); + +my $css_first = $images[0]; +is( $css_first->url, '/Images/bg-gradient.png', 'Got the first image' ); +is( $css_first->tag, 'css', 'css tag' ); +is( $css_first->alt, undef, 'alt' ); + +my $css_second = $images[1]; +is( $css_second->url, 'images/logo.png', 'Got the second image' ); +is( $css_second->tag, 'css', 'css tag' );
\ No newline at end of file diff --git a/t/link-base.t b/t/link-base.t new file mode 100644 index 0000000..0021ab5 --- /dev/null +++ b/t/link-base.t @@ -0,0 +1,20 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 5; + +BEGIN { + use_ok( 'WWW::Mechanize::Link' ); +} + +NO_BASE: { + my $link = WWW::Mechanize::Link->new( 'url.html', 'Click here', undef, undef ); + isa_ok( $link, 'WWW::Mechanize::Link', 'constructor OK' ); + + my $URI = $link->URI; + isa_ok( $URI, 'URI::URL', 'URI is proper type' ); + is( $URI->rel, 'url.html', 'Short form of the url' ); + is( $link->url_abs, 'url.html', 'url_abs works' ); +} diff --git a/t/link-relative.t b/t/link-relative.t new file mode 100644 index 0000000..6e9ffb5 --- /dev/null +++ b/t/link-relative.t @@ -0,0 +1,29 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 6; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/image-parse.html' )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +$mech->get( 'select.html' ); +ok( $mech->success, 'Fetch select.html, no directory' ); + +$mech->get( './select.html' ); +ok( $mech->success, 'Fetch select.html from ./' ); + +$mech->get( 'local/click.t' ); +ok( $mech->success, 'Fetched click.t' ); diff --git a/t/link.t b/t/link.t new file mode 100644 index 0000000..59eab86 --- /dev/null +++ b/t/link.t @@ -0,0 +1,58 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests=>23; + +BEGIN { + use_ok( 'WWW::Mechanize::Link' ); +} + +OLD_API: { + my $link = + WWW::Mechanize::Link->new( + 'url.html', 'text', 'name', 'frame', 'http://base.example.com/', { alt => 'alt text' } ); + + isa_ok( $link, 'WWW::Mechanize::Link' ); + is( scalar @$link, 6, 'Should have five elements' ); + + # Test the new-style accessors + is( $link->url, 'url.html', 'url() works' ); + is( $link->text, 'text', 'text() works' ); + is( $link->name, 'name', 'name() works' ); + is( $link->tag, 'frame', 'tag() works' ); + is( $link->base, 'http://base.example.com/', 'base() works' ); + is( $link->attrs->{alt}, 'alt text', 'attrs() works' ); + + # Order of the params in the blessed array is important for backwards compatibility. + is( $link->[0], 'url.html', 'param 0 is url' ); + is( $link->[1], 'text', 'param 1 is text' ); + is( $link->[2], 'name', 'param 2 is name' ); + is( $link->[3], 'frame', 'param 3 is tag' ); + is( $link->[4], 'http://base.example.com/', 'param 4 is base' ); + + my $URI = $link->URI; + isa_ok( $URI, 'URI::URL', 'URI is proper type' ); + is( $URI->rel, 'url.html', 'Short form of the url' ); + is( $link->url_abs, 'http://base.example.com/url.html', 'url_abs works' ); +} + +NEW_API: { + # test new style API + my $link = WWW::Mechanize::Link->new( { + url => 'url.html', + text => 'text', + name => 'name', + tag => 'frame', + base => 'http://base.example.com/', + attrs => { alt => 'alt text' }, + } ); + + is( $link->url, 'url.html', 'url() works' ); + is( $link->text, 'text', 'text() works' ); + is( $link->name, 'name', 'name() works' ); + is( $link->tag, 'frame', 'tag() works' ); + is( $link->base, 'http://base.example.com/', 'base() works' ); + is( $link->attrs->{alt}, 'alt text', 'attrs() works' ); +} diff --git a/t/local/LocalServer.pm b/t/local/LocalServer.pm new file mode 100644 index 0000000..020ea76 --- /dev/null +++ b/t/local/LocalServer.pm @@ -0,0 +1,279 @@ +package LocalServer; + +# start a fake webserver, fork, and connect to ourselves +use warnings; +use strict; +# this has to happen here because LWP::Simple creates a $ua +# on load so any time after this is too late. +BEGIN { + delete @ENV{qw( + HTTP_PROXY http_proxy CGI_HTTP_PROXY + HTTPS_PROXY https_proxy HTTP_PROXY_ALL http_proxy_all + )}; +} + +use Carp qw(carp croak); +use File::Temp (); +use LWP::Simple qw( get ); +use Path::Tiny qw( path ); +use URI::URL qw(); + +=head1 SYNOPSIS + + use LWP::Simple qw(get); + my $server = Test::HTTP::LocalServer->spawn; + + ok get $server->url, "Retrieve " . $server->url; + + $server->stop; + +=head1 METHODS + +=head2 C<Test::HTTP::LocalServer-E<gt>spawn %ARGS> + +This spawns a new HTTP server. The server will stay running until +C<< $server->stop >> is called. + +Valid arguments are: + +=over 4 + +=item * + +C<< html => >> scalar containing the page to be served + +=item * + +C<< file => >> filename containing the page to be served + +=item * + +C<< debug => 1 >> to make the spawned server output debug information + +=item * + +C<< eval => >> string that will get evaluated per request in the server + +Try to avoid characters that are special to the shell, especially quotes. +A good idea for a slow server would be + + eval => sleep+10 + +=back + +All served HTML will have the first %s replaced by the current location. + +The following entries will be removed from C<%ENV>: + + HTTP_PROXY + http_proxy + CGI_HTTP_PROXY + HTTPS_PROXY + https_proxy + HTTP_PROXY_ALL + http_proxy_all + +=cut + +sub spawn { + my ($class,%args) = @_; + my $self = { %args }; + bless $self,$class; + + local $ENV{TEST_HTTP_VERBOSE}; + $ENV{TEST_HTTP_VERBOSE} = 1 + if (delete $args{debug}); + + $self->{delete} = []; + if (my $html = delete $args{html}) { + # write the html to a temp file + my ($fh,$tempfile) = File::Temp::tempfile(); + binmode $fh; + print $fh $html + or die "Couldn't write tempfile $tempfile : $!"; + close $fh; + push @{$self->{delete}},$tempfile; + $args{file} = $tempfile; + }; + my ($fh,$logfile) = File::Temp::tempfile(); + close $fh; + push @{$self->{delete}},$logfile; + $self->{logfile} = $logfile; + my $web_page = delete $args{file} || ""; + + my $server_file = path('t/local/log-server')->absolute; + my @opts; + push @opts, "-e" => qq{"} . delete($args{ eval }) . qq{"} + if $args{ eval }; + + my $pid = open my $server, qq'$^X "$server_file" "$web_page" "$logfile" @opts|' + or croak "Couldn't spawn local server $server_file : $!"; + my $url = <$server>; + chomp $url; + die "Couldn't read back local server url" + unless $url; + + $self->{_server_url} = URI::URL->new($url); + $self->{_fh} = $server; + $self->{_pid} = $pid; + + $self; +}; + +=head2 C<< $server->port >> + +This returns the port of the current server. As new instances +will most likely run under a different port, this is convenient +if you need to compare results from two runs. + +=cut + +sub port { + carp __PACKAGE__ . '::port called without a server' unless $_[0]->{_server_url}; + $_[0]->{_server_url}->port +}; + +=head2 C<< $server->url >> + +This returns the url where you can contact the server. This url +is valid until the C<$server> goes out of scope or you call +C<< $server->stop >> or C<< $server->get_log >>. + +=cut + +sub url { + $_[0]->{_server_url}->abs->as_string +}; + +=head2 C<< $server->stop >> + +This stops the server process by requesting a special +url. + +=cut + +sub stop { + my ($self) = @_; + get( $self->quit_server ); + undef $self->{_server_url}; + if ( $self->{_fh} ) { + close $self->{_fh}; + delete $self->{_fh}; + } +}; + +=head2 C<< $server->kill >> + +This kills the server process via C<kill>. The log +cannot be retrieved then. + +=cut + +sub kill { + CORE::kill( 9 => $_[0]->{ _pid } ); + undef $_[0]->{_server_url}; + undef $_[0]->{_pid}; +}; + +=head2 C<< $server->get_log >> + +This stops the server by calling C<stop> and then returns the +output of the server process. This output will be a list of +all requests made to the server concatenated together +as a string. + +=cut + +sub get_log { + my ($self) = @_; + + my $log = get( $self->get_server_log ); + $self->stop; + return $log; +}; + +sub DESTROY { + $_[0]->stop if $_[0]->{_server_url}; + for my $file (@{$_[0]->{delete}}) { + unlink $file or warn "Couldn't remove tempfile $file : $!\n"; + }; +}; + +=head1 URLs implemented by the server + +=head2 302 redirect C<< $server->redirect($target) >> + +This URL will issue a redirect to C<$target>. No special care is taken +towards URL-decoding C<$target> as not to complicate the server code. +You need to be wary about issuing requests with escaped URL parameters. + +=head2 404 error C<< $server->error_notfound($target) >> + +This URL will response with status code 404. + +=head2 Timeout C<< $server->error_timeout($seconds) >> + +This URL will send a 599 error after C<$seconds> seconds. + +=head2 Timeout+close C<< $server->error_close($seconds) >> + +This URL will send nothing and close the connection after C<$seconds> seconds. + +=head2 Error in response content C<< $server->error_after_headers >> + +This URL will send headers for a successfull response but will close the +socket with an error after 2 blocks of 16 spaces have been sent. + +=head2 Chunked response C<< $server->chunked >> + +This URL will return 5 blocks of 16 spaces at a rate of one block per second +in a chunked response. + +=head2 Other URLs + +All other URLs will echo back the cookies and query parameters. + +=cut + +my %urls = ( + 'quit_server' => 'quit_server', + 'get_server_log' => 'get_server_log', + 'redirect' => 'redirect/%s', + 'error_notfound' => 'error/notfound/%s', + 'error_timeout' => 'error/timeout/%s', + 'error_close' => 'error/close/%s', + 'error_after_headers' => 'error/after_headers', + 'chunked' => 'chunks', +); +for (keys %urls) { + no strict 'refs'; + my $name = $_; + *{ $name } = sub { + my $self = shift; + $self->url . sprintf $urls{ $name }, @_; + }; +}; + +=head1 EXPORT + +None by default. + +=head1 COPYRIGHT AND LICENSE + +This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. + +Copyright (C) 2003-2011 Max Maischein + +=head1 AUTHOR + +Max Maischein, E<lt>corion@cpan.orgE<gt> + +Please contact me if you find bugs or otherwise improve the module. More tests are also very welcome ! + +=head1 SEE ALSO + +L<WWW::Mechanize>,L<WWW::Mechanize::Shell>,L<WWW::Mechanize::Firefox> + +=cut + +1; diff --git a/t/local/back.t b/t/local/back.t new file mode 100644 index 0000000..92fb792 --- /dev/null +++ b/t/local/back.t @@ -0,0 +1,156 @@ +use warnings; +use strict; +use Test::More tests => 47; +use lib qw( t t/local ); +use LocalServer; +use HTTP::Response; + + +=head1 NAME + +=head1 SYNOPSIS + +This tests Mech's Back "button". Tests were converted from t/live/back.t, +and subsequently enriched to deal with RT ticket #8109. + +=cut + +BEGIN { + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new(cookie_jar => {}); +isa_ok( $mech, 'WWW::Mechanize' ); +isa_ok( $mech->cookie_jar(), 'HTTP::Cookies', 'this $mech starts with a cookie jar' ); + +my $html = <<'HTML'; +<html> +<head><title>%s</title></head> +<body>Whatever. +<a href="images/">Images</a> +<a href="/scripts">Scripts</a> +<a href="/ports/">Ports</a> +<a href="modules/">Modules</a> +<form action="/search.cgi"> +<input type="text" name="q"> +<input type="submit"> +</form> +</body> +</html> +HTML + +my $server = LocalServer->spawn( html => $html ); +isa_ok( $server, 'LocalServer' ); + +ok( !$mech->back(), 'With no stack, no going back' ); + +$mech->get($server->url); +ok( $mech->success, 'Fetched OK' ); + +my $first_base = $mech->base; +my $title = $mech->title; + +$mech->follow_link( n=>2 ); +ok( $mech->success, 'Followed OK' ); + +ok( $mech->back(), 'Back should succeed' ); +is( $mech->base, $first_base, 'Did the base get set back?' ); +is( $mech->title, $title, 'Title set back?' ); + +$mech->follow_link( text => 'Images' ); +ok( $mech->success, 'Followed OK' ); + +ok( $mech->back(), 'Back should succeed' ); +is( $mech->base, $first_base, 'Did the base get set back?' ); +is( $mech->title, $title, 'Title set back?' ); + +is( scalar @{$mech->{page_stack}}, 0, 'Pre-search check' ); +$mech->submit_form( + fields => { 'q' => 'perl' }, +); +ok( $mech->success, 'Searched for Perl' ); +like( $mech->title, qr/search.cgi/, 'Right page title' ); +is( scalar @{$mech->{page_stack}}, 1, 'POST is in the stack' ); + +$mech->head( $server->url ); +ok( $mech->success, 'HEAD succeeded' ); +is( scalar @{$mech->{page_stack}}, 1, 'HEAD is not in the stack' ); + +ok( $mech->back(), 'Back should succeed' ); +ok( $mech->success, 'Back' ); +is( $mech->base, $first_base, 'Did the base get set back?' ); +is( $mech->title, $title, 'Title set back?' ); +is( scalar @{$mech->{page_stack}}, 0, 'Post-search check' ); + +=head2 Back and misc. internal fields + +RT ticket #8109 reported that back() is broken after reload(), and +that the cookie_jar was also damaged by back(). We test for that: +reload() should not alter the back() stack, and the cookie jar should +not be versioned (once a cookie is set, hitting the back button in a +browser does not cause it to go away). + +=cut + +$mech->follow_link( text => 'Images' ); +$mech->reload(); +ok( $mech->back(), 'Back should succeed' ); +is($mech->title, $title, 'reload() does not push page to stack' ); + +ok(defined($mech->cookie_jar()), + '$mech still has a cookie jar after a number of back()'); + +# Now some other weird stuff. Start with a fresh history by recreating +# $mech. +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $mech, 'No memory cycles found' ); +} + +$mech = WWW::Mechanize->new( autocheck => 0 ); +isa_ok( $mech, 'WWW::Mechanize' ); +$mech->get( $server->url ); +ok( $mech->success, 'Got root URL' ); + +my @links = qw( + /scripts + /ports/ + modules/ +); + +is( scalar @{$mech->{page_stack}}, 0, 'Pre-404 check' ); + +my $server404url = $server->error_notfound('404check'); + +$mech->get($server404url); +is( $mech->status, 404 , '404 check') or + diag( qq{\$server404url=$server404url\n\$mech->content="}, $mech->content, qq{"\n} ); + +is( scalar @{$mech->{page_stack}}, 1, 'Even 404s get on the stack' ); + +ok( $mech->back(), 'Back should succeed' ); +is( $mech->uri, $server->url, 'Back from the 404' ); +is( scalar @{$mech->{page_stack}}, 0, 'Post-404 check' ); + +for my $link ( @links ) { + $mech->get( $link ); + warn $mech->status() if (! $mech->success()); + is( $mech->status, 200, "Get $link" ); + + ok( $mech->back(), 'Back should succeed' ); + is( $mech->uri, $server->url, "Back from $link" ); +} + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $mech, 'No memory cycles found' ); +} + + diff --git a/t/local/click.t b/t/local/click.t new file mode 100644 index 0000000..4ff1c06 --- /dev/null +++ b/t/local/click.t @@ -0,0 +1,30 @@ +use warnings; +use strict; +use lib 't/local'; +use LocalServer; +use Test::More tests => 9; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize', 'Created the object' ); + +my $server = LocalServer->spawn(); +isa_ok( $server, 'LocalServer' ); + +my $response = $mech->get( $server->url ); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, 'Got URL' ) or die q{Can't even fetch local url}; +ok( $mech->is_html, 'Local page is HTML' ); + +$mech->field(query => 'foo'); # Filled the 'q' field + +$response = $mech->click('submit'); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, q{Can click 'Go' ('Google Search' button)} ); + +is( $mech->field('query'),'foo', 'Filled field correctly'); + diff --git a/t/local/click_button.t b/t/local/click_button.t new file mode 100644 index 0000000..b26331b --- /dev/null +++ b/t/local/click_button.t @@ -0,0 +1,108 @@ +use warnings; +use strict; + +use lib 't/local'; + +use LocalServer; +use Test::More 0.96; +use Test::Exception; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize', 'Created the object' ); + +my $server = LocalServer->spawn(); +isa_ok( $server, 'LocalServer' ); + +my $response = $mech->get( $server->url ); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, 'Got URL' ) or die q{Can't even fetch local url}; +ok( $mech->is_html, 'Local page is HTML' ); + +my @forms = $mech->forms; +my $form = $forms[0]; + +subtest 'click by id' => sub { + $mech->click_button(id => 0); + test_click( $mech ); + + ok( + !eval { $mech->click_button( id => 'i-do-not-exist' ); 1 }, + 'Button id not found' + ); +}; + +subtest 'click by number' => sub { + $mech->click_button(number => 1); + test_click( $mech ); + + ok(! eval { $mech->click_button(number => 3); 1 }, 'Button number out of range'); +}; + +subtest 'click by name' => sub { + $mech->click_button(name => 'submit'); + test_click( $mech ); + + ok(! eval { $mech->click_button(name => 'bogus'); 1 }, + 'Button name unknown'); +}; + +subtest 'click a <button> tag' => sub { + $mech->click_button(name => 'button_tag'); + test_click( $mech, 'button_tag', 'Walk' ); +}; + +subtest 'click by value' => sub { + # input tag + $mech->click_button(value => 'Go'); + test_click( $mech ); + + # button tag + $mech->click_button(value => 'Walk'); + test_click( $mech, 'button_tag', 'Walk' ); + + # image type + $mech->click_button(value => 'image'); + { + like( $mech->uri, qr/formsubmit/, 'Clicking on button' ); + like( $mech->uri, qr/image_input\.x=1/, 'Correct button was pressed' ); + like( $mech->uri, qr/cat_foo/, 'Parameters got transmitted OK' ); + unlike( $mech->uri, qr/Go/, 'Submit button was not transmitted' ); + } + + ok(! eval { $mech->click_button(value => 'bogus'); 1 }, + 'Button name unknown'); +}; + +CLICK_BY_OBJECT_REFERENCE: { + subtest 'click by object reference' => sub { + my $clicky_button = $form->find_input( undef, 'submit' ); + isa_ok( $clicky_button, 'HTML::Form::Input', 'Found the submit button' ); + is( $clicky_button->value, 'Go', 'Named the right thing, too' ); + + my $res = $mech->click_button(input => $clicky_button); + local $TODO = q{Calling ->click() on an object doesn't seem to use the submit button.}; + test_click( $mech ); + diag $res->request->uri; + }; +} + +subtest 'multiple button selectors' => sub { + dies_ok { $mech->click_button( id => 0, input => 1 ) } 'Dies when multiple button selectors are used'; +}; + +sub test_click { + my $mech = shift; + my $name = shift || 'submit'; + my $value = shift || 'Go'; + like( $mech->uri, qr/formsubmit/, 'Clicking on button' ); + like( $mech->uri, qr/$name=$value/, 'Correct button was pressed' ); + like( $mech->uri, qr/cat_foo/, 'Parameters got transmitted OK' ); + $mech->back; +} + +done_testing(); diff --git a/t/local/content.t b/t/local/content.t new file mode 100644 index 0000000..ab54625 --- /dev/null +++ b/t/local/content.t @@ -0,0 +1,32 @@ +use warnings; +use strict; +use lib 't/local'; +use LocalServer; +use Test::More tests => 10; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize', 'Created the object' ); + +my $server = LocalServer->spawn(); +isa_ok( $server, 'LocalServer' ); + +diag('Running tests against ' . $server->url . '?xml=1'); +my $response = $mech->get( $server->url . '?xml=1' ); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, 'Got URL' ) or die q{Can't even fetch local url}; +is( $response->content_type, 'application/xhtml+xml', 'Content type is application/xhtml+xml' ); +ok( $mech->is_html, 'Local page is HTML' ); + +$mech->field(query => 'foo'); # Filled the 'q' field + +$response = $mech->click('submit'); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, q{Can click 'Go' ('Google Search' button)} ); + +is( $mech->field('query'),'foo', 'Filled field correctly'); + diff --git a/t/local/encoding.t b/t/local/encoding.t new file mode 100644 index 0000000..5c93dfb --- /dev/null +++ b/t/local/encoding.t @@ -0,0 +1,20 @@ +use warnings; +use strict; +use Test::More tests => 5; +use lib qw( t/local ); +use LocalServer; + +BEGIN { + delete @ENV{qw( IFS CDPATH ENV BASH_ENV )}; + use_ok('WWW::Mechanize'); +} + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize' ); +my $server = LocalServer->spawn(); +isa_ok( $server, 'LocalServer' ); + +my $response = $mech->get( $server->url . 'encoding/euc-jp' ); +ok( $mech->success, 'Fetched OK' ); +is( $response->content_charset(), 'EUC-JP', 'got encoding enc-jp' ); + diff --git a/t/local/failure.t b/t/local/failure.t new file mode 100644 index 0000000..2ae48b7 --- /dev/null +++ b/t/local/failure.t @@ -0,0 +1,57 @@ +use warnings; +use strict; +use Test::More; + +use lib 't/local'; +use LocalServer; + + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; +} + +my $NONEXISTENT = 'blahblahblah.xx-only-testing.foo.'; +my @results = gethostbyname( $NONEXISTENT ); +if ( @results ) { + my ($name,$aliases,$addrtype,$length,@addrs) = @results; + my $ip = join( '.', unpack('C4',$addrs[0]) ); + plan skip_all => "Your ISP is overly helpful and returns $ip for non-existent domain $NONEXISTENT. This test cannot be run."; +} +my $bad_url = "http://$NONEXISTENT/"; + +plan tests => 15; +require_ok( 'WWW::Mechanize' ); +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $mech = WWW::Mechanize->new( autocheck => 0 ); +isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); + +GOOD_PAGE: { + my $response = $mech->get($server->url); + isa_ok( $response, 'HTTP::Response' ); + ok( $response->is_success, 'Success' ); + ok( $mech->success, 'Get webpage' ); + ok( $mech->is_html, 'It\'s HTML' ); + is( $mech->title, 'WWW::Mechanize test page', 'Correct title' ); + + my @links = $mech->links; + is( scalar @links, 10, '10 links, please' ); + my @forms = $mech->forms; + is( scalar @forms, 2, 'Two form' ); +} + +BAD_PAGE: { + my $bad_url = "http://$NONEXISTENT/"; + $mech->get( $bad_url ); + + ok( !$mech->success, 'Failed the fetch' ); + ok( !$mech->is_html, "Isn't HTML" ); + ok( !defined $mech->title, "No title" ); + + my @links = $mech->links; + is( scalar @links, 0, "No links" ); + + my @forms = $mech->forms; + is( scalar @forms, 0, "No forms" ); +} diff --git a/t/local/follow.t b/t/local/follow.t new file mode 100644 index 0000000..63f96f0 --- /dev/null +++ b/t/local/follow.t @@ -0,0 +1,62 @@ +use warnings; +use strict; +use Test::More tests => 28; +use lib 't/local'; +use LocalServer; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new( autocheck => 0 ); +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); +$agent->quiet(1); + +my $response; + +$agent->get( $server->url ); +ok( $agent->success, 'Got some page' ); +is( $agent->uri, $server->url, 'Got local server page' ); + +$response = $agent->follow_link( n => 99999 ); +ok( !$response, q{Can't follow too-high-numbered link}); + +$response = $agent->follow_link( n => 1 ); +isa_ok( $response, 'HTTP::Response', 'Gives a response' ); +isnt( $agent->uri, $server->url, 'Need to be on a separate page' ); + +ok($agent->back(), 'Can go back'); +is( $agent->uri, $server->url, 'Back at the first page' ); + +ok(! $agent->follow_link( text_regex => qr/asdfghjksdfghj/ ), "Can't follow unlikely named link"); + +ok($agent->follow_link( text => 'Link /foo' ), 'Can follow obvious named link'); +isnt( $agent->uri, $server->url, 'Need to be on a separate page' ); + +ok($agent->back(), 'Can still go back'); +ok($agent->follow_link( text_regex=>qr/L\x{f6}schen/ ), 'Can follow link with o-umlaut'); +isnt( $agent->uri, $server->url, 'Need to be on a separate page' ); + +ok($agent->back(), 'Can still go back'); +ok($agent->follow_link( text_regex=>qr/St\x{f6}sberg/ ), q{Can follow link with o-umlaut, when it's encoded in the HTML, but not in "follow"}); +isnt( $agent->uri, $server->url, 'Need to be on a separate page' ); + +ok($agent->back(), 'Can still go back'); +is( $agent->uri, $server->url, 'Back at the start page again' ); + +$response = $agent->follow_link( text_regex => qr/Snargle/ ); +ok( !$response, q{Couldn't find it} ); + +ok($agent->follow_link( url => '/foo' ), 'can follow url'); +isnt( $agent->uri, $server->url, 'Need to be on a separate page' ); +ok($agent->back(), 'Can still go back'); + +ok(!$agent->follow_link( url => '/notfoo' ), "can't follow wrong url"); +is( $agent->uri, $server->url, 'Needs to be on the same page' ); +eval {$agent->follow_link( '/foo' )}; +like($@, qr/Needs to get key-value pairs of parameters.*follow\.t/, "Invalid parameter passing gets better error message"); + diff --git a/t/local/form.t b/t/local/form.t new file mode 100644 index 0000000..05f14da --- /dev/null +++ b/t/local/form.t @@ -0,0 +1,58 @@ +use warnings; +use strict; +use Test::More tests => 21; + +use lib 't/local'; +use LocalServer; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my @warnings; +my $mech = WWW::Mechanize->new( onwarn => sub { push @warnings, @_ } ); +isa_ok( $mech, 'WWW::Mechanize' ) or die; +$mech->quiet(1); +$mech->get($server->url); +ok( $mech->success, 'Got a page' ) or die; +is( $mech->uri, $server->url, 'Got page' ); + +my ($form, $number) = $mech->form_number(1); +isa_ok( $form, 'HTML::Form', 'Can select the first form in list context call'); +is( $number, 1, 'Form number is correct' ); + +my $form_number_1 = $mech->form_number(1); +isa_ok( $form_number_1, 'HTML::Form', 'Can select the first form'); +is( $mech->current_form(), $mech->{forms}->[0], 'Set the form attribute' ); + +ok( !$mech->form_number(99), 'cannot select the 99th form'); +is( $mech->current_form(), $mech->{forms}->[0], 'Form is still set to 1' ); + +my $form_name_f = $mech->form_name('f'); +isa_ok( $form_name_f, 'HTML::Form', 'Can select the form' ); +ok( !$mech->form_name('bargle-snark'), 'cannot select non-existent form' ); + +my $form_id_pounder = $mech->form_id('pounder'); +isa_ok( $form_id_pounder, 'HTML::Form', 'Can select the form' ); +ok( !$mech->form_id('bargle-snark'), 'cannot select non-existent form' ); + +my $form_with = $mech->form_with( class => 'test', id => undef ); +isa_ok( $form_with, 'HTML::Form', 'Can select the form without id' ); +is( $mech->current_form, $form_number_1, + 'Form without id is now the current form' ); + +is( scalar @warnings, 0, 'no warnings so far' ); +$mech->quiet(0); +$form_with = $mech->form_with( class => 'test', foo => '', bar => undef ); +is( $form_with, $form_number_1, 'Can select form with ambiguous criteria' ); +is( scalar @warnings, 1, 'Got one warning' ); +is( + "@warnings", + 'There are 2 forms with no bar and class "test"' + . ' and empty foo. The first one was used.', + 'Got expected warning' +); diff --git a/t/local/get.t b/t/local/get.t new file mode 100644 index 0000000..8e30f6d --- /dev/null +++ b/t/local/get.t @@ -0,0 +1,77 @@ +use warnings; +use strict; +use Test::More tests => 34; + +use lib qw( t t/local ); +use LocalServer; + +BEGIN { + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); + +my $response = $agent->get($server->url); +isa_ok( $response, 'HTTP::Response' ); +isa_ok( $agent->response, 'HTTP::Response' ); +ok( $response->is_success, 'Page read OK' ); +ok( $agent->success, "Get webpage" ); +is( $agent->ct, "text/html", "Got the content-type..." ); +ok( $agent->is_html, "... and the is_html wrapper" ); +is( $agent->title, 'WWW::Mechanize test page', 'Titles match' ); + +$agent->get( '/foo/' ); +ok( $agent->success, 'Got the /foo' ); +is( $agent->uri, sprintf('%sfoo/',$server->url), 'Got relative OK' ); +ok( $agent->is_html,'Got HTML back' ); +is( $agent->title, 'WWW::Mechanize test page', 'Got the right page' ); + +$agent->get( '../bar/' ); +ok( $agent->success, 'Got the /bar page' ); +is( $agent->uri, sprintf('%sbar/',$server->url), 'Got relative OK' ); +ok( $agent->is_html, 'is HTML' ); +is( $agent->title, 'WWW::Mechanize test page', 'Got the right page' ); + +$agent->get( 'basics.html' ); +ok( $agent->success, 'Got the basics page' ); +is( $agent->uri, sprintf('%sbar/basics.html',$server->url), 'Got relative OK' ); +ok( $agent->is_html, 'is HTML' ); +is( $agent->title, 'WWW::Mechanize test page', 'Title matches' ); +like( $agent->content, qr/WWW::Mechanize test page/, 'Got the right page' ); + +$agent->get( './refinesearch.html' ); +ok( $agent->success, 'Got the "refine search" page' ); +is( $agent->uri, sprintf('%sbar/refinesearch.html',$server->url), 'Got relative OK' ); +ok( $agent->is_html, 'is HTML' ); +is( $agent->title, 'WWW::Mechanize test page', 'Title matches' ); +like( $agent->content, qr/WWW::Mechanize test page/, 'Got the right page' ); +my $rslength = do {use bytes; length $agent->content}; + +my $tempfile = './temp'; +unlink $tempfile; +ok( !-e $tempfile, 'tempfile not there right now' ); +$agent->get( './refinesearch.html', ':content_file'=>$tempfile ); +ok( -e $tempfile, 'File exists' ); +is( -s $tempfile, $rslength, 'Did all the bytes get saved?' ); +unlink $tempfile; + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $agent, 'Mech: no cycles' ); +} + +$agent->get('/foo/'); +ok( ! $agent->redirects, 'redirects is false before we have a redirect'); +$agent->get($server->redirect('/foo/')); +is( scalar $agent->redirects, 1, 'redirects picks up a redirect'); + diff --git a/t/local/head.t b/t/local/head.t new file mode 100644 index 0000000..ce957e3 --- /dev/null +++ b/t/local/head.t @@ -0,0 +1,28 @@ +use warnings; +use strict; +use Test::More; + +use lib qw( t t/local ); +use LocalServer; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); + +ok !$agent->base, 'Starting out with no ->base'; +my $response = $agent->get($server->url); +isa_ok( $response, 'HTTP::Response' ); +ok $agent->base, '... and now there is a ->base'; + +$agent->head( '/foo.html' ); +ok !$agent->content, 'HEADing returns no content'; +is my $filename = $agent->response->filename, 'foo.html', '... but the filename is set'; + +done_testing;
\ No newline at end of file diff --git a/t/local/log-server b/t/local/log-server new file mode 100644 index 0000000..2bf6225 --- /dev/null +++ b/t/local/log-server @@ -0,0 +1,189 @@ +# Thanks to merlyn for nudging me and giving me this snippet! +use strict; +use HTTP::Daemon (); +use CGI 4.08; +use Getopt::Long; + +$|++; + +GetOptions( + 'e=s' => \my $expression, +); + +my $d = HTTP::Daemon->new or die; + +print $d->url, "\n"; + +my ($filename,$logfile) = @ARGV[0,1]; +if ($filename) { + open DATA, "< $filename" + or die "Couldn't read page '$filename' : $!\n"; +}; +#open LOG, ">", $logfile +# or die "Couldn't create logfile '$logfile' : $!\n"; +my $log; +my $body = join "", <DATA>; +$body =~ s/<!x([0-9a-fA-F]+)>/chr(hex($1))/eg; +utf8::encode($body); +utf8::upgrade($body); + +sub debug($) { + my $message = $_[0]; + $message =~ s!\n!\n#SERVER:!g; + warn "#SERVER: $message" + if $ENV{TEST_HTTP_VERBOSE}; +}; + +SERVERLOOP: { + my $quitserver; + while (my $c = $d->accept) { + debug "New connection"; + while (my $r = $c->get_request) { + debug "Request:\n" . $r->as_string; + my $location = ($r->uri->path || "/"); + my ($link1,$link2) = ('',''); + if ($location =~ m!^/link/([^/]+)/(.*)$!) { + ($link1,$link2) = ($1,$2); + }; + my $res; + if ($location eq '/get_server_log') { + $res = HTTP::Response->new(200, "OK", undef, $log); + $log = ''; + } elsif ( $location eq '/quit_server') { + debug "Quitting"; + $res = HTTP::Response->new(200, "OK", [Connection => 'close'], "quit"); + $quitserver = 1; + } else { + eval $expression + if $expression; + warn "eval: $@" if $@; + $log .= "Request:\n" . $r->as_string . "\n"; + if ($location =~ m!^/redirect/(.*)$!) { + $res = HTTP::Response->new(302); + $res->header('location', $d->url . $1); + } elsif ($location =~ m!^/error/notfound/(.*)$!) { + $res = HTTP::Response->new(404, "Not found", [Connection => 'close']); + } elsif ($location =~ m!^/error/timeout/(\d+)$!) { + sleep $1; + $res = HTTP::Response->new(599, "Timeout reached", [Connection => 'close']); + } elsif ($location =~ m!^/error/close/(\d+)$!) { + sleep $1; + $res = undef; + } elsif ( $location =~ m!^/chunks!) { + my $count = 5; + $res = HTTP::Response->new(200, "OK", undef, sub { + sleep 1; + my $buf = 'x' x 16; + return $buf if $count-- > 0; + return undef; # done + }); + } elsif ($location =~ m!^/error/after_headers$!) { + my $count = 2; + $res = HTTP::Response->new(200, "OK", undef, sub { + sleep 1; + my $buf = 'x' x 16; + return $buf if $count-- > 0; + die "Planned error after headers"; + }); + } elsif ($location =~ m!^/encoding/(.*)!) { + my $encoding = $1; + $res = HTTP::Response->new( + 200, "OK", + [ 'Content-Type' => "text/html; charset=$encoding" ], + "encoding $encoding" + ); + } else { + my $q = CGI->new($r->uri->query); + + # Make sticky form fields + my ($query,$session,%cat); + $query = defined $q->param('query') ? $q->param('query') : "(empty)"; + $session = defined $q->param('session') ? $q->param('session') : 1; + %cat = map { $_ => 1 } (defined $q->param('cat') ? $q->multi_param('cat') : qw( cat_foo cat_bar )); + my @categories = map { $cat{$_} ? "checked" : "" } qw( cat_foo cat_bar cat_baz ); + (my $h = $r->headers->{host}) =~ s/:\d+//; + my $rbody = sprintf $body,$location,$session,$query,@categories; + $res = HTTP::Response->new(200, "OK", [ + "Set-Cookie" => $q->cookie(-name => 'log-server',-value=>'shazam2', -discard=>1,), + 'Cache-Control' => 'no-cache', + 'Pragma' => 'no-cache', + 'Max-Age' => 0, + 'Connection' => 'close', + 'Content-Length' => length($rbody), + ], $rbody); + + $res->content_type( + $q->param('xml') ? 'application/xhtml+xml' : 'text/html' + ); + + debug "Request " . ($r->uri->path || "/"); + }; + }; + debug "Response:\n" . $res->as_string + if $res; + eval { + $c->send_response($res) + if $res; + }; + if (my $err = $@) { + debug "Server raised error: $err"; + if ($err !~ /^Planned error\b/) { + warn $err; + }; + $c->close; + }; + if (! $res) { + $c->close; + }; + last if $quitserver; + } + sleep 1; + undef($c); + last SERVERLOOP + if $quitserver; + } +}; +END { debug "Server stopped" }; + +__DATA__ +<html> +<head> +<title>WWW::Mechanize test page</title> +</head> +<body> +<h1>Location: %s</h1> +<p> + <a href="/test">Link /test</a> + <a href="/foo">Link /foo</a> + <a href="/slash_end">Link /</a> + <a href="/slash_front">/Link </a> + <a href="/slash_both">/Link in slashes/</a> + <a href="/foo1.save_log_server_test.tmp">Link foo1.save_log_server_test.tmp</a> + <a href="/foo2.save_log_server_test.tmp">Link foo2.save_log_server_test.tmp</a> + <a href="/foo3.save_log_server_test.tmp">Link foo3.save_log_server_test.tmp</a> + <a href="/o-umlaut">L<!xf6>schen -- testing for o-umlaut.</a> + <a href="/o-umlaut-encoded">Stösberg -- testing for encoded o-umlaut.</a> + + <table> + <tr><th>Col1</th><th>Col2</th><th>Col3</th></tr> + <tr><td>A1</td><td>A2</td><td>A3</td></tr> + <tr><td>B1</td><td>B2</td><td>B3</td></tr> + <tr><td>C1</td><td>C2</td><td>C3</td></tr> + </table> + <form name="f" action="/formsubmit" class="test" foo=""> + <input type="hidden" name="session" value="%s"/> + <input type="text" name="query" value="%s"/> + <input type="submit" name="submit" value="Go" id="0" /> + <input type="checkbox" name="cat" value="cat_foo" %s /> + <input type="checkbox" name="cat" value="cat_bar" %s /> + <input type="checkbox" name="cat" value="cat_baz" %s /> + <input type="file" name="upload" value="README" /> + <button type="submit" name="button_tag" value="Walk" /> + <input type="image" name="image_input" value="image" /> + </form> + <form id="pounder" action="/formsubmit" class="test" foo=""> + <input type="text" name="query" value="%s"/> + </form> +</p> +</body> +</html> diff --git a/t/local/nonascii.html b/t/local/nonascii.html new file mode 100644 index 0000000..16255ec --- /dev/null +++ b/t/local/nonascii.html @@ -0,0 +1,14 @@ +<!DOCTYPE html + PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> + +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head> + +<title>Query Builder</title> +</head> + <body id="comp-Search-Build"> +<form method="post" action="Build.html" name="BuildQuery"> +<input name="ValueOf'CF.{т}'" size="20" /> +</form> + </body> +</html> diff --git a/t/local/nonascii.t b/t/local/nonascii.t new file mode 100644 index 0000000..d9e74db --- /dev/null +++ b/t/local/nonascii.t @@ -0,0 +1,24 @@ +use warnings; +use strict; +use Test::More tests => 5; +use lib 't/local'; +use LocalServer; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn( file => 't/local/nonascii.html' ); +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); +$agent->quiet(0); + +$agent->get( $server->url ); +ok( $agent->success, 'Got some page' ); + +# \321\202 is \x{442} +$agent->field("ValueOf'CF.{\321\202}'", "\321\201"); +is($agent->value("ValueOf'CF.{\321\202}'"), "\321\201", 'set utf value'); diff --git a/t/local/overload.t b/t/local/overload.t new file mode 100644 index 0000000..0b42d23 --- /dev/null +++ b/t/local/overload.t @@ -0,0 +1,88 @@ +use Test::More skip_all => "Mysteriously stopped passing, and I don't know why."; +use warnings; +use strict; +use lib 't/local'; +use LocalServer; +use Test::More tests => 11; + +=head1 NAME + +overload.t + +=head1 SYNOPSIS + +This tests for various ways, advertised in L<WWW::Mechanize>, to +create a subclass of the mech to alter it's behavior in a useful +manner. (Of course free-style overloading is discouraged, as it breaks +encapsulation big time.) + +This test first feeds some bad HTML to Mech to make sure that it throws +an error. Then, it overloads update_html() to fix the HTML before +processing it, and then we should not have an error. + +=head2 Overloading update_html() + +This is the recommended way to tidy up the received HTML in a generic +way, and/or to install supplemental "surface tests" on the HTML +(e.g. link checker). + +=cut + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn(html => <<'BROKEN_HTML'); +<html> +<head><title>Broken document</head> +<form> +<table> +<tr><select name="foo"> +<option value="bar">Bar</option></td></tr> +</form> +</html> +BROKEN_HTML +isa_ok( $server, 'LocalServer' ); + +do { + package MyMech; + use base 'WWW::Mechanize'; + + sub update_html { + my $self = shift; + my $html = shift; + + $html =~ s[Broken][Fixed]isg or die "Couldn't fix the HTML for the test (#1)"; + $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg or die "Couldn't fix the HTML for the test (#2)"; + + $self->WWW::Mechanize::update_html( $html ); + } +}; + +my $carpmsg; +local $^W = 1; +no warnings 'redefine'; +local *Carp::carp = sub {$carpmsg = shift}; + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize' ); + +$mech->get( $server->url ); +like($carpmsg, qr{bad.*select}i, 'Standard mech chokes on bogus HTML'); + +# If at first you don't succeed, try with a shorter bungee... +undef $carpmsg; +$mech = MyMech->new(); +isa_ok( $mech, 'WWW::Mechanize', 'Derived object' ); + +my $response = $mech->get( $server->url ); +isa_ok( $response, 'HTTP::Response', 'Response I got back' ); +ok( $response->is_success, 'Got URL' ) or die 'Can\'t even fetch local url'; +ok( $mech->is_html, 'Local page is HTML' ); +ok( !$carpmsg, 'No warnings this time' ); + +my @forms = $mech->forms; +is( scalar @forms, 1, 'One form' ); + +like($mech->content(), qr{/select}, 'alteration visible in ->content() too'); diff --git a/t/local/page_stack.t b/t/local/page_stack.t new file mode 100644 index 0000000..b97cff5 --- /dev/null +++ b/t/local/page_stack.t @@ -0,0 +1,62 @@ +use warnings; +use strict; +use Test::More; + +use lib 't/local'; +use LocalServer; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +STANDARD_STACK: { + my $history; + my $mech = WWW::Mechanize->new; + isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); + + is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); + is( $mech->history_count, 0, 'No history count to start' ); + is( $mech->history(0), undef, 'No 0th history item yet' ); + + ok( $mech->get($server->url)->is_success, 'Got start page' ); + is( scalar @{$mech->{page_stack}}, 0, 'Page stack empty after first get' ); + $history = $mech->history(0); + is( $history->{req}->url, $server->url, "0th history is last request"); + is( $mech->history(1), undef, 'No 1th history item yet' ); + + is( $mech->history_count, 1, 'One history count after first get' ); + $mech->_push_page_stack(); + is( scalar @{$mech->{page_stack}}, 1, 'Pushed item onto page stack' ); + is( $mech->history_count, 2, 'Two history count after push' ); + $mech->_push_page_stack(); + is( scalar @{$mech->{page_stack}}, 2, 'Pushed item onto page stack' ); + is( $mech->history_count, 3, 'Three history count after push' ); + $mech->back(); + is( scalar @{$mech->{page_stack}}, 1, 'Popped item from page stack' ); + is( $mech->history_count, 2, 'History count back to 2 post pop' ); + $mech->back(); + is( scalar @{$mech->{page_stack}}, 0, 'Popped item from page stack' ); + is( $mech->history_count, 1, 'History count back to 1 post pop' ); + $mech->back(); + is( scalar @{$mech->{page_stack}}, 0, 'Cannot pop beyond end of page stack' ); + is( $mech->history_count, 1, 'History count stable at 1' ); +} + +NO_STACK: { + my $mech = WWW::Mechanize->new; + isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); + $mech->stack_depth(0); + + is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); + ok( $mech->get($server->url)->is_success, 'Got start page' ); + is( scalar @{$mech->{page_stack}}, 0, 'Page stack starts empty' ); + $mech->_push_page_stack(); + is( scalar @{$mech->{page_stack}}, 0, 'Pushing has no effect' ); +} + +done_testing; diff --git a/t/local/post.t b/t/local/post.t new file mode 100644 index 0000000..0075283 --- /dev/null +++ b/t/local/post.t @@ -0,0 +1,29 @@ +use warnings; +use strict; +use Test::More tests => 5; + +use lib qw( t t/local ); +use LocalServer; + +BEGIN { + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); + +# GET with full URL to set the base +$agent->get($server->url); +ok( $agent->success, "Get webpage" ); + +# POST with relative URL +$agent->post('/post'); +ok( $agent->success, "Post webpage" ); diff --git a/t/local/referer-server b/t/local/referer-server new file mode 100644 index 0000000..9ecd393 --- /dev/null +++ b/t/local/referer-server @@ -0,0 +1,18 @@ +# Thanks to merlyn for nudging me and giving me this snippet! + +use HTTP::Daemon (); + +$|++; + +my $d = HTTP::Daemon->new or die; +print $d->url, "\n"; + +$counter = 5; +while ($counter-- and my $c = $d->accept) { + while (my $r = $c->get_request) { + my $ref = $r->headers->referer || ""; + $c->send_response(HTTP::Response->new(200, "OK", undef, "Referer: '$ref'")); + } + $c->close; + undef($c); +} diff --git a/t/local/referer.t b/t/local/referer.t new file mode 100644 index 0000000..9ea193b --- /dev/null +++ b/t/local/referer.t @@ -0,0 +1,68 @@ +use warnings; +use strict; +use FindBin; + +use Test::More tests => 13; + +BEGIN { + use lib 't'; + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +our $server; +my $agent = WWW::Mechanize->new(); +isa_ok( $agent, 'WWW::Mechanize' ); + +SKIP: { + # We want to be safe from non-resolving local host names + delete $ENV{HTTP_PROXY}; + + # Now start a fake webserver, fork, and connect to ourselves + my $command = qq'"$^X" "$FindBin::Bin/referer-server"'; + if ($^O eq 'VMS') { + $command = qq'mcr $^X t/referer-server'; + } + + open $server, "$command |" or die "Couldn't spawn fake server: $!"; + sleep 1; # give the child some time + my $url = <$server>; + chomp $url; + + $agent->get( $url ); + is($agent->status, 200, 'Got first page') or diag $agent->res->message; + is($agent->content, q{Referer: ''}, 'First page gets sent with empty referrer'); + + $agent->get( $url ); + is($agent->status, 200, 'Got second page') or diag $agent->res->message; + is($agent->content, "Referer: '$url'", 'Referer got sent for absolute url'); + + $agent->get( '.' ); + is($agent->status, 200, 'Got third page') or diag $agent->res->message; + is($agent->content, "Referer: '$url'", 'Referer got sent for relative url'); + + $agent->add_header( Referer => 'x' ); + $agent->get( $url ); + is($agent->status, 200, 'Got fourth page') or diag $agent->res->message; + is($agent->content, q{Referer: 'x'}, 'Referer can be set to empty again'); + + my $ref = 'This is not the referer you are looking for *jedi gesture*'; + $agent->add_header( Referer => $ref ); + $agent->get( $url ); + is($agent->status, 200, 'Got fourth page') or diag $agent->res->message; + is($agent->content, "Referer: '$ref'", 'Custom referer can be set'); +}; + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $agent, 'No memory cycles found' ); +} + +END { + close $server if $server; +} diff --git a/t/local/reload.t b/t/local/reload.t new file mode 100644 index 0000000..0a598b9 --- /dev/null +++ b/t/local/reload.t @@ -0,0 +1,58 @@ +use warnings; +use strict; +use Test::More tests => 15; + +use lib qw( t t/local ); +use LocalServer; + +BEGIN { + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $agent = WWW::Mechanize->new; +isa_ok( $agent, 'WWW::Mechanize', 'Created object' ); + +NO_GET: { + my $r = $agent->reload; + ok( !defined($r), 'Initial reload should fail' ); +} + +FIRST_GET: { + my $r = $agent->get($server->url); + isa_ok( $r, 'HTTP::Response' ); + ok( $r->is_success, 'Get google webpage'); + ok( $agent->is_html, 'Valid HTML' ); + is( $agent->title, 'WWW::Mechanize test page' ); +} + +INVALIDATE: { + undef $agent->{content}; + undef $agent->{ct}; + isnt( $agent->title, 'WWW::Mechanize test page' ); + ok( !$agent->is_html, 'Not HTML' ); +} + +RELOAD: { + my $r = $agent->reload; + isa_ok( $r, 'HTTP::Response' ); + ok( $agent->is_html, 'Valid HTML' ); + ok( $agent->title, 'WWW::Mechanize test page' ); + my $cookie_before = $agent->history(0)->{req}->header('Cookie'); + $agent->reload; + my $cookie_after = $agent->history(0)->{req}->header('Cookie'); + is( $cookie_after, $cookie_before, 'cookies are not multiplied' ); +} + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $agent, 'Mech: no cycles' ); +} diff --git a/t/local/select_multiple.t b/t/local/select_multiple.t new file mode 100644 index 0000000..973bb9b --- /dev/null +++ b/t/local/select_multiple.t @@ -0,0 +1,114 @@ +use warnings; +use strict; +use Test::More; + +use lib 't/local'; +use LocalServer; + +BEGIN { + use_ok( 'WWW::Mechanize' ); + delete @ENV{ qw( http_proxy HTTP_PROXY ) }; + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + +} + +my $mech = WWW::Mechanize->new(cookie_jar => {}); +isa_ok( $mech, "WWW::Mechanize" ); +ok(defined($mech->cookie_jar()), + 'this $mech starts with a cookie jar'); + +my $html = <<'HTML'; +<html> +<head><title>%s</title></head> +<body>Whatever. + <form action="foo.thing"> + <select name="chanId" MULTIPLE> + <option value="130" selected>Anime Network</option> + <option value="119" >COM 250</option> + </select> + </form> +</body> +</html> +HTML + +my $server = LocalServer->spawn( html => $html ); +isa_ok( $server, "LocalServer" ); + +$mech->get($server->url); +ok( $mech->success, 'Fetched OK' ); + +eval { + $mech->submit_form( + form_number => 1, + fields => { + chanId => 119, + } + ); +}; +is( $@, '', 'submit_form, second value' ); +like( $mech->uri, qr/chanId=119/, '... and the second value was set'); + +eval { + $mech->form_number(1); + $mech->set_fields( + chanId => 119, + ); +}; +is( $@, '', 'set_fields, second value' ); +like( $mech->uri, qr/chanId=119/, '... and the second value was set'); + + +eval { + $mech->submit_form( + form_number => 1, + fields => { + chanId => [119], + } + ); +}; +is( $@, '', 'submit_form, second value as array' ); +like( $mech->uri, qr/chanId=119/, '... and the second value was set'); + + +eval { + $mech->form_number(1); + $mech->field( + chanId => 119, + ); + $mech->submit; +}; +is( $@, '', 'field, second value' ); +like( $mech->uri, qr/chanId=119/, '... and the second value was set'); + + +eval { + $mech->form_number(1); + $mech->field( + chanId => [119], + ); + $mech->submit; +}; +is( $@, '', 'field, second value as array' ); +like( $mech->uri, qr/chanId=119/, '... and the second value was set'); + + +eval { + $mech->submit_form( + form_number => 1, + fields => { + chanId => 130, + } + ); +}; +is( $@, '', 'submit_form, first value' ); +like( $mech->uri, qr/chanId=130/, '... and the first value was set'); + + +SKIP: { + eval "use Test::Memory::Cycle"; + skip "Test::Memory::Cycle not installed", 1 if $@; + + memory_cycle_ok( $mech, "No memory cycles found" ); +} + +done_testing;
\ No newline at end of file diff --git a/t/local/submit.t b/t/local/submit.t new file mode 100644 index 0000000..f884ca8 --- /dev/null +++ b/t/local/submit.t @@ -0,0 +1,44 @@ +use warnings; +use strict; +use lib qw( t t/local ); +use Test::More tests => 13; +use LocalServer; + +BEGIN { + use Tools; +} + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV ) }; + use_ok( 'WWW::Mechanize' ); +} + +my $server = LocalServer->spawn; +isa_ok( $server, 'LocalServer' ); + +my $mech = WWW::Mechanize->new(); +isa_ok( $mech, 'WWW::Mechanize', 'Created the object' ) or die; + +my $response = $mech->get( $server->url ); +isa_ok( $response, 'HTTP::Response', 'Got back a response' ) or die; +is( $mech->uri, $server->url, 'Got the correct page' ); +ok( $response->is_success, 'Got local page' ) or die 'cannot even fetch local page'; +ok( $mech->is_html, 'is HTML' ); + +is( $mech->value('upload'), '', 'Hopefully no upload happens'); + +$mech->field(query => 'foo'); # Filled the 'q' field + +$response = $mech->submit; +isa_ok( $response, 'HTTP::Response', 'Got back a response' ); +ok( $response->is_success, 'Can click "submit" ("submit" button)'); + +like($mech->content, qr/\bfoo\b/i, 'Found "Foo"'); + +is( $mech->value('upload'), '', 'No upload happens' ); + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $mech, 'Mech: no cycles' ); +} diff --git a/t/mech-dump/mech-dump.t b/t/mech-dump/mech-dump.t new file mode 100644 index 0000000..bff4710 --- /dev/null +++ b/t/mech-dump/mech-dump.t @@ -0,0 +1,103 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use File::Spec; +use LWP; + +BEGIN { + delete @ENV{ qw( IFS CDPATH ENV BASH_ENV PATH ) }; +} + +plan skip_all => 'Not installing mech-dump' if -e File::Spec->catfile( qw( t SKIP-MECH-DUMP ) ); +plan tests => 4; + +my $exe = File::Spec->catfile( qw( script mech-dump ) ); +if ( $^O eq 'VMS' ) { + $exe = qq[mcr $^X -Ilib $exe]; +} + +# Simply use a file: uri instead of the filename to make this test +# more independent of what URI::* thinks. +my $source = 'file:t/google.html t/find_inputs.html'; + +my $perl; +$perl = $1 if $^X =~ /^(.+)$/; +my $command = "$perl -Ilib $exe --forms $source"; + +my $actual = `$command`; + +my $expected; +if ( $LWP::VERSION < 5.800 ) { + $expected = <<'EOF'; +GET file:/target-page [bob-the-form] + hl=en (hidden) + ie=ISO-8859-1 (hidden) + notgoogle= (hidden readonly) + q= + btnG=Google Search (submit) + btnI=I'm Feeling Lucky (submit) + +POST http://localhost/ (multipart/form-data) [1st_form] + 1a= (text) + submit1=Submit (image) + submit2=Submit (submit) + +POST http://localhost/ [2nd_form] + YourMom= (text) + opt[2]= (text) + 1b= (text) + submit=Submit (submit) + +POST http://localhost/ [3rd_form] + YourMom= (text) + YourDad= (text) + YourSister= (text) + YourSister= (text) + submit=Submit (submit) +EOF +} else { + $expected = <<'EOF'; +GET file:/target-page [bob-the-form] + hl=en (hidden readonly) + ie=ISO-8859-1 (hidden readonly) + notgoogle= (hidden readonly) + q= (text) + btnG=Google Search (submit) + btnI=I'm Feeling Lucky (submit) + +POST http://localhost/ (multipart/form-data) [1st_form] + 1a= (text) + submit1=Submit (image) + submit2=Submit (submit) + +POST http://localhost/ [2nd_form] + YourMom= (text) + opt[2]= (text) + 1b= (text) + submit=Submit (submit) + +POST http://localhost/ [3rd_form] + YourMom= (text) + YourDad= (text) + YourSister= (text) + YourSister= (text) + submit=Submit (submit) +EOF +} + +my @actual = split /\s*\n/, $actual; +my @expected = split /\s*\n/, $expected; + +# First line is platform-dependent, so handle it accordingly. +shift @expected; +my $first = shift @actual; +like( $first, qr/^GET file:.*\/target-page \[bob-the-form\]/, 'First line matches' ); + +cmp_ok( @expected, '>', 0, 'Still some expected' ); +cmp_ok( @actual, '>', 0, 'Still some actual' ); + +is_deeply( \@actual, \@expected, 'Rest of the lines match' ); + @@ -0,0 +1,47 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 15; + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +RES_ON_NEW: { + my $m = WWW::Mechanize->new; + isa_ok( $m, 'WWW::Mechanize' ); + + ok( !$m->success, 'success() is false before any get' ); + + my $res = $m->res; + ok( !defined $res, 'res() is undef' ); +} + + +NO_AGENT: { + my $m = WWW::Mechanize->new; + isa_ok( $m, 'WWW::Mechanize' ); + can_ok( $m, 'request' ); + like( $m->agent, qr/WWW-Mechanize/, 'Set user agent string' ); + like( $m->agent, qr/$WWW::Mechanize::VERSION/, 'Set user agent version' ); + + $m->agent( 'foo/bar v1.23' ); + is( $m->agent, 'foo/bar v1.23', 'Can set the agent' ); + + like( $m->_agent, qr/WWW-Mechanize/, '_agent() is static' ); +} + +USER_AGENT: { + my $alias = 'Windows IE 6'; + my $m = WWW::Mechanize->new( agent => $alias ); + isa_ok( $m, 'WWW::Mechanize' ); + can_ok( $m, 'request' ); + is( $m->agent, $alias, q{Aliases don't get translated in the constructor} ); + + $m->agent_alias( $alias ); + like( $m->agent, qr/^Mozilla.+compatible.+Windows/, 'Alias sets the agent' ); + + $m->agent( 'ratso/bongo v.43' ); + is( $m->agent, 'ratso/bongo v.43', 'Can still set the agent' ); +} diff --git a/t/refresh.html b/t/refresh.html new file mode 100644 index 0000000..cae0839 --- /dev/null +++ b/t/refresh.html @@ -0,0 +1,3 @@ +<body> + <meta content="0; url='http://www.mysite.com/'" http-equiv="refresh"> +</body> diff --git a/t/regex-error.t b/t/regex-error.t new file mode 100644 index 0000000..0c3d2ee --- /dev/null +++ b/t/regex-error.t @@ -0,0 +1,21 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use Test::Warn qw( warning_like); +use WWW::Mechanize (); + +my $m = WWW::Mechanize->new; +isa_ok( $m, 'WWW::Mechanize' ); + +warning_like { + $m->find_link( link_regex => 'foo' ); +} qr[Unknown link-finding parameter "link_regex".+line \d+], 'Passes message, and includes the line number'; + +warning_like { + $m->find_link( url_regex => 'foo' ); +} qr[foo passed as url_regex is not a regex.+line \d+], 'Passes message, and includes the line number'; + +done_testing(); diff --git a/t/save_content.html b/t/save_content.html new file mode 100644 index 0000000..03091aa --- /dev/null +++ b/t/save_content.html @@ -0,0 +1,8 @@ +<html> +<head> + <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> +</head> +<body> + Però poi si vedrà!!! +</body> +</html> diff --git a/t/save_content.t b/t/save_content.t new file mode 100644 index 0000000..737c73b --- /dev/null +++ b/t/save_content.t @@ -0,0 +1,58 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More tests => 8; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $original = 't/find_inputs.html'; +my $saved = 'saved1.test.txt'; + +my $uri = URI::file->new_abs( $original )->as_string; + +$mech->get( $uri ); +ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + +#unlink $saved; +ok( !-e $saved, "$saved does not exist" ); +$mech->save_content( $saved ); + +my $old_text = slurp( $original ); +my $new_text = slurp( $saved ); + +ok( $old_text eq $new_text, 'Saved copy matches the original' ) && unlink $saved; + +{ + my $original = 't/save_content.html'; + my $saved = 'saved2.test.txt'; + + my $uri = URI::file->new_abs( $original )->as_string; + + $mech->get( $uri ); + ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page}; + + #unlink $saved; + ok( !-e $saved, "$saved does not exist" ); + $mech->save_content( $saved, binary => 1 ); + + my $old_text = slurp( $original ); + my $new_text = slurp( $saved ); + + ok( $old_text eq $new_text, 'Saved copy matches the original' ) && unlink $saved; +} + +sub slurp { + my $name = shift; + + open( my $fh, '<', $name ) or die "Can't open $name: $!\n"; + return join '', <$fh>; +} diff --git a/t/select.html b/t/select.html new file mode 100644 index 0000000..8f15629 --- /dev/null +++ b/t/select.html @@ -0,0 +1,22 @@ +<HTML> +<HEAD> + Like a hole +</HEAD> +<BODY BGCOLOR="puce"> +<FORM ACTION="/shake-some/"> +<select name="multilist" rows=4 multiple> +<option value="aaa">aaa</a> +<option value="bbb">bbb</a> +<option value="ccc">ccc</a> +<option value="ddd">ddd</a> +</select> + +<select name="singlelist" rows=4> +<option value="aaa">aaa</a> +<option value="bbb">bbb</a> +<option value="ccc">ccc</a> +<option value="ddd">ddd</a> +</select> +</FORM> +</BODY> +</HTML> diff --git a/t/select.t b/t/select.t new file mode 100644 index 0000000..55ceb4a --- /dev/null +++ b/t/select.t @@ -0,0 +1,82 @@ +#!perl -T + +use warnings; +use strict; +use Test::More tests => 14; +use URI::file; + +BEGIN { + delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/select.html' )->as_string; +my $response = $mech->get( $uri ); +ok( $response->is_success, "Fetched $uri" ); + +my ($sendsingle, @sendmulti, %sendsingle, %sendmulti, + $rv, $return, @return, @singlereturn, $form); +# possible values are: aaa, bbb, ccc, ddd +$sendsingle = 'aaa'; +@sendmulti = qw(bbb ccc); +@singlereturn = ($sendmulti[0]); +%sendsingle = (n => 1); +%sendmulti = (n => [2, 3]); + +ok($mech->form_number(1), 'set form to number 1'); +$form = $mech->current_form(); + + +# Multi-select + +# pass multiple values to a multi select +$mech->select('multilist', \@sendmulti); +@return = $form->param('multilist'); +is_deeply(\@return, \@sendmulti, 'multi->multi value is ' . join(' ', @return)); + +$mech->select('multilist', \%sendmulti); +@return = $form->param('multilist'); +is_deeply(\@return, \@sendmulti, 'multi->multi value is ' . join(' ', @return)); + +# pass a single value to a multi select +$mech->select('multilist', $sendsingle); +$return = $form->param('multilist'); +is($return, $sendsingle, "single->multi value is '$return'"); + +$mech->select('multilist', \%sendsingle); +$return = $form->param('multilist'); +is($return, $sendsingle, "single->multi value is '$return'"); + + +# Single select + +# pass multiple values to a single select (only the _first_ should be set) +$mech->select('singlelist', \@sendmulti); +@return = $form->param('singlelist'); +is_deeply(\@return, \@singlereturn, 'multi->single value is ' . join(' ', @return)); + +$mech->select('singlelist', \%sendmulti); +@return = $form->param('singlelist'); +is_deeply(\@return, \@singlereturn, 'multi->single value is ' . join(' ', @return)); + + +# pass a single value to a single select +$rv = $mech->select('singlelist', $sendsingle); +$return = $form->param('singlelist'); +is($return, $sendsingle, "single->single value is '$return'"); + +$rv = $mech->select('singlelist', \%sendsingle); +$return = $form->param('singlelist'); +is($return, $sendsingle, "single->single value is '$return'"); + +# test return value from $mech->select +is($rv, 1, 'return 1 after successful select'); + +EAT_THE_WARNING: { # Mech complains about the non-existent field + local $SIG{__WARN__} = sub {}; + $rv = $mech->select('missing_list', 1); +} +is($rv, undef, 'return undef after failed select'); diff --git a/t/submit_form.t b/t/submit_form.t new file mode 100644 index 0000000..7beaf90 --- /dev/null +++ b/t/submit_form.t @@ -0,0 +1,94 @@ +#!perl + +use warnings; +use strict; + +use Test::More; +use Test::Fatal; +use Test::Warnings ':all'; +use URI::file (); +use WWW::Mechanize (); + +my $mech = WWW::Mechanize->new( cookie_jar => undef, autocheck => 0 ); +my $uri = URI::file->new_abs('t/form_with_fields.html')->as_string; + +$mech->get($uri); + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_id => 'i-do-not-exist', + ); + }, + qr/There is no form with ID "i-do-not-exist"/, + 'submit_form with no match on form_id', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_thing => 'i-do-not-exist', + ); + }, + qr/Unknown submit_form parameter "form_thing"/, + 'submit_form with invalid arg', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_number => 99, + ); + }, + qr/There is no form numbered 99/, + 'submit_form with invalid form number', + ); +} +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + form_name => 99, + ); + }, + qr/There is no form named "99"/, + 'submit_form with invalid form name', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + with_fields => [ 'foo', 'bar' ], + ); + }, + qr/with_fields arg to submit_form must be a hashref/, + 'submit_form with invalid arg value for with_fields', + ); +} + +{ + $mech->get($uri); + like( + exception { + $mech->submit_form( + fields => [ 'foo', 'bar' ], + ); + }, + qr/fields arg to submit_form must be a hashref/, + 'submit_form with invalid arg value for fields', + ); +} + +done_testing(); diff --git a/t/taint.t b/t/taint.t new file mode 100644 index 0000000..a486df2 --- /dev/null +++ b/t/taint.t @@ -0,0 +1,28 @@ +#!perl -T + +use warnings; +use strict; +use Test::More; + +BEGIN { + eval 'use Test::Taint'; + plan skip_all => 'Test::Taint required for checking taintedness' if $@; + plan tests=>6; +} + +BEGIN { + use_ok( 'WWW::Mechanize' ); +} + +my $mech = WWW::Mechanize->new( autocheck => 1 ); +isa_ok( $mech, 'WWW::Mechanize', 'Created object' ); + +$mech->get( 'file:t/google.html' ); + +# Make sure taint checking is on correctly +tainted_ok( $^X, 'Interpreter Variable taints OK' ); + +is( $mech->title, 'Google', 'Correct title' ); +untainted_ok( $mech->title, 'Title should not be tainted' ); + +tainted_ok( $mech->content, 'But content should' ); diff --git a/t/tick.html b/t/tick.html new file mode 100644 index 0000000..88256a5 --- /dev/null +++ b/t/tick.html @@ -0,0 +1,14 @@ +<html> +<body> +<form action="http://localhost/" method="POST"> + +<input type="checkbox" name="foo" value="hello" /> Hello<br /> +<input type="checkbox" name="foo" value="bye" /> Bye<br /> +<input type="checkbox" name="foo" value="arse" /> Arse<br /> +<input type="checkbox" name="foo" value="wibble" /> Wibble<br /> +<input type="checkbox" name="foo" value="foo" /> Foo<br /> + +<input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> +</body> + diff --git a/t/tick.t b/t/tick.t new file mode 100644 index 0000000..2a46fd0 --- /dev/null +++ b/t/tick.t @@ -0,0 +1,46 @@ +#!perl -T + +use warnings; +use strict; + +use Test::Fatal qw( exception ); +use Test::More; +use URI::file; + +delete @ENV{qw(PATH IFS CDPATH ENV BASH_ENV)}; # Placates taint-unsafe Cwd.pm in 5.6.1 +use_ok( 'WWW::Mechanize' ); + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/tick.html' )->as_string; +$mech->get( $uri ); +ok( $mech->success, $uri ); + +$mech->form_number( 1 ); +$mech->tick('foo','hello'); +$mech->tick('foo','bye'); +$mech->untick('foo','hello'); + +my $form = $mech->form_number(1); +isa_ok( $form, 'HTML::Form' ); + +my $reqstring = $form->click->as_string; + +my $wanted = <<'EOT'; +POST http://localhost/ +Content-Length: 21 +Content-Type: application/x-www-form-urlencoded + +foo=bye&submit=Submit +EOT + +is( $reqstring, $wanted, 'Proper posting' ); + +like( + exception { $mech->tick( 'not_there', 1 ) }, + qr{No checkbox "not_there" for value "1" in form}, + 'dies if checkbox not found' +); + +done_testing(); diff --git a/t/untaint.t b/t/untaint.t new file mode 100644 index 0000000..a307dab --- /dev/null +++ b/t/untaint.t @@ -0,0 +1,16 @@ +use strict; +use warnings; +use URI::file; + +use Test::More; + +eval 'use Test::NoWarnings'; +if ( $@ ) { + plan( skip_all => 'Test::NoWarnings not installed' ); +} + +plan( tests => 2 ); # the use_ok and then the warning check +$ENV{test} = 14; +use_ok( 'WWW::Mechanize' ); +my $uri = URI::file->new_abs( 't/find_link_id.html' )->as_string; +WWW::Mechanize->new->get($uri); diff --git a/t/upload.html b/t/upload.html new file mode 100644 index 0000000..78f6c2c --- /dev/null +++ b/t/upload.html @@ -0,0 +1,10 @@ +<html> +<body> + +<form action="http://localhost/" method="post" enctype="multipart/form-data"> + +<input type="file" name="upload" value="MANIFEST" /> +<input type="Submit" name="submit" value="Submit" label="Submit" /> +</form> +</body> + diff --git a/t/upload.t b/t/upload.t new file mode 100644 index 0000000..d1b0f95 --- /dev/null +++ b/t/upload.t @@ -0,0 +1,42 @@ +#!perl -T + +use strict; +use warnings; +use Test::More tests => 5; +use URI::file; + +BEGIN { delete @ENV{ qw( http_proxy HTTP_PROXY PATH IFS CDPATH ENV BASH_ENV) }; } +use_ok( 'WWW::Mechanize' ); + +my $mech = WWW::Mechanize->new( cookie_jar => undef ); +isa_ok( $mech, 'WWW::Mechanize' ); + +my $uri = URI::file->new_abs( 't/upload.html' )->as_string; +$mech->get( $uri ); +ok( $mech->success, $uri ); + +my $form = $mech->form_number(1); +my $reqstring = $form->click->as_string; +$reqstring =~ s/\r//g; + +# trim off possible extra newline +$reqstring =~ s/^\Z\n//m; + +my $wanted = <<'EOT'; +POST http://localhost/ +Content-Length: 77 +Content-Type: multipart/form-data; boundary=xYzZY + +--xYzZY +Content-Disposition: form-data; name="submit" + +Submit +--xYzZY-- +EOT + +is( $reqstring, $wanted, 'Proper posting' ); + +$mech->field('upload', 'dist.ini'); +$reqstring = $form->click->as_string; +like( $reqstring, qr/WWW-Mechanize/, 'The uploaded file should be in the request'); + @@ -0,0 +1,10 @@ +use strict; +use warnings; + +use Test::More; +use WWW::Mechanize (); + +my $mech = WWW::Mechanize->new; +is( $mech->uri, undef, 'undef uri() with a pristine object' ); + +done_testing(); diff --git a/t/warn.t b/t/warn.t new file mode 100644 index 0000000..700b3df --- /dev/null +++ b/t/warn.t @@ -0,0 +1,28 @@ +#!perl -T + +use warnings; +use strict; + +use Test::Warn qw( warning_like ); +use Test::More; +use WWW::Mechanize (); + +my $m = WWW::Mechanize->new; +isa_ok( $m, 'WWW::Mechanize' ); + +warning_like { + $m->warn( 'Something bad' ); +} qr[Something bad.+line \d+], 'Passes the message, and includes the line number'; + +warning_like { + $m->quiet(1); + $m->warn( 'Something bad' ); +} undef, 'Quiets correctly'; + +my $hushed = WWW::Mechanize->new( quiet => 1 ); +isa_ok( $hushed, 'WWW::Mechanize' ); +warning_like { + $hushed->warn( 'Something bad' ); +} undef, 'Quiets correctly'; + +done_testing(); diff --git a/t/warnings.t b/t/warnings.t new file mode 100644 index 0000000..77860ba --- /dev/null +++ b/t/warnings.t @@ -0,0 +1,19 @@ +#!perl -T + +use warnings; +use strict; + +use Test::More; +use Test::Warn qw( warning_is ); +use WWW::Mechanize (); + +UNKNOWN_ALIAS: { + my $m = WWW::Mechanize->new; + isa_ok( $m, 'WWW::Mechanize' ); + + warning_is { + $m->agent_alias( 'Blongo' ); + } 'Unknown agent alias "Blongo"', 'Unknown aliases squawk appropriately'; +} + +done_testing(); diff --git a/tidyall.ini b/tidyall.ini new file mode 100644 index 0000000..4111dd4 --- /dev/null +++ b/tidyall.ini @@ -0,0 +1,13 @@ +[PerlCritic non-auto-generated xt] +select = xt/author/live/wikipedia.t +argv = --profile=$ROOT/perlcriticrc + +[PerlTidy non-auto-generated xt] +select = xt/author/live/wikipedia.t +argv = --profile=$ROOT/perltidyrc + +[SortLines::Naturally] +select = .gitignore + +[UniqueLines] +select = .gitignore diff --git a/xt/author/eol.t b/xt/author/eol.t new file mode 100644 index 0000000..ccb5253 --- /dev/null +++ b/xt/author/eol.t @@ -0,0 +1,112 @@ +use strict; +use warnings; + +# this test was generated with Dist::Zilla::Plugin::Test::EOL 0.19 + +use Test::More 0.88; +use Test::EOL; + +my @files = ( + 'lib/WWW/Mechanize.pm', + 'lib/WWW/Mechanize/Cookbook.pod', + 'lib/WWW/Mechanize/Examples.pod', + 'lib/WWW/Mechanize/FAQ.pod', + 'lib/WWW/Mechanize/Image.pm', + 'lib/WWW/Mechanize/Link.pm', + 'script/mech-dump', + 't/00-load.t', + 't/00-report-prereqs.dd', + 't/00-report-prereqs.t', + 't/TestServer.pm', + 't/Tools.pm', + 't/add_header.t', + 't/aliases.t', + 't/area_link.html', + 't/area_link.t', + 't/autocheck.t', + 't/bad-request.t', + 't/clone.t', + 't/content.t', + 't/cookies.t', + 't/credentials-api.t', + 't/credentials.t', + 't/die.t', + 't/dump.t', + 't/field.html', + 't/field.t', + 't/find_frame.html', + 't/find_frame.t', + 't/find_image.t', + 't/find_inputs.html', + 't/find_inputs.t', + 't/find_link-warnings.t', + 't/find_link.html', + 't/find_link.t', + 't/find_link_id.html', + 't/find_link_id.t', + 't/find_link_xhtml.html', + 't/find_link_xhtml.t', + 't/form-parsing.t', + 't/form_133_regression.html', + 't/form_with_fields.html', + 't/form_with_fields.t', + 't/form_with_fields_passthrough_params.t', + 't/form_with_fields_verbose.html', + 't/frames.html', + 't/frames.t', + 't/google.html', + 't/history.t', + 't/history_1.html', + 't/history_2.html', + 't/history_3.html', + 't/image-new.t', + 't/image-parse.css', + 't/image-parse.html', + 't/image-parse.t', + 't/link-base.t', + 't/link-relative.t', + 't/link.t', + 't/local/LocalServer.pm', + 't/local/back.t', + 't/local/click.t', + 't/local/click_button.t', + 't/local/content.t', + 't/local/encoding.t', + 't/local/failure.t', + 't/local/follow.t', + 't/local/form.t', + 't/local/get.t', + 't/local/head.t', + 't/local/log-server', + 't/local/nonascii.html', + 't/local/nonascii.t', + 't/local/overload.t', + 't/local/page_stack.t', + 't/local/post.t', + 't/local/referer-server', + 't/local/referer.t', + 't/local/reload.t', + 't/local/select_multiple.t', + 't/local/submit.t', + 't/mech-dump/mech-dump.t', + 't/new.t', + 't/refresh.html', + 't/regex-error.t', + 't/save_content.html', + 't/save_content.t', + 't/select.html', + 't/select.t', + 't/submit_form.t', + 't/taint.t', + 't/tick.html', + 't/tick.t', + 't/untaint.t', + 't/upload.html', + 't/upload.t', + 't/uri.t', + 't/warn.t', + 't/warnings.t' +); + +eol_unix_ok($_, { trailing_whitespace => 1 }) foreach @files; +done_testing; diff --git a/xt/author/live/wikipedia.t b/xt/author/live/wikipedia.t new file mode 100644 index 0000000..0ae7164 --- /dev/null +++ b/xt/author/live/wikipedia.t @@ -0,0 +1,39 @@ +#!perl -T + +use warnings; +use strict; + +use constant LANGUAGES => qw( en it ja es nl pl ); + +use Test::RequiresInternet( 'wikipedia.org' => 443 ); +use Test::Needs 'LWP::Protocol::https'; +use Test::More; +use WWW::Mechanize (); + +use lib 't'; + +BEGIN { + use Tools; +} + +my $mech = WWW::Mechanize->new; + +$mech->agent_alias('Windows IE 6'); # Wikipedia 403s out obvious bots + +for my $lang (LANGUAGES) { + my $start = "https://$lang.wikipedia.org/"; + + $mech->get($start); + + ok( $mech->success, "Got $start" ); + my @links = $mech->links(); + cmp_ok( scalar @links, '>', 50, "Over 50 links on $start" ); +} + +SKIP: { + skip 'Test::Memory::Cycle not installed', 1 unless $canTMC; + + memory_cycle_ok( $mech, 'No memory cycles found' ); +} + +done_testing(); diff --git a/xt/author/mojibake.t b/xt/author/mojibake.t new file mode 100644 index 0000000..5ef161e --- /dev/null +++ b/xt/author/mojibake.t @@ -0,0 +1,9 @@ +#!perl + +use strict; +use warnings qw(all); + +use Test::More; +use Test::Mojibake; + +all_files_encoding_ok(); diff --git a/xt/author/pod-coverage.t b/xt/author/pod-coverage.t new file mode 100644 index 0000000..8878c2d --- /dev/null +++ b/xt/author/pod-coverage.t @@ -0,0 +1,44 @@ +#!perl +# This file was automatically generated by Dist::Zilla::Plugin::Test::Pod::Coverage::Configurable 0.07. + +use Test::Pod::Coverage 1.08; +use Test::More 0.88; + +BEGIN { + if ( $] <= 5.008008 ) { + plan skip_all => 'These tests require Pod::Coverage::TrustPod, which only works with Perl 5.8.9+'; + } +} +use Pod::Coverage::TrustPod; + +my %skip = map { $_ => 1 } qw( ); + +my @modules; +for my $module ( all_modules() ) { + next if $skip{$module}; + + push @modules, $module; +} + +plan skip_all => 'All the modules we found were excluded from POD coverage test.' + unless @modules; + +plan tests => scalar @modules; + +my %trustme = (); + +my @also_private; + +for my $module ( sort @modules ) { + pod_coverage_ok( + $module, + { + coverage_class => 'Pod::Coverage::TrustPod', + also_private => \@also_private, + trustme => $trustme{$module} || [], + }, + "pod coverage for $module" + ); +} + +done_testing(); diff --git a/xt/author/pod-syntax.t b/xt/author/pod-syntax.t new file mode 100644 index 0000000..e563e5d --- /dev/null +++ b/xt/author/pod-syntax.t @@ -0,0 +1,7 @@ +#!perl +# This file was automatically generated by Dist::Zilla::Plugin::PodSyntaxTests. +use strict; use warnings; +use Test::More; +use Test::Pod 1.41; + +all_pod_files_ok(); diff --git a/xt/author/portability.t b/xt/author/portability.t new file mode 100644 index 0000000..c531252 --- /dev/null +++ b/xt/author/portability.t @@ -0,0 +1,10 @@ +use strict; +use warnings; + +use Test::More; + +eval 'use Test::Portability::Files'; +plan skip_all => 'Test::Portability::Files required for testing portability' + if $@; + +run_tests(); diff --git a/xt/author/test-version.t b/xt/author/test-version.t new file mode 100644 index 0000000..247ba9a --- /dev/null +++ b/xt/author/test-version.t @@ -0,0 +1,23 @@ +use strict; +use warnings; +use Test::More; + +# generated by Dist::Zilla::Plugin::Test::Version 1.09 +use Test::Version; + +my @imports = qw( version_all_ok ); + +my $params = { + is_strict => 0, + has_version => 1, + multiple => 0, + +}; + +push @imports, $params + if version->parse( $Test::Version::VERSION ) >= version->parse('1.002'); + +Test::Version->import(@imports); + +version_all_ok; +done_testing; diff --git a/xt/author/tidyall.t b/xt/author/tidyall.t new file mode 100644 index 0000000..4d226bc --- /dev/null +++ b/xt/author/tidyall.t @@ -0,0 +1,11 @@ +# This file was automatically generated by Dist::Zilla::Plugin::Test::TidyAll v$VERSION + +use Test::More 0.88; +use Test::Code::TidyAll 0.24; + +tidyall_ok( + verbose => ( exists $ENV{TEST_TIDYALL_VERBOSE} ? $ENV{TEST_TIDYALL_VERBOSE} : 0 ), + jobs => ( exists $ENV{TEST_TIDYALL_JOBS} ? $ENV{TEST_TIDYALL_JOBS} : 1 ), +); + +done_testing; |