summary refs log tree commit diff
path: root/web
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-09-15 03:15:27 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-09-15 03:15:27 +0000
commit bf100f82769b194336ef4b92f5a9803f262f0d8b (patch)
tree 2faacef34283b131a4a46411e74e1c3b3c0317a2 /web
parent b5819b8ed6b107a089f55e636c78cac7166ea36d (diff)
Added security measures to html2x.pl.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1018 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'web')
-rwxr-xr-x web/html2x.pl 52
1 files changed, 28 insertions, 24 deletions
diff --git a/web/html2x.pl b/web/html2x.pl
index 98c23dccc..a034f0e58 100755
--- a/web/html2x.pl
+++ b/web/html2x.pl
@@ -1,34 +1,38 @@
#!/usr/bin/env perl
+use strict;
use CGI qw/:standard/;
use CGI::Carp 'fatalsToBrowser';
$CGI::POST_MAX=1024 * 100; # max 100K posts
$CGI::DISABLE_UPLOADS = 1; # no uploads
-if (param('url') && param('format')) {
- $options = '--standalone --reference-links';
- $url = param('url');
- $format = param('format') || 'markdown';
- if ($format =~ '^markdown$') {
- $options .= ' --strict';
- }
- if ($format =~ '^markdown\+$') {
- $format = 'markdown';
- }
- $output = `wget -O- $url | tidy -asxhtml -utf8 | pandoc -r html -w $format $options`;
- if ($format =~ "rtf") {
- $type = "application/rtf"
- } else {
- $type = "text/plain"
- };
- print header(-charset=>"utf8",-type=>"$type"),
- $output;
-} else {
- print start_html(-title=>"html2x"),
- h1("Usage"),
- p("You have tried to call html2x.pl without the proper parameters."),
- p("Please use <a href=\"/pandoc/html2x.html\">this form</a>."),
- end_html();
+# Both CGI parameters are mandatory; stop before doing any work if either is absent.
+die "Missing url and/or format parameters.\n" unless param('url') && param('format');
+my $url = param('url');
+my $format = param('format') || 'markdown';
+my $options = '-r html --standalone --reference-links';
+# "markdown" adds pandoc's --strict flag; "markdown+" is passed to pandoc as plain "markdown".
+if ($format =~ /^markdown$/) {
+ $options .= ' --strict';
+} elsif ($format =~ /^markdown\+$/) {
+ $format = 'markdown';
}
+# Validate URL and format before they reach the shell: match the whole string (\A...\z, no trailing newline) and group the format alternatives.
+unless ($url =~ /\A(https?:\/\/)?[\w#?_-]+(\.[\w#?_-]+)+[\w\/#?_.-]*\z/) {
+ die "Illegal URL: $url\n" ;
+}
+unless ($format =~ /\A(?:markdown\+?|rst|latex|context|rtf|man|docbook)\z/) {
+ die "Illegal format: $format\n";
+}
+# Fetch the page, tidy it to XHTML and convert it with pandoc; the URL is quoted and placed after "--" so neither the shell nor wget can read it as syntax or an option.
+my $output = `wget -O- -- '$url' | tidy -asxhtml -utf8 | pandoc -w $format $options`;
+if ($output =~ /^\s*$/) {
+ print header(), start_html,   # the HTTP header must precede the HTML body of a CGI reply
+ h1("No output"),
+ p("Either $url could not be retrieved, or its HTML was too malformed to parse."),
+ end_html;
+ exit 0;
+}
+print header(-charset=>"utf8",-type=>"text/plain"), $output;