diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-10-17 14:22:29 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2006-10-17 14:22:29 +0000 |
commit | df7b68225101966051f8b592a27127bf789eb81e (patch) | |
tree | a063e97ed58d0bdb2cbb5a95c3e8c1bcce54aa00 /html2markdown | |
parent | e7dbfef4d8aa528d9245424e9c372e900a774c90 (diff) |
initial import
git-svn-id: https://pandoc.googlecode.com/svn/trunk@2 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'html2markdown')
-rw-r--r-- | html2markdown | 39 |
1 files changed, 39 insertions, 0 deletions
#!/bin/sh
# html2markdown - convert HTML to markdown.
#
# Usage: html2markdown [FILE | URL]
#
# With no argument, HTML is read from standard input.  If the argument names
# an existing regular file, that file is converted.  Any other argument is
# treated as a URL and fetched with the first downloader found on the system.
# In every case the HTML is normalized by tidy and then converted by pandoc,
# which writes standalone markdown to standard output.
#
# Exit status: 1 if pandoc, tidy, or (for URLs) every supported downloader is
# missing; otherwise the status of the final pandoc invocation.

# Enabled here rather than on the shebang line so that it still applies when
# the script is started as "sh html2markdown".
set -e

# Print the arguments as one line on stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed if the program named by $1 can be found by the shell.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Normalize the HTML on stdin with tidy and convert it to markdown on stdout.
# tidy's stderr is silenced so only pandoc's output and errors are visible.
to_markdown() {
  tidy -utf8 2>/dev/null | pandoc -r html -w markdown -s
}

have pandoc || die "You need 'pandoc' to use this program!"
have tidy || die "You need 'tidy' to use this program!"

# ${1-} keeps the no-argument case well defined.
input=${1-}

if [ -z "$input" ]; then
  # No argument: convert whatever arrives on stdin.
  to_markdown
elif [ -f "$input" ]; then
  # Feed the file through stdin so that names containing spaces or starting
  # with '-' cannot be misread as tidy options.
  to_markdown <"$input"
else
  # Treat the argument as a URL.  Pick the first available program that can
  # dump a page's source (note the order).
  url=$input
  dumper=
  for candidate in wget lynx w3m curl links w3c; do
    if have "$candidate"; then
      dumper=$candidate
      break
    fi
  done

  # Build the fetch command in the positional parameters so that multi-word
  # option lists (e.g. "-n -get") stay separate words without relying on
  # unquoted expansion.
  case "$dumper" in
    wget) set -- wget -O- ;;
    lynx) set -- lynx -source ;;
    w3m) set -- w3m -dump_source ;;
    curl) set -- curl ;;
    links) set -- links -source ;;
    w3c) set -- w3c -n -get ;;
    *) die "Needs a program to fetch the URL (e.g. wget, lynx, w3m or curl)." ;;
  esac

  # Fetch and feed to pandoc; the downloader's stderr is silenced.
  "$@" "$url" 2>/dev/null | to_markdown
fi