#!/bin/sh
# Converts HTML to markdown.
#
# Usage: <this-script> [FILE | URL]
#
#   no argument  read HTML from standard input
#   FILE         read HTML from an existing regular file
#   URL          anything else is treated as a URL and fetched with the
#                first available downloader (wget, lynx, w3m, curl,
#                links, w3c -- in that order)
#
# The HTML is normalized with 'tidy' and then converted by 'pandoc';
# the markdown is written to standard output.
#
# Exit status: 1 if pandoc, tidy, or (for URLs) every downloader is
# missing; otherwise the status of the final pandoc invocation.

# Set here rather than on the shebang line so the option is still in
# effect when the script is started as "sh script".
set -e

# Print the arguments as one line on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Succeed if the command named by $1 can be found by the shell.
# 'command -v' is the POSIX replacement for the non-standard 'which'.
have() {
  command -v "$1" >/dev/null 2>&1
}

have pandoc || die "You need 'pandoc' to use this program!"
have tidy || die "You need 'tidy' to use this program!"

# Input selector: empty means "read standard input".
src=${1-}

if [ -z "$src" ] || [ -f "$src" ]; then
  # ${src:+"$src"} expands to nothing when src is empty, so tidy reads
  # stdin; otherwise it passes the path as exactly one word, even if it
  # contains spaces or glob characters.
  # tidy's diagnostics are discarded on purpose; per its manual it exits
  # non-zero even for mere warnings, so the pipeline status is pandoc's.
  tidy -utf8 ${src:+"$src"} 2>/dev/null | pandoc -r html -w markdown -s
else
  # Treat the argument as a URL. Locate a sensible text based
  # browser/downloader (note the order).
  # Start from an empty value so nothing inherited from the environment
  # can be mistaken for a detected program.
  fetcher=
  for candidate in wget lynx w3m curl links w3c; do
    if have "$candidate"; then
      fetcher=$candidate
      break
    fi
  done

  # Build the fetch command in the positional parameters. This keeps
  # multi-word option sets (w3c) and the empty one (curl) intact without
  # relying on unquoted word splitting. Each program is invoked with the
  # option(s) that make it write the raw page source to stdout.
  case "$fetcher" in
    wget)  set -- wget -O- ;;
    lynx)  set -- lynx -source ;;
    w3m)   set -- w3m -dump_source ;;
    curl)  set -- curl ;;
    links) set -- links -source ;;
    w3c)   set -- w3c -n -get ;;
    *)
      die "Needs a program to fetch the URL (e.g. wget, w3m, lynx, w3c, or curl)."
      ;;
  esac

  # Fetch and feed to pandoc. Downloader progress output and tidy
  # diagnostics go to stderr and are intentionally suppressed.
  "$@" "$src" 2>/dev/null |
    tidy -utf8 2>/dev/null |
    pandoc -r html -w markdown -s
fi