1 files changed, 264 insertions, 601 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index bd2da945e..52e1447db 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -1,8 +1,12 @@
-{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
-    FlexibleContexts, ScopedTypeVariables, PatternGuards,
-    ViewPatterns #-}
+{-# LANGUAGE CPP                   #-}
+{-# LANGUAGE DeriveDataTypeable    #-}
+{-# LANGUAGE FlexibleContexts      #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+
+{-# LANGUAGE ScopedTypeVariables   #-}
+{-# LANGUAGE ViewPatterns          #-}
 {-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -21,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Shared
-   Copyright   : Copyright (C) 2006-2016 John MacFarlane
+   Copyright   : Copyright (C) 2006-2018 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
@@ -49,21 +53,18 @@ module Text.Pandoc.Shared (
                      toRomanNumeral,
                      escapeURI,
                      tabFilter,
+                     crFilter,
                      -- * Date/time
                      normalizeDate,
                      -- * Pandoc block and inline list processing
                      orderedListMarkers,
-                     normalizeSpaces,
                      extractSpaces,
-                     normalize,
-                     normalizeInlines,
-                     normalizeBlocks,
                      removeFormatting,
+                     deNote,
                      stringify,
                      capitalize,
                      compactify,
-                     compactify',
-                     compactify'DL,
+                     compactifyDL,
                      linesToPara,
                      Element (..),
                      hierarchicalize,
@@ -71,29 +72,26 @@ module Text.Pandoc.Shared (
                      inlineListToIdentifier,
                      isHeaderBlock,
                      headerShift,
+                     stripEmptyParagraphs,
                      isTightList,
                      addMetaField,
                      makeMeta,
+                     eastAsianLineBreakFilter,
+                     underlineSpan,
                      -- * TagSoup HTML handling
                      renderTags',
                      -- * File handling
                      inDirectory,
-                     getDefaultReferenceDocx,
-                     getDefaultReferenceODT,
-                     readDataFile,
-                     readDataFileUTF8,
-                     fetchItem,
-                     fetchItem',
-                     openURL,
                      collapseFilePath,
                      filteredFilesFromArchive,
+                     -- * URI handling
+                     schemes,
+                     isURI,
                      -- * Error handling
-                     err,
-                     warn,
                      mapLeft,
-                     hush,
                      -- * for squashing blocks
                      blocksToInlines,
+                     blocksToInlines',
                      -- * Safe read
                      safeRead,
                      -- * Temp directory
@@ -102,74 +100,37 @@ module Text.Pandoc.Shared (
                      pandocVersion
                     ) where
 
-import Text.Pandoc.Definition
-import Text.Pandoc.Walk
-import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
-import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
-import qualified Text.Pandoc.Builder as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import System.Environment (getProgName)
-import System.Exit (exitWith, ExitCode(..))
-import Data.Char ( toLower, isLower, isUpper, isAlpha,
-                   isLetter, isDigit, isSpace )
-import Data.List ( find, stripPrefix, intercalate )
-import Data.Maybe (mapMaybe)
-import Data.Version ( showVersion )
+import Codec.Archive.Zip
+import qualified Control.Exception as E
+import Control.Monad (MonadPlus (..), msum, unless)
+import qualified Control.Monad.State.Strict as S
+import qualified Data.ByteString.Lazy as BL
+import Data.Char (isAlpha, isDigit, isLetter, isLower, isSpace, isUpper,
+                  toLower)
+import Data.Data (Data, Typeable)
+import Data.List (find, intercalate, intersperse, stripPrefix)
 import qualified Data.Map as M
-import Network.URI ( escapeURIString, nonStrictRelativeTo,
-                     unEscapeString, parseURIReference, isAllowedInURI,
-                     parseURI, URI(..) )
+import Data.Maybe (mapMaybe)
+import Data.Monoid ((<>))
+import Data.Sequence (ViewL (..), ViewR (..), viewl, viewr)
 import qualified Data.Set as Set
+import qualified Data.Text as T
+import Data.Version (showVersion)
+import Network.URI (URI (uriScheme), escapeURIString, parseURI)
+import Paths_pandoc (version)
 import System.Directory
-import System.FilePath (splitDirectories, isPathSeparator)
+import System.FilePath (isPathSeparator, splitDirectories)
 import qualified System.FilePath.Posix as Posix
-import Text.Pandoc.MIME (MimeType, getMimeType)
-import System.FilePath ( (</>), takeExtension, dropExtension)
-import Data.Generics (Typeable, Data)
-import qualified Control.Monad.State as S
-import Control.Monad.Trans (MonadIO (..))
-import qualified Control.Exception as E
-import Control.Monad (msum, unless, MonadPlus(..))
-import Text.Pandoc.Pretty (charWidth)
-import Text.Pandoc.Compat.Time
-import Data.Time.Clock.POSIX
-import System.IO (stderr)
 import System.IO.Temp
-import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
-         renderOptions)
-import Data.Monoid ((<>))
-import qualified Data.ByteString as BS
-import qualified Data.ByteString.Char8 as B8
-import Data.ByteString.Base64 (decodeLenient)
-import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
-import qualified Data.Text as T (toUpper, pack, unpack)
-import Data.ByteString.Lazy (toChunks, fromChunks)
-import qualified Data.ByteString.Lazy as BL
-import Paths_pandoc (version)
-
-import Codec.Archive.Zip
-
-#ifdef EMBED_DATA_FILES
-import Text.Pandoc.Data (dataFiles)
-#else
-import Paths_pandoc (getDataFileName)
-#endif
-#ifdef HTTP_CLIENT
-import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
-                            Request(port,host))
-import Network.HTTP.Client (parseRequest)
-import Network.HTTP.Client (newManager)
-import Network.HTTP.Client.Internal (addProxy)
-import Network.HTTP.Client.TLS (tlsManagerSettings)
-import System.Environment (getEnv)
-import Network.HTTP.Types.Header ( hContentType)
-import Network (withSocketsDo)
-#else
-import Network.URI (parseURI)
-import Network.HTTP (findHeader, rspBody,
-                     RequestMethod(..), HeaderName(..), mkRequest)
-import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
-#endif
+import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions,
+                          renderTagsOptions)
+import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..))
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Compat.Time
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic (bottomUp)
+import Text.Pandoc.Pretty (charWidth)
+import Text.Pandoc.Walk
 
 -- | Version number of pandoc library.
 pandocVersion :: String
@@ -185,11 +146,11 @@ splitBy _ [] = []
 splitBy isSep lst =
   let (first, rest) = break isSep lst
       rest'         = dropWhile isSep rest
-  in  first:(splitBy isSep rest')
+  in  first:splitBy isSep rest'
 
 splitByIndices :: [Int] -> [a] -> [[a]]
 splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x)  xs) rest)
+splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x)  xs) rest
   where (first, rest) = splitAt x lst
 
 -- | Split string into chunks divided at specified indices.
@@ -197,7 +158,7 @@ splitStringByIndices :: [Int] -> [Char] -> [[Char]]
 splitStringByIndices [] lst = [lst]
 splitStringByIndices (x:xs) lst =
   let (first, rest) = splitAt' x lst in
-  first : (splitStringByIndices (map (\y -> y - x) xs) rest)
+  first : splitStringByIndices (map (\y -> y - x) xs) rest
 
 splitAt' :: Int -> [Char] -> ([Char],[Char])
 splitAt' _ []          = ([],[])
@@ -236,9 +197,9 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch]))
 escapeStringUsing :: [(Char, String)] -> String -> String
 escapeStringUsing _ [] = ""
 escapeStringUsing escapeTable (x:xs) =
-  case (lookup x escapeTable) of
-       Just str  -> str ++ rest
-       Nothing   -> x:rest
+  case lookup x escapeTable of
+       Just str -> str ++ rest
+       Nothing  -> x:rest
   where rest = escapeStringUsing escapeTable xs
 
 -- | Strip trailing newlines from string.
@@ -260,35 +221,33 @@ trimr = reverse . triml . reverse
 -- | Strip leading and trailing characters from string
 stripFirstAndLast :: String -> String
 stripFirstAndLast str =
-  drop 1 $ take ((length str) - 1) str
+  drop 1 $ take (length str - 1) str
 
 -- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
 camelCaseToHyphenated :: String -> String
 camelCaseToHyphenated [] = ""
 camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
-  a:'-':(toLower b):(camelCaseToHyphenated rest)
-camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+  a:'-':toLower b:camelCaseToHyphenated rest
+camelCaseToHyphenated (a:rest) = toLower a:camelCaseToHyphenated rest
 
 -- | Convert number < 4000 to uppercase roman numeral.
 toRomanNumeral :: Int -> String
-toRomanNumeral x =
-  if x >= 4000 || x < 0
-     then "?"
-     else case x of
-              _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
-              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
-              _ | x >= 500  -> "D" ++ toRomanNumeral (x - 500)
-              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
-              _ | x >= 100  -> "C" ++ toRomanNumeral (x - 100)
-              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
-              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
-              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
-              _ | x >= 10   -> "X" ++ toRomanNumeral (x - 10)
-              _ | x == 9    -> "IX"
-              _ | x >= 5    -> "V" ++ toRomanNumeral (x - 5)
-              _ | x == 4    -> "IV"
-              _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
-              _             -> ""
+toRomanNumeral x
+  | x >= 4000 || x < 0 = "?"
+  | x >= 1000 = "M" ++ toRomanNumeral (x - 1000)
+  | x >= 900  = "CM" ++ toRomanNumeral (x - 900)
+  | x >= 500  = "D" ++ toRomanNumeral (x - 500)
+  | x >= 400  = "CD" ++ toRomanNumeral (x - 400)
+  | x >= 100  = "C" ++ toRomanNumeral (x - 100)
+  | x >= 90   = "XC" ++ toRomanNumeral (x - 90)
+  | x >= 50   = "L"  ++ toRomanNumeral (x - 50)
+  | x >= 40   = "XL" ++ toRomanNumeral (x - 40)
+  | x >= 10   = "X" ++ toRomanNumeral (x - 10)
+  | x == 9    = "IX"
+  | x >= 5    = "V" ++ toRomanNumeral (x - 5)
+  | x == 4    = "IV"
+  | x >= 1    = "I" ++ toRomanNumeral (x - 1)
+  | otherwise = ""
 
 -- | Escape whitespace and some punctuation characters in URI.
 escapeURI :: String -> String
@@ -296,26 +255,23 @@ escapeURI = escapeURIString (not . needsEscaping)
   where needsEscaping c = isSpace c || c `elem`
                            ['<','>','|','"','{','}','[',']','^', '`']
 
-
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
+-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0.
 tabFilter :: Int       -- ^ Tab stop
-          -> String    -- ^ Input
-          -> String
-tabFilter tabStop =
-  let go _ [] = ""
-      go _ ('\n':xs) = '\n' : go tabStop xs
-      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
-      go _ ('\r':xs) = '\n' : go tabStop xs
-      go spsToNextStop ('\t':xs) =
-        if tabStop == 0
-           then '\t' : go tabStop xs
-           else replicate spsToNextStop ' ' ++ go tabStop xs
-      go 1 (x:xs) =
-        x : go tabStop xs
-      go spsToNextStop (x:xs) =
-        x : go (spsToNextStop - 1) xs
-  in  go tabStop
+          -> T.Text    -- ^ Input
+          -> T.Text
+tabFilter 0 = id
+tabFilter tabStop = T.unlines . map go . T.lines
+  where go s =
+         let (s1, s2) = T.break (== '\t') s
+         in  if T.null s2
+                then s1
+                else s1 <> T.replicate
+                       (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ")
+                       <> go (T.drop 1 s2)
+
+-- | Strip out DOS line endings.
+crFilter :: T.Text -> T.Text
+crFilter = T.filter (/= '\r')
 
 --
 -- Date/time
@@ -329,7 +285,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
   (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
   where rejectBadYear day = case toGregorian day of
           (y, _, _) | y >= 1601 && y <= 9999 -> Just day
-          _ -> Nothing
+          _         -> Nothing
         parsetimeWith =
 #if MIN_VERSION_time(1,5,0)
              parseTimeM True defaultTimeLocale
@@ -337,7 +293,7 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
              parseTime defaultTimeLocale
 #endif
         formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
-                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
+                    "%e %B %Y", "%b. %e, %Y", "%B %e, %Y",
                     "%Y%m%d", "%Y%m", "%Y"]
 
 --
@@ -366,23 +322,6 @@ orderedListMarkers (start, numstyle, numdelim) =
                             TwoParens    -> "(" ++ str ++ ")"
   in  map inDelim nums
 
--- | Normalize a list of inline elements: remove leading and trailing
--- @Space@ elements, collapse double @Space@s into singles, and
--- remove empty Str elements.
-normalizeSpaces :: [Inline] -> [Inline]
-normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
- where  cleanup []              = []
-        cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
-                                        []     -> []
-                                        (x:xs) -> Space : x : cleanup xs
-        cleanup ((Str ""):rest) = cleanup rest
-        cleanup (x:rest)        = x : cleanup rest
-
-isSpaceOrEmpty :: Inline -> Bool
-isSpaceOrEmpty Space = True
-isSpaceOrEmpty (Str "") = True
-isSpaceOrEmpty _ = False
-
 -- | Extract the leading and trailing spaces from inside an inline element
 -- and place them outside the element.  SoftBreaks count as Spaces for
 -- these purposes.
@@ -399,183 +338,43 @@ extractSpaces f is =
                     _                -> mempty in
   (left <> f (B.trimInlines . B.Many $ contents) <> right)
 
--- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
--- combining adjacent 'Str's and 'Emph's, remove 'Null's and
--- empty elements, etc.
-normalize :: Pandoc -> Pandoc
-normalize (Pandoc (Meta meta) blocks) =
-  Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
-  where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
-        go (MetaBlocks xs)  = MetaBlocks  $ normalizeBlocks xs
-        go (MetaList ms)    = MetaList $ map go ms
-        go (MetaMap m)      = MetaMap $ M.map go m
-        go x                = x
-
-normalizeBlocks :: [Block] -> [Block]
-normalizeBlocks (Null : xs) = normalizeBlocks xs
-normalizeBlocks (Div attr bs : xs) =
-  Div attr (normalizeBlocks bs) : normalizeBlocks xs
-normalizeBlocks (BlockQuote bs : xs) =
-  case normalizeBlocks bs of
-       []    -> normalizeBlocks xs
-       bs'   -> BlockQuote bs' : normalizeBlocks xs
-normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
-normalizeBlocks (BulletList items : xs) =
-  BulletList (map normalizeBlocks items) : normalizeBlocks xs
-normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
-normalizeBlocks (OrderedList attr items : xs) =
-  OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
-normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
-normalizeBlocks (DefinitionList items : xs) =
-  DefinitionList (map go items) : normalizeBlocks xs
-  where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
-normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
-normalizeBlocks (RawBlock f x : xs) =
-   case normalizeBlocks xs of
-        (RawBlock f' x' : rest) | f' == f ->
-          RawBlock f (x ++ ('\n':x')) : rest
-        rest -> RawBlock f x : rest
-normalizeBlocks (Para ils : xs) =
-  case normalizeInlines ils of
-       []   -> normalizeBlocks xs
-       ils' -> Para ils' : normalizeBlocks xs
-normalizeBlocks (Plain ils : xs) =
-  case normalizeInlines ils of
-       []   -> normalizeBlocks xs
-       ils' -> Plain ils' : normalizeBlocks xs
-normalizeBlocks (Header lev attr ils : xs) =
-  Header lev attr (normalizeInlines ils) : normalizeBlocks xs
-normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
-  Table (normalizeInlines capt) aligns widths
-    (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
-  : normalizeBlocks xs
-normalizeBlocks (x:xs) = x : normalizeBlocks xs
-normalizeBlocks [] = []
-
-normalizeInlines :: [Inline] -> [Inline]
-normalizeInlines (Str x : ys) =
-  case concat (x : map fromStr strs) of
-        ""     -> rest
-        n      -> Str n : rest
-   where
-     (strs, rest)  = span isStr $ normalizeInlines ys
-     isStr (Str _) = True
-     isStr _       = False
-     fromStr (Str z) = z
-     fromStr _       = error "normalizeInlines - fromStr - not a Str"
-normalizeInlines (Space : SoftBreak : ys) =
-  SoftBreak : normalizeInlines ys
-normalizeInlines (Space : ys) =
-  if null rest
-     then []
-     else Space : rest
-   where isSp Space = True
-         isSp _     = False
-         rest       = dropWhile isSp $ normalizeInlines ys
-normalizeInlines (Emph xs : zs) =
-  case normalizeInlines zs of
-       (Emph ys : rest) -> normalizeInlines $
-         Emph (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> Emph xs' : rest
-normalizeInlines (Strong xs : zs) =
-  case normalizeInlines zs of
-       (Strong ys : rest) -> normalizeInlines $
-         Strong (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> Strong xs' : rest
-normalizeInlines (Subscript xs : zs) =
-  case normalizeInlines zs of
-       (Subscript ys : rest) -> normalizeInlines $
-         Subscript (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> Subscript xs' : rest
-normalizeInlines (Superscript xs : zs) =
-  case normalizeInlines zs of
-       (Superscript ys : rest) -> normalizeInlines $
-         Superscript (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> Superscript xs' : rest
-normalizeInlines (SmallCaps xs : zs) =
-  case normalizeInlines zs of
-       (SmallCaps ys : rest) -> normalizeInlines $
-         SmallCaps (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> SmallCaps xs' : rest
-normalizeInlines (Strikeout xs : zs) =
-  case normalizeInlines zs of
-       (Strikeout ys : rest) -> normalizeInlines $
-         Strikeout (normalizeInlines $ xs ++ ys) : rest
-       rest -> case normalizeInlines xs of
-                    []  -> rest
-                    xs' -> Strikeout xs' : rest
-normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
-normalizeInlines (RawInline f xs : zs) =
-  case normalizeInlines zs of
-       (RawInline f' ys : rest) | f == f' -> normalizeInlines $
-         RawInline f (xs ++ ys) : rest
-       rest -> RawInline f xs : rest
-normalizeInlines (Code _ "" : ys) = normalizeInlines ys
-normalizeInlines (Code attr xs : zs) =
-  case normalizeInlines zs of
-       (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
-         Code attr (xs ++ ys) : rest
-       rest -> Code attr xs : rest
--- allow empty spans, they may carry identifiers etc.
--- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
-normalizeInlines (Span attr xs : zs) =
-  case normalizeInlines zs of
-       (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
-         Span attr (normalizeInlines $ xs ++ ys) : rest
-       rest -> Span attr (normalizeInlines xs) : rest
-normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
-  normalizeInlines ys
-normalizeInlines (Quoted qt ils : ys) =
-  Quoted qt (normalizeInlines ils) : normalizeInlines ys
-normalizeInlines (Link attr ils t : ys) =
-  Link attr (normalizeInlines ils) t : normalizeInlines ys
-normalizeInlines (Image attr ils t : ys) =
-  Image attr (normalizeInlines ils) t : normalizeInlines ys
-normalizeInlines (Cite cs ils : ys) =
-  Cite cs (normalizeInlines ils) : normalizeInlines ys
-normalizeInlines (x : xs) = x : normalizeInlines xs
-normalizeInlines [] = []
-
 -- | Extract inlines, removing formatting.
 removeFormatting :: Walkable Inline a => a -> [Inline]
-removeFormatting = query go . walk deNote
+removeFormatting = query go . walk (deNote . deQuote)
   where go :: Inline -> [Inline]
-        go (Str xs)     = [Str xs]
-        go Space        = [Space]
-        go SoftBreak    = [SoftBreak]
-        go (Code _ x)   = [Str x]
-        go (Math _ x)   = [Str x]
-        go LineBreak    = [Space]
-        go _            = []
-        deNote (Note _) = Str ""
-        deNote x        = x
+        go (Str xs)   = [Str xs]
+        go Space      = [Space]
+        go SoftBreak  = [SoftBreak]
+        go (Code _ x) = [Str x]
+        go (Math _ x) = [Str x]
+        go LineBreak  = [Space]
+        go _          = []
+
+deNote :: Inline -> Inline
+deNote (Note _) = Str ""
+deNote x        = x
+
+deQuote :: Inline -> Inline
+deQuote (Quoted SingleQuote xs) =
+  Span ("",[],[]) (Str "\8216" : xs ++ [Str "\8217"])
+deQuote (Quoted DoubleQuote xs) =
+  Span ("",[],[]) (Str "\8220" : xs ++ [Str "\8221"])
+deQuote x = x
 
 -- | Convert pandoc structure to a string with formatting removed.
 -- Footnotes are skipped (since we don't want their contents in link
 -- labels).
 stringify :: Walkable Inline a => a -> String
-stringify = query go . walk deNote
+stringify = query go . walk (deNote . deQuote)
   where go :: Inline -> [Char]
-        go Space = " "
-        go SoftBreak = " "
-        go (Str x) = x
-        go (Code _ x) = x
-        go (Math _ x) = x
+        go Space                                       = " "
+        go SoftBreak                                   = " "
+        go (Str x)                                     = x
+        go (Code _ x)                                  = x
+        go (Math _ x)                                  = x
         go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
-        go LineBreak = " "
-        go _ = ""
-        deNote (Note _) = Str ""
-        deNote x = x
+        go LineBreak                                   = " "
+        go _                                           = ""
 
 -- | Bring all regular text in a pandoc structure to uppercase.
 --
@@ -589,28 +388,12 @@ capitalize = walk go
         go x       = x
 
 -- | Change final list item from @Para@ to @Plain@ if the list contains
--- no other @Para@ blocks.
-compactify :: [[Block]]  -- ^ List of list items (each a list of blocks)
-           -> [[Block]]
-compactify [] = []
-compactify items =
-  case (init items, last items) of
-       (_,[])          -> items
-       (others, final) ->
-            case last final of
-                 Para a -> case (filter isPara $ concat items) of
-                                -- if this is only Para, change to Plain
-                                [_] -> others ++ [init final ++ [Plain a]]
-                                _   -> items
-                 _      -> items
-
--- | Change final list item from @Para@ to @Plain@ if the list contains
 -- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
 -- than @[Block]@.
-compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
+compactify :: [Blocks]  -- ^ List of list items (each a list of blocks)
            -> [Blocks]
-compactify' [] = []
-compactify' items =
+compactify [] = []
+compactify items =
   let (others, final) = (init items, last items)
   in  case reverse (B.toList final) of
            (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
@@ -619,9 +402,9 @@ compactify' items =
                             _   -> items
            _      -> items
 
--- | Like @compactify'@, but acts on items of definition lists.
-compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
-compactify'DL items =
+-- | Like @compactify@, but acts on items of definition lists.
+compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
+compactifyDL items =
   let defs = concatMap snd items
   in  case reverse (concatMap B.toList defs) of
            (Para x:xs)
@@ -663,7 +446,7 @@ instance Walkable Inline Element where
     ils' <- walkM f ils
     elts' <- walkM f elts
     return $ Sec lev nums attr ils' elts'
-  query f (Blk x) = query f x
+  query f (Blk x)              = query f x
   query f (Sec _ _ _ ils elts) = query f ils <> query f elts
 
 instance Walkable Block Element where
@@ -674,7 +457,7 @@ instance Walkable Block Element where
     ils' <- walkM f ils
     elts' <- walkM f elts
     return $ Sec lev nums attr ils' elts'
-  query f (Blk x) = query f x
+  query f (Blk x)              = query f x
   query f (Sec _ _ _ ils elts) = query f ils <> query f elts
 
 
@@ -687,8 +470,8 @@ inlineListToIdentifier =
     map (nbspToSp . toLower) .
     filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
     stringify
- where nbspToSp '\160'     =  ' '
-       nbspToSp x          =  x
+ where nbspToSp '\160' =  ' '
+       nbspToSp x      =  x
 
 -- | Convert list of Pandoc blocks into (hierarchical) list of Elements
 hierarchicalize :: [Block] -> [Element]
@@ -696,7 +479,7 @@ hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
 
 hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
 hierarchicalizeWithIds [] = return []
-hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
+hierarchicalizeWithIds (Header level attr@(_,classes,_) title':xs) = do
   lastnum <- S.get
   let lastnum' = take level lastnum
   let newnum = case length lastnum' of
@@ -709,26 +492,26 @@ hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
   sectionContents' <- hierarchicalizeWithIds sectionContents
   rest' <- hierarchicalizeWithIds rest
   return $ Sec level newnum attr title' sectionContents' : rest'
-hierarchicalizeWithIds ((Div ("",["references"],[])
-                         (Header level (ident,classes,kvs) title' : xs)):ys) =
-  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
-                           title') : (xs ++ ys))
+hierarchicalizeWithIds (Div ("",["references"],[])
+                         (Header level (ident,classes,kvs) title' : xs):ys) =
+  hierarchicalizeWithIds (Header level (ident,"references":classes,kvs)
+                           title' : (xs ++ ys))
 hierarchicalizeWithIds (x:rest) = do
   rest' <- hierarchicalizeWithIds rest
-  return $ (Blk x) : rest'
+  return $ Blk x : rest'
 
 headerLtEq :: Int -> Block -> Bool
-headerLtEq level (Header l _ _) = l <= level
-headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _))  = l <= level
-headerLtEq _ _ = False
+headerLtEq level (Header l _ _)                                  = l <= level
+headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
+headerLtEq _ _                                                   = False
 
 -- | Generate a unique identifier from a list of inlines.
 -- Second argument is a list of already used identifiers.
 uniqueIdent :: [Inline] -> Set.Set String -> String
 uniqueIdent title' usedIdents
   =  let baseIdent = case inlineListToIdentifier title' of
-                        ""   -> "section"
-                        x    -> x
+                        "" -> "section"
+                        x  -> x
          numIdent n = baseIdent ++ "-" ++ show n
      in  if baseIdent `Set.member` usedIdents
            then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
@@ -738,8 +521,8 @@ uniqueIdent title' usedIdents
 
 -- | True if block is a Header block.
 isHeaderBlock :: Block -> Bool
-isHeaderBlock (Header _ _ _) = True
-isHeaderBlock _ = False
+isHeaderBlock Header{} = True
+isHeaderBlock _        = False
 
 -- | Shift header levels up or down.
 headerShift :: Int -> Pandoc -> Pandoc
@@ -748,6 +531,14 @@ headerShift n = walk shift
         shift (Header level attr inner) = Header (level + n) attr inner
         shift x                         = x
 
+-- | Remove empty paragraphs.
+stripEmptyParagraphs :: Pandoc -> Pandoc
+stripEmptyParagraphs = walk go
+  where go :: [Block] -> [Block]
+        go = filter (not . isEmptyParagraph)
+        isEmptyParagraph (Para []) = True
+        isEmptyParagraph _         = False
+
 -- | Detect if a list is tight.
 isTightList :: [[Block]] -> Bool
 isTightList = all firstIsPlain
@@ -765,8 +556,8 @@ addMetaField key val (Meta meta) =
   Meta $ M.insertWith combine key (toMetaValue val) meta
   where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
         combine newval x             = MetaList [x, newval]
-        tolist (MetaList ys)         = ys
-        tolist y                     = [y]
+        tolist (MetaList ys) = ys
+        tolist y             = [y]
 
 -- | Create 'Meta' from old-style title, authors, date.  This is
 -- provided to ease the transition from the old API.
@@ -774,8 +565,24 @@ makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
 makeMeta title authors date =
       addMetaField "title" (B.fromList title)
     $ addMetaField "author" (map B.fromList authors)
-    $ addMetaField "date" (B.fromList date)
-    $ nullMeta
+    $ addMetaField "date" (B.fromList date) nullMeta
+
+-- | Remove soft breaks between East Asian characters.
+eastAsianLineBreakFilter :: Pandoc -> Pandoc
+eastAsianLineBreakFilter = bottomUp go
+  where go (x:SoftBreak:y:zs) =
+         case (stringify x, stringify y) of
+               (xs@(_:_), c:_)
+                 | charWidth (last xs) == 2 && charWidth c == 2 -> x:y:zs
+               _ -> x:SoftBreak:y:zs
+        go xs = xs
+
+-- | Builder for underline.
+-- This probably belongs in Builder.hs in pandoc-types.
+-- Will be replaced once Underline is an element.
+underlineSpan :: Inlines -> Inlines
+underlineSpan = B.spanWith ("", ["underline"], [])
+
 
 --
 -- TagSoup HTML handling
@@ -787,7 +594,7 @@ renderTags' = renderTagsOptions
                renderOptions{ optMinimize = matchTags ["hr", "br", "img",
                                                        "meta", "link"]
                             , optRawTag   = matchTags ["script", "style"] }
-              where matchTags = \tags -> flip elem tags . map toLower
+              where matchTags tags = flip elem tags . map toLower
 
 --
 -- File handling
@@ -800,226 +607,14 @@ inDirectory path action = E.bracket
                              setCurrentDirectory
                              (const $ setCurrentDirectory path >> action)
 
-getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
-getDefaultReferenceDocx datadir = do
-  let paths = ["[Content_Types].xml",
-               "_rels/.rels",
-               "docProps/app.xml",
-               "docProps/core.xml",
-               "word/document.xml",
-               "word/fontTable.xml",
-               "word/footnotes.xml",
-               "word/numbering.xml",
-               "word/settings.xml",
-               "word/webSettings.xml",
-               "word/styles.xml",
-               "word/_rels/document.xml.rels",
-               "word/_rels/footnotes.xml.rels",
-               "word/theme/theme1.xml"]
-  let toLazy = fromChunks . (:[])
-  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
-                                          getCurrentTime
-                            contents <- toLazy <$> readDataFile datadir
-                                                       ("docx/" ++ path)
-                            return $ toEntry path epochtime contents
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- doesFileExist (d </> "reference.docx")
-                       if exists
-                          then return (Just (d </> "reference.docx"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> BL.readFile arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-getDefaultReferenceODT :: Maybe FilePath -> IO Archive
-getDefaultReferenceODT datadir = do
-  let paths = ["mimetype",
-               "manifest.rdf",
-               "styles.xml",
-               "content.xml",
-               "meta.xml",
-               "settings.xml",
-               "Configurations2/accelerator/current.xml",
-               "Thumbnails/thumbnail.png",
-               "META-INF/manifest.xml"]
-  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
-                            contents <- (fromChunks . (:[])) `fmap`
-                                          readDataFile datadir ("odt/" ++ path)
-                            return $ toEntry path epochtime contents
-  mbArchive <- case datadir of
-                    Nothing   -> return Nothing
-                    Just d    -> do
-                       exists <- doesFileExist (d </> "reference.odt")
-                       if exists
-                          then return (Just (d </> "reference.odt"))
-                          else return Nothing
-  case mbArchive of
-     Just arch -> toArchive <$> BL.readFile arch
-     Nothing   -> foldr addEntryToArchive emptyArchive <$>
-                     mapM pathToEntry paths
-
-
-readDefaultDataFile :: FilePath -> IO BS.ByteString
-readDefaultDataFile "reference.docx" =
-  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
-readDefaultDataFile "reference.odt" =
-  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
-readDefaultDataFile fname =
-#ifdef EMBED_DATA_FILES
-  case lookup (makeCanonical fname) dataFiles of
-    Nothing       -> err 97 $ "Could not find data file " ++ fname
-    Just contents -> return contents
-  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
-        transformPathParts = reverse . foldl go []
-        go as     "."  = as
-        go (_:as) ".." = as
-        go as     x    = x : as
-#else
-  getDataFileName fname' >>= checkExistence >>= BS.readFile
-    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname
-
-checkExistence :: FilePath -> IO FilePath
-checkExistence fn = do
-  exists <- doesFileExist fn
-  if exists
-     then return fn
-     else err 97 ("Could not find data file " ++ fn)
-#endif
-
--- | Read file from specified user data directory or, if not found there, from
--- Cabal data directory.
-readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
-readDataFile Nothing fname = readDefaultDataFile fname
-readDataFile (Just userDir) fname = do
-  exists <- doesFileExist (userDir </> fname)
-  if exists
-     then BS.readFile (userDir </> fname)
-     else readDefaultDataFile fname
-
--- | Same as 'readDataFile' but returns a String instead of a ByteString.
-readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
-readDataFileUTF8 userDir fname =
-  UTF8.toString `fmap` readDataFile userDir fname
-
--- | Specialized version of parseURIReference that disallows
--- single-letter schemes.  Reason:  these are usually windows absolute
--- paths.
-parseURIReference' :: String -> Maybe URI
-parseURIReference' s =
-  case parseURIReference s of
-       Just u
-         | length (uriScheme u) > 2  -> Just u
-         | null (uriScheme u)        -> Just u  -- protocol-relative
-       _                             -> Nothing
-
--- | Fetch an image or other item from the local filesystem or the net.
--- Returns raw content and maybe mime type.
-fetchItem :: Maybe String -> String
-          -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-fetchItem sourceURL s =
-  case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of
-       (Just u, s') -> -- try fetching from relative path at source
-          case parseURIReference' s' of
-               Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
-               Nothing -> openURL s' -- will throw error
-       (Nothing, s'@('/':'/':_)) ->  -- protocol-relative URI
-          case parseURIReference' s' of
-               Just u' -> openURL $ show $ u' `nonStrictRelativeTo` httpcolon
-               Nothing -> openURL s' -- will throw error
-       (Nothing, s') ->
-          case parseURI s' of  -- requires absolute URI
-               -- We don't want to treat C:/ as a scheme:
-               Just u' | length (uriScheme u') > 2 -> openURL (show u')
-               Just u' | uriScheme u' == "file:" ->
-                    E.try $ readLocalFile $ dropWhile (=='/') (uriPath u')
-               _ -> E.try $ readLocalFile fp -- get from local file system
-  where readLocalFile f = do
-          cont <- BS.readFile f
-          return (cont, mime)
-        httpcolon = URI{ uriScheme = "http:",
-                         uriAuthority = Nothing,
-                         uriPath = "",
-                         uriQuery = "",
-                         uriFragment = "" }
-        dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
-        fp = unEscapeString $ dropFragmentAndQuery s
-        mime = case takeExtension fp of
-                    ".gz" -> getMimeType $ dropExtension fp
-                    ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
-                    x     -> getMimeType x
-        ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
-        convertSlash '\\' = '/'
-        convertSlash x    = x
-
--- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
-fetchItem' :: MediaBag -> Maybe String -> String
-           -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-fetchItem' media sourceURL s = do
-  case lookupMedia s media of
-       Nothing -> fetchItem sourceURL s
-       Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
-
--- | Read from a URL and return raw data and maybe mime type.
-openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
-openURL u
-  | Just u'' <- stripPrefix "data:" u =
-    let mime     = takeWhile (/=',') u''
-        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
-    in  return $ Right (decodeLenient contents, Just mime)
-#ifdef HTTP_CLIENT
-  | otherwise = withSocketsDo $ E.try $ do
-     let parseReq = parseRequest
-     (proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
-     req <- parseReq u
-     req' <- case proxy of
-                     Left _   -> return req
-                     Right pr -> (parseReq pr >>= \r ->
-                                  return $ addProxy (host r) (port r) req)
-                                  `mplus` return req
-     resp <- newManager tlsManagerSettings >>= httpLbs req'
-     return (BS.concat $ toChunks $ responseBody resp,
-             UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
-#else
-  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
-              (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
-                  setOutHandler $ const (return ())
-                  setAllowRedirects True
-                  request (getRequest' u'))
-  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
-        getRequest' uriString = case parseURI uriString of
-                                   Nothing -> error ("Not a valid URL: " ++
-                                                        uriString)
-                                   Just v  -> mkRequest GET v
-        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
-#endif
-
 --
 -- Error reporting
 --
 
-err :: Int -> String -> IO a
-err exitCode msg = do
-  name <- getProgName
-  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
-  exitWith $ ExitFailure exitCode
-  return undefined
-
-warn :: MonadIO m => String -> m ()
-warn msg = liftIO $ do
-  name <- getProgName
-  UTF8.hPutStrLn stderr $ "[" ++ name ++ " warning] " ++ msg
-
 mapLeft :: (a -> b) -> Either a c -> Either b c
-mapLeft f (Left x) = Left (f x)
+mapLeft f (Left x)  = Left (f x)
 mapLeft _ (Right x) = Right x
 
-hush :: Either a b -> Maybe b
-hush (Left _) = Nothing
-hush (Right x) = Just x
-
 -- | Remove intermediate "." and ".." directories from a path.
 --
 -- > collapseFilePath "./foo" == "foo"
@@ -1034,14 +629,14 @@ collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
   where
     go rs "." = rs
     go r@(p:rs) ".." = case p of
-                            ".." -> ("..":r)
-                            (checkPathSeperator -> Just True) -> ("..":r)
-                            _ -> rs
+                            ".."                              -> "..":r
+                            (checkPathSeperator -> Just True) -> "..":r
+                            _                                 -> rs
     go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
     go rs x = x:rs
-    isSingleton [] = Nothing
+    isSingleton []  = Nothing
     isSingleton [x] = Just x
-    isSingleton _ = Nothing
+    isSingleton _   = Nothing
     checkPathSeperator = fmap isPathSeparator . isSingleton
 
 --
@@ -1054,41 +649,109 @@ filteredFilesFromArchive zf f =
     fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
     fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)
 
+
+--
+-- IANA URIs
+--
+
+-- | Schemes from http://www.iana.org/assignments/uri-schemes.html plus
+-- the unofficial schemes doi, javascript, isbn, pmid.
+schemes :: Set.Set String
+schemes = Set.fromList
+  -- Official IANA schemes
+  [ "aaa", "aaas", "about", "acap", "acct", "acr", "adiumxtra", "afp", "afs"
+  , "aim", "appdata", "apt", "attachment", "aw", "barion", "beshare", "bitcoin"
+  , "blob", "bolo", "browserext", "callto", "cap", "chrome", "chrome-extension"
+  , "cid", "coap", "coaps", "com-eventbrite-attendee", "content", "crid", "cvs"
+  , "data", "dav", "dict", "dis", "dlna-playcontainer", "dlna-playsingle"
+  , "dns", "dntp", "dtn", "dvb", "ed2k", "example", "facetime", "fax", "feed"
+  , "feedready", "file", "filesystem", "finger", "fish", "ftp", "geo", "gg"
+  , "git", "gizmoproject", "go", "gopher", "graph", "gtalk", "h323", "ham"
+  , "hcp", "http", "https", "hxxp", "hxxps", "hydrazone", "iax", "icap", "icon"
+  , "im", "imap", "info", "iotdisco", "ipn", "ipp", "ipps", "irc", "irc6"
+  , "ircs", "iris", "iris.beep", "iris.lwz", "iris.xpc", "iris.xpcs"
+  , "isostore", "itms", "jabber", "jar", "jms", "keyparc", "lastfm", "ldap"
+  , "ldaps", "lvlt", "magnet", "mailserver", "mailto", "maps", "market"
+  , "message", "mid", "mms", "modem", "mongodb", "moz", "ms-access"
+  , "ms-browser-extension", "ms-drive-to", "ms-enrollment", "ms-excel"
+  , "ms-gamebarservices", "ms-getoffice", "ms-help", "ms-infopath"
+  , "ms-media-stream-id", "ms-officeapp", "ms-project", "ms-powerpoint"
+  , "ms-publisher", "ms-search-repair", "ms-secondary-screen-controller"
+  , "ms-secondary-screen-setup", "ms-settings", "ms-settings-airplanemode"
+  , "ms-settings-bluetooth", "ms-settings-camera", "ms-settings-cellular"
+  , "ms-settings-cloudstorage", "ms-settings-connectabledevices"
+  , "ms-settings-displays-topology", "ms-settings-emailandaccounts"
+  , "ms-settings-language", "ms-settings-location", "ms-settings-lock"
+  , "ms-settings-nfctransactions", "ms-settings-notifications"
+  , "ms-settings-power", "ms-settings-privacy", "ms-settings-proximity"
+  , "ms-settings-screenrotation", "ms-settings-wifi", "ms-settings-workplace"
+  , "ms-spd", "ms-sttoverlay", "ms-transit-to", "ms-virtualtouchpad"
+  , "ms-visio", "ms-walk-to", "ms-whiteboard", "ms-whiteboard-cmd", "ms-word"
+  , "msnim", "msrp", "msrps", "mtqp", "mumble", "mupdate", "mvn", "news", "nfs"
+  , "ni", "nih", "nntp", "notes", "ocf", "oid", "onenote", "onenote-cmd"
+  , "opaquelocktoken", "pack", "palm", "paparazzi", "pkcs11", "platform", "pop"
+  , "pres", "prospero", "proxy", "pwid", "psyc", "qb", "query", "redis"
+  , "rediss", "reload", "res", "resource", "rmi", "rsync", "rtmfp", "rtmp"
+  , "rtsp", "rtsps", "rtspu", "secondlife", "service", "session", "sftp", "sgn"
+  , "shttp", "sieve", "sip", "sips", "skype", "smb", "sms", "smtp", "snews"
+  , "snmp", "soap.beep", "soap.beeps", "soldat", "spotify", "ssh", "steam"
+  , "stun", "stuns", "submit", "svn", "tag", "teamspeak", "tel", "teliaeid"
+  , "telnet", "tftp", "things", "thismessage", "tip", "tn3270", "tool", "turn"
+  , "turns", "tv", "udp", "unreal", "urn", "ut2004", "v-event", "vemmi"
+  , "ventrilo", "videotex", "vnc", "view-source", "wais", "webcal", "wpid"
+  , "ws", "wss", "wtai", "wyciwyg", "xcon", "xcon-userid", "xfire"
+  , "xmlrpc.beep", "xmlrpc.beeps", "xmpp", "xri", "ymsgr", "z39.50", "z39.50r"
+  , "z39.50s"
+  -- Inofficial schemes
+  , "doi", "isbn", "javascript", "pmid"
+  ]
+
+-- | Check if the string is a valid URL with a IANA or frequently used but
+-- unofficial scheme (see @schemes@).
+isURI :: String -> Bool
+isURI = maybe False hasKnownScheme . parseURI
+  where
+    hasKnownScheme = (`Set.member` schemes) . map toLower .
+                     filter (/= ':') . uriScheme
+
 ---
 --- Squash blocks into inlines
 ---
 
-blockToInlines :: Block -> [Inline]
-blockToInlines (Plain ils) = ils
-blockToInlines (Para ils) = ils
-blockToInlines (LineBlock lns) = combineLines lns
-blockToInlines (CodeBlock attr str) = [Code attr str]
-blockToInlines (RawBlock fmt str) = [RawInline fmt str]
-blockToInlines (BlockQuote blks) = blocksToInlines blks
+blockToInlines :: Block -> Inlines
+blockToInlines (Plain ils) = B.fromList ils
+blockToInlines (Para ils) = B.fromList ils
+blockToInlines (LineBlock lns) = B.fromList $ combineLines lns
+blockToInlines (CodeBlock attr str) = B.codeWith attr str
+blockToInlines (RawBlock (Format fmt) str) = B.rawInline fmt str
+blockToInlines (BlockQuote blks) = blocksToInlines' blks
 blockToInlines (OrderedList _ blkslst) =
-  concatMap blocksToInlines blkslst
+  mconcat $ map blocksToInlines' blkslst
 blockToInlines (BulletList blkslst) =
-  concatMap blocksToInlines blkslst
+  mconcat $ map blocksToInlines' blkslst
 blockToInlines (DefinitionList pairslst) =
-  concatMap f pairslst
+  mconcat $ map f pairslst
   where
-    f (ils, blkslst) = ils ++
-      [Str ":", Space] ++
-      (concatMap blocksToInlines blkslst)
-blockToInlines (Header _ _  ils) = ils
-blockToInlines (HorizontalRule) = []
+    f (ils, blkslst) = B.fromList ils <> B.str ":" <> B.space <>
+      mconcat (map blocksToInlines' blkslst)
+blockToInlines (Header _ _  ils) = B.fromList ils
+blockToInlines HorizontalRule = mempty
 blockToInlines (Table _ _ _ headers rows) =
-  intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
-  where
-    tbl = headers : rows
-blockToInlines (Div _ blks) = blocksToInlines blks
-blockToInlines Null = []
+  mconcat $ intersperse B.linebreak $
+    map (mconcat . map blocksToInlines') (headers:rows)
+blockToInlines (Div _ blks) = blocksToInlines' blks
+blockToInlines Null = mempty
+
+blocksToInlinesWithSep :: Inlines -> [Block] -> Inlines
+blocksToInlinesWithSep sep =
+  mconcat . intersperse sep . map blockToInlines
 
-blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
-blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
+blocksToInlines' :: [Block] -> Inlines
+blocksToInlines' = blocksToInlinesWithSep parSep
+  where parSep = B.space <> B.str "¶" <> B.space
 
 blocksToInlines :: [Block] -> [Inline]
-blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]
+blocksToInlines = B.toList . blocksToInlines'
 
 
 --