49 files changed, 4495 insertions, 2612 deletions
diff --git a/src/Tests/Arbitrary.hs b/src/Tests/Arbitrary.hs
index f5a44ceab..9d65e1f1f 100644
--- a/src/Tests/Arbitrary.hs
+++ b/src/Tests/Arbitrary.hs
@@ -1,5 +1,5 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, ScopedTypeVariables #-}
 -- provides Arbitrary instance for Pandoc types
 module Tests.Arbitrary ()
 where
@@ -22,10 +22,10 @@ arbAttr = do
   return (id',classes,keyvals)
 
 instance Arbitrary Inlines where
-  arbitrary = liftM fromList arbitrary
+  arbitrary = liftM (fromList :: [Inline] -> Inlines) arbitrary
 
 instance Arbitrary Blocks where
-  arbitrary = liftM fromList arbitrary
+  arbitrary = liftM (fromList :: [Block] -> Blocks) arbitrary
 
 instance Arbitrary Inline where
   arbitrary = resize 3 $ arbInline 2
@@ -41,10 +41,6 @@ arbInline :: Int -> Gen Inline
 arbInline n = frequency $ [ (60, liftM Str realString)
                           , (60, return Space)
                           , (10, liftM2 Code arbAttr realString)
-                          , (5,  return EmDash)
-                          , (5,  return EnDash)
-                          , (5,  return Apostrophe)
-                          , (5,  return Ellipses)
                           , (5,  elements [ RawInline "html" "<a id=\"eek\">"
                                           , RawInline "latex" "\\my{command}" ])
                           ] ++ [ x | x <- nesters, n > 1]
diff --git a/src/Tests/Helpers.hs b/src/Tests/Helpers.hs
index b8d6b83a7..66879efed 100644
--- a/src/Tests/Helpers.hs
+++ b/src/Tests/Helpers.hs
@@ -69,17 +69,15 @@ test fn name (input, expected) =
 
 vividize :: (DI,String) -> String
 vividize (B,s) = s
-vividize (_,s) = vivid s
+vividize (F,s) = s
+vividize (S,s) = setSGRCode [SetColor Background Dull Red
+                         , SetColor Foreground Vivid White] ++ s
+                 ++ setSGRCode [Reset]
 
 property :: QP.Testable a => TestName -> a -> Test
 property = testProperty
 
-vivid :: String -> String
-vivid s = setSGRCode [SetColor Background Dull Red
-                     , SetColor Foreground Vivid White] ++ s
-          ++ setSGRCode [Reset]
-
-infix 6 =?>
+infix 5 =?>
 (=?>) :: a -> b -> (a,b)
 x =?> y = (x, y)
 
diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs
index cb1417ffa..1ec32a30d 100644
--- a/src/Tests/Old.hs
+++ b/src/Tests/Old.hs
@@ -13,7 +13,6 @@ import Data.Algorithm.Diff
 import Text.Pandoc.Shared ( normalize, defaultWriterOptions )
 import Text.Pandoc.Writers.Native ( writeNative )
 import Text.Pandoc.Readers.Native ( readNative )
-import Text.Pandoc.Highlighting ( languages )
 import Prelude hiding ( readFile )
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Lazy.UTF8 (toString)
@@ -105,7 +104,7 @@ tests = [ testGroup "markdown"
           ]
         , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
           [ "docbook", "opendocument" , "context" , "texinfo"
-          , "man" , "plain" , "mediawiki", "rtf", "org"
+          , "man" , "plain" , "mediawiki", "rtf", "org", "asciidoc"
           ]
         ]
 
@@ -121,10 +120,7 @@ lhsWriterTests format
     ]
   where
     t n f = test n ["--columns=78", "-r", "native", "-s", "-w", f]
-             "lhs-test.native" ("lhs-test" <.> ext f)
-    ext f = if null languages && format == "html"
-               then "nohl" <.> f
-               else f
+             "lhs-test.native" ("lhs-test" <.> f)
 
 lhsReaderTest :: String -> Test
 lhsReaderTest format =
diff --git a/src/Tests/Readers/LaTeX.hs b/src/Tests/Readers/LaTeX.hs
index 6d28441f8..d60026b20 100644
--- a/src/Tests/Readers/LaTeX.hs
+++ b/src/Tests/Readers/LaTeX.hs
@@ -11,7 +11,7 @@ import Text.Pandoc
 latex :: String -> Pandoc
 latex = readLaTeX defaultParserState
 
-infix 5 =:
+infix 4 =:
 (=:) :: ToString c
      => String -> (String, c) -> Test
 (=:) = test latex
@@ -35,10 +35,15 @@ tests = [ testGroup "basic"
             "\\subsubsection{header}" =?> header 3 "header"
           , "emph" =:
             "\\section{text \\emph{emph}}" =?>
-             header 1 ("text" +++ space +++ emph "emph")
+             header 1 ("text" <> space <> emph "emph")
           , "link" =:
             "\\section{text \\href{/url}{link}}" =?>
-              header 1 ("text" +++ space +++ link "/url" "" "link")
+              header 1 ("text" <> space <> link "/url" "" "link")
+          ]
+
+        , testGroup "math"
+          [ "escaped $" =:
+            "$x=\\$4$" =?> para (math "x=\\$4")
           ]
 
         , testGroup "space and comments"
@@ -64,98 +69,101 @@ baseCitation = Citation{ citationId      = "item1"
                        , citationNoteNum = 0
                        , citationHash    = 0 }
 
+rt :: String -> Inlines
+rt = rawInline "latex"
+
 natbibCitations :: Test
 natbibCitations = testGroup "natbib"
   [ "citet" =: "\\citet{item1}"
-    =?> para (cite [baseCitation] empty)
+    =?> para (cite [baseCitation] (rt "\\citet{item1}"))
   , "suffix" =: "\\citet[p.~30]{item1}"
     =?> para
-        (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+        (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}"))
   , "suffix long" =: "\\citet[p.~30, with suffix]{item1}"
     =?> para (cite [baseCitation{ citationSuffix =
-                       toList $ text "p.\160\&30, with suffix" }] empty)
+                       toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}"))
   , "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}"
     =?> para (cite [baseCitation{ citationMode = AuthorInText }
                    ,baseCitation{ citationMode = SuppressAuthor
-                                , citationSuffix = [Str "p.\160\&30"]
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&30"]
                                 , citationId = "item2" }
                    ,baseCitation{ citationId = "item3"
                                 , citationPrefix = [Str "see",Space,Str "also"]
                                 , citationMode = NormalCitation }
-                   ] empty)
+                   ] (rt "\\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}"))
   , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
                                 , citationPrefix = [Str "see"]
-                                , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] }
                    ,baseCitation{ citationMode = NormalCitation
                                 , citationId = "item3"
                                 , citationPrefix = [Str "also"]
-                                , citationSuffix = [Str "chap.",Space,Str "3"] }
-                   ] empty)
+                                , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] }
+                   ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}"))
   , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
-                                , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+                                , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}"))
   , "suffix only" =: "\\citep[and nowhere else]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
-                                , citationSuffix = toList $ text "and nowhere else" }] empty)
+                                , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}"))
   , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}"
-    =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
-              text ", and now Doe with a locator " +++
+    =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <>
+              text ", and now Doe with a locator " <>
               cite [baseCitation{ citationMode = SuppressAuthor
-                                , citationSuffix = [Str "p.\160\&44"]
-                                , citationId = "item2" }] empty)
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&44"]
+                                , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}"))
   , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
                                 , citationPrefix = [Emph [Str "see"]]
-                                , citationSuffix = [Str "p.",Space,
-                                    Strong [Str "32"]] }] empty)
+                                , citationSuffix = [Str ",",Space,Str "p.",Space,
+                                    Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. \\textbf{32}]{item1}"))
   ]
 
 biblatexCitations :: Test
 biblatexCitations = testGroup "biblatex"
   [ "textcite" =: "\\textcite{item1}"
-    =?> para (cite [baseCitation] empty)
+    =?> para (cite [baseCitation] (rt "\\textcite{item1}"))
   , "suffix" =: "\\textcite[p.~30]{item1}"
     =?> para
-        (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+        (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}"))
   , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}"
     =?> para (cite [baseCitation{ citationSuffix =
-                       toList $ text "p.\160\&30, with suffix" }] empty)
+                       toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}"))
   , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}"
     =?> para (cite [baseCitation{ citationMode = AuthorInText }
                    ,baseCitation{ citationMode = NormalCitation
-                                , citationSuffix = [Str "p.\160\&30"]
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&30"]
                                 , citationId = "item2" }
                    ,baseCitation{ citationId = "item3"
                                 , citationPrefix = [Str "see",Space,Str "also"]
                                 , citationMode = NormalCitation }
-                   ] empty)
+                   ] (rt "\\textcites{item1}[p.~30]{item2}[see also][]{item3}"))
   , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
                                 , citationPrefix = [Str "see"]
-                                , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] }
                    ,baseCitation{ citationMode = NormalCitation
                                 , citationId = "item3"
                                 , citationPrefix = [Str "also"]
-                                , citationSuffix = [Str "chap.",Space,Str "3"] }
-                   ] empty)
+                                , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] }
+                   ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}"))
   , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
-                                , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+                                , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}"))
   , "suffix only" =: "\\autocite[and nowhere else]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
-                                , citationSuffix = toList $ text "and nowhere else" }] empty)
+                                , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}"))
   , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}"
-    =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
-              text ", and now Doe with a locator " +++
+    =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <>
+              text ", and now Doe with a locator " <>
               cite [baseCitation{ citationMode = SuppressAuthor
-                                , citationSuffix = [Str "p.\160\&44"]
-                                , citationId = "item2" }] empty)
+                                , citationSuffix = [Str ",",Space,Str "p.\160\&44"]
+                                , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}"))
   , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}"
     =?> para (cite [baseCitation{ citationMode = NormalCitation
                                 , citationPrefix = [Emph [Str "see"]]
-                                , citationSuffix = [Str "p.",Space,
-                                    Strong [Str "32"]] }] empty)
+                                , citationSuffix = [Str ",",Space,Str "p.",Space,
+                                    Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}"))
   , "parencite" =: "\\parencite{item1}"
-    =?> para (cite [baseCitation{ citationMode = NormalCitation }] empty)
+    =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}"))
   ]
diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs
index 941762bd0..5ad974adf 100644
--- a/src/Tests/Readers/Markdown.hs
+++ b/src/Tests/Readers/Markdown.hs
@@ -8,7 +8,6 @@ import Tests.Arbitrary()
 import Text.Pandoc.Builder
 -- import Text.Pandoc.Shared ( normalize )
 import Text.Pandoc
-import Data.Sequence (singleton)
 
 markdown :: String -> Pandoc
 markdown = readMarkdown defaultParserState{ stateStandalone = True }
@@ -16,7 +15,7 @@ markdown = readMarkdown defaultParserState{ stateStandalone = True }
 markdownSmart :: String -> Pandoc
 markdownSmart = readMarkdown defaultParserState{ stateSmart = True }
 
-infix 5 =:
+infix 4 =:
 (=:) :: ToString c
      => String -> (String, c) -> Test
 (=:) = test markdown
@@ -44,29 +43,49 @@ tests = [ testGroup "inline code"
             "`*` {.haskell .special x=\"7\"}"
             =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*")
           ]
+        , testGroup "backslash escapes"
+          [ "in URL" =:
+            "[hi](/there\\))"
+            =?> para (link "/there)" "" "hi")
+          , "in title" =:
+            "[hi](/there \"a\\\"a\")"
+            =?> para (link "/there" "a\"a" "hi")
+          , "in reference link title" =:
+            "[hi]\n\n[hi]: /there (a\\)a)"
+            =?> para (link "/there" "a)a" "hi")
+          , "in reference link URL" =:
+            "[hi]\n\n[hi]: /there\\.0"
+            =?> para (link "/there.0" "" "hi")
+          ]
         , testGroup "smart punctuation"
           [ test markdownSmart "quote before ellipses"
             ("'...hi'"
-            =?> para (singleQuoted (singleton Ellipses +++ "hi")))
+            =?> para (singleQuoted ("…hi")))
+          , test markdownSmart "apostrophe before emph"
+            ("D'oh! A l'*aide*!"
+            =?> para ("D’oh! A l’" <> emph "aide" <> "!"))
+          , test markdownSmart "apostrophe in French"
+            ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
+            =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»"))
           ]
         , testGroup "mixed emphasis and strong"
           [ "emph and strong emph alternating" =:
             "*xxx* ***xxx*** xxx\n*xxx* ***xxx*** xxx"
-            =?> para (emph "xxx" +++ space +++ strong (emph "xxx") +++
-                      space +++ "xxx" +++ space +++
-                      emph "xxx" +++ space +++ strong (emph "xxx") +++
-                      space +++ "xxx")
+            =?> para (emph "xxx" <> space <> strong (emph "xxx") <>
+                      space <> "xxx" <> space <>
+                      emph "xxx" <> space <> strong (emph "xxx") <>
+                      space <> "xxx")
           , "emph with spaced strong" =:
             "*x **xx** x*"
-            =?> para (emph ("x" +++ space +++ strong "xx" +++ space +++ "x"))
+            =?> para (emph ("x" <> space <> strong "xx" <> space <> "x"))
           ]
         , testGroup "footnotes"
           [ "indent followed by newline and flush-left text" =:
             "[^1]\n\n[^1]: my note\n\n     \nnot in note\n"
-            =?> para (note (para "my note")) +++ para "not in note"
+            =?> para (note (para "my note")) <> para "not in note"
           , "indent followed by newline and indented text" =:
             "[^1]\n\n[^1]: my note\n     \n    in note\n"
-            =?> para (note (para "my note" +++ para "in note"))
+            =?> para (note (para "my note" <> para "in note"))
           , "recursive note" =:
             "[^1]\n\n[^1]: See [^1]\n"
             =?> para (note (para "See [^1]"))
@@ -76,9 +95,9 @@ tests = [ testGroup "inline code"
               "inverse bird tracks and html" $
               "> a\n\n< b\n\n<div>\n"
               =?> codeBlockWith ("",["sourceCode","literate","haskell"],[]) "a"
-                  +++
+                  <>
                   codeBlockWith ("",["sourceCode","haskell"],[]) "b"
-                  +++
+                  <>
                   rawBlock "html" "<div>\n\n"
           ]
 -- the round-trip properties frequently fail
diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs
index 4b8c9301b..3269092a6 100644
--- a/src/Tests/Readers/RST.hs
+++ b/src/Tests/Readers/RST.hs
@@ -11,15 +11,15 @@ import Text.Pandoc
 rst :: String -> Pandoc
 rst = readRST defaultParserState{ stateStandalone = True }
 
-infix 5 =:
+infix 4 =:
 (=:) :: ToString c
      => String -> (String, c) -> Test
 (=:) = test rst
 
 tests :: [Test]
 tests = [ "line block with blank line" =:
-          "| a\n|\n|  b" =?> para (str "a" +++ linebreak +++
-                                   linebreak +++ str " " +++ str "b")
+          "| a\n|\n|  b" =?> para (str "a" <> linebreak <>
+                                   linebreak <> str " " <> str "b")
         , "field list" =:
           [_LIT|
 :Hostname: media08
@@ -51,10 +51,10 @@ tests = [ "line block with blank line" =:
         , "URLs with following punctuation" =:
           ("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++
            "http://foo.bar/baz_(bam) (http://foo.bar)") =?>
-          para (link "http://google.com" "" "http://google.com" +++ ", " +++
-                link "http://yahoo.com" "" "http://yahoo.com" +++ "; " +++
-                link "http://foo.bar.baz" "" "http://foo.bar.baz" +++ ". " +++
+          para (link "http://google.com" "" "http://google.com" <> ", " <>
+                link "http://yahoo.com" "" "http://yahoo.com" <> "; " <>
+                link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <>
                 link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)"
-                +++ " (" +++ link "http://foo.bar" "" "http://foo.bar" +++ ")")
+                <> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")")
         ]
 
diff --git a/src/Tests/Writers/ConTeXt.hs b/src/Tests/Writers/ConTeXt.hs
index 704571e95..506ff698f 100644
--- a/src/Tests/Writers/ConTeXt.hs
+++ b/src/Tests/Writers/ConTeXt.hs
@@ -26,7 +26,7 @@ which is in turn shorthand for
   test context "my test" (X,Y)
 -}
 
-infix 5 =:
+infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
      => String -> (a, String) -> Test
 (=:) = test context
@@ -43,9 +43,7 @@ tests = [ testGroup "inline code"
           ]
         , testGroup "headers"
           [ "level 1" =:
-            header 1 "My header" =?> "\\subject{My header}"
-          , property "header 1 property" $ \ils ->
-                context' (header 1 ils) == "\\subject{" ++ context' ils ++ "}"
+            header 1 "My header" =?> "\\section[my-header]{My header}"
           ]
         , testGroup "bullet lists"
           [ "nested" =:
diff --git a/src/Tests/Writers/HTML.hs b/src/Tests/Writers/HTML.hs
index 3e1e0ddc2..8561aa421 100644
--- a/src/Tests/Writers/HTML.hs
+++ b/src/Tests/Writers/HTML.hs
@@ -23,7 +23,7 @@ which is in turn shorthand for
   test html "my test" (X,Y)
 -}
 
-infix 5 =:
+infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
      => String -> (a, String) -> Test
 (=:) = test html
@@ -40,7 +40,7 @@ tests = [ testGroup "inline code"
           ]
         , testGroup "images"
           [ "alt with formatting" =:
-            image "/url" "title" ("my " +++ emph "image")
+            image "/url" "title" ("my " <> emph "image")
             =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />"
           ]
         ]
diff --git a/src/Tests/Writers/Markdown.hs b/src/Tests/Writers/Markdown.hs
index 70266a683..d90dc83b1 100644
--- a/src/Tests/Writers/Markdown.hs
+++ b/src/Tests/Writers/Markdown.hs
@@ -22,13 +22,13 @@ which is in turn shorthand for
   test markdown "my test" (X,Y)
 -}
 
-infix 5 =:
+infix 4 =:
 (=:) :: (ToString a, ToPandoc a)
      => String -> (a, String) -> Test
 (=:) = test markdown
 
 tests :: [Test]
 tests = [ "indented code after list"
-             =: (orderedList [ para "one" +++ para "two" ] +++ codeBlock "test")
+             =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test")
              =?> "1.  one\n\n    two\n\n<!-- -->\n\n    test"
         ]
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index ef8560284..878f0e0dd 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
 
@@ -94,8 +95,10 @@ module Text.Pandoc
                , writeTextile
                , writeRTF
                , writeODT
+               , writeDocx
                , writeEPUB
                , writeOrg
+               , writeAsciiDoc
                -- * Writer options used in writers 
                , WriterOptions (..)
                , HTMLSlideVariant (..)
@@ -109,6 +112,7 @@ module Text.Pandoc
                -- * Miscellaneous
                , rtfEmbedImage
                , jsonFilter
+               , ToJsonFilter(..)
              ) where
 
 import Text.Pandoc.Definition
@@ -127,6 +131,7 @@ import Text.Pandoc.Writers.ConTeXt
 import Text.Pandoc.Writers.Texinfo
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.ODT
+import Text.Pandoc.Writers.Docx
 import Text.Pandoc.Writers.EPUB
 import Text.Pandoc.Writers.Docbook
 import Text.Pandoc.Writers.OpenDocument
@@ -135,6 +140,7 @@ import Text.Pandoc.Writers.RTF
 import Text.Pandoc.Writers.MediaWiki
 import Text.Pandoc.Writers.Textile
 import Text.Pandoc.Writers.Org
+import Text.Pandoc.Writers.AsciiDoc
 import Text.Pandoc.Templates
 import Text.Pandoc.Parsing
 import Text.Pandoc.Shared
@@ -164,20 +170,28 @@ readers = [("native"       , \_ -> readNative)
           ]
 
 -- | Association list of formats and writers (omitting the
--- binary writers, odt and epub).
+-- binary writers, odt, docx, and epub).
 writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
 writers = [("native"       , writeNative)
           ,("json"         , \_ -> encodeJSON)
           ,("html"         , writeHtmlString)
+          ,("html5"        , \o ->
+                             writeHtmlString o{ writerHtml5 = True })
           ,("html+lhs"     , \o ->
                              writeHtmlString o{ writerLiterateHaskell = True })
+          ,("html5+lhs"    , \o ->
+                             writeHtmlString o{ writerLiterateHaskell = True,
+                                                writerHtml5 = True })
           ,("s5"           , writeHtmlString)
           ,("slidy"        , writeHtmlString)
+          ,("dzslides"     , writeHtmlString)
           ,("docbook"      , writeDocbook)
           ,("opendocument" , writeOpenDocument)
           ,("latex"        , writeLaTeX)
           ,("latex+lhs"    , \o ->
                              writeLaTeX o{ writerLiterateHaskell = True })
+          ,("beamer"       , \o ->
+                             writeLaTeX o{ writerBeamer = True })
           ,("context"      , writeConTeXt)
           ,("texinfo"      , writeTexinfo)
           ,("man"          , writeMan)
@@ -192,10 +206,55 @@ writers = [("native"       , writeNative)
           ,("textile"      , writeTextile)
           ,("rtf"          , writeRTF)
           ,("org"          , writeOrg)
+          ,("asciidoc"     , writeAsciiDoc)
           ]
 
+{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-}
 -- | Converts a transformation on the Pandoc AST into a function
 -- that reads and writes a JSON-encoded string.  This is useful
 -- for writing small scripts.
 jsonFilter :: (Pandoc -> Pandoc) -> String -> String
 jsonFilter f = encodeJSON . f . decodeJSON
+
+-- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output
+-- from stdin, transforms it by walking the AST and applying the specified
+-- function, and writes the result as json to stdout.  Usage example:
+--
+-- > -- capitalize.hs
+-- > -- compile with:  ghc --make capitalize
+-- > -- run with:      pandoc -t json | ./capitalize | pandoc -f json
+-- >
+-- > import Text.Pandoc
+-- > import Data.Char (toUpper)
+-- >
+-- > main :: IO ()
+-- > main = toJsonFilter capitalizeStrings
+-- >
+-- > capitalizeStrings :: Inline -> Inline
+-- > capitalizeStrings (Str s) = Str $ map toUpper s
+-- > capitalizeStrings x       = x
+--
+-- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@,
+-- or @(a -> IO [a])@, where @a@ is an instance of 'Data'.
+-- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'],
+-- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim',
+-- 'QuoteType', etc. See 'Text.Pandoc.Definition'.
+class ToJsonFilter a where
+  toJsonFilter :: a -> IO ()
+
+instance (Data a) => ToJsonFilter (a -> a) where
+  toJsonFilter f = getContents
+    >>= putStr . encodeJSON . (bottomUp f :: Pandoc -> Pandoc) . decodeJSON
+
+instance (Data a) => ToJsonFilter (a -> IO a) where
+  toJsonFilter f = getContents >>= (bottomUpM f :: Pandoc -> IO Pandoc) . decodeJSON
+    >>= putStr . encodeJSON
+
+instance (Data a) => ToJsonFilter (a -> [a]) where
+  toJsonFilter f = getContents
+    >>= putStr . encodeJSON . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . decodeJSON
+
+instance (Data a) => ToJsonFilter (a -> IO [a]) where
+  toJsonFilter f = getContents
+    >>= (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) . decodeJSON
+    >>= putStr . encodeJSON
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index d65c9de1c..c8e87b2a0 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where
 
 import Data.List
 import Data.Unique
-import Data.Char ( isDigit, isPunctuation )
+import Data.Char ( isDigit )
 import qualified Data.Map as M
 import Text.CSL hiding ( Cite(..), Citation(..) )
 import qualified Text.CSL as CSL ( Cite(..) )
@@ -43,11 +43,15 @@ import Control.Monad
 
 -- | Process a 'Pandoc' document by adding citations formatted
 -- according to a CSL style, using 'citeproc' from citeproc-hs.
-processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc
-processBiblio cslfile r p
+processBiblio :: FilePath -> Maybe FilePath -> [Reference] -> Pandoc
+              -> IO Pandoc
+processBiblio cslfile abrfile r p
     = if null r then return p
       else do
         csl <- readCSLFile cslfile
+        abbrevs <- case abrfile of
+                      Just f  -> readJsonAbbrevFile f
+                      Nothing -> return []
         p'   <- bottomUpM setHash p
         let (nts,grps) = if styleClass csl == "note"
                          then let cits   = queryWith getCite p'
@@ -55,29 +59,36 @@ processBiblio cslfile r p
                                   needNt = cits \\ concat ncits
                               in (,) needNt $ getNoteCitations needNt p'
                          else (,) [] $ queryWith getCitation p'
-            result     = citeproc procOpts csl r (setNearNote csl $
+            style      = csl { styleAbbrevs = abbrevs }
+            result     = citeproc procOpts style r (setNearNote style $
                             map (map toCslCite) grps)
             cits_map   = M.fromList $ zip grps (citations result)
-            biblioList = map (renderPandoc' csl) (bibliography result)
-            Pandoc m b = bottomUp (procInlines $ processCite csl cits_map) p'
+            biblioList = map (renderPandoc' style) (bibliography result)
+            Pandoc m b = bottomUp (procInlines $ processCite style cits_map) p'
         return . generateNotes nts . Pandoc m $ b ++ biblioList
 
 -- | Substitute 'Cite' elements with formatted citations.
 processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline]
+processCite s cs (Cite t _ : rest) =
+   case M.lookup t cs of
+        Just (x:xs) ->
+                   if isTextualCitation t
+                   then renderPandoc s [x] ++
+                         if null xs
+                         then processCite s cs rest
+                         else [Space,  Cite t (renderPandoc s xs)]
+                            ++ processCite s cs rest
+                   else Cite t (renderPandoc s (x:xs)) : processCite s cs rest
+        _ -> Str ("Error processing " ++ show t) : processCite s cs rest
+processCite s cs (x:xs) = x : processCite s cs xs
 processCite _ _ [] = []
-processCite s cs (i:is)
-    | Cite t _ <- i = process t ++ processCite s cs is
-    | otherwise     = i          : processCite s cs is
-    where
-      addNt t x = if null x then [] else [Cite t $ renderPandoc s x]
-      process t = case M.lookup t cs of
-                    Just  x -> if isTextualCitation t && x /= []
-                               then renderPandoc s [head x] ++
-                                    if tail x /= []
-                                    then Space : addNt t (tail x)
-                                    else []
-                               else [Cite t $ renderPandoc s x]
-                    Nothing -> [Str ("Error processing " ++ show t)]
+
+procInlines :: ([Inline] -> [Inline]) -> Block -> Block
+procInlines f b
+    | Plain    inls <- b = Plain    $ f inls
+    | Para     inls <- b = Para     $ f inls
+    | Header i inls <- b = Header i $ f inls
+    | otherwise          = b
 
 isTextualCitation :: [Citation] -> Bool
 isTextualCitation (c:_) = citationMode c == AuthorInText
@@ -112,13 +123,6 @@ setHash (Citation i p s cm nn _)
 generateNotes :: [Inline] -> Pandoc -> Pandoc
 generateNotes needNote = bottomUp (mvCiteInNote needNote)
 
-procInlines :: ([Inline] -> [Inline]) -> Block -> Block
-procInlines f b
-    | Plain    inls <- b = Plain    $ f inls
-    | Para     inls <- b = Para     $ f inls
-    | Header i inls <- b = Header i $ f inls
-    | otherwise          = b
-
 mvCiteInNote :: [Inline] -> Block -> Block
 mvCiteInNote is = procInlines mvCite
     where
@@ -143,9 +147,8 @@ mvCiteInNote is = procInlines mvCite
           | otherwise        = toCapital (i ++ [Str "."])
 
       checkPt i
-          | Cite c o : xs <- i
-          , endWithPunct o, startWithPunct xs
-          , endWithPunct o = Cite c (initInline o) : checkPt xs
+          | Cite c o : xs <- i , endWithPunct o, startWithPunct xs
+                           = Cite c (initInline o) : checkPt xs
           | x:xs <- i      = x : checkPt xs
           | otherwise      = []
       checkNt  = bottomUp $ procInlines checkPt
@@ -165,13 +168,9 @@ toCslCite c
                       AuthorInText   -> (True, False)
                       SuppressAuthor -> (False,True )
                       NormalCitation -> (False,False)
-          s'      = case s of
-                         []                                -> []
-                         (Str (y:_) : _) | isPunctuation y -> s
-                         _                                 -> Str "," : Space : s
       in   emptyCite { CSL.citeId         = citationId c
                      , CSL.citePrefix     = PandocText $ citationPrefix c
-                     , CSL.citeSuffix     = PandocText $ s'
+                     , CSL.citeSuffix     = PandocText $ s
                      , CSL.citeLabel      = la
                      , CSL.citeLocator    = lo
                      , CSL.citeNoteNumber = show $ citationNoteNum c
@@ -182,9 +181,13 @@ toCslCite c
 
 locatorWords :: [Inline] -> (String, [Inline])
 locatorWords inp =
-  case parse pLocatorWords "suffix" inp of
+  case parse pLocatorWords "suffix" $ breakup inp of
        Right r   -> r
        Left _    -> ("",inp)
+   where breakup [] = []
+         breakup (Str x : xs) = map Str (splitup x) ++ breakup xs
+         breakup (x : xs) = x : breakup xs
+         splitup = groupBy (\x y -> x /= '\160' && y /= '\160')
 
 pLocatorWords :: GenParser Inline st (String, [Inline])
 pLocatorWords = do
@@ -201,7 +204,7 @@ pMatch condition = try $ do
   return t
 
 pSpace :: GenParser Inline st Inline
-pSpace = pMatch (== Space)
+pSpace = pMatch (\t -> t == Space || t == Str "\160")
 
 pLocator :: GenParser Inline st String
 pLocator = try $ do
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
deleted file mode 100644
index 8157d94d3..000000000
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ /dev/null
@@ -1,72 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.CharacterReferences
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Functions for parsing character references.
--}
-module Text.Pandoc.CharacterReferences (
-                     characterReference,
-                     decodeCharacterReferences,
-                    ) where
-import Text.ParserCombinators.Parsec
-import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
-import Data.Maybe ( fromMaybe )
-
--- | Parse character entity.
-characterReference :: GenParser Char st Char
-characterReference = try $ do
-  char '&'
-  character <- numRef <|> entity
-  char ';'
-  return character  
-
-numRef :: GenParser Char st Char
-numRef = do
-  char '#'
-  num <- hexNum <|> decNum
-  return $ fromMaybe '?' $ lookupNumericEntity num
-
-hexNum :: GenParser Char st [Char]
-hexNum = do
-  x <- oneOf "Xx"
-  num <- many1 hexDigit
-  return (x:num)
-
-decNum :: GenParser Char st [Char]
-decNum = many1 digit
-
-entity :: GenParser Char st Char
-entity = do
-  body <- many1 alphaNum
-  return $ fromMaybe '?' $ lookupNamedEntity body
-
--- | Convert entities in a string to characters.
-decodeCharacterReferences :: String -> String
-decodeCharacterReferences str = 
-  case parse (many (characterReference <|> anyChar)) str str of
-	Left err        -> error $ "\nError: " ++ show err
-	Right result    -> result
-
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 5ddaf1379..4fb799cf1 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 {- |
    Module      : Text.Pandoc.Highlighting
    Copyright   : Copyright (C) 2008 John MacFarlane
-   License     : GNU GPL, version 2 or above 
+   License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
@@ -28,47 +28,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Exports functions for syntax highlighting.
 -}
 
-module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss, languagesByExtension ) where
-import Text.XHtml
+module Text.Pandoc.Highlighting ( languages
+                                , languagesByExtension
+                                , highlight
+                                , formatLaTeXInline
+                                , formatLaTeXBlock
+                                , styleToLaTeX
+                                , formatHtmlInline
+                                , formatHtmlBlock
+                                , styleToCss
+                                , pygments
+                                , espresso
+                                , tango
+                                , kate
+                                , monochrome
+                                , haddock
+                                , Style
+                                ) where
 import Text.Pandoc.Definition
-#ifdef _HIGHLIGHTING
-import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss, languagesByExtension )
+import Text.Highlighting.Kate
 import Data.List (find)
 import Data.Maybe (fromMaybe)
 import Data.Char (toLower)
 
-highlightHtml :: Bool   -- ^ True if inline HTML
-              -> Attr   -- ^ Attributes of the Code or CodeBlock
-              -> String -- ^ Raw contents of the Code or CodeBlock
-              -> Either String Html  -- ^ An error or the formatted Html
-highlightHtml inline (_, classes, keyvals) rawCode =
-  let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals
-      fmtOpts = [OptNumberFrom firstNum] ++
-                [OptInline | inline] ++
-                case find (`elem` ["number","numberLines","number-lines"]) classes of
-                  Nothing   -> []
-                  Just _    -> [OptNumberLines]
-      addBirdTracks = "literate" `elem` classes
-      lcLanguages = map (map toLower) languages
-  in  case find (\c -> (map toLower c) `elem` lcLanguages) classes of
-            Nothing        -> Left "Unknown or unsupported language"
-            Just language  -> case highlightAs language rawCode of
-                                   Left err -> Left err
-                                   Right hl -> Right $ formatAsXHtml fmtOpts language $
-                                                       if addBirdTracks
-                                                          then map ((["Special"],"> "):) hl
-                                                          else hl
+lcLanguages :: [String]
+lcLanguages = map (map toLower) languages
 
-#else
-defaultHighlightingCss :: String
-defaultHighlightingCss = ""
+highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
+          -> Attr   -- ^ Attributes of the CodeBlock
+          -> String -- ^ Raw contents of the CodeBlock
+          -> Maybe a -- ^ Maybe the formatted result
+highlight formatter (_, classes, keyvals) rawCode =
+  let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of
+                      ((n,_):_) -> n
+                      []        -> 1
+      fmtOpts = defaultFormatOpts{
+                  startNumber = firstNum,
+                  numberLines = any (`elem`
+                        ["number","numberLines", "number-lines"]) classes }
+      lcclasses = map (map toLower) classes
+  in  case find (`elem` lcLanguages) lcclasses of
+            Nothing        -> Nothing
+            Just language  -> Just
+                              $ formatter fmtOpts{ codeClasses = [language],
+                                                   containerClasses = classes }
+                              $ highlightAs language rawCode
 
-languages :: [String]
-languages = []
-
-languagesByExtension :: String -> [String]
-languagesByExtension _ = []
-
-highlightHtml :: Bool -> Attr -> String -> Either String Html
-highlightHtml _ _ _ = Left "Pandoc was not compiled with support for highlighting"
-#endif
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
new file mode 100644
index 000000000..d48c6a5ae
--- /dev/null
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -0,0 +1,151 @@
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables  #-}
+{-
+  Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+    more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc., 59
+    Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+Module      : Text.Pandoc.ImageSize
+Copyright   : Copyright (C) 2011 John MacFarlane
+License     : GNU GPL, version 2 or above
+
+Maintainer  : John MacFarlane <jgm@berkeley.edu>
+Stability   : alpha
+Portability : portable
+
+Functions for determining the size of a PNG, JPEG, or GIF image.
+-}
+module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize,
+                    sizeInPixels, sizeInPoints, readImageSize ) where
+import Data.ByteString.Lazy (ByteString, unpack)
+import qualified Data.ByteString.Lazy.Char8 as B
+import Control.Monad
+import Data.Bits
+
+-- quick and dirty functions to get image sizes
+-- algorithms borrowed from wwwis.pl
+
+data ImageType = Png | Gif | Jpeg deriving Show
+
+data ImageSize = ImageSize{
+                     pxX   :: Integer
+                   , pxY   :: Integer
+                   , dpiX  :: Integer
+                   , dpiY  :: Integer
+                   } deriving (Read, Show, Eq)
+
+
+readImageSize :: FilePath -> IO (Maybe ImageSize)
+readImageSize fp = imageSize `fmap` B.readFile fp
+
+imageType :: ByteString -> Maybe ImageType
+imageType img = case B.take 4 img of
+                     "\x89\x50\x4e\x47" -> return Png
+                     "\x47\x49\x46\x38" -> return Gif
+                     "\xff\xd8\xff\xe0" -> return Jpeg
+                     _                  -> fail "Unknown image type"
+
+imageSize :: ByteString -> Maybe ImageSize
+imageSize img = do
+  t <- imageType img
+  case t of
+       Png  -> pngSize img
+       Gif  -> gifSize img
+       Jpeg -> jpegSize img
+
+sizeInPixels :: ImageSize -> (Integer, Integer)
+sizeInPixels s = (pxX s, pxY s)
+
+sizeInPoints :: ImageSize -> (Integer, Integer)
+sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s)
+
+pngSize :: ByteString -> Maybe ImageSize
+pngSize img = do
+  let (h, rest) = B.splitAt 8 img
+  guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
+          h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+  let (i, rest') = B.splitAt 4 $ B.drop 4 rest
+  guard $ i == "MHDR" || i == "IHDR"
+  let (sizes, rest'') = B.splitAt 8 rest'
+  (x,y) <- case map fromIntegral $ unpack $ sizes of
+                ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
+                    ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
+                     (shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
+                _ -> fail "PNG parse error"
+  let (dpix, dpiy) = findpHYs rest''
+  return $ ImageSize { pxX  = x, pxY = y, dpiX = dpix, dpiY = dpiy }
+
+findpHYs :: ByteString -> (Integer, Integer)
+findpHYs x =
+  if B.null x || "IDAT" `B.isPrefixOf` x
+     then (72,72) -- default, no pHYs
+     else if "pHYs" `B.isPrefixOf` x
+          then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral
+                                               $ unpack $ B.take 9 $ B.drop 4 x
+                   factor = if u == 1 -- dots per meter
+                               then \z -> z * 254 `div` 10000
+                               else const 72
+               in  ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4,
+                     factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 )
+          else findpHYs $ B.drop 1 x  -- read another byte
+
+gifSize :: ByteString -> Maybe ImageSize
+gifSize img = do
+  let (h, rest) = B.splitAt 6 img
+  guard $ h == "GIF87a" || h == "GIF89a"
+  case map fromIntegral $ unpack $ B.take 4 rest of
+       [w2,w1,h2,h1] -> return ImageSize {
+                          pxX  = shift w1 8 + w2,
+                          pxY  = shift h1 8 + h2,
+                          dpiX = 72,
+                          dpiY = 72
+                          }
+       _             -> fail "GIF parse error"
+
+jpegSize :: ByteString -> Maybe ImageSize
+jpegSize img = do
+  let (hdr, rest) = B.splitAt 4 img
+  guard $ hdr == "\xff\xd8\xff\xe0"
+  guard $ B.length rest >= 14
+  let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
+                                           $ unpack $ B.take 5 $ B.drop 9 $ rest
+  let factor = case dpiDensity of
+                    1 -> id
+                    2 -> \x -> (x * 254 `div` 10)
+                    _ -> const 72
+  let dpix = factor (shift dpix1 8 + dpix2)
+  let dpiy = factor (shift dpiy1 8 + dpiy2)
+  (w,h) <- findJpegSize rest
+  return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy }
+
+findJpegSize :: ByteString -> Maybe (Integer,Integer)
+findJpegSize bs = do
+  let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
+  case B.uncons bs' of
+       Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
+         case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
+              [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2)
+              _             -> fail "JPEG parse error"
+       Just (_,bs'') ->  do
+         case map fromIntegral $ unpack $ B.take 2 bs'' of
+              [c1,c2] -> do
+                let len = shift c1 8 + c2
+                -- skip variables
+                findJpegSize $ B.drop len bs''
+              _       -> fail "JPEG parse error"
+       Nothing -> fail "Did not find length record"
+
+
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index c52a4c475..d3df2f2e1 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -35,6 +35,7 @@ import qualified Data.Map as M
 
 -- | Determine mime type appropriate for file path.
 getMimeType :: FilePath -> Maybe String
+getMimeType "layout-cache" = Just "application/binary"  -- in ODT
 getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
   where mimeTypes = M.fromList -- List borrowed from happstack-server.
            [("gz","application/x-gzip")
@@ -294,6 +295,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
            ,("oth","application/vnd.oasis.opendocument.text-web")
            ,("otp","application/vnd.oasis.opendocument.presentation-template")
            ,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
+           ,("otf","application/x-font-opentype")
            ,("ott","application/vnd.oasis.opendocument.text-template")
            ,("oza","application/x-oz-application")
            ,("p","text/x-pascal")
@@ -427,6 +429,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
            ,("ts","text/texmacs")
            ,("tsp","application/dsptype")
            ,("tsv","text/tab-separated-values")
+           ,("ttf","application/x-font-truetype")
            ,("txt","text/plain")
            ,("udeb","application/x-debian-package")
            ,("uls","text/iuls")
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
new file mode 100644
index 000000000..cc19e1c50
--- /dev/null
+++ b/src/Text/Pandoc/PDF.hs
@@ -0,0 +1,136 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.PDF
+   Copyright   : Copyright (C) 2012 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of LaTeX documents to PDF.
+-}
+module Text.Pandoc.PDF ( tex2pdf ) where
+
+import System.IO.Temp
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy.Char8 as BC
+import System.Exit (ExitCode (..))
+import System.FilePath
+import System.Directory
+import System.Process
+import Control.Exception (evaluate)
+import System.IO (hClose)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Text.Pandoc.UTF8 as UTF8
+import Control.Monad (unless)
+import Data.List (isInfixOf)
+
+tex2pdf :: String       -- ^ tex program (pdflatex, lualatex, xelatex)
+        -> String       -- ^ latex source
+        -> IO (Either ByteString ByteString)
+tex2pdf program source = withSystemTempDirectory "tex2pdf" $ \tmpdir ->
+  tex2pdf' tmpdir program source
+
+tex2pdf' :: FilePath                        -- ^ temp directory for output
+         -> String                          -- ^ tex program
+         -> String                          -- ^ tex source
+         -> IO (Either ByteString ByteString)
+tex2pdf' tmpDir program source = do
+  let numruns = if "\\tableofcontents" `isInfixOf` source
+                   then 2
+                   else 1
+  (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
+  let msg = "Error producing PDF from TeX source."
+  case (exit, mbPdf) of
+       (ExitFailure _, _)      -> return $ Left $
+                                     msg <> "\n" <> extractMsg log'
+       (ExitSuccess, Nothing)  -> return $ Left msg
+       (ExitSuccess, Just pdf) -> return $ Right pdf
+
+(<>) :: ByteString -> ByteString -> ByteString
+(<>) = B.append
+
+-- parsing output
+
+extractMsg :: ByteString -> ByteString
+extractMsg log' = do
+  let msg'  = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
+  let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
+  let lineno = take 1 rest
+  if null msg'
+     then log'
+     else BC.unlines (msg'' ++ lineno)
+
+-- running tex programs
+
+-- Run a TeX program on an input bytestring and return (exit code,
+-- contents of stdout, contents of produced PDF if any).  Rerun
+-- a fixed number of times to resolve references.
+runTeXProgram :: String -> Int -> FilePath -> String
+              -> IO (ExitCode, ByteString, Maybe ByteString)
+runTeXProgram program runsLeft tmpDir source = do
+    let file = tmpDir </> "input.tex"
+    exists <- doesFileExist file
+    unless exists $ UTF8.writeFile file source
+    let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
+         "-output-directory", tmpDir, file]
+    (exit, out, err) <- readCommand program programArgs
+    if runsLeft > 1
+       then runTeXProgram program (runsLeft - 1) tmpDir source
+       else do
+         let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
+         pdfExists <- doesFileExist pdfFile
+         pdf <- if pdfExists
+                   then Just `fmap` B.readFile pdfFile
+                   else return Nothing
+         return (exit, out <> err, pdf)
+
+-- utility functions
+
+-- Run a command and return exitcode, contents of stdout, and
+-- contents of stderr. (Based on
+-- 'readProcessWithExitCode' from 'System.Process'.)
+readCommand :: FilePath                            -- ^ command to run
+            -> [String]                            -- ^ any arguments
+            -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr
+readCommand cmd args = do
+    (Just inh, Just outh, Just errh, pid) <-
+        createProcess (proc cmd args){ std_in  = CreatePipe,
+                                       std_out = CreatePipe,
+                                       std_err = CreatePipe }
+    outMVar <- newEmptyMVar
+    -- fork off a thread to start consuming stdout
+    out  <- B.hGetContents outh
+    _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar ()
+    -- fork off a thread to start consuming stderr
+    err  <- B.hGetContents errh
+    _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar ()
+    -- now write and flush any input
+    hClose inh -- done with stdin
+    -- wait on the output
+    takeMVar outMVar
+    takeMVar outMVar
+    hClose outh
+    -- wait on the process
+    ex <- waitForProcess pid
+    return (ex, out, err)
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eaf0c0f67..725621ce2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -42,15 +42,16 @@ module Text.Pandoc.Parsing ( (>>~),
                              parseFromString,
                              lineClump,
                              charsInBalanced,
-                             charsInBalanced',
                              romanNumeral,
                              emailAddress,
                              uri,
                              withHorizDisplacement,
+                             withRaw,
                              nullBlock,
                              failIfStrict,
                              failUnlessLHS,
                              escaped,
+                             characterReference,
                              anyOrderedListMarker,
                              orderedListMarker,
                              charRef,
@@ -78,7 +79,6 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.ParserCombinators.Parsec
-import Text.Pandoc.CharacterReferences ( characterReference )
 import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
 import Data.List ( intercalate, transpose )
 import Network.URI ( parseURI, URI (..), isAllowedInURI )
@@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
 import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
+import Text.HTML.TagSoup.Entity ( lookupEntity )
 
 -- | Like >>, but returns the operation on the left.
 -- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -174,29 +175,23 @@ lineClump = blanklines
 -- | Parse a string of characters between an open character
 -- and a close character, including text between balanced
 -- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)".  Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
+-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
+-- and return "hello (there)".
+charsInBalanced :: Char -> Char -> GenParser Char st Char
+                -> GenParser Char st String
+charsInBalanced open close parser = try $ do
   char open
-  raw <- many $     (many1 (satisfy $ \c ->
-                             c /= open && c /= close && c /= '\n'))
-                <|> (do res <- charsInBalanced open close
-                        return $ [open] ++ res ++ [close])
-                <|> try (string "\n" >>~ notFollowedBy' blanklines)
+  let isDelim c = c == open || c == close
+  raw <- many $  many1 (notFollowedBy (satisfy isDelim) >> parser)
+             <|> (do res <- charsInBalanced open close parser
+                     return $ [open] ++ res ++ [close])
   char close
   return $ concat raw
 
--- | Like @charsInBalanced@, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
-  char open
-  raw <- many $       (many1 (satisfy $ \c -> c /= open && c /= close))
-                  <|> (do res <- charsInBalanced' open close
-                          return $ [open] ++ res ++ [close])
-  char close
-  return $ concat raw
+-- old charsInBalanced would be:
+-- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
+-- old charsInBalanced' would be:
+-- charsInBalanced open close anyChar
 
 -- Auxiliary functions for romanNumeral:
 
@@ -306,6 +301,23 @@ withHorizDisplacement parser = do
   pos2 <- getPosition
   return (result, sourceColumn pos2 - sourceColumn pos1)
 
+-- | Applies a parser and returns the raw string that was parsed,
+-- along with the value produced by the parser.
+withRaw :: GenParser Char st a -> GenParser Char st (a, [Char])
+withRaw parser = do
+  pos1 <- getPosition
+  inp <- getInput
+  result <- parser
+  pos2 <- getPosition
+  let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
+  let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
+  let inplines = take ((l2 - l1) + 1) $ lines inp
+  let raw = case inplines of
+                []   -> error "raw: inplines is null" -- shouldn't happen
+                [l]  -> take (c2 - c1) l
+                ls   -> unlines (init ls) ++ take (c2 - 1) (last ls)
+  return (result, raw)
+
 -- | Parses a character and returns 'Null' (so that the parser can move on
 -- if it gets stuck).
 nullBlock :: GenParser Char st Block
@@ -319,17 +331,21 @@ failIfStrict = do
 
 -- | Fail unless we're in literate haskell mode.
 failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = do
-  state <- getState
-  if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+failUnlessLHS = getState >>= guard . stateLiterateHaskell
 
 -- | Parses backslash, then applies character parser.
 escaped :: GenParser Char st Char  -- ^ Parser for character to escape
-        -> GenParser Char st Inline
-escaped parser = try $ do
-  char '\\'
-  result <- parser
-  return (Str [result])
+        -> GenParser Char st Char
+escaped parser = try $ char '\\' >> parser
+
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+  char '&'
+  ent <- many1Till nonspaceChar (char ';')
+  case lookupEntity ent of
+       Just c  -> return c
+       Nothing -> fail "entity not found"
 
 -- | Parses an uppercase roman numeral and returns (UpperRoman, number).
 upperRoman :: GenParser Char st (ListNumberStyle, Int)
@@ -512,7 +528,7 @@ gridTableWith block tableCaption headless =
 
 gridTableSplitLine :: [Int] -> String -> [String]
 gridTableSplitLine indices line = map removeFinalBar $ tail $
-  splitByIndices (init indices) $ removeTrailingSpace line
+  splitStringByIndices (init indices) $ removeTrailingSpace line
 
 gridPart :: Char -> GenParser Char st (Int, Int)
 gridPart ch = do
@@ -598,7 +614,7 @@ readWith :: GenParser t ParserState a      -- ^ parser
          -> a
 readWith parser state input = 
     case runParser parser state "source" input of
-      Left err     -> error $ "\nError:\n" ++ show err
+      Left err'    -> error $ "\nError:\n" ++ show err'
       Right result -> result
 
 -- | Parse a string with @parser@ (for testing).
@@ -613,6 +629,8 @@ data ParserState = ParserState
     { stateParseRaw        :: Bool,          -- ^ Parse raw HTML and LaTeX?
       stateParserContext   :: ParserContext, -- ^ Inside list?
       stateQuoteContext    :: QuoteContext,  -- ^ Inside quoted environment?
+      stateMaxNestingLevel :: Int,           -- ^ Max # of nested Strong/Emph
+      stateLastStrPos      :: Maybe SourcePos, -- ^ Position after last str parsed
       stateKeys            :: KeyTable,      -- ^ List of reference keys
       stateCitations       :: [String],      -- ^ List of available citations
       stateNotes           :: NoteTable,     -- ^ List of notes
@@ -623,6 +641,9 @@ data ParserState = ParserState
       stateDate            :: [Inline],      -- ^ Date of document
       stateStrict          :: Bool,          -- ^ Use strict markdown syntax?
       stateSmart           :: Bool,          -- ^ Use smart typography?
+      stateOldDashes       :: Bool,          -- ^ Use pandoc <= 1.8.2.1 behavior
+                                             --   in parsing dashes; -- is em-dash;
+                                             --   before numeral is en-dash
       stateLiterateHaskell :: Bool,          -- ^ Treat input as literate haskell
       stateColumns         :: Int,           -- ^ Number of columns in terminal
       stateHeaderTable     :: [HeaderType],  -- ^ Ordered list of header types used
@@ -640,6 +661,8 @@ defaultParserState =
     ParserState { stateParseRaw        = False,
                   stateParserContext   = NullState,
                   stateQuoteContext    = NoQuote,
+                  stateMaxNestingLevel = 6,
+                  stateLastStrPos      = Nothing,
                   stateKeys            = M.empty,
                   stateCitations       = [],
                   stateNotes           = [],
@@ -650,6 +673,7 @@ defaultParserState =
                   stateDate            = [],
                   stateStrict          = False,
                   stateSmart           = False,
+                  stateOldDashes       = False,
                   stateLiterateHaskell = False,
                   stateColumns         = 80,
                   stateHeaderTable     = [],
@@ -714,7 +738,7 @@ smartPunctuation inlineParser = do
   choice [ quoted inlineParser, apostrophe, dash, ellipses ]
 
 apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
 
 quoted :: GenParser Char ParserState Inline
        -> GenParser Char ParserState Inline
@@ -761,8 +785,12 @@ charOrRef cs =
                        return c)
 
 singleQuoteStart :: GenParser Char ParserState ()
-singleQuoteStart = do 
+singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
+  pos <- getPosition
+  st <- getState
+  -- single quote start can't be right after str
+  guard $ stateLastStrPos st /= Just pos
   try $ do charOrRef "'\8216\145"
            notFollowedBy (oneOf ")!],;:-? \t\n")
            notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
@@ -789,22 +817,42 @@ doubleQuoteEnd = do
 
 ellipses :: GenParser Char st Inline
 ellipses = do
-  try (charOrRef "…\133") <|> try (string "..." >> return '…')
-  return Ellipses
-
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
+  try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
+  return (Str "\8230")
+
+dash :: GenParser Char ParserState Inline
+dash = do
+  oldDashes <- stateOldDashes `fmap` getState
+  if oldDashes
+     then emDashOld <|> enDashOld
+     else Str `fmap` (hyphenDash <|> emDash <|> enDash)
+
+-- Two hyphens = en-dash, three = em-dash
+hyphenDash :: GenParser Char st String
+hyphenDash = do
+  try $ string "--"
+  option "\8211" (char '-' >> return "\8212")
+
+emDash :: GenParser Char st String
+emDash = do
+  try (charOrRef "\8212\151")
+  return "\8212"
 
-enDash :: GenParser Char st Inline
+enDash :: GenParser Char st String
 enDash = do
-  try (charOrRef "–\150") <|>
+  try (charOrRef "\8212\151")
+  return "\8211"
+
+enDashOld :: GenParser Char st Inline
+enDashOld = do
+  try (charOrRef "\8211\150") <|>
     try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
-  return EnDash
+  return (Str "\8211")
 
-emDash :: GenParser Char st Inline
-emDash = do
-  try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—')
-  return EmDash
+emDashOld :: GenParser Char st Inline
+emDashOld = do
+  try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
+  return (Str "\8212")
 
 --
 -- Macros
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 54d65af6f..bf78b2594 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -42,6 +42,7 @@ module Text.Pandoc.Pretty (
      , flush
      , nest
      , hang
+     , beforeNonBlank
      , nowrap
      , offset
      , height
@@ -59,16 +60,20 @@ module Text.Pandoc.Pretty (
      , hsep
      , vcat
      , vsep
+     , chomp
      , inside
      , braces
      , brackets
      , parens
      , quotes
      , doubleQuotes
+     , charWidth
+     , realLength
      )
 
 where
-import Data.DList (DList, fromList, toList, cons, singleton)
+import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex)
+import Data.Foldable (toList)
 import Data.List (intercalate)
 import Data.Monoid
 import Data.String
@@ -90,6 +95,7 @@ type DocState a = State (RenderState a) ()
 data D = Text Int String
        | Block Int [String]
        | Prefixed String Doc
+       | BeforeNonBlank Doc
        | Flush Doc
        | BreakingSpace
        | CarriageReturn
@@ -97,7 +103,7 @@ data D = Text Int String
        | BlankLine
        deriving (Show)
 
-newtype Doc = Doc { unDoc :: DList D }
+newtype Doc = Doc { unDoc :: Seq D }
               deriving (Monoid)
 
 instance Show Doc where
@@ -106,6 +112,14 @@ instance Show Doc where
 instance IsString Doc where
   fromString = text
 
+isBlank :: D -> Bool
+isBlank BreakingSpace  = True
+isBlank CarriageReturn = True
+isBlank NewLine        = True
+isBlank BlankLine      = True
+isBlank (Text _ (c:_)) = isSpace c
+isBlank _              = False
+
 -- | True if the document is empty.
 isEmpty :: Doc -> Bool
 isEmpty = null . toList . unDoc
@@ -114,9 +128,17 @@ isEmpty = null . toList . unDoc
 empty :: Doc
 empty = mempty
 
--- | @a <> b@ is the result of concatenating @a@ with @b@.
-(<>) :: Doc -> Doc -> Doc
+#if MIN_VERSION_base(4,5,0)
+-- (<>) is defined in Data.Monoid
+#else
+infixr 6 <>
+
+-- | An infix synonym for 'mappend'.
+-- @a <> b@ is the result of concatenating @a@ with @b@.
+(<>) :: Monoid m => m -> m -> m
 (<>) = mappend
+{-# INLINE (<>) #-}
+#endif
 
 -- | Concatenate a list of 'Doc's.
 cat :: [Doc] -> Doc
@@ -128,6 +150,7 @@ hcat = mconcat
 
 -- | Concatenate a list of 'Doc's, putting breakable spaces
 -- between them.
+infixr 6 <+>
 (<+>) :: Doc -> Doc -> Doc
 (<+>) x y = if isEmpty x
                then y
@@ -140,6 +163,7 @@ hcat = mconcat
 hsep :: [Doc] -> Doc
 hsep = foldr (<+>) empty
 
+infixr 5 $$
 -- | @a $$ b@ puts @a@ above @b@.
 ($$) :: Doc -> Doc -> Doc
 ($$) x y = if isEmpty x
@@ -148,6 +172,7 @@ hsep = foldr (<+>) empty
                    then x
                    else x <> cr <> y
 
+infixr 5 $+$
 -- | @a $$ b@ puts @a@ above @b@, with a blank line between.
 ($+$) :: Doc -> Doc -> Doc
 ($+$) x y = if isEmpty x
@@ -164,6 +189,17 @@ vcat = foldr ($$) empty
 vsep :: [Doc] -> Doc
 vsep = foldr ($+$) empty
 
+-- | Chomps trailing blank space off of a 'Doc'.
+chomp :: Doc -> Doc
+chomp d = Doc (fromList dl')
+  where dl = toList (unDoc d)
+        dl' = reverse $ dropWhile removeable $ reverse dl
+        removeable BreakingSpace = True
+        removeable CarriageReturn = True
+        removeable NewLine = True
+        removeable BlankLine = True
+        removeable _ = False
+
 outp :: (IsString a, Monoid a)
      => Int -> String -> DocState a
 outp off s | off <= 0 = do
@@ -172,7 +208,7 @@ outp off s | off <= 0 = do
   when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
     let pref = reverse $ dropWhile isSpace $ reverse rawpref
     modify $ \st -> st{ output = fromString pref : output st
-                      , column = column st + length pref }
+                      , column = column st + realLength pref }
   when (off < 0) $ do
      modify $ \st -> st { output = fromString s : output st
                         , column = 0
@@ -182,7 +218,7 @@ outp off s = do
   let pref = prefix st'
   when (column st' == 0 && usePrefix st' && not (null pref)) $ do
     modify $ \st -> st{ output = fromString pref : output st
-                      , column = column st + length pref }
+                      , column = column st + realLength pref }
   modify $ \st -> st{ output = fromString s : output st
                     , column = column st + off
                     , newlines = 0 }
@@ -229,6 +265,12 @@ renderList (Flush d : xs) = do
   modify $ \s -> s{ usePrefix = oldUsePrefix }
   renderList xs
 
+renderList (BeforeNonBlank d : xs) =
+  case xs of
+    (x:_) | isBlank x -> renderList xs
+          | otherwise -> renderDoc d >> renderList xs
+    []                -> renderList xs
+
 renderList (BlankLine : xs) = do
   st <- get
   case output st of
@@ -283,7 +325,7 @@ renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) =
 renderList (Block width lns : xs) = do
   st <- get
   let oldPref = prefix st
-  case column st - length oldPref of
+  case column st - realLength oldPref of
         n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
         _         -> return ()
   renderDoc $ blockToDoc width lns
@@ -295,7 +337,7 @@ mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) =
   Block (w1 + w2 + if addSpace then 1 else 0) $
      zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties)
     where empties = replicate (abs $ length lns1 - length lns2) ""
-          pad n s = s ++ replicate (n - length s) ' '
+          pad n s = s ++ replicate (n - realLength s) ' '
           sp "" = ""
           sp xs = if addSpace then (' ' : xs) else xs
 mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!"
@@ -312,13 +354,13 @@ offsetOf _                = 0
 -- | A literal string.
 text :: String -> Doc
 text = Doc . toChunks
-  where toChunks :: String -> DList D
+  where toChunks :: String -> Seq D
         toChunks [] = mempty
         toChunks s = case break (=='\n') s of
-                          ([], _:ys) -> NewLine `cons` toChunks ys
-                          (xs, _:ys) -> Text (length xs) xs `cons`
-                                            NewLine `cons` toChunks ys
-                          (xs, [])      -> singleton $ Text (length xs) xs
+                          ([], _:ys) -> NewLine <| toChunks ys
+                          (xs, _:ys) -> Text (realLength xs) xs <|
+                                            (NewLine <| toChunks ys)
+                          (xs, [])      -> singleton $ Text (realLength xs) xs
 
 -- | A character.
 char :: Char -> Doc
@@ -359,15 +401,20 @@ nest ind = prefixed (replicate ind ' ')
 hang :: Int -> Doc -> Doc -> Doc
 hang ind start doc = start <> nest ind doc
 
+-- | @beforeNonBlank d@ conditionally includes @d@ unless it is
+-- followed by blank space.
+beforeNonBlank :: Doc -> Doc
+beforeNonBlank d = Doc $ singleton (BeforeNonBlank d)
+
 -- | Makes a 'Doc' non-reflowable.
 nowrap :: Doc -> Doc
-nowrap doc = Doc $ fromList $ map replaceSpace $ toList $ unDoc doc
-  where replaceSpace BreakingSpace = Text 1 " "
-        replaceSpace x = x
+nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc
+  where replaceSpace _ BreakingSpace = Text 1 " "
+        replaceSpace _ x = x
 
 -- | Returns the width of a 'Doc'.
 offset :: Doc -> Int
-offset d = case map length . lines . render Nothing $ d of
+offset d = case map realLength . lines . render Nothing $ d of
                 []    -> 0
                 os    -> maximum os
 
@@ -382,11 +429,11 @@ lblock = block id
 
 -- | Like 'lblock' but aligned to the right.
 rblock :: Int -> Doc -> Doc
-rblock w = block (\s -> replicate (w - length s) ' ' ++ s) w
+rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w
 
 -- | Like 'lblock' but aligned centered.
 cblock :: Int -> Doc -> Doc
-cblock w = block (\s -> replicate ((w - length s) `div` 2) ' ' ++ s) w
+cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w
 
 -- | Returns the height of a block or other 'Doc'.
 height :: Doc -> Int
@@ -401,7 +448,7 @@ chop n cs = case break (=='\n') cs of
                                              (_:[]) -> [xs, ""]
                                              (_:zs) -> xs : chop n zs
                                      else take n xs : chop n (drop n xs ++ ys)
-                                   where len = length xs
+                                   where len = realLength xs
 
 -- | Encloses a 'Doc' inside a start and end 'Doc'.
 inside :: Doc -> Doc -> Doc -> Doc
@@ -427,3 +474,51 @@ quotes = inside (char '\'') (char '\'')
 -- | Wraps a 'Doc' in double quotes.
 doubleQuotes :: Doc -> Doc
 doubleQuotes = inside (char '"') (char '"')
+
+-- | Returns width of a character in a monospace font:  0 for a combining
+-- character, 1 for a regular character, 2 for an East Asian wide character.
+charWidth :: Char -> Int
+charWidth c =
+  case c of
+      _ | c <  '\x0300'                    -> 1
+        | c >= '\x0300' && c <= '\x036F'   -> 0  -- combining
+        | c >= '\x0370' && c <= '\x10FC'   -> 1
+        | c >= '\x1100' && c <= '\x115F'   -> 2
+        | c >= '\x1160' && c <= '\x11A2'   -> 1
+        | c >= '\x11A3' && c <= '\x11A7'   -> 2
+        | c >= '\x11A8' && c <= '\x11F9'   -> 1
+        | c >= '\x11FA' && c <= '\x11FF'   -> 2
+        | c >= '\x1200' && c <= '\x2328'   -> 1
+        | c >= '\x2329' && c <= '\x232A'   -> 2
+        | c >= '\x232B' && c <= '\x2E31'   -> 1
+        | c >= '\x2E80' && c <= '\x303E'   -> 2
+        | c == '\x303F'                    -> 1
+        | c >= '\x3041' && c <= '\x3247'   -> 2
+        | c >= '\x3248' && c <= '\x324F'   -> 1 -- ambiguous
+        | c >= '\x3250' && c <= '\x4DBF'   -> 2
+        | c >= '\x4DC0' && c <= '\x4DFF'   -> 1
+        | c >= '\x4E00' && c <= '\xA4C6'   -> 2
+        | c >= '\xA4D0' && c <= '\xA95F'   -> 1
+        | c >= '\xA960' && c <= '\xA97C'   -> 2
+        | c >= '\xA980' && c <= '\xABF9'   -> 1
+        | c >= '\xAC00' && c <= '\xD7FB'   -> 2
+        | c >= '\xD800' && c <= '\xDFFF'   -> 1
+        | c >= '\xE000' && c <= '\xF8FF'   -> 1 -- ambiguous
+        | c >= '\xF900' && c <= '\xFAFF'   -> 2
+        | c >= '\xFB00' && c <= '\xFDFD'   -> 1
+        | c >= '\xFE00' && c <= '\xFE0F'   -> 1 -- ambiguous
+        | c >= '\xFE10' && c <= '\xFE19'   -> 2
+        | c >= '\xFE20' && c <= '\xFE26'   -> 1
+        | c >= '\xFE30' && c <= '\xFE6B'   -> 2
+        | c >= '\xFE70' && c <= '\x16A38'  -> 1
+        | c >= '\x1B000' && c <= '\x1B001' -> 2
+        | c >= '\x1D000' && c <= '\x1F1FF' -> 1
+        | c >= '\x1F200' && c <= '\x1F251' -> 2
+        | c >= '\x1F300' && c <= '\x1F773' -> 1
+        | c >= '\x20000' && c <= '\x3FFFD' -> 2
+        | otherwise                        -> 1
+
+-- | Get real length of string, taking into account combining and double-wide
+-- characters.
+realLength :: String -> Int
+realLength = sum . map charWidth
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7c882f680..43165ceb1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -215,6 +215,7 @@ pSimpleTable = try $ do
   TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
   skipMany pBlank
   head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+  skipMany pBlank
   rows <- pOptInTag "tbody"
           $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
   skipMany pBlank
@@ -420,8 +421,12 @@ pTagContents =
   pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
 
 pStr :: GenParser Char ParserState Inline
-pStr = liftM Str $ many1 $ satisfy $ \c ->
-           not (isSpace c) && not (isSpecial c) && not (isBad c)
+pStr = do
+  result <- many1 $ satisfy $ \c ->
+                     not (isSpace c) && not (isSpecial c) && not (isBad c)
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  return $ Str result
 
 isSpecial :: Char -> Bool
 isSpecial '"' = True
@@ -502,16 +507,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
                  "dt", "frameset", "li", "tbody", "td", "tfoot",
                  "th", "thead", "tr", "script", "style"]
 
+-- We want to allow raw docbook in markdown documents, so we
+-- include docbook block tags here too.
+blockDocBookTags :: [String]
+blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+                    "orderedlist", "segmentedlist", "simplelist",
+                    "variablelist", "caution", "important", "note", "tip",
+                    "warning", "address", "literallayout", "programlisting",
+                    "programlistingco", "screen", "screenco", "screenshot",
+                    "synopsis", "example", "informalexample", "figure",
+                    "informalfigure", "table", "informaltable", "para",
+                    "simpara", "formalpara", "equation", "informalequation",
+                    "figure", "screenshot", "mediaobject", "qandaset",
+                    "procedure", "task", "cmdsynopsis", "funcsynopsis",
+                    "classsynopsis", "blockquote", "epigraph", "msgset",
+                    "sidebar", "title"]
+
+blockTags :: [String]
+blockTags = blockHtmlTags ++ blockDocBookTags
+
 isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t ||
-                tagClose (`notElem` blockHtmlTags) t ||
+isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
+                tagClose (`notElem` blockTags) t ||
                 tagComment (const True) t
 
 isBlockTag :: Tag String -> Bool
 isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
                tagClose (`elem` blocktags) t ||
                tagComment (const True) t
-                 where blocktags = blockHtmlTags ++ eitherBlockOrInline
+                 where blocktags = blockTags ++ eitherBlockOrInline
 
 isTextTag :: Tag String -> Bool
 isTextTag = tagText (const True)
@@ -546,8 +570,8 @@ t `closes` t2 |
    t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
    t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
 t1 `closes` t2 |
-   t1 `elem` blockHtmlTags &&
-   t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True
+   t1 `elem` blockTags &&
+   t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
 _ `closes` _ = False
 
 --- parsers for use in markdown, textile readers
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 02c7361d7..5e69347b6 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.LaTeX
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
+   Copyright   : Copyright (C) 2006-2012 John MacFarlane
+   License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
    Stability   : alpha
@@ -27,20 +27,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 Conversion of LaTeX to 'Pandoc' document.
 -}
-module Text.Pandoc.Readers.LaTeX ( 
-                                  readLaTeX,
-                                  rawLaTeXInline,
-                                  rawLaTeXEnvironment'
+module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+                                   rawLaTeXInline,
+                                   rawLaTeXBlock,
+                                   handleIncludes
                                  ) where
 
-import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr, toUpper )
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Char ( chr, ord )
 import Control.Monad
+import Text.Pandoc.Builder
+import Data.Char (isLetter, isPunctuation, isSpace)
+import Control.Applicative
+import Data.Monoid
+import System.FilePath (replaceExtension)
+import Data.List (intercalate)
+import qualified Data.Map as M
 
 -- | Parse LaTeX from string and return 'Pandoc' document.
 readLaTeX :: ParserState   -- ^ Parser state, including options for parser
@@ -48,985 +54,867 @@ readLaTeX :: ParserState   -- ^ Parser state, including options for parser
           -> Pandoc
 readLaTeX = readWith parseLaTeX
 
--- characters with special meaning
-specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText :: Char -> Char -> GenParser Char st [Char]
-bracketedText openB closeB = do
-  result <- charsInBalanced' openB closeB
-  return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg :: GenParser Char st [Char]
-optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
+parseLaTeX :: LP Pandoc
+parseLaTeX = do
+  bs <- blocks
+  eof
+  st <- getState
+  let title' = stateTitle st
+  let authors' = stateAuthors st
+  let date' = stateDate st
+  return $ Pandoc (Meta title' authors' date') $ toList bs
 
--- | True if the string begins with '{'.
-isArg :: [Char] -> Bool
-isArg ('{':_) = True
-isArg _       = False
+type LP = GenParser Char ParserState
 
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs :: GenParser Char st [[Char]]
-commandArgs = many optOrArg
+anyControlSeq :: LP String
+anyControlSeq = do
+  char '\\'
+  next <- option '\n' anyChar
+  name <- case next of
+               '\n'           -> return ""
+               c | isLetter c -> (c:) <$> (many letter <* optional sp)
+                 | otherwise  -> return [c]
+  return name
 
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command :: GenParser Char st ([Char], [Char], [[Char]])
-command = do
+controlSeq :: String -> LP String
+controlSeq name = try $ do
   char '\\'
-  name <- many1 letter
-  star <- option "" (string "*")  -- some commands have starred versions
-  args <- commandArgs
-  return (name, star, args)
-
-begin :: [Char] -> GenParser Char st [Char]
-begin name = try $ do
-  string "\\begin"
-  spaces
-  char '{'
-  string name
-  char '}'
-  optional commandArgs
-  spaces
+  case name of
+        ""   -> mzero
+        [c] | not (isLetter c) -> string [c]
+        cs   -> string cs <* optional sp
   return name
 
-end :: [Char] -> GenParser Char st [Char]
-end name = try $ do
-  string "\\end"
-  spaces
+sp :: LP ()
+sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+        <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+tildeEscape :: LP Char
+tildeEscape = try $ do
+  string "^^"
+  c <- satisfy (\x -> x >= '\0' && x <= '\128')
+  d <- if isLowerHex c
+          then option "" $ count 1 (satisfy isLowerHex)
+          else return ""
+  if null d
+     then case ord c of
+           x | x >= 64 && x <= 127 -> return $ chr (x - 64)
+             | otherwise           -> return $ chr (x + 64)
+     else return $ chr $ read ('0':'x':c:d)
+
+comment :: LP ()
+comment = do
+  char '%'
+  skipMany (satisfy (/='\n'))
+  newline
+  return ()
+
+grouped :: Monoid a => LP a -> LP a
+grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}'))
+
+braced :: LP String
+braced = char '{' *> (concat <$> manyTill
+         (  many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
+        <|> try (string "\\}")
+        <|> try (string "\\{")
+        <|> try (string "\\\\")
+        <|> ((\x -> "{" ++ x ++ "}") <$> braced)
+        <|> count 1 anyChar
+         ) (char '}'))
+
+bracketed :: Monoid a => LP a -> LP a
+bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
+
+trim :: String -> String
+trim = removeLeadingTrailingSpace
+
+mathDisplay :: LP String -> LP Inlines
+mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
+
+mathInline :: LP String -> LP Inlines
+mathInline p = math <$> (try p >>= applyMacros')
+
+mathChars :: LP String
+mathChars = concat <$>
+  many (   many1 (satisfy (\c -> c /= '$' && c /='\\'))
+      <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
+       )
+
+double_quote :: LP Inlines
+double_quote = (doubleQuoted . mconcat) <$>
+  (try $ string "``" *> manyTill inline (try $ string "''"))
+
+single_quote :: LP Inlines
+single_quote = char '`' *>
+  ( try ((singleQuoted . mconcat) <$>
+         manyTill inline (try $ char '\'' >> notFollowedBy letter))
+  <|> lit "`")
+
+inline :: LP Inlines
+inline = (mempty <$ comment)
+     <|> (space  <$ sp)
+     <|> inlineText
+     <|> inlineCommand
+     <|> grouped inline
+     <|> (char '-' *> option (str "-")
+           ((char '-') *> option (str "–") (str "—" <$ char '-')))
+     <|> double_quote
+     <|> single_quote
+     <|> (str "’" <$ char '\'')
+     <|> (str "\160" <$ char '~')
+     <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
+     <|> (mathInline  $ char '$' *> mathChars <* char '$')
+     <|> (superscript <$> (char '^' *> tok))
+     <|> (subscript <$> (char '_' *> tok))
+     <|> (failUnlessLHS *> char '|' *> doLHSverb)
+     <|> (str <$> count 1 tildeEscape)
+     <|> (str <$> string "]")
+     <|> (str <$> string "#") -- TODO print warning?
+     <|> (str <$> string "&") -- TODO print warning?
+     -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
+
+inlines :: LP Inlines
+inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+
+block :: LP Blocks
+block = (mempty <$ comment)
+    <|> (mempty <$ ((spaceChar <|> blankline) *> spaces))
+    <|> environment
+    <|> mempty <$ macro -- TODO improve macros, make them work everywhere
+    <|> blockCommand
+    <|> grouped block
+    <|> paragraph
+    <|> (mempty <$ char '&')  -- loose & in table environment
+
+
+blocks :: LP Blocks
+blocks = mconcat <$> many block
+
+blockCommand :: LP Blocks
+blockCommand = try $ do
+  name <- anyControlSeq
+  star <- option "" (string "*" <* optional sp)
+  let name' = name ++ star
+  case M.lookup name' blockCommands of
+       Just p      -> p
+       Nothing     -> case M.lookup name blockCommands of
+                           Just p    -> p
+                           Nothing   -> mzero
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = (str "[") <> x <> (str "]")
+
+-- eat an optional argument and one or more arguments in braces
+ignoreInlines :: String -> (String, LP Inlines)
+ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
+  where optargs = skipopts *> skipMany (try $ optional sp *> braced)
+        contseq = '\\':name
+        doraw = (rawInline "latex" . (contseq ++) . snd) <$>
+                 (getState >>= guard . stateParseRaw >> (withRaw optargs))
+
+ignoreBlocks :: String -> (String, LP Blocks)
+ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
+  where optargs = skipopts *> skipMany (try $ optional sp *> braced)
+        contseq = '\\':name
+        doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
+                 (getState >>= guard . stateParseRaw >> (withRaw optargs))
+
+blockCommands :: M.Map String (LP Blocks)
+blockCommands = M.fromList $
+  [ ("par", mempty <$ skipopts)
+  , ("title", mempty <$ (skipopts *> tok >>= addTitle))
+  , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle))
+  , ("author", mempty <$ (skipopts *> authors))
+  -- -- in letter class, temp. store address & sig as title, author
+  , ("address", mempty <$ (skipopts *> tok >>= addTitle))
+  , ("signature", mempty <$ (skipopts *> authors))
+  , ("date", mempty <$ (skipopts *> tok >>= addDate))
+  -- sectioning
+  , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section 0)
+  , ("section", section 1)
+  , ("subsection", section 2)
+  , ("subsubsection", section 3)
+  , ("paragraph", section 4)
+  , ("subparagraph", section 5)
+  -- beamer slides
+  , ("frametitle", section 3)
+  , ("framesubtitle", section 4)
+  -- letters
+  , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+  , ("closing", skipopts *> closing)
+  --
+  , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+  , ("begin", mzero)   -- these are here so they won't be interpreted as inline
+  , ("end", mzero)
+  , ("item", skipopts *> loose_item)
+  , ("documentclass", skipopts *> braced *> preamble)
+  ] ++ map ignoreBlocks
+  -- these commands will be ignored unless --parse-raw is specified,
+  -- in which case they will appear as raw latex blocks
+  [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
+    -- newcommand, etc. should be parsed by macro, but we need this
+    -- here so these aren't parsed as inline commands to ignore
+  , "special", "pdfannot", "pdfstringdef"
+  , "bibliography", "bibliographystyle"
+  , "maketitle", "makeindex", "makeglossary"
+  , "addcontentsline", "addtocontents", "addtocounter"
+     -- \ignore{} is used conventionally in literate haskell for definitions
+     -- that are to be processed by the compiler but not printed.
+  , "ignore"
+  , "hyperdef"
+  , "noindent"
+  , "markboth", "markright", "markleft"
+  , "hspace", "vspace"
+  ]
+
+addTitle :: Inlines -> LP ()
+addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
+
+addSubtitle :: Inlines -> LP ()
+addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
+                        toList (str ":" <> linebreak <> tit) })
+
+authors :: LP ()
+authors = try $ do
   char '{'
-  string name
+  let oneAuthor = mconcat <$>
+       many1 (notFollowedBy' (controlSeq "and") >> inline)
+  auths <- sepBy oneAuthor (controlSeq "and")
   char '}'
-  return name
+  updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
+
+addDate :: Inlines -> LP ()
+addDate dat = updateState (\s -> s{ stateDate = toList dat })
+
+section :: Int -> LP Blocks
+section lvl = do
+  hasChapters <- stateHasChapters `fmap` getState
+  let lvl' = if hasChapters then lvl + 1 else lvl
+  skipopts
+  contents <- grouped inline
+  return $ header lvl' contents
+
+inlineCommand :: LP Inlines
+inlineCommand = try $ do
+  name <- anyControlSeq
+  guard $ not $ isBlockCommand name
+  parseRaw <- stateParseRaw `fmap` getState
+  star <- option "" (string "*")
+  let name' = name ++ star
+  case M.lookup name' inlineCommands of
+       Just p      -> p
+       Nothing     -> case M.lookup name inlineCommands of
+                           Just p    -> p
+                           Nothing
+                             | parseRaw  ->
+                                (rawInline "latex" . (('\\':name') ++)) <$>
+                                 (withRaw (skipopts *> many braced)
+                                      >>= applyMacros' . snd)
+                             | otherwise -> return mempty
+
+isBlockCommand :: String -> Bool
+isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+
+inlineCommands :: M.Map String (LP Inlines)
+inlineCommands = M.fromList $
+  [ ("emph", emph <$> tok)
+  , ("textit", emph <$> tok)
+  , ("textsc", smallcaps <$> tok)
+  , ("sout", strikeout <$> tok)
+  , ("textsuperscript", superscript <$> tok)
+  , ("textsubscript", subscript <$> tok)
+  , ("textbackslash", lit "\\")
+  , ("backslash", lit "\\")
+  , ("textbf", strong <$> tok)
+  , ("ldots", lit "…")
+  , ("dots", lit "…")
+  , ("mdots", lit "…")
+  , ("sim", lit "~")
+  , ("label", inBrackets <$> tok)
+  , ("ref", inBrackets <$> tok)
+  , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
+  , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
+  , ("ensuremath", mathInline $ braced)
+  , ("P", lit "¶")
+  , ("S", lit "§")
+  , ("$", lit "$")
+  , ("%", lit "%")
+  , ("&", lit "&")
+  , ("#", lit "#")
+  , ("_", lit "_")
+  , ("{", lit "{")
+  , ("}", lit "}")
+  -- old TeX commands
+  , ("em", emph <$> inlines)
+  , ("it", emph <$> inlines)
+  , ("sl", emph <$> inlines)
+  , ("bf", strong <$> inlines)
+  , ("rm", inlines)
+  , ("itshape", emph <$> inlines)
+  , ("slshape", emph <$> inlines)
+  , ("scshape", smallcaps <$> inlines)
+  , ("bfseries", strong <$> inlines)
+  , ("/", pure mempty) -- italic correction
+  , ("cc", lit "ç")
+  , ("cC", lit "Ç")
+  , ("aa", lit "å")
+  , ("AA", lit "Å")
+  , ("ss", lit "ß")
+  , ("o", lit "ø")
+  , ("O", lit "Ø")
+  , ("L", lit "Ł")
+  , ("l", lit "ł")
+  , ("ae", lit "æ")
+  , ("AE", lit "Æ")
+  , ("pounds", lit "£")
+  , ("euro", lit "€")
+  , ("copyright", lit "©")
+  , ("`", option (str "`") $ try $ tok >>= accent grave)
+  , ("'", option (str "'") $ try $ tok >>= accent acute)
+  , ("^", option (str "^") $ try $ tok >>= accent hat)
+  , ("~", option (str "~") $ try $ tok >>= accent circ)
+  , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+  , (".", option (str ".") $ try $ tok >>= accent dot)
+  , ("=", option (str "=") $ try $ tok >>= accent macron)
+  , ("i", lit "i")
+  , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
+  , (",", pure mempty)
+  , ("@", pure mempty)
+  , (" ", lit "\160")
+  , ("ps", pure $ str "PS." <> space)
+  , ("TeX", lit "TeX")
+  , ("LaTeX", lit "LaTeX")
+  , ("bar", lit "|")
+  , ("textless", lit "<")
+  , ("textgreater", lit ">")
+  , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+  , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+  , ("verb", doverb)
+  , ("lstinline", doverb)
+  , ("texttt", (code . stringify . toList) <$> tok)
+  , ("url", (unescapeURL <$> braced) >>= \url ->
+       pure (link url "" (codeWith ("",["url"],[]) url)))
+  , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
+       tok >>= \lab ->
+         pure (link url "" lab))
+  , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>=
+       (\src -> pure (image src "" (str "image"))))
+  , ("cite", citation "cite" NormalCitation False)
+  , ("citep", citation "citep" NormalCitation False)
+  , ("citep*", citation "citep*" NormalCitation False)
+  , ("citeal", citation "citeal" NormalCitation False)
+  , ("citealp", citation "citealp" NormalCitation False)
+  , ("citealp*", citation "citealp*" NormalCitation False)
+  , ("autocite", citation "autocite" NormalCitation False)
+  , ("footcite", citation "footcite" NormalCitation False)
+  , ("parencite", citation "parencite" NormalCitation False)
+  , ("supercite", citation "supercite" NormalCitation False)
+  , ("footcitetext", citation "footcitetext" NormalCitation False)
+  , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+  , ("citeyear", citation "citeyear" SuppressAuthor False)
+  , ("autocite*", citation "autocite*" SuppressAuthor False)
+  , ("cite*", citation "cite*" SuppressAuthor False)
+  , ("parencite*", citation "parencite*" SuppressAuthor False)
+  , ("textcite", citation "textcite" AuthorInText False)
+  , ("citet", citation "citet" AuthorInText False)
+  , ("citet*", citation "citet*" AuthorInText False)
+  , ("citealt", citation "citealt" AuthorInText False)
+  , ("citealt*", citation "citealt*" AuthorInText False)
+  , ("textcites", citation "textcites" AuthorInText True)
+  , ("cites", citation "cites" NormalCitation True)
+  , ("autocites", citation "autocites" NormalCitation True)
+  , ("footcites", citation "footcites" NormalCitation True)
+  , ("parencites", citation "parencites" NormalCitation True)
+  , ("supercites", citation "supercites" NormalCitation True)
+  , ("footcitetexts", citation "footcitetexts" NormalCitation True)
+  , ("Autocite", citation "Autocite" NormalCitation False)
+  , ("Footcite", citation "Footcite" NormalCitation False)
+  , ("Parencite", citation "Parencite" NormalCitation False)
+  , ("Supercite", citation "Supercite" NormalCitation False)
+  , ("Footcitetext", citation "Footcitetext" NormalCitation False)
+  , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+  , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+  , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+  , ("Cite*", citation "Cite*" SuppressAuthor False)
+  , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+  , ("Textcite", citation "Textcite" AuthorInText False)
+  , ("Textcites", citation "Textcites" AuthorInText True)
+  , ("Cites", citation "Cites" NormalCitation True)
+  , ("Autocites", citation "Autocites" NormalCitation True)
+  , ("Footcites", citation "Footcites" NormalCitation True)
+  , ("Parencites", citation "Parencites" NormalCitation True)
+  , ("Supercites", citation "Supercites" NormalCitation True)
+  , ("Footcitetexts", citation "Footcitetexts" NormalCitation True)
+  , ("citetext", complexNatbibCitation NormalCitation)
+  , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+                        complexNatbibCitation AuthorInText)
+                   <|> citation "citeauthor" AuthorInText False)
+  ] ++ map ignoreInlines
+  -- these commands will be ignored unless --parse-raw is specified,
+  -- in which case they will appear as raw latex blocks:
+  [ "index", "nocite" ]
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+  where isEscapable '%' = True
+        isEscapable '#' = True
+        isEscapable _   = False
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+doverb :: LP Inlines
+doverb = do
+  marker <- anyChar
+  code <$> manyTill (satisfy (/='\n')) (char marker)
+
+doLHSverb :: LP Inlines
+doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+
+lit :: String -> LP Inlines
+lit = pure . str
+
+accent :: (Char -> Char) -> Inlines -> LP Inlines
+accent f ils =
+  case toList ils of
+       (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys)
+       []                -> mzero
+       _                 -> return ils
+
+grave :: Char -> Char
+grave 'A' = 'À'
+grave 'E' = 'È'
+grave 'I' = 'Ì'
+grave 'O' = 'Ò'
+grave 'U' = 'Ù'
+grave 'a' = 'à'
+grave 'e' = 'è'
+grave 'i' = 'ì'
+grave 'o' = 'ò'
+grave 'u' = 'ù'
+grave c   = c
+
+acute :: Char -> Char
+acute 'A' = 'Á'
+acute 'E' = 'É'
+acute 'I' = 'Í'
+acute 'O' = 'Ó'
+acute 'U' = 'Ú'
+acute 'a' = 'á'
+acute 'e' = 'é'
+acute 'i' = 'í'
+acute 'o' = 'ó'
+acute 'u' = 'ú'
+acute c = c
+
+hat :: Char -> Char
+hat 'A' = 'Â'
+hat 'E' = 'Ê'
+hat 'I' = 'Î'
+hat 'O' = 'Ô'
+hat 'U' = 'Û'
+hat 'a' = 'ã'
+hat 'e' = 'ê'
+hat 'i' = 'î'
+hat 'o' = 'ô'
+hat 'u' = 'û'
+hat c = c
+
+circ :: Char -> Char
+circ 'A' = 'Ã'
+circ 'O' = 'Õ'
+circ 'o' = 'õ'
+circ 'N' = 'Ñ'
+circ 'n' = 'ñ'
+circ c   = c
+
+umlaut :: Char -> Char
+umlaut 'A' = 'Ä'
+umlaut 'E' = 'Ë'
+umlaut 'I' = 'Ï'
+umlaut 'O' = 'Ö'
+umlaut 'U' = 'Ü'
+umlaut 'a' = 'ä'
+umlaut 'e' = 'ë'
+umlaut 'i' = 'ï'
+umlaut 'o' = 'ö'
+umlaut 'u' = 'ü'
+umlaut c = c
+
+dot :: Char -> Char
+dot 'C' = 'Ċ'
+dot 'c' = 'ċ'
+dot 'E' = 'Ė'
+dot 'e' = 'ė'
+dot 'G' = 'Ġ'
+dot 'g' = 'ġ'
+dot 'I' = 'İ'
+dot 'Z' = 'Ż'
+dot 'z' = 'ż'
+dot c = c
+
+macron :: Char -> Char
+macron 'A' = 'Ā'
+macron 'E' = 'Ē'
+macron 'I' = 'Ī'
+macron 'O' = 'Ō'
+macron 'U' = 'Ū'
+macron 'a' = 'ā'
+macron 'e' = 'ē'
+macron 'i' = 'ī'
+macron 'o' = 'ō'
+macron 'u' = 'ū'
+macron c = c
+
+tok :: LP Inlines
+tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+
+opt :: LP Inlines
+opt = bracketed inline <* optional sp
+
+skipopts :: LP ()
+skipopts = skipMany opt
+
+inlineText :: LP Inlines
+inlineText = str <$> many1 inlineChar
+
+inlineChar :: LP Char
+inlineChar = satisfy $ \c ->
+  not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' ||
+       c == '&'  || c == '~' || c == '#' || c == '{' || c == '}' ||
+       c == '^'  || c == '\'' || c == '`' || c == '-' || c == ']' ||
+       c == ' '  || c == '\t' || c == '\n' )
+
+environment :: LP Blocks
+environment = do
+  controlSeq "begin"
+  name <- braced
+  case M.lookup name environments of
+       Just p      -> p <|> rawEnv name
+       Nothing     -> rawEnv name
+
+rawEnv :: String -> LP Blocks
+rawEnv name = do
+  let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+  parseRaw <- stateParseRaw `fmap` getState
+  if parseRaw
+     then (rawBlock "latex" . addBegin) <$>
+            (withRaw (env name blocks) >>= applyMacros' . snd)
+     else env name blocks
+
+-- | Replace "include" commands with file contents.
+handleIncludes :: String -> IO String
+handleIncludes [] = return []
+handleIncludes ('\\':xs) =
+  case runParser include defaultParserState "input" ('\\':xs) of
+       Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f)
+                                               (\_ -> return "")
+                              yss <- mapM getfile fs
+                              (intercalate "\n" yss ++) `fmap`
+                                handleIncludes rest
+       _  -> case runParser (verbCmd <|> verbatimEnv) defaultParserState
+                   "input" ('\\':xs) of
+                    Right (r, rest) -> (r ++) `fmap` handleIncludes rest
+                    _               -> ('\\':) `fmap` handleIncludes xs
+handleIncludes (x:xs) = (x:) `fmap` handleIncludes xs
+
+include :: LP ([FilePath], String)
+include = do
+  name <- controlSeq "include" <|> controlSeq "usepackage"
+  skipopts
+  fs <- (splitBy (==',')) <$> braced
+  rest <- getInput
+  let fs' = if name == "include"
+               then map (flip replaceExtension ".tex") fs
+               else map (flip replaceExtension ".sty") fs
+  return (fs', rest)
+
+verbCmd :: LP (String, String)
+verbCmd = do
+  (_,r) <- withRaw $ do
+             controlSeq "verb"
+             c <- anyChar
+             manyTill anyChar (char c)
+  rest <- getInput
+  return (r, rest)
+
+verbatimEnv :: LP (String, String)
+verbatimEnv = do
+  (_,r) <- withRaw $ do
+             controlSeq "begin"
+             name <- braced
+             guard $ name == "verbatim" || name == "Verbatim" ||
+                     name == "lstlisting"
+             verbEnv name
+  rest <- getInput
+  return (r,rest)
 
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment :: [Char] -> GenParser Char ParserState [Block]
-environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
+rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand)
 
-anyEnvironment :: GenParser Char ParserState Block
-anyEnvironment =  try $ do
-  string "\\begin"
-  spaces
-  char '{'
-  name <- many letter
-  star <- option "" (string "*") -- some environments have starred variants
-  char '}'
-  optional commandArgs
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = do
+  (res, raw) <- withRaw inlineCommand
+  if res == mempty
+     then return (Str "")
+     else RawInline "latex" <$> (applyMacros' raw)
+
+environments :: M.Map String (LP Blocks)
+environments = M.fromList
+  [ ("document", env "document" blocks <* skipMany anyChar)
+  , ("letter", env "letter" letter_contents)
+  , ("center", env "center" blocks)
+  , ("tabular", env "tabular" simpTable)
+  , ("quote", blockQuote <$> env "quote" blocks)
+  , ("quotation", blockQuote <$> env "quotation" blocks)
+  , ("verse", blockQuote <$> env "verse" blocks)
+  , ("itemize", bulletList <$> listenv "itemize" (many item))
+  , ("description", definitionList <$> listenv "description" (many descItem))
+  , ("enumerate", ordered_list)
+  , ("code", failUnlessLHS *>
+      (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+        verbEnv "code"))
+  , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+  , ("Verbatim", codeBlock <$> (verbEnv "Verbatim"))
+  , ("lstlisting", codeBlock <$> (verbEnv "listlisting"))
+  , ("displaymath", mathEnv Nothing "displaymath")
+  , ("equation", mathEnv Nothing "equation")
+  , ("equation*", mathEnv Nothing "equation*")
+  , ("gather", mathEnv (Just "gathered") "gather")
+  , ("gather*", mathEnv (Just "gathered") "gather*")
+  , ("multiline", mathEnv (Just "gathered") "multiline")
+  , ("multiline*", mathEnv (Just "gathered") "multiline*")
+  , ("eqnarray", mathEnv (Just "aligned*") "eqnarray")
+  , ("eqnarray*", mathEnv (Just "aligned*") "eqnarray*")
+  , ("align", mathEnv (Just "aligned*") "align")
+  , ("align*", mathEnv (Just "aligned*") "align*")
+  , ("alignat", mathEnv (Just "aligned*") "alignat")
+  , ("alignat*", mathEnv (Just "aligned*") "alignat*")
+  ]
+
+letter_contents :: LP Blocks
+letter_contents = do
+  bs <- blocks
+  st <- getState
+  -- add signature (author) and address (title)
+  let addr = case stateTitle st of
+                  []   -> mempty
+                  x    -> para $ trimInlines $ fromList x
+  updateState $ \s -> s{ stateAuthors = [], stateTitle = [] }
+  return $ addr <> bs -- sig added by \closing
+
+closing :: LP Blocks
+closing = do
+  contents <- tok
+  st <- getState
+  let sigs = case stateAuthors st of
+                  []   -> mempty
+                  xs   -> para $ trimInlines $ fromList
+                               $ intercalate [LineBreak] xs
+  return $ para (trimInlines contents) <> sigs
+
+item :: LP Blocks
+item = blocks *> controlSeq "item" *> skipopts *> blocks
+
+loose_item :: LP Blocks
+loose_item = do
+  ctx <- stateParserContext `fmap` getState
+  if ctx == ListItemState
+     then mzero
+     else return mempty
+
+descItem :: LP (Inlines, [Blocks])
+descItem = do
+  blocks -- skip blocks before item
+  controlSeq "item"
+  optional sp
+  ils <- opt
+  bs <- blocks
+  return (ils, [bs])
+
+env :: String -> LP a -> LP a
+env name p = p <* (controlSeq "end" *> braced >>= guard . (== name))
+
+listenv :: String -> LP a -> LP a
+listenv name p = try $ do
+  oldCtx <- stateParserContext `fmap` getState
+  updateState $ \st -> st{ stateParserContext = ListItemState }
+  res <- env name p
+  updateState $ \st -> st{ stateParserContext = oldCtx }
+  return res
+
+mathEnv :: Maybe String -> String -> LP Blocks
+mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+   where inner x = case innerEnv of
+                      Nothing -> x
+                      Just y  -> "\\begin{" ++ y ++ "}\n" ++ x ++
+                                    "\\end{" ++ y ++ "}"
+
+verbEnv :: String -> LP String
+verbEnv name = do
+  skipopts
+  optional blankline
+  let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
+  res <- manyTill anyChar endEnv
+  return $ stripTrailingNewlines res
+
+ordered_list :: LP Blocks
+ordered_list = do
+  optional sp
+  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+                              try $ char '[' *> anyOrderedListMarker <* char ']'
   spaces
-  contents <- manyTill block (end (name ++ star))
+  optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
   spaces
-  return $ BlockQuote contents
-
---
--- parsing documents
---
+  start <- option 1 $ try $ do controlSeq "setcounter"
+                               grouped (string "enum" *> many1 (oneOf "iv"))
+                               optional sp
+                               num <- grouped (many1 digit)
+                               spaces
+                               return $ (read num + 1 :: Int)
+  bs <- listenv "enumerate" (many item)
+  return $ orderedListWith (start, style, delim) bs
+
+paragraph :: LP Blocks
+paragraph = do
+  x <- mconcat <$> many1 inline
+  if x == mempty
+     then return mempty
+     else return $ para $ trimInlines x
+
+preamble :: LP Blocks
+preamble = mempty <$> manyTill preambleBlock beginDoc
+  where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+        preambleBlock =  (mempty <$ comment)
+                     <|> (mempty <$ sp)
+                     <|> (mempty <$ blanklines)
+                     <|> (mempty <$ macro)
+                     <|> blockCommand
+                     <|> (mempty <$ anyControlSeq)
+                     <|> (mempty <$ braced)
+                     <|> (mempty <$ anyChar)
+
+-------
+
+-- citations
 
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = do
-  try $ string "\\documentclass"
-  skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks)   = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
 
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX :: GenParser Char ParserState Pandoc
-parseLaTeX = do
-  spaces
-  skipMany $ comment >> spaces
-  blocks <- try (processLaTeXPreamble >> environment "document")
-           <|> (many block >>~ (spaces >> eof))
-  state <- getState
-  let blocks' = filter (/= Null) blocks
-  let title' = stateTitle state
-  let authors' = stateAuthors state
-  let date' = stateDate state
-  return $ Pandoc (Meta title' authors' date')  blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> many block
-
-block :: GenParser Char ParserState Block
-block = choice [ hrule
-               , codeBlock
-               , header
-               , list
-               , blockQuote
-               , simpleTable
-               , commentBlock
-               , macro
-               , bibliographic
-               , para
-               , itemBlock
-               , unknownEnvironment
-               , ignore
-               , unknownCommand
-               ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = section <|> chapter
-
-chapter :: GenParser Char ParserState Block
-chapter = try $ do
-  string "\\chapter"
-  result <- headerWithLevel 1
-  updateState $ \s -> s{ stateHasChapters = True }
-  return result
-
-section :: GenParser Char ParserState Block
-section = try $ do
-  char '\\'
-  subs <- many (try (string "sub"))
-  base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
-  st <- getState
-  let lev = if stateHasChapters st
-               then length subs + base + 1
-               else length subs + base
-  headerWithLevel lev
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+  let k  = last ks
+      s' = case s of
+                (Str (c:_):_)
+                  | not (isPunctuation c || isSpace c) -> Str "," : Space : s
+                _                                      -> s
+  in  init ks ++ [k {citationSuffix = citationSuffix k ++ s'}]
+addSuffix _ _ = []
 
-headerWithLevel :: Int -> GenParser Char ParserState Block
-headerWithLevel lev = try $ do
-  spaces
-  optional (char '*')
-  spaces
-  optional $ bracketedText '[' ']' -- alt title
-  spaces
+simpleCiteArgs :: LP [Citation]
+simpleCiteArgs = try $ do
+  first  <- optionMaybe $ toList <$> opt
+  second <- optionMaybe $ toList <$> opt
   char '{'
-  title' <- manyTill inline (char '}')
-  spaces
-  return $ Header lev (normalizeSpaces title')
-
---
--- hrule block
---
+  keys <- manyTill citationLabel (char '}')
+  let (pre, suf) = case (first  , second ) of
+        (Just s , Nothing) -> (mempty, s )
+        (Just s , Just t ) -> (s , t )
+        _                  -> (mempty, mempty)
+      conv k = Citation { citationId      = k
+                        , citationPrefix  = []
+                        , citationSuffix  = []
+                        , citationMode    = NormalCitation
+                        , citationHash    = 0
+                        , citationNoteNum = 0
+                        }
+  return $ addPrefix pre $ addSuffix suf $ map conv keys
 
-hrule :: GenParser Char st Block
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", 
-                       "\\newpage" ] >> spaces >> return HorizontalRule
+citationLabel :: LP String
+citationLabel  = trim <$>
+  (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
 
--- tables
+cites :: CitationMode -> Bool -> LP [Citation]
+cites mode multi = try $ do
+  cits <- if multi
+             then many1 simpleCiteArgs
+             else count 1 simpleCiteArgs
+  let (c:cs) = concat cits
+  return $ case mode of
+        AuthorInText   -> c {citationMode = mode} : cs
+        _              -> map (\a -> a {citationMode = mode}) (c:cs)
 
-simpleTable :: GenParser Char ParserState Block
-simpleTable = try $ do
-  string "\\begin"
-  spaces
-  string "{tabular}"
-  spaces
-  aligns <- parseAligns
-  let cols = length aligns
-  optional hline
-  header' <- option [] $ parseTableHeader cols
-  rows <- many (parseTableRow cols >>~ optional hline)
-  spaces
-  end "tabular"
-  spaces
-  let header'' = if null header'
-                    then replicate cols []
-                    else header'
-  return $ Table [] aligns (replicate cols 0) header'' rows
+citation :: String -> CitationMode -> Bool -> LP Inlines
+citation name mode multi = do
+  (c,raw) <- withRaw $ cites mode multi
+  return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+
+complexNatbibCitation :: CitationMode -> LP Inlines
+complexNatbibCitation mode = try $ do
+  let ils = (toList . trimInlines . mconcat) <$>
+              many (notFollowedBy (oneOf "\\};") >> inline)
+  let parseOne = try $ do
+                   skipSpaces
+                   pref  <- ils
+                   cit' <- inline -- expect a citation
+                   let citlist = toList cit'
+                   cits' <- case citlist of
+                                 [Cite cs _] -> return cs
+                                 _           -> mzero
+                   suff  <- ils
+                   skipSpaces
+                   optional $ char ';'
+                   return $ addPrefix pref $ addSuffix suff $ cits'
+  (c:cits, raw) <- withRaw $ grouped parseOne
+  return $ cite (c{ citationMode = mode }:cits)
+           (rawInline "latex" $ "\\citetext" ++ raw)
 
-hline :: GenParser Char st ()
-hline = try $ spaces >> string "\\hline" >> return ()
+-- tables
 
-parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns :: LP [Alignment]
 parseAligns = try $ do
   char '{'
   optional $ char '|'
   let cAlign = char 'c' >> return AlignCenter
   let lAlign = char 'l' >> return AlignLeft
   let rAlign = char 'r' >> return AlignRight
-  let alignChar = cAlign <|> lAlign <|> rAlign
+  let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
   aligns' <- sepEndBy alignChar (optional $ char '|')
+  spaces
   char '}'
   spaces
   return aligns'
 
-parseTableHeader :: Int   -- ^ number of columns
-                 -> GenParser Char ParserState [TableCell]
-parseTableHeader cols = try $ do
-  cells' <- parseTableRow cols
-  hline
-  return cells'
+hline :: LP ()
+hline = () <$ (try $ spaces >> controlSeq "hline")
 
 parseTableRow :: Int  -- ^ number of columns
-              -> GenParser Char ParserState [TableCell]
+              -> LP [Blocks]
 parseTableRow cols = try $ do
-  let tableCellInline = notFollowedBy (char '&' <|>
-                          (try $ char '\\' >> char '\\')) >> inline
-  cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
-             (many tableCellInline)) (char '&')
+  let amp = try $ spaces *> string "&"
+  let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline
+  cells' <- sepBy ((plain . trimInlines . mconcat) <$> many tableCellInline) amp
   guard $ length cells' == cols
   spaces
-  (try $ string "\\\\" >> spaces) <|>
-    (lookAhead (end "tabular") >> return ())
-  return cells'
-
---
--- code blocks
---
-
-codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
--- Note:  Verbatim is from fancyvrb.
-
-codeBlockWith :: String -> GenParser Char st Block
-codeBlockWith env = try $ do
-  string "\\begin"
-  spaces                      -- don't use begin function because it
-  string $ "{" ++ env ++ "}"  -- gobbles whitespace; we want to gobble
-  optional blanklines         -- blank lines, but not leading space
-  contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
-  spaces
-  let classes = if env == "code" then ["haskell"] else []
-  return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
-  failUnlessLHS
-  (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
-  return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= 
-             return . BlockQuote
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem :: GenParser Char ParserState ([Inline], [Block])
-listItem = try $ do
-  ("item", _, args) <- command
-  spaces
-  state <- getState
-  let oldParserContext = stateParserContext state
-  updateState (\s -> s {stateParserContext = ListItemState})
-  blocks <- many block
-  updateState (\s -> s {stateParserContext = oldParserContext})
-  opt <- case args of
-           ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> 
-                       parseFromString (many inline) $ tail $ init x
-           _        -> return []
-  return (opt, blocks)
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
-  string "\\begin"
-  spaces
-  string "{enumerate}"
-  spaces
-  (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
-                              try $ do failIfStrict
-                                       char '['
-                                       res <- anyOrderedListMarker
-                                       char ']'
-                                       return res
-  spaces
-  option "" $ try $ do string "\\setlength{\\itemindent}"
-                       char '{'
-                       manyTill anyChar (char '}')
-  spaces
-  start <- option 1 $ try $ do failIfStrict
-                               string "\\setcounter{enum"
-                               many1 (oneOf "iv")
-                               string "}{"
-                               num <- many1 digit
-                               char '}' 
-                               spaces
-                               return $ (read num) + 1
-  items <- many listItem
-  end "enumerate"
-  spaces
-  return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
-  begin "itemize"
-  items <- many listItem
-  end "itemize"
-  spaces
-  return (BulletList $ map snd items)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
-  begin "description"
-  items <- many listItem
-  end "description"
-  spaces
-  return $ DefinitionList $ map (\(t,d) -> (t,[d])) items
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = do
-  res <- many1 inline
-  spaces
-  return $ if null (filter (`notElem` [Str "", Space]) res)
-              then Null
-              else Para $ normalizeSpaces res
-
---
--- title authors date
---
-
-bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, subtitle, authors, date ]
-
-maketitle :: GenParser Char st Block
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title :: GenParser Char ParserState Block
-title = try $ do
-  string "\\title{"
-  tit <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateTitle = tit })
-  return Null
-
-subtitle :: GenParser Char ParserState Block
-subtitle = try $ do
-  string "\\subtitle{"
-  tit <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateTitle = stateTitle state ++
-                                   Str ":" : LineBreak : tit })
-  return Null
-
-authors :: GenParser Char ParserState Block
-authors = try $ do
-  string "\\author{"
-  let andsep = try $ string "\\and" >> notFollowedBy letter >>
-                     spaces >> return '&'
-  raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
-  let authors' = map normalizeSpaces raw
-  char '}'
+  optional $ controlSeq "\\"
   spaces
-  updateState (\s -> s { stateAuthors = authors' })
-  return Null
-
-date :: GenParser Char ParserState Block
-date = try $ do
-  string "\\date{"
-  date' <- manyTill inline (char '}')
-  spaces
-  updateState (\state -> state { stateDate = normalizeSpaces date' })
-  return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock :: GenParser Char ParserState Block
-itemBlock = try $ do
-  ("item", _, args) <- command
-  state <- getState
-  if stateParserContext state == ListItemState
-     then fail "item should be handled by list block"
-     else if null args 
-             then return Null
-             else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX 
---
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = do
-  contents <- rawLaTeXEnvironment'
-  spaces
-  return $ RawBlock "latex" contents
+  return cells'
 
--- | Parse any LaTeX environment and return a string containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment' :: GenParser Char st String 
-rawLaTeXEnvironment' = try $ do
-  string "\\begin"
-  spaces
-  char '{'
-  name <- many1 letter
-  star <- option "" (string "*") -- for starred variants
-  let name' = name ++ star
-  char '}'
-  args <- option [] commandArgs
-  let argStr = concat args
-  contents <- manyTill (choice [ (many1 (noneOf "\\")), 
-                                 rawLaTeXEnvironment',
-                                 string "\\" ]) 
-                       (end name')
-  return $ "\\begin{" ++ name' ++ "}" ++ argStr ++ 
-                 concat contents ++ "\\end{" ++ name' ++ "}"
-
-unknownEnvironment :: GenParser Char ParserState Block
-unknownEnvironment = try $ do
-  state <- getState
-  result <- if stateParseRaw state -- check whether we should include raw TeX 
-               then rawLaTeXEnvironment -- if so, get whole raw environment
-               else anyEnvironment      -- otherwise just the contents
-  return result
-
--- \ignore{} is used conventionally in literate haskell for definitions
--- that are to be processed by the compiler but not printed.
-ignore :: GenParser Char ParserState Block
-ignore = try $ do
-  ("ignore", _, _) <- command
-  spaces
-  return Null
-
-demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
-demacro (n,st,args) = try $ do
-  let raw = "\\" ++ n ++ st ++ concat args
-  s' <- applyMacros' raw
-  if raw == s'
-     then return $ RawInline "latex" raw
-     else do
-       inp <- getInput
-       setInput $ s' ++ inp
-       return $ Str ""
-
-unknownCommand :: GenParser Char ParserState Block
-unknownCommand = try $ do
-  spaces
-  notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]
-  state <- getState
-  when (stateParserContext state == ListItemState) $
-     notFollowedBy' (string "\\item")
-  if stateParseRaw state
-     then command >>= demacro >>= return . Plain . (:[])
-     else do
-        (name, _, args) <- command
-        spaces
-        unless (name `elem` commandsToIgnore) $ do
-        -- put arguments back in input to be parsed
-          inp <- getInput
-          setInput $ intercalate " " args ++ inp
-        return Null
-
-commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
-
-skipChar :: GenParser Char ParserState Block
-skipChar = do
-  satisfy (/='\\') <|>
-    (notFollowedBy' (try $
-                     string "\\begin" >> spaces >> string "{document}") >>
-     anyChar)
+simpTable :: LP Blocks
+simpTable = try $ do
   spaces
-  return Null
-
-commentBlock :: GenParser Char st Block
-commentBlock = many1 (comment >> spaces) >> return Null
-
--- 
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline =  choice [ str
-                 , endline
-                 , whitespace
-                 , quoted
-                 , apostrophe
-                 , strong
-                 , math
-                 , ellipses
-                 , emDash
-                 , enDash
-                 , hyphen
-                 , emph
-                 , strikeout
-                 , superscript
-                 , subscript
-                 , code
-                 , url
-                 , link
-                 , image
-                 , footnote
-                 , linebreak
-                 , accentedChar
-                 , nonbreakingSpace
-                 , cite
-                 , specialChar
-                 , ensureMath
-                 , rawLaTeXInline'
-                 , escapedChar
-                 , unescapedChar
-                 , comment
-                 ] <?> "inline"
-
-
--- latex comment
-comment :: GenParser Char st Inline
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
-
-accentedChar :: GenParser Char st Inline
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar :: GenParser Char st Inline
-normalAccentedChar = try $ do
-  char '\\'
-  accent <- oneOf "'`^\"~"
-  character <- (try $ char '{' >> letter >>~ char '}') <|> letter
-  let table = fromMaybe [] $ lookup character accentTable 
-  let result = case lookup accent table of
-                 Just num  -> chr num
-                 Nothing   -> '?'
-  return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable :: [(Char, [(Char, Int)])]
-accentTable = 
-  [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
-    ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
-    ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
-    ('N', [('~', 209)]),
-    ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
-    ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
-    ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
-    ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
-    ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
-    ('n', [('~', 241)]),
-    ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
-    ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
-                               oslash, pound, euro, copyright, sect ]
-
-ccedil :: GenParser Char st Inline
-ccedil = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["cc", "cC"]
-  let num = if letter' == "cc" then 231 else 199
-  return $ Str [chr num]
-
-aring :: GenParser Char st Inline
-aring = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["aa", "AA"]
-  let num = if letter' == "aa" then 229 else 197
-  return $ Str [chr num]
-
-iuml :: GenParser Char st Inline
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> 
-       return (Str [chr 239])
-
-szlig :: GenParser Char st Inline
-szlig = try (string "\\ss") >> return (Str [chr 223])
-
-oslash :: GenParser Char st Inline
-oslash = try $ do
-  char '\\'
-  letter' <- choice [char 'o', char 'O']
-  let num = if letter' == 'o' then 248 else 216
-  return $ Str [chr num]
-
-lslash :: GenParser Char st Inline
-lslash = try $ do
-  cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "]
-  return $ if 'l' `elem` cmd
-              then Str "\x142"
-              else Str "\x141"
-
-aelig :: GenParser Char st Inline
-aelig = try $ do
-  char '\\'
-  letter' <- oneOfStrings ["ae", "AE"]
-  let num = if letter' == "ae" then 230 else 198
-  return $ Str [chr num]
-
-pound :: GenParser Char st Inline
-pound = try (string "\\pounds") >> return (Str [chr 163])
-
-euro :: GenParser Char st Inline
-euro = try (string "\\euro") >> return (Str [chr 8364])
-
-copyright :: GenParser Char st Inline
-copyright = try (string "\\copyright") >> return (Str [chr 169])
-
-sect :: GenParser Char st Inline
-sect = try (string "\\S") >> return (Str [chr 167])
-
-escapedChar :: GenParser Char st Inline
-escapedChar = do
-  result <- escaped (oneOf specialChars)
-  return $ if result == Str "\n" then Str " " else result
-
--- nonescaped special characters
-unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
-
-specialChar :: GenParser Char st Inline
-specialChar = choice [ spacer, interwordSpace,
-                       backslash, tilde, caret,
-                       bar, lt, gt, doubleQuote ]
-
-spacer :: GenParser Char st Inline
-spacer = try (string "\\,") >> return (Str "")
-
-interwordSpace :: GenParser Char st Inline
-interwordSpace = try (string "\\ ") >> return (Str "\160")
-
-backslash :: GenParser Char st Inline
-backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
-
-tilde :: GenParser Char st Inline
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret :: GenParser Char st Inline
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar :: GenParser Char st Inline
-bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
-
-lt :: GenParser Char st Inline
-lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
-
-gt :: GenParser Char st Inline
-gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
-
-doubleQuote :: GenParser Char st Inline
-doubleQuote = char '"' >> return (Str "\"")
-
-code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> code3 <|> lhsInlineCode
-
-code1 :: GenParser Char st Inline
-code1 = try $ do 
-  string "\\verb"
-  marker <- anyChar
-  result <- manyTill anyChar (char marker)
-  return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-code2 :: GenParser Char st Inline
-code2 = try $ do
-  string "\\texttt{"
-  result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
-  return $ Code nullAttr result
-
-code3 :: GenParser Char st Inline
-code3 = try $ do 
-  string "\\lstinline"
-  marker <- anyChar
-  result <- manyTill anyChar (char marker)
-  return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-lhsInlineCode :: GenParser Char ParserState Inline
-lhsInlineCode = try $ do
-  failUnlessLHS
-  char '|'
-  result <- manyTill (noneOf "|\n") (char '|')
-  return $ Code ("",["haskell"],[]) result
-
-emph :: GenParser Char ParserState Inline
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
-             manyTill inline (char '}') >>= return . Emph
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
-                  return . Strikeout
-
-superscript :: GenParser Char ParserState Inline
-superscript = try $ string "\\textsuperscript{" >> 
-                    manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript :: GenParser Char ParserState Inline
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
-                  return . Subscript
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = char '\'' >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
-               return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
-               return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart :: GenParser Char st Char
-singleQuoteStart = char '`'
-
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart :: CharParser st String
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd :: CharParser st String
-doubleQuoteEnd = try $ string "''"
-
-ellipses :: GenParser Char st Inline
-ellipses = try $ do
-  char '\\'
-  optional $ char 'l'
-  string "dots"
-  optional $ try $ string "{}"
-  return Ellipses
-
-enDash :: GenParser Char st Inline
-enDash = try (string "--") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = try (string "---") >> return EmDash
-
-hyphen :: GenParser Char st Inline
-hyphen = char '-' >> return (Str "-")
-
-strong :: GenParser Char ParserState Inline
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
-         return . Strong
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 (oneOf " \t") >> return Space
-
-nonbreakingSpace :: GenParser Char st Inline
-nonbreakingSpace = char '~' >> return (Str "\160")
-
--- hard line break
-linebreak :: GenParser Char st Inline
-linebreak = try $ do
-  string "\\\\"
-  optional $ bracketedText '[' ']'  -- e.g. \\[10pt]
+  aligns <- parseAligns
+  let cols = length aligns
+  optional hline
+  header' <- option [] $ try (parseTableRow cols <* hline)
+  rows <- many (parseTableRow cols <* optional hline)
   spaces
-  return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline :: GenParser Char st Inline
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math :: GenParser Char ParserState Inline
-math =   (math3 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
-     <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
-     <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
-     <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
-     <?> "math"
-
-math1 :: GenParser Char st String 
-math1 = try $ char '$' >> manyTill anyChar (char '$')
-
-math2 :: GenParser Char st String
-math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
-
-math3 :: GenParser Char st String 
-math3 = try $ char '$' >> math1 >>~ char '$'
-
-math4 :: GenParser Char st String
-math4 = try $ do
-  name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
-           begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
-             begin "multline" <|> begin "multline*"
-  manyTill anyChar (end name)
-
-math5 :: GenParser Char st String
-math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
-
-math6 :: GenParser Char st String
-math6 = try $ do
-  name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
-           begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
-             begin "split" <|> begin "aligned" <|> begin "alignedat"
-  res <- manyTill anyChar (end name)
-  return $ filter (/= '&') res  -- remove alignment codes
-
-ensureMath :: GenParser Char st Inline
-ensureMath = try $ do
-  (n, _, args) <- command
-  guard $ n == "ensuremath" && not (null args)
-  return $ Math InlineMath $ tail $ init $ head args
-
---
--- links and images
---
-
-url :: GenParser Char ParserState Inline
-url = try $ do
-  string "\\url"
-  url' <- charsInBalanced '{' '}'
-  return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
-
-link :: GenParser Char ParserState Inline
-link = try $ do
-  string "\\href{"
-  url' <- manyTill anyChar (char '}')
-  char '{'
-  label' <- manyTill inline (char '}') 
-  return $ Link (normalizeSpaces label') (escapeURI url', "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
-  ("includegraphics", _, args) <- command
-  let args' = filter isArg args -- filter out options
-  let (src,tit) = case args' of
-                       []    -> ("", "")
-                       (x:_) -> (stripFirstAndLast x, "")
-  return $ Image [Str "image"] (escapeURI src, tit)
-
-footnote :: GenParser Char ParserState Inline
-footnote = try $ do
-  (name, _, (contents:[])) <- command
-  if ((name == "footnote") || (name == "thanks"))
-     then string ""
-     else fail "not a footnote or thanks command"
-  let contents' = stripFirstAndLast contents
-  -- parse the extracted block, which may contain various block elements:
-  rest <- getInput
-  setInput $ contents'
-  blocks <- parseBlocks
-  setInput rest
-  return $ Note blocks
-
--- | citations
-cite :: GenParser Char ParserState Inline
-cite = simpleCite <|> complexNatbibCites
-
-simpleCiteArgs :: GenParser Char ParserState [Citation]
-simpleCiteArgs = try $ do
-  first  <- optionMaybe $ (char '[') >> manyTill inline (char ']')
-  second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
-  char '{'
-  keys <- many1Till citationLabel (char '}')
-  let (pre, suf) = case (first  , second ) of
-        (Just s , Nothing) -> ([], s )
-        (Just s , Just t ) -> (s , t )
-        _                  -> ([], [])
-      conv k = Citation { citationId      = k
-                        , citationPrefix  = []
-                        , citationSuffix  = []
-                        , citationMode    = NormalCitation
-                        , citationHash    = 0
-                        , citationNoteNum = 0
-                        }
-  return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-
-simpleCite :: GenParser Char ParserState Inline
-simpleCite = try $ do
-  char '\\'
-  let biblatex     = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
-                     ++ ["footcitetext"]
-      normal       = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
-                     ++ biblatex
-      supress      = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
-      intext       = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
-      mintext      = ["textcites"]
-      mnormal      = map (++ "s") biblatex
-      cmdend       = notFollowedBy (letter <|> char '*')
-      capit []     = []
-      capit (x:xs) = toUpper x : xs
-      addUpper xs  = xs ++ map capit xs
-      toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
-  (mode, multi) <-  toparser normal  (NormalCitation, False)
-                <|> toparser supress (SuppressAuthor, False)
-                <|> toparser intext  (AuthorInText  , False)
-                <|> toparser mnormal (NormalCitation, True )
-                <|> toparser mintext (AuthorInText  , True )
-  cits <- if multi then
-            many1 simpleCiteArgs
-          else
-            simpleCiteArgs >>= \c -> return [c]
-  let (c:cs) = concat cits
-      cits'  = case mode of
-        AuthorInText   -> c {citationMode = mode} : cs
-        _              -> map (\a -> a {citationMode = mode}) (c:cs)
-  return $ Cite cits' []
-
-complexNatbibCites :: GenParser Char ParserState Inline
-complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
-
-complexNatbibTextual :: GenParser Char ParserState Inline
-complexNatbibTextual = try $ do
-  string "\\citeauthor{"
-  manyTill (noneOf "}") (char '}')
-  skipSpaces
-  Cite (c:cs) _ <- complexNatbibParenthetical
-  return $ Cite (c {citationMode = AuthorInText} : cs) []
-
-
-complexNatbibParenthetical :: GenParser Char ParserState Inline
-complexNatbibParenthetical = try $ do
-  string "\\citetext{"
-  cits <- many1Till parseOne (char '}')
-  return $ Cite (concat cits) []
-  where
-    parseOne = do
-                 skipSpaces
-                 pref           <- many (notFollowedBy (oneOf "\\}") >> inline)
-                 (Cite cites _) <- simpleCite
-                 suff           <- many (notFollowedBy (oneOf "\\};") >> inline)
-                 skipSpaces
-                 optional $ char ';'
-                 return $ addPrefix pref $ addSuffix suff $ cites
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks)   = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) = let k = last ks
-                        in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-citationLabel :: GenParser Char ParserState String
-citationLabel  = do
-  res <- many1 $ noneOf ",}"
-  optional $ char ','
-  return $ removeLeadingTrailingSpace res
-
--- | Parse any LaTeX inline command and return it in a raw TeX inline element.
-rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
-  notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
-                                 "\\section"]
-  rawLaTeXInline
+  let header'' = if null header'
+                    then replicate cols mempty
+                    else header'
+  lookAhead $ controlSeq "end" -- make sure we're at end
+  return $ table mempty (zip aligns (repeat 0)) header'' rows
 
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
-  state <- getState
-  if stateParseRaw state
-     then command >>= demacro
-     else do
-        (name,st,args) <- command
-        x <- demacro (name,st,args)
-        unless (x == Str "" || name `elem` commandsToIgnore) $ do
-          inp <- getInput
-          setInput $ intercalate " " args ++ inp
-        return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 53531dc1a..666265935 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,12 +39,12 @@ import Text.Pandoc.Definition
 import Text.Pandoc.Generic
 import Text.Pandoc.Shared
 import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
 import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
                                   isTextTag, isCommentTag )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.XML ( fromEntities )
 import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, guard)
+import Control.Monad (when, liftM, guard, mzero)
 import Text.HTML.TagSoup
 import Text.HTML.TagSoup.Match (tagOpen)
 
@@ -108,6 +108,11 @@ atMostSpaces :: Int -> GenParser Char ParserState ()
 atMostSpaces 0 = notFollowedBy (char ' ')
 atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return ()
 
+litChar :: GenParser Char ParserState Char
+litChar = escapedChar'
+       <|> noneOf "\n"
+       <|> (newline >> notFollowedBy blankline >> return ' ')
+
 -- | Fail unless we're at beginning of a line.
 failUnlessBeginningOfLine :: GenParser tok st () 
 failUnlessBeginningOfLine = do
@@ -212,16 +217,15 @@ referenceKey = try $ do
   lab <- reference
   char ':'
   skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
-  let nl = char '\n' >> notFollowedBy blankline >> return ' '
   let sourceURL = liftM unwords $ many $ try $ do
                     notFollowedBy' referenceTitle
                     skipMany spaceChar
-                    optional nl
+                    optional $ newline >> notFollowedBy blankline
                     skipMany spaceChar
                     notFollowedBy' reference
-                    many1 (satisfy $ not . isBlank)
+                    many1 $ escapedChar' <|> satisfy (not . isBlank)
   let betweenAngles = try $ char '<' >>
-                            manyTill (noneOf ">\n" <|> nl) (char '>')
+                       manyTill (escapedChar' <|> litChar) (char '>')
   src <- try betweenAngles <|> sourceURL
   tit <- option "" referenceTitle
   blanklines
@@ -233,14 +237,14 @@ referenceKey = try $ do
   -- return blanks so line count isn't affected
   return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
 
-referenceTitle :: GenParser Char st String
-referenceTitle = try $ do 
+referenceTitle :: GenParser Char ParserState String
+referenceTitle = try $ do
   skipSpaces >> optional newline >> skipSpaces
-  tit <-    (charsInBalanced '(' ')' >>= return . unwords . words)
+  tit <-    (charsInBalanced '(' ')' litChar >>= return . unwords . words)
         <|> do delim <- char '\'' <|> char '"'
-               manyTill anyChar (try (char delim >> skipSpaces >>
+               manyTill litChar (try (char delim >> skipSpaces >>
                                       notFollowedBy (noneOf ")\n")))
-  return $ decodeCharacterReferences tit
+  return $ fromEntities tit
 
 noteMarker :: GenParser Char ParserState [Char]
 noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -367,32 +371,37 @@ hrule = try $ do
 indentedLine :: GenParser Char ParserState [Char]
 indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
 
-codeBlockDelimiter :: Maybe Int
-                   -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])]))
-codeBlockDelimiter len = try $ do
+blockDelimiter :: (Char -> Bool)
+               -> Maybe Int
+               -> GenParser Char st (Int, (String, [String], [(String, String)]), Char)
+blockDelimiter f len = try $ do
+  c <- lookAhead (satisfy f)
   size <- case len of
-              Just l  -> count l (char '~') >> many (char '~') >> return l
-              Nothing -> count 3 (char '~') >> many (char '~') >>= 
-                         return . (+ 3) . length 
+              Just l  -> count l (char c) >> many (char c) >> return l
+              Nothing -> count 3 (char c) >> many (char c) >>=
+                         return . (+ 3) . length
   many spaceChar
-  attr <- option ([],[],[]) attributes
+  attr <- option ([],[],[])
+          $ attributes                                     -- ~~~ {.ruby}
+         <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby
   blankline
-  return (size, attr) 
+  return (size, attr, c)
 
 attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
 attributes = try $ do
   char '{'
-  many spaceChar
-  attrs <- many (attribute >>~ many spaceChar)
+  spnl
+  attrs <- many (attribute >>~ spnl)
   char '}'
   let (ids, classes, keyvals) = unzip3 attrs
-  let id' = if null ids then "" else head ids
-  return (id', concat classes, concat keyvals)  
+  let firstNonNull [] = ""
+      firstNonNull (x:xs) | not (null x) = x
+                          | otherwise    = firstNonNull xs
+  return (firstNonNull $ reverse ids, concat classes, concat keyvals)
 
 attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
 attribute = identifierAttr <|> classAttr <|> keyValAttr
 
-
 identifier :: GenParser Char st [Char]
 identifier = do
   first <- letter
@@ -415,14 +424,15 @@ keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
 keyValAttr = try $ do
   key <- identifier
   char '='
-  char '"'
-  val <- manyTill (satisfy (/='\n')) (char '"')
+  val <- enclosed (char '"') (char '"') anyChar
+     <|> enclosed (char '\'') (char '\'') anyChar
+     <|> many nonspaceChar
   return ("",[],[(key,val)])
 
 codeBlockDelimited :: GenParser Char st Block
 codeBlockDelimited = try $ do
-  (size, attr) <- codeBlockDelimiter Nothing
-  contents <- manyTill anyLine (codeBlockDelimiter (Just size))
+  (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
+  contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
   blanklines
   return $ CodeBlock attr $ intercalate "\n" contents
 
@@ -552,9 +562,9 @@ listLine = try $ do
   return $ concat chunks ++ "\n"
 
 -- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState [Char]
-rawListItem = try $ do
-  listStart
+rawListItem :: GenParser Char ParserState a -> GenParser Char ParserState [Char]
+rawListItem start = try $ do
+  start
   result <- many1 listLine
   blanks <- many blankline
   return $ concat result ++ blanks
@@ -577,9 +587,9 @@ listContinuationLine = try $ do
   result <- manyTill anyChar newline
   return $ result ++ "\n"
 
-listItem :: GenParser Char ParserState [Block]
-listItem = try $ do 
-  first <- rawListItem
+listItem :: GenParser Char ParserState a -> GenParser Char ParserState [Block]
+listItem start = try $ do 
+  first <- rawListItem start
   continuations <- many listContinuation
   -- parsing with ListItemState forces markers at beginning of lines to
   -- count as list item markers, even if not separated by blank space.
@@ -596,13 +606,15 @@ listItem = try $ do
 orderedList :: GenParser Char ParserState Block
 orderedList = try $ do
   (start, style, delim) <- lookAhead anyOrderedListStart
-  items <- many1 listItem
+  items <- many1 $ listItem $ try $
+             do optional newline -- if preceded by a Plain block in a list context
+                skipNonindentSpaces
+                orderedListMarker style delim
   return $ OrderedList (start, style, delim) $ compactify items
 
 bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
-  lookAhead bulletListStart
-  many1 listItem >>= return . BulletList . compactify
+bulletList =
+  many1 (listItem bulletListStart) >>= return . BulletList . compactify
 
 -- definition lists
 
@@ -718,8 +730,8 @@ rawVerbatimBlock = try $ do
 rawTeXBlock :: GenParser Char ParserState Block
 rawTeXBlock = do
   failIfStrict
-  result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
-          <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+  result <- liftM (RawBlock "latex") rawLaTeXBlock
+          <|> liftM (RawBlock "context") rawConTeXtEnvironment
   spaces
   return result
 
@@ -767,7 +779,7 @@ simpleTableHeader headless = try $ do
   let (lengths, lines') = unzip dashes
   let indices  = scanl (+) (length initSp) lines'
   -- If no header, calculate alignment on basis of first row of text
-  rawHeads <- liftM (tail . splitByIndices (init indices)) $
+  rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
               if headless
                  then lookAhead anyLine 
                  else return rawContent
@@ -794,7 +806,7 @@ rawTableLine indices = do
   notFollowedBy' (blanklines <|> tableFooter)
   line <- many1Till anyChar newline
   return $ map removeLeadingTrailingSpace $ tail $ 
-           splitByIndices (init indices) line
+           splitStringByIndices (init indices) line
 
 -- Parse a table line and return a list of lists of blocks (columns).
 tableLine :: [Int]
@@ -844,7 +856,7 @@ multilineTableHeader :: Bool -- ^ Headerless table
 multilineTableHeader headless = try $ do
   if headless
      then return '\n'
-     else tableSep
+     else tableSep >>~ notFollowedBy blankline
   rawContent  <- if headless
                     then return $ repeat "" 
                     else many1
@@ -856,9 +868,9 @@ multilineTableHeader headless = try $ do
   let indices  = scanl (+) (length initSp) lines'
   rawHeadsList <- if headless
                      then liftM (map (:[]) . tail .
-                              splitByIndices (init indices)) $ lookAhead anyLine
+                              splitStringByIndices (init indices)) $ lookAhead anyLine
                      else return $ transpose $ map 
-                           (\ln -> tail $ splitByIndices (init indices) ln)
+                           (\ln -> tail $ splitStringByIndices (init indices) ln)
                            rawContent
   let aligns   = zipWith alignType rawHeadsList lengths
   let rawHeads = if headless
@@ -922,30 +934,25 @@ inlineParsers = [ whitespace
                 , inlineNote  -- after superscript because of ^[link](/foo)^
                 , autoLink
                 , rawHtmlInline
-                , rawLaTeXInline'
                 , escapedChar
+                , rawLaTeXInline'
                 , exampleRef
                 , smartPunctuation inline
                 , charRef
                 , symbol
                 , ltSign ]
 
-inlineNonLink :: GenParser Char ParserState Inline
-inlineNonLink = (choice $
-                 map (\parser -> try (parser >>= failIfLink)) inlineParsers)
-                <?> "inline (non-link)"
-
-failIfLink :: Inline -> GenParser tok st Inline
-failIfLink (Link _ _) = pzero
-failIfLink elt        = return elt
-
-escapedChar :: GenParser Char ParserState Inline
-escapedChar = try $ do
+escapedChar' :: GenParser Char ParserState Char
+escapedChar' = try $ do
   char '\\'
   state <- getState
-  result <- if stateStrict state 
-               then oneOf "\\`*_{}[]()>#+-.!~"
-               else satisfy (not . isAlphaNum)
+  if stateStrict state
+     then oneOf "\\`*_{}[]()>#+-.!~"
+     else satisfy (not . isAlphaNum)
+
+escapedChar :: GenParser Char ParserState Inline
+escapedChar = do
+  result <- escapedChar'
   return $ case result of
                 ' '   -> Str "\160" -- "\ " is a nonbreaking space
                 '\n'  -> LineBreak  -- "\[newline]" is a linebreak
@@ -971,8 +978,7 @@ symbol :: GenParser Char ParserState Inline
 symbol = do 
   result <- noneOf "<\\\n\t "
          <|> try (do lookAhead $ char '\\'
-                     notFollowedBy' $ rawLaTeXEnvironment'
-                                   <|> rawConTeXtEnvironment'
+                     notFollowedBy' rawTeXBlock
                      char '\\')
   return $ Str [result]
 
@@ -1036,8 +1042,20 @@ inlinesBetween start end =
     where inner      = innerSpace <|> (notFollowedBy' whitespace >> inline)
           innerSpace = try $ whitespace >>~ notFollowedBy' end
 
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: GenParser Char ParserState a
+       -> GenParser Char ParserState a
+nested p = do
+  nestlevel <- stateMaxNestingLevel `fmap` getState
+  guard $ nestlevel > 0
+  updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+  res <- p
+  updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+  return res
+
 emph :: GenParser Char ParserState Inline
-emph = Emph `liftM`
+emph = Emph `fmap` nested
   (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
     where starStart = char '*' >> lookAhead nonspaceChar
           starEnd   = notFollowedBy' strong >> char '*'
@@ -1045,7 +1063,7 @@ emph = Emph `liftM`
           ulEnd     = notFollowedBy' strong >> char '_'
 
 strong :: GenParser Char ParserState Inline
-strong = Strong `liftM`
+strong = Strong `liftM` nested
   (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
     where starStart = string "**" >> lookAhead nonspaceChar
           starEnd   = try $ string "**"
@@ -1079,12 +1097,20 @@ nonEndline = satisfy (/='\n')
 
 str :: GenParser Char ParserState Inline
 str = do
+  smart <- stateSmart `fmap` getState
   a <- alphaNum
-  as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum)
+  as <- many $ alphaNum
+            <|> (try $ char '_' >>~ lookAhead alphaNum)
+            <|> if smart
+                   then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
+                         lookAhead alphaNum >> return '\x2019')
+                         -- for things like l'aide
+                   else mzero
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
   let result = a:as
-  state <- getState
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
-  if stateSmart state
+  if smart
      then case likelyAbbrev result of
                []        -> return $ Str result
                xs        -> choice (map (\x ->
@@ -1128,19 +1154,18 @@ endline = try $ do
 -- a reference label for a link
 reference :: GenParser Char ParserState [Inline]
 reference = do notFollowedBy' (string "[^")   -- footnote reference
-               result <- inlinesInBalancedBrackets inlineNonLink
+               result <- inlinesInBalancedBrackets inline
                return $ normalizeSpaces result
 
 -- source for a link, with optional title
-source :: GenParser Char st (String, [Char])
+source :: GenParser Char ParserState (String, [Char])
 source =
-  (try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
+  (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
   -- the following is needed for cases like:  [ref](/url(a).
-  (enclosed (char '(') (char ')') anyChar >>=
-   parseFromString source')
+  (enclosed (char '(') (char ')') litChar >>= parseFromString source')
 
 -- auxiliary function for source
-source' :: GenParser Char st (String, [Char])
+source' :: GenParser Char ParserState (String, [Char])
 source' = do
   skipSpaces
   let nl = char '\n' >>~ notFollowedBy blankline
@@ -1149,29 +1174,33 @@ source' = do
                     skipMany spaceChar
                     optional nl
                     skipMany spaceChar
-                    many1 (satisfy $ not . isBlank)
-  let betweenAngles = try $ char '<' >>
-                            manyTill (noneOf ">\n" <|> nl) (char '>')
+                    many1 $ escapedChar' <|> satisfy (not . isBlank)
+  let betweenAngles = try $
+         char '<' >> manyTill (escapedChar' <|> noneOf ">\n" <|> nl) (char '>')
   src <- try betweenAngles <|> sourceURL
   tit <- option "" linkTitle
   skipSpaces
   eof
   return (escapeURI $ removeTrailingSpace src, tit)
 
-linkTitle :: GenParser Char st String
-linkTitle = try $ do 
+linkTitle :: GenParser Char ParserState String
+linkTitle = try $ do
   (many1 spaceChar >> option '\n' newline) <|> newline
   skipSpaces
   delim <- oneOf "'\""
-  tit <-   manyTill (optional (char '\\') >> anyChar)
-                    (try (char delim >> skipSpaces >> eof))
-  return $ decodeCharacterReferences tit
+  tit <-   manyTill litChar (try (char delim >> skipSpaces >> eof))
+  return $ fromEntities tit
 
 link :: GenParser Char ParserState Inline
 link = try $ do
   lab <- reference
   (src, tit) <- source <|> referenceLink lab
-  return $ Link lab (src, tit)
+  return $ Link (delinkify lab) (src, tit)
+
+delinkify :: [Inline] -> [Inline]
+delinkify = bottomUp $ concatMap go
+  where go (Link lab _) = lab
+        go x            = [x]
 
 -- a link like [this][ref] or [this][] or [this]
 referenceLink :: [Inline]
@@ -1198,8 +1227,9 @@ autoLink = try $ do
 image :: GenParser Char ParserState Inline
 image = try $ do
   char '!'
-  (Link lab src) <- link
-  return $ Image lab src
+  lab <- reference
+  (src, tit) <- source <|> referenceLink lab
+  return $ Image lab (src,tit)
 
 note :: GenParser Char ParserState Inline
 note = try $ do
@@ -1228,18 +1258,16 @@ inlineNote = try $ do
 rawLaTeXInline' :: GenParser Char ParserState Inline
 rawLaTeXInline' = try $ do
   failIfStrict
-  lookAhead $ char '\\'
-  notFollowedBy' $ rawLaTeXEnvironment'
-                <|> rawConTeXtEnvironment'
+  lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
   RawInline _ s <- rawLaTeXInline
   return $ RawInline "tex" s  -- "tex" because it might be context or latex
 
-rawConTeXtEnvironment' :: GenParser Char st String
-rawConTeXtEnvironment' = try $ do
+rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment = try $ do
   string "\\start"
   completion <- inBrackets (letter <|> digit <|> spaceChar)
                <|> (many1 letter)
-  contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
+  contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
                        (try $ string "\\stop" >> string completion)
   return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
 
@@ -1312,7 +1340,8 @@ citeKey = try $ do
   suppress_author <- option False (char '-' >> return True)
   char '@'
   first <- letter
-  rest <- many $ (noneOf ",;!?[]()@ \t\n")
+  let internal p = try $ p >>~ lookAhead (letter <|> digit)
+  rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~")
   let key = first:rest
   st <- getState
   guard $ key `elem` stateCitations st
@@ -1320,8 +1349,12 @@ citeKey = try $ do
 
 suffix :: GenParser Char ParserState [Inline]
 suffix = try $ do
+  hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
   spnl
-  liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+  rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+  return $ if hasSpace
+              then Space : rest
+              else rest
 
 prefix :: GenParser Char ParserState [Inline]
 prefix = liftM normalizeSpaces $
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7fda0da19..456b23ce8 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -128,6 +128,7 @@ block = choice [ codeBlock
                , fieldList
                , imageBlock
                , customCodeBlock
+               , mathBlock
                , unknownDirective
                , header
                , hrule
@@ -360,6 +361,33 @@ customCodeBlock = try $ do
   result <- indentedBlock
   return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
 
+-- | The 'math' directive (from Sphinx) for display math.
+mathBlock :: GenParser Char st Block
+mathBlock = try $ do
+  string ".. math::"
+  mathBlockMultiline <|> mathBlockOneLine
+
+mathBlockOneLine :: GenParser Char st Block
+mathBlockOneLine = try $ do
+  result <- manyTill anyChar newline
+  blanklines
+  return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result]
+
+mathBlockMultiline :: GenParser Char st Block
+mathBlockMultiline = try $ do
+  blanklines
+  result <- indentedBlock
+  -- a single block can contain multiple equations, which need to go
+  -- in separate Pandoc math elements
+  let lns = map removeLeadingTrailingSpace $ lines result
+  -- drop :label, :nowrap, etc.
+  let startsWithColon (':':_) = True
+      startsWithColon _       = False
+  let lns' = dropWhile startsWithColon lns
+  let eqs = map (removeLeadingTrailingSpace . unlines)
+            $ filter (not . null) $ splitBy null lns'
+  return $ Para $ map (Math DisplayMath) eqs
+
 lhsCodeBlock :: GenParser Char ParserState Block
 lhsCodeBlock = try $ do
   failUnlessLHS
@@ -526,8 +554,8 @@ noteBlock = try $ do
   string ".."
   spaceChar >> skipMany spaceChar
   ref <- noteMarker
-  spaceChar >> skipMany spaceChar
-  first <- anyLine
+  first <- (spaceChar >> skipMany spaceChar >> anyLine)
+        <|> (newline >> return "")
   blanks <- option "" blanklines
   rest <- option "" indentedBlock
   endPos <- getPosition
@@ -736,6 +764,7 @@ inline = choice [ whitespace
                 , image
                 , superscript
                 , subscript
+                , math
                 , note
                 , smartPunctuation inline
                 , hyphens
@@ -750,7 +779,8 @@ hyphens = do
   return $ Str result
 
 escapedChar :: GenParser Char st Inline
-escapedChar = escaped anyChar
+escapedChar = do c <- escaped anyChar
+                 return $ Str [c]
 
 symbol :: GenParser Char ParserState Inline
 symbol = do 
@@ -773,24 +803,31 @@ strong :: GenParser Char ParserState Inline
 strong = enclosed (string "**") (try $ string "**") inline >>= 
          return . Strong . normalizeSpaces
 
-interpreted :: [Char] -> GenParser Char st [Inline]
+interpreted :: [Char] -> GenParser Char st [Char]
 interpreted role = try $ do
   optional $ try $ string "\\ "
   result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
   try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
-  return [Str result]
+  return result
 
 superscript :: GenParser Char ParserState Inline
-superscript = interpreted "sup" >>= (return . Superscript)
+superscript = interpreted "sup" >>= \x -> return (Superscript [Str x])
 
 subscript :: GenParser Char ParserState Inline
-subscript = interpreted "sub" >>= (return . Subscript)
+subscript = interpreted "sub" >>= \x -> return (Subscript [Str x])
+
+math :: GenParser Char ParserState Inline
+math = interpreted "math" >>= \x -> return (Math InlineMath x)
 
 whitespace :: GenParser Char ParserState Inline
 whitespace = many1 spaceChar >> return Space <?> "whitespace"
 
 str :: GenParser Char ParserState Inline
-str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+str = do
+  result <- many1 (noneOf (specialChars ++ "\t\n "))
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  return $ Str result
 
 -- an endline character that can be treated as a space, not a structural break
 endline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index b9a46e8ff..67dfe6753 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -69,8 +69,17 @@ expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
         medspace  = Str "\x2005"
         widespace = Str "\x2004"
 expToInlines (EStretchy x) = expToInlines x
+expToInlines (EDelimited start end xs) = do
+  xs' <- mapM expToInlines xs
+  return $ [Str start] ++ concat xs' ++ [Str end]
 expToInlines (EGrouped xs) = expsToInlines xs
-expToInlines (ESpace _) = Just [Str " "]  -- variable widths not supported
+expToInlines (ESpace "0.167em") = Just [Str "\x2009"]
+expToInlines (ESpace "0.222em") = Just [Str "\x2005"]
+expToInlines (ESpace "0.278em") = Just [Str "\x2004"]
+expToInlines (ESpace "0.333em") = Just [Str "\x2004"]
+expToInlines (ESpace "1em")     = Just [Str "\x2001"]
+expToInlines (ESpace "2em")     = Just [Str "\x2001\x2001"]
+expToInlines (ESpace _)         = Just [Str " "]
 expToInlines (EBinary _ _ _) = Nothing
 expToInlines (ESub x y) = do
   x' <- expToInlines x
@@ -88,10 +97,10 @@ expToInlines (ESubsup x y z) = do
 expToInlines (EDown x y) = expToInlines (ESub x y)
 expToInlines (EUp x y) = expToInlines (ESuper x y)
 expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
-expToInlines (EText "normal" x) = Just [Str x]
-expToInlines (EText "bold" x) = Just [Strong [Str x]]
-expToInlines (EText "monospace" x) = Just [Code nullAttr x]
-expToInlines (EText "italic" x) = Just [Emph [Str x]]
+expToInlines (EText TextNormal x) = Just [Str x]
+expToInlines (EText TextBold x) = Just [Strong [Str x]]
+expToInlines (EText TextMonospace x) = Just [Code nullAttr x]
+expToInlines (EText TextItalic x) = Just [Emph [Str x]]
 expToInlines (EText _ x) = Just [Str x]
 expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
     case accent of
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 12d299aa4..3b5954368 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
 readTextile :: ParserState -- ^ Parser state, including options for parser
              -> String      -- ^ String to parse (assuming @'\n'@ line endings)
              -> Pandoc
-readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+readTextile state s =
+  (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
 
 
 --
@@ -436,6 +437,8 @@ str = do
               next <- lookAhead letter
               guard $ isLetter (last xs) || isLetter next
               return $ xs ++ "-"
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
   return $ Str result
 
 -- | Textile allows HTML span infos, we discard them
diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs
deleted file mode 100644
index b17b052c5..000000000
--- a/src/Text/Pandoc/S5.hs
+++ /dev/null
@@ -1,69 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
--}
-
-{- |
-   Module      : Text.Pandoc.S5
-   Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
-
-   Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha
-   Portability : portable
-
-Definitions for creation of S5 powerpoint-like HTML.
-(See <http://meyerweb.com/eric/tools/s5/>.)
--}
-module Text.Pandoc.S5 ( s5HeaderIncludes) where
-import Text.Pandoc.Shared ( readDataFile )
-import System.FilePath ( (</>) )
-import Data.ByteString.UTF8 ( toString, fromString )
-import Data.ByteString.Base64 ( encode )
-
-s5HeaderIncludes :: Maybe FilePath -> IO String
-s5HeaderIncludes datadir = do
-  c <- s5CSS datadir
-  j <- s5Javascript datadir
-  return $ c ++ j
-
-s5Javascript :: Maybe FilePath -> IO String
-s5Javascript datadir = do
-  js <- readDataFile datadir $ "s5" </> "default" </> "slides.min.js"
-  return $ "<script type=\"text/javascript\">\n" ++ inCDATA js ++ "</script>\n"
-
-inCDATA :: String -> String
-inCDATA s = "/*<![CDATA[*/\n" ++ s ++ "\n/*]]>*/\n"
-
-base64 :: String -> String
-base64 = toString . encode . fromString
-
-s5CSS :: Maybe FilePath -> IO String
-s5CSS datadir = do
-  s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css"
-  s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css"
-  s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css"
-  s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css"
-  s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css"
-  s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css"
-  return $ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"slideProj\" href=\"data:text/css;charset=utf-8;base64," ++
-          base64 (s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS) ++ "\" />\n" ++
-          "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" href=\"data:text/css;charset=utf-8;base64," ++
-          base64 s5OutlineCSS ++ "\" />\n" ++
-          "<link rel=\"stylesheet\" type=\"text/css\" media=\"print\" id=\"slidePrint\" href=\"data:text/css;charset=utf-8;base64," ++
-          base64 s5PrintCSS ++ "\" />\n" ++
-          "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"operaFix\" href=\"data:text/css;charset=utf-8;base64," ++
-          base64 s5OperaCSS ++ "\" />\n"
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
new file mode 100644
index 000000000..9c609b8fe
--- /dev/null
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -0,0 +1,162 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.SelfContained
+   Copyright   : Copyright (C) 2011 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Functions for converting an HTML file into one that can be viewed
+offline, by incorporating linked images, CSS, and scripts into
+the HTML using data URIs.
+-}
+module Text.Pandoc.SelfContained ( makeSelfContained ) where
+import Text.HTML.TagSoup
+import Network.URI (isAbsoluteURI, parseURI, escapeURIString)
+import Network.HTTP
+import Data.ByteString.Base64
+import qualified Data.ByteString.Char8 as B
+import Data.ByteString (ByteString)
+import Data.ByteString.UTF8 (toString, fromString)
+import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import Data.Char (toLower, isAscii, isAlphaNum)
+import Codec.Compression.GZip as Gzip
+import qualified Data.ByteString.Lazy as L
+import Text.Pandoc.Shared (findDataFile)
+import Text.Pandoc.MIME (getMimeType)
+import System.Directory (doesFileExist)
+
+getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
+getItem userdata f =
+  if isAbsoluteURI f
+     then openURL f
+     else do
+       let mime = case takeExtension f of
+                      ".gz" -> getMimeType $ dropExtension f
+                      x     -> getMimeType x
+       exists <- doesFileExist f
+       if exists
+          then do
+            cont <- B.readFile f
+            return (cont, mime)
+          else do
+            res <- findDataFile userdata f
+            exists' <- doesFileExist res
+            if exists'
+               then do
+                 cont <- B.readFile res
+                 return (cont, mime)
+               else error $ "Could not find `" ++ f ++ "'"
+
+-- TODO - have this return mime type too - then it can work for google
+-- chart API, e.g.
+openURL :: String -> IO (ByteString, Maybe String)
+openURL u = getBodyAndMimeType =<< simpleHTTP (getReq u)
+  where getReq v = case parseURI v of
+                     Nothing  -> error $ "Could not parse URI: " ++ v
+                     Just u'  -> mkRequest GET u'
+        getBodyAndMimeType (Left err) = fail (show err)
+        getBodyAndMimeType (Right r)  = return (rspBody r, findHeader HdrContentType r)
+
+isOk :: Char -> Bool
+isOk c = isAscii c && isAlphaNum c
+
+convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
+convertTag userdata t@(TagOpen "img" as) =
+       case fromAttrib "src" t of
+         []   -> return t
+         src  -> do
+           (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+           let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+           return $ TagOpen "img" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag userdata t@(TagOpen "video" as) =
+       case fromAttrib "src" t of
+         []   -> return t
+         src  -> do
+           (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+           let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+           return $ TagOpen "video" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag userdata t@(TagOpen "script" as) =
+  case fromAttrib "src" t of
+       []     -> return t
+       src    -> do
+           (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+           let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+           return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) 
+convertTag userdata t@(TagOpen "link" as) =
+  case fromAttrib "href" t of
+       []  -> return t
+       src -> do
+           (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+           let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+           return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) 
+convertTag _ t = return t
+
+cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
+cssURLs userdata d orig =
+  case B.breakSubstring "url(" orig of
+       (x,y) | B.null y  -> return orig
+             | otherwise -> do
+                  let (u,v) = B.breakSubstring ")" $ B.drop 4 y
+                  let url = toString
+                          $ case B.take 1 u of
+                                 "\"" -> B.takeWhile (/='"') $ B.drop 1 u
+                                 _    -> u
+                  (raw, mime) <- getRaw userdata "" (d </> url)
+                  rest <- cssURLs userdata d v
+                  let enc = "data:" `B.append` fromString mime `B.append`
+                               ";base64," `B.append` (encode raw)
+                  return $ x `B.append` "url(" `B.append` enc `B.append` rest
+
+getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
+getRaw userdata mimetype src = do
+  let ext = map toLower $ takeExtension src
+  (raw, respMime) <- getItem userdata src
+  let raw' = if ext == ".gz"
+                then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
+                      $ [raw]
+                else raw
+  let mime = case (mimetype, respMime) of
+                  ("",Nothing) -> error
+                         $ "Could not determine mime type for `" ++ src ++ "'"
+                  (x, Nothing) -> x
+                  (_, Just x ) -> x
+  result <- if mime == "text/css"
+               then cssURLs userdata (takeDirectory src) raw'
+               else return raw'
+  return (result, mime)
+
+-- | Convert HTML into self-contained HTML, incorporating images,
+-- scripts, and CSS using data: URIs.  Items specified using absolute
+-- URLs will be downloaded; those specified using relative URLs will
+-- be sought first relative to the working directory, then relative
+-- to the user data directory (if the first parameter is 'Just'
+-- a directory), and finally relative to pandoc's default data
+-- directory.
+makeSelfContained :: Maybe FilePath -> String -> IO String
+makeSelfContained userdata inp = do
+  let tags = parseTags inp
+  out' <- mapM (convertTag userdata) tags
+  return $ renderTagsOptions renderOptions{ optMinimize = (\t -> t == "br"
+                 || t == "img" || t == "meta" || t == "link" ) } out'
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 9717e1bc8..cd5b19164 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Shared (
                      -- * List processing
                      splitBy,
                      splitByIndices,
+                     splitStringByIndices,
                      substitute,
                      -- * Text processing
                      backslashEscapes,
@@ -44,8 +45,9 @@ module Text.Pandoc.Shared (
                      camelCaseToHyphenated,
                      toRomanNumeral,
                      escapeURI,
-                     unescapeURI,
                      tabFilter,
+                     -- * Date/time
+                     normalizeDate,
                      -- * Pandoc block and inline list processing
                      orderedListMarkers,
                      normalizeSpaces,
@@ -68,21 +70,31 @@ module Text.Pandoc.Shared (
                      inDirectory,
                      findDataFile,
                      readDataFile,
+                     -- * Error handling
+                     err,
+                     warn,
                     ) where
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
-import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
-import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
-                   isLetter, isDigit )
+import qualified Text.Pandoc.UTF8 as UTF8
+import System.Environment (getProgName)
+import System.Exit (exitWith, ExitCode(..))
+import Data.Char ( toLower, isLower, isUpper, isAlpha,
+                   isLetter, isDigit, isSpace )
 import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
-import Codec.Binary.UTF8.String ( encodeString, decodeString )
+import Network.URI ( escapeURIString )
 import System.Directory
 import System.FilePath ( (</>) )
 import Data.Generics (Typeable, Data)
 import qualified Control.Monad.State as S
+import Control.Monad (msum)
 import Paths_pandoc (getDataFileName)
+import Text.Pandoc.Highlighting (Style, pygments)
+import Text.Pandoc.Pretty (charWidth)
+import System.Locale (defaultTimeLocale)
+import Data.Time
+import System.IO (stderr)
 
 --
 -- List processing
@@ -96,12 +108,23 @@ splitBy isSep lst =
       rest'         = dropWhile isSep rest
   in  first:(splitBy isSep rest')
 
--- | Split list into chunks divided at specified indices.
 splitByIndices :: [Int] -> [a] -> [[a]]
 splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst =
-    let (first, rest) = splitAt x lst in
-    first:(splitByIndices (map (\y -> y - x)  xs) rest)
+splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x)  xs) rest)
+  where (first, rest) = splitAt x lst
+
+-- | Split string into chunks divided at specified indices.
+splitStringByIndices :: [Int] -> [Char] -> [[Char]]
+splitStringByIndices [] lst = [lst]
+splitStringByIndices (x:xs) lst =
+  let (first, rest) = splitAt' x lst in
+  first : (splitStringByIndices (map (\y -> y - x) xs) rest)
+
+splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' _ []          = ([],[])
+splitAt' n xs | n <= 0 = ([],xs)
+splitAt' n (x:xs)      = (x:ys,zs)
+  where (ys,zs) = splitAt' (n - charWidth x) xs
 
 -- | Replace each occurrence of one sublist in a list with another.
 substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
@@ -181,16 +204,9 @@ toRomanNumeral x =
               _ | x >= 1    -> "I" ++ toRomanNumeral (x - 1)
               _             -> ""
 
--- | Escape unicode characters in a URI.  Characters that are
--- already valid in a URI, including % and ?, are left alone.
+-- | Escape whitespace in URI.
 escapeURI :: String -> String
-escapeURI = escapeURIString isAllowedInURI . encodeString
-
--- | Unescape unicode and some special characters in a URI, but
--- without introducing spaces.
-unescapeURI :: String -> String
-unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
-               decodeString . unEscapeString
+escapeURI = escapeURIString (not . isSpace)
 
 -- | Convert tabs to spaces and filter out DOS line endings.
 -- Tabs will be preserved if tab stop is set to 0.
@@ -213,6 +229,18 @@ tabFilter tabStop =
   in  go tabStop
 
 --
+-- Date/time
+--
+
+-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
+normalizeDate :: String -> Maybe String
+normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
+  (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
+   where parsetimeWith = parseTime defaultTimeLocale
+         formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
+                    "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
+
+--
 -- Pandoc block and inline list processing
 --
 
@@ -304,9 +332,9 @@ consolidateInlines (Str x : ys) =
      fromStr (Str z) = z
      fromStr _       = error "consolidateInlines - fromStr - not a Str"
 consolidateInlines (Space : ys) = Space : rest
-   where isSpace Space = True
-         isSpace _     = False
-         rest          = consolidateInlines $ dropWhile isSpace ys
+   where isSp Space = True
+         isSp _     = False
+         rest       = consolidateInlines $ dropWhile isSp ys
 consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
   Emph (xs ++ ys) : zs
 consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
@@ -334,10 +362,6 @@ stringify = queryWith go
         go (Str x) = x
         go (Code _ x) = x
         go (Math _ x) = x
-        go EmDash = "--"
-        go EnDash = "-"
-        go Apostrophe = "'"
-        go Ellipses = "..."
         go LineBreak = " "
         go _ = ""
 
@@ -458,6 +482,7 @@ data ObfuscationMethod = NoObfuscation
 -- | Varieties of HTML slide shows.
 data HTMLSlideVariant = S5Slides
                       | SlidySlides
+                      | DZSlides
                       | NoSlides
                       deriving (Show, Read, Eq)
 
@@ -488,9 +513,13 @@ data WriterOptions = WriterOptions
   , writerCiteMethod       :: CiteMethod -- ^ How to print cites
   , writerBiblioFiles      :: [FilePath] -- ^ Biblio files to use for citations
   , writerHtml5            :: Bool       -- ^ Produce HTML5
+  , writerBeamer           :: Bool       -- ^ Produce beamer LaTeX slide show
+  , writerSlideLevel       :: Maybe Int  -- ^ Force header level of slides
   , writerChapters         :: Bool       -- ^ Use "chapter" for top-level sects
   , writerListings         :: Bool       -- ^ Use listings package for code
-  , writerAscii            :: Bool       -- ^ Avoid non-ascii characters
+  , writerHighlight        :: Bool       -- ^ Highlight source code
+  , writerHighlightStyle   :: Style      -- ^ Style to use for highlighting
+  , writerSetextHeaders    :: Bool       -- ^ Use setext headers for levels 1-2 in markdown
   } deriving Show
 
 {-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-}
@@ -522,9 +551,13 @@ defaultWriterOptions =
                 , writerCiteMethod       = Citeproc
                 , writerBiblioFiles      = []
                 , writerHtml5            = False
+                , writerBeamer           = False
+                , writerSlideLevel       = Nothing
                 , writerChapters         = False
                 , writerListings         = False
-                , writerAscii            = False
+                , writerHighlight        = False
+                , writerHighlightStyle   = pygments
+                , writerSetextHeaders    = True
                 }
 
 --
@@ -554,3 +587,19 @@ findDataFile (Just u) f = do
 -- Cabal data directory.
 readDataFile :: Maybe FilePath -> FilePath -> IO String
 readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
+
+--
+-- Error reporting
+--
+
+err :: Int -> String -> IO a
+err exitCode msg = do
+  name <- getProgName
+  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+  exitWith $ ExitFailure exitCode
+  return undefined
+
+warn :: String -> IO ()
+warn msg = do
+  name <- getProgName
+  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
new file mode 100644
index 000000000..1df556d38
--- /dev/null
+++ b/src/Text/Pandoc/Slides.hs
@@ -0,0 +1,57 @@
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Slides
+   Copyright   : Copyright (C) 2012 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Utility functions for splitting documents into slides for slide
+show formats (dzslides, s5, slidy, beamer).
+-}
+module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where
+import Text.Pandoc.Definition
+
+-- | Find level of header that starts slides (defined as the least header
+-- level that occurs before a non-header/non-hrule in the blocks).
+getSlideLevel :: [Block] -> Int
+getSlideLevel = go 6
+  where go least (Header n _ : x : xs)
+                 | n < least && nonHOrHR x = go n xs
+                 | otherwise               = go least (x:xs)
+        go least (_ : xs) = go least xs
+        go least [] = least
+        nonHOrHR (Header _ _) = False
+        nonHOrHR (HorizontalRule) = False
+        nonHOrHR _ = True
+
+-- | Prepare a block list to be passed to hierarchicalize.
+prepSlides :: Int -> [Block] -> [Block]
+prepSlides slideLevel = ensureStartWithH . splitHrule
+  where splitHrule (HorizontalRule : Header n xs : ys)
+                       | n == slideLevel = Header slideLevel xs : splitHrule ys
+        splitHrule (HorizontalRule : xs) = Header slideLevel [] : splitHrule xs
+        splitHrule (x : xs)              = x : splitHrule xs
+        splitHrule []                    = []
+        ensureStartWithH bs@(Header n _:_)
+                       | n <= slideLevel = bs
+        ensureStartWithH bs              = Header slideLevel [] : bs
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 19c9a808a..336efe453 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -72,7 +72,7 @@ import Text.ParserCombinators.Parsec
 import Control.Monad (liftM, when, forM)
 import System.FilePath
 import Data.List (intercalate, intersperse)
-import Text.XHtml (primHtml, Html)
+import Text.Blaze (preEscapedString, Html)
 import Data.ByteString.Lazy.UTF8 (ByteString, fromString)
 import Text.Pandoc.Shared (readDataFile)
 import qualified Control.Exception.Extensible as E (try, IOException)
@@ -82,6 +82,8 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
                    -> String           -- ^ Name of writer 
                    -> IO (Either E.IOException String)
 getDefaultTemplate _ "native" = return $ Right ""
+getDefaultTemplate _ "json"   = return $ Right ""
+getDefaultTemplate _ "docx"   = return $ Right ""
 getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
 getDefaultTemplate user "epub" = getDefaultTemplate user "html"
 getDefaultTemplate user writer = do
@@ -110,7 +112,7 @@ instance TemplateTarget ByteString where
   toTarget = fromString
 
 instance TemplateTarget Html where
-  toTarget = primHtml
+  toTarget = preEscapedString
 
 -- | Renders a template 
 renderTemplate :: TemplateTarget a
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
new file mode 100644
index 000000000..1913eb92b
--- /dev/null
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -0,0 +1,367 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Writers.AsciiDoc
+   Copyright   : Copyright (C) 2006-2010 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of 'Pandoc' documents to asciidoc.
+
+Note that some information may be lost in conversion, due to
+expressive limitations of asciidoc.  Footnotes and table cells with
+paragraphs (or other block items) are not possible in asciidoc.
+If pandoc encounters one of these, it will insert a message indicating
+that it has omitted the construct.
+
+AsciiDoc:  <http://www.methods.co.nz/asciidoc/>
+-}
+module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates (renderTemplate)
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing hiding (blankline)
+import Text.ParserCombinators.Parsec ( runParser, GenParser )
+import Data.List ( isPrefixOf, intersperse, intercalate )
+import Text.Pandoc.Pretty
+import Control.Monad.State
+
+data WriterState = WriterState { defListMarker :: String
+                               , orderedListLevel :: Int
+                               , bulletListLevel  :: Int
+                               }
+
+-- | Convert Pandoc to AsciiDoc.
+writeAsciiDoc :: WriterOptions -> Pandoc -> String
+writeAsciiDoc opts document =
+  evalState (pandocToAsciiDoc opts document) WriterState{
+      defListMarker = "::"
+    , orderedListLevel = 1
+    , bulletListLevel = 1
+    }
+
+-- | Return asciidoc representation of document.
+pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String
+pandocToAsciiDoc opts (Pandoc (Meta title authors date) blocks) = do
+  title' <- inlineListToAsciiDoc opts title
+  let title'' = title' $$ text (replicate (offset title') '=')
+  authors' <- mapM (inlineListToAsciiDoc opts) authors
+  -- asciidoc only allows a singel author
+  date' <- inlineListToAsciiDoc opts date
+  let titleblock = not $ null title && null authors && null date
+  body <- blockListToAsciiDoc opts blocks
+  let colwidth = if writerWrapText opts
+                    then Just $ writerColumns opts
+                    else Nothing
+  let main = render colwidth body
+  let context  = writerVariables opts ++
+                 [ ("body", main)
+                 , ("title", render colwidth title'')
+                 , ("date", render colwidth date')
+                 ] ++
+                 [ ("toc", "yes") | writerTableOfContents opts &&
+                                    writerStandalone opts ] ++
+                 [ ("titleblock", "yes") | titleblock ] ++
+                 [ ("author", render colwidth a) | a <- authors' ]
+  if writerStandalone opts
+     then return $ renderTemplate context $ writerTemplate opts
+     else return main
+
+-- | Escape special characters for AsciiDoc.
+escapeString :: String -> String
+escapeString = escapeStringUsing escs
+  where escs = backslashEscapes "{"
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: GenParser Char ParserState Char
+olMarker = do (start, style', delim) <- anyOrderedListMarker
+              if delim == Period &&
+                          (style' == UpperAlpha || (style' == UpperRoman &&
+                          start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+                          then spaceChar >> spaceChar
+                          else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+  case runParser olMarker defaultParserState "para start" (take 10 str) of
+         Left  _  -> False
+         Right _  -> True
+
+-- | Convert Pandoc block element to asciidoc.
+blockToAsciiDoc :: WriterOptions -- ^ Options
+                -> Block         -- ^ Block element
+                -> State WriterState Doc
+blockToAsciiDoc _ Null = return empty
+blockToAsciiDoc opts (Plain inlines) = do
+  contents <- inlineListToAsciiDoc opts inlines
+  return $ contents <> cr
+blockToAsciiDoc opts (Para inlines) = do
+  contents <- inlineListToAsciiDoc opts inlines
+  -- escape if para starts with ordered list marker
+  let esc = if beginsWithOrderedListMarker (render Nothing contents)
+               then text "\\"
+               else empty
+  return $ esc <> contents <> blankline
+blockToAsciiDoc _ (RawBlock _ _) = return empty
+blockToAsciiDoc _ HorizontalRule =
+  return $ blankline <> text "'''''" <> blankline
+blockToAsciiDoc opts (Header level inlines) = do
+  contents <- inlineListToAsciiDoc opts inlines
+  let len = offset contents
+  return $ contents <> cr <>
+         (case level of
+               1  -> text $ replicate len '-'
+               2  -> text $ replicate len '~'
+               3  -> text $ replicate len '^'
+               4  -> text $ replicate len '+'
+               _  -> empty) <> blankline
+blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $
+  flush (attrs <> dashes <> space <> attrs <> cr <> text str <>
+           cr <> dashes) <> blankline
+     where dashes  = text $ replicate (maximum $ map length $ lines str) '-'
+           attrs = if null classes
+                      then empty
+                      else text $ intercalate "," $ "code" : classes
+blockToAsciiDoc opts (BlockQuote blocks) = do
+  contents <- blockListToAsciiDoc opts blocks
+  let isBlock (BlockQuote _) = True
+      isBlock _              = False
+  -- if there are nested block quotes, put in an open block
+  let contents' = if any isBlock blocks
+                     then "--" $$ contents $$ "--"
+                     else contents
+  let cols = offset contents'
+  let bar = text $ replicate cols '_'
+  return $ bar $$ chomp contents' $$ bar <> blankline
+blockToAsciiDoc opts (Table caption aligns widths headers rows) =  do
+  caption' <- inlineListToAsciiDoc opts caption
+  let caption'' = if null caption
+                     then empty
+                     else "." <> caption' <> cr
+  let isSimple = all (== 0) widths
+  let relativePercentWidths = if isSimple
+                                 then widths
+                                 else map (/ (sum widths)) widths
+  let widths'' :: [Integer]
+      widths'' = map (floor . (* 100)) relativePercentWidths
+  -- ensure that the widths sum to 100
+  let widths' = case widths'' of
+                     _ | isSimple -> widths''
+                     (w:ws) | sum (w:ws) < 100
+                               -> (100 - sum ws) : ws
+                     ws        -> ws
+  let totalwidth :: Integer
+      totalwidth = floor $ sum widths * 100
+  let colspec al wi = (case al of
+                         AlignLeft    -> "<"
+                         AlignCenter  -> "^"
+                         AlignRight   -> ">"
+                         AlignDefault -> "") ++
+                      if wi == 0 then "" else (show wi ++ "%")
+  let headerspec = if all null headers
+                      then empty
+                      else text "options=\"header\","
+  let widthspec = if totalwidth == 0
+                     then empty
+                     else text "width="
+                          <> doubleQuotes (text $ show totalwidth ++ "%")
+                          <> text ","
+  let tablespec = text "["
+         <> widthspec
+         <> text "cols="
+         <> doubleQuotes (text $ intercalate ","
+             $ zipWith colspec aligns widths')
+         <> text ","
+         <> headerspec <> text "]"
+  let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
+                              return $ text "|" <> chomp d
+      makeCell [Para x]  = makeCell [Plain x]
+      makeCell _         = return $ text "|" <> "[multiblock cell omitted]"
+  let makeRow cells = hsep `fmap` mapM makeCell cells
+  rows' <- mapM makeRow rows
+  head' <- makeRow headers
+  let head'' = if all null headers then empty else head'
+  let colwidth = if writerWrapText opts
+                    then writerColumns opts
+                    else 100000
+  let maxwidth = maximum $ map offset (head':rows')
+  let body = if maxwidth > colwidth then vsep rows' else vcat rows'
+  let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '='
+  return $
+    caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
+blockToAsciiDoc opts (BulletList items) = do
+  contents <- mapM (bulletListItemToAsciiDoc opts) items
+  return $ cat contents <> blankline
+blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
+  let sty' = case sty of
+                  UpperRoman -> UpperAlpha
+                  LowerRoman -> LowerAlpha
+                  x          -> x
+  let markers  = orderedListMarkers (1, sty', Period)  -- start num not used
+  let markers' = map (\m -> if length m < 3
+                               then m ++ replicate (3 - length m) ' '
+                               else m) markers
+  contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $
+              zip markers' items
+  return $ cat contents <> blankline
+blockToAsciiDoc opts (DefinitionList items) = do
+  contents <- mapM (definitionListItemToAsciiDoc opts) items
+  return $ cat contents <> blankline
+
+-- | Convert bullet list item (list of blocks) to asciidoc.
+bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToAsciiDoc opts blocks = do
+  let addBlock :: Doc -> Block -> State WriterState Doc
+      addBlock d b | isEmpty d    = chomp `fmap` blockToAsciiDoc opts b
+      addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+                                       return $ d <> cr <> chomp x
+      addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+                                          return $ d <> cr <> chomp x
+      addBlock d b = do x <- blockToAsciiDoc opts b
+                        return $ d <> cr <> text "+" <> cr <> chomp x
+  lev <- bulletListLevel `fmap` get
+  modify $ \s -> s{ bulletListLevel = lev + 1 }
+  contents <- foldM addBlock empty blocks
+  modify $ \s -> s{ bulletListLevel = lev }
+  let marker = text (replicate lev '*')
+  return $ marker <> space <> contents <> cr
+
+-- | Convert ordered list item (a list of blocks) to asciidoc.
+orderedListItemToAsciiDoc :: WriterOptions -- ^ options
+                          -> String        -- ^ list item marker
+                          -> [Block]       -- ^ list item (list of blocks)
+                          -> State WriterState Doc
+orderedListItemToAsciiDoc opts marker blocks = do
+  let addBlock :: Doc -> Block -> State WriterState Doc
+      addBlock d b | isEmpty d    = chomp `fmap` blockToAsciiDoc opts b
+      addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+                                       return $ d <> cr <> chomp x
+      addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+                                          return $ d <> cr <> chomp x
+      addBlock d b = do x <- blockToAsciiDoc opts b
+                        return $ d <> cr <> text "+" <> cr <> chomp x
+  lev <- orderedListLevel `fmap` get
+  modify $ \s -> s{ orderedListLevel = lev + 1 }
+  contents <- foldM addBlock empty blocks
+  modify $ \s -> s{ orderedListLevel = lev }
+  return $ text marker <> space <> contents <> cr
+
+-- | Convert definition list item (label, list of blocks) to asciidoc.
+definitionListItemToAsciiDoc :: WriterOptions
+                             -> ([Inline],[[Block]])
+                             -> State WriterState Doc
+definitionListItemToAsciiDoc opts (label, defs) = do
+  labelText <- inlineListToAsciiDoc opts label
+  marker <- defListMarker `fmap` get
+  if marker == "::"
+     then modify (\st -> st{ defListMarker = ";;"})
+     else modify (\st -> st{ defListMarker = "::"})
+  let divider = cr <> text "+" <> cr
+  let defsToAsciiDoc :: [Block] -> State WriterState Doc
+      defsToAsciiDoc ds = (vcat . intersperse divider . map chomp)
+           `fmap` mapM (blockToAsciiDoc opts) ds
+  defs' <- mapM defsToAsciiDoc defs
+  modify (\st -> st{ defListMarker = marker })
+  let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs'
+  return $ labelText <> text marker <> cr <> contents <> cr
+
+-- | Convert list of Pandoc block elements to asciidoc.
+blockListToAsciiDoc :: WriterOptions -- ^ Options
+                    -> [Block]       -- ^ List of block elements
+                    -> State WriterState Doc
+blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
+
+-- | Convert list of Pandoc inline elements to asciidoc.
+inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToAsciiDoc opts lst =
+  mapM (inlineToAsciiDoc opts) lst >>= return . cat
+
+-- | Convert Pandoc inline element to asciidoc.
+inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
+inlineToAsciiDoc opts (Emph lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "_" <> contents <> "_"
+inlineToAsciiDoc opts (Strong lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "*" <> contents <> "*"
+inlineToAsciiDoc opts (Strikeout lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "[line-through]*" <> contents <> "*"
+inlineToAsciiDoc opts (Superscript lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "^" <> contents <> "^"
+inlineToAsciiDoc opts (Subscript lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "~" <> contents <> "~"
+inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Quoted SingleQuote lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "`" <> contents <> "'"
+inlineToAsciiDoc opts (Quoted DoubleQuote lst) = do
+  contents <- inlineListToAsciiDoc opts lst
+  return $ "``" <> contents <> "''"
+inlineToAsciiDoc _ (Code _ str) = return $
+  text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`"
+inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str
+inlineToAsciiDoc _ (Math InlineMath str) =
+  return $ "latexmath:[$" <> text str <> "$]"
+inlineToAsciiDoc _ (Math DisplayMath str) =
+  return $ "latexmath:[\\[" <> text str <> "\\]]"
+inlineToAsciiDoc _ (RawInline _ _) = return empty
+inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
+inlineToAsciiDoc _ Space = return space
+inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Link txt (src, _tit)) = do
+-- relative:  link:downloads/foo.zip[download foo.zip]
+-- abs:  http://google.cod[Google]
+-- or my@email.com[email john]
+  linktext <- inlineListToAsciiDoc opts txt
+  let isRelative = ':' `notElem` src
+  let prefix = if isRelative
+                  then text "link:"
+                  else empty
+  let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+  let useAuto = case txt of
+                      [Code _ s] | s == srcSuffix -> True
+                      _                           -> False
+  return $ if useAuto
+              then text srcSuffix
+              else prefix <> text src <> "[" <> linktext <> "]"
+inlineToAsciiDoc opts (Image alternate (src, tit)) = do
+-- image:images/logo.png[Company logo, title="blah"]
+  let txt = if (null alternate) || (alternate == [Str ""])
+               then [Str "image"]
+               else alternate
+  linktext <- inlineListToAsciiDoc opts txt
+  let linktitle = if null tit
+                     then empty
+                     else text $ ",title=\"" ++ tit ++ "\""
+  return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
+inlineToAsciiDoc opts (Note [Para inlines]) =
+  inlineToAsciiDoc opts (Note [Plain inlines])
+inlineToAsciiDoc opts (Note [Plain inlines]) = do
+  contents  <- inlineListToAsciiDoc opts inlines
+  return $ text "footnote:[" <> contents <> "]"
+-- asciidoc can't handle blank lines in notes
+inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 763f77d7c..dfdf7a140 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -31,12 +31,13 @@ Conversion of 'Pandoc' format into ConTeXt.
 module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
+import Text.Pandoc.Generic (queryWith)
 import Text.Printf ( printf )
 import Data.List ( intercalate )
 import Control.Monad.State
 import Text.Pandoc.Pretty
 import Text.Pandoc.Templates ( renderTemplate )
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( isURI, unEscapeString )
 
 data WriterState = 
   WriterState { stNextRef          :: Int  -- number of next URL reference
@@ -68,13 +69,14 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do
   datetext <-  if null date
                   then return ""
                   else liftM (render colwidth) $ inlineListToConTeXt date
-  body <- blockListToConTeXt blocks
-  let main = render colwidth $ body
+  body <- mapM (elementToConTeXt options) $ hierarchicalize blocks
+  let main = (render colwidth . vcat) body
   let context  = writerVariables options ++
                  [ ("toc", if writerTableOfContents options then "yes" else "")
                  , ("body", main)
                  , ("title", titletext)
                  , ("date", datetext) ] ++
+                 [ ("number-sections", "yes") | writerNumberSections options ] ++
                  [ ("author", a) | a <- authorstext ]
   return $ if writerStandalone options
               then renderTemplate context $ writerTemplate options
@@ -101,12 +103,24 @@ escapeCharForConTeXt ch =
     ']'    -> "{]}"
     '_'    -> "\\letterunderscore{}"
     '\160' -> "~"
+    '\x2014' -> "---"
+    '\x2013' -> "--"
+    '\x2019' -> "'"
+    '\x2026' -> "\\ldots{}"
     x      -> [x]
 
 -- | Escape string for ConTeXt
 stringToConTeXt :: String -> String
 stringToConTeXt = concatMap escapeCharForConTeXt
 
+-- | Convert Elements to ConTeXt
+elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
+elementToConTeXt _ (Blk block) = blockToConTeXt block
+elementToConTeXt opts (Sec level _ id' title' elements) = do
+  header' <- sectionHeader id' level title'
+  innerContents <- mapM (elementToConTeXt opts) elements
+  return $ vcat (header' : innerContents)
+
 -- | Convert Pandoc block element to ConTeXt.
 blockToConTeXt :: Block 
                -> State WriterState Doc
@@ -166,18 +180,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
 blockToConTeXt (DefinitionList lst) =
   liftM vcat $ mapM defListItemToConTeXt lst
 blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
-blockToConTeXt (Header level lst) = do
-  contents <- inlineListToConTeXt lst
-  st <- get
-  let opts = stOptions st
-  let base = if writerNumberSections opts then "section" else "subject"
-  let level' = if writerChapters opts then level - 1 else level
-  return $ if level' >= 1 && level' <= 5
-               then char '\\' <> text (concat (replicate (level' - 1) "sub")) <>
-                         text base <> char '{' <> contents <> char '}' <> blankline
-               else if level' == 0
-                       then "\\chapter{" <> contents <> "}"
-                       else contents <> blankline
+-- If this is ever executed, provide a default for the reference identifier.
+blockToConTeXt (Header level lst) = sectionHeader "" level lst
 blockToConTeXt (Table caption aligns widths heads rows) = do
     let colDescriptor colWidth alignment = (case alignment of
                                                AlignLeft    -> 'l' 
@@ -213,8 +217,8 @@ defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
 defListItemToConTeXt (term, defs) = do
   term' <- inlineListToConTeXt term
   def'  <- liftM vsep $ mapM blockListToConTeXt defs
-  return $ "\\startdescr" <> braces term' $$ nest 2 def' $$
-           "\\stopdescr" <> blankline
+  return $ "\\startdescription" <> braces term' $$ nest 2 def' $$
+           "\\stopdescription" <> blankline
 
 -- | Convert list of block elements to ConTeXt.
 blockListToConTeXt :: [Block] -> State WriterState Doc
@@ -257,10 +261,6 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do
   contents <- inlineListToConTeXt lst
   return $ "\\quotation" <> braces contents
 inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
-inlineToConTeXt Apostrophe = return $ char '\''
-inlineToConTeXt EmDash = return "---"
-inlineToConTeXt EnDash = return "--"
-inlineToConTeXt Ellipses = return "\\ldots{}"
 inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str
 inlineToConTeXt (Math InlineMath str) =
   return $ char '$' <> text str <> char '$'
@@ -271,23 +271,69 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str
 inlineToConTeXt (RawInline _ _) = return empty
 inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
 inlineToConTeXt Space = return space
-inlineToConTeXt (Link [Code _ str] (src, tit)) = -- since ConTeXt has its own 
-  inlineToConTeXt (Link [Str str] (src, tit))  -- way of printing links... 
-inlineToConTeXt (Link txt (src, _)) = do
+-- autolink
+inlineToConTeXt (Link [Code _ str] (src, tit)) = inlineToConTeXt (Link
+    [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"]
+    (src, tit))
+-- Handle HTML-like internal document references to sections
+inlineToConTeXt (Link txt          (('#' : ref), _)) = do
+  opts <- gets stOptions
+  label <-  inlineListToConTeXt txt
+  return $ text "\\in"
+           <> braces (if writerNumberSections opts
+                         then label <+> text "(\\S"
+                         else label)  -- prefix
+           <> braces (if writerNumberSections opts
+                         then text ")"
+                         else empty)  -- suffix
+           <> brackets (text ref)
+
+inlineToConTeXt (Link txt          (src, _))      = do
   st <- get
   let next = stNextRef st
   put $ st {stNextRef = next + 1}
-  let ref = show next
-  label <- inlineListToConTeXt txt
-  return $ "\\useURL" <> brackets (text ref) <> brackets (text src) <>
-           brackets empty <> brackets label <>
-           "\\from" <> brackets (text ref)
+  let ref = "url" ++ show next
+  label <-  inlineListToConTeXt txt
+  return $ "\\useURL"
+           <> brackets (text ref)
+           <> brackets (text $ escapeStringUsing [('#',"\\#")] src)
+           <> brackets empty
+           <> brackets label
+           <> "\\from"
+           <> brackets (text ref)
 inlineToConTeXt (Image _ (src, _)) = do
-  let src' = if isAbsoluteURI src
+  let src' = if isURI src
                 then src
                 else unEscapeString src
   return $ braces $ "\\externalfigure" <> brackets (text src')
 inlineToConTeXt (Note contents) = do
   contents' <- blockListToConTeXt contents
-  return $ text "\\footnote{" <>
-           nest 2 contents' <> char '}'
+  let codeBlock x@(CodeBlock _ _) = [x]
+      codeBlock _ = []
+  let codeBlocks = queryWith codeBlock contents
+  return $ if null codeBlocks
+              then text "\\footnote{" <> nest 2 contents' <> char '}'
+              else text "\\startbuffer " <> nest 2 contents' <>
+                   text "\\stopbuffer\\footnote{\\getbuffer}"
+
+-- | Craft the section header, inserting the secton reference, if supplied.
+sectionHeader :: [Char]
+              -> Int
+              -> [Inline]
+              -> State WriterState Doc
+sectionHeader ident hdrLevel lst = do
+  contents <- inlineListToConTeXt lst
+  st <- get
+  let opts = stOptions st
+  let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel
+  return $ if level' >= 1 && level' <= 5
+               then char '\\'
+                    <> text (concat (replicate (level' - 1) "sub"))
+                    <> text "section"
+                    <> (if (not . null) ident then brackets (text ident) else empty)
+                    <> braces contents
+                    <> blankline
+               else if level' == 0
+                       then "\\chapter{" <> contents <> "}"
+                       else contents <> blankline
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 29c042cf9..1bcf99dcf 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -35,8 +35,11 @@ import Text.Pandoc.Templates (renderTemplate)
 import Text.Pandoc.Readers.TeXMath
 import Data.List ( isPrefixOf, intercalate, isSuffixOf )
 import Data.Char ( toLower )
-import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
 import Text.Pandoc.Pretty
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+import Data.Generics (everywhere, mkT)
 
 -- | Convert list of authors to a docbook <author> section
 authorToDocbook :: WriterOptions -> [Inline] -> Doc
@@ -69,34 +72,39 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
                     then Just $ writerColumns opts
                     else Nothing
       render' = render colwidth
-      opts' = if "</book>" `isSuffixOf`
+      opts' = if "/book>" `isSuffixOf`
                       (removeTrailingSpace $ writerTemplate opts)
                  then opts{ writerChapters = True }
                  else opts
-      main     = render' $ vcat (map (elementToDocbook opts') elements)
+      startLvl = if writerChapters opts' then 0 else 1
+      main     = render' $ vcat (map (elementToDocbook opts' startLvl) elements)
       context = writerVariables opts ++
                 [ ("body", main)
                 , ("title", render' title)
                 , ("date", render' date) ] ++
-                [ ("author", render' a) | a <- authors ]
+                [ ("author", render' a) | a <- authors ] ++
+                [ ("mathml", "yes") | case writerHTMLMathMethod opts of
+                                            MathML _ -> True
+                                            _ -> False ]
   in  if writerStandalone opts
          then renderTemplate context $ writerTemplate opts
          else main
 
 -- | Convert an Element to Docbook.
-elementToDocbook :: WriterOptions -> Element -> Doc
-elementToDocbook opts (Blk block) = blockToDocbook opts block 
-elementToDocbook opts (Sec _ _num id' title elements) =
+elementToDocbook :: WriterOptions -> Int -> Element -> Doc
+elementToDocbook opts _   (Blk block) = blockToDocbook opts block 
+elementToDocbook opts lvl (Sec _ _num id' title elements) =
   -- Docbook doesn't allow sections with no content, so insert some if needed
   let elements' = if null elements
                     then [Blk (Para [])]
                     else elements
-      tag = if writerChapters opts
-               then "chapter"
-               else "section"
+      tag = case lvl of
+                 n | n == 0           -> "chapter"
+                   | n >= 1 && n <= 5 -> "sect" ++ show n
+                   | otherwise        -> "simplesect"
   in  inTags True tag [("id",id')] $
       inTagsSimple "title" (inlinesToDocbook opts title) $$
-      vcat (map (elementToDocbook opts{ writerChapters = False }) elements') 
+      vcat (map (elementToDocbook opts (lvl + 1)) elements')
 
 -- | Convert a list of Pandoc blocks to Docbook.
 blocksToDocbook :: WriterOptions -> [Block] -> Doc
@@ -248,14 +256,27 @@ inlineToDocbook opts (Quoted _ lst) =
   inTagsSimple "quote" $ inlinesToDocbook opts lst
 inlineToDocbook opts (Cite _ lst) =
   inlinesToDocbook opts lst 
-inlineToDocbook _ Apostrophe = char '\''
-inlineToDocbook _ Ellipses = text "…"
-inlineToDocbook _ EmDash = text "—"
-inlineToDocbook _ EnDash = text "–"
 inlineToDocbook _ (Code _ str) = 
   inTagsSimple "literal" $ text (escapeStringForXML str)
-inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
-inlineToDocbook _ (RawInline _ _) = empty
+inlineToDocbook opts (Math t str)
+  | isMathML (writerHTMLMathMethod opts) =
+    case texMathToMathML dt str of
+      Right r -> inTagsSimple tagtype
+                 $ text $ Xml.ppcElement conf
+                 $ fixNS
+                 $ removeAttr r
+      Left  _ -> inlinesToDocbook opts
+                 $ readTeXMath str
+  | otherwise = inlinesToDocbook opts $ readTeXMath str
+     where (dt, tagtype) = case t of
+                            InlineMath  -> (DisplayInline,"inlineequation")
+                            DisplayMath -> (DisplayBlock,"informalequation")
+           conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+           removeAttr e = e{ Xml.elAttribs = [] }
+           fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
+           fixNS = everywhere (mkT fixNS')
+inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
+                                  | otherwise                     = empty
 inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
 inlineToDocbook _ Space = space
 inlineToDocbook opts (Link txt (src, _)) =
@@ -280,3 +301,7 @@ inlineToDocbook _ (Image _ (src, tit)) =
       titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
 inlineToDocbook opts (Note contents) = 
   inTagsIndented "footnote" $ blocksToDocbook opts contents
+
+isMathML :: HTMLMathMethod -> Bool
+isMathML (MathML _) = True
+isMathML _          = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
new file mode 100644
index 000000000..22278be7e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -0,0 +1,666 @@
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Writers.Docx
+   Copyright   : Copyright (C) 2012 John MacFarlane
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of 'Pandoc' documents to docx.
+-}
+module Text.Pandoc.Writers.Docx ( writeDocx ) where
+import Data.List ( intercalate )
+import System.FilePath ( (</>) )
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Map as M
+import Data.ByteString.Lazy.UTF8 ( fromString, toString )
+import Text.Pandoc.UTF8 as UTF8
+import System.IO ( stderr )
+import Codec.Archive.Zip
+import Data.Time.Clock.POSIX
+import Paths_pandoc ( getDataFileName )
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic
+import System.Directory
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Readers.TeXMath
+import Text.Pandoc.Highlighting ( highlight )
+import Text.Highlighting.Kate.Types ()
+import Text.XML.Light
+import Text.TeXMath
+import Control.Monad.State
+import Text.Highlighting.Kate
+
+data WriterState = WriterState{
+         stTextProperties :: [Element]
+       , stParaProperties :: [Element]
+       , stFootnotes      :: [Element]
+       , stSectionIds     :: [String]
+       , stExternalLinks  :: M.Map String String
+       , stImages         :: M.Map FilePath (String, B.ByteString)
+       , stListLevel      :: Int
+       , stListMarker     :: ListMarker
+       , stNumStyles      :: M.Map ListMarker Int
+       , stLists          :: [ListMarker]
+       }
+
+data ListMarker = NoMarker
+                | BulletMarker
+                | NumberMarker ListNumberStyle ListNumberDelim Int
+                deriving (Show, Read, Eq, Ord)
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+        stTextProperties = []
+      , stParaProperties = []
+      , stFootnotes      = []
+      , stSectionIds     = []
+      , stExternalLinks  = M.empty
+      , stImages         = M.empty
+      , stListLevel      = -1
+      , stListMarker     = NoMarker
+      , stNumStyles      = M.fromList [(NoMarker, 0)]
+      , stLists          = [NoMarker]
+      }
+
+type WS a = StateT WriterState IO a
+
+showTopElement' :: Element -> String
+showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x
+
+mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode s attrs =
+  add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
+
+-- | Produce an Docx file from a Pandoc document.
+writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx
+          -> WriterOptions  -- ^ Writer options
+          -> Pandoc         -- ^ Document to convert
+          -> IO B.ByteString
+writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do
+  let datadir = writerUserDataDir opts
+  refArchive <- liftM toArchive $
+       case mbRefDocx of
+             Just f -> B.readFile f
+             Nothing -> do
+               let defaultDocx = getDataFileName "reference.docx" >>= B.readFile
+               case datadir of
+                     Nothing  -> defaultDocx
+                     Just d   -> do
+                        exists <- doesFileExist (d </> "reference.docx")
+                        if exists
+                           then B.readFile (d </> "reference.docx")
+                           else defaultDocx
+
+  (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc)
+                       defaultWriterState
+  epochtime <- floor `fmap` getPOSIXTime
+  let imgs = M.elems $ stImages st
+  let imgPath ident img = "media/" ++ ident ++
+                            case imageType img of
+                                  Just Png  -> ".png"
+                                  Just Jpeg -> ".jpeg"
+                                  Just Gif  -> ".gif"
+                                  Nothing   -> ""
+  let toImgRel (ident,img) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",imgPath ident img)] ()
+  let newrels = map toImgRel imgs
+  let relpath = "word/_rels/document.xml.rels"
+  let reldoc = case findEntryByPath relpath refArchive >>=
+                    parseXMLDoc . toString . fromEntry of
+                      Just d  -> d
+                      Nothing -> error $ relpath ++ "missing in reference docx"
+  let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
+  -- create entries for images
+  let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img)
+         epochtime img
+  let imageEntries = map toImageEntry imgs
+  -- NOW get list of external links and images from this, and do what's needed
+  let toLinkRel (src,ident) =  mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
+  let newrels' = map toLinkRel $ M.toList $ stExternalLinks st
+  let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' }
+  let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc''
+  let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents
+  -- styles
+  let newstyles = styleToOpenXml $ writerHighlightStyle opts
+  let stylepath = "word/styles.xml"
+  let styledoc = case findEntryByPath stylepath refArchive >>=
+                      parseXMLDoc . toString . fromEntry of
+                        Just d  -> d
+                        Nothing -> error $ stylepath ++ "missing in reference docx"
+  let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
+  let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc'
+  -- construct word/numbering.xml
+  let numpath = "word/numbering.xml"
+  let numEntry = toEntry numpath epochtime $ fromString $ showTopElement'
+                 $ mkNumbering (stNumStyles st) (stLists st)
+  let docPropsPath = "docProps/core.xml"
+  let docProps = mknode "cp:coreProperties"
+          [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
+          ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
+          ,("xmlns:dcterms","http://purl.org/dc/terms/")
+          ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
+          ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
+          $ mknode "dc:title" [] (stringify tit)
+          : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")]
+            (maybe "" id $ normalizeDate $ stringify date)
+          : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
+          : map (mknode "dc:creator" [] . stringify) auths
+  let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps
+  let relsPath = "_rels/.rels"
+  rels <- case findEntryByPath relsPath refArchive of
+                   Just e  -> return $ toString $ fromEntry e
+                   Nothing -> err 57 "could not find .rels/_rels in reference docx"
+  -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word
+  let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
+                  "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"
+                  rels
+  let relsEntry = toEntry relsPath epochtime $ fromString rels'
+  let archive = foldr addEntryToArchive refArchive $
+                  relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries
+  return $ fromArchive archive
+
+styleToOpenXml :: Style -> [Element]
+styleToOpenXml style = parStyle : map toStyle alltoktypes
+  where alltoktypes = enumFromTo KeywordTok NormalTok
+        toStyle toktype = mknode "w:style" [("w:type","character"),
+                           ("w:customStyle","1"),("w:styleId",show toktype)]
+                             [ mknode "w:name" [("w:val",show toktype)] ()
+                             , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
+                             , mknode "w:rPr" [] $
+                               [ mknode "w:color" [("w:val",tokCol toktype)] ()
+                                 | tokCol toktype /= "auto" ] ++
+                               [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+                                 | tokBg toktype /= "auto" ] ++
+                               [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
+                               [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
+                               [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
+                             ]
+        tokStyles = tokenStyles style
+        tokFeature f toktype = maybe False f $ lookup toktype tokStyles
+        tokCol toktype = maybe "auto" (drop 1 . fromColor)
+                         $ (tokenColor =<< lookup toktype tokStyles)
+                           `mplus` defaultColor style
+        tokBg toktype = maybe "auto" (drop 1 . fromColor)
+                         $ (tokenBackground =<< lookup toktype tokStyles)
+                           `mplus` backgroundColor style
+        parStyle = mknode "w:style" [("w:type","paragraph"),
+                           ("w:customStyle","1"),("w:styleId","SourceCode")]
+                             [ mknode "w:name" [("w:val","Source Code")] ()
+                             , mknode "w:basedOn" [("w:val","Normal")] ()
+                             , mknode "w:link" [("w:val","VerbatimChar")] ()
+                             , mknode "w:pPr" []
+                               $ mknode "w:wordWrap" [("w:val","off")] ()
+                               : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
+                                 $ backgroundColor style )
+                             ]
+
+mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> Element
+mkNumbering markers lists =
+  mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
+   $  map mkAbstractNum (M.toList markers)
+   ++ zipWith (mkNum markers) lists [1..(length lists)]
+
+mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element
+mkNum markers marker numid =
+  mknode "w:num" [("w:numId",show numid)]
+   $ mknode "w:abstractNumId" [("w:val",show absnumid)] ()
+   : case marker of
+       NoMarker     -> []
+       BulletMarker -> []
+       NumberMarker _ _ start ->
+          map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
+              $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
+   where absnumid = maybe 0 id $ M.lookup marker markers
+
+mkAbstractNum :: (ListMarker,Int) -> Element
+mkAbstractNum (marker,numid) =
+  mknode "w:abstractNum" [("w:abstractNumId",show numid)]
+    $ mknode "w:multiLevelType" [("w:val","multilevel")] ()
+    : map (mkLvl marker) [0..6]
+
+mkLvl :: ListMarker -> Int -> Element
+mkLvl marker lvl =
+  mknode "w:lvl" [("w:ilvl",show lvl)] $
+    [ mknode "w:start" [("w:val",start)] ()
+      | marker /= NoMarker && marker /= BulletMarker ] ++
+    [ mknode "w:numFmt" [("w:val",fmt)] ()
+    , mknode "w:lvlText" [("w:val",lvltxt)] ()
+    , mknode "w:lvlJc" [("w:val","left")] ()
+    , mknode "w:pPr" []
+      [ mknode "w:tabs" []
+        $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
+      , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
+      ]
+    ]
+    where (fmt, lvltxt, start) =
+            case marker of
+                 NoMarker             -> ("bullet"," ","1")
+                 BulletMarker         -> ("bullet",bulletFor lvl,"1")
+                 NumberMarker st de n -> (styleFor st lvl
+                                         ,patternFor de ("%" ++ show (lvl + 1))
+                                         ,show n)
+          step = 720
+          hang = 480
+          bulletFor 0 = "\8226"
+          bulletFor 1 = "\9702"
+          bulletFor 2 = "\8227"
+          bulletFor 3 = "\8259"
+          bulletFor 4 = "\8226"
+          bulletFor 5 = "\9702"
+          bulletFor _ = "\8227"
+          styleFor UpperAlpha _ = "upperLetter"
+          styleFor LowerAlpha _ = "lowerLetter"
+          styleFor UpperRoman _ = "upperRoman"
+          styleFor LowerRoman _ = "lowerRoman"
+          styleFor Decimal _ = "decimal"
+          styleFor DefaultStyle 1 = "decimal"
+          styleFor DefaultStyle 2 = "lowerLetter"
+          styleFor DefaultStyle 3 = "lowerRoman"
+          styleFor DefaultStyle 4 = "decimal"
+          styleFor DefaultStyle 5 = "lowerLetter"
+          styleFor DefaultStyle 6 = "lowerRoman"
+          styleFor _ _ = "decimal"
+          patternFor OneParen s = s ++ ")"
+          patternFor TwoParens s = "(" ++ s ++ ")"
+          patternFor _ s = s ++ "."
+
+-- | Convert Pandoc document to string in OpenXML format.
+writeOpenXML :: WriterOptions -> Pandoc -> WS Element
+writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do
+  title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+  authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
+                 [Para (intercalate [LineBreak] auths) | not (null auths)]
+  date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+  let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
+      convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
+      convertSpace xs = xs
+  let blocks' = bottomUp convertSpace $ blocks
+  doc <- blocksToOpenXML opts blocks'
+  notes' <- reverse `fmap` gets stFootnotes
+  let notes = case notes' of
+                   [] -> []
+                   ns -> [mknode "w:footnotes" [] ns]
+  let meta = title ++ authors ++ date
+  return $ mknode "w:document"
+            [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+            ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+            ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+            ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+            ,("xmlns:v","urn:schemas-microsoft-com:vml")
+            ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+            ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+            ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+            ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+         $ mknode "w:body" [] (meta ++ doc ++ notes)
+
+-- | Convert a list of Pandoc blocks to OpenXML.
+blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
+blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
+
+pStyle :: String -> Element
+pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+
+rStyle :: String -> Element
+rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+
+-- | Convert a Pandoc block element to OpenXML.
+blockToOpenXML :: WriterOptions -> Block -> WS [Element]
+blockToOpenXML _ Null = return []
+blockToOpenXML opts (Header lev lst) = do
+  contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
+               blockToOpenXML opts (Para lst)
+  usedIdents <- gets stSectionIds
+  let ident = uniqueIdent lst usedIdents
+  modify $ \s -> s{ stSectionIds = ident : stSectionIds s }
+  let bookmarkStart = mknode "w:bookmarkStart" [("w:id",ident)
+                                               ,("w:name",ident)] ()
+  let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id",ident)] ()
+  return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
+blockToOpenXML opts (Plain lst) = blockToOpenXML opts (Para lst)
+blockToOpenXML opts (Para x@[Image alt _]) = do
+  paraProps <- getParaProps
+  contents <- inlinesToOpenXML opts x
+  captionNode <- withParaProp (pStyle "ImageCaption")
+                 $ blockToOpenXML opts (Para alt)
+  return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
+blockToOpenXML opts (Para lst) = do
+  paraProps <- getParaProps
+  contents <- inlinesToOpenXML opts lst
+  return [mknode "w:p" [] (paraProps ++ contents)]
+blockToOpenXML _ (RawBlock format str)
+  | format == "openxml" = return [ x | Elem x <- parseXML str ]
+  | otherwise           = return []
+blockToOpenXML opts (BlockQuote blocks) =
+  withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
+blockToOpenXML opts (CodeBlock attrs str) =
+  withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str]
+blockToOpenXML _ HorizontalRule = return [
+  mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
+    $ mknode "v:rect" [("style","width:0;height:1.5pt"),
+                       ("o:hralign","center"),
+                       ("o:hrstd","t"),("o:hr","t")] () ]
+blockToOpenXML opts (Table caption aligns widths headers rows) = do
+  let captionStr = stringify caption
+  caption' <- if null caption
+                 then return []
+                 else withParaProp (pStyle "TableCaption")
+                      $ blockToOpenXML opts (Para caption)
+  let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
+  let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
+                                    $ blocksToOpenXML opts cell
+  headers' <- mapM cellToOpenXML $ zip aligns headers
+  rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells)
+           $ rows
+  let borderProps = mknode "w:tcPr" []
+                    [ mknode "w:tcBorders" []
+                      $ mknode "w:bottom" [("w:val","single")] ()
+                    , mknode "w:vAlign" [("w:val","bottom")] () ]
+  let mkcell border contents = mknode "w:tc" []
+                            $ [ borderProps | border ] ++
+                            if null contents
+                               then [mknode "w:p" [] ()]
+                               else contents
+  let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
+  let textwidth = 7920  -- 5.5 in in twips, 1/20 pt
+  let mkgridcol w = mknode "w:gridCol"
+                       [("w:w", show $ (floor (textwidth * w) :: Integer))] ()
+  return $
+    [ mknode "w:tbl" []
+      ( mknode "w:tblPr" []
+        [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+          | not (null caption) ]
+      : mknode "w:tblGrid" []
+        (if all (==0) widths
+            then []
+            else map mkgridcol widths)
+      : [ mkrow True headers' | not (all null headers) ] ++
+      map (mkrow False) rows'
+      )
+    ] ++ caption'
+blockToOpenXML opts (BulletList lst) = do
+  let marker = BulletMarker
+  addList marker
+  asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
+  let marker = NumberMarker numstyle numdelim start
+  addList marker
+  asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+blockToOpenXML opts (DefinitionList items) =
+  concat `fmap` mapM (definitionListItemToOpenXML opts) items
+
+definitionListItemToOpenXML  :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
+definitionListItemToOpenXML opts (term,defs) = do
+  term' <- withParaProp (pStyle "DefinitionTerm")
+           $ blockToOpenXML opts (Para term)
+  defs' <- withParaProp (pStyle "Definition")
+           $ concat `fmap` mapM (blocksToOpenXML opts) defs
+  return $ term' ++ defs'
+
+getNumId :: WS Int
+getNumId = length `fmap` gets stLists
+
+addList :: ListMarker -> WS ()
+addList marker = do
+  lists <- gets stLists
+  modify $ \st -> st{ stLists = lists ++ [marker] }
+  numStyles <- gets stNumStyles
+  case M.lookup marker numStyles of
+           Just _  -> return ()
+           Nothing -> modify $ \st ->
+                 st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
+
+listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element]
+listItemToOpenXML _ _ []                   = return []
+listItemToOpenXML opts marker (first:rest) = do
+  first' <- withMarker marker $ blockToOpenXML opts first
+  rest'  <- withMarker NoMarker $ blocksToOpenXML opts rest
+  return $ first' ++ rest'
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+                                 AlignLeft -> "left"
+                                 AlignRight -> "right"
+                                 AlignCenter -> "center"
+                                 AlignDefault -> "left"
+
+-- | Convert a list of inline elements to OpenXML.
+inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
+inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
+
+withMarker :: ListMarker -> WS a -> WS a
+withMarker m p = do
+  origMarker <- gets stListMarker
+  modify $ \st -> st{ stListMarker = m }
+  result <- p
+  modify $ \st -> st{ stListMarker = origMarker }
+  return result
+
+asList :: WS a -> WS a
+asList p = do
+  origListLevel <- gets stListLevel
+  modify $ \st -> st{ stListLevel = stListLevel st + 1 }
+  result <- p
+  modify $ \st -> st{ stListLevel = origListLevel }
+  return result
+
+getTextProps :: WS [Element]
+getTextProps = do
+  props <- gets stTextProperties
+  return $ if null props
+              then []
+              else [mknode "w:rPr" [] $ props]
+
+pushTextProp :: Element -> WS ()
+pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s }
+
+popTextProp :: WS ()
+popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s }
+
+withTextProp :: Element -> WS a -> WS a
+withTextProp d p = do
+  pushTextProp d
+  res <- p
+  popTextProp
+  return res
+
+getParaProps :: WS [Element]
+getParaProps = do
+  props <- gets stParaProperties
+  listLevel <- gets stListLevel
+  numid <- getNumId
+  let listPr = if listLevel >= 0
+                  then [ mknode "w:numPr" []
+                         [ mknode "w:numId" [("w:val",show numid)] ()
+                         , mknode "w:ilvl" [("w:val",show listLevel)] () ]
+                       ]
+                  else []
+  return $ case props ++ listPr of
+                [] -> []
+                ps -> [mknode "w:pPr" [] ps]
+
+pushParaProp :: Element -> WS ()
+pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s }
+
+popParaProp :: WS ()
+popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s }
+
+withParaProp :: Element -> WS a -> WS a
+withParaProp d p = do
+  pushParaProp d
+  res <- p
+  popParaProp
+  return res
+
+formattedString :: String -> WS [Element]
+formattedString str = do
+  props <- getTextProps
+  return [ mknode "w:r" [] $
+             props ++
+             [ mknode "w:t" [("xml:space","preserve")] str ] ]
+
+-- | Convert an inline element to OpenXML.
+inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
+inlineToOpenXML _ (Str str) = formattedString str
+inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
+inlineToOpenXML opts (Strong lst) =
+  withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Emph lst) =
+  withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Subscript lst) =
+  withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
+  $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Superscript lst) =
+  withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
+  $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (SmallCaps lst) =
+  withTextProp (mknode "w:smallCaps" [] ())
+  $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Strikeout lst) =
+  withTextProp (mknode "w:strike" [] ())
+  $ inlinesToOpenXML opts lst
+inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ]
+inlineToOpenXML _ (RawInline f str)
+  | f == "openxml" = return [ x | Elem x <- parseXML str ]
+  | otherwise      = return []
+inlineToOpenXML opts (Quoted quoteType lst) =
+  inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
+    where (open, close) = case quoteType of
+                            SingleQuote -> ("\x2018", "\x2019")
+                            DoubleQuote -> ("\x201C", "\x201D")
+inlineToOpenXML opts (Math InlineMath str) =
+  case texMathToOMML DisplayInline str of
+        Right r -> return [r]
+        Left  _ -> inlinesToOpenXML opts (readTeXMath str)
+inlineToOpenXML opts (Math DisplayMath str) =
+  case texMathToOMML DisplayBlock str of
+        Right r -> return [br, r, br]
+        Left  _ -> do
+            fallback <- inlinesToOpenXML opts (readTeXMath str)
+            return $ [br] ++ fallback ++ [br]
+    where br = mknode "w:br" [] ()
+inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
+inlineToOpenXML _ (Code attrs str) =
+  withTextProp (rStyle "VerbatimChar")
+  $ case highlight formatOpenXML attrs str of
+         Nothing  -> intercalate [mknode "w:br" [] ()]
+                     `fmap` (mapM formattedString $ lines str)
+         Just h   -> return h
+     where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] .
+                                    map (map toHlTok)
+           toHlTok (toktype,tok) = mknode "w:r" []
+                                     [ mknode "w:rPr" []
+                                       [ rStyle $ show toktype ]
+                                     , mknode "w:t" [("xml:space","preserve")] tok ]
+inlineToOpenXML opts (Note bs) = do
+  notes <- gets stFootnotes
+  let notenum = length notes + 1
+  let notemarker = mknode "w:r" []
+                   [ mknode "w:rPr" [] (rStyle "FootnoteReference")
+                   , mknode "w:footnoteRef" [] () ]
+  let notemarkerXml = RawInline "openxml" $ ppElement notemarker
+  let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
+      insertNoteRef (Para ils  : xs) = Para  (notemarkerXml : ils) : xs
+      insertNoteRef xs               = Para [notemarkerXml] : xs
+  oldListLevel <- gets stListLevel
+  oldParaProperties <- gets stParaProperties
+  oldTextProperties <- gets stTextProperties
+  modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
+  contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
+                $ insertNoteRef bs
+  modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
+                      stTextProperties = oldTextProperties }
+  let newnote = mknode "w:footnote" [("w:id",show notenum)] $ contents
+  modify $ \s -> s{ stFootnotes = newnote : notes }
+  return [ mknode "w:r" []
+           [ mknode "w:rPr" [] (rStyle "FootnoteReference")
+           , mknode "w:footnoteReference" [("w:id", show notenum)] () ] ]
+-- internal link:
+inlineToOpenXML opts (Link txt ('#':xs,_)) = do
+  contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
+  return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
+-- external link:
+inlineToOpenXML opts (Link txt (src,_)) = do
+  contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
+  extlinks <- gets stExternalLinks
+  ind <- case M.lookup src extlinks of
+            Just i   -> return i
+            Nothing  -> do
+              let i = "link" ++ show (M.size extlinks)
+              modify $ \st -> st{ stExternalLinks =
+                        M.insert src i extlinks }
+              return i
+  return [ mknode "w:hyperlink" [("r:id",ind)] contents ]
+inlineToOpenXML opts (Image alt (src, tit)) = do
+  exists <- liftIO $ doesFileExist src
+  if exists
+     then do
+       imgs <- gets stImages
+       (ident,size) <- case M.lookup src imgs of
+                            Just (i,img) -> return (i, imageSize img)
+                            Nothing -> do
+                              img <- liftIO $ B.readFile src
+                              let ident' = "image" ++ show (M.size imgs + 1)
+                              let size'  = imageSize img
+                              modify $ \st -> st{
+                                 stImages = M.insert src (ident',img) $ stImages st }
+                              return (ident',size')
+       let (xpt,ypt) = maybe (120,120) sizeInPoints size
+       -- 12700 emu = 1 pt
+       let (xemu,yemu) = (xpt * 12700, ypt * 12700)
+       let cNvPicPr = mknode "pic:cNvPicPr" [] $
+                        mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
+       let nvPicPr  = mknode "pic:nvPicPr" []
+                       [ mknode "pic:cNvPr"
+                           [("descr",src),("id","0"),("name","Picture")] ()
+                       , cNvPicPr ]
+       let blipFill = mknode "pic:blipFill" []
+                        [ mknode "a:blip" [("r:embed",ident)] ()
+                        , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
+       let xfrm =    mknode "a:xfrm" []
+                       [ mknode "a:off" [("x","0"),("y","0")] ()
+                       , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
+       let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
+                        mknode "a:avLst" [] ()
+       let ln =      mknode "a:ln" [("w","9525")]
+                       [ mknode "a:noFill" [] ()
+                       , mknode "a:headEnd" [] ()
+                       , mknode "a:tailEnd" [] () ]
+       let spPr =    mknode "pic:spPr" [("bwMode","auto")]
+                       [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
+       let graphic = mknode "a:graphic" [] $
+                       mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
+                         [ mknode "pic:pic" []
+                           [ nvPicPr
+                           , blipFill
+                           , spPr ] ]
+       return [ mknode "w:r" [] $
+           mknode "w:drawing" [] $
+             mknode "wp:inline" []
+               [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+               , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
+               , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
+               , graphic ] ]
+     else do
+       liftIO $ UTF8.hPutStrLn stderr $
+          "Could not find image `" ++ src ++ "', skipping..."
+       inlinesToOpenXML opts alt
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9fc393fed..67048348e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -32,30 +32,31 @@ import Data.IORef
 import Data.Maybe ( fromMaybe, isNothing )
 import Data.List ( findIndices, isPrefixOf )
 import System.Environment ( getEnv )
-import System.FilePath ( (</>), takeBaseName, takeExtension )
+import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Lazy.UTF8 ( fromString )
 import Codec.Archive.Zip
-import System.Time
+import Data.Time.Clock.POSIX
 import Text.Pandoc.Shared hiding ( Element )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
-import Control.Monad (liftM)
+import Control.Monad.State
 import Text.XML.Light hiding (ppTopElement)
 import Text.Pandoc.UUID
 import Text.Pandoc.Writers.HTML
 import Text.Pandoc.Writers.Markdown ( writePlain )
 import Data.Char ( toLower )
-import System.Directory ( copyFile )
 import Network.URI ( unEscapeString )
+import Text.Pandoc.MIME (getMimeType)
 
 -- | Produce an EPUB file from a Pandoc document.
 writeEPUB :: Maybe String   -- ^ EPUB stylesheet specified at command line
+          -> [FilePath]     -- ^ Paths to fonts to embed
           -> WriterOptions  -- ^ Writer options
           -> Pandoc         -- ^ Document to convert
           -> IO B.ByteString
-writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-  (TOD epochtime _) <- getClockTime
+writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
+  epochtime <- floor `fmap` getPOSIXTime
   let mkEntry path content = toEntry path epochtime content
   let opts' = opts{ writerEmailObfuscation = NoObfuscation
                   , writerStandalone = True
@@ -64,17 +65,24 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
   let vars = writerVariables opts'
   let mbCoverImage = lookup "epub-cover-image" vars
 
+  titlePageTemplate <- readDataFile (writerUserDataDir opts)
+                       $ "templates" </> "epub-titlepage" <.> "html"
+
+  coverImageTemplate <- readDataFile (writerUserDataDir opts)
+                       $ "templates" </> "epub-coverimage" <.> "html"
+
+  pageTemplate <- readDataFile (writerUserDataDir opts)
+                       $ "templates" </> "epub-page" <.> "html"
+
   -- cover page
   (cpgEntry, cpicEntry) <-
                 case mbCoverImage of
                      Nothing   -> return ([],[])
                      Just img  -> do
                        let coverImage = "cover-image" ++ takeExtension img
-                       copyFile img coverImage
                        let cpContent = fromString $ writeHtmlString
-                             opts'{writerTemplate = pageTemplate
-                                  ,writerVariables =
-                                    ("coverimage",coverImage):vars}
+                             opts'{writerTemplate = coverImageTemplate,
+                                   writerVariables = ("coverimage",coverImage):vars}
                                (Pandoc meta [])
                        imgContent <- B.readFile img
                        return ( [mkEntry "cover.xhtml" cpContent]
@@ -82,37 +90,47 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
 
   -- title page
   let tpContent = fromString $ writeHtmlString
-                     opts'{writerTemplate = pageTemplate
-                          ,writerVariables = ("titlepage","yes"):vars}
+                     opts'{writerTemplate = titlePageTemplate}
                      (Pandoc meta [])
   let tpEntry = mkEntry "title_page.xhtml" tpContent
 
   -- handle pictures
   picsRef <- newIORef []
-  Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM
+  Pandoc _ blocks <- bottomUpM
        (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
   pics <- readIORef picsRef
   let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
                                           return e{ eRelativePath = newsrc }
   picEntries <- mapM readPicEntry pics
 
+  -- handle fonts
+  let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
+  fontEntries <- mapM mkFontEntry fonts
+
   -- body pages
   let isH1 (Header 1 _) = True
       isH1 _            = False
-  let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
-  let chunks = splitByIndices h1Indices blocks
+  -- internal reference IDs change when we chunk the file,
+  -- so the next two lines fix that:
+  let reftable = correlateRefs blocks
+  let blocks' = replaceRefs reftable blocks
+  let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks'
+  let chunks = splitByIndices h1Indices blocks'
   let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
       titleize xs                 = Pandoc meta xs
-  let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
   let chapters = map titleize chunks
+  let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
   let chapterToEntry :: Int -> Pandoc -> Entry
       chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
                                    fromString $ chapToHtml chap
   let chapterEntries = zipWith chapterToEntry [1..] chapters
 
   -- contents.opf
-  lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
-                (\_ -> return "en-US")
+  localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG")
+                    (\_ -> return "en-US")
+  let lang = case lookup "lang" (writerVariables opts') of
+                     Just x  -> x
+                     Nothing -> localeLang
   uuid <- getRandomUUID
   let chapterNode ent = unode "item" !
                            [("id", takeBaseName $ eRelativePath ent),
@@ -125,17 +143,22 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
                             ("href", eRelativePath ent),
                             ("media-type", fromMaybe "application/octet-stream"
                                $ imageTypeOf $ eRelativePath ent)] $ ()
+  let fontNode ent = unode "item" !
+                           [("id", takeBaseName $ eRelativePath ent),
+                            ("href", eRelativePath ent),
+                            ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
   let plainify t = removeTrailingSpace $
                     writePlain opts'{ writerStandalone = False } $
                     Pandoc meta [Plain t]
   let plainTitle = plainify $ docTitle meta
   let plainAuthors = map plainify $ docAuthors meta
+  let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
   let contentsData = fromString $ ppTopElement $
         unode "package" ! [("version","2.0")
                           ,("xmlns","http://www.idpf.org/2007/opf")
                           ,("unique-identifier","BookId")] $
           [ metadataElement (writerEPUBMetadata opts')
-              uuid lang plainTitle plainAuthors mbCoverImage
+              uuid lang plainTitle plainAuthors plainDate mbCoverImage
           , unode "manifest" $
              [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
                               ,("media-type","application/x-dtbncx+xml")] $ ()
@@ -143,7 +166,8 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
                               ,("media-type","text/css")] $ ()
              ] ++
              map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
-             map pictureNode (cpicEntry ++ picEntries)
+             map pictureNode (cpicEntry ++ picEntries) ++
+             map fontNode fontEntries
           , unode "spine" ! [("toc","ncx")] $
               case mbCoverImage of
                     Nothing -> []
@@ -197,6 +221,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
                ,("media-type","application/oebps-package+xml")] $ ()
   let containerEntry = mkEntry "META-INF/container.xml" containerData
 
+  -- com.apple.ibooks.display-options.xml
+  let apple = fromString $ ppTopElement $
+        unode "display_options" $
+          unode "platform" ! [("name","*")] $
+            unode "option" ! [("name","specified-fonts")] $ "true"
+  let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+
   -- stylesheet
   stylesheet <- case mbStylesheet of
                    Just s  -> return s
@@ -205,13 +236,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
 
   -- construct archive
   let archive = foldr addEntryToArchive emptyArchive
-                 (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+                 (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
                   contentsEntry : tocEntry :
-                  (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) )
+                  (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) )
   return $ fromArchive archive
 
-metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element
-metadataElement metadataXML uuid lang title authors mbCoverImage =
+metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element
+metadataElement metadataXML uuid lang title authors date mbCoverImage =
   let userNodes = parseXML metadataXML
       elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
                                ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
@@ -227,6 +258,7 @@ metadataElement metadataXML uuid lang title authors mbCoverImage =
            [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
                not (elt `contains` "identifier") ] ++
            [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++
+           [ unode "dc:date" date | not (elt `contains` "date") ] ++
            [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
                not (isNothing mbCoverImage) ]
   in  elt{ elContent = elContent elt ++ map Elem newNodes }
@@ -263,20 +295,24 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
        "</ops:switch>"
       result = if "<math" `isPrefixOf` mathml then inOps else mathml
   return $ RawInline "html" result : xs
-transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs
-transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
 transformInlines _ _ _ xs = return xs
 
-transformBlock :: Block -> Block
-transformBlock (RawBlock _ _) = Null
-transformBlock x = x
-
 (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
 (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
 
 -- | Version of 'ppTopElement' that specifies UTF-8 encoding.
 ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+  -- unEntity removes numeric  entities introduced by ppElement
+  -- (kindlegen seems to choke on these).
+  where unEntity [] = ""
+        unEntity ('&':'#':xs) =
+                   let (ds,ys) = break (==';') xs
+                       rest = drop 1 ys
+                   in  case reads ('\'':'\\':ds ++ "'") of
+                          ((x,_):_) -> x : unEntity rest
+                          _         -> '&':'#':unEntity xs
+        unEntity (x:xs) = x : unEntity xs
 
 imageTypeOf :: FilePath -> Maybe String
 imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
@@ -288,38 +324,49 @@ imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
                      "svg"       -> Just "image/svg+xml"
                      _           -> Nothing
 
-pageTemplate :: String
-pageTemplate = unlines
- [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
- , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
- , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
- , "<head>"
- , "<title>$title$</title>"
- , "$if(coverimage)$"
- , "<style type=\"text/css\">img{ max-width: 100%; }</style>"
- , "$endif$"
- , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
- , "</head>"
- , "<body>"
- , "$if(coverimage)$"
- , "<div id=\"cover-image\">"
- , "<img src=\"$coverimage$\" alt=\"$title$\" />"
- , "</div>"
- , "$else$"
- , "$if(titlepage)$"
- , "<h1 class=\"title\">$title$</h1>"
- , "$for(author)$"
- , "<h2 class=\"author\">$author$</h2>"
- , "$endfor$"
- , "$else$"
- , "<h1>$title$</h1>"
- , "$if(toc)$"
- , "$toc$"
- , "$endif$"
- , "$endif$"
- , "$body$"
- , "$endif$"
- , "</body>"
- , "</html>"
- ]
 
+data IdentState = IdentState{
+       chapterNumber :: Int,
+       runningIdents :: [String],
+       chapterIdents :: [String],
+       identTable    :: [(String,String)]
+       } deriving (Read, Show)
+
+-- Go through a block list and construct a table
+-- correlating the automatically constructed references
+-- that would be used in a normal pandoc document with
+-- new URLs to be used in the EPUB.  For example, what
+-- was "header-1" might turn into "ch6.xhtml#header".
+correlateRefs :: [Block] -> [(String,String)]
+correlateRefs bs = identTable $ execState (mapM_ go bs)
+                                IdentState{ chapterNumber = 0
+                                          , runningIdents = []
+                                          , chapterIdents = []
+                                          , identTable = [] }
+ where go :: Block -> State IdentState ()
+       go (Header n ils) = do
+          when (n == 1) $
+              modify $ \s -> s{ chapterNumber = chapterNumber s + 1
+                              , chapterIdents = [] }
+          st <- get
+          let runningid = uniqueIdent ils (runningIdents st)
+          let chapid    = if n == 1
+                             then Nothing
+                             else Just $ uniqueIdent ils (chapterIdents st)
+          modify $ \s -> s{ runningIdents = runningid : runningIdents st
+                          , chapterIdents = maybe (chapterIdents st)
+                                              (: chapterIdents st) chapid
+                          , identTable = (runningid, "ch" ++ show (chapterNumber st) ++
+                              ".xhtml" ++ maybe "" ('#':) chapid) : identTable st
+                           }
+       go _ = return ()
+
+-- Replace internal link references using the table produced
+-- by correlateRefs.
+replaceRefs :: [(String,String)] -> [Block] -> [Block]
+replaceRefs refTable = bottomUp replaceOneRef
+  where replaceOneRef x@(Link lab ('#':xs,tit)) =
+          case lookup xs refTable of
+                Just url -> Link lab (url,tit)
+                Nothing  -> x
+        replaceOneRef x = x
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 573adbf4a..f35b29370 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
 {-# OPTIONS_GHC -fno-warn-deprecations #-}
 {-
 Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -30,168 +31,186 @@ Conversion of 'Pandoc' documents to HTML.
 -}
 module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
 import Text.Pandoc.Definition
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.Pandoc.Shared
 import Text.Pandoc.Templates
+import Text.Pandoc.Generic
 import Text.Pandoc.Readers.TeXMath
-import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
-import Text.Pandoc.XML (stripTags, escapeStringForXML)
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting ( highlight, styleToCss,
+                                  formatHtmlInline, formatHtmlBlock )
+import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities)
 import Network.HTTP ( urlEncode )
 import Numeric ( showHex )
 import Data.Char ( ord, toLower )
 import Data.List ( isPrefixOf, intersperse )
+import Data.String ( fromString )
 import Data.Maybe ( catMaybes )
 import Control.Monad.State
-import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList )
-import qualified Text.XHtml.Transitional as XHtml
+import Text.Blaze
+import qualified Text.Blaze.Html5 as H5
+import qualified Text.Blaze.XHtml1.Transitional as H
+import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
+import Text.Blaze.Renderer.String (renderHtml)
 import Text.TeXMath
 import Text.XML.Light.Output
 import System.FilePath (takeExtension)
+import Data.Monoid (mempty, mconcat)
 
 data WriterState = WriterState
     { stNotes            :: [Html]  -- ^ List of notes
     , stMath             :: Bool    -- ^ Math is used in document
     , stHighlighting     :: Bool    -- ^ Syntax highlighting is used
     , stSecNum           :: [Int]   -- ^ Number of current section
-    } deriving Show
+    }
 
 defaultWriterState :: WriterState
 defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []}
 
 -- Helpers to render HTML with the appropriate function.
 
--- | Modified version of Text.XHtml's stringToHtml.
--- Use unicode characters wherever possible.
-stringToHtml :: WriterOptions -> String -> Html
-stringToHtml opts = if writerAscii opts
-                       then XHtml.stringToHtml
-                       else primHtml . escapeStringForXML
+strToHtml :: String -> Html
+strToHtml = preEscapedString . escapeStringForXML
+-- strToHtml = toHtml
 
 -- | Hard linebreak.
 nl :: WriterOptions -> Html
 nl opts = if writerWrapText opts
-             then primHtml "\n"
-             else noHtml
+             then preEscapedString "\n"
+             else mempty
 
 -- | Convert Pandoc document to Html string.
 writeHtmlString :: WriterOptions -> Pandoc -> String
 writeHtmlString opts d =
-  let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d)
-                                                 defaultWriterState
+  let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d)
+                                                             defaultWriterState
   in  if writerStandalone opts
-         then inTemplate opts tit auths date toc body' newvars
-         else dropWhile (=='\n') $ showHtmlFragment body'
+         then inTemplate opts tit auths authsMeta date toc body' newvars
+         else renderHtml body'
 
 -- | Convert Pandoc document to Html structure.
 writeHtml :: WriterOptions -> Pandoc -> Html
 writeHtml opts d =
-  let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d)
-                                                 defaultWriterState
+  let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d)
+                                                            defaultWriterState
   in  if writerStandalone opts
-         then inTemplate opts tit auths date toc body' newvars
+         then inTemplate opts tit auths authsMeta date toc body' newvars
          else body'
 
 -- result is (title, authors, date, toc, body, new variables)
 pandocToHtml :: WriterOptions
              -> Pandoc
-             -> State WriterState (Html, [Html], Html, Maybe Html, Html, [(String,String)])
+             -> State WriterState (Html, [Html], [Html], Html, Maybe Html, Html, [(String,String)])
 pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
   let standalone = writerStandalone opts
   tit <- if standalone
             then inlineListToHtml opts title'
-            else return noHtml
+            else return mempty
   auths <- if standalone
               then mapM (inlineListToHtml opts) authors'
               else return []
+  authsMeta <- if standalone
+                  then mapM (inlineListToHtml opts . prepForMeta) authors'
+                  else return []
   date <- if standalone
              then inlineListToHtml opts date'
-             else return noHtml
+             else return mempty
+  let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts
   let sects = hierarchicalize $
               if writerSlideVariant opts == NoSlides
                  then blocks
-                 else case blocks of
-                           (Header 1 _ : _) -> blocks
-                           _                ->
-                                let isL1 (Header 1 _) = True
-                                    isL1 _            = False
-                                    (preBlocks, rest) = break isL1 blocks
-                                in  (RawBlock "html" "<div class=\"slide\">" :
-                                    preBlocks) ++ (RawBlock "html" "</div>" :
-                                    rest)
+                 else prepSlides slideLevel blocks
   toc <- if writerTableOfContents opts
             then tableOfContents opts sects
             else return Nothing
-  blocks' <- liftM (toHtmlFromList . intersperse (nl opts)) $
-                 mapM (elementToHtml opts) sects
+  blocks' <- liftM (mconcat . intersperse (nl opts)) $
+                 mapM (elementToHtml slideLevel opts) sects
   st <- get
   let notes = reverse (stNotes st)
-  let thebody = blocks' +++ footnoteSection opts notes
+  let thebody = blocks' >> footnoteSection opts notes
   let  math = if stMath st
                 then case writerHTMLMathMethod opts of
                            LaTeXMathML (Just url) ->
-                              script !
-                              [src url, thetype "text/javascript"] $ noHtml
+                              H.script ! A.src (toValue url)
+                                       ! A.type_ "text/javascript"
+                                       $ mempty
                            MathML (Just url) ->
-                              script !
-                              [src url, thetype "text/javascript"] $ noHtml
+                              H.script ! A.src (toValue url)
+                                       ! A.type_ "text/javascript"
+                                       $ mempty
                            MathJax url ->
-                              script ! [src url, thetype "text/javascript"] $ noHtml
+                              H.script ! A.src (toValue url)
+                                       ! A.type_ "text/javascript"
+                                       $ mempty
                            JsMath (Just url) ->
-                              script !
-                              [src url, thetype "text/javascript"] $ noHtml
+                              H.script ! A.src (toValue url)
+                                       ! A.type_ "text/javascript"
+                                       $ mempty
                            _ -> case lookup "mathml-script" (writerVariables opts) of
-                                      Just s ->
-                                        script ! [thetype "text/javascript"] <<
-                                           primHtml ("/*<![CDATA[*/\n" ++ s ++
-                                                     "/*]]>*/\n")
-                                      Nothing -> noHtml
-                else noHtml
-  let newvars = [("highlighting-css", defaultHighlightingCss) |
+                                      Just s | not (writerHtml5 opts) ->
+                                        H.script ! A.type_ "text/javascript"
+                                           $ preEscapedString
+                                            ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
+                                             | otherwise -> mempty
+                                      Nothing -> mempty
+                else mempty
+  let newvars = [("highlighting-css",
+                   styleToCss $ writerHighlightStyle opts) |
                    stHighlighting st] ++
-                [("math", showHtmlFragment math) | stMath st]
-  return (tit, auths, date, toc, thebody, newvars)
+                [("math", renderHtml math) | stMath st]
+  return (tit, auths, authsMeta, date, toc, thebody, newvars)
+
+-- | Prepare author for meta tag, converting notes into
+-- bracketed text and removing links.
+prepForMeta :: [Inline] -> [Inline]
+prepForMeta = bottomUp (concatMap fixInline)
+  where fixInline (Note [Para xs])  = [Str " ["] ++ xs ++ [Str "]"]
+        fixInline (Note [Plain xs]) = [Str " ["] ++ xs ++ [Str "]"]
+        fixInline (Link lab _)      = lab
+        fixInline (Image lab _)     = lab
+        fixInline x                 = [x]
 
 inTemplate :: TemplateTarget a
            => WriterOptions
            -> Html
            -> [Html]
+           -> [Html]
            -> Html
            -> Maybe Html
            -> Html
            -> [(String,String)]
            -> a
-inTemplate opts tit auths date toc body' newvars =
-  let renderedTit = showHtmlFragment tit
-      topTitle'   = stripTags renderedTit
-      authors     = map (stripTags . showHtmlFragment) auths
-      date'       = stripTags $ showHtmlFragment date
+inTemplate opts tit auths authsMeta date toc body' newvars =
+  let title'      = renderHtml tit
+      date'       = renderHtml date
+      dateMeta    = maybe [] (\x -> [("date-meta",x)]) $ normalizeDate date'
       variables   = writerVariables opts ++ newvars
-      context     = variables ++
-                    [ ("body", dropWhile (=='\n') $ showHtmlFragment body')
-                    , ("pagetitle", topTitle')
-                    , ("title", dropWhile (=='\n') $ showHtmlFragment tit)
+      context     = variables ++ dateMeta ++
+                    [ ("body", dropWhile (=='\n') $ renderHtml body')
+                    , ("pagetitle", stripTags title')
+                    , ("title", title')
                     , ("date", date')
                     , ("idprefix", writerIdentifierPrefix opts)
                     , ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2")
-                    , ("s5-url", "ui/default") ] ++
+                    , ("s5-url", "s5/default") ] ++
                     [ ("html5","true") | writerHtml5 opts ] ++
                     (case toc of
-                         Just t  -> [ ("toc", showHtmlFragment t)]
+                         Just t  -> [ ("toc", renderHtml t)]
                          Nothing -> [])  ++
-                    [ ("author", a) | a <- authors ]
+                    [ ("author", renderHtml a) | a <- auths ] ++
+                    [ ("author-meta", stripTags $ renderHtml a) | a <- authsMeta ]
   in  renderTemplate context $ writerTemplate opts
 
 -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix
-prefixedId :: WriterOptions -> String -> HtmlAttr
-prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s
+prefixedId :: WriterOptions -> String -> Attribute
+prefixedId opts s = A.id $ toValue $ writerIdentifierPrefix opts ++ s
 
 -- | Replacement for Text.XHtml's unordList.
 unordList :: WriterOptions -> ([Html] -> Html)
-unordList opts items = ulist << toListItems opts items
+unordList opts items = H.ul $ mconcat $ toListItems opts items
 
 -- | Replacement for Text.XHtml's ordList.
 ordList :: WriterOptions -> ([Html] -> Html)
-ordList opts items = olist << toListItems opts items
+ordList opts items = H.ol $ mconcat $ toListItems opts items
 
 -- | Construct table of contents from list of elements.
 tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html)
@@ -214,52 +233,66 @@ elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
 elementToListItem _ (Blk _) = return Nothing
 elementToListItem opts (Sec _ num id' headerText subsecs) = do
   let sectnum = if writerNumberSections opts
-                   then (thespan ! [theclass "toc-section-number"] << showSecNum num) +++
-                         stringToHtml opts" "
-                   else noHtml
-  txt <- liftM (sectnum +++) $ inlineListToHtml opts headerText
+                   then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num) >>
+                     preEscapedString " "
+                   else mempty
+  txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText
   subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
   let subList = if null subHeads
-                   then noHtml
+                   then mempty
                    else unordList opts subHeads
-  return $ Just $ (anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList
+  return $ Just $ (H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ id')
+                       $ toHtml txt) >> subList
 
 -- | Convert an Element to Html.
-elementToHtml :: WriterOptions -> Element -> State WriterState Html
-elementToHtml opts (Blk HorizontalRule) | writerSlideVariant opts /= NoSlides =
-  return $ primHtml "</div>" +++ nl opts +++ primHtml "<div class=\"slide\">"
-elementToHtml opts (Blk block) = blockToHtml opts block
-elementToHtml opts (Sec level num id' title' elements) = do
+elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html
+elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block
+elementToHtml slideLevel opts (Sec level num id' title' elements) = do
+  let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel
   modify $ \st -> st{stSecNum = num}  -- update section number
-  header' <- blockToHtml opts (Header level title')
-  innerContents <- mapM (elementToHtml opts) elements
-  let header'' = header' !  [prefixedId opts id' |
-                             not (writerStrictMarkdown opts ||
-                                  writerSectionDivs opts ||
-                                  writerSlideVariant opts == S5Slides)]
-  let stuff = header'' : innerContents
-  let slide = writerSlideVariant opts /= NoSlides && level == 1
-  let stuff' =  if slide
-                   then [thediv ! [theclass "slide"] <<
-                          (nl opts : intersperse (nl opts) stuff ++ [nl opts])]
-                   else intersperse (nl opts) stuff
-  let inNl x = nl opts : x ++ [nl opts]
-  return $ if writerSectionDivs opts
-              then if writerHtml5 opts
-                      then tag "section" ! [prefixedId opts id'] << inNl stuff'
-                      else thediv ! [prefixedId opts id'] << inNl stuff'
-              else toHtmlFromList stuff'
+  -- always use level 1 for slide titles
+  let level' = if slide then 1 else level
+  let titleSlide = slide && level < slideLevel
+  header' <- blockToHtml opts (Header level' title')
+  let isSec (Sec _ _ _ _ _) = True
+      isSec (Blk _)         = False
+  innerContents <- mapM (elementToHtml slideLevel opts)
+                   $ if titleSlide
+                        -- title slides have no content of their own
+                        then filter isSec elements
+                        else elements
+  let header'' = if (writerStrictMarkdown opts ||
+                     writerSectionDivs opts ||
+                     writerSlideVariant opts == S5Slides)
+                    then header'
+                    else header' ! prefixedId opts id'
+  let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
+  let classes = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
+                  ["level" ++ show level]
+  let secttag  = if writerHtml5 opts
+                    then H5.section ! A.class_ (toValue $ unwords classes)
+                    else H.div ! A.class_ (toValue $ unwords ("section":classes))
+  return $ if titleSlide
+              then mconcat $ (secttag ! prefixedId opts id' $ header'') : innerContents
+              else if writerSectionDivs opts || slide
+                   then secttag ! prefixedId opts id' $ inNl $ header'' : innerContents
+                   else mconcat $ intersperse (nl opts) $ header'' : innerContents
 
 -- | Convert list of Note blocks to a footnote <div>.
 -- Assumes notes are sorted.
 footnoteSection :: WriterOptions -> [Html] -> Html
 footnoteSection opts notes =
   if null notes
-     then noHtml
-     else nl opts +++ (thediv ! [theclass "footnotes"]
-          $ nl opts +++ hr +++ nl opts +++
-            (olist << (notes ++ [nl opts])) +++ nl opts)
-
+     then mempty
+     else nl opts >> (container
+          $ nl opts >> hrtag >> nl opts >>
+            H.ol (mconcat notes >> nl opts) >> nl opts)
+   where container x = if writerHtml5 opts
+                          then H5.section ! A.class_ "footnotes" $ x
+                          else if writerSlideVariant opts /= NoSlides
+                               then H.div ! A.class_ "footnotes slide" $ x
+                               else H.div ! A.class_ "footnotes" $ x
+         hrtag = if writerHtml5 opts then H5.hr else H.hr
 
 -- | Parse a mailto link; return Just (name, domain) or Nothing.
 parseMailto :: String -> Maybe (String, String)
@@ -272,7 +305,7 @@ parseMailto _ = Nothing
 -- | Obfuscate a "mailto:" link.
 obfuscateLink :: WriterOptions -> String -> String -> Html
 obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
-  anchor ! [href s] << txt
+  H.a ! A.href (toValue s) $ toHtml txt
 obfuscateLink opts txt s =
   let meth = writerEmailObfuscation opts
       s' = map toLower s
@@ -287,19 +320,19 @@ obfuscateLink opts txt s =
                           domain' ++ ")")
           in  case meth of
                 ReferenceObfuscation ->
-                     -- need to use primHtml or &'s are escaped to &amp; in URL
-                     primHtml $ "<a href=\"" ++ (obfuscateString s')
+                     -- need to use preEscapedString or &'s are escaped to &amp; in URL
+                     preEscapedString $ "<a href=\"" ++ (obfuscateString s')
                      ++ "\">" ++ (obfuscateString txt) ++ "</a>"
                 JavascriptObfuscation ->
-                     (script ! [thetype "text/javascript"] $
-                     primHtml ("\n<!--\nh='" ++
+                     (H.script ! A.type_ "text/javascript" $
+                     preEscapedString ("\n<!--\nh='" ++
                      obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
                      obfuscateString name' ++ "';e=n+a+h;\n" ++
                      "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
-                     linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
-                     noscript (primHtml $ obfuscateString altText)
+                     linkText  ++ "+'<\\/'+'a'+'>');\n// -->\n")) >>
+                     H.noscript (preEscapedString $ obfuscateString altText)
                 _ -> error $ "Unknown obfuscation method: " ++ show meth
-        _ -> anchor ! [href s] $ stringToHtml opts txt  -- malformed email
+        _ -> H.a ! A.href (toValue s) $ toHtml txt  -- malformed email
 
 -- | Obfuscate character as entity.
 obfuscateChar :: Char -> String
@@ -310,13 +343,13 @@ obfuscateChar char =
 
 -- | Obfuscate string using entities.
 obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+obfuscateString = concatMap obfuscateChar . fromEntities
 
-attrsToHtml :: WriterOptions -> Attr -> [HtmlAttr]
+attrsToHtml :: WriterOptions -> Attr -> [Attribute]
 attrsToHtml opts (id',classes',keyvals) =
-  [theclass (unwords classes') | not (null classes')] ++
+  [A.class_ (toValue $ unwords classes') | not (null classes')] ++
   [prefixedId opts id' | not (null id')] ++
-  map (\(x,y) -> strAttr x y) keyvals
+  map (\(x,y) -> customAttribute (fromString x) (toValue y)) keyvals
 
 imageExts :: [String]
 imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
@@ -331,40 +364,41 @@ treatAsImage fp =
 
 -- | Convert Pandoc block element to HTML.
 blockToHtml :: WriterOptions -> Block -> State WriterState Html
-blockToHtml _ Null = return noHtml
+blockToHtml _ Null = return mempty
 blockToHtml opts (Plain lst) = inlineListToHtml opts lst
 blockToHtml opts (Para [Image txt (s,tit)]) = do
   img <- inlineToHtml opts (Image txt (s,tit))
   capt <- inlineListToHtml opts txt
   return $ if writerHtml5 opts
-              then tag "figure" <<
-                    [nl opts, img, tag "figcaption" << capt, nl opts]
-              else thediv ! [theclass "figure"] <<
-                    [nl opts, img, paragraph ! [theclass "caption"] << capt,
+              then H5.figure $ mconcat
+                    [nl opts, img, H5.figcaption capt, nl opts]
+              else H.div ! A.class_ "figure" $ mconcat
+                    [nl opts, img, H.p ! A.class_ "caption" $ capt,
                     nl opts]
 blockToHtml opts (Para lst) = do
   contents <- inlineListToHtml opts lst
-  return $ paragraph contents
-blockToHtml _ (RawBlock "html" str) = return $ primHtml str
-blockToHtml _ (RawBlock _ _) = return noHtml
-blockToHtml _ (HorizontalRule) = return hr
+  return $ H.p contents
+blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str
+blockToHtml _ (RawBlock _ _) = return mempty
+blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
 blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
-  let classes' = if writerLiterateHaskell opts
-                    then classes
+  let tolhs = writerLiterateHaskell opts &&
+                any (\c -> map toLower c == "haskell") classes &&
+                any (\c -> map toLower c == "literate") classes
+      classes' = if tolhs
+                    then map (\c -> if map toLower c == "haskell"
+                                       then "literatehaskell"
+                                       else c) classes
                     else filter (/= "literate") classes
-  case highlightHtml False (id',classes',keyvals) rawCode of
-         Left _  -> -- change leading newlines into <br /> tags, because some
-                    -- browsers ignore leading newlines in pre blocks
-                    let (leadingBreaks, rawCode') = span (=='\n') rawCode
-                        attrs = attrsToHtml opts (id', classes', keyvals)
-                        addBird = if "literate" `elem` classes'
-                                     then unlines . map ("> " ++) . lines
-                                     else unlines . lines
-                    in  return $ pre ! attrs $ thecode <<
-                                 (replicate (length leadingBreaks) br +++
-                                 [stringToHtml opts $ addBird rawCode'])
-         Right h -> modify (\st -> st{ stHighlighting = True }) >>
-                    return h
+      adjCode  = if tolhs
+                    then unlines . map ("> " ++) . lines $ rawCode
+                    else rawCode
+  case highlight formatHtmlBlock (id',classes,keyvals) adjCode of
+         Nothing -> let attrs = attrsToHtml opts (id', classes', keyvals)
+                    in  return $ foldl (!) H.pre attrs $ H.code
+                                         $ toHtml adjCode
+         Just  h -> modify (\st -> st{ stHighlighting = True }) >>
+                    return (foldl (!) h (attrsToHtml opts (id',[],keyvals)))
 blockToHtml opts (BlockQuote blocks) =
   -- in S5, treat list in blockquote specially
   -- if default is incremental, make it nonincremental;
@@ -378,47 +412,48 @@ blockToHtml opts (BlockQuote blocks) =
                                   blockToHtml (opts {writerIncremental = inc})
                                   (OrderedList attribs lst)
              _                 -> do contents <- blockListToHtml opts blocks
-                                     return $ blockquote (nl opts +++
-                                               contents +++ nl opts)
+                                     return $ H.blockquote
+                                            $ nl opts >> contents >> nl opts
      else do
        contents <- blockListToHtml opts blocks
-       return $ blockquote (nl opts +++ contents +++ nl opts)
+       return $ H.blockquote $ nl opts >> contents >> nl opts
 blockToHtml opts (Header level lst) = do
   contents <- inlineListToHtml opts lst
   secnum <- liftM stSecNum get
   let contents' = if writerNumberSections opts
-                     then (thespan ! [theclass "header-section-number"] << showSecNum secnum) +++
-                            stringToHtml opts " " +++ contents
+                     then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >>
+                            strToHtml " " >> contents
                      else contents
   let contents''  = if writerTableOfContents opts
-                       then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents'
+                       then H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "TOC") $ contents'
                        else contents'
   return $ (case level of
-              1 -> h1 contents''
-              2 -> h2 contents''
-              3 -> h3 contents''
-              4 -> h4 contents''
-              5 -> h5 contents''
-              6 -> h6 contents''
-              _ -> paragraph contents'')
+              1 -> H.h1 contents''
+              2 -> H.h2 contents''
+              3 -> H.h3 contents''
+              4 -> H.h4 contents''
+              5 -> H.h5 contents''
+              6 -> H.h6 contents''
+              _ -> H.p contents'')
 blockToHtml opts (BulletList lst) = do
   contents <- mapM (blockListToHtml opts) lst
-  let attribs = if writerIncremental opts
-                   then [theclass "incremental"]
-                   else []
-  return $ (unordList opts contents) ! attribs
+  let lst' = unordList opts contents
+  let lst'' = if writerIncremental opts
+                 then lst' ! A.class_ "incremental"
+                 else lst'
+  return lst''
 blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
   contents <- mapM (blockListToHtml opts) lst
   let numstyle' = camelCaseToHyphenated $ show numstyle
   let attribs = (if writerIncremental opts
-                   then [theclass "incremental"]
+                   then [A.class_ "incremental"]
                    else []) ++
                 (if startnum /= 1
-                   then [start startnum]
+                   then [A.start $ toValue startnum]
                    else []) ++
                 (if numstyle /= DefaultStyle
                    then if writerHtml5 opts
-                           then [strAttr "type" $
+                           then [A.type_ $
                                  case numstyle of
                                       Decimal    -> "1"
                                       LowerAlpha -> "a"
@@ -426,44 +461,44 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
                                       LowerRoman -> "i"
                                       UpperRoman -> "I"
                                       _          -> "1"]
-                           else [thestyle $ "list-style-type: " ++
+                           else [A.style $ toValue $ "list-style-type: " ++
                                    numstyle']
                    else [])
-  return $ (ordList opts contents) ! attribs
+  return $ foldl (!) (ordList opts contents) attribs
 blockToHtml opts (DefinitionList lst) = do
   contents <- mapM (\(term, defs) ->
-                  do term' <- liftM (dterm <<) $ inlineListToHtml opts term
-                     defs' <- mapM ((liftM (\x -> ddef << (x +++ nl opts))) .
+                  do term' <- liftM (H.dt) $ inlineListToHtml opts term
+                     defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
                                     blockListToHtml opts) defs
-                     return $ nl opts : term' : nl opts : defs') lst
-  let attribs = if writerIncremental opts
-                   then [theclass "incremental"]
-                   else []
-  return $ dlist ! attribs << (concat contents +++ nl opts)
+                     return $ mconcat $ nl opts : term' : nl opts : defs') lst
+  let lst' = H.dl $ mconcat contents >> nl opts
+  let lst'' = if writerIncremental opts
+                 then lst' ! A.class_ "incremental"
+                 else lst'
+  return lst''
 blockToHtml opts (Table capt aligns widths headers rows') = do
   captionDoc <- if null capt
-                   then return noHtml
+                   then return mempty
                    else do
                      cs <- inlineListToHtml opts capt
-                     return $ caption cs +++ nl opts
+                     return $ H.caption cs >> nl opts
   let percent w = show (truncate (100*w) :: Integer) ++ "%"
-  let widthAttrs w = if writerHtml5 opts
-                        then [thestyle $ "width: " ++ percent w]
-                        else [width $ percent w]
   let coltags = if all (== 0.0) widths
-                   then noHtml
-                   else concatHtml $ map
-                         (\w -> (col ! (widthAttrs w)) noHtml +++ nl opts)
-                         widths
+                   then mempty
+                   else mconcat $ map (\w ->
+                          if writerHtml5 opts
+                             then H.col ! A.style (toValue $ "width: " ++ percent w)
+                             else H.col ! A.width (toValue $ percent w) >> nl opts)
+                          widths
   head' <- if all null headers
-              then return noHtml
+              then return mempty
               else do
                 contents <- tableRowToHtml opts aligns 0 headers
-                return $ thead << (nl opts +++ contents) +++ nl opts
-  body' <- liftM (\x -> tbody << (nl opts +++ x)) $
+                return $ H.thead (nl opts >> contents) >> nl opts
+  body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $
                zipWithM (tableRowToHtml opts aligns) [1..] rows'
-  return $ table $ nl opts +++ captionDoc +++ coltags +++ head' +++
-                   body' +++ nl opts
+  return $ H.table $ nl opts >> captionDoc >> coltags >> head' >>
+                   body' >> nl opts
 
 tableRowToHtml :: WriterOptions
                -> [Alignment]
@@ -471,7 +506,7 @@ tableRowToHtml :: WriterOptions
                -> [[Block]]
                -> State WriterState Html
 tableRowToHtml opts aligns rownum cols' = do
-  let mkcell = if rownum == 0 then th else td
+  let mkcell = if rownum == 0 then H.th else H.td
   let rowclass = case rownum of
                       0                  -> "header"
                       x | x `rem` 2 == 1 -> "odd"
@@ -479,8 +514,8 @@ tableRowToHtml opts aligns rownum cols' = do
   cols'' <- sequence $ zipWith
             (\alignment item -> tableItemToHtml opts mkcell alignment item)
             aligns cols'
-  return $ (tr ! [theclass rowclass] $ nl opts +++ toHtmlFromList cols'')
-          +++ nl opts
+  return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'')
+          >> nl opts
 
 alignmentToString :: Alignment -> [Char]
 alignmentToString alignment = case alignment of
@@ -496,84 +531,87 @@ tableItemToHtml :: WriterOptions
                 -> State WriterState Html
 tableItemToHtml opts tag' align' item = do
   contents <- blockListToHtml opts item
-  let alignAttrs = if writerHtml5 opts
-                      then [thestyle $ "align: " ++ alignmentToString align']
-                      else [align $ alignmentToString align']
-  return $ (tag' ! alignAttrs) contents +++ nl opts
+  let alignStr = alignmentToString align'
+  let attribs = if writerHtml5 opts
+                   then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
+                   else A.align (toValue alignStr)
+  return $ (tag' ! attribs $ contents) >> nl opts
 
 toListItems :: WriterOptions -> [Html] -> [Html]
 toListItems opts items = map (toListItem opts) items ++ [nl opts]
 
 toListItem :: WriterOptions -> Html -> Html
-toListItem opts item = nl opts +++ li item
+toListItem opts item = nl opts >> H.li item
 
 blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
 blockListToHtml opts lst =
   mapM (blockToHtml opts) lst >>=
-  return . toHtmlFromList . intersperse (nl opts)
+  return . mconcat . intersperse (nl opts)
 
 -- | Convert list of Pandoc inline elements to HTML.
 inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
 inlineListToHtml opts lst =
-  mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
+  mapM (inlineToHtml opts) lst >>= return . mconcat
 
 -- | Convert Pandoc inline element to HTML.
 inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
 inlineToHtml opts inline =
   case inline of
-    (Str str)        -> return $ stringToHtml opts str
-    (Space)          -> return $ stringToHtml opts " "
-    (LineBreak)      -> return br
-    (EmDash)         -> return $ stringToHtml opts "—"
-    (EnDash)         -> return $ stringToHtml opts "–"
-    (Ellipses)       -> return $ stringToHtml opts "…"
-    (Apostrophe)     -> return $ stringToHtml opts "’"
-    (Emph lst)       -> inlineListToHtml opts lst >>= return . emphasize
-    (Strong lst)     -> inlineListToHtml opts lst >>= return . strong
-    (Code attr str)  -> case highlightHtml True attr str of
-                             Left _  -> return
-                                        $ thecode ! (attrsToHtml opts attr)
-                                        $ stringToHtml opts str
-                             Right h -> return h
+    (Str str)        -> return $ strToHtml str
+    (Space)          -> return $ strToHtml " "
+    (LineBreak)      -> return $ if writerHtml5 opts then H5.br else H.br
+    (Emph lst)       -> inlineListToHtml opts lst >>= return . H.em
+    (Strong lst)     -> inlineListToHtml opts lst >>= return . H.strong
+    (Code attr str)  -> case highlight formatHtmlInline attr str of
+                             Nothing -> return
+                                        $ foldl (!) H.code (attrsToHtml opts attr)
+                                        $ strToHtml str
+                             Just  h -> return $ foldl (!) h $
+                                          attrsToHtml opts (id',[],keyvals)
+                                where (id',_,keyvals) = attr
     (Strikeout lst)  -> inlineListToHtml opts lst >>=
-                        return . (thespan ! [thestyle "text-decoration: line-through;"])
+                        return . H.del
     (SmallCaps lst)   -> inlineListToHtml opts lst >>=
-                         return . (thespan ! [thestyle "font-variant: small-caps;"])
-    (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
-    (Subscript lst)   -> inlineListToHtml opts lst >>= return . sub
+                         return . (H.span ! A.style "font-variant: small-caps;")
+    (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup
+    (Subscript lst)   -> inlineListToHtml opts lst >>= return . H.sub
     (Quoted quoteType lst) ->
                         let (leftQuote, rightQuote) = case quoteType of
-                              SingleQuote -> (stringToHtml opts "‘",
-                                              stringToHtml opts "’")
-                              DoubleQuote -> (stringToHtml opts "“",
-                                              stringToHtml opts "”")
+                              SingleQuote -> (strToHtml "‘",
+                                              strToHtml "’")
+                              DoubleQuote -> (strToHtml "“",
+                                              strToHtml "”")
                         in  do contents <- inlineListToHtml opts lst
-                               return $ leftQuote +++ contents +++ rightQuote
+                               return $ leftQuote >> contents >> rightQuote
     (Math t str) ->     modify (\st -> st {stMath = True}) >>
                         (case writerHTMLMathMethod opts of
                                LaTeXMathML _ ->
                                   -- putting LaTeXMathML in container with class "LaTeX" prevents
                                   -- non-math elements on the page from being treated as math by
                                   -- the javascript
-                                  return $ thespan ! [theclass "LaTeX"] $
+                                  return $ H.span ! A.class_ "LaTeX" $
                                          case t of
-                                           InlineMath  -> primHtml ("$" ++ str ++ "$")
-                                           DisplayMath -> primHtml ("$$" ++ str ++ "$$")
+                                           InlineMath  -> toHtml ("$" ++ str ++ "$")
+                                           DisplayMath -> toHtml ("$$" ++ str ++ "$$")
                                JsMath _ -> do
-                                  let m = primHtml str
+                                  let m = preEscapedString str
                                   return $ case t of
-                                           InlineMath -> thespan ! [theclass "math"] $ m
-                                           DisplayMath -> thediv ! [theclass "math"] $ m
+                                           InlineMath -> H.span ! A.class_ "math" $ m
+                                           DisplayMath -> H.div ! A.class_ "math" $ m
                                WebTeX url -> do
-                                  let m = image ! [src (url ++ urlEncode str),
-                                                         alt str, title str]
+                                  let imtag = if writerHtml5 opts then H5.img else H.img
+                                  let m = imtag ! A.style "vertical-align:middle"
+                                                ! A.src (toValue $ url ++ urlEncode str)
+                                                ! A.alt (toValue str)
+                                                ! A.title (toValue str)
+                                  let brtag = if writerHtml5 opts then H5.br else H.br 
                                   return $ case t of
                                             InlineMath  -> m
-                                            DisplayMath -> br +++ m +++ br
+                                            DisplayMath -> brtag >> m >> brtag
                                GladTeX ->
                                   return $ case t of
-                                             InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
-                                             DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
+                                             InlineMath -> preEscapedString "<EQ ENV=\"math\">" >> toHtml str >> preEscapedString "</EQ>"
+                                             DisplayMath -> preEscapedString "<EQ ENV=\"displaymath\">" >> toHtml str >> preEscapedString "</EQ>"
                                MathML _ -> do
                                   let dt = if t == InlineMath
                                               then DisplayInline
@@ -581,54 +619,57 @@ inlineToHtml opts inline =
                                   let conf = useShortEmptyTags (const False)
                                                defaultConfigPP
                                   case texMathToMathML dt str of
-                                        Right r -> return $ primHtml $
+                                        Right r -> return $ preEscapedString $
                                                     ppcElement conf r
                                         Left  _ -> inlineListToHtml opts
                                                    (readTeXMath str) >>= return .
-                                                     (thespan !  [theclass "math"])
-                               MathJax _ -> return $ primHtml $
+                                                     (H.span ! A.class_ "math")
+                               MathJax _ -> return $ toHtml $
                                   case t of
                                     InlineMath  -> "\\(" ++ str ++ "\\)"
                                     DisplayMath -> "\\[" ++ str ++ "\\]"
                                PlainMath -> do
                                   x <- inlineListToHtml opts (readTeXMath str)
-                                  let m = thespan ! [theclass "math"] $ x
+                                  let m = H.span ! A.class_ "math" $ x
+                                  let brtag = if writerHtml5 opts then H5.br else H.br
                                   return  $ case t of
                                              InlineMath  -> m
-                                             DisplayMath -> br +++ m +++ br )
+                                             DisplayMath -> brtag >> m >> brtag )
     (RawInline "latex" str) -> case writerHTMLMathMethod opts of
                                LaTeXMathML _ -> do modify (\st -> st {stMath = True})
-                                                   return $ primHtml str
-                               _             -> return noHtml
-    (RawInline "html" str) -> return $ primHtml str
-    (RawInline _ _) -> return noHtml
+                                                   return $ toHtml str
+                               _             -> return mempty
+    (RawInline "html" str) -> return $ preEscapedString str
+    (RawInline _ _) -> return mempty
     (Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s ->
                         return $ obfuscateLink opts str s
     (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
                         linkText <- inlineListToHtml opts txt
-                        return $ obfuscateLink opts (show linkText) s
+                        return $ obfuscateLink opts (renderHtml linkText) s
     (Link txt (s,tit)) -> do
                         linkText <- inlineListToHtml opts txt
-                        return $ anchor ! ([href s] ++
-                                 if null tit then [] else [title tit]) $
-                                 linkText
+                        let link = H.a ! A.href (toValue s) $ linkText
+                        return $ if null tit
+                                    then link
+                                    else link ! A.title (toValue tit)
     (Image txt (s,tit)) | treatAsImage s -> do
                         let alternate' = stringify txt
-                        let attributes = [src s] ++
+                        let attributes = [A.src $ toValue s] ++
                                          (if null tit
                                             then []
-                                            else [title tit]) ++
+                                            else [A.title $ toValue tit]) ++
                                          if null txt
                                             then []
-                                            else [alt alternate']
-                        return $ image ! attributes
+                                            else [A.alt $ toValue alternate']
+                        let tag = if writerHtml5 opts then H5.img else H.img
+                        return $ foldl (!) tag attributes
                         -- note:  null title included, as in Markdown.pl
     (Image _ (s,tit)) -> do
-                        let attributes = [src s] ++
+                        let attributes = [A.src $ toValue s] ++
                                          (if null tit
                                             then []
-                                            else [title tit])
-                        return $ itag "embed" ! attributes
+                                            else [A.title $ toValue tit])
+                        return $ foldl (!) H5.embed attributes
                         -- note:  null title included, as in Markdown.pl
     (Note contents)          -> do
                         st <- get
@@ -638,19 +679,19 @@ inlineToHtml opts inline =
                         htmlContents <- blockListToNote opts ref contents
                         -- push contents onto front of notes
                         put $ st {stNotes = (htmlContents:notes)}
-                        return $ sup <<
-                                 anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref),
-                                           theclass "footnoteRef",
-                                           prefixedId opts ("fnref" ++ ref)] << ref
-    (Cite _ il)  -> inlineListToHtml opts il
+                        return $ H.sup $
+                                 H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref)
+                                     ! A.class_ "footnoteRef"
+                                     ! prefixedId opts ("fnref" ++ ref)
+                                     $ toHtml ref
+    (Cite _ il)  -> do contents <- inlineListToHtml opts il
+                       return $ H.span ! A.class_ "citation" $ contents
 
 blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
 blockListToNote opts ref blocks =
   -- If last block is Para or Plain, include the backlink at the end of
   -- that block. Otherwise, insert a new Plain block with the backlink.
-  let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
-                 "\" class=\"footnoteBackLink\">" ++
-                 (if writerAscii opts then "&#8617;" else "↩") ++ "</a>"]
+  let backlink = [Link [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])]
       blocks'  = if null blocks
                     then []
                     else let lastBlock   = last blocks
@@ -663,4 +704,4 @@ blockListToNote opts ref blocks =
                                   _           -> otherBlocks ++ [lastBlock,
                                                  Plain backlink]
   in  do contents <- blockListToHtml opts blocks'
-         return $ nl opts +++ (li ! [prefixedId opts ("fn" ++ ref)]) contents
+         return $ nl opts >> (H.li ! (prefixedId opts ("fn" ++ ref)) $ contents)
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d925b2897..e99b20c60 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 {- |
    Module      : Text.Pandoc.Writers.LaTeX
    Copyright   : Copyright (C) 2006-2010 John MacFarlane
-   License     : GNU GPL, version 2 or above 
+   License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley.edu>
-   Stability   : alpha 
+   Stability   : alpha
    Portability : portable
 
 Conversion of 'Pandoc' format into LaTeX.
@@ -41,53 +41,68 @@ import Data.Char ( toLower, isPunctuation )
 import Control.Monad.State
 import Text.Pandoc.Pretty
 import System.FilePath (dropExtension)
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
+                                 formatLaTeXInline, formatLaTeXBlock)
 
-data WriterState = 
-  WriterState { stInNote     :: Bool          -- @True@ if we're in a note
-              , stInTable    :: Bool          -- @True@ if we're in a table
-              , stTableNotes :: [(Char, Doc)] -- List of markers, notes
-                                              -- in current table
-              , stOLLevel    :: Int           -- level of ordered list nesting
-              , stOptions    :: WriterOptions -- writer options, so they don't have to be parameter 
-              , stVerbInNote :: Bool          -- true if document has verbatim text in note
-              , stEnumerate  :: Bool          -- true if document needs fancy enumerated lists
-              , stTable      :: Bool          -- true if document has a table
-              , stStrikeout  :: Bool          -- true if document has strikeout
-              , stSubscript  :: Bool          -- true if document has subscript
-              , stUrl        :: Bool          -- true if document has visible URL link
-              , stGraphics   :: Bool          -- true if document contains images
-              , stLHS        :: Bool          -- true if document has literate haskell code
-              , stBook       :: Bool          -- true if document uses book or memoir class
-              , stCsquotes   :: Bool          -- true if document uses csquotes
+data WriterState =
+  WriterState { stInNote        :: Bool          -- true if we're in a note
+              , stInTable       :: Bool          -- true if we're in a table
+              , stTableNotes    :: [(Char, Doc)] -- List of markers, notes
+                                                 -- in current table
+              , stOLLevel       :: Int           -- level of ordered list nesting
+              , stOptions       :: WriterOptions -- writer options, so they don't have to be parameter
+              , stVerbInNote    :: Bool          -- true if document has verbatim text in note
+              , stEnumerate     :: Bool          -- true if document needs fancy enumerated lists
+              , stTable         :: Bool          -- true if document has a table
+              , stStrikeout     :: Bool          -- true if document has strikeout
+              , stSubscript     :: Bool          -- true if document has subscript
+              , stUrl           :: Bool          -- true if document has visible URL link
+              , stGraphics      :: Bool          -- true if document contains images
+              , stLHS           :: Bool          -- true if document has literate haskell code
+              , stBook          :: Bool          -- true if document uses book or memoir class
+              , stCsquotes      :: Bool          -- true if document uses csquotes
+              , stHighlighting  :: Bool          -- true if document has highlighted code
+              , stIncremental   :: Bool          -- true if beamer lists should be displayed bit by bit
+              , stInternalLinks :: [String]      -- list of internal link targets
               }
 
 -- | Convert Pandoc to LaTeX.
 writeLaTeX :: WriterOptions -> Pandoc -> String
-writeLaTeX options document = 
-  evalState (pandocToLaTeX options document) $ 
+writeLaTeX options document =
+  evalState (pandocToLaTeX options document) $
   WriterState { stInNote = False, stInTable = False,
                 stTableNotes = [], stOLLevel = 1, stOptions = options,
                 stVerbInNote = False, stEnumerate = False,
                 stTable = False, stStrikeout = False, stSubscript = False,
                 stUrl = False, stGraphics = False,
                 stLHS = False, stBook = writerChapters options,
-                stCsquotes = False }
+                stCsquotes = False, stHighlighting = False,
+                stIncremental = writerIncremental options,
+                stInternalLinks = [] }
 
 pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
 pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
+  -- see if there are internal links
+  let isInternalLink (Link _ ('#':xs,_))  = [xs]
+      isInternalLink _                    = []
+  modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks }
   let template = writerTemplate options
-  let usesBookClass x = "\\documentclass" `isPrefixOf` x &&
-         ("{memoir}" `isSuffixOf` x || "{book}" `isSuffixOf` x ||
-          "{report}" `isSuffixOf` x)
-  when (any usesBookClass (lines template)) $
-    modify $ \s -> s{stBook = True}
+  -- set stBook depending on documentclass
+  let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
+  case lookup "documentclass" (writerVariables options) of
+         Just x  | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
+                 | otherwise            -> return ()
+         Nothing | any (\x -> "\\documentclass" `isPrefixOf` x &&
+                          (any (`isSuffixOf` x) bookClasses))
+                          (lines template) -> modify $ \s -> s{stBook = True}
+                 | otherwise               -> return ()
   -- check for \usepackage...{csquotes}; if present, we'll use
   -- \enquote{...} for smart quotes:
   when ("{csquotes}" `isInfixOf` template) $
     modify $ \s -> s{stCsquotes = True}
-  opts <- liftM stOptions get
-  let colwidth = if writerWrapText opts
-                    then Just $ writerColumns opts
+  let colwidth = if writerWrapText options
+                    then Just $ writerColumns options
                     else Nothing
   titletext <- liftM (render colwidth) $ inlineListToLaTeX title
   authorsText <- mapM (liftM (render colwidth) . inlineListToLaTeX) authors
@@ -97,9 +112,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
                               else case last blocks of
                                 Header 1 il -> (init blocks, il)
                                 _           -> (blocks, [])
-  body <- blockListToLaTeX blocks'
+  blocks'' <- if writerBeamer options
+                 then toSlides blocks'
+                 else return blocks'
+  body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''
   biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
-  let main = render colwidth body
+  let main = render colwidth $ vcat body
   st <- get
   let biblioFiles = intercalate "," $ map dropExtension $  writerBiblioFiles options
       citecontext = case writerCiteMethod options of
@@ -116,7 +134,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
                  [ ("toc", if writerTableOfContents options then "yes" else "")
                  , ("body", main)
                  , ("title", titletext)
-                 , ("date", dateText) ] ++
+                 , ("date", dateText)
+                 , ("documentclass", if writerBeamer options
+                                        then "beamer"
+                                        else if writerChapters options
+                                                then "book"
+                                                else "article") ] ++
                  [ ("author", a) | a <- authorsText ] ++
                  [ ("verbatim-in-note", "yes") | stVerbInNote st ] ++
                  [ ("fancy-enums", "yes") | stEnumerate st ] ++
@@ -128,36 +151,102 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
                  [ ("lhs", "yes") | stLHS st ] ++
                  [ ("graphics", "yes") | stGraphics st ] ++
                  [ ("book-class", "yes") | stBook st] ++
-                 [ ("listings", "yes") | writerListings options ] ++
+                 [ ("listings", "yes") | writerListings options || stLHS st ] ++
+                 [ ("beamer", "yes") | writerBeamer options ] ++
+                 [ ("highlighting-macros", styleToLaTeX
+                       $ writerHighlightStyle options ) | stHighlighting st ] ++
                  citecontext
   return $ if writerStandalone options
               then renderTemplate context template
               else main
 
--- escape things as needed for LaTeX
+-- | Convert Elements to LaTeX
+elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc
+elementToLaTeX _ (Blk block) = blockToLaTeX block
+elementToLaTeX opts (Sec level _ id' title' elements) = do
+  header' <- sectionHeader id' level title'
+  innerContents <- mapM (elementToLaTeX opts) elements
+  return $ vcat (header' : innerContents)
 
-stringToLaTeX :: String -> String
-stringToLaTeX = escapeStringUsing latexEscapes
-  where latexEscapes = backslashEscapes "{}$%&_#" ++ 
-                       [ ('^', "\\^{}")
-                       , ('\\', "\\textbackslash{}")
-                       , ('~', "\\ensuremath{\\sim}")
-                       , ('|', "\\textbar{}")
-                       , ('<', "\\textless{}")
-                       , ('>', "\\textgreater{}")
-                       , ('[', "{[}")  -- to avoid interpretation as
-                       , (']', "{]}")  -- optional arguments
-                       , ('\160', "~")
-                       , ('\x2018', "`")
-                       , ('\x2019', "'")
-                       , ('\x201C', "``")
-                       , ('\x201D', "''")
-                       ]
+-- escape things as needed for LaTeX
+stringToLaTeX :: Bool -> String -> String
+stringToLaTeX _     []     = ""
+stringToLaTeX isUrl (x:xs) =
+  case x of
+       '{' -> "\\{" ++ rest
+       '}' -> "\\}" ++ rest
+       '$' -> "\\$" ++ rest
+       '%' -> "\\%" ++ rest
+       '&' -> "\\&" ++ rest
+       '_' -> "\\_" ++ rest
+       '#' -> "\\#" ++ rest
+       '-' -> case xs of   -- prevent adjacent hyphens from forming ligatures
+                   ('-':_) -> "-{}" ++ rest
+                   _       -> '-' : rest
+       '~' | not isUrl -> "\\ensuremath{\\sim}"
+       '^' -> "\\^{}" ++ rest
+       '\\' -> "\\textbackslash{}" ++ rest
+       '€' -> "\\euro{}" ++ rest
+       '|' -> "\\textbar{}" ++ rest
+       '<' -> "\\textless{}" ++ rest
+       '>' -> "\\textgreater{}" ++ rest
+       '[' -> "{[}" ++ rest  -- to avoid interpretation as
+       ']' -> "{]}" ++ rest  -- optional arguments
+       '\160' -> "~" ++ rest
+       '\x2018' -> "`" ++ rest
+       '\x2019' -> "'" ++ rest
+       '\x201C' -> "``" ++ rest
+       '\x201D' -> "''" ++ rest
+       '\x2026' -> "\\ldots{}" ++ rest
+       '\x2014' -> "---" ++ rest
+       '\x2013' -> "--" ++ rest
+       _        -> x : rest
+    where rest = stringToLaTeX isUrl xs
 
 -- | Puts contents into LaTeX command.
 inCmd :: String -> Doc -> Doc
 inCmd cmd contents = char '\\' <> text cmd <> braces contents
 
+toSlides :: [Block] -> State WriterState [Block]
+toSlides bs = do
+  opts <- gets stOptions
+  let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts
+  let bs' = prepSlides slideLevel bs
+  concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
+
+elementToBeamer :: Int -> Element -> State WriterState [Block]
+elementToBeamer _slideLevel (Blk b) = return [b]
+elementToBeamer slideLevel  (Sec lvl _num _ident tit elts)
+  | lvl >  slideLevel = do
+      bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+      return $ Para ( RawInline "latex" "\\begin{block}{"
+                    : tit ++ [RawInline "latex" "}"] )
+             : bs ++ [RawBlock "latex" "\\end{block}"]
+  | lvl <  slideLevel = do
+      bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+      return $ (Header lvl tit) : bs
+  | otherwise = do -- lvl == slideLevel
+      -- note: [fragile] is required or verbatim breaks
+      let hasCodeBlock (CodeBlock _ _) = [True]
+          hasCodeBlock _               = []
+      let hasCode (Code _ _) = [True]
+          hasCode _          = []
+      let fragile = if not $ null $ queryWith hasCodeBlock elts ++ queryWith hasCode elts
+                       then "[fragile]"
+                       else ""
+      let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile ++
+                "\\frametitle{") : tit ++ [RawInline "latex" "}"]
+      let slideEnd = RawBlock "latex" "\\end{frame}"
+      -- now carve up slide into blocks if there are sections inside
+      bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+      return $ slideStart : bs ++ [slideEnd]
+
+isListBlock :: Block -> Bool
+isListBlock (BulletList _)     = True
+isListBlock (OrderedList _ _)  = True
+isListBlock (DefinitionList _) = True
+isListBlock _                  = False
+
 -- | Convert Pandoc block element to LaTeX.
 blockToLaTeX :: Block     -- ^ Block to convert
              -> State WriterState Doc
@@ -172,58 +261,80 @@ blockToLaTeX (Para lst) = do
   result <- inlineListToLaTeX lst
   return $ result <> blankline
 blockToLaTeX (BlockQuote lst) = do
-  contents <- blockListToLaTeX lst
-  return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
+  beamer <- writerBeamer `fmap` gets stOptions
+  case lst of
+       [b] | beamer && isListBlock b -> do
+         oldIncremental <- gets stIncremental
+         modify $ \s -> s{ stIncremental = True }
+         result <- blockToLaTeX b
+         modify $ \s -> s{ stIncremental = oldIncremental }
+         return result
+       _ -> do
+         contents <- blockListToLaTeX lst
+         return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
 blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
-  st <- get
-  env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes &&
-                    "literate" `elem` classes
-            then do
-              modify $ \s -> s{ stLHS = True }
-              return "code"
-            else if writerListings (stOptions st)
-                    then return "lstlisting"
-                    else if stInNote st
-                            then do
-                              modify $ \s -> s{ stVerbInNote = True }
-                              return "Verbatim"
-                            else return "verbatim"
-  let params = if writerListings (stOptions st)
-               then take 1
-                    [ "language=" ++ lang | lang <- classes
-                    , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform"
-                                  ,"POV","Ada","Java","Prolog","Algol"
-                                  ,"JVMIS","Promela","Ant","ksh","Python"
-                                  ,"Assembler","Lisp","R","Awk","Logo"
-                                  ,"Reduce","bash","make","Rexx","Basic"
-                                  ,"Mathematica","RSL","C","Matlab","Ruby"
-                                  ,"C++","Mercury","S","Caml","MetaPost"
-                                  ,"SAS","Clean","Miranda","Scilab","Cobol"
-                                  ,"Mizar","sh","Comal","ML","SHELXL","csh"
-                                  ,"Modula-2","Simula","Delphi","MuPAD"
-                                  ,"SQL","Eiffel","NASTRAN","tcl","Elan"
-                                  ,"Oberon-2","TeX","erlang","OCL"
-                                  ,"VBScript","Euphoria","Octave","Verilog"
-                                  ,"Fortran","Oz","VHDL","GCL","Pascal"
-                                  ,"VRML","Gnuplot","Perl","XML","Haskell"
-	                          ,"PHP","XSLT","HTML","PL/I"]
-                    ] ++ 
-                    [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ]
-               else []
-      printParams 
-          | null params = empty
-          | otherwise   = "[" <> hsep (intersperse "," (map text params)) <>
-                          "]"
-  return $ flush ("\\begin{" <> text env <> "}" <> printParams $$ text str $$
-           "\\end{" <> text env <> "}") $$ cr
-           -- final cr needed because of footnotes
+  opts <- gets stOptions
+  case () of
+     _ | writerLiterateHaskell opts && "haskell" `elem` classes &&
+         "literate" `elem` classes                      -> lhsCodeBlock
+       | writerListings opts                            -> listingsCodeBlock
+       | writerHighlight opts && not (null classes)     -> highlightedCodeBlock
+       | otherwise                                      -> rawCodeBlock
+   where lhsCodeBlock = do
+           modify $ \s -> s{ stLHS = True }
+           return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr
+         rawCodeBlock = do
+           st <- get
+           env <- if stInNote st
+                     then modify (\s -> s{ stVerbInNote = True }) >>
+                          return "Verbatim"
+                     else return "verbatim"
+           return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$
+                    text ("\\end{" ++ env ++ "}")) $$ cr -- final cr because of notes
+         listingsCodeBlock = do
+           st <- get
+           let params = if writerListings (stOptions st)
+                        then take 1
+                             [ "language=" ++ lang | lang <- classes
+                             , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform"
+                                           ,"POV","Ada","Java","Prolog","Algol"
+                                           ,"JVMIS","Promela","Ant","ksh","Python"
+                                           ,"Assembler","Lisp","R","Awk","Logo"
+                                           ,"Reduce","bash","make","Rexx","Basic"
+                                           ,"Mathematica","RSL","C","Matlab","Ruby"
+                                           ,"C++","Mercury","S","Caml","MetaPost"
+                                           ,"SAS","Clean","Miranda","Scilab","Cobol"
+                                           ,"Mizar","sh","Comal","ML","SHELXL","csh"
+                                           ,"Modula-2","Simula","Delphi","MuPAD"
+                                           ,"SQL","Eiffel","NASTRAN","tcl","Elan"
+                                           ,"Oberon-2","TeX","erlang","OCL"
+                                           ,"VBScript","Euphoria","Octave","Verilog"
+                                           ,"Fortran","Oz","VHDL","GCL","Pascal"
+                                           ,"VRML","Gnuplot","Perl","XML","Haskell"
+         	                          ,"PHP","XSLT","HTML","PL/I"]
+                             ] ++
+                             [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ]
+                        else []
+               printParams
+                   | null params = empty
+                   | otherwise   = brackets $ hsep (intersperse "," (map text params))
+           return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
+                    "\\end{lstlisting}") $$ cr
+         highlightedCodeBlock =
+           case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
+                  Nothing -> rawCodeBlock
+                  Just  h -> modify (\st -> st{ stHighlighting = True }) >>
+                             return (flush $ text h)
 blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline
 blockToLaTeX (RawBlock _ _) = return empty
 blockToLaTeX (BulletList lst) = do
+  incremental <- gets stIncremental
+  let inc = if incremental then "[<+->]" else ""
   items <- mapM listItemToLaTeX lst
-  return $ "\\begin{itemize}" $$ vcat items $$ "\\end{itemize}"
+  return $ text ("\\begin{itemize}" ++ inc) $$ vcat items $$ "\\end{itemize}"
 blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
   st <- get
+  let inc = if stIncremental st then "[<+->]" else ""
   let oldlevel = stOLLevel st
   put $ st {stOLLevel = oldlevel + 1}
   items <- mapM listItemToLaTeX lst
@@ -231,46 +342,25 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
   exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
                  then do
                    modify $ \s -> s{ stEnumerate = True }
-                   return $ char '[' <> 
+                   return $ char '[' <>
                        text (head (orderedListMarkers (1, numstyle,
                             numdelim))) <> char ']'
                  else return empty
   let resetcounter = if start /= 1 && oldlevel <= 4
-                        then text $ "\\setcounter{enum" ++ 
+                        then text $ "\\setcounter{enum" ++
                              map toLower (toRomanNumeral oldlevel) ++
                              "}{" ++ show (start - 1) ++ "}"
-                        else empty 
-  return $ "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+                        else empty
+  return $ text ("\\begin{enumerate}" ++ inc) <> exemplar $$ resetcounter $$
            vcat items $$ "\\end{enumerate}"
 blockToLaTeX (DefinitionList lst) = do
+  incremental <- gets stIncremental
+  let inc = if incremental then "[<+->]" else ""
   items <- mapM defListItemToLaTeX lst
-  return $ "\\begin{description}" $$ vcat items $$ "\\end{description}"
+  return $ text ("\\begin{description}" ++ inc) $$ vcat items $$ "\\end{description}"
 blockToLaTeX HorizontalRule = return $
   "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline
-blockToLaTeX (Header level lst) = do
-  txt <- inlineListToLaTeX lst
-  let noNote (Note _) = Str ""
-      noNote x        = x
-  let lstNoNotes = bottomUp noNote lst
-  -- footnotes in sections don't work unless you specify an optional
-  -- argument:  \section[mysec]{mysec\footnote{blah}}
-  optional <- if lstNoNotes == lst
-                 then return empty
-                 else do
-                   res <- inlineListToLaTeX lstNoNotes
-                   return $ char '[' <> res <> char ']'
-  let stuffing = optional <> char '{' <> txt <> char '}'
-  book <- liftM stBook get
-  let level' = if book then level - 1 else level
-  let headerWith x y = text x <> y $$ blankline
-  return $ case level' of
-                0  -> headerWith "\\chapter" stuffing
-                1  -> headerWith "\\section" stuffing
-                2  -> headerWith "\\subsection" stuffing
-                3  -> headerWith "\\subsubsection" stuffing
-                4  -> headerWith "\\paragraph" stuffing
-                5  -> headerWith "\\subparagraph" stuffing
-                _            -> txt $$ blankline
+blockToLaTeX (Header level lst) = sectionHeader "" level lst
 blockToLaTeX (Table caption aligns widths heads rows) = do
   modify $ \s -> s{ stInTable = True, stTableNotes = [] }
   headers <- if all null heads
@@ -338,6 +428,49 @@ defListItemToLaTeX (term, defs) = do
     def'  <- liftM vsep $ mapM blockListToLaTeX defs
     return $ "\\item" <> brackets term' $$ def'
 
+-- | Craft the section header, inserting the secton reference, if supplied.
+sectionHeader :: [Char]
+              -> Int
+              -> [Inline]
+              -> State WriterState Doc
+sectionHeader ref level lst = do
+  txt <- inlineListToLaTeX lst
+  let noNote (Note _) = Str ""
+      noNote x        = x
+  let lstNoNotes = bottomUp noNote lst
+  -- footnotes in sections don't work unless you specify an optional
+  -- argument:  \section[mysec]{mysec\footnote{blah}}
+  optional <- if lstNoNotes == lst
+                 then return empty
+                 else do
+                   res <- inlineListToLaTeX lstNoNotes
+                   return $ char '[' <> res <> char ']'
+  let stuffing = optional <> char '{' <> txt <> char '}'
+  book <- gets stBook
+  opts <- gets stOptions
+  let level' = if book || writerChapters opts then level - 1 else level
+  internalLinks <- gets stInternalLinks
+  let refLabel lab = (if ref `elem` internalLinks
+                         then text "\\hyperdef"
+                                <> braces empty
+                                <> braces (text ref)
+                                <> braces (lab <> text "\\label"
+                                               <> braces (text ref))
+                         else lab)
+                      $$ blankline
+  let headerWith x y = refLabel $ text x <> y
+  return $ case level' of
+                0  -> if writerBeamer opts
+                         then headerWith "\\part" stuffing
+                         else headerWith "\\chapter" stuffing
+                1  -> headerWith "\\section" stuffing
+                2  -> headerWith "\\subsection" stuffing
+                3  -> headerWith "\\subsubsection" stuffing
+                4  -> headerWith "\\paragraph" stuffing
+                5  -> headerWith "\\subparagraph" stuffing
+                _            -> txt $$ blankline
+
+
 -- | Convert list of inline elements to LaTeX.
 inlineListToLaTeX :: [Inline]  -- ^ Inlines to convert
                   -> State WriterState Doc
@@ -345,7 +478,6 @@ inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
 
 isQuoted :: Inline -> Bool
 isQuoted (Quoted _ _) = True
-isQuoted Apostrophe = True
 isQuoted _ = False
 
 -- | Convert inline element to LaTeX
@@ -353,8 +485,8 @@ inlineToLaTeX :: Inline    -- ^ Inline to convert
               -> State WriterState Doc
 inlineToLaTeX (Emph lst) =
   inlineListToLaTeX lst >>= return . inCmd "emph"
-inlineToLaTeX (Strong lst) = 
-  inlineListToLaTeX lst >>= return . inCmd "textbf" 
+inlineToLaTeX (Strong lst) =
+  inlineListToLaTeX lst >>= return . inCmd "textbf"
 inlineToLaTeX (Strikeout lst) = do
   contents <- inlineListToLaTeX lst
   modify $ \s -> s{ stStrikeout = True }
@@ -377,14 +509,24 @@ inlineToLaTeX (Cite cits lst) = do
      Biblatex -> citationsToBiblatex cits
      _        -> inlineListToLaTeX lst
 
-inlineToLaTeX (Code _ str) = do
-  st <- get
-  if writerListings (stOptions st)
-    then do
-      when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True }
-      let chr = ((enumFromTo '!' '~') \\ str) !! 0
-      return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
-    else return $ text $ "\\texttt{" ++ stringToLaTeX str ++ "}"
+inlineToLaTeX (Code (_,classes,_) str) = do
+  opts <- gets stOptions
+  case () of
+     _ | writerListings opts                         -> listingsCode
+       | writerHighlight opts && not (null classes) -> highlightCode
+       | otherwise                                   -> rawCode
+   where listingsCode = do
+           inNote <- gets stInNote
+           when inNote $ modify $ \s -> s{ stVerbInNote = True }
+           let chr = ((enumFromTo '!' '~') \\ str) !! 0
+           return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
+         highlightCode = do
+           case highlight formatLaTeXInline ("",classes,[]) str of
+                  Nothing -> rawCode
+                  Just  h -> modify (\st -> st{ stHighlighting = True }) >>
+                             return (text h)
+         rawCode = return
+                 $ text $ "\\texttt{" ++ stringToLaTeX False str ++ "}" 
 inlineToLaTeX (Quoted SingleQuote lst) = do
   contents <- inlineListToLaTeX lst
   csquotes <- liftM stCsquotes get
@@ -411,11 +553,7 @@ inlineToLaTeX (Quoted DoubleQuote lst) = do
                    then "\\,"
                    else empty
        return $ "``" <> s1 <> contents <> s2 <> "''"
-inlineToLaTeX Apostrophe = return $ char '\''
-inlineToLaTeX EmDash = return "---"
-inlineToLaTeX EnDash = return "--"
-inlineToLaTeX Ellipses = return "\\ldots{}"
-inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
+inlineToLaTeX (Str str) = return $ text $ stringToLaTeX False str
 inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$'
 inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]"
 inlineToLaTeX (RawInline "latex" str) = return $ text str
@@ -429,7 +567,7 @@ inlineToLaTeX (Link txt (src, _)) =
              do modify $ \s -> s{ stUrl = True }
                 return $ text $ "\\url{" ++ x ++ "}"
         _ -> do contents <- inlineListToLaTeX txt
-                return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <>
+                return $ text ("\\href{" ++ stringToLaTeX True src ++ "}{") <>
                          contents <> char '}'
 inlineToLaTeX (Image _ (source, _)) = do
   modify $ \s -> s{ stGraphics = True }
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 78b9274d6..d3735efa7 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -98,7 +98,13 @@ noteToMan opts num note = do
 
 -- | Association list of characters to escape.
 manEscapes :: [(Char, String)]
-manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "@\\"
+manEscapes = [ ('\160', "\\ ")
+             , ('\'', "\\[aq]")
+             , ('’', "'")
+             , ('\x2014', "\\[em]")
+             , ('\x2013', "\\[en]")
+             , ('\x2026', "\\&...")
+             ] ++ backslashEscapes "@\\"
 
 -- | Escape special characters for Man.
 escapeString :: String -> String
@@ -303,10 +309,6 @@ inlineToMan opts (Quoted DoubleQuote lst) = do
   return $ text "\\[lq]" <> contents <> text "\\[rq]"
 inlineToMan opts (Cite _ lst) =
   inlineListToMan opts lst
-inlineToMan _ EmDash = return $ text "\\[em]"
-inlineToMan _ EnDash = return $ text "\\[en]"
-inlineToMan _ Apostrophe = return $ char '\''
-inlineToMan _ Ellipses = return $ text "\\&..."
 inlineToMan _ (Code _ str) =
   return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
 inlineToMan _ (Str str) = return $ text $ escapeString str
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 48e9578b4..7ce939395 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -233,17 +233,19 @@ blockToMarkdown _ HorizontalRule =
 blockToMarkdown opts (Header level inlines) = do
   contents <- inlineListToMarkdown opts inlines
   st <- get
-  -- use setext style headers if in literate haskell mode.
-  -- ghc interprets '#' characters in column 1 as line number specifiers.
-  if writerLiterateHaskell opts || stPlain st
-     then let len = offset contents
-          in  return $ contents <> cr <>
-                       (case level of
-                             1  -> text $ replicate len '='
-                             2  -> text $ replicate len '-'
-                             _  -> empty) <> blankline
-     else return $
-       text ((replicate level '#') ++ " ") <> contents <> blankline
+  let setext = writerSetextHeaders opts
+  return $ nowrap
+         $ case level of
+            1 | setext ->
+                  contents <> cr <> text (replicate (offset contents) '=') <>
+                  blankline
+            2 | setext ->
+                  contents <> cr <> text (replicate (offset contents) '-') <>
+                  blankline
+            -- ghc interprets '#' characters in column 1 as linenum specifiers.
+            _ | stPlain st || writerLiterateHaskell opts ->
+                contents <> blankline
+            _ -> text (replicate level '#') <> space <> contents <> blankline
 blockToMarkdown opts (CodeBlock (_,classes,_) str)
   | "haskell" `elem` classes && "literate" `elem` classes &&
     writerLiterateHaskell opts =
@@ -434,10 +436,6 @@ inlineToMarkdown opts (Quoted SingleQuote lst) = do
 inlineToMarkdown opts (Quoted DoubleQuote lst) = do
   contents <- inlineListToMarkdown opts lst
   return $ "“" <> contents <> "”"
-inlineToMarkdown _ EmDash = return "\8212"
-inlineToMarkdown _ EnDash = return "\8211"
-inlineToMarkdown _ Apostrophe = return "\8217"
-inlineToMarkdown _ Ellipses = return "\8230"
 inlineToMarkdown opts (Code attr str) =
   let tickGroups = filter (\s -> '`' `elem` s) $ group str 
       longest    = if null tickGroups
@@ -495,17 +493,16 @@ inlineToMarkdown opts (Cite (c:cs) lst)
         modekey SuppressAuthor = "-"
         modekey _              = ""
 inlineToMarkdown _ (Cite _ _) = return $ text ""
-inlineToMarkdown opts (Link txt (src', tit)) = do
+inlineToMarkdown opts (Link txt (src, tit)) = do
   linktext <- inlineListToMarkdown opts txt
   let linktitle = if null tit
                      then empty
                      else text $ " \"" ++ tit ++ "\""
-  let src = unescapeURI src'
   let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
-  let useRefLinks = writerReferenceLinks opts
   let useAuto = case (tit,txt) of
                       ("", [Code _ s]) | s == srcSuffix -> True
                       _                                 -> False
+  let useRefLinks = writerReferenceLinks opts && not useAuto
   ref <- if useRefLinks then getReference txt (src, tit) else return []
   reftext <- inlineListToMarkdown opts ref
   return $ if useAuto
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index a7c7fc482..f31a2c2d1 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -346,22 +346,14 @@ inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst
 
 inlineToMediaWiki opts (Quoted SingleQuote lst) = do
   contents <- inlineListToMediaWiki opts lst
-  return $ "&lsquo;" ++ contents ++ "&rsquo;"
+  return $ "\8216" ++ contents ++ "\8217"
 
 inlineToMediaWiki opts (Quoted DoubleQuote lst) = do
   contents <- inlineListToMediaWiki opts lst
-  return $ "&ldquo;" ++ contents ++ "&rdquo;"
+  return $ "\8220" ++ contents ++ "\8221"
 
 inlineToMediaWiki opts (Cite _  lst) = inlineListToMediaWiki opts lst
 
-inlineToMediaWiki _ EmDash = return "&mdash;"
-
-inlineToMediaWiki _ EnDash = return "&ndash;"
-
-inlineToMediaWiki _ Apostrophe = return "&rsquo;"
-
-inlineToMediaWiki _ Ellipses = return "&hellip;"
-
 inlineToMediaWiki _ (Code _ str) =
   return $ "<tt>" ++ (escapeString str) ++ "</tt>"
 
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index f8030965c..f8f22494f 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -34,9 +34,10 @@ import System.FilePath ( (</>), takeExtension )
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Lazy.UTF8 ( fromString )
 import Codec.Archive.Zip
-import System.Time
+import Data.Time.Clock.POSIX
 import Paths_pandoc ( getDataFileName )
 import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints )
 import Text.Pandoc.MIME ( getMimeType )
 import Text.Pandoc.Definition
 import Text.Pandoc.Generic
@@ -71,7 +72,7 @@ writeODT mbRefOdt opts doc = do
   let sourceDir = writerSourceDirectory opts
   doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
   let newContents = writeOpenDocument opts{writerWrapText = False} doc'
-  (TOD epochtime _) <- getClockTime
+  epochtime <- floor `fmap` getPOSIXTime
   let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
   picEntries <- readIORef picEntriesRef
   let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
@@ -102,11 +103,16 @@ writeODT mbRefOdt opts doc = do
 transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
 transformPic sourceDir entriesRef (Image lab (src,tit)) = do
   let src' = unEscapeString src
+  mbSize <- readImageSize src'
+  let tit' = case mbSize of
+                  Just s   -> let (w,h) = sizeInPoints s
+                              in  show w ++ "x" ++ show h
+                  Nothing  -> tit
   entries <- readIORef entriesRef
   let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src'
   catch (readEntry [] (sourceDir </> src') >>= \entry ->
            modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
-           return (Image lab (newsrc, tit)))
+           return (Image lab (newsrc, tit')))
         (\_ -> return (Emph lab))
 transformPic _ _ x = return x
 
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index e675f4e65..a0317511a 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -40,7 +40,7 @@ import Text.Printf ( printf )
 import Control.Applicative ( (<$>) )
 import Control.Arrow ( (***), (>>>) )
 import Control.Monad.State hiding ( when )
-import Data.Char (chr)
+import Data.Char (chr, isDigit)
 import qualified Data.Map as Map
 
 -- | Auxiliary function to convert Plain block to Para.
@@ -154,8 +154,8 @@ inHeaderTags i d =
                                  , ("text:outline-level", show i)] d
 
 inQuotes :: QuoteType -> Doc -> Doc
-inQuotes SingleQuote s = text "&#8216;" <> s <> text "&#8217;"
-inQuotes DoubleQuote s = text "&#8220;" <> s <> text "&#8221;"
+inQuotes SingleQuote s = char '\8216' <> s <> char '\8217'
+inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221'
 
 handleSpaces :: String -> Doc
 handleSpaces s
@@ -361,10 +361,6 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l
 -- | Convert an inline element to OpenDocument.
 inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
 inlineToOpenDocument o ils
-    | Ellipses      <- ils = inTextStyle $ text "&#8230;"
-    | EmDash        <- ils = inTextStyle $ text "&#8212;"
-    | EnDash        <- ils = inTextStyle $ text "&#8211;"
-    | Apostrophe    <- ils = inTextStyle $ text "&#8217;"
     | Space         <- ils = inTextStyle space
     | LineBreak     <- ils = return $ selfClosingTag "text:line-break" []
     | Str         s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
@@ -382,7 +378,7 @@ inlineToOpenDocument o ils
     | RawInline "html" s <- ils = preformatted s  -- for backwards compat.
     | RawInline _ _ <- ils = return empty
     | Link  l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
-    | Image _ (s,_) <- ils = return $ mkImg  s
+    | Image _ (s,t) <- ils = return $ mkImg  s t
     | Note        l <- ils = mkNote l
     | otherwise            = return empty
     where
@@ -391,7 +387,7 @@ inlineToOpenDocument o ils
                                            , ("xlink:href" , s       )
                                            , ("office:name", t       )
                                            ] . inSpanTags "Definition"
-      mkImg  s     = inTags False "draw:frame" [] $
+      mkImg  s t   = inTags False "draw:frame" (attrsFromTitle t) $
                      selfClosingTag "draw:image" [ ("xlink:href"   , s       )
                                                  , ("xlink:type"   , "simple")
                                                  , ("xlink:show"   , "embed" )
@@ -407,6 +403,17 @@ inlineToOpenDocument o ils
         addNote nn
         return nn
 
+-- a title of the form "120x140" will be interpreted as image
+-- size in points.
+attrsFromTitle :: String -> [(String,String)]
+attrsFromTitle s = if null xs || null ys
+                      then []
+                      else [("svg:width",xs ++ "pt"),("svg:height",ys ++ "pt")]
+  where (xs,rest) = span isDigit s
+        ys        = case rest of
+                         ('x':zs) | all isDigit zs -> zs
+                         _ -> ""
+
 bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc]))
 bulletListStyle l =
     let doStyles  i = inTags True "text:list-level-style-bullet"
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index f7f314428..4c77ba7c6 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -95,7 +95,12 @@ noteToOrg num note = do
 
 -- | Escape special characters for Org.
 escapeString :: String -> String
-escapeString = escapeStringUsing (backslashEscapes "^_")
+escapeString = escapeStringUsing $
+               [ ('\x2014',"---")
+               , ('\x2013',"--")
+               , ('\x2019',"'")
+               , ('\x2026',"...")
+               ] ++ backslashEscapes "^_"
 
 titleToOrg :: [Inline] -> State WriterState Doc
 titleToOrg [] = return empty
@@ -249,10 +254,6 @@ inlineToOrg (Quoted DoubleQuote lst) = do
   contents <- inlineListToOrg lst
   return $ "\"" <> contents <> "\""
 inlineToOrg (Cite _  lst) = inlineListToOrg lst
-inlineToOrg EmDash = return "---"
-inlineToOrg EnDash = return "--"
-inlineToOrg Apostrophe = return "'"
-inlineToOrg Ellipses = return "..."
 inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
 inlineToOrg (Str str) = return $ text $ escapeString str
 inlineToOrg (Math t str) = do
@@ -272,8 +273,7 @@ inlineToOrg (Link txt (src, _)) = do
         _ -> do contents <- inlineListToOrg txt
                 modify $ \s -> s{ stLinks = True }
                 return $ "[[" <> text src <> "][" <> contents <> "]]"
-inlineToOrg (Image _ (source', _)) = do
-  let source = unescapeURI source'
+inlineToOrg (Image _ (source, _)) = do
   modify $ \s -> s{ stImages = True }
   return $ "[[" <> text source <> "]]"
 inlineToOrg (Note contents) = do 
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d4adaa929..d6e5b5c9e 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -157,7 +157,7 @@ blockToRST (Header level inlines) = do
   contents <- inlineListToRST inlines
   let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
   let border = text $ replicate (offset contents) headerChar
-  return $ contents $$ border $$ blankline
+  return $ nowrap $ contents $$ border $$ blankline
 blockToRST (CodeBlock (_,classes,_) str) = do
   opts <- stOptions <$> get
   let tabstop = writerTabStop opts
@@ -176,7 +176,7 @@ blockToRST (Table caption _ widths headers rows) =  do
                      else blankline <> text "Table: " <> caption'
   headers' <- mapM blockListToRST headers
   rawRows <- mapM (mapM blockListToRST) rows
-  let isSimple = all (==0) widths && all (all (\bs -> length bs == 1)) rows
+  let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows
   let numChars = maximum . map offset
   opts <- get >>= return . stOptions
   let widthsInChars =
@@ -281,26 +281,24 @@ inlineToRST (Quoted DoubleQuote lst) = do
   return $ "“" <> contents <> "”"
 inlineToRST (Cite _  lst) =
   inlineListToRST lst
-inlineToRST EmDash = return $ char '\8212'
-inlineToRST EnDash = return $ char '\8211'
-inlineToRST Apostrophe = return $ char '\8217'
-inlineToRST Ellipses = return $ char '\8230'
 inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
 inlineToRST (Str str) = return $ text $ escapeString str
 inlineToRST (Math t str) = do
   modify $ \st -> st{ stHasMath = True }
   return $ if t == InlineMath
-              then ":math:`$" <> text str <> "$`"
-              else ":math:`$$" <> text str <> "$$`"
+              then ":math:`" <> text str <> "`" <> beforeNonBlank "\\ "
+              else if '\n' `elem` str
+                   then blankline $$ ".. math::" $$
+                        blankline $$ nest 3 (text str) $$ blankline
+                   else blankline $$ (".. math:: " <> text str) $$ blankline
 inlineToRST (RawInline _ _) = return empty
 inlineToRST (LineBreak) = return cr -- there's no line break in RST
 inlineToRST Space = return space
 inlineToRST (Link [Code _ str] (src, _)) | src == str ||
                                            src == "mailto:" ++ str = do
   let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
-  return $ text $ unescapeURI srcSuffix
-inlineToRST (Link txt (src', tit)) = do
-  let src = unescapeURI src'
+  return $ text srcSuffix
+inlineToRST (Link txt (src, tit)) = do
   useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
   linktext <- inlineListToRST $ normalizeSpaces txt
   if useReferenceLinks
@@ -311,8 +309,7 @@ inlineToRST (Link txt (src', tit)) = do
             modify $ \st -> st { stLinks = refs' }
             return $ "`" <> linktext <> "`_"
     else return $ "`" <> linktext <> " <" <> text src <> ">`_"
-inlineToRST (Image alternate (source', tit)) = do
-  let source = unescapeURI source'
+inlineToRST (Image alternate (source, tit)) = do
   pics <- get >>= return . stImages
   let labelsUsed = map fst pics 
   let txt = if null alternate || alternate == [Str ""] ||
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index eb36c1ca6..4e7c2a7cd 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -106,7 +106,15 @@ handleUnicode (c:cs) =
 
 -- | Escape special characters.
 escapeSpecial :: String -> String
-escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
+escapeSpecial = escapeStringUsing $
+  [ ('\t',"\\tab ")
+  , ('\8216',"\\u8216'")
+  , ('\8217',"\\u8217'")
+  , ('\8220',"\\u8220\"")
+  , ('\8221',"\\u8221\"")
+  , ('\8211',"\\u8211-")
+  , ('\8212',"\\u8212-")
+  ] ++ backslashEscapes "{\\}"
 
 -- | Escape strings as needed for rich text format.
 stringToRTF :: String -> String
@@ -287,10 +295,6 @@ inlineToRTF (Quoted SingleQuote lst) =
   "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
 inlineToRTF (Quoted DoubleQuote lst) = 
   "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
-inlineToRTF Apostrophe = "\\u8217'"
-inlineToRTF Ellipses = "\\u8230?"
-inlineToRTF EmDash = "\\u8212-"
-inlineToRTF EnDash = "\\u8211-"
 inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
 inlineToRTF (Str str) = stringToRTF str
 inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 4f6645cd5..563ad7044 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -96,6 +96,10 @@ stringToTexinfo = escapeStringUsing texinfoEscapes
                          , ('@', "@@")
                          , (',', "@comma{}") -- only needed in argument lists
                          , ('\160', "@ ")
+                         , ('\x2014', "---")
+                         , ('\x2013', "--")
+                         , ('\x2026', "@dots{}")
+                         , ('\x2019', "'")
                          ]
 
 -- | Puts contents into Texinfo command.
@@ -387,10 +391,6 @@ inlineToTexinfo (Quoted DoubleQuote lst) = do
 
 inlineToTexinfo (Cite _ lst) =
   inlineListToTexinfo lst
-inlineToTexinfo Apostrophe = return $ char '\''
-inlineToTexinfo EmDash = return $ text "---"
-inlineToTexinfo EnDash = return $ text "--"
-inlineToTexinfo Ellipses = return $ text "@dots{}"
 inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
 inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
 inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" =
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 6614ec28e..26d5ec6d7 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -72,15 +72,19 @@ withUseTags action = do
 -- | Escape one character as needed for Textile.
 escapeCharForTextile :: Char -> String
 escapeCharForTextile x = case x of
-                         '&'    -> "&amp;"
-                         '<'    -> "&lt;"
-                         '>'    -> "&gt;"
-                         '"'    -> "&quot;"
-                         '*'    -> "&#42;"
-                         '_'    -> "&#95;"
-                         '@'    -> "&#64;"
-                         '|'    -> "&#124;"
-                         c      -> [c]
+                         '&'      -> "&amp;"
+                         '<'      -> "&lt;"
+                         '>'      -> "&gt;"
+                         '"'      -> "&quot;"
+                         '*'      -> "&#42;"
+                         '_'      -> "&#95;"
+                         '@'      -> "&#64;"
+                         '|'      -> "&#124;"
+                         '\x2014' -> " -- "
+                         '\x2013' -> " - "
+                         '\x2019' -> "'"
+                         '\x2026' -> "..."
+                         c        -> [c]
 
 -- | Escape string as needed for Textile.
 escapeStringForTextile :: String -> String
@@ -370,14 +374,6 @@ inlineToTextile opts (Quoted DoubleQuote lst) = do
 
 inlineToTextile opts (Cite _  lst) = inlineListToTextile opts lst
 
-inlineToTextile _ EmDash = return " -- "
-
-inlineToTextile _ EnDash = return " - "
-
-inlineToTextile _ Apostrophe = return "'"
-
-inlineToTextile _ Ellipses = return "..."
-
 inlineToTextile _ (Code _ str) =
   return $ if '@' `elem` str
            then "<tt>" ++ escapeStringForXML str ++ "</tt>"
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index e21525018..7a1c8bdd8 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -33,9 +33,13 @@ module Text.Pandoc.XML ( stripTags,
                          inTags,
                          selfClosingTag,
                          inTagsSimple,
-                         inTagsIndented  ) where
+                         inTagsIndented,
+                         toEntities,
+                         fromEntities ) where
 
 import Text.Pandoc.Pretty
+import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
 
 -- | Remove everything between <...>
 stripTags :: String -> String
@@ -89,3 +93,22 @@ inTagsSimple tagType = inTags False tagType []
 -- | Put the supplied contents in indented block btw start and end tags.
 inTagsIndented :: String -> Doc -> Doc
 inTagsIndented tagType = inTags True tagType []
+
+-- | Escape all non-ascii characters using numerical entities.
+toEntities :: String -> String
+toEntities [] = ""
+toEntities (c:cs)
+  | isAscii c = c : toEntities cs
+  | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- Unescapes XML entities
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+  case lookupEntity ent of
+        Just c  -> c : fromEntities rest
+        Nothing -> '&' : fromEntities rest
+    where (ent, rest) = case break (==';') xs of
+                             (zs,';':ys) -> (zs,ys)
+                             _           -> ("",xs)
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
deleted file mode 100644
index d6ee39dab..000000000
--- a/src/markdown2pdf.hs
+++ /dev/null
@@ -1,256 +0,0 @@
-module Main where
-
-import Data.List (isInfixOf, intercalate, isPrefixOf)
-import Data.Maybe (isNothing)
-import qualified Data.ByteString as BS
-import Codec.Binary.UTF8.String (decodeString, encodeString)
-import Data.ByteString.UTF8 (toString)
-import Control.Monad (unless, guard, liftM, when)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
-import Control.Exception (tryJust, bracket, evaluate)
-
-import System.IO
-import System.IO.Error (isDoesNotExistError)
-import System.Environment ( getArgs, getProgName )
-import qualified Text.Pandoc.UTF8 as UTF8
-import System.Exit (ExitCode (..), exitWith)
-import System.FilePath
-import System.Directory
-import System.Process
-
--- A variant of 'readProcessWithExitCode' that does not
--- cause an error if the output is not UTF-8. (Copied
--- with slight variants from 'System.Process'.)
-readProcessWithExitCode'
-    :: FilePath                 -- ^ command to run
-    -> [String]                 -- ^ any arguments
-    -> String                   -- ^ standard input
-    -> IO (ExitCode,String,String) -- ^ exitcode, stdout, stderr
-readProcessWithExitCode' cmd args input = do
-    (Just inh, Just outh, Just errh, pid) <-
-        createProcess (proc cmd args){ std_in  = CreatePipe,
-                                       std_out = CreatePipe,
-                                       std_err = CreatePipe }
-
-    outMVar <- newEmptyMVar
-
-    -- fork off a thread to start consuming stdout
-    out  <- liftM toString $ BS.hGetContents outh
-    _ <- forkIO $ evaluate (length out) >> putMVar outMVar ()
-
-    -- fork off a thread to start consuming stderr
-    err  <- liftM toString $ BS.hGetContents errh
-    _ <- forkIO $ evaluate (length err) >> putMVar outMVar ()
-
-    -- now write and flush any input
-    when (not (null input)) $ do hPutStr inh input; hFlush inh
-    hClose inh -- done with stdin
-
-    -- wait on the output
-    takeMVar outMVar
-    takeMVar outMVar
-    hClose outh
-
-    -- wait on the process
-    ex <- waitForProcess pid
-
-    return (ex, out, err)
-
-run :: FilePath -> [String] -> IO (Either String String)
-run file opts = do
-  (code, out, err) <- readProcessWithExitCode' (encodeString file)
-                        (map encodeString opts) ""
-  let msg = out ++ err
-  case code of
-    ExitFailure _ -> return $ Left  $! msg
-    ExitSuccess   -> return $ Right $! msg
-
-parsePandocArgs :: [String] -> IO (Maybe ([String], String))
-parsePandocArgs args = do
-  result <- run "pandoc" $ ["--dump-args"] ++ args
-  return $ either error (parse . map trim . lines) result
-  where parse []         = Nothing
-        parse ("-":[])   = Just ([], "stdin") -- no output or input
-        parse ("-":x:xs) = Just (x:xs, dropExtension x) -- no output
-        parse ( x :xs)   = Just (xs,   dropExtension x) -- at least output
-        --trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace
-        trim = takeWhile (/='\r') . dropWhile (=='\r')
-
-runPandoc :: [String] -> FilePath -> IO (Either String FilePath)
-runPandoc inputsAndArgs output = do
-  let texFile = addExtension output "tex"
-  result <- run "pandoc" $
-    ["-s", "--no-wrap", "-r", "markdown", "-w", "latex"]
-    ++ inputsAndArgs ++ ["-o", texFile]
-  return $ either Left (const $ Right texFile) result
-
-runLatexRaw :: String -> FilePath -> IO (Either (Either String String) FilePath)
-runLatexRaw latexProgram file = do
-  -- we ignore the ExitCode because pdflatex always fails the first time
-  run latexProgram ["-halt-on-error", "-interaction", "nonstopmode",
-    "-output-directory", takeDirectory file, dropExtension file] >> return ()
-  let pdfFile = replaceExtension file "pdf"
-  let logFile = replaceExtension file "log"
-  txt <- tryJust (guard . isDoesNotExistError)
-         (liftM toString $ BS.readFile logFile)
-  let  checks = checkLatex $ either (const "") id txt
-  case checks of
-  -- err  , bib , ref , msg
-    (True , _    , _   , msg) -> return $ Left $ Left msg   -- failure
-    (False, True , _   , msg) -> runBibtex file >>
-                                (return $ Left $ Right msg) -- citations
-    (False, _    , True, msg) -> return $ Left $ Right msg  -- references
-    (False, False, False, _ ) -> return $ Right pdfFile     -- success
-
-runLatex :: String -> FilePath -> IO (Either String FilePath)
-runLatex latexProgram file = step 3
-  where
-  step n = do
-    result <- runLatexRaw latexProgram file
-    case result of
-      Left (Left err) -> return $ Left err
-      Left (Right _) | n > 1  -> step (n-1 :: Int)
-      Right _ | n > 2 -> step (n-1 :: Int)
-      Left (Right msg) -> return $ Left msg
-      Right pdfFile   -> return $ Right pdfFile
-
-checkLatex :: String -> (Bool, Bool, Bool, String)
-checkLatex ""  = (True, False, False, "Could not read log file")
-checkLatex txt = (err , bib, ref, unlines $! msgs ++ tips)
-  where
-  xs `oneOf` x = any (flip isInfixOf x) xs
-  msgs = dropWhile (not . errorline) $ lines txt
-  errorline ('!':_) = True
-  errorline _ = False
-  tips = checkPackages msgs
-  err = any (oneOf ["!", "LaTeX Error:", "Latex Error:"]) msgs
-  bib = any (oneOf ["Warning: Citation"
-                   ,"Warning: There were undefined citations"]) msgs
-  ref = any (oneOf ["Warning: Reference"
-                   ,"Warning: Label"
-                   ,"Warning: There were undefined references"
-                   ]) msgs
-
-checkPackages :: [String] -> [String]
-checkPackages = concatMap chks
-  where -- for each message, search 'pks' for matches and give a hint
-  chks x = concatMap (chk x) pks
-  chk x (k,v) = if sub k `isInfixOf` x then tip k v else []
-  sub k   = "`" ++ k ++ ".sty' not found"
-  tip k v = ["Please install the '" ++ k ++
-             "' package from CTAN:", "  " ++ v]
-  pks = [("ucs"
-         ,"http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/")
-        ,("ulem"
-         ,"http://www.ctan.org/tex-archive/macros/latex/contrib/misc/")
-        ,("graphicx"
-         ,"http://www.ctan.org/tex-archive/macros/latex/required/graphics/")
-        ,("fancyhdr"
-         ,"http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/")
-        ,("array"
-         ,"http://www.ctan.org/tex-archive/macros/latex/required/tools/")]
-
-runBibtex :: FilePath -> IO (Either String FilePath)
-runBibtex file = do
-  let auxFile = replaceExtension file "aux"
-  result <- run "bibtex" [auxFile]
-  return $ either Left (const $ Right auxFile) result
-
-exit :: String -> IO a
-exit x = do
-  progName <- getProgName
-  UTF8.hPutStrLn stderr $ progName ++ ": " ++ x
-  exitWith $ ExitFailure 1
-
-saveStdin :: FilePath -> IO (Either String FilePath)
-saveStdin file = do
-  text <- liftM toString $ BS.getContents
-  UTF8.writeFile file text
-  fileExist <- doesFileExist (encodeString file)
-  case fileExist of
-    False -> return $ Left $! "Could not create " ++ file
-    True  -> return $ Right file
-
-saveOutput :: FilePath -> FilePath -> IO ()
-saveOutput input output = do
-  copyFile (encodeString input) (encodeString output)
-  UTF8.hPutStrLn stderr $! "Created " ++ output
-
-main :: IO ()
-main = bracket
-  -- acquire resource
-  (do dir <- getTemporaryDirectory
-      let tmp = dir </> "pandoc"
-      createDirectoryIfMissing True tmp
-      return tmp)
-
-  -- release resource
-  ( \tmp -> removeDirectoryRecursive tmp)
-
-  -- run computation
-  $ \tmp -> do
-    args <- liftM (map decodeString) getArgs
-    -- check for invalid arguments and print help message if needed
-    let goodopts = ["-f","-r","-N", "-p","-R","-H","-B","-A", "-C","-o","-V"]
-    let goodoptslong = ["--from","--read","--strict",
-                   "--preserve-tabs","--tab-stop","--parse-raw",
-                   "--toc","--table-of-contents", "--xetex", "--luatex",
-                   "--number-sections","--include-in-header",
-                   "--include-before-body","--include-after-body",
-                   "--custom-header","--output",
-                   "--template", "--variable",
-                   "--csl", "--bibliography", "--data-dir", "--listings"]
-    let isOpt ('-':_) = True
-        isOpt _       = False
-    let opts = filter isOpt args
-    -- note that a long option can come in this form: --opt=val
-    let isGoodopt x = x `elem` (goodopts ++ goodoptslong) ||
-                      any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong
-    let markdown2pdfOpts = ["--xetex","--luatex"]
-    unless (all isGoodopt opts) $ do
-      (code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] ""
-      UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
-      UTF8.putStr $ unlines $
-                 filter (\l -> any (`isInfixOf` l) goodoptslong) (lines out)
-                 ++ map (replicate 24 ' ' ++) markdown2pdfOpts
-      exitWith code
-
-    let args' = filter (`notElem` markdown2pdfOpts) args
-
-    -- check for executable files
-    let latexProgram = if "--xetex" `elem` opts
-                          then "xelatex"
-                          else if "--luatex" `elem` opts
-                                  then "lualatex"
-                                  else "pdflatex"
-    let execs = ["pandoc", latexProgram, "bibtex"]
-    paths <- mapM findExecutable execs
-    let miss = map snd $ filter (isNothing . fst) $ zip paths execs
-    unless (null miss) $ exit $! "Could not find " ++ intercalate ", " miss
-
-    -- parse arguments
-    -- if no input given, use 'stdin'
-    pandocArgs <- parsePandocArgs args'
-    (input, output) <- case pandocArgs of
-      Nothing      -> exit "Could not parse arguments"
-      Just ([],out) -> do
-        stdinFile <- saveStdin (replaceDirectory (takeBaseName out) tmp)
-        case stdinFile of
-          Left err  -> exit err
-          Right f   -> return ([f], out)
-      -- no need because we'll pass all arguments to pandoc
-      Just (_ ,out) -> return ([], out)
-    -- run pandoc
-    pandocRes <- runPandoc (input ++ args') $ replaceDirectory output tmp
-    case pandocRes of
-      Left err -> exit err
-      Right texFile  -> do
-        -- run pdflatex
-        latexRes <- runLatex latexProgram texFile
-        case latexRes of
-          Left err      -> exit err
-          Right pdfFile -> do
-            -- save the output creating a backup if necessary
-            saveOutput pdfFile $
-              replaceDirectory pdfFile (takeDirectory output)
-
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 27bc2c25c..3853d360a 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -1,5 +1,5 @@
 {-
-Copyright (C) 2006-2011 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Main
-   Copyright   : Copyright (C) 2006-2011 John MacFarlane
+   Copyright   : Copyright (C) 2006-2012 John MacFarlane
    License     : GNU GPL, version 2 or above
 
    Maintainer  : John MacFarlane <jgm@berkeley@edu>
@@ -30,24 +30,26 @@ writers.
 -}
 module Main where
 import Text.Pandoc
-import Text.Pandoc.S5 (s5HeaderIncludes)
+import Text.Pandoc.PDF (tex2pdf)
+import Text.Pandoc.Readers.LaTeX (handleIncludes)
 import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
-                            headerShift, findDataFile, normalize )
-#ifdef _HIGHLIGHTING
-import Text.Pandoc.Highlighting ( languages )
-#endif
+                            headerShift, findDataFile, normalize, err, warn )
+import Text.Pandoc.XML ( toEntities, fromEntities )
+import Text.Pandoc.SelfContained ( makeSelfContained )
+import Text.Pandoc.Highlighting ( languages, Style, tango, pygments,
+         espresso, kate, haddock, monochrome )
 import System.Environment ( getArgs, getProgName )
 import System.Exit ( exitWith, ExitCode (..) )
 import System.FilePath
 import System.Console.GetOpt
 import Data.Char ( toLower )
 import Data.List ( intercalate, isSuffixOf, isPrefixOf )
-import System.Directory ( getAppUserDataDirectory, doesFileExist )
-import System.IO ( stdout, stderr )
+import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
+import System.IO ( stdout )
 import System.IO.Error ( isDoesNotExistError )
 import Control.Exception.Extensible ( throwIO )
 import qualified Text.Pandoc.UTF8 as UTF8
-import Text.CSL
+import qualified Text.CSL as CSL
 import Text.Pandoc.Biblio
 import Control.Monad (when, unless, liftM)
 import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
@@ -55,35 +57,34 @@ import Network.URI (parseURI, isURI, URI(..))
 import qualified Data.ByteString.Lazy as B
 import Data.ByteString.Lazy.UTF8 (toString )
 import Codec.Binary.UTF8.String (decodeString, encodeString)
+import Text.CSL.Reference (Reference(..))
 
 copyrightMessage :: String
-copyrightMessage = "\nCopyright (C) 2006-2011 John MacFarlane\n" ++
+copyrightMessage = "\nCopyright (C) 2006-2012 John MacFarlane\n" ++
                     "Web:  http://johnmacfarlane.net/pandoc\n" ++
                     "This is free software; see the source for copying conditions.  There is no\n" ++
                     "warranty, not even for merchantability or fitness for a particular purpose."
 
 compileInfo :: String
 compileInfo =
-  "\nCompiled with citeproc support." ++
-#ifdef _HIGHLIGHTING
-   "\nCompiled with syntax highlighting support for:\n" ++
-       wrapWords 78 languages ++
-#endif
-   ""
+  "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++
+  VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++
+   ".\nSyntax highlighting is supported for the following languages:\n    " ++
+       wrapWords 4 78 languages
 
 -- | Converts a list of strings into a single string with the items printed as
 -- comma separated words in lines with a maximum line length.
-wrapWords :: Int -> [String] -> String
-wrapWords c = wrap' c c where
-              wrap' _    _         []     = ""
+wrapWords :: Int -> Int -> [String] -> String
+wrapWords indent c = wrap' (c - indent) (c - indent)
+        where wrap' _    _         []     = ""
               wrap' cols remaining (x:xs) = if remaining == cols
                                                then x ++ wrap' cols (remaining - length x) xs
                                                else if (length x + 1) > remaining
-                                                       then ",\n" ++ x ++ wrap' cols (cols - length x) xs
+                                                       then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs
                                                        else ", "  ++ x ++ wrap' cols (remaining - (length x + 2)) xs
 
-isNonTextOutput :: String -> Bool
-isNonTextOutput = (`elem` ["odt","epub"])
+nonTextFormats :: [String]
+nonTextFormats = ["odt","docx","epub"]
 
 -- | Data structure for command line options.
 data Opt = Opt
@@ -101,15 +102,19 @@ data Opt = Opt
     , optNumberSections    :: Bool    -- ^ Number sections in LaTeX
     , optSectionDivs       :: Bool    -- ^ Put sections in div tags in HTML
     , optIncremental       :: Bool    -- ^ Use incremental lists in Slidy/S5
-    , optOffline           :: Bool    -- ^ Make slideshow accessible offline
-    , optXeTeX             :: Bool    -- ^ Format latex for xetex
+    , optSelfContained     :: Bool    -- ^ Make HTML accessible offline
     , optSmart             :: Bool    -- ^ Use smart typography
+    , optOldDashes         :: Bool    -- ^ Parse dashes like pandoc <=1.8.2.1
     , optHtml5             :: Bool    -- ^ Produce HTML5 in HTML
+    , optHighlight         :: Bool    -- ^ Highlight source code
+    , optHighlightStyle    :: Style   -- ^ Style to use for highlighted code
     , optChapters          :: Bool    -- ^ Use chapter for top-level sects
     , optHTMLMathMethod    :: HTMLMathMethod -- ^ Method to print HTML math
     , optReferenceODT      :: Maybe FilePath -- ^ Path of reference.odt
+    , optReferenceDocx     :: Maybe FilePath -- ^ Path of reference.docx
     , optEPUBStylesheet    :: Maybe String   -- ^ EPUB stylesheet
     , optEPUBMetadata      :: String  -- ^ EPUB metadata
+    , optEPUBFonts         :: [FilePath] -- ^ EPUB fonts to embed
     , optDumpArgs          :: Bool    -- ^ Output command-line arguments
     , optIgnoreArgs        :: Bool    -- ^ Ignore command-line arguments
     , optStrict            :: Bool    -- ^ Use strict markdown syntax
@@ -124,8 +129,12 @@ data Opt = Opt
     , optCiteMethod        :: CiteMethod -- ^ Method to output cites
     , optBibliography      :: [String]
     , optCslFile           :: FilePath
+    , optAbbrevsFile       :: Maybe FilePath
     , optListings          :: Bool       -- ^ Use listings package for code blocks
-    , optAscii             :: Bool       -- ^ Avoid using nonascii characters
+    , optLaTeXEngine       :: String     -- ^ Program to use for latex -> pdf
+    , optSlideLevel        :: Maybe Int  -- ^ Header level that creates slides
+    , optSetextHeaders     :: Bool       -- ^ Use atx headers for markdown level 1-2
+    , optAscii             :: Bool       -- ^ Use ascii characters only in html
     }
 
 -- | Defaults for command-line options.
@@ -145,15 +154,19 @@ defaultOpts = Opt
     , optNumberSections    = False
     , optSectionDivs       = False
     , optIncremental       = False
-    , optOffline           = False
-    , optXeTeX             = False
+    , optSelfContained     = False
     , optSmart             = False
+    , optOldDashes         = False
     , optHtml5             = False
+    , optHighlight         = True
+    , optHighlightStyle    = pygments
     , optChapters          = False
     , optHTMLMathMethod    = PlainMath
     , optReferenceODT      = Nothing
+    , optReferenceDocx     = Nothing
     , optEPUBStylesheet    = Nothing
     , optEPUBMetadata      = ""
+    , optEPUBFonts         = []
     , optDumpArgs          = False
     , optIgnoreArgs        = False
     , optStrict            = False
@@ -168,7 +181,11 @@ defaultOpts = Opt
     , optCiteMethod        = Citeproc
     , optBibliography      = []
     , optCslFile           = ""
+    , optAbbrevsFile       = Nothing
     , optListings          = False
+    , optLaTeXEngine       = "pdflatex"
+    , optSlideLevel        = Nothing
+    , optSetextHeaders     = True
     , optAscii             = False
     }
 
@@ -188,50 +205,23 @@ options =
                   "FORMAT")
                  ""
 
-    , Option "s" ["standalone"]
-                 (NoArg
-                  (\opt -> return opt { optStandalone = True }))
-                 "" -- "Include needed header and footer on output"
-
     , Option "o" ["output"]
                  (ReqArg
                   (\arg opt -> return opt { optOutputFile = arg })
                   "FILENAME")
                  "" -- "Name of output file"
 
-    , Option "p" ["preserve-tabs"]
-                 (NoArg
-                  (\opt -> return opt { optPreserveTabs = True }))
-                 "" -- "Preserve tabs instead of converting to spaces"
-
-    , Option "" ["tab-stop"]
+    , Option "" ["data-dir"]
                  (ReqArg
-                  (\arg opt ->
-                      case reads arg of
-                           [(t,"")] | t > 0 -> return opt { optTabStop = t }
-                           _          -> do
-                               UTF8.hPutStrLn stderr $
-                                   "tab-stop must be a number greater than 0"
-                               exitWith $ ExitFailure 31)
-                  "NUMBER")
-                 "" -- "Tab stop (default 4)"
+                  (\arg opt -> return opt { optDataDir = Just arg })
+                 "DIRECTORY") -- "Directory containing pandoc data files."
+                ""
 
     , Option "" ["strict"]
                  (NoArg
                   (\opt -> return opt { optStrict = True } ))
                  "" -- "Disable markdown syntax extensions"
 
-    , Option "" ["normalize"]
-                 (NoArg
-                  (\opt -> return opt { optTransforms =
-                                   normalize : optTransforms opt } ))
-                 "" -- "Normalize the Pandoc AST"
-
-    , Option "" ["reference-links"]
-                 (NoArg
-                  (\opt -> return opt { optReferenceLinks = True } ))
-                 "" -- "Use reference links in parsing HTML"
-
     , Option "R" ["parse-raw"]
                  (NoArg
                   (\opt -> return opt { optParseRaw = True }))
@@ -242,145 +232,25 @@ options =
                   (\opt -> return opt { optSmart = True }))
                  "" -- "Use smart quotes, dashes, and ellipses"
 
-    , Option "5" ["html5"]
-                 (NoArg
-                  (\opt -> return opt { optHtml5 = True }))
-                 "" -- "Produce HTML5 in HTML output"
-
-    , Option "m" ["latexmathml", "asciimathml"]
-                 (OptArg
-                  (\arg opt ->
-                      return opt { optHTMLMathMethod = LaTeXMathML arg })
-                  "URL")
-                 "" -- "Use LaTeXMathML script in html output"
-
-    , Option "" ["mathml"]
-                 (OptArg
-                  (\arg opt ->
-                      return opt { optHTMLMathMethod = MathML arg })
-                   "URL")
-                 "" -- "Use mathml for HTML math"
-
-    , Option "" ["mimetex"]
-                 (OptArg
-                  (\arg opt -> do
-                      let url' = case arg of
-                                      Just u   -> u ++ "?"
-                                      Nothing  -> "/cgi-bin/mimetex.cgi?"
-                      return opt { optHTMLMathMethod = WebTeX url' })
-                  "URL")
-                 "" -- "Use mimetex for HTML math"
-
-    , Option "" ["webtex"]
-                 (OptArg
-                  (\arg opt -> do
-                      let url' = case arg of
-                                      Just u   -> u
-                                      Nothing  -> "http://chart.apis.google.com/chart?cht=tx&chl="
-                      return opt { optHTMLMathMethod = WebTeX url' })
-                  "URL")
-                 "" -- "Use web service for HTML math"
-
-    , Option "" ["jsmath"]
-                 (OptArg
-                  (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
-                  "URL")
-                 "" -- "Use jsMath for HTML math"
-
-    , Option "" ["mathjax"]
-                 (OptArg
-                  (\arg opt -> do
-                      let url' = case arg of
-                                      Just u   -> u
-                                      Nothing  -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
-                      return opt { optHTMLMathMethod = MathJax url'})
-                  "URL")
-                 "" -- "Use MathJax for HTML math"
-
-    , Option "" ["gladtex"]
-                 (NoArg
-                  (\opt -> return opt { optHTMLMathMethod = GladTeX }))
-                 "" -- "Use gladtex for HTML math"
-
-    , Option "i" ["incremental"]
-                 (NoArg
-                  (\opt -> return opt { optIncremental = True }))
-                 "" -- "Make list items display incrementally in Slidy/S5"
-
-    , Option "" ["offline"]
-                 (NoArg
-                  (\opt -> return opt { optOffline = True,
-                                        optStandalone = True }))
-                 "" -- "Make slide shows include all the needed js and css"
-
-    , Option "" ["xetex"]
+    , Option "" ["old-dashes"]
                  (NoArg
-                  (\opt -> do
-                     UTF8.hPutStrLn stderr $ "pandoc: --xetex is deprecated. "
-                       ++ "It is no longer needed for use with XeTeX."
-                     return opt { optXeTeX = True }))
-                 "" -- "Format latex for processing by XeTeX"
-
-    , Option "" ["chapters"]
-                 (NoArg
-                  (\opt -> return opt { optChapters = True }))
-                 "" -- "Use chapter for top-level sections in LaTeX, DocBook"
-
-    , Option "N" ["number-sections"]
-                 (NoArg
-                  (\opt -> return opt { optNumberSections = True }))
-                 "" -- "Number sections in LaTeX"
-
-    , Option "" ["listings"]
-                 (NoArg
-                  (\opt -> return opt { optListings = True }))
-                 "" -- "Use listings package for LaTeX code blocks"
-
-    , Option "" ["section-divs"]
-                 (NoArg
-                  (\opt -> return opt { optSectionDivs = True }))
-                 "" -- "Put sections in div tags in HTML"
-
-    , Option "" ["no-wrap"]
-                 (NoArg
-                  (\opt -> return opt { optWrapText = False }))
-                 "" -- "Do not wrap text in output"
+                  (\opt -> return opt { optSmart = True
+                                      , optOldDashes = True }))
+                 "" -- "Use smart quotes, dashes, and ellipses"
 
-    , Option "" ["columns"]
+    , Option "" ["base-header-level"]
                  (ReqArg
                   (\arg opt ->
                       case reads arg of
-                           [(t,"")] | t > 0 -> return opt { optColumns = t }
-                           _          -> do
-                               UTF8.hPutStrLn stderr $
-                                   "columns must be a number greater than 0"
-                               exitWith $ ExitFailure 33)
-                 "NUMBER")
-                 "" -- "Length of line in characters"
-
-    , Option "" ["ascii"]
-                 (NoArg
-                  (\opt -> return opt { optAscii = True }))
-                 "" -- "Avoid using non-ascii characters in output"
-
-    , Option "" ["email-obfuscation"]
-                 (ReqArg
-                  (\arg opt -> do
-                     method <- case arg of
-                            "references" -> return ReferenceObfuscation
-                            "javascript" -> return JavascriptObfuscation
-                            "none"       -> return NoObfuscation
-                            _            -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
-                                            exitWith (ExitFailure 6)
-                     return opt { optEmailObfuscation = method })
-                  "none|javascript|references")
-                 "" -- "Method for obfuscating email in HTML"
-
-     , Option "" ["id-prefix"]
-                  (ReqArg
-                   (\arg opt -> return opt { optIdentifierPrefix = arg })
-                   "STRING")
-                  "" -- "Prefix to add to automatically generated HTML identifiers"
+                           [(t,"")] | t > 0 -> do
+                               let oldTransforms = optTransforms opt
+                               let shift = t - 1
+                               return opt{ optTransforms =
+                                           headerShift shift : oldTransforms }
+                           _          -> err 19
+                                       "base-header-level must be a number > 0")
+                  "NUMBER")
+                 "" -- "Headers base level"
 
      , Option "" ["indented-code-classes"]
                   (ReqArg
@@ -389,26 +259,31 @@ options =
                    "STRING")
                   "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
 
-    , Option "" ["toc", "table-of-contents"]
-                (NoArg
-                 (\opt -> return opt { optTableOfContents = True }))
-               "" -- "Include table of contents"
+    , Option "" ["normalize"]
+                 (NoArg
+                  (\opt -> return opt { optTransforms =
+                                   normalize : optTransforms opt } ))
+                 "" -- "Normalize the Pandoc AST"
 
-    , Option "" ["base-header-level"]
+    , Option "p" ["preserve-tabs"]
+                 (NoArg
+                  (\opt -> return opt { optPreserveTabs = True }))
+                 "" -- "Preserve tabs instead of converting to spaces"
+
+    , Option "" ["tab-stop"]
                  (ReqArg
                   (\arg opt ->
                       case reads arg of
-                           [(t,"")] | t > 0 -> do
-                               let oldTransforms = optTransforms opt
-                               let shift = t - 1
-                               return opt{ optTransforms =
-                                           headerShift shift : oldTransforms }
-                           _          -> do
-                               UTF8.hPutStrLn stderr $
-                                   "base-header-level must be a number > 0"
-                               exitWith $ ExitFailure 19)
+                           [(t,"")] | t > 0 -> return opt { optTabStop = t }
+                           _                -> err 31
+                                          "tab-stop must be a number greater than 0")
                   "NUMBER")
-                 "" -- "Headers base level"
+                 "" -- "Tab stop (default 4)"
+
+    , Option "s" ["standalone"]
+                 (NoArg
+                  (\opt -> return opt { optStandalone = True }))
+                 "" -- "Include needed header and footer on output"
 
     , Option "" ["template"]
                  (ReqArg
@@ -425,21 +300,62 @@ options =
                           (k,_:v) -> do
                             let newvars = optVariables opt ++ [(k,v)]
                             return opt{ optVariables = newvars }
-                          _  -> do
-                            UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
-                            exitWith $ ExitFailure 17)
+                          _  -> err 17 $
+                            "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)")
                   "KEY:VALUE")
                  "" -- "Use custom template"
 
-    , Option "c" ["css"]
+    , Option "D" ["print-default-template"]
                  (ReqArg
-                  (\arg opt -> do
-                     -- add new link to end, so it is included in proper order
-                     let newvars = optVariables opt ++ [("css",arg)]
-                     return opt { optVariables = newvars,
-                                  optStandalone = True })
-                  "URL")
-                 "" -- "Link to CSS style sheet"
+                  (\arg _ -> do
+                     templ <- getDefaultTemplate Nothing arg
+                     case templ of
+                          Right t -> UTF8.hPutStr stdout t
+                          Left e  -> error $ show e
+                     exitWith ExitSuccess)
+                  "FORMAT")
+                 "" -- "Print default template for FORMAT"
+
+    , Option "" ["no-wrap"]
+                 (NoArg
+                  (\opt -> return opt { optWrapText = False }))
+                 "" -- "Do not wrap text in output"
+
+    , Option "" ["columns"]
+                 (ReqArg
+                  (\arg opt ->
+                      case reads arg of
+                           [(t,"")] | t > 0 -> return opt { optColumns = t }
+                           _          -> err 33 $
+                                   "columns must be a number greater than 0")
+                 "NUMBER")
+                 "" -- "Length of line in characters"
+
+    , Option "" ["toc", "table-of-contents"]
+                (NoArg
+                 (\opt -> return opt { optTableOfContents = True }))
+               "" -- "Include table of contents"
+
+    , Option "" ["no-highlight"]
+                (NoArg
+                 (\opt -> return opt { optHighlight = False }))
+                 "" -- "Don't highlight source code"
+
+    , Option "" ["highlight-style"]
+                (ReqArg
+                 (\arg opt -> do
+                   newStyle <- case map toLower arg of
+                                     "pygments"   -> return pygments
+                                     "tango"      -> return tango
+                                     "espresso"   -> return espresso
+                                     "kate"       -> return kate
+                                     "monochrome" -> return monochrome
+                                     "haddock"    -> return haddock
+                                     _            -> err 39 $
+                                         "Unknown style :" ++ arg
+                   return opt{ optHighlightStyle = newStyle })
+                 "STYLE")
+                 "" -- "Style for highlighted code"
 
     , Option "H" ["include-in-header"]
                  (ReqArg
@@ -474,6 +390,100 @@ options =
                   "FILENAME")
                  "" -- "File to include after document body"
 
+    , Option "" ["self-contained"]
+                 (NoArg
+                  (\opt -> return opt { optSelfContained = True,
+                                        optVariables = ("slidy-url","slidy") :
+                                                       optVariables opt,
+                                        optStandalone = True }))
+                 "" -- "Make slide shows include all the needed js and css"
+
+    , Option "" ["offline"]
+                 (NoArg
+                  (\opt -> do warn $ "--offline is deprecated. Use --self-contained instead."
+                              return opt { optSelfContained = True,
+                                           optStandalone = True }))
+                 "" -- "Make slide shows include all the needed js and css"
+                 -- deprecated synonym for --self-contained
+
+    , Option "5" ["html5"]
+                 (NoArg
+                  (\opt -> do
+                     warn $ "--html5 is deprecated. "
+                       ++ "Use the html5 output format instead."
+                     return opt { optHtml5 = True }))
+                 "" -- "Produce HTML5 in HTML output"
+
+    , Option "" ["ascii"]
+                 (NoArg
+                  (\opt -> return opt { optAscii = True }))
+                 ""  -- "Use ascii characters only in HTML output"
+
+    , Option "" ["reference-links"]
+                 (NoArg
+                  (\opt -> return opt { optReferenceLinks = True } ))
+                 "" -- "Use reference links in parsing HTML"
+
+    , Option "" ["atx-headers"]
+                 (NoArg
+                  (\opt -> return opt { optSetextHeaders = False } ))
+                 "" -- "Use atx-style headers for markdown"
+
+    , Option "" ["chapters"]
+                 (NoArg
+                  (\opt -> return opt { optChapters = True }))
+                 "" -- "Use chapter for top-level sections in LaTeX, DocBook"
+
+    , Option "N" ["number-sections"]
+                 (NoArg
+                  (\opt -> return opt { optNumberSections = True }))
+                 "" -- "Number sections in LaTeX"
+
+    , Option "" ["listings"]
+                 (NoArg
+                  (\opt -> return opt { optListings = True }))
+                 "" -- "Use listings package for LaTeX code blocks"
+
+    , Option "i" ["incremental"]
+                 (NoArg
+                  (\opt -> return opt { optIncremental = True }))
+                 "" -- "Make list items display incrementally in Slidy/S5"
+
+    , Option "" ["slide-level"]
+                 (ReqArg
+                  (\arg opt -> do
+                      case reads arg of
+                           [(t,"")] | t >= 1 && t <= 6 ->
+                                    return opt { optSlideLevel = Just t }
+                           _          -> err 39 $
+                                    "slide level must be a number between 1 and 6")
+                 "NUMBER")
+                 "" -- "Force header level for slides"
+
+    , Option "" ["section-divs"]
+                 (NoArg
+                  (\opt -> return opt { optSectionDivs = True }))
+                 "" -- "Put sections in div tags in HTML"
+
+    , Option "" ["email-obfuscation"]
+                 (ReqArg
+                  (\arg opt -> do
+                     method <- case arg of
+                            "references" -> return ReferenceObfuscation
+                            "javascript" -> return JavascriptObfuscation
+                            "none"       -> return NoObfuscation
+                            _            -> err 6
+                               ("Unknown obfuscation method: " ++ arg)
+                     return opt { optEmailObfuscation = method })
+                  "none|javascript|references")
+                 "" -- "Method for obfuscating email in HTML"
+
+     , Option "" ["id-prefix"]
+                  (ReqArg
+                   (\arg opt -> return opt { optIdentifierPrefix = arg })
+                   "STRING")
+                  "" -- "Prefix to add to automatically generated HTML identifiers"
+
     , Option "T" ["title-prefix"]
                  (ReqArg
                   (\arg opt -> do
@@ -483,6 +493,16 @@ options =
                   "STRING")
                  "" -- "String to prefix to HTML window title"
 
+    , Option "c" ["css"]
+                 (ReqArg
+                  (\arg opt -> do
+                     -- add new link to end, so it is included in proper order
+                     let newvars = optVariables opt ++ [("css",arg)]
+                     return opt { optVariables = newvars,
+                                  optStandalone = True })
+                  "URL")
+                 "" -- "Link to CSS style sheet"
+
     , Option "" ["reference-odt"]
                  (ReqArg
                   (\arg opt -> do
@@ -490,6 +510,13 @@ options =
                   "FILENAME")
                  "" -- "Path of custom reference.odt"
 
+    , Option "" ["reference-docx"]
+                 (ReqArg
+                  (\arg opt -> do
+                    return opt { optReferenceDocx = Just arg })
+                  "FILENAME")
+                 "" -- "Path of custom reference.docx"
+
     , Option "" ["epub-stylesheet"]
                  (ReqArg
                   (\arg opt -> do
@@ -514,16 +541,22 @@ options =
                   "FILENAME")
                  "" -- "Path of epub metadata file"
 
-    , Option "D" ["print-default-template"]
+    , Option "" ["epub-embed-font"]
                  (ReqArg
-                  (\arg _ -> do
-                     templ <- getDefaultTemplate Nothing arg
-                     case templ of
-                          Right t -> UTF8.hPutStr stdout t
-                          Left e  -> error $ show e
-                     exitWith ExitSuccess)
-                  "FORMAT")
-                 "" -- "Print default template for FORMAT"
+                  (\arg opt -> do
+                     return opt{ optEPUBFonts = arg : optEPUBFonts opt })
+                  "FILE")
+                 "" -- "Directory of fonts to embed"
+
+    , Option "" ["latex-engine"]
+                 (ReqArg
+                  (\arg opt -> do
+                     let b = takeBaseName arg
+                     if (b == "pdflatex" || b == "lualatex" || b == "xelatex")
+                        then return opt { optLaTeXEngine = arg }
+                        else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.")
+                  "PROGRAM")
+                 "" -- "Name of latex program to use in generating PDF"
 
     , Option "" ["bibliography"]
                  (ReqArg
@@ -537,6 +570,12 @@ options =
                   "FILENAME")
                  ""
 
+    , Option "" ["citation-abbreviations"]
+                 (ReqArg
+                  (\arg opt -> return opt { optAbbrevsFile = Just arg })
+                  "FILENAME")
+                 ""
+
     , Option "" ["natbib"]
                  (NoArg
                   (\opt -> return opt { optCiteMethod = Natbib }))
@@ -547,11 +586,60 @@ options =
                   (\opt -> return opt { optCiteMethod = Biblatex }))
                  "" -- "Use biblatex cite commands in LaTeX output"
 
-    , Option "" ["data-dir"]
-                 (ReqArg
-                  (\arg opt -> return opt { optDataDir = Just arg })
-                 "DIRECTORY") -- "Directory containing pandoc data files."
-                ""
+    , Option "m" ["latexmathml", "asciimathml"]
+                 (OptArg
+                  (\arg opt ->
+                      return opt { optHTMLMathMethod = LaTeXMathML arg })
+                  "URL")
+                 "" -- "Use LaTeXMathML script in html output"
+
+    , Option "" ["mathml"]
+                 (OptArg
+                  (\arg opt ->
+                      return opt { optHTMLMathMethod = MathML arg })
+                   "URL")
+                 "" -- "Use mathml for HTML math"
+
+    , Option "" ["mimetex"]
+                 (OptArg
+                  (\arg opt -> do
+                      let url' = case arg of
+                                      Just u   -> u ++ "?"
+                                      Nothing  -> "/cgi-bin/mimetex.cgi?"
+                      return opt { optHTMLMathMethod = WebTeX url' })
+                  "URL")
+                 "" -- "Use mimetex for HTML math"
+
+    , Option "" ["webtex"]
+                 (OptArg
+                  (\arg opt -> do
+                      let url' = case arg of
+                                      Just u   -> u
+                                      Nothing  -> "http://chart.apis.google.com/chart?cht=tx&chl="
+                      return opt { optHTMLMathMethod = WebTeX url' })
+                  "URL")
+                 "" -- "Use web service for HTML math"
+
+    , Option "" ["jsmath"]
+                 (OptArg
+                  (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
+                  "URL")
+                 "" -- "Use jsMath for HTML math"
+
+    , Option "" ["mathjax"]
+                 (OptArg
+                  (\arg opt -> do
+                      let url' = case arg of
+                                      Just u   -> u
+                                      Nothing  -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+                      return opt { optHTMLMathMethod = MathJax url'})
+                  "URL")
+                 "" -- "Use MathJax for HTML math"
+
+    , Option "" ["gladtex"]
+                 (NoArg
+                  (\opt -> return opt { optHTMLMathMethod = GladTeX }))
+                 "" -- "Use gladtex for HTML math"
 
     , Option "" ["dump-args"]
                  (NoArg
@@ -579,14 +667,15 @@ options =
                      UTF8.hPutStr stdout (usageMessage prg options)
                      exitWith ExitSuccess ))
                  "" -- "Show help"
+
     ]
 
 -- Returns usage message
 usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
 usageMessage programName = usageInfo
   (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats:  " ++
-  (intercalate ", " $ map fst readers) ++ "\nOutput formats:  " ++
-  (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")
+  (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++
+  (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:")
 
 -- Determine default reader based on source file extensions
 defaultReaderName :: String -> [FilePath] -> String
@@ -637,8 +726,11 @@ defaultWriterName x =
     ".texinfo"  -> "texinfo"
     ".db"       -> "docbook"
     ".odt"      -> "odt"
+    ".docx"     -> "docx"
     ".epub"     -> "epub"
     ".org"      -> "org"
+    ".asciidoc" -> "asciidoc"
+    ".pdf"      -> "latex"
     ['.',y] | y `elem` ['1'..'9'] -> "man"
     _          -> "html"
 
@@ -654,10 +746,8 @@ main = do
                                   else getOpt Permute options rawArgs
 
   unless (null errors) $
-    do name <- getProgName
-       mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors
-       UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
-       exitWith $ ExitFailure 2
+     err 2 $ concat $ errors ++
+        ["Try " ++ prg ++ " --help for more information."]
 
   let defaultOpts' = if compatMode
                        then defaultOpts { optReader = "markdown"
@@ -682,14 +772,19 @@ main = do
               , optNumberSections    = numberSections
               , optSectionDivs       = sectionDivs
               , optIncremental       = incremental
-              , optOffline           = offline
+              , optSelfContained     = selfContained
               , optSmart             = smart
+              , optOldDashes         = oldDashes
               , optHtml5             = html5
+              , optHighlight         = highlight
+              , optHighlightStyle    = highlightStyle
               , optChapters          = chapters
               , optHTMLMathMethod    = mathMethod
               , optReferenceODT      = referenceODT
+              , optReferenceDocx     = referenceDocx
               , optEPUBStylesheet    = epubStylesheet
               , optEPUBMetadata      = epubMetadata
+              , optEPUBFonts         = epubFonts
               , optDumpArgs          = dumpArgs
               , optIgnoreArgs        = ignoreArgs
               , optStrict            = strict
@@ -702,8 +797,12 @@ main = do
               , optDataDir           = mbDataDir
               , optBibliography      = reffiles
               , optCslFile           = cslfile
+              , optAbbrevsFile       = cslabbrevs
               , optCiteMethod        = citeMethod
               , optListings          = listings
+              , optLaTeXEngine       = latexEngine
+              , optSlideLevel        = slideLevel
+              , optSetextHeaders     = setextHeaders
               , optAscii             = ascii
              } = opts
 
@@ -726,17 +825,35 @@ main = do
                                              then "html"
                                              else "markdown"
                            in  defaultReaderName fallback sources
-                      else readerName 
+                      else readerName
 
   let writerName' = if null writerName
                       then defaultWriterName outputFile
                       else writerName
 
+  let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+
+  when pdfOutput $ do
+    -- make sure writer is latex or beamer
+    unless (writerName' == "latex" || writerName' == "beamer" ||
+            writerName' == "latex+lhs") $
+      err 47 $ "cannot produce pdf output with " ++ writerName' ++ " writer"
+    -- check for latex program
+    mbLatex <- findExecutable latexEngine
+    case mbLatex of
+         Nothing  -> err 41 $
+           latexEngine ++ " not found. " ++
+           latexEngine ++ " is needed for pdf output."
+         Just _   -> return ()
+
   reader <- case (lookup readerName' readers) of
      Just r  -> return r
-     Nothing -> error ("Unknown reader: " ++ readerName')
+     Nothing -> err 7 ("Unknown reader: " ++ readerName')
+
+  let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput
 
   templ <- case templatePath of
+                _ | not standalone' -> return ""
                 Nothing -> do
                            deftemp <- getDefaultTemplate datadir writerName'
                            case deftemp of
@@ -756,57 +873,57 @@ main = do
                                              (\_ -> throwIO e)
                                        else throwIO e)
 
-  let standalone' = standalone || isNonTextOutput writerName'
-
-  variables' <- case (writerName', standalone', offline) of
-                      ("s5", True, True) -> do
-                        inc <- s5HeaderIncludes datadir
-                        return $ ("s5includes", inc) : variables
-                      ("slidy", True, True) -> do
-                        slidyJs <- readDataFile datadir $
-                                      "slidy" </> "slidy.min.js"
-                        slidyCss <- readDataFile datadir $
-                                      "slidy" </> "slidy.css"
-                        return $ ("slidy-js", slidyJs) :
-                            ("slidy-css", slidyCss) : variables
-                      _ -> return variables
+  let slideVariant = case writerName' of
+                           "s5"       -> S5Slides
+                           "slidy"    -> SlidySlides
+                           "dzslides" -> DZSlides
+                           _          -> NoSlides
 
-  variables'' <- case mathMethod of
+  variables' <- case mathMethod of
                       LaTeXMathML Nothing -> do
                          s <- readDataFile datadir $ "data" </> "LaTeXMathML.js"
-                         return $ ("mathml-script", s) : variables'
+                         return $ ("mathml-script", s) : variables
                       MathML Nothing -> do
                          s <- readDataFile datadir $ "data"</>"MathMLinHTML.js"
-                         return $ ("mathml-script", s) : variables'
-                      _ -> return variables'
+                         return $ ("mathml-script", s) : variables
+                      _ -> return variables
+
+  variables'' <- case slideVariant of
+                      DZSlides  -> do
+                        dztempl <- readDataFile datadir $ "dzslides" </> "template.html"
+                        let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core")
+                                             $ lines dztempl
+                        return $ ("dzslides-core", dzcore) : variables'
+                      _         -> return variables'
 
-  refs <- mapM (\f -> catch (readBiblioFile f) $ \e -> do
-         UTF8.hPutStrLn stderr $ "Error reading bibliography `" ++ f ++ "'"
-         UTF8.hPutStrLn stderr $ show e
-         exitWith (ExitFailure 23)) reffiles >>= \rs -> return $ concat rs
+  -- unescape reference ids, which may contain XML entities, so
+  -- that we can do lookups with regular string equality
+  let unescapeRefId ref = ref{ refId = fromEntities (refId ref) }
+
+  refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e ->
+             err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e)
+          reffiles >>=
+           return . map unescapeRefId . concat
 
   let sourceDir = if null sources
                      then "."
                      else takeDirectory (head sources)
 
-  let slideVariant = case writerName' of
-                           "s5"    -> S5Slides
-                           "slidy" -> SlidySlides
-                           _       -> NoSlides
-
   let startParserState =
          defaultParserState { stateParseRaw        = parseRaw,
                               stateTabStop         = tabStop,
                               stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
                                                      lhsExtension sources,
                               stateStandalone      = standalone',
-                              stateCitations       = map refId refs,
+                              stateCitations       = map CSL.refId refs,
                               stateSmart           = smart || writerName' `elem`
-                                                              ["latex", "context", "latex+lhs", "man"],
+                                                     ["latex", "context", "latex+lhs", "beamer"],
+                              stateOldDashes       = oldDashes,
                               stateColumns         = columns,
                               stateStrict          = strict,
                               stateIndentedCodeClasses = codeBlockClasses,
-                              stateApplyMacros     = writerName' `notElem` ["latex", "latex+lhs"] }
+                              stateApplyMacros     = writerName' `notElem`
+                                                     ["latex", "latex+lhs", "beamer"] }
 
   let writerOptions = defaultWriterOptions
                                     { writerStandalone       = standalone',
@@ -836,16 +953,20 @@ main = do
                                       writerIdentifierPrefix = idPrefix,
                                       writerSourceDirectory  = sourceDir,
                                       writerUserDataDir      = datadir,
-                                      writerHtml5            = html5 &&
-                                                               "html" `isPrefixOf` writerName',
-                                      writerChapters         = chapters, 
+                                      writerHtml5            = html5 ||
+                                           slideVariant == DZSlides,
+                                      writerChapters         = chapters,
                                       writerListings         = listings,
-                                      writerAscii            = ascii }
+                                      writerBeamer           = writerName' == "beamer",
+                                      writerSlideLevel       = slideLevel,
+                                      writerHighlight        = highlight,
+                                      writerHighlightStyle   = highlightStyle,
+                                      writerSetextHeaders    = setextHeaders
+                                      }
 
-  when (isNonTextOutput writerName' && outputFile == "-") $
-    do UTF8.hPutStrLn stderr ("Error:  Cannot write " ++ writerName ++ " output to stdout.\n" ++
-                               "Specify an output file using the -o option.")
-       exitWith $ ExitFailure 5
+  when (writerName' `elem` nonTextFormats&& outputFile == "-") $
+    err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
+            "Specify an output file using the -o option."
 
   let readSources [] = mapM readSource ["-"]
       readSources srcs = mapM readSource srcs
@@ -859,7 +980,14 @@ main = do
 
   let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
 
-  doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources)
+  let handleIncludes' = if readerName' == "latex" || readerName' == "beamer" ||
+                           readerName' == "latex+lhs" ||
+                           readerName' == "context"
+                           then handleIncludes
+                           else return
+
+  doc <- (reader startParserState) `fmap` (readSources sources >>=
+             handleIncludes' . convertTabs . intercalate "\n")
 
   let doc0 = foldr ($) doc transforms
 
@@ -881,19 +1009,41 @@ main = do
                                             replaceDirectory
                                             (replaceExtension cslfile "csl")
                                             csldir
-                processBiblio cslfile' refs doc1
+                processBiblio cslfile' cslabbrevs refs doc1
              else return doc1
 
+  let writeBinary :: B.ByteString -> IO ()
+      writeBinary = B.writeFile (encodeString outputFile)
+
+  let writerFn :: FilePath -> String -> IO ()
+      writerFn "-" = UTF8.putStr
+      writerFn f   = UTF8.writeFile f
+
   case lookup writerName' writers of
-        Nothing | writerName' == "epub" ->
-           writeEPUB epubStylesheet writerOptions doc2
-           >>= B.writeFile (encodeString outputFile)
-        Nothing | writerName' == "odt"  ->
-           writeODT referenceODT writerOptions doc2
-           >>= B.writeFile (encodeString outputFile)
-        Just r  -> writerFn outputFile result
-            where writerFn "-" = UTF8.putStr
-                  writerFn f   = UTF8.writeFile f
-                  result       = r writerOptions doc2 ++
-                                 ['\n' | not standalone']
-        Nothing -> error $ "Unknown writer: " ++ writerName'
+        Nothing
+          | writerName' == "epub" ->
+              writeEPUB epubStylesheet epubFonts writerOptions doc2
+               >>= writeBinary
+          | writerName' == "odt"  ->
+              writeODT referenceODT writerOptions doc2 >>= writeBinary
+          | writerName' == "docx"  ->
+              writeDocx referenceDocx writerOptions doc2 >>= writeBinary
+          | otherwise -> err 9 ("Unknown writer: " ++ writerName')
+        Just _
+          | pdfOutput  -> do
+              res <- tex2pdf latexEngine $ writeLaTeX writerOptions doc2
+              case res of
+                   Right pdf -> writeBinary pdf
+                   Left err' -> err 43 $ toString err'
+        Just r
+          | htmlFormat && ascii ->
+                  writerFn outputFile =<< selfcontain (toEntities result)
+          | otherwise ->
+                  writerFn outputFile =<< selfcontain result
+          where result       = r writerOptions doc2 ++ ['\n' | not standalone']
+                htmlFormat = writerName' `elem`
+                               ["html","html+lhs","html5","html5+lhs",
+                               "s5","slidy","dzslides"]
+                selfcontain = if selfContained && htmlFormat
+                                 then makeSelfContained datadir
+                                 else return