diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/HTML.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 302 |
1 files changed, 162 insertions, 140 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e0e3882fe..d33dcff27 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,132 +30,145 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition -import Text.Pandoc.LaTeXMathML import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared +import Text.Pandoc.Templates import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) -import Text.Pandoc.XML (stripTags) +import Text.Pandoc.Highlighting ( highlightHtml ) +import Text.Pandoc.XML (stripTags, escapeStringForXML) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) import Data.Maybe ( catMaybes ) -import qualified Data.Set as S import Control.Monad.State import Text.XHtml.Transitional hiding ( stringToHtml ) +import Text.TeXMath +import Text.XML.Light.Output data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stMath :: Bool -- ^ Math is used in document - , stCSS :: S.Set String -- ^ CSS to include in header - , stSecNum :: [Int] -- ^ Number of current section + { stNotes :: [Html] -- ^ List of notes + , stMath :: Bool -- ^ Math is used in document + , stHighlighting :: Bool -- ^ Syntax highlighting is used + , stSecNum :: [Int] -- ^ Number of current section } deriving Show defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stMath = False, stCSS = S.empty, stSecNum = []} +defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []} -- Helpers to render HTML with the appropriate function. -render :: (HTML html) => WriterOptions -> html -> String -render opts = if writerWrapText opts then renderHtml else showHtml - renderFragment :: (HTML html) => WriterOptions -> html -> String renderFragment opts = if writerWrapText opts then renderHtmlFragment else showHtmlFragment --- | Slightly modified version of Text.XHtml's stringToHtml. --- Only uses numerical entities for 0xff and greater. --- Adds . +-- | Modified version of Text.XHtml's stringToHtml. +-- Use unicode characters wherever possible. stringToHtml :: String -> Html -stringToHtml = primHtml . concatMap fixChar - where - fixChar '<' = "<" - fixChar '>' = ">" - fixChar '&' = "&" - fixChar '"' = """ - fixChar '\160' = " " - fixChar c | ord c < 0xff = [c] - fixChar c = "&#" ++ show (ord c) ++ ";" +stringToHtml = primHtml . escapeStringForXML -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts = - if writerStandalone opts - then render opts . writeHtml opts - else renderFragment opts . writeHtml opts +writeHtmlString opts d = + let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState + in if writerStandalone opts + then inTemplate opts tit auths date toc body' newvars + else renderFragment opts body' -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) = - let titlePrefix = writerTitlePrefix opts - (topTitle,st) = runState (inlineListToHtml opts tit) defaultWriterState - topTitle'' = stripTags $ showHtmlFragment topTitle - topTitle' = titlePrefix ++ - (if null topTitle'' || null titlePrefix - then "" - else " - ") ++ topTitle'' - metadata = thetitle << topTitle' +++ - meta ! [httpequiv "Content-Type", - content "text/html; charset=UTF-8"] +++ - meta ! [name "generator", content "pandoc"] +++ - (toHtmlFromList $ - map (\a -> meta ! [name "author", content a]) authors) +++ - (if null date - then noHtml - else meta ! [name "date", content date]) - titleHeader = if writerStandalone opts && not (null tit) && - not (writerS5 opts) - then h1 ! [theclass "title"] $ topTitle - else noHtml - sects = hierarchicalize blocks - toc = if writerTableOfContents opts - then evalState (tableOfContents opts sects) st - else noHtml - (blocks', st') = runState - (mapM (elementToHtml opts) sects >>= return . toHtmlFromList) - st - cssLines = stCSS st' - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath st' - then case writerHTMLMathMethod opts of - LaTeXMathML Nothing -> - primHtml latexMathMLScript - LaTeXMathML (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - JsMath (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - _ -> noHtml - else noHtml - head' = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes st') - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection notes +++ after +writeHtml opts d = + let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState in if writerStandalone opts - then head' +++ body thebody - else thebody + then inTemplate opts tit auths date toc body' newvars + else body' + +-- result is (title, authors, date, toc, body, new variables) +pandocToHtml :: WriterOptions + -> Pandoc + -> State WriterState (Html, [Html], Html, Maybe Html, Html, [(String,String)]) +pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do + let standalone = writerStandalone opts + tit <- if standalone + then inlineListToHtml opts title' + else return noHtml + auths <- if standalone + then mapM (inlineListToHtml opts) authors' + else return [] + date <- if standalone + then inlineListToHtml opts date' + else return noHtml + let sects = hierarchicalize blocks + toc <- if writerTableOfContents opts + then tableOfContents opts sects + else return Nothing + blocks' <- liftM toHtmlFromList $ mapM (elementToHtml opts) sects + st <- get + let notes = reverse (stNotes st) + let thebody = blocks' +++ footnoteSection notes + let math = if stMath st + then case writerHTMLMathMethod opts of + LaTeXMathML (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + MathML (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + JsMath (Just url) -> + script ! + [src url, thetype "text/javascript"] $ noHtml + _ -> case lookup "mathml-script" (writerVariables opts) of + Just s -> + script ! [thetype "text/javascript"] << + primHtml s + Nothing -> noHtml + else noHtml + let newvars = [("highlighting","yes") | stHighlighting st] ++ + [("math", renderHtmlFragment math) | stMath st] + return (tit, auths, date, toc, thebody, newvars) + +inTemplate :: TemplateTarget a + => WriterOptions + -> Html + -> [Html] + -> Html + -> Maybe Html + -> Html + -> [(String,String)] + -> a +inTemplate opts tit auths date toc body' newvars = + let renderedTit = showHtmlFragment tit + topTitle' = stripTags renderedTit + authors = map (stripTags . showHtmlFragment) auths + date' = stripTags $ showHtmlFragment date + variables = writerVariables opts ++ newvars + context = variables ++ + [ ("body", renderHtmlFragment body') + , ("pagetitle", topTitle') + , ("title", renderHtmlFragment tit) + , ("date", date') ] ++ + (case toc of + Just t -> [ ("toc", renderHtmlFragment t)] + Nothing -> []) ++ + [ ("author", a) | a <- authors ] + in renderTemplate context $ writerTemplate opts -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix prefixedId :: WriterOptions -> String -> HtmlAttr prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s -- | Construct table of contents from list of elements. -tableOfContents :: WriterOptions -> [Element] -> State WriterState Html -tableOfContents _ [] = return noHtml +tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) +tableOfContents _ [] = return Nothing tableOfContents opts sects = do let opts' = opts { writerIgnoreNotes = True } contents <- mapM (elementToListItem opts') sects - return $ thediv ! [prefixedId opts' "TOC"] $ unordList $ catMaybes contents + let tocList = catMaybes contents + return $ if null tocList + then Nothing + else Just $ thediv ! [prefixedId opts' "TOC"] $ unordList tocList -- | Convert section number to string showSecNum :: [Int] -> String @@ -236,7 +249,7 @@ obfuscateLink opts txt s = linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ primHtml txt -- malformed email + _ -> anchor ! [href s] $ stringToHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -249,17 +262,15 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do - st <- get - let current = stCSS st - put $ st {stCSS = S.insert item current} - -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return $ noHtml blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para [Image txt (s,tit)]) = do + img <- inlineToHtml opts (Image txt (s,tit)) + capt <- inlineListToHtml opts txt + return $ thediv ! [theclass "figure"] << + [img, paragraph ! [theclass "caption"] << capt] blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) blockToHtml _ (RawHtml str) = return $ primHtml str blockToHtml _ (HorizontalRule) = return $ hr @@ -277,7 +288,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ [stringToHtml $ rawCode' ++ "\n"]) - Right h -> addToCSS defaultHighlightingCss >> return h + Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; @@ -344,21 +355,33 @@ blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt then return noHtml else inlineListToHtml opts capt >>= return . caption - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows'' <- zipWithM (tableRowToHtml opts alignStrings) (cycle ["odd", "even"]) rows' - return $ table $ captionDoc +++ colHeads +++ rows'' - -colHeadsToHtml :: WriterOptions - -> [[Char]] - -> [Double] + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let coltags = if all (== 0.0) widths + then noHtml + else concatHtml $ map + (\w -> col ! [width $ percent w] $ noHtml) widths + head' <- if all null headers + then return noHtml + else liftM (thead <<) $ tableRowToHtml opts alignStrings 0 headers + body' <- liftM (tbody <<) $ + zipWithM (tableRowToHtml opts alignStrings) [1..] rows' + return $ table $ captionDoc +++ coltags +++ head' +++ body' + +tableRowToHtml :: WriterOptions + -> [String] + -> Int -> [[Block]] -> State WriterState Html -colHeadsToHtml opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item) - alignStrings widths headers - return $ tr ! [theclass "header"] $ toHtmlFromList heads +tableRowToHtml opts alignStrings rownum cols' = do + let mkcell = if rownum == 0 then th else td + let rowclass = case rownum of + 0 -> "header" + x | x `rem` 2 == 1 -> "odd" + _ -> "even" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToHtml opts mkcell alignment item) + alignStrings cols' + return $ tr ! [theclass rowclass] $ toHtmlFromList cols'' alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -367,28 +390,14 @@ alignmentToString alignment = case alignment of AlignCenter -> "center" AlignDefault -> "left" -tableRowToHtml :: WriterOptions - -> [[Char]] - -> String - -> [[Block]] - -> State WriterState Html -tableRowToHtml opts aligns rowclass columns = - (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>= - return . (tr ! [theclass rowclass]) . toHtmlFromList - tableItemToHtml :: WriterOptions -> (Html -> Html) -> [Char] - -> Double -> [Block] -> State WriterState Html -tableItemToHtml opts tag' align' width' item = do +tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if width' /= 0 - then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] - else [] - return $ tag' ! attrib $ contents + return $ tag' ! [align align'] $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = @@ -405,11 +414,11 @@ inlineToHtml opts inline = case inline of (Str str) -> return $ stringToHtml str (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" + (LineBreak) -> return br + (EmDash) -> return $ stringToHtml "—" + (EnDash) -> return $ stringToHtml "–" + (Ellipses) -> return $ stringToHtml "…" + (Apostrophe) -> return $ stringToHtml "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str @@ -421,10 +430,10 @@ inlineToHtml opts inline = (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") + SingleQuote -> (stringToHtml "‘", + stringToHtml "’") + DoubleQuote -> (stringToHtml "“", + stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> @@ -447,7 +456,20 @@ inlineToHtml opts inline = alt str, title str] GladTeX -> return $ primHtml $ "<EQ>" ++ str ++ "</EQ>" - PlainMath -> + MathML _ -> do + let dt = if t == InlineMath + then DisplayInline + else DisplayBlock + let conf = useShortEmptyTags (const False) + defaultConfigPP + case texMathToMathML dt str of + Right r -> return $ primHtml $ + ppcElement conf r + Left _ -> inlineListToHtml opts + (readTeXMath str) >>= + return . (thespan ! + [theclass "math"]) + PlainMath -> inlineListToHtml opts (readTeXMath str) >>= return . (thespan ! [theclass "math"]) ) (TeX str) -> case writerHTMLMathMethod opts of @@ -485,10 +507,10 @@ inlineToHtml opts inline = htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes put $ st {stNotes = (htmlContents:notes)} - return $ anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), - theclass "footnoteRef", - prefixedId opts ("fnref" ++ ref)] << - sup << ref + return $ sup << + anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), + theclass "footnoteRef", + prefixedId opts ("fnref" ++ ref)] << ref (Cite _ il) -> inlineListToHtml opts il blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html @@ -497,7 +519,7 @@ blockListToNote opts ref blocks = -- that block. Otherwise, insert a new Plain block with the backlink. let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] + " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] blocks' = if null blocks then [] else let lastBlock = last blocks |