diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-29 02:02:34 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-11-29 02:02:34 +0000 |
commit | 9a67a486c2dc98d14d9687ceb4b01befa09114df (patch) | |
tree | 42d2f11f5e39a2850934b1ee6e36003df5802993 /src/Text/Pandoc/Writers/HTML.hs | |
parent | 47a4a3ab897ab748a4b7eab2ccb95cd9cb0e3864 (diff) |
Moved everything from src into the top-level directory.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1104 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Writers/HTML.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 458 |
1 files changed, 0 insertions, 458 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs deleted file mode 100644 index 7ec95d8ef..000000000 --- a/src/Text/Pandoc/Writers/HTML.hs +++ /dev/null @@ -1,458 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to HTML. --} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition -import Text.Pandoc.ASCIIMathML -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.Pandoc.Shared -import Text.Regex ( mkRegex, matchRegex ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower ) -import Data.List ( isPrefixOf, intersperse ) -import qualified Data.Set as S -import Control.Monad.State -import Text.XHtml.Transitional - -data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stIds :: [String] -- ^ List of header identifiers - , stMath :: Bool -- ^ Math is used in document - , stCSS :: S.Set String -- ^ CSS to include in header - } deriving Show - -defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stIds = [], - stMath = False, stCSS = S.empty} - --- Helpers to render HTML with the appropriate function. -render opts = if writerWrapText opts then renderHtml else showHtml -renderFragment opts = if writerWrapText opts - then renderHtmlFragment - else showHtmlFragment - --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts = - if writerStandalone opts - then render opts . writeHtml opts - else renderFragment opts . writeHtml opts - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) = - let titlePrefix = writerTitlePrefix opts - topTitle = evalState (inlineListToHtml opts tit) defaultWriterState - topTitle' = if null titlePrefix - then topTitle - else titlePrefix +++ " - " +++ topTitle - metadata = thetitle topTitle' +++ - meta ! [httpequiv "Content-Type", - content "text/html; charset=UTF-8"] +++ - meta ! [name "generator", content "pandoc"] +++ - (toHtmlFromList $ - map (\a -> meta ! [name "author", content a]) authors) +++ - (if null date - then noHtml - else meta ! [name "date", content date]) - titleHeader = if writerStandalone opts && not (null tit) && - not (writerS5 opts) - then h1 ! [theclass "title"] $ topTitle - else noHtml - headerBlocks = filter isHeaderBlock blocks - ids = uniqueIdentifiers $ - map (\(Header _ lst) -> lst) headerBlocks - toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks ids - else noHtml - (blocks', newstate) = - runState (blockListToHtml opts blocks) - (defaultWriterState {stIds = ids}) - cssLines = stCSS newstate - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath newstate - then case writerASCIIMathMLURL opts of - Just path -> script ! [src path, - thetype "text/javascript"] $ - noHtml - Nothing -> primHtml asciiMathMLScript - else noHtml - head = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes newstate) - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection opts notes +++ after - in if writerStandalone opts - then head +++ body thebody - else thebody - --- | Construct table of contents from list of header blocks and identifiers. --- Assumes there are as many identifiers as header blocks. -tableOfContents :: WriterOptions -> [Block] -> [String] -> Html -tableOfContents _ [] _ = noHtml -tableOfContents opts headers ids = - let opts' = opts { writerIgnoreNotes = True } - contentsTree = hierarchicalize headers - contents = evalState (mapM (elementToListItem opts') contentsTree) - (defaultWriterState {stIds = ids}) - in thediv ! [identifier "toc"] $ unordList contents - --- | Converts an Element to a list item for a table of contents, --- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState Html -elementToListItem opts (Blk _) = return noHtml -elementToListItem opts (Sec headerText subsecs) = do - st <- get - let ids = stIds st - let (id, rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - txt <- inlineListToHtml opts headerText - subHeads <- mapM (elementToListItem opts) subsecs - let subList = if null subHeads - then noHtml - else unordList subHeads - return $ (anchor ! [href ("#" ++ id), identifier ("TOC-" ++ id)] $ txt) +++ - subList - --- | Convert list of Note blocks to a footnote <div>. --- Assumes notes are sorted. -footnoteSection :: WriterOptions -> [Html] -> Html -footnoteSection opts notes = - if null notes - then noHtml - else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) - --- | Obfuscate a "mailto:" link using Javascript. -obfuscateLink :: WriterOptions -> String -> String -> Html -obfuscateLink opts text src = - let emailRegex = mkRegex "^mailto:([^@]*)@(.*)$" - src' = map toLower src - in case (matchRegex emailRegex src') of - (Just [name, domain]) -> - let domain' = substitute "." " dot " domain - at' = obfuscateChar '@' - (linkText, altText) = - if text == drop 7 src' -- autolink - then ("'<code>'+e+'</code>'", name ++ " at " ++ domain') - else ("'" ++ text ++ "'", text ++ " (" ++ name ++ " at " ++ - domain' ++ ")") - in if writerStrictMarkdown opts - then -- need to use primHtml or &'s are escaped to & in URL - primHtml $ "<a href=\"" ++ (obfuscateString src') - ++ "\">" ++ (obfuscateString text) ++ "</a>" - else (script ! [thetype "text/javascript"] $ - primHtml ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ - obfuscateString name ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ - noscript (primHtml $ obfuscateString altText) - _ -> anchor ! [href src] $ primHtml text -- malformed email - --- | Obfuscate character as entity. -obfuscateChar :: Char -> String -obfuscateChar char = - let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" - --- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences - --- | True if character is a punctuation character (unicode). -isPunctuation :: Char -> Bool -isPunctuation c = - let c' = ord c - in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || - c' >= 0xE000 && c' <= 0xE0FF - then True - else False - --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do - st <- get - let current = stCSS st - put $ st {stCSS = S.insert item current} - --- | Convert Pandoc inline list to plain text identifier. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) = - xAsText ++ inlineListToIdentifier xs - where xAsText = case x of - Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ - concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst - Note _ -> "" - --- | Return unique identifiers for list of inline lists. -uniqueIdentifiers :: [[Inline]] -> [String] -uniqueIdentifiers ls = - let addIdentifier (nonuniqueIds, uniqueIds) l = - let new = inlineListToIdentifier l - matches = length $ filter (== new) nonuniqueIds - new' = new ++ if matches > 0 then ("-" ++ show matches) else "" - in (new:nonuniqueIds, new':uniqueIds) - in reverse $ snd $ foldl addIdentifier ([],[]) ls - --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml opts Null = return $ noHtml -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) -blockToHtml opts (RawHtml str) = return $ primHtml str -blockToHtml opts (HorizontalRule) = return $ hr -blockToHtml opts (CodeBlock str) = return $ pre $ thecode << (str ++ "\n") - -- the final \n for consistency with Markdown.pl -blockToHtml opts (BlockQuote blocks) = - -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - if writerS5 opts - then let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) - (BulletList lst) - [OrderedList attribs lst] -> - blockToHtml (opts {writerIncremental = inc}) - (OrderedList attribs lst) - otherwise -> blockListToHtml opts blocks >>= - (return . blockquote) - else blockListToHtml opts blocks >>= (return . blockquote) -blockToHtml opts (Header level lst) = do - contents <- inlineListToHtml opts lst - st <- get - let ids = stIds st - let (id, rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) - then [] - else [identifier id] - let contents' = if writerTableOfContents opts - then anchor ! [href ("#TOC-" ++ id)] $ contents - else contents - return $ case level of - 1 -> h1 contents' ! attribs - 2 -> h2 contents' ! attribs - 3 -> h3 contents' ! attribs - 4 -> h4 contents' ! attribs - 5 -> h5 contents' ! attribs - 6 -> h6 contents' ! attribs - _ -> paragraph contents' ! attribs -blockToHtml opts (BulletList lst) = do - contents <- mapM (blockListToHtml opts) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ unordList ! attribs $ contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do - contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle - let attribs = (if writerIncremental opts - then [theclass "incremental"] - else []) ++ - (if startnum /= 1 - then [start startnum] - else []) ++ - (if numstyle /= DefaultStyle - then [theclass numstyle'] - else []) - if numstyle /= DefaultStyle - then addToCSS $ "ol." ++ numstyle' ++ - " { list-style-type: " ++ - numstyle' ++ "; }" - else return () - return $ ordList ! attribs $ contents -blockToHtml opts (DefinitionList lst) = do - contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term - def' <- blockListToHtml opts def - return $ (term', def')) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ defList ! attribs $ contents -blockToHtml opts (Table capt aligns widths headers rows) = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return noHtml - else inlineListToHtml opts capt >>= return . caption - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows' <- mapM (tableRowToHtml opts alignStrings) rows - return $ table $ captionDoc +++ colHeads +++ rows' - -colHeadsToHtml opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\align width item -> tableItemToHtml opts th align width item) - alignStrings widths headers - return $ tr $ toHtmlFromList heads - -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToHtml opts aligns cols = - (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) cols) >>= - return . tr . toHtmlFromList - -tableItemToHtml opts tag align' width item = do - contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if width /= 0 - then [thestyle ("width: " ++ show (truncate (100*width)) ++ - "%;")] - else [] - return $ tag ! attrib $ contents - -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html -blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= return . toHtmlFromList - --- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst = - mapM (inlineToHtml opts) lst >>= return . toHtmlFromList - --- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = - case inline of - (Str str) -> return $ stringToHtml str - (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" - (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize - (Strong lst) -> inlineListToHtml opts lst >>= return . strong - (Code str) -> return $ thecode << str - (Strikeout lst) -> addToCSS - ".strikeout { text-decoration: line-through; }" >> - inlineListToHtml opts lst >>= - return . (thespan ! [theclass "strikeout"]) - (Superscript lst) -> inlineListToHtml opts lst >>= return . sup - (Subscript lst) -> inlineListToHtml opts lst >>= return . sub - (Quoted quoteType lst) -> - let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") - in do contents <- inlineListToHtml opts lst - return $ leftQuote +++ contents +++ rightQuote - (TeX str) -> (if writerUseASCIIMathML opts - then modify (\st -> st {stMath = True}) - else return ()) >> return (stringToHtml str) - (HtmlInline str) -> return $ primHtml str - (Link [Code str] (src,tit)) | "mailto:" `isPrefixOf` src -> - return $ obfuscateLink opts str src - (Link txt (src,tit)) | "mailto:" `isPrefixOf` src -> do - linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (show linkText) src - (Link txt (src,tit)) -> do - linkText <- inlineListToHtml opts txt - return $ anchor ! ([href src] ++ - if null tit then [] else [title tit]) $ - linkText - (Image txt (source,tit)) -> do - alternate <- inlineListToHtml opts txt - let alternate' = renderFragment opts alternate - let attributes = [src source] ++ - (if null tit - then [] - else [title tit]) ++ - if null txt - then [] - else [alt alternate'] - return $ image ! attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do - st <- get - let notes = stNotes st - let number = (length notes) + 1 - let ref = show number - htmlContents <- blockListToNote opts ref contents - -- push contents onto front of notes - put $ st {stNotes = (htmlContents:notes)} - return $ anchor ! [href ("#fn" ++ ref), - theclass "footnoteRef", - identifier ("fnref" ++ ref)] << - sup << ref - -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html -blockListToNote opts ref blocks = - -- If last block is Para or Plain, include the backlink at the end of - -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [HtmlInline $ " <a href=\"#fnref" ++ ref ++ - "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] - blocks' = if null blocks - then [] - else let lastBlock = last blocks - otherBlocks = init blocks - in case lastBlock of - (Para lst) -> otherBlocks ++ - [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ - [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, - Plain backlink] - in do contents <- blockListToHtml opts blocks' - return $ li ! [identifier ("fn" ++ ref)] $ contents - |