From 42aca57dee8d88afa5fac512aeb1198102908865 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Sat, 24 Jan 2009 20:00:26 +0000 Subject: Moved all haskell source to src subdirectory. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1528 788f1e2b-df1e-0410-8736-df70ead52e1b --- Main.hs | 681 ----------------- Text/Pandoc.hs | 114 --- Text/Pandoc/Biblio.hs | 66 -- Text/Pandoc/Blocks.hs | 146 ---- Text/Pandoc/CharacterReferences.hs | 327 -------- Text/Pandoc/DefaultHeaders.hs | 69 -- Text/Pandoc/Definition.hs | 150 ---- Text/Pandoc/Highlighting.hs | 64 -- Text/Pandoc/LaTeXMathML.hs | 14 - Text/Pandoc/ODT.hs | 88 --- Text/Pandoc/Plugins.hs | 69 -- Text/Pandoc/Readers/HTML.hs | 675 ----------------- Text/Pandoc/Readers/LaTeX.hs | 774 ------------------- Text/Pandoc/Readers/Markdown.hs | 1243 ------------------------------- Text/Pandoc/Readers/RST.hs | 707 ------------------ Text/Pandoc/Readers/TeXMath.hs | 233 ------ Text/Pandoc/Shared.hs | 953 ------------------------ Text/Pandoc/TH.hs | 65 -- Text/Pandoc/Writers/ConTeXt.hs | 302 -------- Text/Pandoc/Writers/Docbook.hs | 262 ------- Text/Pandoc/Writers/HTML.hs | 557 -------------- Text/Pandoc/Writers/LaTeX.hs | 331 -------- Text/Pandoc/Writers/Man.hs | 301 -------- Text/Pandoc/Writers/Markdown.hs | 396 ---------- Text/Pandoc/Writers/MediaWiki.hs | 396 ---------- Text/Pandoc/Writers/OpenDocument.hs | 568 -------------- Text/Pandoc/Writers/RST.hs | 346 --------- Text/Pandoc/Writers/RTF.hs | 291 -------- Text/Pandoc/Writers/S5.hs | 157 ---- Text/Pandoc/Writers/Texinfo.hs | 474 ------------ Text/Pandoc/XML.hs | 88 --- pandoc.cabal | 4 +- src/Main.hs | 681 +++++++++++++++++ src/Text/Pandoc.hs | 114 +++ src/Text/Pandoc/Biblio.hs | 66 ++ src/Text/Pandoc/Blocks.hs | 146 ++++ src/Text/Pandoc/CharacterReferences.hs | 327 ++++++++ src/Text/Pandoc/DefaultHeaders.hs | 69 ++ src/Text/Pandoc/Definition.hs | 150 ++++ src/Text/Pandoc/Highlighting.hs | 64 ++ src/Text/Pandoc/LaTeXMathML.hs | 14 + src/Text/Pandoc/ODT.hs | 88 +++ src/Text/Pandoc/Plugins.hs | 69 ++ src/Text/Pandoc/Readers/HTML.hs | 675 +++++++++++++++++ src/Text/Pandoc/Readers/LaTeX.hs | 774 +++++++++++++++++++ src/Text/Pandoc/Readers/Markdown.hs | 1243 +++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/RST.hs | 707 ++++++++++++++++++ src/Text/Pandoc/Readers/TeXMath.hs | 233 ++++++ src/Text/Pandoc/Shared.hs | 953 ++++++++++++++++++++++++ src/Text/Pandoc/TH.hs | 65 ++ src/Text/Pandoc/Writers/ConTeXt.hs | 302 ++++++++ src/Text/Pandoc/Writers/Docbook.hs | 262 +++++++ src/Text/Pandoc/Writers/HTML.hs | 557 ++++++++++++++ src/Text/Pandoc/Writers/LaTeX.hs | 331 ++++++++ src/Text/Pandoc/Writers/Man.hs | 301 ++++++++ src/Text/Pandoc/Writers/Markdown.hs | 396 ++++++++++ src/Text/Pandoc/Writers/MediaWiki.hs | 396 ++++++++++ src/Text/Pandoc/Writers/OpenDocument.hs | 568 ++++++++++++++ src/Text/Pandoc/Writers/RST.hs | 346 +++++++++ src/Text/Pandoc/Writers/RTF.hs | 291 ++++++++ src/Text/Pandoc/Writers/S5.hs | 157 ++++ src/Text/Pandoc/Writers/Texinfo.hs | 474 ++++++++++++ src/Text/Pandoc/XML.hs | 88 +++ 63 files changed, 10909 insertions(+), 10909 deletions(-) delete mode 100644 Main.hs delete mode 100644 Text/Pandoc.hs delete mode 100644 Text/Pandoc/Biblio.hs delete mode 100644 Text/Pandoc/Blocks.hs delete mode 100644 Text/Pandoc/CharacterReferences.hs delete mode 100644 Text/Pandoc/DefaultHeaders.hs delete mode 100644 Text/Pandoc/Definition.hs delete mode 100644 Text/Pandoc/Highlighting.hs delete mode 100644 Text/Pandoc/LaTeXMathML.hs delete mode 100644 Text/Pandoc/ODT.hs delete mode 100644 Text/Pandoc/Plugins.hs delete mode 100644 Text/Pandoc/Readers/HTML.hs delete mode 100644 Text/Pandoc/Readers/LaTeX.hs delete mode 100644 Text/Pandoc/Readers/Markdown.hs delete mode 100644 Text/Pandoc/Readers/RST.hs delete mode 100644 Text/Pandoc/Readers/TeXMath.hs delete mode 100644 Text/Pandoc/Shared.hs delete mode 100644 Text/Pandoc/TH.hs delete mode 100644 Text/Pandoc/Writers/ConTeXt.hs delete mode 100644 Text/Pandoc/Writers/Docbook.hs delete mode 100644 Text/Pandoc/Writers/HTML.hs delete mode 100644 Text/Pandoc/Writers/LaTeX.hs delete mode 100644 Text/Pandoc/Writers/Man.hs delete mode 100644 Text/Pandoc/Writers/Markdown.hs delete mode 100644 Text/Pandoc/Writers/MediaWiki.hs delete mode 100644 Text/Pandoc/Writers/OpenDocument.hs delete mode 100644 Text/Pandoc/Writers/RST.hs delete mode 100644 Text/Pandoc/Writers/RTF.hs delete mode 100644 Text/Pandoc/Writers/S5.hs delete mode 100644 Text/Pandoc/Writers/Texinfo.hs delete mode 100644 Text/Pandoc/XML.hs create mode 100644 src/Main.hs create mode 100644 src/Text/Pandoc.hs create mode 100644 src/Text/Pandoc/Biblio.hs create mode 100644 src/Text/Pandoc/Blocks.hs create mode 100644 src/Text/Pandoc/CharacterReferences.hs create mode 100644 src/Text/Pandoc/DefaultHeaders.hs create mode 100644 src/Text/Pandoc/Definition.hs create mode 100644 src/Text/Pandoc/Highlighting.hs create mode 100644 src/Text/Pandoc/LaTeXMathML.hs create mode 100644 src/Text/Pandoc/ODT.hs create mode 100644 src/Text/Pandoc/Plugins.hs create mode 100644 src/Text/Pandoc/Readers/HTML.hs create mode 100644 src/Text/Pandoc/Readers/LaTeX.hs create mode 100644 src/Text/Pandoc/Readers/Markdown.hs create mode 100644 src/Text/Pandoc/Readers/RST.hs create mode 100644 src/Text/Pandoc/Readers/TeXMath.hs create mode 100644 src/Text/Pandoc/Shared.hs create mode 100644 src/Text/Pandoc/TH.hs create mode 100644 src/Text/Pandoc/Writers/ConTeXt.hs create mode 100644 src/Text/Pandoc/Writers/Docbook.hs create mode 100644 src/Text/Pandoc/Writers/HTML.hs create mode 100644 src/Text/Pandoc/Writers/LaTeX.hs create mode 100644 src/Text/Pandoc/Writers/Man.hs create mode 100644 src/Text/Pandoc/Writers/Markdown.hs create mode 100644 src/Text/Pandoc/Writers/MediaWiki.hs create mode 100644 src/Text/Pandoc/Writers/OpenDocument.hs create mode 100644 src/Text/Pandoc/Writers/RST.hs create mode 100644 src/Text/Pandoc/Writers/RTF.hs create mode 100644 src/Text/Pandoc/Writers/S5.hs create mode 100644 src/Text/Pandoc/Writers/Texinfo.hs create mode 100644 src/Text/Pandoc/XML.hs diff --git a/Main.hs b/Main.hs deleted file mode 100644 index e498b3c0a..000000000 --- a/Main.hs +++ /dev/null @@ -1,681 +0,0 @@ -{-# LANGUAGE CPP #-} -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Main - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Parses command-line options and calls the appropriate readers and -writers. --} -module Main where -import Text.Pandoc -import Text.Pandoc.ODT -import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) ) -import Text.Pandoc.Highlighting ( languages ) -import System.Environment ( getArgs, getProgName, getEnvironment ) -import System.Exit ( exitWith, ExitCode (..) ) -import System.FilePath -import System.Console.GetOpt -import Data.Maybe ( fromMaybe ) -import Data.Char ( toLower ) -import Data.List ( intercalate, isSuffixOf ) -import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) -import System.IO ( stdout, stderr ) -import System.IO.UTF8 -#ifdef _CITEPROC -import Text.CSL -import Text.Pandoc.Biblio -#endif -import Text.Pandoc.Plugins (getPlugin) -import Control.Monad (foldM, when, unless) - -copyrightMessage :: String -copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++ - "Web: http://johnmacfarlane.net/pandoc\n" ++ - "This is free software; see the source for copying conditions. There is no\n" ++ - "warranty, not even for merchantability or fitness for a particular purpose." - -compileInfo :: String -compileInfo = -#ifdef _CITEPROC - " +citeproc" ++ -#else - " -citeproc" ++ -#endif -#ifdef _HIGHLIGHTING - " +highlighting" ++ -#else - " -highlighting" ++ -#endif - if null languages - then "\n" - else "\nCompiled with syntax highlighting support for:\n" ++ wrapWords 78 languages - --- | Converts a list of strings into a single string with the items printed as --- comma separated words in lines with a maximum line length. -wrapWords :: Int -> [String] -> String -wrapWords c = wrap' c c where - wrap' _ _ [] = "" - wrap' cols remaining (x:xs) = if remaining == cols - then x ++ wrap' cols (remaining - length x) xs - else if (length x + 1) > remaining - then ",\n" ++ x ++ wrap' cols (cols - length x) xs - else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs - --- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] -readers = [("native" , readPandoc) - ,("markdown" , readMarkdown) - ,("markdown+lhs" , readMarkdown) - ,("rst" , readRST) - ,("rst+lhs" , readRST) - ,("html" , readHtml) - ,("latex" , readLaTeX) - ,("latex+lhs" , readLaTeX) - ] - --- | Reader for native Pandoc format. -readPandoc :: ParserState -> String -> Pandoc -readPandoc _ = read - --- | Association list of formats and pairs of writers and default headers. -writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] -writers = [("native" , (writeDoc, "")) - ,("html" , (writeHtmlString, "")) - ,("html+lhs" , (writeHtmlString, "")) - ,("s5" , (writeS5String, defaultS5Header)) - ,("docbook" , (writeDocbook, defaultDocbookHeader)) - ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader)) - ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader)) - ,("latex" , (writeLaTeX, defaultLaTeXHeader)) - ,("latex+lhs" , (writeLaTeX, defaultLaTeXHeader)) - ,("context" , (writeConTeXt, defaultConTeXtHeader)) - ,("texinfo" , (writeTexinfo, "")) - ,("man" , (writeMan, "")) - ,("markdown" , (writeMarkdown, "")) - ,("markdown+lhs" , (writeMarkdown, "")) - ,("rst" , (writeRST, "")) - ,("rst+lhs" , (writeRST, "")) - ,("mediawiki" , (writeMediaWiki, "")) - ,("rtf" , (writeRTF, defaultRTFHeader)) - ] - -isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt"]) - --- | Writer for Pandoc native format. -writeDoc :: WriterOptions -> Pandoc -> String -writeDoc _ = prettyPandoc - --- | Data structure for command line options. -data Opt = Opt - { optPreserveTabs :: Bool -- ^ Convert tabs to spaces - , optTabStop :: Int -- ^ Number of spaces per tab - , optStandalone :: Bool -- ^ Include header, footer - , optReader :: String -- ^ Reader format - , optWriter :: String -- ^ Writer format - , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX - , optCSS :: [String] -- ^ CSS file to link to - , optTableOfContents :: Bool -- ^ Include table of contents - , optIncludeInHeader :: String -- ^ File to include in header - , optIncludeBeforeBody :: String -- ^ File to include at top of body - , optIncludeAfterBody :: String -- ^ File to include at end of body - , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" - , optTitlePrefix :: String -- ^ Optional prefix for HTML title - , optOutputFile :: String -- ^ Name of output file - , optNumberSections :: Bool -- ^ Number sections in LaTeX - , optIncremental :: Bool -- ^ Use incremental lists in S5 - , optSmart :: Bool -- ^ Use smart typography - , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math - , optDumpArgs :: Bool -- ^ Output command-line arguments - , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optStrict :: Bool -- ^ Use strict markdown syntax - , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , optWrapText :: Bool -- ^ Wrap text - , optSanitizeHTML :: Bool -- ^ Sanitize HTML - , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply - , optEmailObfuscation :: ObfuscationMethod -#ifdef _CITEPROC - , optBiblioFile :: String - , optBiblioFormat :: String - , optCslFile :: String -#endif - } - --- | Defaults for command-line options. -defaultOpts :: Opt -defaultOpts = Opt - { optPreserveTabs = False - , optTabStop = 4 - , optStandalone = False - , optReader = "" -- null for default reader - , optWriter = "" -- null for default writer - , optParseRaw = False - , optCSS = [] - , optTableOfContents = False - , optIncludeInHeader = "" - , optIncludeBeforeBody = "" - , optIncludeAfterBody = "" - , optCustomHeader = "DEFAULT" - , optTitlePrefix = "" - , optOutputFile = "-" -- "-" means stdout - , optNumberSections = False - , optIncremental = False - , optSmart = False - , optHTMLMathMethod = PlainMath - , optDumpArgs = False - , optIgnoreArgs = False - , optStrict = False - , optReferenceLinks = False - , optWrapText = True - , optSanitizeHTML = False - , optPlugins = [] - , optEmailObfuscation = JavascriptObfuscation -#ifdef _CITEPROC - , optBiblioFile = [] - , optBiblioFormat = [] - , optCslFile = [] -#endif - } - --- | A list of functions, each transforming the options data structure --- in response to a command-line option. -options :: [OptDescr (Opt -> IO Opt)] -options = - [ Option "fr" ["from","read"] - (ReqArg - (\arg opt -> return opt { optReader = map toLower arg }) - "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")") - - , Option "tw" ["to","write"] - (ReqArg - (\arg opt -> return opt { optWriter = map toLower arg }) - "FORMAT") - "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")") - - , Option "s" ["standalone"] - (NoArg - (\opt -> return opt { optStandalone = True })) - "" -- "Include needed header and footer on output" - - , Option "o" ["output"] - (ReqArg - (\arg opt -> return opt { optOutputFile = arg }) - "FILENAME") - "" -- "Name of output file" - - , Option "p" ["preserve-tabs"] - (NoArg - (\opt -> return opt { optPreserveTabs = True })) - "" -- "Preserve tabs instead of converting to spaces" - - , Option "" ["tab-stop"] - (ReqArg - (\arg opt -> return opt { optTabStop = (read arg) } ) - "TABSTOP") - "" -- "Tab stop (default 4)" - - , Option "" ["strict"] - (NoArg - (\opt -> return opt { optStrict = True } )) - "" -- "Disable markdown syntax extensions" - - , Option "" ["reference-links"] - (NoArg - (\opt -> return opt { optReferenceLinks = True } )) - "" -- "Use reference links in parsing HTML" - - , Option "R" ["parse-raw"] - (NoArg - (\opt -> return opt { optParseRaw = True })) - "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" - - , Option "S" ["smart"] - (NoArg - (\opt -> return opt { optSmart = True })) - "" -- "Use smart quotes, dashes, and ellipses" - - , Option "m" ["latexmathml", "asciimathml"] - (OptArg - (\arg opt -> return opt { optHTMLMathMethod = - LaTeXMathML arg }) - "URL") - "" -- "Use LaTeXMathML script in html output" - - , Option "" ["mimetex"] - (OptArg - (\arg opt -> return opt { optHTMLMathMethod = MimeTeX - (fromMaybe "/cgi-bin/mimetex.cgi" arg)}) - "URL") - "" -- "Use mimetex for HTML math" - - , Option "" ["jsmath"] - (OptArg - (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) - "URL") - "" -- "Use jsMath for HTML math" - - , Option "" ["gladtex"] - (NoArg - (\opt -> return opt { optHTMLMathMethod = GladTeX })) - "" -- "Use gladtex for HTML math" - - , Option "i" ["incremental"] - (NoArg - (\opt -> return opt { optIncremental = True })) - "" -- "Make list items display incrementally in S5" - - , Option "N" ["number-sections"] - (NoArg - (\opt -> return opt { optNumberSections = True })) - "" -- "Number sections in LaTeX" - - , Option "" ["no-wrap"] - (NoArg - (\opt -> return opt { optWrapText = False })) - "" -- "Do not wrap text in output" - - , Option "" ["sanitize-html"] - (NoArg - (\opt -> return opt { optSanitizeHTML = True })) - "" -- "Sanitize HTML" - - , Option "" ["email-obfuscation"] - (ReqArg - (\arg opt -> do - method <- case arg of - "references" -> return ReferenceObfuscation - "javascript" -> return JavascriptObfuscation - "none" -> return NoObfuscation - _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >> - exitWith (ExitFailure 6) - return opt { optEmailObfuscation = method }) - "none|javascript|references") - "" -- "Method for obfuscating email in HTML" - - , Option "" ["toc", "table-of-contents"] - (NoArg - (\opt -> return opt { optTableOfContents = True })) - "" -- "Include table of contents" - - , Option "c" ["css"] - (ReqArg - (\arg opt -> do - let old = optCSS opt - return opt { optCSS = old ++ [arg], - optStandalone = True }) - "CSS") - "" -- "Link to CSS style sheet" - - , Option "H" ["include-in-header"] - (ReqArg - (\arg opt -> do - let old = optIncludeInHeader opt - text <- readFile arg - return opt { optIncludeInHeader = old ++ text, - optStandalone = True }) - "FILENAME") - "" -- "File to include at end of header (implies -s)" - - , Option "B" ["include-before-body"] - (ReqArg - (\arg opt -> do - let old = optIncludeBeforeBody opt - text <- readFile arg - return opt { optIncludeBeforeBody = old ++ text }) - "FILENAME") - "" -- "File to include before document body" - - , Option "A" ["include-after-body"] - (ReqArg - (\arg opt -> do - let old = optIncludeAfterBody opt - text <- readFile arg - return opt { optIncludeAfterBody = old ++ text }) - "FILENAME") - "" -- "File to include after document body" - - , Option "C" ["custom-header"] - (ReqArg - (\arg opt -> do - text <- readFile arg - return opt { optCustomHeader = text, - optStandalone = True }) - "FILENAME") - "" -- "File to use for custom header (implies -s)" - - , Option "P" ["plugins"] - (ReqArg - (\arg opt -> do - let pluginModules = splitBy ',' arg - plugins <- mapM getPlugin pluginModules - return opt { optPlugins = plugins }) - "MODULE[,MODULE...]") - "" -- "Haskell modules" - - , Option "T" ["title-prefix"] - (ReqArg - (\arg opt -> return opt { optTitlePrefix = arg, - optStandalone = True }) - "STRING") - "" -- "String to prefix to HTML window title" - - , Option "D" ["print-default-header"] - (ReqArg - (\arg _ -> do - let header = case (lookup arg writers) of - Just (_, h) -> h - Nothing -> error ("Unknown reader: " ++ arg) - hPutStr stdout header - exitWith ExitSuccess) - "FORMAT") - "" -- "Print default header for FORMAT" -#ifdef _CITEPROC - , Option "" ["biblio"] - (ReqArg - (\arg opt -> return opt { optBiblioFile = arg} ) - "FILENAME") - "" - , Option "" ["biblio-format"] - (ReqArg - (\arg opt -> return opt { optBiblioFormat = arg} ) - "STRING") - "" - , Option "" ["csl"] - (ReqArg - (\arg opt -> return opt { optCslFile = arg} ) - "FILENAME") - "" -#endif - , Option "" ["dump-args"] - (NoArg - (\opt -> return opt { optDumpArgs = True })) - "" -- "Print output filename and arguments to stdout." - - , Option "" ["ignore-args"] - (NoArg - (\opt -> return opt { optIgnoreArgs = True })) - "" -- "Ignore command-line arguments." - - , Option "v" ["version"] - (NoArg - (\_ -> do - prg <- getProgName - hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ compileInfo ++ - copyrightMessage) - exitWith $ ExitFailure 4)) - "" -- "Print version" - - , Option "h" ["help"] - (NoArg - (\_ -> do - prg <- getProgName - hPutStr stderr (usageMessage prg options) - exitWith $ ExitFailure 2)) - "" -- "Show help" - ] - --- Returns usage message -usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String -usageMessage programName = usageInfo - (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ - (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++ - (intercalate ", " $ map fst writers) ++ "\nOptions:") - --- Determine default reader based on source file extensions -defaultReaderName :: [FilePath] -> String -defaultReaderName [] = "markdown" -defaultReaderName (x:xs) = - case takeExtension (map toLower x) of - ".xhtml" -> "html" - ".html" -> "html" - ".htm" -> "html" - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".rst" -> "rst" - ".lhs" -> "markdown+lhs" - ".native" -> "native" - _ -> defaultReaderName xs - --- Returns True if extension of first source is .lhs -lhsExtension :: [FilePath] -> Bool -lhsExtension (x:_) = takeExtension x == ".lhs" -lhsExtension _ = False - --- Determine default writer based on output file extension -defaultWriterName :: FilePath -> String -defaultWriterName "-" = "html" -- no output file -defaultWriterName x = - case takeExtension (map toLower x) of - "" -> "markdown" -- empty extension - ".tex" -> "latex" - ".latex" -> "latex" - ".ltx" -> "latex" - ".context" -> "context" - ".ctx" -> "context" - ".rtf" -> "rtf" - ".rst" -> "rst" - ".s5" -> "s5" - ".native" -> "native" - ".txt" -> "markdown" - ".text" -> "markdown" - ".md" -> "markdown" - ".markdown" -> "markdown" - ".lhs" -> "markdown+lhs" - ".texi" -> "texinfo" - ".texinfo" -> "texinfo" - ".db" -> "docbook" - ".odt" -> "odt" - ['.',y] | y `elem` ['1'..'9'] -> "man" - _ -> "html" - -main :: IO () -main = do - - rawArgs <- getArgs - prg <- getProgName - let compatMode = (prg == "hsmarkdown") - - let (actions, args, errors) = if compatMode - then ([], rawArgs, []) - else getOpt Permute options rawArgs - - unless (null errors) $ - do name <- getProgName - mapM_ (\e -> hPutStrLn stderr e) errors - hPutStr stderr (usageMessage name options) - exitWith $ ExitFailure 2 - - let defaultOpts' = if compatMode - then defaultOpts { optReader = "markdown" - , optWriter = "html" - , optStrict = True } - else defaultOpts - - -- thread option data structure through all supplied option actions - opts <- foldl (>>=) (return defaultOpts') actions - - let Opt { optPreserveTabs = preserveTabs - , optTabStop = tabStop - , optStandalone = standalone - , optReader = readerName - , optWriter = writerName - , optParseRaw = parseRaw - , optCSS = css - , optTableOfContents = toc - , optIncludeInHeader = includeHeader - , optIncludeBeforeBody = includeBefore - , optIncludeAfterBody = includeAfter - , optCustomHeader = customHeader - , optTitlePrefix = titlePrefix - , optOutputFile = outputFile - , optNumberSections = numberSections - , optIncremental = incremental - , optSmart = smart - , optHTMLMathMethod = mathMethod - , optDumpArgs = dumpArgs - , optIgnoreArgs = ignoreArgs - , optStrict = strict - , optReferenceLinks = referenceLinks - , optWrapText = wrap - , optSanitizeHTML = sanitize - , optPlugins = plugins - , optEmailObfuscation = obfuscationMethod -#ifdef _CITEPROC - , optBiblioFile = biblioFile - , optBiblioFormat = biblioFormat - , optCslFile = cslFile -#endif - } = opts - - when dumpArgs $ - do hPutStrLn stdout outputFile - mapM_ (\arg -> hPutStrLn stdout arg) args - exitWith ExitSuccess - - let sources = if ignoreArgs then [] else args - - -- assign reader and writer based on options and filenames - let readerName' = if null readerName - then defaultReaderName sources - else readerName - - let writerName' = if null writerName - then defaultWriterName outputFile - else writerName - - reader <- case (lookup readerName' readers) of - Just r -> return r - Nothing -> error ("Unknown reader: " ++ readerName') - - (writer, defaultHeader) <- case (lookup writerName' writers) of - Just (w,h) -> return (w, h) - Nothing -> error ("Unknown writer: " ++ writerName') - - environment <- getEnvironment - let columns = case lookup "COLUMNS" environment of - Just cols -> read cols - Nothing -> stateColumns defaultParserState - - let tabFilter _ [] = "" - tabFilter _ ('\n':xs) = '\n' : tabFilter tabStop xs - -- remove DOS line endings - tabFilter _ ('\r':'\n':xs) = '\n' : tabFilter tabStop xs - tabFilter _ ('\r':xs) = '\n' : tabFilter tabStop xs - tabFilter spsToNextStop ('\t':xs) = - if preserveTabs - then '\t' : tabFilter tabStop xs - else replicate spsToNextStop ' ' ++ tabFilter tabStop xs - tabFilter 1 (x:xs) = - x : tabFilter tabStop xs - tabFilter spsToNextStop (x:xs) = - x : tabFilter (spsToNextStop - 1) xs - - let standalone' = (standalone && not strict) || isNonTextOutput writerName' - -#ifdef _CITEPROC - refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat -#endif - - let startParserState = - defaultParserState { stateParseRaw = parseRaw, - stateTabStop = tabStop, - stateSanitizeHTML = sanitize, - stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || - lhsExtension sources, - stateStandalone = standalone', -#ifdef _CITEPROC - stateCitations = map citeKey refs, -#endif - stateSmart = smart || writerName' `elem` - ["latex", "context"], - stateColumns = columns, - stateStrict = strict } - let csslink = if null css - then "" - else concatMap - (\f -> "\n") - css - let header = (if customHeader == "DEFAULT" - then defaultHeader - else customHeader) ++ csslink ++ includeHeader - let writerOptions = WriterOptions { writerStandalone = standalone', - writerHeader = header, - writerTitlePrefix = titlePrefix, - writerTabStop = tabStop, - writerTableOfContents = toc && - not strict && - writerName' /= "s5", - writerHTMLMathMethod = mathMethod, - writerS5 = (writerName' == "s5"), - writerIgnoreNotes = False, - writerIncremental = incremental, - writerNumberSections = numberSections, - writerIncludeBefore = includeBefore, - writerIncludeAfter = includeAfter, - writerStrictMarkdown = strict, - writerReferenceLinks = referenceLinks, - writerWrapText = wrap, - writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' || - lhsExtension [outputFile], - writerEmailObfuscation = if strict - then ReferenceObfuscation - else obfuscationMethod } - - when (isNonTextOutput writerName' && outputFile == "-") $ - do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ - "Specify an output file using the -o option.") - exitWith $ ExitFailure 5 - - let sourceDirRelative = if null sources - then "" - else takeDirectory (head sources) - - let readSources [] = mapM readSource ["-"] - readSources srcs = mapM readSource srcs - readSource "-" = getContents - readSource src = readFile src - - doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources) - - doc' <- do -#ifdef _CITEPROC - processBiblio cslFile refs doc -#else - return doc -#endif - - doc'' <- foldM (flip ($)) doc' plugins - - let writerOutput = writer writerOptions doc'' ++ "\n" - - case writerName' of - "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput - _ -> if outputFile == "-" - then putStr writerOutput - else writeFile outputFile writerOutput diff --git a/Text/Pandoc.hs b/Text/Pandoc.hs deleted file mode 100644 index e97103f97..000000000 --- a/Text/Pandoc.hs +++ /dev/null @@ -1,114 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -This helper module exports the main writers, readers, and data -structure definitions from the Pandoc libraries. - -A typical application will chain together a reader and a writer -to convert strings from one format to another. For example, the -following simple program will act as a filter converting markdown -fragments to reStructuredText, using reference-style links instead of -inline links: - -> module Main where -> import Text.Pandoc -> import qualified System.IO.UTF8 as U -> -> markdownToRST :: String -> String -> markdownToRST = -> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . -> readMarkdown defaultParserState -> -> main = U.getContents >>= U.putStrLn . markdownToRST - --} - -module Text.Pandoc - ( - -- * Definitions - module Text.Pandoc.Definition - -- * Readers: converting /to/ Pandoc format - , readMarkdown - , readRST - , readLaTeX - , readHtml - -- * Parser state used in readers - , ParserState (..) - , defaultParserState - , ParserContext (..) - , QuoteContext (..) - , KeyTable - , NoteTable - , HeaderType (..) - -- * Writers: converting /from/ Pandoc format - , writeMarkdown - , writeRST - , writeLaTeX - , writeConTeXt - , writeTexinfo - , writeHtml - , writeHtmlString - , writeS5 - , writeS5String - , writeDocbook - , writeOpenDocument - , writeMan - , writeMediaWiki - , writeRTF - , prettyPandoc - -- * Writer options used in writers - , WriterOptions (..) - , defaultWriterOptions - -- * Default headers for various output formats - , module Text.Pandoc.DefaultHeaders - -- * Version - , pandocVersion - ) where - -import Text.Pandoc.Definition -import Text.Pandoc.Readers.Markdown -import Text.Pandoc.Readers.RST -import Text.Pandoc.Readers.LaTeX -import Text.Pandoc.Readers.HTML -import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST -import Text.Pandoc.Writers.LaTeX -import Text.Pandoc.Writers.ConTeXt -import Text.Pandoc.Writers.Texinfo -import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.S5 -import Text.Pandoc.Writers.Docbook -import Text.Pandoc.Writers.OpenDocument -import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF -import Text.Pandoc.Writers.MediaWiki -import Text.Pandoc.DefaultHeaders -import Text.Pandoc.Shared - --- | Version number of pandoc library. -pandocVersion :: String -pandocVersion = "1.1" diff --git a/Text/Pandoc/Biblio.hs b/Text/Pandoc/Biblio.hs deleted file mode 100644 index 1d93f19c1..000000000 --- a/Text/Pandoc/Biblio.hs +++ /dev/null @@ -1,66 +0,0 @@ -{-# LANGUAGE PatternGuards #-} -{- -Copyright (C) 2008 Andrea Rossato - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Biblio - Copyright : Copyright (C) 2008 Andrea Rossato - License : GNU GPL, version 2 or above - - Maintainer : Andrea Rossato - Stability : alpha - Portability : portable --} - -module Text.Pandoc.Biblio ( processBiblio ) where - -import Control.Monad ( when ) -import Data.List -import Text.CSL -import Text.Pandoc.Definition - --- | Process a 'Pandoc' document by adding citations formatted --- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc -processBiblio cf r p - = if null r then return p - else do - when (null cf) $ error "Missing the needed citation style file" - csl <- readCSLFile cf - let groups = queryPandoc getCite p - result = citeproc csl r groups - cits_map = zip groups (citations result) - biblioList = map (read . renderPandoc' csl) (bibliography result) - Pandoc m b = processPandoc (processCite csl cits_map) p - return $ Pandoc m $ b ++ biblioList - --- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline -processCite s cs il - | Cite t _ <- il = Cite t (process t) - | otherwise = il - where - process t = case elemIndex t (map fst cs) of - Just i -> read . renderPandoc s $ snd (cs !! i) - Nothing -> [Str ("Error processing " ++ show t)] - --- | Retrieve all citations from a 'Pandoc' docuument. To be used with --- 'queryPandoc'. -getCite :: Inline -> [[(String,String)]] -getCite i | Cite t _ <- i = [t] - | otherwise = [] diff --git a/Text/Pandoc/Blocks.hs b/Text/Pandoc/Blocks.hs deleted file mode 100644 index 122931773..000000000 --- a/Text/Pandoc/Blocks.hs +++ /dev/null @@ -1,146 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Blocks - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for the manipulation of fixed-width blocks of text. -These are used in the construction of plain-text tables. --} - -module Text.Pandoc.Blocks - ( - TextBlock (..), - docToBlock, - blockToDoc, - widthOfBlock, - heightOfBlock, - hcatBlocks, - hsepBlocks, - centerAlignBlock, - leftAlignBlock, - rightAlignBlock - ) -where -import Text.PrettyPrint -import Data.List ( intersperse ) - --- | A fixed-width block of text. Parameters are width of block, --- height of block, and list of lines. -data TextBlock = TextBlock Int Int [String] -instance Show TextBlock where - show x = show $ blockToDoc x - --- | Break lines in a list of lines so that none are greater than --- a given width. -breakLines :: Int -- ^ Maximum length of lines. - -> [String] -- ^ List of lines. - -> [String] -breakLines _ [] = [] -breakLines width (l:ls) = - if length l > width - then (take width l):(breakLines width ((drop width l):ls)) - else l:(breakLines width ls) - --- | Convert a @Doc@ element into a @TextBlock@ with a specified width. -docToBlock :: Int -- ^ Width of text block. - -> Doc -- ^ @Doc@ to convert. - -> TextBlock -docToBlock width doc = - let rendered = renderStyle (style {lineLength = width, - ribbonsPerLine = 1}) doc - lns = breakLines width $ lines rendered - in TextBlock width (length lns) lns - --- | Convert a @TextBlock@ to a @Doc@ element. -blockToDoc :: TextBlock -> Doc -blockToDoc (TextBlock _ _ lns) = - if null lns - then empty - else vcat $ map text lns - --- | Returns width of a @TextBlock@ (number of columns). -widthOfBlock :: TextBlock -> Int -widthOfBlock (TextBlock width _ _) = width - --- | Returns height of a @TextBlock@ (number of rows). -heightOfBlock :: TextBlock -> Int -heightOfBlock (TextBlock _ height _) = height - --- | Pads a string out to a given width using spaces. -hPad :: Int -- ^ Desired width. - -> String -- ^ String to pad. - -> String -hPad width line = - let linelen = length line - in if linelen <= width - then line ++ replicate (width - linelen) ' ' - else take width line - --- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in --- which they appear side by side. -hcatBlocks :: [TextBlock] -> TextBlock -hcatBlocks [] = TextBlock 0 0 [] -hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd. -hcatBlocks ((TextBlock width1 height1 lns1):xs) = - let (TextBlock width2 height2 lns2) = hcatBlocks xs - height = max height1 height2 - width = width1 + width2 - lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) "" - lns2' = lns2 ++ replicate (height - height2) "" - lns = zipWith (++) lns1' lns2' - in TextBlock width height lns - --- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. -hsepBlocks :: [TextBlock] -> TextBlock -hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) - -isWhitespace :: Char -> Bool -isWhitespace x = x `elem` " \t" - --- | Left-aligns the contents of a @TextBlock@ within the block. -leftAlignBlock :: TextBlock -> TextBlock -leftAlignBlock (TextBlock width height lns) = - TextBlock width height $ map (dropWhile isWhitespace) lns - --- | Right-aligns the contents of a @TextBlock@ within the block. -rightAlignBlock :: TextBlock -> TextBlock -rightAlignBlock (TextBlock width height lns) = - let rightAlignLine ln = - let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln - in reverse (rest ++ spaces) - in TextBlock width height $ map rightAlignLine lns - --- | Centers the contents of a @TextBlock@ within the block. -centerAlignBlock :: TextBlock -> TextBlock -centerAlignBlock (TextBlock width height lns) = - let centerAlignLine ln = - let ln' = hPad width ln - (startSpaces, rest) = span isWhitespace ln' - endSpaces = takeWhile isWhitespace (reverse ln') - numSpaces = length (startSpaces ++ endSpaces) - startSpaces' = replicate (quot numSpaces 2) ' ' - in startSpaces' ++ rest - in TextBlock width height $ map centerAlignLine lns - diff --git a/Text/Pandoc/CharacterReferences.hs b/Text/Pandoc/CharacterReferences.hs deleted file mode 100644 index b0f4f6019..000000000 --- a/Text/Pandoc/CharacterReferences.hs +++ /dev/null @@ -1,327 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.CharacterReferences - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for parsing character references. --} -module Text.Pandoc.CharacterReferences ( - characterReference, - decodeCharacterReferences, - ) where -import Data.Char ( chr ) -import Text.ParserCombinators.Parsec -import qualified Data.Map as Map - --- | Parse character entity. -characterReference :: GenParser Char st Char -characterReference = try $ do - char '&' - character <- numRef <|> entity - char ';' - return character - -numRef :: GenParser Char st Char -numRef = do - char '#' - num <- hexNum <|> decNum - return $ chr $ num - -hexNum :: GenParser Char st Int -hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++) - -decNum :: GenParser Char st Int -decNum = many1 digit >>= return . read - -entity :: GenParser Char st Char -entity = do - body <- many1 alphaNum - return $ Map.findWithDefault '?' body entityTable - --- | Convert entities in a string to characters. -decodeCharacterReferences :: String -> String -decodeCharacterReferences str = - case parse (many (characterReference <|> anyChar)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> result - -entityTable :: Map.Map String Char -entityTable = Map.fromList entityTableList - -entityTableList :: [(String, Char)] -entityTableList = [ - ("quot", chr 34), - ("amp", chr 38), - ("lt", chr 60), - ("gt", chr 62), - ("nbsp", chr 160), - ("iexcl", chr 161), - ("cent", chr 162), - ("pound", chr 163), - ("curren", chr 164), - ("yen", chr 165), - ("brvbar", chr 166), - ("sect", chr 167), - ("uml", chr 168), - ("copy", chr 169), - ("ordf", chr 170), - ("laquo", chr 171), - ("not", chr 172), - ("shy", chr 173), - ("reg", chr 174), - ("macr", chr 175), - ("deg", chr 176), - ("plusmn", chr 177), - ("sup2", chr 178), - ("sup3", chr 179), - ("acute", chr 180), - ("micro", chr 181), - ("para", chr 182), - ("middot", chr 183), - ("cedil", chr 184), - ("sup1", chr 185), - ("ordm", chr 186), - ("raquo", chr 187), - ("frac14", chr 188), - ("frac12", chr 189), - ("frac34", chr 190), - ("iquest", chr 191), - ("Agrave", chr 192), - ("Aacute", chr 193), - ("Acirc", chr 194), - ("Atilde", chr 195), - ("Auml", chr 196), - ("Aring", chr 197), - ("AElig", chr 198), - ("Ccedil", chr 199), - ("Egrave", chr 200), - ("Eacute", chr 201), - ("Ecirc", chr 202), - ("Euml", chr 203), - ("Igrave", chr 204), - ("Iacute", chr 205), - ("Icirc", chr 206), - ("Iuml", chr 207), - ("ETH", chr 208), - ("Ntilde", chr 209), - ("Ograve", chr 210), - ("Oacute", chr 211), - ("Ocirc", chr 212), - ("Otilde", chr 213), - ("Ouml", chr 214), - ("times", chr 215), - ("Oslash", chr 216), - ("Ugrave", chr 217), - ("Uacute", chr 218), - ("Ucirc", chr 219), - ("Uuml", chr 220), - ("Yacute", chr 221), - ("THORN", chr 222), - ("szlig", chr 223), - ("agrave", chr 224), - ("aacute", chr 225), - ("acirc", chr 226), - ("atilde", chr 227), - ("auml", chr 228), - ("aring", chr 229), - ("aelig", chr 230), - ("ccedil", chr 231), - ("egrave", chr 232), - ("eacute", chr 233), - ("ecirc", chr 234), - ("euml", chr 235), - ("igrave", chr 236), - ("iacute", chr 237), - ("icirc", chr 238), - ("iuml", chr 239), - ("eth", chr 240), - ("ntilde", chr 241), - ("ograve", chr 242), - ("oacute", chr 243), - ("ocirc", chr 244), - ("otilde", chr 245), - ("ouml", chr 246), - ("divide", chr 247), - ("oslash", chr 248), - ("ugrave", chr 249), - ("uacute", chr 250), - ("ucirc", chr 251), - ("uuml", chr 252), - ("yacute", chr 253), - ("thorn", chr 254), - ("yuml", chr 255), - ("OElig", chr 338), - ("oelig", chr 339), - ("Scaron", chr 352), - ("scaron", chr 353), - ("Yuml", chr 376), - ("fnof", chr 402), - ("circ", chr 710), - ("tilde", chr 732), - ("Alpha", chr 913), - ("Beta", chr 914), - ("Gamma", chr 915), - ("Delta", chr 916), - ("Epsilon", chr 917), - ("Zeta", chr 918), - ("Eta", chr 919), - ("Theta", chr 920), - ("Iota", chr 921), - ("Kappa", chr 922), - ("Lambda", chr 923), - ("Mu", chr 924), - ("Nu", chr 925), - ("Xi", chr 926), - ("Omicron", chr 927), - ("Pi", chr 928), - ("Rho", chr 929), - ("Sigma", chr 931), - ("Tau", chr 932), - ("Upsilon", chr 933), - ("Phi", chr 934), - ("Chi", chr 935), - ("Psi", chr 936), - ("Omega", chr 937), - ("alpha", chr 945), - ("beta", chr 946), - ("gamma", chr 947), - ("delta", chr 948), - ("epsilon", chr 949), - ("zeta", chr 950), - ("eta", chr 951), - ("theta", chr 952), - ("iota", chr 953), - ("kappa", chr 954), - ("lambda", chr 955), - ("mu", chr 956), - ("nu", chr 957), - ("xi", chr 958), - ("omicron", chr 959), - ("pi", chr 960), - ("rho", chr 961), - ("sigmaf", chr 962), - ("sigma", chr 963), - ("tau", chr 964), - ("upsilon", chr 965), - ("phi", chr 966), - ("chi", chr 967), - ("psi", chr 968), - ("omega", chr 969), - ("thetasym", chr 977), - ("upsih", chr 978), - ("piv", chr 982), - ("ensp", chr 8194), - ("emsp", chr 8195), - ("thinsp", chr 8201), - ("zwnj", chr 8204), - ("zwj", chr 8205), - ("lrm", chr 8206), - ("rlm", chr 8207), - ("ndash", chr 8211), - ("mdash", chr 8212), - ("lsquo", chr 8216), - ("rsquo", chr 8217), - ("sbquo", chr 8218), - ("ldquo", chr 8220), - ("rdquo", chr 8221), - ("bdquo", chr 8222), - ("dagger", chr 8224), - ("Dagger", chr 8225), - ("bull", chr 8226), - ("hellip", chr 8230), - ("permil", chr 8240), - ("prime", chr 8242), - ("Prime", chr 8243), - ("lsaquo", chr 8249), - ("rsaquo", chr 8250), - ("oline", chr 8254), - ("frasl", chr 8260), - ("euro", chr 8364), - ("image", chr 8465), - ("weierp", chr 8472), - ("real", chr 8476), - ("trade", chr 8482), - ("alefsym", chr 8501), - ("larr", chr 8592), - ("uarr", chr 8593), - ("rarr", chr 8594), - ("darr", chr 8595), - ("harr", chr 8596), - ("crarr", chr 8629), - ("lArr", chr 8656), - ("uArr", chr 8657), - ("rArr", chr 8658), - ("dArr", chr 8659), - ("hArr", chr 8660), - ("forall", chr 8704), - ("part", chr 8706), - ("exist", chr 8707), - ("empty", chr 8709), - ("nabla", chr 8711), - ("isin", chr 8712), - ("notin", chr 8713), - ("ni", chr 8715), - ("prod", chr 8719), - ("sum", chr 8721), - ("minus", chr 8722), - ("lowast", chr 8727), - ("radic", chr 8730), - ("prop", chr 8733), - ("infin", chr 8734), - ("ang", chr 8736), - ("and", chr 8743), - ("or", chr 8744), - ("cap", chr 8745), - ("cup", chr 8746), - ("int", chr 8747), - ("there4", chr 8756), - ("sim", chr 8764), - ("cong", chr 8773), - ("asymp", chr 8776), - ("ne", chr 8800), - ("equiv", chr 8801), - ("le", chr 8804), - ("ge", chr 8805), - ("sub", chr 8834), - ("sup", chr 8835), - ("nsub", chr 8836), - ("sube", chr 8838), - ("supe", chr 8839), - ("oplus", chr 8853), - ("otimes", chr 8855), - ("perp", chr 8869), - ("sdot", chr 8901), - ("lceil", chr 8968), - ("rceil", chr 8969), - ("lfloor", chr 8970), - ("rfloor", chr 8971), - ("lang", chr 9001), - ("rang", chr 9002), - ("loz", chr 9674), - ("spades", chr 9824), - ("clubs", chr 9827), - ("hearts", chr 9829), - ("diams", chr 9830) - ] diff --git a/Text/Pandoc/DefaultHeaders.hs b/Text/Pandoc/DefaultHeaders.hs deleted file mode 100644 index e9c1f17e5..000000000 --- a/Text/Pandoc/DefaultHeaders.hs +++ /dev/null @@ -1,69 +0,0 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.DefaultHeaders - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Default headers for Pandoc writers. --} -module Text.Pandoc.DefaultHeaders ( - defaultLaTeXHeader, - defaultConTeXtHeader, - defaultDocbookHeader, - defaultOpenDocumentHeader, - defaultS5Header, - defaultRTFHeader - ) where -import Text.Pandoc.Writers.S5 -import System.FilePath ( () ) -import Text.Pandoc.TH ( contentsOf ) - -defaultLaTeXHeader :: String -#ifndef __HADDOCK__ -defaultLaTeXHeader = $(contentsOf $ "data" "headers" "LaTeX.header") -#endif - -defaultConTeXtHeader :: String -#ifndef __HADDOCK__ -defaultConTeXtHeader = $(contentsOf $ "data" "headers" "ConTeXt.header") -#endif - -defaultDocbookHeader :: String -#ifndef __HADDOCK__ -defaultDocbookHeader = $(contentsOf $ "data" "headers" "Docbook.header") -#endif - -defaultOpenDocumentHeader :: String -#ifndef __HADDOCK__ -defaultOpenDocumentHeader = $(contentsOf $ "data" "headers" "OpenDocument.header") -#endif - -defaultS5Header :: String -defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript - -defaultRTFHeader :: String -#ifndef __HADDOCK__ -defaultRTFHeader = $(contentsOf $ "data" "headers" "RTF.header") -#endif diff --git a/Text/Pandoc/Definition.hs b/Text/Pandoc/Definition.hs deleted file mode 100644 index 92ce094d4..000000000 --- a/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,150 +0,0 @@ -{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -import Data.Generics - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data) - --- | Bibliographic information for the document: title (list of 'Inline'), --- authors (list of strings), date (string). -data Meta = Meta [Inline] -- title - [String] -- authors - String -- date - deriving (Eq, Show, Read, Typeable, Data) - --- | Alignment of a table column. -data Alignment = AlignLeft - | AlignRight - | AlignCenter - | AlignDefault deriving (Eq, Show, Read, Typeable, Data) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle - | Decimal - | LowerRoman - | UpperRoman - | LowerAlpha - | UpperAlpha deriving (Eq, Show, Read, Typeable, Data) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim - | Period - | OneParen - | TwoParens deriving (Eq, Show, Read, Typeable, Data) - --- | Attributes: identifier, classes, key-value pairs -type Attr = (String, [String], [(String, String)]) - --- | Block element. -data Block - = Plain [Inline] -- ^ Plain text, not a paragraph - | Para [Inline] -- ^ Paragraph - | CodeBlock Attr String -- ^ Code block (literal) with attributes - | RawHtml String -- ^ Raw HTML block (literal) - | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes - -- and a list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each - -- a list of blocks) - | DefinitionList [([Inline],[Block])] -- ^ Definition list - -- (list of items, each a pair of an inline list, - -- the term, and a block list) - | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) - | HorizontalRule -- ^ Horizontal rule - | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table, - -- with caption, column alignments, - -- relative column widths, column headers - -- (each a list of blocks), and rows - -- (each a list of lists of blocks) - | Null -- ^ Nothing - deriving (Eq, Read, Show, Typeable, Data) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data) - --- | Link target (URL, title). -type Target = (String, String) - --- | Type of math element (display or inline). -data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data) - --- | Inline elements. -data Inline - = Str String -- ^ Text (string) - | Emph [Inline] -- ^ Emphasized text (list of inlines) - | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) - | Strikeout [Inline] -- ^ Strikeout text (list of inlines) - | Superscript [Inline] -- ^ Superscripted text (list of inlines) - | Subscript [Inline] -- ^ Subscripted text (list of inlines) - | SmallCaps [Inline] -- ^ Small caps text (list of inlines) - | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Cite [Target] [Inline] -- ^ Citation (list of inlines) - | Code String -- ^ Inline code (literal) - | Space -- ^ Inter-word space - | EmDash -- ^ Em dash - | EnDash -- ^ En dash - | Apostrophe -- ^ Apostrophe - | Ellipses -- ^ Ellipses - | LineBreak -- ^ Hard line break - | Math MathType String -- ^ TeX math (literal) - | TeX String -- ^ LaTeX code (literal) - | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target - | Image [Inline] Target -- ^ Image: alt text (list of inlines), target - -- and target - | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Read, Typeable, Data) - --- | Applies a transformation on @a@s to matching elements in a @b@. -processWith :: (Data a, Data b) => (a -> a) -> b -> b -processWith f = everywhere (mkT f) - --- | Like 'processWith', but with monadic transformations. -processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b -processWithM f = everywhereM (mkM f) - --- | Runs a query on matching @a@ elements in a @c@. -queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b] -queryWith f = everything (++) ([] `mkQ` f) - -{-# DEPRECATED processPandoc "Use processWith instead" #-} -processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc -processPandoc = processWith - -{-# DEPRECATED queryPandoc "Use queryWith instead" #-} -queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] -queryPandoc = queryWith - diff --git a/Text/Pandoc/Highlighting.hs b/Text/Pandoc/Highlighting.hs deleted file mode 100644 index 6726f1a42..000000000 --- a/Text/Pandoc/Highlighting.hs +++ /dev/null @@ -1,64 +0,0 @@ -{-# LANGUAGE CPP #-} -{- -Copyright (C) 2008 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Exports functions for syntax highlighting. --} - -module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss ) where -import Text.XHtml -import Text.Pandoc.Definition -#ifdef _HIGHLIGHTING -import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss ) -import Data.List (find, lookup) -import Data.Maybe (fromMaybe) -import Data.Char (toLower) - -highlightHtml :: Attr -> String -> Either String Html -highlightHtml (_, classes, keyvals) rawCode = - let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals - fmtOpts = [OptNumberFrom firstNum] ++ - case find (`elem` ["number","numberLines","number-lines"]) classes of - Nothing -> [] - Just _ -> [OptNumberLines] - lcLanguages = map (map toLower) languages - in case find (\c -> (map toLower c) `elem` lcLanguages) classes of - Nothing -> Left "Unknown or unsupported language" - Just language -> case highlightAs language rawCode of - Left err -> Left err - Right hl -> Right $ formatAsXHtml fmtOpts language hl - -#else -defaultHighlightingCss :: String -defaultHighlightingCss = "" - -languages :: [String] -languages = [] - -highlightHtml :: Attr -> String -> Either String Html -highlightHtml _ _ = Left "Pandoc was not compiled with support for highlighting" -#endif diff --git a/Text/Pandoc/LaTeXMathML.hs b/Text/Pandoc/LaTeXMathML.hs deleted file mode 100644 index 1eb3c23cc..000000000 --- a/Text/Pandoc/LaTeXMathML.hs +++ /dev/null @@ -1,14 +0,0 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} --- | Definitions for use of LaTeXMathML in HTML. --- (See ) -module Text.Pandoc.LaTeXMathML ( latexMathMLScript ) where -import Text.Pandoc.TH ( contentsOf ) -import System.FilePath ( () ) - --- | String containing LaTeXMathML javascript. -latexMathMLScript :: String -#ifndef __HADDOCK__ -latexMathMLScript = "\n" -#endif diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs deleted file mode 100644 index f9e4dd8f1..000000000 --- a/Text/Pandoc/ODT.hs +++ /dev/null @@ -1,88 +0,0 @@ -{-# LANGUAGE TemplateHaskell #-} -{- -Copyright (C) 2008 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.ODT - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for producing an ODT file from OpenDocument XML. --} -module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Text.Pandoc.TH ( makeZip ) -import Data.List ( find ) -import System.FilePath ( (), takeFileName ) -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 ( fromString ) -import Prelude hiding ( writeFile, readFile ) -import Codec.Archive.Zip -import Control.Applicative ( (<$>) ) -import Text.ParserCombinators.Parsec -import System.Time - --- | Produce an ODT file from OpenDocument XML. -saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. - -> FilePath -- ^ Relative directory of source file. - -> String -- ^ OpenDocument XML contents. - -> IO () -saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do - let refArchive = read $(makeZip $ "data" "odt-styles") - -- handle pictures - let (newContents, pics) = - case runParser pPictures [] "OpenDocument XML contents" xml of - Left err -> error $ show err - Right x -> x - picEntries <- mapM (makePictureEntry sourceDirRelative) pics - (TOD epochTime _) <- getClockTime - let contentEntry = toEntry "content.xml" epochTime $ fromString newContents - let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) - B.writeFile destinationODTPath $ fromArchive archive - -makePictureEntry :: FilePath -- ^ Relative directory of source file - -> (FilePath, String) -- ^ Path and new path of picture - -> IO Entry -makePictureEntry sourceDirRelative (path, newPath) = do - entry <- readEntry [] $ sourceDirRelative path - return (entry { eRelativePath = newPath }) - -pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) -pPictures = do - contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") - pics <- getState - return (contents, pics) - -pPicture :: GenParser Char [(FilePath, String)] [Char] -pPicture = try $ do - string " o == path) pics of - Just (_, new) -> return new - Nothing -> do - -- get a unique name - let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics - let new = "Pictures/" ++ replicate dups '0' ++ filename - updateState ((path, new) :) - return new - return $ " - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Pluigns - Copyright : Copyright (C) 2008 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Support for plugins. --} - -module Text.Pandoc.Plugins (getPlugin) -where - -import Language.Haskell.Interpreter -import Text.Pandoc -import Control.Monad (unless, liftM) -import Control.Monad.Error (throwError) -import Data.List (isInfixOf) - --- | Returns the function named @transform@ in the specified --- module. The module may be identified either by module name --- or by path name. The @transform@ function should have type --- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data': --- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@, --- @Block -> Block@, or @[Inline] -> IO [Inline]@. -getPlugin :: String -> IO (Pandoc -> IO Pandoc) -getPlugin modsrc = do - res <- runInterpreter (evaluatePlugin modsrc) - case res of - Right func -> return func - Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs) - Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x - Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x - Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x - -evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc) -evaluatePlugin modsrc = do - set [installedModulesInScope := False] - loadModules [modsrc] - modnames <- getLoadedModules - setTopLevelModules modnames - setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"] - exports <- liftM concat $ mapM getModuleExports modnames - unless ((Fun "transform") `elem` exports) $ - throwError $ UnknownError $ "The plugin module must define a function 'transform'." - transformType <- typeOf "transform" - if "-> IO" `isInfixOf` transformType - then interpret "processWithM transform" (as :: Pandoc -> IO Pandoc) - else interpret "return . (processWith transform)" (as :: Pandoc -> IO Pandoc) diff --git a/Text/Pandoc/Readers/HTML.hs b/Text/Pandoc/Readers/HTML.hs deleted file mode 100644 index 65e512b5e..000000000 --- a/Text/Pandoc/Readers/HTML.hs +++ /dev/null @@ -1,675 +0,0 @@ -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of HTML to 'Pandoc' document. --} -module Text.Pandoc.Readers.HTML ( - readHtml, - rawHtmlInline, - rawHtmlBlock, - anyHtmlBlockTag, - anyHtmlInlineTag, - anyHtmlTag, - anyHtmlEndTag, - htmlEndTag, - extractTagType, - htmlBlockElement, - unsanitaryURI - ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Data.Maybe ( fromMaybe ) -import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate ) -import Data.Char ( toLower, isAlphaNum ) -import Network.URI ( parseURIReference, URI (..) ) - --- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState -- ^ Parser state - -> String -- ^ String to parse - -> Pandoc -readHtml = readWith parseHtml - --- --- Constants --- - -eitherBlockOrInline :: [[Char]] -eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", - "map", "area", "object"] - -{- -inlineHtmlTags :: [[Char]] -inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", - "br", "cite", "code", "dfn", "em", "font", "i", "img", - "input", "kbd", "label", "q", "s", "samp", "select", - "small", "span", "strike", "strong", "sub", "sup", - "textarea", "tt", "u", "var"] ++ eitherBlockOrInline --} - -blockHtmlTags :: [[Char]] -blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", - "dl", "fieldset", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "hr", "html", "isindex", "menu", "noframes", - "noscript", "ol", "p", "pre", "table", "ul", "dd", - "dt", "frameset", "li", "tbody", "td", "tfoot", - "th", "thead", "tr", "script"] ++ eitherBlockOrInline - -sanitaryTags :: [[Char]] -sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", - "blockquote", "br", "button", "caption", "center", - "cite", "code", "col", "colgroup", "dd", "del", "dfn", - "dir", "div", "dl", "dt", "em", "fieldset", "font", - "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", - "i", "img", "input", "ins", "kbd", "label", "legend", - "li", "map", "menu", "ol", "optgroup", "option", "p", - "pre", "q", "s", "samp", "select", "small", "span", - "strike", "strong", "sub", "sup", "table", "tbody", - "td", "textarea", "tfoot", "th", "thead", "tr", "tt", - "u", "ul", "var"] - -sanitaryAttributes :: [[Char]] -sanitaryAttributes = ["abbr", "accept", "accept-charset", - "accesskey", "action", "align", "alt", "axis", - "border", "cellpadding", "cellspacing", "char", - "charoff", "charset", "checked", "cite", "class", - "clear", "cols", "colspan", "color", "compact", - "coords", "datetime", "dir", "disabled", - "enctype", "for", "frame", "headers", "height", - "href", "hreflang", "hspace", "id", "ismap", - "label", "lang", "longdesc", "maxlength", "media", - "method", "multiple", "name", "nohref", "noshade", - "nowrap", "prompt", "readonly", "rel", "rev", - "rows", "rowspan", "rules", "scope", "selected", - "shape", "size", "span", "src", "start", - "summary", "tabindex", "target", "title", "type", - "usemap", "valign", "value", "vspace", "width"] - --- --- HTML utility functions --- - --- | Returns @True@ if sanitization is specified and the specified tag is --- not on the sanitized tag list. -unsanitaryTag :: [Char] - -> GenParser tok ParserState Bool -unsanitaryTag tag = do - st <- getState - return $ stateSanitizeHTML st && tag `notElem` sanitaryTags - --- | returns @True@ if sanitization is specified and the specified attribute --- is not on the sanitized attribute list. -unsanitaryAttribute :: ([Char], String, t) - -> GenParser tok ParserState Bool -unsanitaryAttribute (attr, val, _) = do - st <- getState - return $ stateSanitizeHTML st && - (attr `notElem` sanitaryAttributes || - (attr `elem` ["href","src"] && unsanitaryURI val)) - --- | Returns @True@ if the specified URI is potentially a security risk. -unsanitaryURI :: String -> Bool -unsanitaryURI u = - let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", - "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", - "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", - "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", - "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", - "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", - "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", - "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", - "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", - "snews:", "webcal:", "ymsgr:"] - in case parseURIReference u of - Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes - Nothing -> True - --- | Read blocks until end tag. -blocksTilEnd :: String -> GenParser Char ParserState [Block] -blocksTilEnd tag = do - blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) - return $ filter (/= Null) blocks - --- | Read inlines until end tag. -inlinesTilEnd :: String -> GenParser Char ParserState [Inline] -inlinesTilEnd tag = manyTill inline (htmlEndTag tag) - --- | Parse blocks between open and close tag. -blocksIn :: String -> GenParser Char ParserState [Block] -blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag - --- | Parse inlines between open and close tag. -inlinesIn :: String -> GenParser Char ParserState [Inline] -inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag - --- | Extract type from a tag: e.g. @br@ from @\@ -extractTagType :: String -> String -extractTagType ('<':rest) = - let isSpaceOrSlash c = c `elem` "/ \n\t" in - map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest -extractTagType _ = "" - --- | Parse any HTML tag (opening or self-closing) and return text of tag -anyHtmlTag :: GenParser Char ParserState [Char] -anyHtmlTag = try $ do - char '<' - spaces - tag <- many1 alphaNum - attribs <- many htmlAttribute - spaces - ender <- option "" (string "/") - let ender' = if null ender then "" else " /" - spaces - char '>' - let result = "<" ++ tag ++ - concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "" - else return result - -anyHtmlEndTag :: GenParser Char ParserState [Char] -anyHtmlEndTag = try $ do - char '<' - spaces - char '/' - spaces - tag <- many1 alphaNum - spaces - char '>' - let result = "" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "" - else return result - -htmlTag :: String -> GenParser Char ParserState (String, [(String, String)]) -htmlTag tag = try $ do - char '<' - spaces - stringAnyCase tag - attribs <- many htmlAttribute - spaces - optional (string "/") - spaces - char '>' - return (tag, (map (\(name, content, _) -> (name, content)) attribs)) - --- parses a quoted html attribute value -quoted :: Char -> GenParser Char st (String, String) -quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) - (many (noneOf [quoteChar])) - return (result, [quoteChar]) - -nullAttribute :: ([Char], [Char], [Char]) -nullAttribute = ("", "", "") - -htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char]) -htmlAttribute = do - attr <- htmlRegularAttribute <|> htmlMinimizedAttribute - unsanitary <- unsanitaryAttribute attr - if unsanitary - then return nullAttribute - else return attr - --- minimized boolean attribute -htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char]) -htmlMinimizedAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - return (name, name, name) - -htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char]) -htmlRegularAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - spaces - char '=' - spaces - (content, quoteStr) <- choice [ (quoted '\''), - (quoted '"'), - (do - a <- many (alphaNum <|> (oneOf "-._:")) - return (a,"")) ] - return (name, content, - (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) - --- | Parse an end tag of type 'tag' -htmlEndTag :: [Char] -> GenParser Char st [Char] -htmlEndTag tag = try $ do - char '<' - spaces - char '/' - spaces - stringAnyCase tag - spaces - char '>' - return $ "" - -{- --- | Returns @True@ if the tag is (or can be) an inline tag. -isInline :: String -> Bool -isInline tag = (extractTagType tag) `elem` inlineHtmlTags --} - --- | Returns @True@ if the tag is (or can be) a block tag. -isBlock :: String -> Bool -isBlock tag = (extractTagType tag) `elem` blockHtmlTags - -anyHtmlBlockTag :: GenParser Char ParserState [Char] -anyHtmlBlockTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if isBlock tag then return tag else fail "not a block tag" - -anyHtmlInlineTag :: GenParser Char ParserState [Char] -anyHtmlInlineTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if not (isBlock tag) then return tag else fail "not an inline tag" - --- | Parses material between script tags. --- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript :: GenParser Char ParserState [Char] -htmlScript = try $ do - open <- string "" - else return $ open ++ rest ++ "" - --- | Parses material between style tags. --- Style tags must be treated differently, because they can contain CSS -htmlStyle :: GenParser Char ParserState [Char] -htmlStyle = try $ do - open <- string "" - else return $ open ++ rest ++ "" - -htmlBlockElement :: GenParser Char ParserState [Char] -htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ] - -rawHtmlBlock :: GenParser Char ParserState Block -rawHtmlBlock = try $ do - body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag - state <- getState - if stateParseRaw state then return (RawHtml body) else return Null - --- This is a block whose contents should be passed through verbatim, not interpreted. -rawVerbatimBlock :: GenParser Char ParserState [Char] -rawVerbatimBlock = try $ do - start <- anyHtmlBlockTag - let tagtype = extractTagType start - if tagtype `elem` ["pre"] - then do - contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar) - end <- htmlEndTag tagtype - return $ start ++ contents ++ end - else fail "Not a verbatim block" - --- We don't want to parse or as raw HTML, since these --- are handled in parseHtml. -rawHtmlBlock' :: GenParser Char ParserState Block -rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") - rawHtmlBlock - --- | Parses an HTML comment. -htmlComment :: GenParser Char st [Char] -htmlComment = try $ do - string "")) - return $ "" - --- --- parsing documents --- - -xmlDec :: GenParser Char st [Char] -xmlDec = try $ do - string "') - return $ "" - -definition :: GenParser Char st [Char] -definition = try $ do - string "') - return $ "" - -nonTitleNonHead :: GenParser Char ParserState Char -nonTitleNonHead = try $ do - notFollowedBy $ (htmlTag "title" >> return ' ') <|> - (htmlEndTag "head" >> return ' ') - (rawHtmlBlock >> return ' ') <|> anyChar - -parseTitle :: GenParser Char ParserState [Inline] -parseTitle = try $ do - (tag, _) <- htmlTag "title" - contents <- inlinesTilEnd tag - spaces - return contents - --- parse header and return meta-information (for now, just title) -parseHead :: GenParser Char ParserState ([Inline], [a], [Char]) -parseHead = try $ do - htmlTag "head" - spaces - skipMany nonTitleNonHead - contents <- option [] parseTitle - skipMany nonTitleNonHead - htmlEndTag "head" - return (contents, [], "") - -skipHtmlTag :: String -> GenParser Char ParserState () -skipHtmlTag tag = optional (htmlTag tag) - --- h1 class="title" representation of title in body -bodyTitle :: GenParser Char ParserState [Inline] -bodyTitle = try $ do - (_, attribs) <- htmlTag "h1" - case (extractAttribute "class" attribs) of - Just "title" -> return "" - _ -> fail "not title" - inlinesTilEnd "h1" - -parseHtml :: GenParser Char ParserState Pandoc -parseHtml = do - sepEndBy (choice [xmlDec, definition, htmlComment]) spaces - skipHtmlTag "html" - spaces - (title, authors, date) <- option ([], [], "") parseHead - spaces - skipHtmlTag "body" - spaces - optional bodyTitle -- skip title in body, because it's represented in meta - blocks <- parseBlocks - spaces - optional (htmlEndTag "body") - spaces - optional (htmlEndTag "html" >> many anyChar) -- ignore anything after - eof - return $ Pandoc (Meta title authors date) blocks - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) - -block :: GenParser Char ParserState Block -block = choice [ codeBlock - , header - , hrule - , list - , blockQuote - , para - , plain - , rawHtmlBlock' - ] "block" - --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = choice (map headerLevel (enumFromTo 1 5)) "header" - -headerLevel :: Int -> GenParser Char ParserState Block -headerLevel n = try $ do - let level = "h" ++ show n - htmlTag level - contents <- inlinesTilEnd level - return $ Header n (normalizeSpaces contents) - --- --- hrule block --- - -hrule :: GenParser Char ParserState Block -hrule = try $ do - (_, attribs) <- htmlTag "hr" - state <- getState - if not (null attribs) && stateParseRaw state - then unexpected "attributes in hr" -- parse as raw in this case - else return HorizontalRule - --- --- code blocks --- - --- Note: HTML tags in code blocks (e.g. for syntax highlighting) are --- skipped, because they are not portable to output formats other than HTML. -codeBlock :: GenParser Char ParserState Block -codeBlock = try $ do - htmlTag "pre" - result <- manyTill - (many1 (satisfy (/= '<')) <|> - ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) - (htmlEndTag "pre") - let result' = concat result - -- drop leading newline if any - let result'' = if "\n" `isPrefixOf` result' - then drop 1 result' - else result' - -- drop trailing newline if any - let result''' = if "\n" `isSuffixOf` result'' - then init result'' - else result'' - return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result''' - --- --- block quotes --- - -blockQuote :: GenParser Char ParserState Block -blockQuote = try $ htmlTag "blockquote" >> spaces >> - blocksTilEnd "blockquote" >>= (return . BlockQuote) - --- --- list blocks --- - -list :: GenParser Char ParserState Block -list = choice [ bulletList, orderedList, definitionList ] "list" - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - (_, attribs) <- htmlTag "ol" - (start, style) <- option (1, DefaultStyle) $ - do failIfStrict - let sta = fromMaybe "1" $ - lookup "start" attribs - let sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - let sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle - return (read sta, sty') - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ol" - return $ OrderedList (start, style, DefaultDelim) items - -bulletList :: GenParser Char ParserState Block -bulletList = try $ do - htmlTag "ul" - spaces - items <- sepEndBy1 (blocksIn "li") spaces - htmlEndTag "ul" - return $ BulletList items - -definitionList :: GenParser Char ParserState Block -definitionList = try $ do - failIfStrict -- def lists not part of standard markdown - htmlTag "dl" - spaces - items <- sepEndBy1 definitionListItem spaces - htmlEndTag "dl" - return $ DefinitionList items - -definitionListItem :: GenParser Char ParserState ([Inline], [Block]) -definitionListItem = try $ do - terms <- sepEndBy1 (inlinesIn "dt") spaces - defs <- sepEndBy1 (blocksIn "dd") spaces - let term = intercalate [LineBreak] terms - return (term, concat defs) - --- --- paragraph block --- - -para :: GenParser Char ParserState Block -para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= - return . Para . normalizeSpaces - --- --- plain block --- - -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- inline --- - -inline :: GenParser Char ParserState Inline -inline = choice [ charRef - , strong - , emph - , superscript - , subscript - , strikeout - , spanStrikeout - , code - , str - , linebreak - , whitespace - , link - , image - , rawHtmlInline - ] "inline" - -code :: GenParser Char ParserState Inline -code = try $ do - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") - -- remove internal line breaks, leading and trailing space, - -- and decode character references - return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ - intercalate " " $ lines result - -rawHtmlInline :: GenParser Char ParserState Inline -rawHtmlInline = do - result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag - state <- getState - if stateParseRaw state then return (HtmlInline result) else return (Str "") - -betweenTags :: [Char] -> GenParser Char ParserState [Inline] -betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= - return . normalizeSpaces - -emph :: GenParser Char ParserState Inline -emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph - -strong :: GenParser Char ParserState Inline -strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong - -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript - -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript - -strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= - return . Strikeout - -spanStrikeout :: GenParser Char ParserState Inline -spanStrikeout = try $ do - failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - (_, attributes) <- htmlTag "span" - result <- case (extractAttribute "class" attributes) of - Just "strikeout" -> inlinesTilEnd "span" - _ -> fail "not a strikeout" - return $ Strikeout result - -whitespace :: GenParser Char st Inline -whitespace = many1 space >> return Space - --- hard line break -linebreak :: GenParser Char ParserState Inline -linebreak = htmlTag "br" >> optional newline >> return LineBreak - -str :: GenParser Char st Inline -str = many1 (noneOf "<& \t\n") >>= return . Str - --- --- links and images --- - --- extract contents of attribute (attribute names are case-insensitive) -extractAttribute :: [Char] -> [([Char], String)] -> Maybe String -extractAttribute _ [] = Nothing -extractAttribute name ((attrName, contents):rest) = - let name' = map toLower name - attrName' = map toLower attrName - in if attrName' == name' - then Just (decodeCharacterReferences contents) - else extractAttribute name rest - -link :: GenParser Char ParserState Inline -link = try $ do - (_, attributes) <- htmlTag "a" - url <- case (extractAttribute "href" attributes) of - Just url -> return url - Nothing -> fail "no href" - let title = fromMaybe "" $ extractAttribute "title" attributes - lab <- inlinesTilEnd "a" - return $ Link (normalizeSpaces lab) (url, title) - -image :: GenParser Char ParserState Inline -image = try $ do - (_, attributes) <- htmlTag "img" - url <- case (extractAttribute "src" attributes) of - Just url -> return url - Nothing -> fail "no src" - let title = fromMaybe "" $ extractAttribute "title" attributes - let alt = fromMaybe "" (extractAttribute "alt" attributes) - return $ Image [Str alt] (url, title) - diff --git a/Text/Pandoc/Readers/LaTeX.hs b/Text/Pandoc/Readers/LaTeX.hs deleted file mode 100644 index 9ba5bf372..000000000 --- a/Text/Pandoc/Readers/LaTeX.hs +++ /dev/null @@ -1,774 +0,0 @@ -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of LaTeX to 'Pandoc' document. --} -module Text.Pandoc.Readers.LaTeX ( - readLaTeX, - rawLaTeXInline, - rawLaTeXEnvironment' - ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Data.Maybe ( fromMaybe ) -import Data.Char ( chr ) -import Data.List ( isPrefixOf, isSuffixOf ) - --- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse - -> Pandoc -readLaTeX = readWith parseLaTeX - --- characters with special meaning -specialChars :: [Char] -specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" - --- --- utility functions --- - --- | Returns text between brackets and its matching pair. -bracketedText :: Char -> Char -> GenParser Char st [Char] -bracketedText openB closeB = do - result <- charsInBalanced' openB closeB - return $ [openB] ++ result ++ [closeB] - --- | Returns an option or argument of a LaTeX command. -optOrArg :: GenParser Char st [Char] -optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' - --- | True if the string begins with '{'. -isArg :: [Char] -> Bool -isArg ('{':_) = True -isArg _ = False - --- | Returns list of options and arguments of a LaTeX command. -commandArgs :: GenParser Char st [[Char]] -commandArgs = many optOrArg - --- | Parses LaTeX command, returns (name, star, list of options or arguments). -command :: GenParser Char st ([Char], [Char], [[Char]]) -command = do - char '\\' - name <- many1 letter - star <- option "" (string "*") -- some commands have starred versions - args <- commandArgs - return (name, star, args) - -begin :: [Char] -> GenParser Char st [Char] -begin name = try $ do - string $ "\\begin{" ++ name ++ "}" - optional commandArgs - spaces - return name - -end :: [Char] -> GenParser Char st [Char] -end name = try $ do - string $ "\\end{" ++ name ++ "}" - return name - --- | Returns a list of block elements containing the contents of an --- environment. -environment :: [Char] -> GenParser Char ParserState [Block] -environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces - -anyEnvironment :: GenParser Char ParserState Block -anyEnvironment = try $ do - string "\\begin{" - name <- many letter - star <- option "" (string "*") -- some environments have starred variants - char '}' - optional commandArgs - spaces - contents <- manyTill block (end (name ++ star)) - spaces - return $ BlockQuote contents - --- --- parsing documents --- - --- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble :: GenParser Char ParserState () -processLaTeXPreamble = try $ manyTill - (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) >> - spaces - --- | Parse LaTeX and return 'Pandoc'. -parseLaTeX :: GenParser Char ParserState Pandoc -parseLaTeX = do - optional processLaTeXPreamble -- preamble might not be present (fragment) - spaces - blocks <- parseBlocks - spaces - optional $ try (string "\\end{document}" >> many anyChar) - -- might not be present (fragment) - spaces - eof - state <- getState - let blocks' = filter (/= Null) blocks - let title' = stateTitle state - let authors' = stateAuthors state - let date' = stateDate state - return $ Pandoc (Meta title' authors' date') blocks' - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = spaces >> many block - -block :: GenParser Char ParserState Block -block = choice [ hrule - , codeBlock - , header - , list - , blockQuote - , comment - , bibliographic - , para - , itemBlock - , unknownEnvironment - , ignore - , unknownCommand ] "block" - --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = try $ do - char '\\' - subs <- many (try (string "sub")) - string "section" - optional (char '*') - char '{' - title' <- manyTill inline (char '}') - spaces - return $ Header (length subs + 1) (normalizeSpaces title') - --- --- hrule block --- - -hrule :: GenParser Char st Block -hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", - "\\newpage" ] >> spaces >> return HorizontalRule - --- --- code blocks --- - -codeBlock :: GenParser Char ParserState Block -codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock --- Note: Verbatim is from fancyvrb. - -codeBlockWith :: String -> GenParser Char st Block -codeBlockWith env = try $ do - string ("\\begin{" ++ env ++ "}") -- don't use begin function because it - -- gobbles whitespace - optional blanklines -- we want to gobble blank lines, but not - -- leading space - contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}")) - spaces - let classes = if env == "code" then ["haskell"] else [] - return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents) - -lhsCodeBlock :: GenParser Char ParserState Block -lhsCodeBlock = do - failUnlessLHS - (CodeBlock (_,_,_) cont) <- codeBlockWith "code" - return $ CodeBlock ("", ["sourceCode","haskell"], []) cont - --- --- block quotes --- - -blockQuote :: GenParser Char ParserState Block -blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= - return . BlockQuote - --- --- list blocks --- - -list :: GenParser Char ParserState Block -list = bulletList <|> orderedList <|> definitionList "list" - -listItem :: GenParser Char ParserState ([Inline], [Block]) -listItem = try $ do - ("item", _, args) <- command - spaces - state <- getState - let oldParserContext = stateParserContext state - updateState (\s -> s {stateParserContext = ListItemState}) - blocks <- many block - updateState (\s -> s {stateParserContext = oldParserContext}) - opt <- case args of - ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> - parseFromString (many inline) $ tail $ init x - _ -> return [] - return (opt, blocks) - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - string "\\begin{enumerate}" - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ do failIfStrict - char '[' - res <- anyOrderedListMarker - char ']' - return res - spaces - option "" $ try $ do string "\\setlength{\\itemindent}" - char '{' - manyTill anyChar (char '}') - spaces - start <- option 1 $ try $ do failIfStrict - string "\\setcounter{enum" - many1 (oneOf "iv") - string "}{" - num <- many1 digit - char '}' - spaces - return $ (read num) + 1 - items <- many listItem - end "enumerate" - spaces - return $ OrderedList (start, style, delim) $ map snd items - -bulletList :: GenParser Char ParserState Block -bulletList = try $ do - begin "itemize" - spaces - items <- many listItem - end "itemize" - spaces - return (BulletList $ map snd items) - -definitionList :: GenParser Char ParserState Block -definitionList = try $ do - begin "description" - spaces - items <- many listItem - end "description" - spaces - return (DefinitionList items) - --- --- paragraph block --- - -para :: GenParser Char ParserState Block -para = do - res <- many1 inline - spaces - return $ if null (filter (`notElem` [Str "", Space]) res) - then Null - else Para $ normalizeSpaces res - --- --- title authors date --- - -bibliographic :: GenParser Char ParserState Block -bibliographic = choice [ maketitle, title, authors, date ] - -maketitle :: GenParser Char st Block -maketitle = try (string "\\maketitle") >> spaces >> return Null - -title :: GenParser Char ParserState Block -title = try $ do - string "\\title{" - tit <- manyTill inline (char '}') - spaces - updateState (\state -> state { stateTitle = tit }) - return Null - -authors :: GenParser Char ParserState Block -authors = try $ do - string "\\author{" - authors' <- manyTill anyChar (char '}') - spaces - let authors'' = map removeLeadingTrailingSpace $ lines $ - substitute "\\\\" "\n" authors' - updateState (\s -> s { stateAuthors = authors'' }) - return Null - -date :: GenParser Char ParserState Block -date = try $ do - string "\\date{" - date' <- manyTill anyChar (char '}') - spaces - updateState (\state -> state { stateDate = date' }) - return Null - --- --- item block --- for use in unknown environments that aren't being parsed as raw latex --- - --- this forces items to be parsed in different blocks -itemBlock :: GenParser Char ParserState Block -itemBlock = try $ do - ("item", _, args) <- command - state <- getState - if stateParserContext state == ListItemState - then fail "item should be handled by list block" - else if null args - then return Null - else return $ Plain [Str (stripFirstAndLast (head args))] - --- --- raw LaTeX --- - --- | Parse any LaTeX environment and return a Para block containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = do - contents <- rawLaTeXEnvironment' - spaces - return $ Para [TeX contents] - --- | Parse any LaTeX environment and return a string containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment' :: GenParser Char st String -rawLaTeXEnvironment' = try $ do - string "\\begin{" - name <- many1 letter - star <- option "" (string "*") -- for starred variants - let name' = name ++ star - char '}' - args <- option [] commandArgs - let argStr = concat args - contents <- manyTill (choice [ (many1 (noneOf "\\")), - rawLaTeXEnvironment', - string "\\" ]) - (end name') - return $ "\\begin{" ++ name' ++ "}" ++ argStr ++ - concat contents ++ "\\end{" ++ name' ++ "}" - -unknownEnvironment :: GenParser Char ParserState Block -unknownEnvironment = try $ do - state <- getState - result <- if stateParseRaw state -- check whether we should include raw TeX - then rawLaTeXEnvironment -- if so, get whole raw environment - else anyEnvironment -- otherwise just the contents - return result - --- \ignore{} is used conventionally in literate haskell for definitions --- that are to be processed by the compiler but not printed. -ignore :: GenParser Char ParserState Block -ignore = try $ do - ("ignore", _, _) <- command - spaces - return Null - -unknownCommand :: GenParser Char ParserState Block -unknownCommand = try $ do - notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", - "document"] - state <- getState - if stateParserContext state == ListItemState - then notFollowedBy' $ string "\\item" - else return () - if stateParseRaw state - then do - (name, star, args) <- command - spaces - return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)] - else do -- skip unknown command, leaving arguments to be parsed - char '\\' - letter - many (letter <|> digit) - optional (try $ string "{}") - spaces - return Null - --- latex comment -comment :: GenParser Char st Block -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null - --- --- inline --- - -inline :: GenParser Char ParserState Inline -inline = choice [ str - , endline - , whitespace - , quoted - , apostrophe - , spacer - , strong - , math - , ellipses - , emDash - , enDash - , hyphen - , emph - , strikeout - , superscript - , subscript - , ref - , lab - , code - , url - , link - , image - , footnote - , linebreak - , accentedChar - , specialChar - , rawLaTeXInline - , escapedChar - , unescapedChar - ] "inline" - -accentedChar :: GenParser Char st Inline -accentedChar = normalAccentedChar <|> specialAccentedChar - -normalAccentedChar :: GenParser Char st Inline -normalAccentedChar = try $ do - char '\\' - accent <- oneOf "'`^\"~" - character <- (try $ char '{' >> letter >>~ char '}') <|> letter - let table = fromMaybe [] $ lookup character accentTable - let result = case lookup accent table of - Just num -> chr num - Nothing -> '?' - return $ Str [result] - --- an association list of letters and association list of accents --- and decimal character numbers. -accentTable :: [(Char, [(Char, Int)])] -accentTable = - [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), - ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), - ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), - ('N', [('~', 209)]), - ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), - ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), - ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), - ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), - ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), - ('n', [('~', 241)]), - ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), - ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] - -specialAccentedChar :: GenParser Char st Inline -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, - oslash, pound, euro, copyright, sect ] - -ccedil :: GenParser Char st Inline -ccedil = try $ do - char '\\' - letter' <- oneOfStrings ["cc", "cC"] - let num = if letter' == "cc" then 231 else 199 - return $ Str [chr num] - -aring :: GenParser Char st Inline -aring = try $ do - char '\\' - letter' <- oneOfStrings ["aa", "AA"] - let num = if letter' == "aa" then 229 else 197 - return $ Str [chr num] - -iuml :: GenParser Char st Inline -iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> - return (Str [chr 239]) - -szlig :: GenParser Char st Inline -szlig = try (string "\\ss") >> return (Str [chr 223]) - -oslash :: GenParser Char st Inline -oslash = try $ do - char '\\' - letter' <- choice [char 'o', char 'O'] - let num = if letter' == 'o' then 248 else 216 - return $ Str [chr num] - -aelig :: GenParser Char st Inline -aelig = try $ do - char '\\' - letter' <- oneOfStrings ["ae", "AE"] - let num = if letter' == "ae" then 230 else 198 - return $ Str [chr num] - -pound :: GenParser Char st Inline -pound = try (string "\\pounds") >> return (Str [chr 163]) - -euro :: GenParser Char st Inline -euro = try (string "\\euro") >> return (Str [chr 8364]) - -copyright :: GenParser Char st Inline -copyright = try (string "\\copyright") >> return (Str [chr 169]) - -sect :: GenParser Char st Inline -sect = try (string "\\S") >> return (Str [chr 167]) - -escapedChar :: GenParser Char st Inline -escapedChar = do - result <- escaped (oneOf " $%&_#{}\n") - return $ if result == Str "\n" then Str " " else result - --- nonescaped special characters -unescapedChar :: GenParser Char st Inline -unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c]) - -specialChar :: GenParser Char st Inline -specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] - -backslash :: GenParser Char st Inline -backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\") - -tilde :: GenParser Char st Inline -tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") - -caret :: GenParser Char st Inline -caret = try (string "\\^{}") >> return (Str "^") - -bar :: GenParser Char st Inline -bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\") - -lt :: GenParser Char st Inline -lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<") - -gt :: GenParser Char st Inline -gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">") - -doubleQuote :: GenParser Char st Inline -doubleQuote = char '"' >> return (Str "\"") - -code :: GenParser Char ParserState Inline -code = code1 <|> code2 <|> lhsInlineCode - -code1 :: GenParser Char st Inline -code1 = try $ do - string "\\verb" - marker <- anyChar - result <- manyTill anyChar (char marker) - return $ Code $ removeLeadingTrailingSpace result - -code2 :: GenParser Char st Inline -code2 = try $ do - string "\\texttt{" - result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') - return $ Code result - -lhsInlineCode :: GenParser Char ParserState Inline -lhsInlineCode = try $ do - failUnlessLHS - char '|' - result <- manyTill (noneOf "|\n") (char '|') - return $ Code result - -emph :: GenParser Char ParserState Inline -emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> - manyTill inline (char '}') >>= return . Emph - -strikeout :: GenParser Char ParserState Inline -strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= - return . Strikeout - -superscript :: GenParser Char ParserState Inline -superscript = try $ string "\\textsuperscript{" >> - manyTill inline (char '}') >>= return . Superscript - --- note: \textsubscript isn't a standard latex command, but we use --- a defined version in pandoc. -subscript :: GenParser Char ParserState Inline -subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= - return . Subscript - -apostrophe :: GenParser Char ParserState Inline -apostrophe = char '\'' >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= - return . Quoted DoubleQuote . normalizeSpaces - -singleQuoteStart :: GenParser Char st Char -singleQuoteStart = char '`' - -singleQuoteEnd :: GenParser Char st () -singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum - -doubleQuoteStart :: CharParser st String -doubleQuoteStart = string "``" - -doubleQuoteEnd :: CharParser st String -doubleQuoteEnd = string "\"" <|> try (string "''") - -ellipses :: GenParser Char st Inline -ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >> - return Ellipses - -enDash :: GenParser Char st Inline -enDash = try (string "--") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = try (string "---") >> return EmDash - -hyphen :: GenParser Char st Inline -hyphen = char '-' >> return (Str "-") - -lab :: GenParser Char st Inline -lab = try $ do - string "\\label{" - result <- manyTill anyChar (char '}') - return $ Str $ "(" ++ result ++ ")" - -ref :: GenParser Char st Inline -ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str - -strong :: GenParser Char ParserState Inline -strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= - return . Strong - -whitespace :: GenParser Char st Inline -whitespace = many1 (oneOf "~ \t") >> return Space - --- hard line break -linebreak :: GenParser Char st Inline -linebreak = try (string "\\\\") >> return LineBreak - -spacer :: GenParser Char st Inline -spacer = try (string "\\,") >> return (Str "") - -str :: GenParser Char st Inline -str = many1 (noneOf specialChars) >>= return . Str - --- endline internal to paragraph -endline :: GenParser Char st Inline -endline = try $ newline >> notFollowedBy blankline >> return Space - --- math -math :: GenParser Char st Inline -math = (math3 >>= return . Math DisplayMath) - <|> (math1 >>= return . Math InlineMath) - <|> (math2 >>= return . Math InlineMath) - <|> (math4 >>= return . Math DisplayMath) - <|> (math5 >>= return . Math DisplayMath) - <|> (math6 >>= return . Math DisplayMath) - "math" - -math1 :: GenParser Char st String -math1 = try $ char '$' >> manyTill anyChar (char '$') - -math2 :: GenParser Char st String -math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)") - -math3 :: GenParser Char st String -math3 = try $ char '$' >> math1 >>~ char '$' - -math4 :: GenParser Char st String -math4 = try $ do - name <- begin "equation" <|> begin "equation*" <|> begin "displaymath" <|> begin "displaymath*" - spaces - manyTill anyChar (end name) - -math5 :: GenParser Char st String -math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]") - -math6 :: GenParser Char st String -math6 = try $ do - name <- begin "eqnarray" <|> begin "eqnarray*" - spaces - res <- manyTill anyChar (end name) - return $ filter (/= '&') res -- remove eqnarray alignment codes - --- --- links and images --- - -url :: GenParser Char ParserState Inline -url = try $ do - string "\\url" - url' <- charsInBalanced '{' '}' - return $ Link [Code url'] (url', "") - -link :: GenParser Char ParserState Inline -link = try $ do - string "\\href{" - url' <- manyTill anyChar (char '}') - char '{' - label' <- manyTill inline (char '}') - return $ Link (normalizeSpaces label') (url', "") - -image :: GenParser Char ParserState Inline -image = try $ do - ("includegraphics", _, args) <- command - let args' = filter isArg args -- filter out options - let src = if null args' then - ("", "") - else - (stripFirstAndLast (head args'), "") - return $ Image [Str "image"] src - -footnote :: GenParser Char ParserState Inline -footnote = try $ do - (name, _, (contents:[])) <- command - if ((name == "footnote") || (name == "thanks")) - then string "" - else fail "not a footnote or thanks command" - let contents' = stripFirstAndLast contents - -- parse the extracted block, which may contain various block elements: - rest <- getInput - setInput $ contents' - blocks <- parseBlocks - setInput rest - return $ Note blocks - --- | Parse any LaTeX command and return it in a raw TeX inline element. -rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try $ do - notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"] - state <- getState - if stateParseRaw state - then do - (name, star, args) <- command - return $ TeX ("\\" ++ name ++ star ++ concat args) - else do -- skip unknown command, leaving arguments to be parsed - char '\\' - letter - many (letter <|> digit) - optional (try $ string "{}") - return $ Str "" diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs deleted file mode 100644 index 896f5832e..000000000 --- a/Text/Pandoc/Readers/Markdown.hs +++ /dev/null @@ -1,1243 +0,0 @@ -{-# LANGUAGE CPP #-} -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of markdown-formatted plain text to 'Pandoc' document. --} -module Text.Pandoc.Readers.Markdown ( - readMarkdown - ) where - -import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate ) -import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper ) -import Data.Maybe -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag, anyHtmlTag, - anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement, unsanitaryURI ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.ParserCombinators.Parsec -import Control.Monad (when) - --- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -> String -> Pandoc -readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") - --- --- Constants and data structure definitions --- - -spaceChars :: [Char] -spaceChars = " \t" - -bulletListMarkers :: [Char] -bulletListMarkers = "*+-" - -hruleChars :: [Char] -hruleChars = "*-_" - -setextHChars :: [Char] -setextHChars = "=-" - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" - --- --- auxiliary functions --- - -indentSpaces :: GenParser Char ParserState [Char] -indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state - try (count tabStop (char ' ')) <|> - (many (char ' ') >> string "\t") "indentation" - -nonindentSpaces :: GenParser Char ParserState [Char] -nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state - sps <- many (char ' ') - if length sps < tabStop - then return sps - else unexpected "indented line" - --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine :: GenParser tok st () -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = do - state <- getState - if stateSmart state then return () else fail "Smart typography feature" - --- | Parse a sequence of inline elements between square brackets, --- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: GenParser Char ParserState Inline - -> GenParser Char ParserState [Inline] -inlinesInBalancedBrackets parser = try $ do - char '[' - result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - if res == "[" - then return () - else pzero - bal <- inlinesInBalancedBrackets parser - return $ [Str "["] ++ bal ++ [Str "]"]) - <|> (count 1 parser)) - (char ']') - return $ concat result - --- --- document structure --- - -titleLine :: GenParser Char ParserState [Inline] -titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline - -authorsLine :: GenParser Char st [String] -authorsLine = try $ do - char '%' - skipSpaces - authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") - newline - return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors - -dateLine :: GenParser Char st String -dateLine = try $ do - char '%' - skipSpaces - date <- many (noneOf "\n") - newline - return $ decodeCharacterReferences $ removeTrailingSpace date - -titleBlock :: GenParser Char ParserState ([Inline], [String], [Char]) -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option "" dateLine - optional blanklines - return (title, author, date) - -parseMarkdown :: GenParser Char ParserState Pandoc -parseMarkdown = do - -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= - return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - -- go through again for notes unless strict... - if stateStrict st - then return () - else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= - return . concat - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - setInput docMinusNotes - setPosition startPos - -- now parse it for real... - (title, author, date) <- option ([],[],"") titleBlock - blocks <- parseBlocks - return $ Pandoc (Meta title author date) $ filter (/= Null) blocks - --- --- initial pass for references and notes --- - -referenceKey :: GenParser Char ParserState [Char] -referenceKey = try $ do - startPos <- getPosition - nonindentSpaces - lab <- reference - char ':' - skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" - tit <- option "" referenceTitle - blanklines - endPos <- getPosition - let newkey = (lab, (intercalate "%20" $ words $ removeTrailingSpace src, tit)) - st <- getState - let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = newkey : oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -referenceTitle :: GenParser Char st String -referenceTitle = try $ do - skipSpaces >> optional newline >> skipSpaces - tit <- (charsInBalanced '(' ')' >>= return . unwords . words) - <|> do delim <- char '\'' <|> char '"' - manyTill anyChar (try (char delim >> skipSpaces >> - notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit - -noteMarker :: GenParser Char st [Char] -noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') - -rawLine :: GenParser Char ParserState [Char] -rawLine = do - notFollowedBy blankline - notFollowedBy' noteMarker - contents <- many1 nonEndline - end <- option "" (newline >> optional indentSpaces >> return "\n") - return $ contents ++ end - -rawLines :: GenParser Char ParserState [Char] -rawLines = many1 rawLine >>= return . concat - -noteBlock :: GenParser Char ParserState [Char] -noteBlock = try $ do - startPos <- getPosition - ref <- noteMarker - char ':' - optional blankline - optional indentSpaces - raw <- sepBy rawLines (try (blankline >> indentSpaces)) - optional blanklines - endPos <- getPosition - -- parse the extracted text, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - let newnote = (ref, contents) - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = manyTill block eof - -block :: GenParser Char ParserState Block -block = do - st <- getState - choice (if stateStrict st - then [ header - , codeBlockIndented - , blockQuote - , hrule - , bulletList - , orderedList - , htmlBlock - , para - , plain - , nullBlock ] - else [ codeBlockDelimited - , header - , table - , codeBlockIndented - , lhsCodeBlock - , blockQuote - , hrule - , bulletList - , orderedList - , definitionList - , para - , rawHtmlBlocks - , plain - , nullBlock ]) "block" - --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = setextHeader <|> atxHeader "header" - -atxHeader :: GenParser Char ParserState Block -atxHeader = try $ do - level <- many1 (char '#') >>= return . length - notFollowedBy (char '.' <|> char ')') -- this would be a list - skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text - -atxClosing :: GenParser Char st [Char] -atxClosing = try $ skipMany (char '#') >> blanklines - -setextHeader :: GenParser Char ParserState Block -setextHeader = try $ do - text <- many1Till inline newline - underlineChar <- oneOf setextHChars - many (char underlineChar) - blanklines - let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - return $ Header level (normalizeSpaces text) - --- --- hrule block --- - -hrule :: GenParser Char st Block -hrule = try $ do - skipSpaces - start <- oneOf hruleChars - count 2 (skipSpaces >> char start) - skipMany (oneOf spaceChars <|> char start) - newline - optional blanklines - return HorizontalRule - --- --- code blocks --- - -indentedLine :: GenParser Char ParserState [Char] -indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") - -codeBlockDelimiter :: Maybe Int - -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])])) -codeBlockDelimiter len = try $ do - size <- case len of - Just l -> count l (char '~') >> many (char '~') >> return l - Nothing -> count 3 (char '~') >> many (char '~') >>= - return . (+ 3) . length - many spaceChar - attr <- option ([],[],[]) attributes - blankline - return (size, attr) - -attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) -attributes = try $ do - char '{' - many spaceChar - attrs <- many (attribute >>~ many spaceChar) - char '}' - let (ids, classes, keyvals) = unzip3 attrs - let id' = if null ids then "" else head ids - return (id', concat classes, concat keyvals) - -attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) -attribute = identifierAttr <|> classAttr <|> keyValAttr - -identifier :: GenParser Char st [Char] -identifier = do - first <- letter - rest <- many alphaNum - return (first:rest) - -identifierAttr :: GenParser Char st ([Char], [a], [a1]) -identifierAttr = try $ do - char '#' - result <- identifier - return (result,[],[]) - -classAttr :: GenParser Char st ([Char], [[Char]], [a]) -classAttr = try $ do - char '.' - result <- identifier - return ("",[result],[]) - -keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) -keyValAttr = try $ do - key <- identifier - char '=' - char '"' - val <- manyTill (noneOf "\n") (char '"') - return ("",[],[(key,val)]) - -codeBlockDelimited :: GenParser Char st Block -codeBlockDelimited = try $ do - (size, attr) <- codeBlockDelimiter Nothing - contents <- manyTill anyLine (codeBlockDelimiter (Just size)) - blanklines - return $ CodeBlock attr $ intercalate "\n" contents - -codeBlockIndented :: GenParser Char ParserState Block -codeBlockIndented = do - contents <- many1 (indentedLine <|> - try (do b <- blanklines - l <- indentedLine - return $ b ++ l)) - optional blanklines - return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents - -lhsCodeBlock :: GenParser Char ParserState Block -lhsCodeBlock = do - failUnlessLHS - contents <- lhsCodeBlockBird <|> lhsCodeBlockLaTeX - return $ CodeBlock ("",["sourceCode","haskell"],[]) contents - -lhsCodeBlockLaTeX :: GenParser Char ParserState String -lhsCodeBlockLaTeX = try $ do - string "\\begin{code}" - manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") - blanklines - return $ stripTrailingNewlines contents - -lhsCodeBlockBird :: GenParser Char ParserState String -lhsCodeBlockBird = try $ do - pos <- getPosition - when (sourceColumn pos /= 1) $ fail "Not in first column" - lns <- many1 birdTrackLine - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns - blanklines - return $ intercalate "\n" lns' - -birdTrackLine :: GenParser Char st [Char] -birdTrackLine = do - char '>' - manyTill anyChar newline - - --- --- block quotes --- - -emailBlockQuoteStart :: GenParser Char ParserState Char -emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') - -emailBlockQuote :: GenParser Char ParserState [[Char]] -emailBlockQuote = try $ do - emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> - (try (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n')))) - (try (newline >> emailBlockQuoteStart)) - newline <|> (eof >> return '\n') - optional blanklines - return raw - -blockQuote :: GenParser Char ParserState Block -blockQuote = do - raw <- emailBlockQuote - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -bulletListStart :: GenParser Char ParserState () -bulletListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers - spaceChar - skipSpaces - -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do - optional newline -- if preceded by a Plain block in a list context - nonindentSpaces - notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, insist on more than one space - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) - else spaceChar - skipSpaces - return (num, style, delim) - -listStart :: GenParser Char ParserState () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) - --- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState [Char] -listLine = try $ do - notFollowedBy' listStart - notFollowedBy blankline - notFollowedBy' (do indentSpaces - many (spaceChar) - listStart) - line <- manyTill anyChar newline - return $ line ++ "\n" - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState [Char] -rawListItem = try $ do - listStart - result <- many1 listLine - blanks <- many blankline - return $ concat result ++ blanks - --- continuation of a list item - indented and separated by blankline --- or (in compact lists) endline. --- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState [Char] -listContinuation = try $ do - lookAhead indentSpaces - result <- many1 listContinuationLine - blanks <- many blankline - return $ concat result ++ blanks - -listContinuationLine :: GenParser Char ParserState [Char] -listContinuationLine = try $ do - notFollowedBy blankline - notFollowedBy' listStart - optional indentSpaces - result <- manyTill anyChar newline - return $ result ++ "\n" - -listItem :: GenParser Char ParserState [Block] -listItem = try $ do - first <- rawListItem - continuations <- many listContinuation - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) - contents <- parseFromString parseBlocks raw - updateState (\st -> st {stateParserContext = oldContext}) - return contents - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 listItem - return $ OrderedList (start, style, delim) $ compactify items - -bulletList :: GenParser Char ParserState Block -bulletList = try $ do - lookAhead bulletListStart - many1 listItem >>= return . BulletList . compactify - --- definition lists - -definitionListItem :: GenParser Char ParserState ([Inline], [Block]) -definitionListItem = try $ do - notFollowedBy blankline - notFollowedBy' indentSpaces - -- first, see if this has any chance of being a definition list: - lookAhead (anyLine >> char ':') - term <- manyTill inline newline - raw <- many1 defRawBlock - state <- getState - let oldContext = stateParserContext state - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ concat raw - updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) - -defRawBlock :: GenParser Char ParserState [Char] -defRawBlock = try $ do - char ':' - state <- getState - let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") - firstline <- anyLine - rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) - trailing <- option "" blanklines - return $ firstline ++ "\n" ++ unlines rawlines ++ trailing - -definitionList :: GenParser Char ParserState Block -definitionList = do - items <- many1 definitionListItem - let (terms, defs) = unzip items - let defs' = compactify defs - let items' = zip terms defs' - return $ DefinitionList items' - --- --- paragraph block --- - -isHtmlOrBlank :: Inline -> Bool -isHtmlOrBlank (HtmlInline _) = True -isHtmlOrBlank (Space) = True -isHtmlOrBlank (LineBreak) = True -isHtmlOrBlank _ = False - -para :: GenParser Char ParserState Block -para = try $ do - result <- many1 inline - if all isHtmlOrBlank result - then fail "treat as raw HTML" - else return () - newline - blanklines <|> do st <- getState - if stateStrict st - then lookAhead (blockQuote <|> header) >> return "" - else pzero - return $ Para $ normalizeSpaces result - -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- raw html --- - -htmlElement :: GenParser Char ParserState [Char] -htmlElement = strictHtmlBlock <|> htmlBlockElement "html element" - -htmlBlock :: GenParser Char ParserState Block -htmlBlock = try $ do - failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return $ RawHtml $ first ++ finalSpace ++ finalNewlines - --- True if tag is self-closing -isSelfClosing :: [Char] -> Bool -isSelfClosing tag = - isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag - -strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag - let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" - then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> - (htmlElement <|> (count 1 anyChar))) - end <- htmlEndTag tag' - return $ tag ++ concat contents ++ end - -rawHtmlBlocks :: GenParser Char ParserState Block -rawHtmlBlocks = do - htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock - sps <- do sp1 <- many spaceChar - sp2 <- option "" (blankline >> return "\n") - sp3 <- many spaceChar - sp4 <- option "" blanklines - return $ sp1 ++ sp2 ++ sp3 ++ sp4 - -- note: we want raw html to be able to - -- precede a code block, when separated - -- by a blank line - return $ blk ++ sps - let combined = concat htmlBlocks - let combined' = if last combined == '\n' then init combined else combined - return $ RawHtml combined' - --- --- Tables --- - --- Parse a dashed line with optional trailing spaces; return its length --- and the length including trailing space. -dashedLine :: Char - -> GenParser Char st (Int, Int) -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) - --- Parse a table header with dashed lines of '-' preceded by --- one line of text. -simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) -simpleTableHeader = try $ do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return (rawHeads, aligns, indices) - --- Parse a table footer - dashed lines followed by blank line. -tableFooter :: GenParser Char ParserState [Char] -tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines - --- Parse a table separator - dashed line. -tableSep :: GenParser Char ParserState String -tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" - --- Parse a raw line and split it into chunks by indices. -rawTableLine :: [Int] - -> GenParser Char ParserState [String] -rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line - --- Parse a table line and return a list of lists of blocks (columns). -tableLine :: [Int] - -> GenParser Char ParserState [[Block]] -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) - --- Parse a multiline table row and return a list of blocks (columns). -multilineRow :: [Int] - -> GenParser Char ParserState [[Block]] -multilineRow indices = do - colLines <- many1 (rawTableLine indices) - optional blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - --- Parses a table caption: inlines beginning with 'Table:' --- and followed by blank lines. -tableCaption :: GenParser Char ParserState [Inline] -tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' - --- Parse a simple table with '---' header and one line per row. -simpleTable :: GenParser Char ParserState Block -simpleTable = tableWith simpleTableHeader tableLine blanklines - --- Parse a multiline table: starts with row of '-' on top, then header --- (which may be multiline), then the rows, --- which may be multiline, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). -multilineTable :: GenParser Char ParserState Block -multilineTable = tableWith multilineTableHeader multilineRow tableFooter - -multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int]) -multilineTableHeader = try $ do - tableSep - rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (intercalate " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) - --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] - -> Int - -> Alignment -alignType [] _ = AlignDefault -alignType strLst len = - let s = head $ sortBy (comparing length) $ - map removeTrailingSpace strLst - leftSpace = if null s then False else (s !! 0) `elem` " \t" - rightSpace = length s < len || (s !! (len - 1)) `elem` " \t" - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - -table :: GenParser Char ParserState Block -table = simpleTable <|> multilineTable "table" - --- --- inline --- - -inline :: GenParser Char ParserState Inline -inline = choice inlineParsers "inline" - -inlineParsers :: [GenParser Char ParserState Inline] -inlineParsers = [ abbrev - , str - , smartPunctuation - , whitespace - , endline - , code - , charRef - , strong - , emph - , note - , inlineNote - , link -#ifdef _CITEPROC - , inlineCitation -#endif - , image - , math - , strikeout - , superscript - , subscript - , autoLink - , rawHtmlInline' - , rawLaTeXInline' - , escapedChar - , symbol - , ltSign ] - -inlineNonLink :: GenParser Char ParserState Inline -inlineNonLink = (choice $ - map (\parser -> try (parser >>= failIfLink)) inlineParsers) - "inline (non-link)" - -failIfLink :: Inline -> GenParser tok st Inline -failIfLink (Link _ _) = pzero -failIfLink elt = return elt - -escapedChar :: GenParser Char ParserState Inline -escapedChar = do - char '\\' - state <- getState - result <- option '\\' $ if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) - let result' = if result == ' ' - then '\160' -- '\ ' is a nonbreaking space - else result - return $ Str [result'] - -ltSign :: GenParser Char ParserState Inline -ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] - -specialCharsMinusLt :: [Char] -specialCharsMinusLt = filter (/= '<') specialChars - -symbol :: GenParser Char ParserState Inline -symbol = do - result <- oneOf specialCharsMinusLt - return $ Str [result] - --- parses inline code, between n `s and n `s -code :: GenParser Char ParserState Inline -code = try $ do - starts <- many1 (char '`') - skipSpaces - result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> - (char '\n' >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> - notFollowedBy (char '`'))) - return $ Code $ removeLeadingTrailingSpace $ concat result - -mathWord :: GenParser Char st [Char] -mathWord = many1 ((noneOf " \t\n\\$") <|> - (try (char '\\') >>~ notFollowedBy (char '$'))) - -math :: GenParser Char ParserState Inline -math = (mathDisplay >>= return . Math DisplayMath) - <|> (mathInline >>= return . Math InlineMath) - -mathDisplay :: GenParser Char ParserState String -mathDisplay = try $ do - failIfStrict - string "$$" - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") - -mathInline :: GenParser Char ParserState String -mathInline = try $ do - failIfStrict - char '$' - notFollowedBy space - words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline))) - char '$' - notFollowedBy digit - return $ intercalate " " words' - -emph :: GenParser Char ParserState Inline -emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|> - (enclosed (char '_') (notFollowedBy' strong >> char '_' >> - notFollowedBy alphaNum) inline)) >>= - return . Emph . normalizeSpaces - -strong :: GenParser Char ParserState Inline -strong = ((enclosed (string "**") (try $ string "**") inline) <|> - (enclosed (string "__") (try $ string "__") inline)) >>= - return . Strong . normalizeSpaces - -strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= - return . Strikeout . normalizeSpaces - -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Superscript - -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) >>= -- may not contain Space - return . Subscript - -abbrev :: GenParser Char ParserState Inline -abbrev = failUnlessSmart >> - (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160") - --- an string of letters followed by a period that does not end a sentence --- is assumed to be an abbreviation. It is assumed that sentences don't --- start with lowercase letters or numerals. -assumedAbbrev :: GenParser Char ParserState [Char] -assumedAbbrev = try $ do - result <- many1 $ satisfy isAlpha - string ". " - lookAhead $ satisfy (\x -> isLower x || isDigit x) - return result - --- these strings are treated as abbreviations even if they are followed --- by a capital letter (such as a name). -knownAbbrev :: GenParser Char ParserState [Char] -knownAbbrev = try $ do - result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen", - "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs", - "Sen", "Rep", "Pres", "Hon", "Rev" ] - string ". " - return result - -smartPunctuation :: GenParser Char ParserState Inline -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= - return . Quoted DoubleQuote . normalizeSpaces - -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart :: GenParser Char ParserState Char -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - char '\8216' <|> - (try $ do char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return '\'') - -singleQuoteEnd :: GenParser Char st Char -singleQuoteEnd = try $ do - char '\8217' <|> char '\'' - notFollowedBy alphaNum - return '\'' - -doubleQuoteStart :: GenParser Char ParserState Char -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - char '\8220' <|> - (try $ do char '"' - notFollowedBy (oneOf " \t\n") - return '"') - -doubleQuoteEnd :: GenParser Char st Char -doubleQuoteEnd = char '\8221' <|> char '"' - -ellipses :: GenParser Char st Inline -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash - -enDash :: GenParser Char st Inline -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = oneOfStrings ["---", "--"] >> return EmDash - -whitespace :: GenParser Char ParserState Inline -whitespace = do - sps <- many1 (oneOf spaceChars) - if length sps >= 2 - then option Space (endline >> return LineBreak) - else return Space "whitespace" - -nonEndline :: GenParser Char st Char -nonEndline = satisfy (/='\n') - -strChar :: GenParser Char st Char -strChar = noneOf (specialChars ++ spaceChars ++ "\n") - -str :: GenParser Char st Inline -str = many1 strChar >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline -endline = try $ do - newline - notFollowedBy blankline - st <- getState - if stateStrict st - then do notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header - else return () - -- parse potential list-starts differently if in a list: - if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> - (anyOrderedListStart >> return ())) - else return () - return Space - --- --- links --- - --- a reference label for a link -reference :: GenParser Char ParserState [Inline] -reference = do notFollowedBy' (string "[^") -- footnote reference - result <- inlinesInBalancedBrackets inlineNonLink - return $ normalizeSpaces result - --- source for a link, with optional title -source :: GenParser Char st (String, [Char]) -source = - (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> - -- the following is needed for cases like: [ref](/url(a). - (enclosed (char '(') (char ')') anyChar >>= - parseFromString source') - --- auxiliary function for source -source' :: GenParser Char st (String, [Char]) -source' = do - skipSpaces - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" - tit <- option "" linkTitle - skipSpaces - eof - return (intercalate "%20" $ words $ removeTrailingSpace src, tit) - -linkTitle :: GenParser Char st String -linkTitle = try $ do - (many1 spaceChar >> option '\n' newline) <|> newline - skipSpaces - delim <- oneOf "'\"" - tit <- manyTill (optional (char '\\') >> anyChar) - (try (char delim >> skipSpaces >> eof)) - return $ decodeCharacterReferences tit - -link :: GenParser Char ParserState Inline -link = try $ do - lab <- reference - src <- source <|> referenceLink lab - sanitize <- getState >>= return . stateSanitizeHTML - if sanitize && unsanitaryURI (fst src) - then fail "Unsanitary URI" - else return $ Link lab src - --- a link like [this][ref] or [this][] or [this] -referenceLink :: [Inline] - -> GenParser Char ParserState (String, [Char]) -referenceLink lab = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then lab else ref - state <- getState - case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" - Just target -> return target - -autoLink :: GenParser Char ParserState Inline -autoLink = try $ do - char '<' - src <- uri <|> (emailAddress >>= (return . ("mailto:" ++))) - char '>' - let src' = if "mailto:" `isPrefixOf` src - then drop 7 src - else src - st <- getState - let sanitize = stateSanitizeHTML st - if sanitize && unsanitaryURI src - then fail "Unsanitary URI" - else return $ if stateStrict st - then Link [Str src'] (src, "") - else Link [Code src'] (src, "") - -image :: GenParser Char ParserState Inline -image = try $ do - char '!' - (Link lab src) <- link - return $ Image lab src - -note :: GenParser Char ParserState Inline -note = try $ do - failIfStrict - ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return $ Note contents - -inlineNote :: GenParser Char ParserState Inline -inlineNote = try $ do - failIfStrict - char '^' - contents <- inlinesInBalancedBrackets inline - return $ Note [Para contents] - -rawLaTeXInline' :: GenParser Char ParserState Inline -rawLaTeXInline' = do - failIfStrict - (rawConTeXtEnvironment' >>= return . TeX) - <|> (rawLaTeXEnvironment' >>= return . TeX) - <|> rawLaTeXInline - -rawConTeXtEnvironment' :: GenParser Char st String -rawConTeXtEnvironment' = try $ do - string "\\start" - completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> (many1 letter) - contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar)) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion - -inBrackets :: (GenParser Char st Char) -> GenParser Char st String -inBrackets parser = do - char '[' - contents <- many parser - char ']' - return $ "[" ++ contents ++ "]" - -rawHtmlInline' :: GenParser Char ParserState Inline -rawHtmlInline' = do - st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else anyHtmlInlineTag - return $ HtmlInline result - -#ifdef _CITEPROC -inlineCitation :: GenParser Char ParserState Inline -inlineCitation = try $ do - failIfStrict - cit <- citeMarker - let citations = readWith parseCitation defaultParserState cit - mr <- mapM chkCit citations - if catMaybes mr /= [] - then return $ Cite citations [] - else fail "no citation found" - -chkCit :: Target -> GenParser Char ParserState (Maybe Target) -chkCit t = do - st <- getState - case lookupKeySrc (stateKeys st) [Str $ fst t] of - Just _ -> fail "This is a link" - Nothing -> if elem (fst t) $ stateCitations st - then return $ Just t - else return $ Nothing - -citeMarker :: GenParser Char ParserState String -citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') - -parseCitation :: GenParser Char ParserState [(String,String)] -parseCitation = try $ sepBy (parseLabel) (oneOf ";") - -parseLabel :: GenParser Char ParserState (String,String) -parseLabel = try $ do - res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@") - case res of - [lab,loc] -> return (lab, loc) - [lab] -> return (lab, "" ) - _ -> return ("" , "" ) - -#endif diff --git a/Text/Pandoc/Readers/RST.hs b/Text/Pandoc/Readers/RST.hs deleted file mode 100644 index 255054c10..000000000 --- a/Text/Pandoc/Readers/RST.hs +++ /dev/null @@ -1,707 +0,0 @@ -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion from reStructuredText to 'Pandoc' document. --} -module Text.Pandoc.Readers.RST ( - readRST - ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.ParserCombinators.Parsec -import Control.Monad ( when ) -import Data.List ( findIndex, delete, intercalate ) - --- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -> String -> Pandoc -readRST state s = (readWith parseRST) state (s ++ "\n\n") - --- --- Constants and data structure definitions ---- - -bulletListMarkers :: [Char] -bulletListMarkers = "*+-" - -underlineChars :: [Char] -underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\\`|*_<>$:[-" - --- --- parsing documents --- - -isHeader :: Int -> Block -> Bool -isHeader n (Header x _) = x == n -isHeader _ _ = False - --- | Promote all headers in a list of blocks. (Part of --- title transformation for RST.) -promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = - (Header (level - num) text):(promoteHeaders num rest) -promoteHeaders num (other:rest) = other:(promoteHeaders num rest) -promoteHeaders _ [] = [] - --- | If list of blocks starts with a header (or a header and subheader) --- of level that are not found elsewhere, return it as a title and --- promote all the other headers. -titleTransform :: [Block] -- ^ list of blocks - -> ([Block], [Inline]) -- ^ modified list of blocks, title -titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle - if (any (isHeader 1) rest) || (any (isHeader 2) rest) - then ((Header 1 head1):(Header 2 head2):rest, []) - else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) -titleTransform ((Header 1 head1):rest) = -- title, no subtitle - if (any (isHeader 1) rest) - then ((Header 1 head1):rest, []) - else ((promoteHeaders 1 rest), head1) -titleTransform blocks = (blocks, []) - -parseRST :: GenParser Char ParserState Pandoc -parseRST = do - startPos <- getPosition - -- go through once just to get list of reference keys - -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat - setInput docMinusKeys - setPosition startPos - st <- getState - let reversedKeys = stateKeys st - updateState $ \s -> s { stateKeys = reverse reversedKeys } - -- now parse it for real... - blocks <- parseBlocks - let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state - then titleTransform blocks' - else (blocks', []) - let authors = stateAuthors state - let date = stateDate state - let title' = if (null title) then (stateTitle state) else title - return $ Pandoc (Meta title' authors date) blocks'' - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = manyTill block eof - -block :: GenParser Char ParserState Block -block = choice [ codeBlock - , rawHtmlBlock - , rawLaTeXBlock - , fieldList - , blockQuote - , imageBlock - , unknownDirective - , header - , hrule - , list - , lineBlock - , lhsCodeBlock - , para - , plain - , nullBlock ] "block" - --- --- field list --- - -fieldListItem :: String -> GenParser Char st ([Char], [Char]) -fieldListItem indent = try $ do - string indent - char ':' - name <- many1 alphaNum - string ": " - skipSpaces - first <- manyTill anyChar newline - rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> - indentedBlock - return (name, intercalate " " (first:(lines rest))) - -fieldList :: GenParser Char ParserState Block -fieldList = try $ do - indent <- lookAhead $ many (oneOf " \t") - items <- many1 $ fieldListItem indent - blanklines - let authors = case lookup "Authors" items of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,_) -> x == "Author") items) - if null authors - then return () - else updateState $ \st -> st {stateAuthors = authors} - case (lookup "Date" items) of - Just dat -> updateState $ \st -> st {stateDate = dat} - Nothing -> return () - case (lookup "Title" items) of - Just tit -> parseFromString (many inline) tit >>= - \t -> updateState $ \st -> st {stateTitle = t} - Nothing -> return () - let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") && - (x /= "Date") && (x /= "Title")) items - if null remaining - then return Null - else do terms <- mapM (return . (:[]) . Str . fst) remaining - defs <- mapM (parseFromString (many block) . snd) - remaining - return $ DefinitionList $ zip terms defs - --- --- line block --- - -lineBlockLine :: GenParser Char ParserState [Inline] -lineBlockLine = try $ do - string "| " - white <- many (oneOf " \t") - line <- manyTill inline newline - return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] - -lineBlock :: GenParser Char ParserState Block -lineBlock = try $ do - lines' <- many1 lineBlockLine - blanklines - return $ Para (concat lines') - --- --- paragraph block --- - -para :: GenParser Char ParserState Block -para = paraBeforeCodeBlock <|> paraNormal "paragraph" - -codeBlockStart :: GenParser Char st Char -codeBlockStart = string "::" >> blankline >> blankline - --- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock :: GenParser Char ParserState Block -paraBeforeCodeBlock = try $ do - result <- many1 (notFollowedBy' codeBlockStart >> inline) - lookAhead (string "::") - return $ Para $ if last result == Space - then normalizeSpaces result - else (normalizeSpaces result) ++ [Str ":"] - --- regular paragraph -paraNormal :: GenParser Char ParserState Block -paraNormal = try $ do - result <- many1 inline - newline - blanklines - return $ Para $ normalizeSpaces result - -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- image block --- - -imageBlock :: GenParser Char st Block -imageBlock = try $ do - string ".. image:: " - src <- manyTill anyChar newline - fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") - many1 $ fieldListItem indent - optional blanklines - case lookup "alt" fields of - Just alt -> return $ Plain [Image [Str alt] (src, alt)] - Nothing -> return $ Plain [Image [Str "image"] (src, "")] --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = doubleHeader <|> singleHeader "header" - --- a header with lines on top and bottom -doubleHeader :: GenParser Char ParserState Block -doubleHeader = try $ do - c <- oneOf underlineChars - rest <- many (char c) -- the top line - let lenTop = length (c:rest) - skipSpaces - newline - txt <- many1 (notFollowedBy blankline >> inline) - pos <- getPosition - let len = (sourceColumn pos) - 1 - if (len > lenTop) then fail "title longer than border" else return () - blankline -- spaces and newline - count lenTop (char c) -- the bottom line - blanklines - -- check to see if we've had this kind of header before. - -- if so, get appropriate level. if not, add to list. - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- a header with line on the bottom only -singleHeader :: GenParser Char ParserState Block -singleHeader = try $ do - notFollowedBy' whitespace - txt <- many1 (do {notFollowedBy blankline; inline}) - pos <- getPosition - let len = (sourceColumn pos) - 1 - blankline - c <- oneOf underlineChars - count (len - 1) (char c) - many (char c) - blanklines - state <- getState - let headerTable = stateHeaderTable state - let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of - Just ind -> (headerTable, ind + 1) - Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) - setState (state { stateHeaderTable = headerTable' }) - return $ Header level (normalizeSpaces txt) - --- --- hrule block --- - -hrule :: GenParser Char st Block -hrule = try $ do - chr <- oneOf underlineChars - count 3 (char chr) - skipMany (char chr) - blankline - blanklines - return HorizontalRule - --- --- code blocks --- - --- read a line indented by a given string -indentedLine :: String -> GenParser Char st [Char] -indentedLine indents = try $ do - string indents - result <- manyTill anyChar newline - return $ result ++ "\n" - --- two or more indented lines, possibly separated by blank lines. --- any amount of indentation will work. -indentedBlock :: GenParser Char st [Char] -indentedBlock = do - indents <- lookAhead $ many1 (oneOf " \t") - lns <- many $ choice $ [ indentedLine indents, - try $ do b <- blanklines - l <- indentedLine indents - return (b ++ l) ] - optional blanklines - return $ concat lns - -codeBlock :: GenParser Char st Block -codeBlock = try $ do - codeBlockStart - result <- indentedBlock - return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result - -lhsCodeBlock :: GenParser Char ParserState Block -lhsCodeBlock = try $ do - failUnlessLHS - pos <- getPosition - when (sourceColumn pos /= 1) $ fail "Not in first column" - lns <- many1 birdTrackLine - -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns - else lns - blanklines - return $ CodeBlock ("", ["sourceCode", "haskell"], []) $ intercalate "\n" lns' - -birdTrackLine :: GenParser Char st [Char] -birdTrackLine = do - char '>' - manyTill anyChar newline - --- --- raw html --- - -rawHtmlBlock :: GenParser Char st Block -rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> - indentedBlock >>= return . RawHtml - --- --- raw latex --- - -rawLaTeXBlock :: GenParser Char st Block -rawLaTeXBlock = try $ do - string ".. raw:: latex" - blanklines - result <- indentedBlock - return $ Para [(TeX result)] - --- --- block quotes --- - -blockQuote :: GenParser Char ParserState Block -blockQuote = do - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return $ BlockQuote contents - --- --- list blocks --- - -list :: GenParser Char ParserState Block -list = choice [ bulletList, orderedList, definitionList ] "list" - -definitionListItem :: GenParser Char ParserState ([Inline], [Block]) -definitionListItem = try $ do - -- avoid capturing a directive or comment - notFollowedBy (try $ char '.' >> char '.') - term <- many1Till inline endline - raw <- indentedBlock - -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" - return (normalizeSpaces term, contents) - -definitionList :: GenParser Char ParserState Block -definitionList = many1 definitionListItem >>= return . DefinitionList - --- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: GenParser Char st Int -bulletListStart = try $ do - notFollowedBy' hrule -- because hrules start out just like lists - marker <- oneOf bulletListMarkers - white <- many1 spaceChar - return $ length (marker:white) - --- parses ordered list start and returns its length (inc following whitespace) -orderedListStart :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char st Int -orderedListStart style delim = try $ do - (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) - white <- many1 spaceChar - return $ markerLen + length white - --- parse a line of a list item -listLine :: Int -> GenParser Char ParserState [Char] -listLine markerLength = try $ do - notFollowedBy blankline - indentWith markerLength - line <- manyTill anyChar newline - return $ line ++ "\n" - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> GenParser Char ParserState [Char] -indentWith num = do - state <- getState - let tabStop = stateTabStop state - if (num < tabStop) - then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] - --- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState Int - -> GenParser Char ParserState (Int, [Char]) -rawListItem start = try $ do - markerLength <- start - firstLine <- manyTill anyChar newline - restLines <- many (listLine markerLength) - return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) - --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Int -> GenParser Char ParserState [Char] -listContinuation markerLength = try $ do - blanks <- many1 blankline - result <- many1 (listLine markerLength) - return $ blanks ++ concat result - -listItem :: GenParser Char ParserState Int - -> GenParser Char ParserState [Block] -listItem start = try $ do - (markerLength, first) <- rawListItem start - rest <- many (listContinuation markerLength) - blanks <- choice [ try (many blankline >>~ lookAhead start), - many1 blankline ] -- whole list must end with blank. - -- parsing with ListItemState forces markers at beginning of lines to - -- count as list item markers, even if not separated by blank space. - -- see definition of "endline" - state <- getState - let oldContext = stateParserContext state - setState $ state {stateParserContext = ListItemState} - -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks - updateState (\st -> st {stateParserContext = oldContext}) - return parsed - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) - items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return $ OrderedList (start, style, delim) items' - -bulletList :: GenParser Char ParserState Block -bulletList = many1 (listItem bulletListStart) >>= - return . BulletList . compactify - --- --- unknown directive (e.g. comment) --- - -unknownDirective :: GenParser Char st Block -unknownDirective = try $ do - string ".." - notFollowedBy (noneOf " \t\n") - manyTill anyChar newline - many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline) - return Null - --- --- reference key --- - -quotedReferenceName :: GenParser Char ParserState [Inline] -quotedReferenceName = try $ do - char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- many1Till inline (char '`') - return label' - -unquotedReferenceName :: GenParser Char ParserState [Inline] -unquotedReferenceName = try $ do - label' <- many1Till inline (lookAhead $ char ':') - return label' - -isolated :: Char -> GenParser Char st Char -isolated ch = try $ char ch >>~ notFollowedBy (char ch) - -simpleReferenceName :: GenParser Char st [Inline] -simpleReferenceName = do - raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|> - (try $ char '_' >>~ lookAhead alphaNum)) - return [Str raw] - -referenceName :: GenParser Char ParserState [Inline] -referenceName = quotedReferenceName <|> - (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> - unquotedReferenceName - -referenceKey :: GenParser Char ParserState [Char] -referenceKey = do - startPos <- getPosition - key <- choice [imageKey, anonymousKey, regularKey] - st <- getState - let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = key : oldkeys } - optional blanklines - endPos <- getPosition - -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' - -targetURI :: GenParser Char st [Char] -targetURI = do - skipSpaces - optional newline - contents <- many1 (try (many spaceChar >> newline >> - many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") - blanklines - return contents - -imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) -imageKey = try $ do - string ".. |" - ref <- manyTill inline (char '|') - skipSpaces - string "image::" - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - -anonymousKey :: GenParser Char st ([Inline], (String, [Char])) -anonymousKey = try $ do - oneOfStrings [".. __:", "__"] - src <- targetURI - return ([Str "_"], (removeLeadingTrailingSpace src, "")) - -regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) -regularKey = try $ do - string ".. _" - ref <- referenceName - char ':' - src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) - - -- - -- inline - -- - -inline :: GenParser Char ParserState Inline -inline = choice [ link - , str - , whitespace - , endline - , strong - , emph - , code - , image - , hyphens - , superscript - , subscript - , escapedChar - , symbol ] "inline" - -hyphens :: GenParser Char ParserState Inline -hyphens = do - result <- many1 (char '-') - option Space endline - -- don't want to treat endline after hyphen or dash as a space - return $ Str result - -escapedChar :: GenParser Char st Inline -escapedChar = escaped anyChar - -symbol :: GenParser Char ParserState Inline -symbol = do - result <- oneOf specialChars - return $ Str [result] - --- parses inline code, between codeStart and codeEnd -code :: GenParser Char ParserState Inline -code = try $ do - string "``" - result <- manyTill anyChar (try (string "``")) - return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result - -emph :: GenParser Char ParserState Inline -emph = enclosed (char '*') (char '*') inline >>= - return . Emph . normalizeSpaces - -strong :: GenParser Char ParserState Inline -strong = enclosed (string "**") (try $ string "**") inline >>= - return . Strong . normalizeSpaces - -interpreted :: [Char] -> GenParser Char st [Inline] -interpreted role = try $ do - optional $ try $ string "\\ " - result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar - try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") - return [Str result] - -superscript :: GenParser Char ParserState Inline -superscript = interpreted "sup" >>= (return . Superscript) - -subscript :: GenParser Char ParserState Inline -subscript = interpreted "sub" >>= (return . Subscript) - -whitespace :: GenParser Char ParserState Inline -whitespace = many1 spaceChar >> return Space "whitespace" - -str :: GenParser Char ParserState Inline -str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str - --- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline -endline = try $ do - newline - notFollowedBy blankline - -- parse potential list-starts at beginning of line differently in a list: - st <- getState - if (stateParserContext st) == ListItemState - then notFollowedBy (anyOrderedListMarker >> spaceChar) >> - notFollowedBy' bulletListStart - else return () - return Space - --- --- links --- - -link :: GenParser Char ParserState Inline -link = choice [explicitLink, referenceLink, autoLink] "link" - -explicitLink :: GenParser Char ParserState Inline -explicitLink = try $ do - char '`' - notFollowedBy (char '`') -- `` marks start of inline code - label' <- manyTill (notFollowedBy (char '`') >> inline) - (try (spaces >> char '<')) - src <- manyTill (noneOf ">\n ") (char '>') - skipSpaces - string "`_" - return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "") - -referenceLink :: GenParser Char ParserState Inline -referenceLink = try $ do - label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' - key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable key of - Nothing -> fail "no corresponding key" - Just target -> return target - -- if anonymous link, remove first anon key so it won't be used again - let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], src) keyTable -- remove first anon key - else keyTable - setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label') src - -autoURI :: GenParser Char ParserState Inline -autoURI = do - src <- uri - return $ Link [Str src] (src, "") - -autoEmail :: GenParser Char ParserState Inline -autoEmail = do - src <- emailAddress - return $ Link [Str src] ("mailto:" ++ src, "") - -autoLink :: GenParser Char ParserState Inline -autoLink = autoURI <|> autoEmail - --- For now, we assume that all substitution references are for images. -image :: GenParser Char ParserState Inline -image = try $ do - char '|' - ref <- manyTill inline (char '|') - state <- getState - let keyTable = stateKeys state - src <- case lookupKeySrc keyTable ref of - Nothing -> fail "no corresponding key" - Just target -> return target - return $ Image (normalizeSpaces ref) src - diff --git a/Text/Pandoc/Readers/TeXMath.hs b/Text/Pandoc/Readers/TeXMath.hs deleted file mode 100644 index 04b0f3b8f..000000000 --- a/Text/Pandoc/Readers/TeXMath.hs +++ /dev/null @@ -1,233 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of TeX math to a list of 'Pandoc' inline elements. --} -module Text.Pandoc.Readers.TeXMath ( - readTeXMath - ) where - -import Text.ParserCombinators.Parsec -import Text.Pandoc.Definition - --- | Converts a string of raw TeX math to a list of 'Pandoc' inlines. -readTeXMath :: String -> [Inline] -readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of - Left _ -> [Str inp] -- if unparseable, just include original - Right res -> res - -teXMath :: GenParser Char st [Inline] -teXMath = manyTill mathPart eof >>= return . concat - -mathPart :: GenParser Char st [Inline] -mathPart = whitespace <|> superscript <|> subscript <|> symbol <|> - argument <|> digits <|> letters <|> misc - -whitespace :: GenParser Char st [Inline] -whitespace = many1 space >> return [] - -symbol :: GenParser Char st [Inline] -symbol = try $ do - char '\\' - res <- many1 letter - case lookup res teXsymbols of - Just m -> return [Str m] - Nothing -> return [Str $ "\\" ++ res] - -argument :: GenParser Char st [Inline] -argument = try $ do - char '{' - res <- many mathPart - char '}' - return $ if null res - then [Str " "] - else [Str "{"] ++ concat res ++ [Str "}"] - -digits :: GenParser Char st [Inline] -digits = do - res <- many1 digit - return [Str res] - -letters :: GenParser Char st [Inline] -letters = do - res <- many1 letter - return [Emph [Str res]] - -misc :: GenParser Char st [Inline] -misc = do - res <- noneOf "}" - return [Str [res]] - -scriptArg :: GenParser Char st [Inline] -scriptArg = try $ do - (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r})) - <|> symbol - <|> (do{c <- (letter <|> digit); return [Str [c]]}) - -superscript :: GenParser Char st [Inline] -superscript = try $ do - char '^' - arg <- scriptArg - return [Superscript arg] - -subscript :: GenParser Char st [Inline] -subscript = try $ do - char '_' - arg <- scriptArg - return [Subscript arg] - -withThinSpace :: String -> String -withThinSpace str = "\x2009" ++ str ++ "\x2009" - -teXsymbols :: [(String, String)] -teXsymbols = - [("alpha","\x3B1") - ,("beta", "\x3B2") - ,("chi", "\x3C7") - ,("delta", "\x3B4") - ,("Delta", "\x394") - ,("epsilon", "\x3B5") - ,("varepsilon", "\x25B") - ,("eta", "\x3B7") - ,("gamma", "\x3B3") - ,("Gamma", "\x393") - ,("iota", "\x3B9") - ,("kappa", "\x3BA") - ,("lambda", "\x3BB") - ,("Lambda", "\x39B") - ,("mu", "\x3BC") - ,("nu", "\x3BD") - ,("omega", "\x3C9") - ,("Omega", "\x3A9") - ,("phi", "\x3C6") - ,("varphi", "\x3D5") - ,("Phi", "\x3A6") - ,("pi", "\x3C0") - ,("Pi", "\x3A0") - ,("psi", "\x3C8") - ,("Psi", "\x3A8") - ,("rho", "\x3C1") - ,("sigma", "\x3C3") - ,("Sigma", "\x3A3") - ,("tau", "\x3C4") - ,("theta", "\x3B8") - ,("vartheta", "\x3D1") - ,("Theta", "\x398") - ,("upsilon", "\x3C5") - ,("xi", "\x3BE") - ,("Xi", "\x39E") - ,("zeta", "\x3B6") - ,("ne", "\x2260") - ,("lt", withThinSpace "<") - ,("le", withThinSpace "\x2264") - ,("leq", withThinSpace "\x2264") - ,("ge", withThinSpace "\x2265") - ,("geq", withThinSpace "\x2265") - ,("prec", withThinSpace "\x227A") - ,("succ", withThinSpace "\x227B") - ,("preceq", withThinSpace "\x2AAF") - ,("succeq", withThinSpace "\x2AB0") - ,("in", withThinSpace "\x2208") - ,("notin", withThinSpace "\x2209") - ,("subset", withThinSpace "\x2282") - ,("supset", withThinSpace "\x2283") - ,("subseteq", withThinSpace "\x2286") - ,("supseteq", withThinSpace "\x2287") - ,("equiv", withThinSpace "\x2261") - ,("cong", withThinSpace "\x2245") - ,("approx", withThinSpace "\x2248") - ,("propto", withThinSpace "\x221D") - ,("cdot", withThinSpace "\x22C5") - ,("star", withThinSpace "\x22C6") - ,("backslash", "\\") - ,("times", withThinSpace "\x00D7") - ,("divide", withThinSpace "\x00F7") - ,("circ", withThinSpace "\x2218") - ,("oplus", withThinSpace "\x2295") - ,("otimes", withThinSpace "\x2297") - ,("odot", withThinSpace "\x2299") - ,("sum", "\x2211") - ,("prod", "\x220F") - ,("wedge", withThinSpace "\x2227") - ,("bigwedge", withThinSpace "\x22C0") - ,("vee", withThinSpace "\x2228") - ,("bigvee", withThinSpace "\x22C1") - ,("cap", withThinSpace "\x2229") - ,("bigcap", withThinSpace "\x22C2") - ,("cup", withThinSpace "\x222A") - ,("bigcup", withThinSpace "\x22C3") - ,("neg", "\x00AC") - ,("implies", withThinSpace "\x21D2") - ,("iff", withThinSpace "\x21D4") - ,("forall", "\x2200") - ,("exists", "\x2203") - ,("bot", "\x22A5") - ,("top", "\x22A4") - ,("vdash", "\x22A2") - ,("models", withThinSpace "\x22A8") - ,("uparrow", "\x2191") - ,("downarrow", "\x2193") - ,("rightarrow", withThinSpace "\x2192") - ,("to", withThinSpace "\x2192") - ,("rightarrowtail", "\x21A3") - ,("twoheadrightarrow", withThinSpace "\x21A0") - ,("twoheadrightarrowtail", withThinSpace "\x2916") - ,("mapsto", withThinSpace "\x21A6") - ,("leftarrow", withThinSpace "\x2190") - ,("leftrightarrow", withThinSpace "\x2194") - ,("Rightarrow", withThinSpace "\x21D2") - ,("Leftarrow", withThinSpace "\x21D0") - ,("Leftrightarrow", withThinSpace "\x21D4") - ,("partial", "\x2202") - ,("nabla", "\x2207") - ,("pm", "\x00B1") - ,("emptyset", "\x2205") - ,("infty", "\x221E") - ,("aleph", "\x2135") - ,("ldots", "...") - ,("therefore", "\x2234") - ,("angle", "\x2220") - ,("quad", "\x00A0\x00A0") - ,("cdots", "\x22EF") - ,("vdots", "\x22EE") - ,("ddots", "\x22F1") - ,("diamond", "\x22C4") - ,("Box", "\x25A1") - ,("lfloor", "\x230A") - ,("rfloor", "\x230B") - ,("lceiling", "\x2308") - ,("rceiling", "\x2309") - ,("langle", "\x2329") - ,("rangle", "\x232A") - ,("{", "{") - ,("}", "}") - ,("[", "[") - ,("]", "]") - ,("|", "|") - ,("||", "||") - ] - diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs deleted file mode 100644 index 6854e5ae6..000000000 --- a/Text/Pandoc/Shared.hs +++ /dev/null @@ -1,953 +0,0 @@ -{-# LANGUAGE CPP #-} -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Utility functions and definitions used by the various Pandoc modules. --} -module Text.Pandoc.Shared ( - -- * List processing - splitBy, - splitByIndices, - substitute, - -- * Text processing - backslashEscapes, - escapeStringUsing, - stripTrailingNewlines, - removeLeadingTrailingSpace, - removeLeadingSpace, - removeTrailingSpace, - stripFirstAndLast, - camelCaseToHyphenated, - toRomanNumeral, - wrapped, - wrapIfNeeded, - wrappedTeX, - wrapTeXIfNeeded, - BlockWrapper (..), - wrappedBlocksToDoc, - -- * Parsing - (>>~), - anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - spaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - charsInBalanced', - romanNumeral, - emailAddress, - uri, - withHorizDisplacement, - nullBlock, - failIfStrict, - failUnlessLHS, - escaped, - anyOrderedListMarker, - orderedListMarker, - charRef, - readWith, - testStringWith, - ParserState (..), - defaultParserState, - HeaderType (..), - ParserContext (..), - QuoteContext (..), - NoteTable, - KeyTable, - lookupKeySrc, - refsMatch, - -- * Prettyprinting - hang', - prettyPandoc, - -- * Pandoc block and inline list processing - orderedListMarkers, - normalizeSpaces, - compactify, - Element (..), - hierarchicalize, - isHeaderBlock, - -- * Writer options - HTMLMathMethod (..), - ObfuscationMethod (..), - WriterOptions (..), - defaultWriterOptions, - -- * File handling - inDirectory - ) where - -import Text.Pandoc.Definition -import Text.ParserCombinators.Parsec -import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) -import qualified Text.PrettyPrint.HughesPJ as PP -import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) -import Data.List ( find, isPrefixOf, intercalate ) -import Control.Monad ( join ) -import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import System.Directory -import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) -import System.IO.UTF8 - --- --- List processing --- - --- | Split list by groups of one or more sep. -splitBy :: (Eq a) => a -> [a] -> [[a]] -splitBy _ [] = [] -splitBy sep lst = - let (first, rest) = break (== sep) lst - rest' = dropWhile (== sep) rest - in first:(splitBy sep rest') - --- | Split list into chunks divided at specified indices. -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = - let (first, rest) = splitAt x lst in - first:(splitByIndices (map (\y -> y - x) xs) rest) - --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst = - if target `isPrefixOf` lst - then replacement ++ (substitute target replacement $ drop (length target) lst) - else (head lst):(substitute target replacement $ tail lst) - --- --- Text processing --- - --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, String)] -backslashEscapes = map (\ch -> (ch, ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, String)] -> String -> String -escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = - case (lookup x escapeTable) of - Just str -> str ++ rest - Nothing -> x:rest - where rest = escapeStringUsing escapeTable xs - --- | Strip trailing newlines from string. -stripTrailingNewlines :: String -> String -stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse - --- | Remove leading and trailing space (including newlines) from string. -removeLeadingTrailingSpace :: String -> String -removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace - --- | Remove leading space (including newlines) from string. -removeLeadingSpace :: String -> String -removeLeadingSpace = dropWhile (`elem` " \n\t") - --- | Remove trailing space (including newlines) from string. -removeTrailingSpace :: String -> String -removeTrailingSpace = reverse . removeLeadingSpace . reverse - --- | Strip leading and trailing characters from string -stripFirstAndLast :: String -> String -stripFirstAndLast str = - drop 1 $ take ((length str) - 1) str - --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). -camelCaseToHyphenated :: String -> String -camelCaseToHyphenated [] = "" -camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = - a:'-':(toLower b):(camelCaseToHyphenated rest) -camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) - --- | Convert number < 4000 to uppercase roman numeral. -toRomanNumeral :: Int -> String -toRomanNumeral x = - if x >= 4000 || x < 0 - then "?" - else case x of - _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) - _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) - _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500) - _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) - _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100) - _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) - _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) - _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) - _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) - _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) - _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) - _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) - _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) - _ -> "" - --- | Wrap inlines to line length. -wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc -wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= - return . fsep - --- | Wrap inlines if the text wrap option is selected. -wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) -> - [Inline] -> m Doc -wrapIfNeeded opts = if writerWrapText opts - then wrapped - else ($) - --- auxiliary function for wrappedTeX -isNote :: Inline -> Bool -isNote (Note _) = True -isNote _ = False - --- | Wrap inlines to line length, treating footnotes in a way that --- makes sense in LaTeX and ConTeXt. -wrappedTeX :: Monad m - => Bool - -> ([Inline] -> m Doc) - -> [Inline] - -> m Doc -wrappedTeX includePercent listWriter sect = do - let (firstpart, rest) = break isNote sect - firstpartWrapped <- wrapped listWriter firstpart - if null rest - then return firstpartWrapped - else do let (note:rest') = rest - let (rest1, rest2) = break (== Space) rest' - -- rest1 is whatever comes between the note and a Space. - -- if the note is followed directly by a Space, rest1 is null. - -- rest1 is printed after the note but before the line break, - -- to avoid spurious blank space the note and immediately - -- following punctuation. - rest1Out <- if null rest1 - then return empty - else listWriter rest1 - rest2Wrapped <- if null rest2 - then return empty - else wrappedTeX includePercent listWriter (tail rest2) - noteText <- listWriter [note] - return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$ - (noteText <> rest1Out) $$ - rest2Wrapped - --- | Wrap inlines if the text wrap option is selected, specialized --- for LaTeX and ConTeXt. -wrapTeXIfNeeded :: Monad m - => WriterOptions - -> Bool - -> ([Inline] -> m Doc) - -> [Inline] - -> m Doc -wrapTeXIfNeeded opts includePercent = if writerWrapText opts - then wrappedTeX includePercent - else ($) - --- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@). -data BlockWrapper = Pad Doc | Reg Doc - --- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks. -wrappedBlocksToDoc :: [BlockWrapper] -> Doc -wrappedBlocksToDoc = foldr addBlock empty - where addBlock (Pad d) accum | isEmpty accum = d - addBlock (Pad d) accum = d $$ text "" $$ accum - addBlock (Reg d) accum = d $$ accum - --- --- Parsing --- - --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - --- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = manyTill anyChar newline - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] -many1Till p end = do - first <- p - rest <- manyTill p end - return (first:rest) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' p = try $ join $ do a <- try p - return (unexpected (show a)) - <|> - return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ - start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do - firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs - return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = do - oldPos <- getPosition - oldInput <- getInput - setInput str - result <- parser - setInput oldInput - setPosition oldPos - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) - <|> (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> try (string "\n" >>~ notFollowedBy' blanklines) - char close - return $ concat raw - --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close])) - <|> (do res <- charsInBalanced' open close - return $ [open] ++ res ++ [close]) - char close - return $ concat raw - --- Auxiliary functions for romanNumeral: - -lowercaseRomanDigits :: [Char] -lowercaseRomanDigits = ['i','v','x','l','c','d','m'] - -uppercaseRomanDigits :: [Char] -uppercaseRomanDigits = map toUpper lowercaseRomanDigits - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int -romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits - else lowercaseRomanDigits - lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = - map char romanDigits - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- Parsers for email addresses and URIs - -emailChar :: GenParser Char st Char -emailChar = alphaNum <|> oneOf "-+_." - -domainChar :: GenParser Char st Char -domainChar = alphaNum <|> char '-' - -domain :: GenParser Char st [Char] -domain = do - first <- many1 domainChar - dom <- many1 $ try (char '.' >> many1 domainChar ) - return $ intercalate "." (first:dom) - --- | Parses an email address; returns string. -emailAddress :: GenParser Char st [Char] -emailAddress = try $ do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - return $ addr ++ '@':dom - --- | Parses a URI. -uri :: GenParser Char st String -uri = try $ do - str <- many1 $ satisfy isAllowedInURI - case parseURI str of - Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:", - "file:", "mailto:", - "news:", "telnet:" ] - then return $ show uri' - else fail "not a URI" - Nothing -> fail "not a URI" - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Parses a character and returns 'Null' (so that the parser can move on --- if it gets stuck). -nullBlock :: GenParser Char st Block -nullBlock = anyChar >> return Null - --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = do - state <- getState - if stateLiterateHaskell state then return () else fail "Literate haskell feature" - --- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Inline -escaped parser = try $ do - char '\\' - result <- parser - return (Str [result]) - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) -upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) -lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) -decimal = do - num <- many1 digit - return (Decimal, read num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) -defaultNum = do - char '#' - return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) -lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) -upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> - (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char st ListAttributes -anyOrderedListMarker = choice $ - [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) - --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char st Int -orderedListMarker style delim = do - let num = case style of - DefaultStyle -> decimal <|> defaultNum - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, _, _) <- context num - return start - --- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline -charRef = do - c <- characterReference - return $ Str [c] - --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser - -> ParserState -- ^ initial state - -> String -- ^ input string - -> a -readWith parser state input = - case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> result - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = putStrLn $ show $ - readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? - stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC - stateCitations :: [String], -- ^ List of available citations -#endif - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [String], -- ^ Authors of document - stateDate :: String, -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal - stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used - } - deriving Show - -defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateSanitizeHTML = False, - stateKeys = [], -#ifdef _CITEPROC - stateCitations = [], -#endif - stateNotes = [], - stateTabStop = 4, - stateStandalone = False, - stateTitle = [], - stateAuthors = [], - stateDate = [], - stateStrict = False, - stateSmart = False, - stateLiterateHaskell = False, - stateColumns = 80, - stateHeaderTable = [] } - -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) - -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) - -data QuoteContext - = InSingleQuote -- ^ Used when parsing inside single quotes - | InDoubleQuote -- ^ Used when parsing inside double quotes - | NoQuote -- ^ Used when not parsing inside quotes - deriving (Eq, Show) - -type NoteTable = [(String, [Block])] - -type KeyTable = [([Inline], Target)] - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> [Inline] -- ^ Key - -> Maybe Target -lookupKeySrc table key = case find (refsMatch key . fst) table of - Nothing -> Nothing - Just (_, src) -> Just src - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Math t x):restx) ((Math u y):resty) = - ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x - --- --- Prettyprinting --- - --- | A version of hang that works like the version in pretty-1.0.0.0 -hang' :: Doc -> Int -> Doc -> Doc -hang' d1 n d2 = d1 $$ (nest n d2) - --- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces (rel to block) to indent first line - -> String -- ^ Contents of block to indent - -> String -indentBy _ _ [] = "" -indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first - in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ - (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks - -> [Block] -- ^ List of blocks - -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ - (prettyBlockList 2 blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ - indentBy 2 0 ("[" ++ (intercalate ",\n" - (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ - indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" -prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (intercalate ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" -prettyBlock block = show block - --- | Prettyprint Pandoc document. -prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - --- --- Pandoc block and inline list processing --- - --- | Generate infinite lazy list of markers for an ordered list, --- depending on list attributes. -orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = - let singleton c = [c] - nums = case numstyle of - DefaultStyle -> map show [start..] - Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ - map singleton ['A'..'Z'] - LowerAlpha -> drop (start - 1) $ cycle $ - map singleton ['a'..'z'] - UpperRoman -> map toRomanNumeral [start..] - LowerRoman -> map (map toLower . toRomanNumeral) [start..] - inDelim str = case numdelim of - DefaultDelim -> str ++ "." - Period -> str ++ "." - OneParen -> str ++ ")" - TwoParens -> "(" ++ str ++ ")" - in map inDelim nums - --- | Normalize a list of inline elements: remove leading and trailing --- @Space@ elements, collapse double @Space@s into singles, and --- remove empty Str elements. -normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces [] = [] -normalizeSpaces list = - let removeDoubles [] = [] - removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) - removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) - removeDoubles ((Str ""):rest) = removeDoubles rest - removeDoubles (x:rest) = x:(removeDoubles rest) - removeLeading (Space:xs) = removeLeading xs - removeLeading x = x - removeTrailing [] = [] - removeTrailing lst = if (last lst == Space) - then init lst - else lst - in removeLeading $ removeTrailing $ removeDoubles list - --- | Change final list item from @Para@ to @Plain@ if the list should --- be compact. -compactify :: [[Block]] -- ^ List of list items (each a list of blocks) - -> [[Block]] -compactify [] = [] -compactify items = - let final = last items - others = init items - in case last final of - Para a -> if all endsWithPlain others && not (null final) - then others ++ [init final ++ [Plain a]] - else items - _ -> items - -endsWithPlain :: [Block] -> Bool -endsWithPlain [] = False -endsWithPlain blocks = - case last blocks of - Plain _ -> True - (BulletList (x:xs)) -> endsWithPlain $ last (x:xs) - (OrderedList _ (x:xs)) -> endsWithPlain $ last (x:xs) - (DefinitionList (x:xs)) -> endsWithPlain $ last $ map snd (x:xs) - _ -> False - --- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block - | Sec [Inline] [Element] deriving (Eq, Read, Show) - --- | Returns @True@ on Header block with at least the specified level -headerAtLeast :: Int -> Block -> Bool -headerAtLeast level (Header x _) = x <= level -headerAtLeast _ _ = False - --- | Convert list of Pandoc blocks into (hierarchical) list of Elements -hierarchicalize :: [Block] -> [Element] -hierarchicalize [] = [] -hierarchicalize (block:rest) = - case block of - (Header level title) -> - let (thisSection, rest') = break (headerAtLeast level) rest - in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') - x -> (Blk x):(hierarchicalize rest) - --- | True if block is a Header block. -isHeaderBlock :: Block -> Bool -isHeaderBlock (Header _ _) = True -isHeaderBlock _ = False - --- --- Writer options --- - -data HTMLMathMethod = PlainMath - | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js - | JsMath (Maybe String) -- url of jsMath load script - | GladTeX - | MimeTeX String -- url of mimetex.cgi - deriving (Show, Read, Eq) - --- | Methods for obfuscating email addresses in HTML. -data ObfuscationMethod = NoObfuscation - | ReferenceObfuscation - | JavascriptObfuscation - deriving (Show, Read, Eq) - --- | Options for writers -data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerHeader :: String -- ^ Header for the document - , writerTitlePrefix :: String -- ^ Prefix for HTML titles - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerS5 :: Bool -- ^ We're writing S5 - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerIncremental :: Bool -- ^ Incremental S5 lists - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerIncludeBefore :: String -- ^ String to include before the body - , writerIncludeAfter :: String -- ^ String to include after the body - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerWrapText :: Bool -- ^ Wrap text to line length - , writerLiterateHaskell :: Bool -- ^ Write as literate haskell - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - } deriving Show - --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions = - WriterOptions { writerStandalone = False - , writerHeader = "" - , writerTitlePrefix = "" - , writerTabStop = 4 - , writerTableOfContents = False - , writerS5 = False - , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False - , writerIncremental = False - , writerNumberSections = False - , writerIncludeBefore = "" - , writerIncludeAfter = "" - , writerStrictMarkdown = False - , writerReferenceLinks = False - , writerWrapText = True - , writerLiterateHaskell = False - , writerEmailObfuscation = JavascriptObfuscation - } - --- --- File handling --- - --- | Perform an IO action in a directory, returning to starting directory. -inDirectory :: FilePath -> IO a -> IO a -inDirectory path action = do - oldDir <- getCurrentDirectory - setCurrentDirectory path - result <- action - setCurrentDirectory oldDir - return result diff --git a/Text/Pandoc/TH.hs b/Text/Pandoc/TH.hs deleted file mode 100644 index 0dc5a6719..000000000 --- a/Text/Pandoc/TH.hs +++ /dev/null @@ -1,65 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{- -Copyright (C) 2008 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.TH - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Template haskell functions used by Pandoc modules. --} -module Text.Pandoc.TH ( - contentsOf, - binaryContentsOf, - makeZip - ) where - -import Language.Haskell.TH -import Language.Haskell.TH.Syntax (Lift (..)) -import qualified Data.ByteString as B -import Data.ByteString.Internal ( w2c ) -import Prelude hiding ( readFile ) -import System.IO.UTF8 -import Codec.Archive.Zip -import Text.Pandoc.Shared ( inDirectory ) - --- | Insert contents of text file into a template. -contentsOf :: FilePath -> ExpQ -contentsOf p = lift =<< (runIO $ readFile p) - --- | Insert contents of binary file into a template. --- Note that @Data.ByteString.readFile@ uses binary mode on windows. -binaryContentsOf :: FilePath -> ExpQ -binaryContentsOf p = lift =<< (runIO $ B.readFile p) - -instance Lift B.ByteString where - lift x = return (LitE (StringL $ map w2c $ B.unpack x)) - -instance Lift Archive where - lift x = return (LitE (StringL $ show x )) - --- | Construct zip file from files in a directory, and --- insert into a template. -makeZip :: FilePath -> ExpQ -makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."]) - diff --git a/Text/Pandoc/Writers/ConTeXt.hs b/Text/Pandoc/Writers/ConTeXt.hs deleted file mode 100644 index 014751968..000000000 --- a/Text/Pandoc/Writers/ConTeXt.hs +++ /dev/null @@ -1,302 +0,0 @@ -{- -Copyright (C) 2007-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into ConTeXt. --} -module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( isSuffixOf, intercalate ) -import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) - -data WriterState = - WriterState { stNextRef :: Int -- number of next URL reference - , stOrderedListLevel :: Int -- level of ordered list - , stOptions :: WriterOptions -- writer options - } - -orderedListStyles :: [[Char]] -orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] - --- | Convert Pandoc to ConTeXt. -writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = - let defaultWriterState = WriterState { stNextRef = 1 - , stOrderedListLevel = 0 - , stOptions = options - } - in render $ - evalState (pandocToConTeXt options document) defaultWriterState - -pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToConTeXt options (Pandoc meta blocks) = do - main <- blockListToConTeXt blocks - let before = if null (writerIncludeBefore options) - then empty - else text $ writerIncludeBefore options - let after = if null (writerIncludeAfter options) - then empty - else text $ writerIncludeAfter options - let body = before $$ main $$ after - head' <- if writerStandalone options - then contextHeader options meta - else return empty - let toc = if writerTableOfContents options - then text "\\placecontent\n" - else empty - let foot = if writerStandalone options - then text "\\stoptext\n" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into ConTeXt header. -contextHeader :: WriterOptions -- ^ Options, including ConTeXt header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -contextHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else inlineListToConTeXt title - let authorstext = if null authors - then "" - else if length authors == 1 - then stringToConTeXt $ head authors - else stringToConTeXt $ (intercalate ", " $ - init authors) ++ " & " ++ last authors - let datetext = if date == "" - then "" - else stringToConTeXt date - let titleblock = text "\\doctitle{" <> titletext <> char '}' $$ - text ("\\author{" ++ authorstext ++ "}") $$ - text ("\\date{" ++ datetext ++ "}") - let header = text $ writerHeader options - return $ header $$ titleblock $$ text "\\starttext\n\\maketitle\n" - --- escape things as needed for ConTeXt - -escapeCharForConTeXt :: Char -> String -escapeCharForConTeXt ch = - case ch of - '{' -> "\\letteropenbrace{}" - '}' -> "\\letterclosebrace{}" - '\\' -> "\\letterbackslash{}" - '$' -> "\\$" - '|' -> "\\letterbar{}" - '^' -> "\\letterhat{}" - '%' -> "\\%" - '~' -> "\\lettertilde{}" - '&' -> "\\&" - '#' -> "\\#" - '<' -> "\\letterless{}" - '>' -> "\\lettermore{}" - '_' -> "\\letterunderscore{}" - '\160' -> "~" - x -> [x] - --- | Escape string for ConTeXt -stringToConTeXt :: String -> String -stringToConTeXt = concatMap escapeCharForConTeXt - --- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block - -> State WriterState BlockWrapper -blockToConTeXt Null = return $ Reg empty -blockToConTeXt (Plain lst) = do - st <- get - let options = stOptions st - contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst - return $ Reg contents -blockToConTeXt (Para lst) = do - st <- get - let options = stOptions st - contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst - return $ Pad contents -blockToConTeXt (BlockQuote lst) = do - contents <- blockListToConTeXt lst - return $ Pad $ text "\\startblockquote" $$ contents $$ text "\\stopblockquote" -blockToConTeXt (CodeBlock _ str) = - return $ Reg $ text $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" - -- \n because \stoptyping can't have anything after it, inc. } -blockToConTeXt (RawHtml _) = return $ Reg empty -blockToConTeXt (BulletList lst) = do - contents <- mapM listItemToConTeXt lst - return $ Pad $ text "\\startitemize" $$ vcat contents $$ text "\\stopitemize" -blockToConTeXt (OrderedList (start, style', delim) lst) = do - st <- get - let level = stOrderedListLevel st - put $ st {stOrderedListLevel = level + 1} - contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} - let start' = if start == 1 then "" else "start=" ++ show start - let delim' = case delim of - DefaultDelim -> "" - Period -> "stopper=." - OneParen -> "stopper=)" - TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) - (orderedListMarkers (start, style', delim)) - let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" - else "" - let specs2Items = filter (not . null) [start', delim', width''] - let specs2 = if null specs2Items - then "" - else "[" ++ intercalate "," specs2Items ++ "]" - let style'' = case style' of - DefaultStyle -> orderedListStyles !! level - Decimal -> "[n]" - LowerRoman -> "[r]" - UpperRoman -> "[R]" - LowerAlpha -> "[a]" - UpperAlpha -> "[A]" - let specs = style'' ++ specs2 - return $ Pad $ text ("\\startitemize" ++ specs) $$ vcat contents $$ - text "\\stopitemize" -blockToConTeXt (DefinitionList lst) = - mapM defListItemToConTeXt lst >>= return . Pad . wrappedBlocksToDoc -blockToConTeXt HorizontalRule = return $ Pad $ text "\\thinrule" -blockToConTeXt (Header level lst) = do - contents <- inlineListToConTeXt lst - st <- get - let opts = stOptions st - let base = if writerNumberSections opts then "section" else "subject" - return $ Pad $ if level >= 1 && level <= 5 - then char '\\' <> text (concat (replicate (level - 1) "sub")) <> - text base <> char '{' <> contents <> char '}' - else contents -blockToConTeXt (Table caption aligns widths heads rows) = do - let colWidths = map printDecimal widths - let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' - AlignRight -> 'r' - AlignCenter -> 'c' - AlignDefault -> 'l'): - "p(" ++ colWidth ++ "\\textwidth)|" - let colDescriptors = "|" ++ (concat $ - zipWith colDescriptor colWidths aligns) - headers <- tableRowToConTeXt heads - captionText <- inlineListToConTeXt caption - let captionText' = if null caption then text "none" else captionText - rows' <- mapM tableRowToConTeXt rows - return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$ - text "\\starttable[" <> text colDescriptors <> char ']' $$ - text "\\HL" $$ headers $$ text "\\HL" $$ - vcat rows' $$ text "\\HL\n\\stoptable" - -printDecimal :: Double -> String -printDecimal = printf "%.2f" - -tableRowToConTeXt :: [[Block]] -> State WriterState Doc -tableRowToConTeXt cols = do - cols' <- mapM blockListToConTeXt cols - return $ (vcat (map (text "\\NC " <>) cols')) $$ - text "\\NC\\AR" - -listItemToConTeXt :: [Block] -> State WriterState Doc -listItemToConTeXt list = blockListToConTeXt list >>= - return . (text "\\item" $$) . (nest 2) - -defListItemToConTeXt :: ([Inline], [Block]) -> State WriterState BlockWrapper -defListItemToConTeXt (term, def) = do - term' <- inlineListToConTeXt term - def' <- blockListToConTeXt def - return $ Pad $ text "\\startdescr{" <> term' <> char '}' $$ def' $$ text "\\stopdescr" - --- | Convert list of block elements to ConTeXt. -blockListToConTeXt :: [Block] -> State WriterState Doc -blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . wrappedBlocksToDoc - --- | Convert list of inline elements to ConTeXt. -inlineListToConTeXt :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . hcat - --- | Convert inline element to ConTeXt -inlineToConTeXt :: Inline -- ^ Inline to convert - -> State WriterState Doc -inlineToConTeXt (Emph lst) = do - contents <- inlineListToConTeXt lst - return $ text "{\\em " <> contents <> char '}' -inlineToConTeXt (Strong lst) = do - contents <- inlineListToConTeXt lst - return $ text "{\\bf " <> contents <> char '}' -inlineToConTeXt (Strikeout lst) = do - contents <- inlineListToConTeXt lst - return $ text "\\overstrikes{" <> contents <> char '}' -inlineToConTeXt (Superscript lst) = do - contents <- inlineListToConTeXt lst - return $ text "\\high{" <> contents <> char '}' -inlineToConTeXt (Subscript lst) = do - contents <- inlineListToConTeXt lst - return $ text "\\low{" <> contents <> char '}' -inlineToConTeXt (SmallCaps lst) = do - contents <- inlineListToConTeXt lst - return $ text "{\\sc " <> contents <> char '}' -inlineToConTeXt (Code str) = return $ text $ "\\type{" ++ str ++ "}" -inlineToConTeXt (Quoted SingleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ text "\\quote{" <> contents <> char '}' -inlineToConTeXt (Quoted DoubleQuote lst) = do - contents <- inlineListToConTeXt lst - return $ text "\\quotation{" <> contents <> char '}' -inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst -inlineToConTeXt Apostrophe = return $ char '\'' -inlineToConTeXt EmDash = return $ text "---" -inlineToConTeXt EnDash = return $ text "--" -inlineToConTeXt Ellipses = return $ text "\\ldots{}" -inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str -inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" -inlineToConTeXt (TeX str) = return $ text str -inlineToConTeXt (HtmlInline _) = return empty -inlineToConTeXt (LineBreak) = return $ text "\\crlf\n" -inlineToConTeXt Space = return $ char ' ' -inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own - inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... -inlineToConTeXt (Link txt (src, _)) = do - st <- get - let next = stNextRef st - put $ st {stNextRef = next + 1} - let ref = show next - label <- inlineListToConTeXt txt - return $ text "\\useURL[" <> text ref <> text "][" <> text src <> - text "][][" <> label <> text "]\\from[" <> text ref <> char ']' -inlineToConTeXt (Image alternate (src, tit)) = do - alt <- inlineListToConTeXt alternate - return $ text "\\placefigure\n[]\n[fig:" <> alt <> text "]\n{" <> - text tit <> text "}\n{\\externalfigure[" <> text src <> text "]}" -inlineToConTeXt (Note contents) = do - contents' <- blockListToConTeXt contents - let rawnote = stripTrailingNewlines $ render contents' - -- note: a \n before } is needed when note ends with a \stoptyping - let optNewline = "\\stoptyping" `isSuffixOf` rawnote - return $ text "\\footnote{" <> - text rawnote <> (if optNewline then char '\n' else empty) <> char '}' - diff --git a/Text/Pandoc/Writers/Docbook.hs b/Text/Pandoc/Writers/Docbook.hs deleted file mode 100644 index 3e535a87e..000000000 --- a/Text/Pandoc/Writers/Docbook.hs +++ /dev/null @@ -1,262 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to Docbook XML. --} -module Text.Pandoc.Writers.Docbook ( writeDocbook) where -import Text.Pandoc.Definition -import Text.Pandoc.XML -import Text.Pandoc.Shared -import Text.Pandoc.Readers.TeXMath -import Data.List ( isPrefixOf, drop, intercalate ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) - --- | Convert list of authors to a docbook section -authorToDocbook :: [Char] -> Doc -authorToDocbook name = inTagsIndented "author" $ - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) - --- | Convert Pandoc document to string in Docbook format. -writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta title authors date) blocks) = - let head' = if writerStandalone opts - then text (writerHeader opts) - else empty - meta = if writerStandalone opts - then inTagsIndented "articleinfo" $ - (inTagsSimple "title" (wrap opts title)) $$ - (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ escapeStringForXML date)) - else empty - elements = hierarchicalize blocks - before = writerIncludeBefore opts - after = writerIncludeAfter opts - body = (if null before then empty else text before) $$ - vcat (map (elementToDocbook opts) elements) $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "article" (meta $$ body) - else body - in render $ head' $$ body' $$ text "" - --- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook opts (Blk block) = blockToDocbook opts block -elementToDocbook opts (Sec title elements) = - -- Docbook doesn't allow sections with no content, so insert some if needed - let elements' = if null elements - then [Blk (Para [])] - else elements - in inTagsIndented "section" $ - inTagsSimple "title" (wrap opts title) $$ - vcat (map (elementToDocbook opts) elements') - --- | Convert a list of Pandoc blocks to Docbook. -blocksToDocbook :: WriterOptions -> [Block] -> Doc -blocksToDocbook opts = vcat . map (blockToDocbook opts) - --- | Auxiliary function to convert Plain block to Para. -plainToPara :: Block -> Block -plainToPara (Plain x) = Para x -plainToPara x = x - --- | Convert a list of pairs of terms and definitions into a list of --- Docbook varlistentrys. -deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc -deflistItemsToDocbook opts items = - vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items - --- | Convert a term and a list of blocks into a Docbook varlistentry. -deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc -deflistItemToDocbook opts term def = - let def' = map plainToPara def - in inTagsIndented "varlistentry" $ - inTagsIndented "term" (inlinesToDocbook opts term) $$ - inTagsIndented "listitem" (blocksToDocbook opts def') - --- | Convert a list of lists of blocks to a list of Docbook list items. -listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc -listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items - --- | Convert a list of blocks into a Docbook list item. -listItemToDocbook :: WriterOptions -> [Block] -> Doc -listItemToDocbook opts item = - inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item - --- | Convert a Pandoc block element to Docbook. -blockToDocbook :: WriterOptions -> Block -> Doc -blockToDocbook _ Null = empty -blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize -blockToDocbook opts (Plain lst) = wrap opts lst -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst -blockToDocbook opts (BlockQuote blocks) = - inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock _ str) = - text "\n" <> text (escapeStringForXML str) <> text "\n" -blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty -blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = - let attribs = case numstyle of - DefaultStyle -> [] - Decimal -> [("numeration", "arabic")] - UpperAlpha -> [("numeration", "upperalpha")] - LowerAlpha -> [("numeration", "loweralpha")] - UpperRoman -> [("numeration", "upperroman")] - LowerRoman -> [("numeration", "lowerroman")] - items = if start == 1 - then listItemsToDocbook opts (first:rest) - else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest - in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawHtml str) = text str -- raw XML block -blockToDocbook _ HorizontalRule = empty -- not semantic -blockToDocbook opts (Table caption aligns widths headers rows) = - let alignStrings = map alignmentToString aligns - captionDoc = if null caption - then empty - else inTagsIndented "caption" - (inlinesToDocbook opts caption) - tableType = if isEmpty captionDoc then "informaltable" else "table" - in inTagsIndented tableType $ captionDoc $$ - (colHeadsToDocbook opts alignStrings widths headers) $$ - (vcat $ map (tableRowToDocbook opts alignStrings) rows) - -colHeadsToDocbook :: WriterOptions - -> [[Char]] - -> [Double] - -> [[Block]] - -> Doc -colHeadsToDocbook opts alignStrings widths headers = - let heads = zipWith3 (\align width item -> - tableItemToDocbook opts "th" align width item) - alignStrings widths headers - in inTagsIndented "tr" $ vcat heads - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToDocbook :: WriterOptions -> [[Char]] -> [[Block]] -> Doc -tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ - vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols - -tableItemToDocbook :: WriterOptions - -> [Char] - -> [Char] - -> Double - -> [Block] - -> Doc -tableItemToDocbook opts tag align width item = - let attrib = [("align", align)] ++ - if width /= 0 - then [("style", "{width: " ++ - show (truncate (100*width) :: Integer) ++ "%;}")] - else [] - in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item - --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts - then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) - else inlinesToDocbook opts lst - --- | Convert a list of inline elements to Docbook. -inlinesToDocbook :: WriterOptions -> [Inline] -> Doc -inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst - --- | Convert an inline element to Docbook. -inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = - inTagsSimple "emphasis" $ inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = - inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = - inTags False "emphasis" [("role", "strikethrough")] $ - inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = - inTagsSimple "superscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = - inTagsSimple "subscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (SmallCaps lst) = - inTags False "emphasis" [("role", "smallcaps")] $ - inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = - inTagsSimple "quote" $ inlinesToDocbook opts lst -inlineToDocbook opts (Cite _ lst) = - inlinesToDocbook opts lst -inlineToDocbook _ Apostrophe = char '\'' -inlineToDocbook _ Ellipses = text "…" -inlineToDocbook _ EmDash = text "—" -inlineToDocbook _ EnDash = text "–" -inlineToDocbook _ (Code str) = - inTagsSimple "literal" $ text (escapeStringForXML str) -inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str -inlineToDocbook _ (TeX _) = empty -inlineToDocbook _ (HtmlInline _) = empty -inlineToDocbook _ LineBreak = text $ "" -inlineToDocbook _ Space = char ' ' -inlineToDocbook opts (Link txt (src, _)) = - if isPrefixOf "mailto:" src - then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text $ - escapeStringForXML $ src' - in if txt == [Code src'] - then emailLink - else inlinesToDocbook opts txt <+> char '(' <> emailLink <> - char ')' - else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt -inlineToDocbook _ (Image _ (src, tit)) = - let titleDoc = if null tit - then empty - else inTagsIndented "objectinfo" $ - inTagsIndented "title" (text $ escapeStringForXML tit) - in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] -inlineToDocbook opts (Note contents) = - inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/Text/Pandoc/Writers/HTML.hs b/Text/Pandoc/Writers/HTML.hs deleted file mode 100644 index fb7320e92..000000000 --- a/Text/Pandoc/Writers/HTML.hs +++ /dev/null @@ -1,557 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-deprecations #-} -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to HTML. --} -module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where -import Text.Pandoc.Definition -import Text.Pandoc.LaTeXMathML -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Text.Pandoc.Shared -import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) -import Numeric ( showHex ) -import Data.Char ( ord, toLower, isAlpha ) -import Data.List ( isPrefixOf, intercalate ) -import qualified Data.Set as S -import Control.Monad.State -import Text.XHtml.Transitional hiding ( stringToHtml ) - -data WriterState = WriterState - { stNotes :: [Html] -- ^ List of notes - , stIds :: [String] -- ^ List of header identifiers - , stMath :: Bool -- ^ Math is used in document - , stCSS :: S.Set String -- ^ CSS to include in header - } deriving Show - -defaultWriterState :: WriterState -defaultWriterState = WriterState {stNotes= [], stIds = [], - stMath = False, stCSS = S.empty} - --- Helpers to render HTML with the appropriate function. - -render :: (HTML html) => WriterOptions -> html -> String -render opts = if writerWrapText opts then renderHtml else showHtml - -renderFragment :: (HTML html) => WriterOptions -> html -> String -renderFragment opts = if writerWrapText opts - then renderHtmlFragment - else showHtmlFragment - --- | Slightly modified version of Text.XHtml's stringToHtml. --- Only uses numerical entities for 0xff and greater. --- Adds  . -stringToHtml :: String -> Html -stringToHtml = primHtml . concatMap fixChar - where - fixChar '<' = "<" - fixChar '>' = ">" - fixChar '&' = "&" - fixChar '"' = """ - fixChar '\160' = " " - fixChar c | ord c < 0xff = [c] - fixChar c = "&#" ++ show (ord c) ++ ";" - --- | Convert Pandoc document to Html string. -writeHtmlString :: WriterOptions -> Pandoc -> String -writeHtmlString opts = - if writerStandalone opts - then render opts . writeHtml opts - else renderFragment opts . writeHtml opts - --- | Convert Pandoc document to Html structure. -writeHtml :: WriterOptions -> Pandoc -> Html -writeHtml opts (Pandoc (Meta tit authors date) blocks) = - let titlePrefix = writerTitlePrefix opts - topTitle = evalState (inlineListToHtml opts tit) defaultWriterState - topTitle' = if null titlePrefix - then topTitle - else if null tit - then stringToHtml titlePrefix - else titlePrefix +++ " - " +++ topTitle - metadata = thetitle topTitle' +++ - meta ! [httpequiv "Content-Type", - content "text/html; charset=UTF-8"] +++ - meta ! [name "generator", content "pandoc"] +++ - (toHtmlFromList $ - map (\a -> meta ! [name "author", content a]) authors) +++ - (if null date - then noHtml - else meta ! [name "date", content date]) - titleHeader = if writerStandalone opts && not (null tit) && - not (writerS5 opts) - then h1 ! [theclass "title"] $ topTitle - else noHtml - headerBlocks = filter isHeaderBlock blocks - ids = uniqueIdentifiers $ - map (\(Header _ lst) -> lst) headerBlocks - toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks ids - else noHtml - (blocks', newstate) = - runState (blockListToHtml opts blocks) - (defaultWriterState {stIds = ids}) - cssLines = stCSS newstate - css = if S.null cssLines - then noHtml - else style ! [thetype "text/css"] $ primHtml $ - '\n':(unlines $ S.toList cssLines) - math = if stMath newstate - then case writerHTMLMathMethod opts of - LaTeXMathML Nothing -> - primHtml latexMathMLScript - LaTeXMathML (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - JsMath (Just url) -> - script ! - [src url, thetype "text/javascript"] $ - noHtml - _ -> noHtml - else noHtml - head' = header $ metadata +++ math +++ css +++ - primHtml (writerHeader opts) - notes = reverse (stNotes newstate) - before = primHtml $ writerIncludeBefore opts - after = primHtml $ writerIncludeAfter opts - thebody = before +++ titleHeader +++ toc +++ blocks' +++ - footnoteSection notes +++ after - in if writerStandalone opts - then head' +++ body thebody - else thebody - --- | Construct table of contents from list of header blocks and identifiers. --- Assumes there are as many identifiers as header blocks. -tableOfContents :: WriterOptions -> [Block] -> [String] -> Html -tableOfContents _ [] _ = noHtml -tableOfContents opts headers ids = - let opts' = opts { writerIgnoreNotes = True } - contentsTree = hierarchicalize headers - contents = evalState (mapM (elementToListItem opts') contentsTree) - (defaultWriterState {stIds = ids}) - in thediv ! [identifier "toc"] $ unordList contents - --- | Converts an Element to a list item for a table of contents, --- retrieving the appropriate identifier from state. -elementToListItem :: WriterOptions -> Element -> State WriterState Html -elementToListItem _ (Blk _) = return noHtml -elementToListItem opts (Sec headerText subsecs) = do - st <- get - let ids = stIds st - let (id', rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - txt <- inlineListToHtml opts headerText - subHeads <- mapM (elementToListItem opts) subsecs - let subList = if null subHeads - then noHtml - else unordList subHeads - return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++ - subList - --- | Convert list of Note blocks to a footnote
. --- Assumes notes are sorted. -footnoteSection :: [Html] -> Html -footnoteSection notes = - if null notes - then noHtml - else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) - - --- | Parse a mailto link; return Just (name, domain) or Nothing. -parseMailto :: String -> Maybe (String, String) -parseMailto ('m':'a':'i':'l':'t':'o':':':addr) = - let (name', rest) = span (/='@') addr - domain = drop 1 rest - in Just (name', domain) -parseMailto _ = Nothing - --- | Obfuscate a "mailto:" link. -obfuscateLink :: WriterOptions -> String -> String -> Html -obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = - anchor ! [href s] << txt -obfuscateLink opts txt s = - let meth = writerEmailObfuscation opts - s' = map toLower s - in case parseMailto s' of - (Just (name', domain)) -> - let domain' = substitute "." " dot " domain - at' = obfuscateChar '@' - (linkText, altText) = - if txt == drop 7 s' -- autolink - then ("''+e+''", name' ++ " at " ++ domain') - else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ - domain' ++ ")") - in case meth of - ReferenceObfuscation -> - -- need to use primHtml or &'s are escaped to & in URL - primHtml $ "" ++ (obfuscateString txt) ++ "" - JavascriptObfuscation -> - (script ! [thetype "text/javascript"] $ - primHtml ("\n\n")) +++ - noscript (primHtml $ obfuscateString altText) - _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ primHtml txt -- malformed email - --- | Obfuscate character as entity. -obfuscateChar :: Char -> String -obfuscateChar char = - let num = ord char - numstr = if even num then show num else "x" ++ showHex num "" - in "&#" ++ numstr ++ ";" - --- | Obfuscate string using entities. -obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences - --- | True if character is a punctuation character (unicode). -isPunctuation :: Char -> Bool -isPunctuation c = - let c' = ord c - in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || - c' >= 0xE000 && c' <= 0xE0FF - then True - else False - --- | Add CSS for document header. -addToCSS :: String -> State WriterState () -addToCSS item = do - st <- get - let current = stCSS st - put $ st {stCSS = S.insert item current} - --- | Convert Pandoc inline list to plain text identifier. -inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' - -inlineListToIdentifier' :: [Inline] -> [Char] -inlineListToIdentifier' [] = "" -inlineListToIdentifier' (x:xs) = - xAsText ++ inlineListToIdentifier' xs - where xAsText = case x of - Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ - intercalate "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier' lst - Strikeout lst -> inlineListToIdentifier' lst - Superscript lst -> inlineListToIdentifier' lst - SmallCaps lst -> inlineListToIdentifier' lst - Subscript lst -> inlineListToIdentifier' lst - Strong lst -> inlineListToIdentifier' lst - Quoted _ lst -> inlineListToIdentifier' lst - Cite _ lst -> inlineListToIdentifier' lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - Math _ _ -> "" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier' lst - Image lst _ -> inlineListToIdentifier' lst - Note _ -> "" - --- | Return unique identifiers for list of inline lists. -uniqueIdentifiers :: [[Inline]] -> [String] -uniqueIdentifiers ls = - let addIdentifier (nonuniqueIds, uniqueIds) l = - let new = inlineListToIdentifier l - matches = length $ filter (== new) nonuniqueIds - new' = (if null new then "section" else new) ++ - if matches > 0 then ("-" ++ show matches) else "" - in (new:nonuniqueIds, new':uniqueIds) - in reverse $ snd $ foldl addIdentifier ([],[]) ls - --- | Convert Pandoc block element to HTML. -blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml _ Null = return $ noHtml -blockToHtml opts (Plain lst) = inlineListToHtml opts lst -blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) -blockToHtml _ (RawHtml str) = return $ primHtml str -blockToHtml _ (HorizontalRule) = return $ hr -blockToHtml opts (CodeBlock (_,classes,_) rawCode) | "haskell" `elem` classes && - writerLiterateHaskell opts = - let classes' = map (\c -> if c == "haskell" then "literatehaskell" else c) classes - in blockToHtml opts $ CodeBlock ("",classes',[]) $ intercalate "\n" $ map ("> " ++) $ lines rawCode -blockToHtml _ (CodeBlock attr@(_,classes,_) rawCode) = do - case highlightHtml attr rawCode of - Left _ -> -- change leading newlines into
tags, because some - -- browsers ignore leading newlines in pre blocks - let (leadingBreaks, rawCode') = span (=='\n') rawCode - in return $ pre ! (if null classes - then [] - else [theclass $ unwords classes]) $ thecode << - (replicate (length leadingBreaks) br +++ - [stringToHtml $ rawCode' ++ "\n"]) - Right h -> addToCSS defaultHighlightingCss >> return h -blockToHtml opts (BlockQuote blocks) = - -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; - -- otherwise incremental - if writerS5 opts - then let inc = not (writerIncremental opts) in - case blocks of - [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) - (BulletList lst) - [OrderedList attribs lst] -> - blockToHtml (opts {writerIncremental = inc}) - (OrderedList attribs lst) - _ -> blockListToHtml opts blocks >>= - (return . blockquote) - else blockListToHtml opts blocks >>= (return . blockquote) -blockToHtml opts (Header level lst) = do - contents <- inlineListToHtml opts lst - st <- get - let ids = stIds st - let (id', rest) = if null ids - then ("", []) - else (head ids, tail ids) - put $ st {stIds = rest} - let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) - then [] - else [identifier id'] - let contents' = if writerTableOfContents opts - then anchor ! [href ("#TOC-" ++ id')] $ contents - else contents - return $ case level of - 1 -> h1 contents' ! attribs - 2 -> h2 contents' ! attribs - 3 -> h3 contents' ! attribs - 4 -> h4 contents' ! attribs - 5 -> h5 contents' ! attribs - 6 -> h6 contents' ! attribs - _ -> paragraph contents' ! attribs -blockToHtml opts (BulletList lst) = do - contents <- mapM (blockListToHtml opts) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ unordList ! attribs $ contents -blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do - contents <- mapM (blockListToHtml opts) lst - let numstyle' = camelCaseToHyphenated $ show numstyle - let attribs = (if writerIncremental opts - then [theclass "incremental"] - else []) ++ - (if startnum /= 1 - then [start startnum] - else []) ++ - (if numstyle /= DefaultStyle - then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"] - else []) - return $ ordList ! attribs $ contents -blockToHtml opts (DefinitionList lst) = do - contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term - def' <- blockListToHtml opts def - return $ (term', def')) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ defList ! attribs $ contents -blockToHtml opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null capt - then return noHtml - else inlineListToHtml opts capt >>= return . caption - colHeads <- colHeadsToHtml opts alignStrings - widths headers - rows'' <- zipWithM (tableRowToHtml opts alignStrings) (cycle ["odd", "even"]) rows' - return $ table $ captionDoc +++ colHeads +++ rows'' - -colHeadsToHtml :: WriterOptions - -> [[Char]] - -> [Double] - -> [[Block]] - -> State WriterState Html -colHeadsToHtml opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item) - alignStrings widths headers - return $ tr ! [theclass "header"] $ toHtmlFromList heads - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToHtml :: WriterOptions - -> [[Char]] - -> String - -> [[Block]] - -> State WriterState Html -tableRowToHtml opts aligns rowclass columns = - (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>= - return . (tr ! [theclass rowclass]) . toHtmlFromList - -tableItemToHtml :: WriterOptions - -> (Html -> Html) - -> [Char] - -> Double - -> [Block] - -> State WriterState Html -tableItemToHtml opts tag' align' width' item = do - contents <- blockListToHtml opts item - let attrib = [align align'] ++ - if width' /= 0 - then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] - else [] - return $ tag' ! attrib $ contents - -blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html -blockListToHtml opts lst = - mapM (blockToHtml opts) lst >>= return . toHtmlFromList - --- | Convert list of Pandoc inline elements to HTML. -inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst = - mapM (inlineToHtml opts) lst >>= return . toHtmlFromList - --- | Convert Pandoc inline element to HTML. -inlineToHtml :: WriterOptions -> Inline -> State WriterState Html -inlineToHtml opts inline = - case inline of - (Str str) -> return $ stringToHtml str - (Space) -> return $ stringToHtml " " - (LineBreak) -> return $ br - (EmDash) -> return $ primHtmlChar "mdash" - (EnDash) -> return $ primHtmlChar "ndash" - (Ellipses) -> return $ primHtmlChar "hellip" - (Apostrophe) -> return $ primHtmlChar "rsquo" - (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize - (Strong lst) -> inlineListToHtml opts lst >>= return . strong - (Code str) -> return $ thecode << str - (Strikeout lst) -> inlineListToHtml opts lst >>= - return . (thespan ! [thestyle "text-decoration: line-through;"]) - (SmallCaps lst) -> inlineListToHtml opts lst >>= - return . (thespan ! [thestyle "font-variant: small-caps;"]) - (Superscript lst) -> inlineListToHtml opts lst >>= return . sup - (Subscript lst) -> inlineListToHtml opts lst >>= return . sub - (Quoted quoteType lst) -> - let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (primHtmlChar "lsquo", - primHtmlChar "rsquo") - DoubleQuote -> (primHtmlChar "ldquo", - primHtmlChar "rdquo") - in do contents <- inlineListToHtml opts lst - return $ leftQuote +++ contents +++ rightQuote - (Math t str) -> - modify (\st -> st {stMath = True}) >> - (case writerHTMLMathMethod opts of - LaTeXMathML _ -> - -- putting LaTeXMathML in container with class "LaTeX" prevents - -- non-math elements on the page from being treated as math by - -- the javascript - return $ thespan ! [theclass "LaTeX"] $ - if t == InlineMath - then primHtml ("$" ++ str ++ "$") - else primHtml ("$$" ++ str ++ "$$") - JsMath _ -> - return $ if t == InlineMath - then thespan ! [theclass "math"] $ primHtml str - else thediv ! [theclass "math"] $ primHtml str - MimeTeX url -> - return $ image ! [src (url ++ "?" ++ str), - alt str, title str] - GladTeX -> - return $ primHtml $ "" ++ str ++ "" - PlainMath -> - inlineListToHtml opts (readTeXMath str) >>= - return . (thespan ! [theclass "math"])) - (TeX str) -> case writerHTMLMathMethod opts of - LaTeXMathML _ -> do modify (\st -> st {stMath = True}) - return $ primHtml str - _ -> return noHtml - (HtmlInline str) -> return $ primHtml str - (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s -> - return $ obfuscateLink opts str s - (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do - linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (show linkText) s - (Link txt (s,tit)) -> do - linkText <- inlineListToHtml opts txt - return $ anchor ! ([href s] ++ - if null tit then [] else [title tit]) $ - linkText - (Image txt (s,tit)) -> do - alternate <- inlineListToHtml opts txt - let alternate' = renderFragment opts alternate - let attributes = [src s] ++ - (if null tit - then [] - else [title tit]) ++ - if null txt - then [] - else [alt alternate'] - return $ image ! attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do - st <- get - let notes = stNotes st - let number = (length notes) + 1 - let ref = show number - htmlContents <- blockListToNote opts ref contents - -- push contents onto front of notes - put $ st {stNotes = (htmlContents:notes)} - return $ anchor ! [href ("#fn" ++ ref), - theclass "footnoteRef", - identifier ("fnref" ++ ref)] << - sup << ref - (Cite _ il) -> inlineListToHtml opts il - -blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html -blockListToNote opts ref blocks = - -- If last block is Para or Plain, include the backlink at the end of - -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [HtmlInline $ " "] - blocks' = if null blocks - then [] - else let lastBlock = last blocks - otherBlocks = init blocks - in case lastBlock of - (Para lst) -> otherBlocks ++ - [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ - [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, - Plain backlink] - in do contents <- blockListToHtml opts blocks' - return $ li ! [identifier ("fn" ++ ref)] $ contents - diff --git a/Text/Pandoc/Writers/LaTeX.hs b/Text/Pandoc/Writers/LaTeX.hs deleted file mode 100644 index f3cbf1acb..000000000 --- a/Text/Pandoc/Writers/LaTeX.hs +++ /dev/null @@ -1,331 +0,0 @@ -{- -Copyright (C) 2006-8 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into LaTeX. --} -module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( (\\), isSuffixOf, intercalate ) -import Data.Char ( toLower ) -import qualified Data.Set as S -import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) - -data WriterState = - WriterState { stIncludes :: S.Set String -- strings to include in header - , stInNote :: Bool -- @True@ if we're in a note - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter - } - --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do - st <- get - let includes = stIncludes st - put st {stIncludes = S.insert str includes} - --- | Convert Pandoc to LaTeX. -writeLaTeX :: WriterOptions -> Pandoc -> String -writeLaTeX options document = - render $ evalState (pandocToLaTeX options document) $ - WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1, stOptions = options } - -pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToLaTeX options (Pandoc meta blocks) = do - main <- blockListToLaTeX blocks - head' <- if writerStandalone options - then latexHeader options meta - else return empty - let before = if null (writerIncludeBefore options) - then empty - else text (writerIncludeBefore options) - let after = if null (writerIncludeAfter options) - then empty - else text (writerIncludeAfter options) - let body = before $$ main $$ after - let toc = if writerTableOfContents options - then text "\\tableofcontents\n" - else empty - let foot = if writerStandalone options - then text "\\end{document}" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into LaTeX header. -latexHeader :: WriterOptions -- ^ Options, including LaTeX header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -latexHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else inlineListToLaTeX title >>= return . inCmd "title" - headerIncludes <- get >>= return . S.toList . stIncludes - let extras = text $ unlines headerIncludes - let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes - then text "\\VerbatimFootnotes % allows verbatim text in footnotes" - else empty - let authorstext = text $ "\\author{" ++ - intercalate "\\\\" (map stringToLaTeX authors) ++ "}" - let datetext = if date == "" - then empty - else text $ "\\date{" ++ stringToLaTeX date ++ "}" - let maketitle = if null title then empty else text "\\maketitle" - let secnumline = if (writerNumberSections options) - then empty - else text "\\setcounter{secnumdepth}{0}" - let baseHeader = text $ writerHeader options - let header = baseHeader $$ extras - return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ - datetext $$ text "\\begin{document}" $$ maketitle $$ text "" - --- escape things as needed for LaTeX - -stringToLaTeX :: String -> String -stringToLaTeX = escapeStringUsing latexEscapes - where latexEscapes = backslashEscapes "{}$%&_#" ++ - [ ('^', "\\^{}") - , ('\\', "\\textbackslash{}") - , ('~', "\\ensuremath{\\sim}") - , ('|', "\\textbar{}") - , ('<', "\\textless{}") - , ('>', "\\textgreater{}") - , ('\160', "~") - ] - --- | Puts contents into LaTeX command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '\\' <> text cmd <> braces contents - --- | Remove all code elements from list of inline elements --- (because it's illegal to have verbatim inside some command arguments) -deVerb :: [Inline] -> [Inline] -deVerb [] = [] -deVerb ((Code str):rest) = - (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) -deVerb (other:rest) = other:(deVerb rest) - --- | Convert Pandoc block element to LaTeX. -blockToLaTeX :: Block -- ^ Block to convert - -> State WriterState Doc -blockToLaTeX Null = return empty -blockToLaTeX (Plain lst) = do - st <- get - let opts = stOptions st - wrapTeXIfNeeded opts True inlineListToLaTeX lst -blockToLaTeX (Para lst) = do - st <- get - let opts = stOptions st - result <- wrapTeXIfNeeded opts True inlineListToLaTeX lst - return $ result <> char '\n' -blockToLaTeX (BlockQuote lst) = do - contents <- blockListToLaTeX lst - return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" -blockToLaTeX (CodeBlock (_,classes,_) str) = do - st <- get - env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes - then return "code" - else if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - return "Verbatim" - else return "verbatim" - return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> - text ("\n\\end{" ++ env ++ "}") -blockToLaTeX (RawHtml _) = return empty -blockToLaTeX (BulletList lst) = do - items <- mapM listItemToLaTeX lst - return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" -blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do - st <- get - let oldlevel = stOLLevel st - put $ st {stOLLevel = oldlevel + 1} - items <- mapM listItemToLaTeX lst - modify (\s -> s {stOLLevel = oldlevel}) - exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim - then do addToHeader "\\usepackage{enumerate}" - return $ char '[' <> - text (head (orderedListMarkers (1, numstyle, - numdelim))) <> char ']' - else return empty - let resetcounter = if start /= 1 && oldlevel <= 4 - then text $ "\\setcounter{enum" ++ - map toLower (toRomanNumeral oldlevel) ++ - "}{" ++ show (start - 1) ++ "}" - else empty - return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ - vcat items $$ text "\\end{enumerate}" -blockToLaTeX (DefinitionList lst) = do - items <- mapM defListItemToLaTeX lst - return $ text "\\begin{description}" $$ vcat items $$ - text "\\end{description}" -blockToLaTeX HorizontalRule = return $ text $ - "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" -blockToLaTeX (Header level lst) = do - txt <- inlineListToLaTeX (deVerb lst) - return $ if (level > 0) && (level <= 3) - then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++ - "section{") <> txt <> text "}\n" - else txt <> char '\n' -blockToLaTeX (Table caption aligns widths heads rows) = do - headers <- tableRowToLaTeX heads - captionText <- inlineListToLaTeX caption - rows' <- mapM tableRowToLaTeX rows - let colWidths = map (printf "%.2f") widths - let colDescriptors = concat $ zipWith - (\width align -> ">{\\PBS" ++ - (case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright") ++ - "\\hspace{0pt}}p{" ++ width ++ - "\\columnwidth}") - colWidths aligns - let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ - headers $$ text "\\hline" $$ vcat rows' $$ - text "\\end{tabular}" - let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" - addToHeader $ "\\usepackage{array}\n" ++ - "% This is needed because raggedright in table elements redefines \\\\:\n" ++ - "\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n" ++ - "\\let\\PBS=\\PreserveBackslash" - return $ if isEmpty captionText - then centered tableBody <> char '\n' - else text "\\begin{table}[h]" $$ centered tableBody $$ - inCmd "caption" captionText $$ text "\\end{table}\n" - -blockListToLaTeX :: [Block] -> State WriterState Doc -blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat - -tableRowToLaTeX :: [[Block]] -> State WriterState Doc -tableRowToLaTeX cols = mapM blockListToLaTeX cols >>= - return . ($$ text "\\\\") . foldl (\row item -> row $$ - (if isEmpty row then text "" else text " & ") <> item) empty - -listItemToLaTeX :: [Block] -> State WriterState Doc -listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . - (nest 2) - -defListItemToLaTeX :: ([Inline], [Block]) -> State WriterState Doc -defListItemToLaTeX (term, def) = do - term' <- inlineListToLaTeX $ deVerb term - def' <- blockListToLaTeX def - return $ text "\\item[" <> term' <> text "]" $$ def' - --- | Convert list of inline elements to LaTeX. -inlineListToLaTeX :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat - -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True -isQuoted _ = False - --- | Convert inline element to LaTeX -inlineToLaTeX :: Inline -- ^ Inline to convert - -> State WriterState Doc -inlineToLaTeX (Emph lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" -inlineToLaTeX (Strong lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" -inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX $ deVerb lst - addToHeader "\\usepackage[normalem]{ulem}" - return $ inCmd "sout" contents -inlineToLaTeX (Superscript lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" -inlineToLaTeX (Subscript lst) = do - contents <- inlineListToLaTeX $ deVerb lst - -- oddly, latex includes \textsuperscript but not \textsubscript - -- so we have to define it (using a different name so as not to conflict with memoir class): - addToHeader "\\newcommand{\\textsubscr}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" - return $ inCmd "textsubscr" contents -inlineToLaTeX (SmallCaps lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc" -inlineToLaTeX (Cite _ lst) = - inlineListToLaTeX lst -inlineToLaTeX (Code str) = do - st <- get - if stInNote st - then do addToHeader "\\usepackage{fancyvrb}" - else return () - let chr = ((enumFromTo '!' '~') \\ str) !! 0 - return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] -inlineToLaTeX (Quoted SingleQuote lst) = do - contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," - else empty - return $ char '`' <> s1 <> contents <> s2 <> char '\'' -inlineToLaTeX (Quoted DoubleQuote lst) = do - contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," - else empty - return $ text "``" <> s1 <> contents <> s2 <> text "''" -inlineToLaTeX Apostrophe = return $ char '\'' -inlineToLaTeX EmDash = return $ text "---" -inlineToLaTeX EnDash = return $ text "--" -inlineToLaTeX Ellipses = return $ text "\\ldots{}" -inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str -inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToLaTeX (Math DisplayMath str) = return $ text "\\[" <> text str <> text "\\]" -inlineToLaTeX (TeX str) = return $ text str -inlineToLaTeX (HtmlInline _) = return empty -inlineToLaTeX (LineBreak) = return $ text "\\\\" -inlineToLaTeX Space = return $ char ' ' -inlineToLaTeX (Link txt (src, _)) = do - addToHeader "\\usepackage[breaklinks=true]{hyperref}" - case txt of - [Code x] | x == src -> -- autolink - do addToHeader "\\usepackage{url}" - return $ text $ "\\url{" ++ x ++ "}" - _ -> do contents <- inlineListToLaTeX $ deVerb txt - return $ text ("\\href{" ++ src ++ "}{") <> contents <> - char '}' -inlineToLaTeX (Image _ (source, _)) = do - addToHeader "\\usepackage{graphicx}" - return $ text $ "\\includegraphics{" ++ source ++ "}" -inlineToLaTeX (Note contents) = do - st <- get - put (st {stInNote = True}) - contents' <- blockListToLaTeX contents - modify (\s -> s {stInNote = False}) - let rawnote = stripTrailingNewlines $ render contents' - -- note: a \n before } is needed when note ends with a Verbatim environment - let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote - return $ text "\\footnote{" <> - text rawnote <> (if optNewline then char '\n' else empty) <> char '}' diff --git a/Text/Pandoc/Writers/Man.hs b/Text/Pandoc/Writers/Man.hs deleted file mode 100644 index 210c7ed07..000000000 --- a/Text/Pandoc/Writers/Man.hs +++ /dev/null @@ -1,301 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to groff man page format. - --} -module Text.Pandoc.Writers.Man ( writeMan) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Printf ( printf ) -import Data.List ( isPrefixOf, drop, nub, intersperse, intercalate ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Preprocessors = [String] -- e.g. "t" for tbl -type WriterState = (Notes, Preprocessors) - --- | Convert Pandoc to Man. -writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) - --- | Return groff man representation of document. -pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMan opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - (head', foot) <- metaToMan opts meta - body <- blockListToMan opts blocks - (notes, preprocessors) <- get - let preamble = if null preprocessors || not (writerStandalone opts) - then empty - else text $ ".\\\" " ++ concat (nub preprocessors) - notes' <- notesToMan opts (reverse notes) - return $ preamble $$ head' $$ before' $$ body $$ notes' $$ foot $$ after' - --- | Insert bibliographic information into Man header and footer. -metaToMan :: WriterOptions -- ^ Options, including Man header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState (Doc, Doc) -metaToMan options (Meta title authors date) = do - titleText <- inlineListToMan options title - let (cmdName, rest) = break (== ' ') $ render titleText - let (title', section) = case reverse cmdName of - (')':d:'(':xs) | d `elem` ['0'..'9'] -> - (text (reverse xs), char d) - xs -> (text (reverse xs), doubleQuotes empty) - let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ - splitBy '|' rest - let head' = (text ".TH") <+> title' <+> section <+> - doubleQuotes (text date) <+> hsep extras - let foot = case length authors of - 0 -> empty - 1 -> text ".SH AUTHOR" $$ (text $ intercalate ", " authors) - _ -> text ".SH AUTHORS" $$ (text $ intercalate ", " authors) - return $ if writerStandalone options - then (head', foot) - else (empty, empty) - --- | Return man representation of notes. -notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMan opts notes = - if null notes - then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= - return . (text ".SH NOTES" $$) . vcat - --- | Return man representation of a note. -noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMan opts num note = do - contents <- blockListToMan opts note - let marker = text "\n.SS [" <> text (show num) <> char ']' - return $ marker $$ contents - --- | Association list of characters to escape. -manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes ".@\\" - --- | Escape special characters for Man. -escapeString :: String -> String -escapeString = escapeStringUsing manEscapes - --- | Escape a literal (code) section for Man. -escapeCode :: String -> String -escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") - --- | Convert Pandoc block element to man. -blockToMan :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToMan _ Null = return empty -blockToMan opts (Plain inlines) = - wrapIfNeeded opts (inlineListToMan opts) inlines -blockToMan opts (Para inlines) = do - contents <- wrapIfNeeded opts (inlineListToMan opts) inlines - return $ text ".PP" $$ contents -blockToMan _ (RawHtml str) = return $ text str -blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *" -blockToMan opts (Header level inlines) = do - contents <- inlineListToMan opts inlines - let heading = case level of - 1 -> ".SH " - _ -> ".SS " - return $ text heading <> contents -blockToMan _ (CodeBlock _ str) = return $ - text ".PP" $$ text "\\f[CR]" $$ - text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" -blockToMan opts (BlockQuote blocks) = do - contents <- blockListToMan opts blocks - return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) = - let aligncode AlignLeft = "l" - aligncode AlignRight = "r" - aligncode AlignCenter = "c" - aligncode AlignDefault = "l" - in do - caption' <- inlineListToMan opts caption - modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) - let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths - -- 78n default width - 8n indent = 70n - let coldescriptions = text $ intercalate " " - (zipWith (\align width -> aligncode align ++ width) - alignments iwidths) ++ "." - colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ - text "T}" - let colheadings' = makeRow colheadings - body <- mapM (\row -> do - cols <- mapM (blockListToMan opts) row - return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ - colheadings' $$ char '_' $$ vcat body $$ text ".TE" - -blockToMan opts (BulletList items) = do - contents <- mapM (bulletListItemToMan opts) items - return (vcat contents) -blockToMan opts (OrderedList attribs items) = do - let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + (maximum $ map length markers) - contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ - zip markers items - return (vcat contents) -blockToMan opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMan opts) items - return (vcat contents) - --- | Convert bullet list item (list of blocks) to man. -bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = - bulletListItemToMan opts ((Plain first):rest) -bulletListItemToMan opts ((Plain first):rest) = do - first' <- blockToMan opts (Plain first) - rest' <- blockListToMan opts rest - let first'' = text ".IP \\[bu] 2" $$ first' - let rest'' = if null rest - then empty - else text ".RS 2" $$ rest' $$ text ".RE" - return (first'' $$ rest'') -bulletListItemToMan opts (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" - --- | Convert ordered list item (a list of blocks) to man. -orderedListItemToMan :: WriterOptions -- ^ options - -> String -- ^ order marker for list item - -> Int -- ^ number of spaces to indent - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = - orderedListItemToMan opts num indent ((Plain first):rest) -orderedListItemToMan opts num indent (first:rest) = do - first' <- blockToMan opts first - rest' <- blockListToMan opts rest - let num' = printf ("%" ++ show (indent - 1) ++ "s") num - let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' - let rest'' = if null rest - then empty - else text ".RS 4" $$ rest' $$ text ".RE" - return $ first'' $$ rest'' - --- | Convert definition list item (label, list of blocks) to man. -definitionListItemToMan :: WriterOptions - -> ([Inline],[Block]) - -> State WriterState Doc -definitionListItemToMan opts (label, items) = do - labelText <- inlineListToMan opts label - contents <- if null items - then return empty - else do - let (first, rest) = case items of - ((Para x):y) -> (Plain x,y) - (x:y) -> (x,y) - [] -> error "items is null" - rest' <- mapM (\item -> blockToMan opts item) - rest >>= (return . vcat) - first' <- blockToMan opts first - return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP\n.B " <> labelText $+$ contents - --- | Convert list of Pandoc block elements to man. -blockListToMan :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToMan opts blocks = - mapM (blockToMan opts) blocks >>= (return . vcat) - --- | Convert list of Pandoc inline elements to man. -inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) - --- | Convert Pandoc inline element to man. -inlineToMan :: WriterOptions -> Inline -> State WriterState Doc -inlineToMan opts (Emph lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[I]" <> contents <> text "\\f[]" -inlineToMan opts (Strong lst) = do - contents <- inlineListToMan opts lst - return $ text "\\f[B]" <> contents <> text "\\f[]" -inlineToMan opts (Strikeout lst) = do - contents <- inlineListToMan opts lst - return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToMan opts (Superscript lst) = do - contents <- inlineListToMan opts lst - return $ char '^' <> contents <> char '^' -inlineToMan opts (Subscript lst) = do - contents <- inlineListToMan opts lst - return $ char '~' <> contents <> char '~' -inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported -inlineToMan opts (Quoted SingleQuote lst) = do - contents <- inlineListToMan opts lst - return $ char '`' <> contents <> char '\'' -inlineToMan opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMan opts lst - return $ text "\\[lq]" <> contents <> text "\\[rq]" -inlineToMan opts (Cite _ lst) = - inlineListToMan opts lst -inlineToMan _ EmDash = return $ text "\\[em]" -inlineToMan _ EnDash = return $ text "\\[en]" -inlineToMan _ Apostrophe = return $ char '\'' -inlineToMan _ Ellipses = return $ text "\\&..." -inlineToMan _ (Code str) = - return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" -inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str) -inlineToMan opts (Math DisplayMath str) = do - contents <- inlineToMan opts (Code str) - return $ text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (TeX _) = return empty -inlineToMan _ (HtmlInline str) = return $ text $ escapeCode str -inlineToMan _ (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" -inlineToMan _ Space = return $ char ' ' -inlineToMan opts (Link txt (src, _)) = do - linktext <- inlineListToMan opts txt - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ if txt == [Code srcSuffix] - then char '<' <> text srcSuffix <> char '>' - else linktext <> text " (" <> text src <> char ')' -inlineToMan opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] - else alternate - linkPart <- inlineToMan opts (Link txt (source, tit)) - return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan _ (Note contents) = do - modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state - (notes, _) <- get - let ref = show $ (length notes) - return $ char '[' <> text ref <> char ']' - diff --git a/Text/Pandoc/Writers/Markdown.hs b/Text/Pandoc/Writers/Markdown.hs deleted file mode 100644 index 70d1f0c91..000000000 --- a/Text/Pandoc/Writers/Markdown.hs +++ /dev/null @@ -1,396 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to markdown-formatted plain text. - -Markdown: --} -module Text.Pandoc.Writers.Markdown ( writeMarkdown) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Blocks -import Text.ParserCombinators.Parsec ( parse, GenParser ) -import Data.List ( group, isPrefixOf, drop, find, intersperse, intercalate ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State - -type Notes = [[Block]] -type Refs = KeyTable -type WriterState = (Notes, Refs) - --- | Convert Pandoc to Markdown. -writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown opts document = - render $ evalState (pandocToMarkdown opts document) ([],[]) - --- | Return markdown representation of document. -pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToMarkdown opts (Pandoc meta blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let before' = if null before then empty else text before - let after' = if null after then empty else text after - metaBlock <- metaToMarkdown opts meta - let head' = if writerStandalone opts - then metaBlock $+$ text (writerHeader opts) - else empty - let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts - then tableOfContents opts headerBlocks - else empty - body <- blockListToMarkdown opts blocks - (notes, _) <- get - notes' <- notesToMarkdown opts (reverse notes) - (_, refs) <- get -- note that the notes may contain refs - refs' <- keyTableToMarkdown opts (reverse refs) - return $ head' $+$ before' $+$ toc $+$ body $+$ text "" $+$ - notes' $+$ text "" $+$ refs' $+$ after' - --- | Return markdown representation of reference key table. -keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat - --- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions - -> ([Inline], (String, String)) - -> State WriterState Doc -keyToMarkdown opts (label, (src, tit)) = do - label' <- inlineListToMarkdown opts label - let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" - return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> - text src <> tit' - --- | Return markdown representation of notes. -notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMarkdown opts notes = - mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= - return . vcat - --- | Return markdown representation of a note. -noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc -noteToMarkdown opts num blocks = do - contents <- blockListToMarkdown opts blocks - let marker = text "[^" <> text (show num) <> text "]:" - return $ hang' marker (writerTabStop opts) contents - --- | Escape special characters for Markdown. -escapeString :: String -> String -escapeString = escapeStringUsing markdownEscapes - where markdownEscapes = backslashEscapes "`<\\*_^~" - --- | Convert bibliographic information into Markdown header. -metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc -metaToMarkdown opts (Meta title authors date) = do - title' <- titleToMarkdown opts title - authors' <- authorsToMarkdown authors - date' <- dateToMarkdown date - return $ title' $+$ authors' $+$ date' - -titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -titleToMarkdown _ [] = return empty -titleToMarkdown opts lst = do - contents <- inlineListToMarkdown opts lst - return $ text "% " <> contents - -authorsToMarkdown :: [String] -> State WriterState Doc -authorsToMarkdown [] = return empty -authorsToMarkdown lst = return $ - text "% " <> text (intercalate ", " (map escapeString lst)) - -dateToMarkdown :: String -> State WriterState Doc -dateToMarkdown [] = return empty -dateToMarkdown str = return $ text "% " <> text (escapeString str) - --- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc -tableOfContents opts headers = - let opts' = opts { writerIgnoreNotes = True } - contents = BulletList $ map elementToListItem $ hierarchicalize headers - in evalState (blockToMarkdown opts' contents) ([],[]) - --- | Converts an Element to a list item for a table of contents, -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec headerText subsecs) = [Plain headerText] ++ - if null subsecs - then [] - else [BulletList $ map elementToListItem subsecs] - --- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char st Char -olMarker = do (start, style', delim) <- anyOrderedListMarker - if delim == Period && - (style' == UpperAlpha || (style' == UpperRoman && - start `elem` [1, 5, 10, 50, 100, 500, 1000])) - then spaceChar >> spaceChar - else spaceChar - --- | True if string begins with an ordered list marker -beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case parse olMarker "para start" str of - Left _ -> False - Right _ -> True - -wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedMarkdown opts inlines = do - let chunks = splitBy LineBreak inlines - let chunks' = if null chunks - then [] - else (map (++ [Str " "]) $ init chunks) ++ [last chunks] - lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' - return $ vcat lns - --- | Convert Pandoc block element to markdown. -blockToMarkdown :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState Doc -blockToMarkdown _ Null = return empty -blockToMarkdown opts (Plain inlines) = - wrappedMarkdown opts inlines -blockToMarkdown opts (Para inlines) = do - contents <- wrappedMarkdown opts inlines - -- escape if para starts with ordered list marker - let esc = if (not (writerStrictMarkdown opts)) && - beginsWithOrderedListMarker (render contents) - then char '\\' - else empty - return $ esc <> contents <> text "\n" -blockToMarkdown _ (RawHtml str) = return $ text str -blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n" -blockToMarkdown opts (Header level inlines) = do - contents <- inlineListToMarkdown opts inlines - -- use setext style headers if in literate haskell mode. - -- ghc interprets '#' characters in column 1 as line number specifiers. - if writerLiterateHaskell opts - then let len = length $ render contents - in return $ contents <> text "\n" <> - case level of - 1 -> text $ replicate len '=' ++ "\n" - 2 -> text $ replicate len '-' ++ "\n" - _ -> empty - else return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" -blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && - writerLiterateHaskell opts = - return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" -blockToMarkdown opts (CodeBlock _ str) = return $ - (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" -blockToMarkdown opts (BlockQuote blocks) = do - -- if we're writing literate haskell, put a space before the bird tracks - -- so they won't be interpreted as lhs... - let leader = if writerLiterateHaskell opts - then text . (" > " ++) - else text . ("> " ++) - contents <- blockListToMarkdown opts blocks - return $ (vcat $ map leader $ lines $ render contents) <> - text "\n" -blockToMarkdown opts (Table caption aligns widths headers rows) = do - caption' <- inlineListToMarkdown opts caption - let caption'' = if null caption - then empty - else text "" $+$ (text "Table: " <> caption') - headers' <- mapM (blockListToMarkdown opts) headers - let widthsInChars = map (floor . (78 *)) widths - let alignHeader alignment = case alignment of - AlignLeft -> leftAlignBlock - AlignCenter -> centerAlignBlock - AlignRight -> rightAlignBlock - AlignDefault -> leftAlignBlock - let makeRow = hsepBlocks . (zipWith alignHeader aligns) . - (zipWith docToBlock widthsInChars) - let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row - return $ makeRow cols) rows - let maxRowHeight = maximum $ map heightOfBlock (head':rows') - let isMultilineTable = maxRowHeight > 1 - let underline = hsep $ - map (\width -> text $ replicate width '-') widthsInChars - let border = if isMultilineTable - then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' - else empty - let spacer = if isMultilineTable - then text "" - else empty - let body = vcat $ intersperse spacer $ map blockToDoc rows' - return $ (nest 2 $ border $+$ (blockToDoc head') $+$ underline $+$ body $+$ - border $+$ caption'') <> text "\n" -blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" -blockToMarkdown opts (OrderedList attribs items) = do - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ (vcat contents) <> text "\n" -blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to markdown. -bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc -bulletListItemToMarkdown opts items = do - contents <- blockListToMarkdown opts items - return $ hang' (text "- ") (writerTabStop opts) contents - --- | Convert ordered list item (a list of blocks) to markdown. -orderedListItemToMarkdown :: WriterOptions -- ^ options - -> String -- ^ list item marker - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToMarkdown opts marker items = do - contents <- blockListToMarkdown opts items - return $ hsep [nest (min (3 - length marker) 0) (text marker), - nest (writerTabStop opts) contents] - --- | Convert definition list item (label, list of blocks) to markdown. -definitionListItemToMarkdown :: WriterOptions - -> ([Inline],[Block]) - -> State WriterState Doc -definitionListItemToMarkdown opts (label, items) = do - labelText <- inlineListToMarkdown opts label - let tabStop = writerTabStop opts - let leader = char ':' - contents <- mapM (\item -> blockToMarkdown opts item >>= - (\txt -> return (leader $$ nest tabStop txt))) - items >>= return . vcat - return $ labelText $+$ contents - --- | Convert list of Pandoc block elements to markdown. -blockListToMarkdown :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) blocks >>= return . vcat - --- | Get reference for target; if none exists, create unique one and return. --- Prefer label if possible; otherwise, generate a unique key. -getReference :: [Inline] -> Target -> State WriterState [Inline] -getReference label (src, tit) = do - (_,refs) <- get - case find ((== (src, tit)) . snd) refs of - Just (ref, _) -> return ref - Nothing -> do - let label' = case find ((== label) . fst) refs of - Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst refs))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" - Nothing -> label - modify (\(notes, refs') -> (notes, (label', (src,tit)):refs')) - return label' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . hcat - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Emph lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '*' <> contents <> char '*' -inlineToMarkdown opts (Strong lst) = do - contents <- inlineListToMarkdown opts lst - return $ text "**" <> contents <> text "**" -inlineToMarkdown opts (Strikeout lst) = do - contents <- inlineListToMarkdown opts lst - return $ text "~~" <> contents <> text "~~" -inlineToMarkdown opts (Superscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '^' <> contents' <> char '^' -inlineToMarkdown opts (Subscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '~' <> contents' <> char '~' -inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst -inlineToMarkdown opts (Quoted SingleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '\'' <> contents <> char '\'' -inlineToMarkdown opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ char '"' <> contents <> char '"' -inlineToMarkdown _ EmDash = return $ text "--" -inlineToMarkdown _ EnDash = return $ char '-' -inlineToMarkdown _ Apostrophe = return $ char '\'' -inlineToMarkdown _ Ellipses = return $ text "..." -inlineToMarkdown _ (Code str) = - let tickGroups = filter (\s -> '`' `elem` s) $ group str - longest = if null tickGroups - then 0 - else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " in - return $ text (marker ++ spacer ++ str ++ spacer ++ marker) -inlineToMarkdown _ (Str str) = return $ text $ escapeString str -inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$" -inlineToMarkdown _ (TeX str) = return $ text str -inlineToMarkdown _ (HtmlInline str) = return $ text str -inlineToMarkdown _ (LineBreak) = return $ text " \n" -inlineToMarkdown _ Space = return $ char ' ' -inlineToMarkdown _ (Cite cits _ ) = do - let format (a,b) xs = text a <> - (if b /= [] then char '@' else empty) <> - text b <> - (if isEmpty xs then empty else text "; ") <> - xs - return $ char '[' <> foldr format empty cits <> char ']' -inlineToMarkdown opts (Link txt (src, tit)) = do - linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useRefLinks = writerReferenceLinks opts - let useAuto = null tit && txt == [Code srcSuffix] - ref <- if useRefLinks then getReference txt (src, tit) else return [] - reftext <- inlineListToMarkdown opts ref - return $ if useAuto - then char '<' <> text srcSuffix <> char '>' - else if useRefLinks - then let first = char '[' <> linktext <> char ']' - second = if txt == ref - then text "[]" - else char '[' <> reftext <> char ']' - in first <> second - else char '[' <> linktext <> char ']' <> - char '(' <> text src <> linktitle <> char ')' -inlineToMarkdown opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || - (alternate == [Str source]) -- to prevent autolinks - then [Str "image"] - else alternate - linkPart <- inlineToMarkdown opts (Link txt (source, tit)) - return $ char '!' <> linkPart -inlineToMarkdown _ (Note contents) = do - modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state - (notes, _) <- get - let ref = show $ (length notes) - return $ text "[^" <> text ref <> char ']' diff --git a/Text/Pandoc/Writers/MediaWiki.hs b/Text/Pandoc/Writers/MediaWiki.hs deleted file mode 100644 index c5f6b3bf1..000000000 --- a/Text/Pandoc/Writers/MediaWiki.hs +++ /dev/null @@ -1,396 +0,0 @@ -{- -Copyright (C) 2008 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to MediaWiki markup. - -MediaWiki: --} -module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.XML ( escapeStringForXML ) -import Data.List ( intersect ) -import Network.URI ( isURI ) -import Control.Monad.State - -data WriterState = WriterState { - stNotes :: Bool -- True if there are notes - , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" - , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list - } - --- | Convert Pandoc to MediaWiki. -writeMediaWiki :: WriterOptions -> Pandoc -> String -writeMediaWiki opts document = - evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) - --- | Return MediaWiki representation of document. -pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String -pandocToMediaWiki opts (Pandoc _ blocks) = do - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - let head' = if writerStandalone opts - then writerHeader opts - else "" - let toc = if writerTableOfContents opts - then "__TOC__\n" - else "" - body <- blockListToMediaWiki opts blocks - notesExist <- get >>= return . stNotes - let notes = if notesExist - then "\n== Notes ==\n" - else "" - return $ head' ++ before ++ toc ++ body ++ after ++ notes - --- | Escape special characters for MediaWiki. -escapeString :: String -> String -escapeString = escapeStringForXML - --- | Convert Pandoc block element to MediaWiki. -blockToMediaWiki :: WriterOptions -- ^ Options - -> Block -- ^ Block element - -> State WriterState String - -blockToMediaWiki _ Null = return "" - -blockToMediaWiki opts (Plain inlines) = - inlineListToMediaWiki opts inlines - -blockToMediaWiki opts (Para inlines) = do - useTags <- get >>= return . stUseTags - listLevel <- get >>= return . stListLevel - contents <- inlineListToMediaWiki opts inlines - return $ if useTags - then "

" ++ contents ++ "

" - else contents ++ if null listLevel then "\n" else "" - -blockToMediaWiki _ (RawHtml str) = return str - -blockToMediaWiki _ HorizontalRule = return "\n-----\n" - -blockToMediaWiki opts (Header level inlines) = do - contents <- inlineListToMediaWiki opts inlines - let eqs = replicate (level + 1) '=' - return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" - -blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do - let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", - "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", - "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", - "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", - "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", - "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", - "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", - "visualfoxpro", "winbatch", "xml", "xpp", "z80"] - let (beg, end) = if null at - then ("" else " class=\"" ++ unwords classes ++ "\">", "") - else ("", "") - return $ beg ++ escapeString str ++ end - -blockToMediaWiki opts (BlockQuote blocks) = do - contents <- blockListToMediaWiki opts blocks - return $ "
" ++ contents ++ "
" - -blockToMediaWiki opts (Table caption aligns widths headers rows) = do - let alignStrings = map alignmentToString aligns - captionDoc <- if null caption - then return "" - else do - c <- inlineListToMediaWiki opts caption - return $ "" ++ c ++ "" - colHeads <- colHeadsToMediaWiki opts alignStrings widths headers - rows' <- mapM (tableRowToMediaWiki opts alignStrings) rows - return $ "\n" ++ captionDoc ++ colHeads ++ vcat rows' ++ "\n
" - -blockToMediaWiki opts x@(BulletList items) = do - oldUseTags <- get >>= return . stUseTags - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } - return $ "
    \n" ++ vcat contents ++ "
\n" - else do - modify $ \s -> s { stListLevel = stListLevel s ++ "*" } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents - -blockToMediaWiki opts x@(OrderedList attribs items) = do - oldUseTags <- get >>= return . stUseTags - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } - return $ "\n" ++ vcat contents ++ "\n" - else do - modify $ \s -> s { stListLevel = stListLevel s ++ "#" } - contents <- mapM (listItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents - -blockToMediaWiki opts x@(DefinitionList items) = do - oldUseTags <- get >>= return . stUseTags - let useTags = oldUseTags || not (isSimpleList x) - if useTags - then do - modify $ \s -> s { stUseTags = True } - contents <- mapM (definitionListItemToMediaWiki opts) items - modify $ \s -> s { stUseTags = oldUseTags } - return $ "
\n" ++ vcat contents ++ "
\n" - else do - modify $ \s -> s { stListLevel = stListLevel s ++ ";" } - contents <- mapM (definitionListItemToMediaWiki opts) items - modify $ \s -> s { stListLevel = init (stListLevel s) } - return $ vcat contents - --- Auxiliary functions for lists: - --- | Convert ordered list attributes to HTML attribute string -listAttribsToString :: ListAttributes -> String -listAttribsToString (startnum, numstyle, _) = - let numstyle' = camelCaseToHyphenated $ show numstyle - in (if startnum /= 1 - then " start=\"" ++ show startnum ++ "\"" - else "") ++ - (if numstyle /= DefaultStyle - then " style=\"list-style-type: " ++ numstyle' ++ ";\"" - else "") - --- | Convert bullet or ordered list item (list of blocks) to MediaWiki. -listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String -listItemToMediaWiki opts items = do - contents <- blockListToMediaWiki opts items - useTags <- get >>= return . stUseTags - if useTags - then return $ "
  • " ++ contents ++ "
  • " - else do - marker <- get >>= return . stListLevel - return $ marker ++ " " ++ contents - --- | Convert definition list item (label, list of blocks) to MediaWiki. -definitionListItemToMediaWiki :: WriterOptions - -> ([Inline],[Block]) - -> State WriterState String -definitionListItemToMediaWiki opts (label, items) = do - labelText <- inlineListToMediaWiki opts label - contents <- blockListToMediaWiki opts items - useTags <- get >>= return . stUseTags - if useTags - then return $ "
    " ++ labelText ++ "
    \n
    " ++ contents ++ "
    " - else do - marker <- get >>= return . stListLevel - return $ marker ++ " " ++ labelText ++ "\n" ++ (init marker ++ ": ") ++ contents - --- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. -isSimpleList :: Block -> Bool -isSimpleList x = - case x of - BulletList items -> all isSimpleListItem items - OrderedList (num, sty, _) items -> all isSimpleListItem items && - num == 1 && sty `elem` [DefaultStyle, Decimal] - DefinitionList items -> all isSimpleListItem $ map snd items - _ -> False - --- | True if list item can be handled with the simple wiki syntax. False if --- HTML tags will be needed. -isSimpleListItem :: [Block] -> Bool -isSimpleListItem [] = True -isSimpleListItem [x] = - case x of - Plain _ -> True - Para _ -> True - BulletList _ -> isSimpleList x - OrderedList _ _ -> isSimpleList x - DefinitionList _ -> isSimpleList x - _ -> False -isSimpleListItem [x, y] | isPlainOrPara x = - case y of - BulletList _ -> isSimpleList y - OrderedList _ _ -> isSimpleList y - DefinitionList _ -> isSimpleList y - _ -> False -isSimpleListItem _ = False - -isPlainOrPara :: Block -> Bool -isPlainOrPara (Plain _) = True -isPlainOrPara (Para _) = True -isPlainOrPara _ = False - -tr :: String -> String -tr x = "\n" ++ x ++ "\n" - --- | Concatenates strings with line breaks between them. -vcat :: [String] -> String -vcat [] = "" -vcat [x] = x -vcat (x:xs) = x ++ "\n" ++ vcat xs - --- Auxiliary functions for tables: - -colHeadsToMediaWiki :: WriterOptions - -> [[Char]] - -> [Double] - -> [[Block]] - -> State WriterState String -colHeadsToMediaWiki opts alignStrings widths headers = do - heads <- sequence $ zipWith3 - (\alignment columnwidth item -> tableItemToMediaWiki opts "th" alignment columnwidth item) - alignStrings widths headers - return $ tr $ vcat heads - -alignmentToString :: Alignment -> [Char] -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -tableRowToMediaWiki :: WriterOptions - -> [[Char]] - -> [[Block]] - -> State WriterState String -tableRowToMediaWiki opts aligns columns = - (sequence $ zipWith3 (tableItemToMediaWiki opts "td") aligns (repeat 0) columns) >>= - return . tr . vcat - -tableItemToMediaWiki :: WriterOptions - -> [Char] - -> [Char] - -> Double - -> [Block] - -> State WriterState String -tableItemToMediaWiki opts tag' align' width' item = do - contents <- blockListToMediaWiki opts item - let attrib = " align=\"" ++ align' ++ "\"" ++ - if width' /= 0 - then " style=\"width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;\"" - else "" - return $ "<" ++ tag' ++ attrib ++ ">" ++ contents ++ "" - --- | Convert list of Pandoc block elements to MediaWiki. -blockListToMediaWiki :: WriterOptions -- ^ Options - -> [Block] -- ^ List of block elements - -> State WriterState String -blockListToMediaWiki opts blocks = - mapM (blockToMediaWiki opts) blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to MediaWiki. -inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String -inlineListToMediaWiki opts lst = - mapM (inlineToMediaWiki opts) lst >>= return . concat - --- | Convert Pandoc inline element to MediaWiki. -inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String - -inlineToMediaWiki opts (Emph lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "''" ++ contents ++ "''" - -inlineToMediaWiki opts (Strong lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "'''" ++ contents ++ "'''" - -inlineToMediaWiki opts (Strikeout lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "" ++ contents ++ "" - -inlineToMediaWiki opts (Superscript lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "" ++ contents ++ "" - -inlineToMediaWiki opts (Subscript lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "" ++ contents ++ "" - -inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst - -inlineToMediaWiki opts (Quoted SingleQuote lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "‘" ++ contents ++ "’" - -inlineToMediaWiki opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMediaWiki opts lst - return $ "“" ++ contents ++ "”" - -inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst - -inlineToMediaWiki _ EmDash = return "—" - -inlineToMediaWiki _ EnDash = return "–" - -inlineToMediaWiki _ Apostrophe = return "’" - -inlineToMediaWiki _ Ellipses = return "…" - -inlineToMediaWiki _ (Code str) = - return $ "" ++ (escapeString str) ++ "" - -inlineToMediaWiki _ (Str str) = return $ escapeString str - -inlineToMediaWiki _ (Math _ str) = return $ "" ++ str ++ "" - -- note: str should NOT be escaped - -inlineToMediaWiki _ (TeX _) = return "" - -inlineToMediaWiki _ (HtmlInline str) = return str - -inlineToMediaWiki _ (LineBreak) = return "
    \n" - -inlineToMediaWiki _ Space = return " " - -inlineToMediaWiki opts (Link txt (src, _)) = do - link <- inlineListToMediaWiki opts txt - let useAuto = txt == [Code src] - let src' = if isURI src - then src - else if take 1 src == "/" - then "http://{{SERVERNAME}}" ++ src - else "http://{{SERVERNAME}}/" ++ src - return $ if useAuto - then src' - else "[" ++ src' ++ " " ++ link ++ "]" - -inlineToMediaWiki opts (Image alt (source, tit)) = do - alt' <- inlineListToMediaWiki opts alt - let txt = if (null tit) - then if null alt - then "" - else "|" ++ alt' - else "|" ++ tit - return $ "[[Image:" ++ source ++ txt ++ "]]" - -inlineToMediaWiki opts (Note contents) = do - contents' <- blockListToMediaWiki opts contents - modify (\s -> s { stNotes = True }) - return $ "" ++ contents' ++ "" - -- note - may not work for notes with multiple blocks diff --git a/Text/Pandoc/Writers/OpenDocument.hs b/Text/Pandoc/Writers/OpenDocument.hs deleted file mode 100644 index 52438f81e..000000000 --- a/Text/Pandoc/Writers/OpenDocument.hs +++ /dev/null @@ -1,568 +0,0 @@ -{-# LANGUAGE PatternGuards #-} -{- -Copyright (C) 2008 Andrea Rossato - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008 Andrea Rossato - License : GNU GPL, version 2 or above - - Maintainer : Andrea Rossato - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to OpenDocument XML. --} -module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.XML -import Text.Pandoc.Readers.TeXMath -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Text.Printf ( printf ) -import Control.Applicative ( (<$>) ) -import Control.Arrow ( (***), (>>>) ) -import Control.Monad.State hiding ( when ) -import Data.Char (chr) -import Data.List (intercalate) - --- | Auxiliary function to convert Plain block to Para. -plainToPara :: Block -> Block -plainToPara (Plain x) = Para x -plainToPara x = x - --- --- OpenDocument writer --- - -data WriterState = - WriterState { stNotes :: [Doc] - , stTableStyles :: [Doc] - , stParaStyles :: [Doc] - , stListStyles :: [(Int, [Doc])] - , stTextStyles :: [Doc] - , stTextStyleAttr :: [(TextStyle,[(String,String)])] - , stIndentPara :: Int - , stInDefinition :: Bool - , stTight :: Bool - } - -defaultWriterState :: WriterState -defaultWriterState = - WriterState { stNotes = [] - , stTableStyles = [] - , stParaStyles = [] - , stListStyles = [] - , stTextStyles = [] - , stTextStyleAttr = [] - , stIndentPara = 0 - , stInDefinition = False - , stTight = False - } - -when :: Bool -> Doc -> Doc -when p a = if p then a else empty - -addTableStyle :: Doc -> State WriterState () -addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } - -addNote :: Doc -> State WriterState () -addNote i = modify $ \s -> s { stNotes = i : stNotes s } - -addParaStyle :: Doc -> State WriterState () -addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } - -addTextStyle :: Doc -> State WriterState () -addTextStyle i = modify $ \s -> s { stTextStyles = i : stTextStyles s } - -addTextStyleAttr :: (TextStyle, [(String,String)]) -> State WriterState () -addTextStyleAttr i = modify $ \s -> s { stTextStyleAttr = i : stTextStyleAttr s } - -rmTextStyleAttr :: State WriterState () -rmTextStyleAttr = modify $ \s -> s { stTextStyleAttr = rmHead (stTextStyleAttr s) } - where rmHead l = if l /= [] then tail l else [] - -increaseIndent :: State WriterState () -increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } - -resetIndent :: State WriterState () -resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } - -inTightList :: State WriterState a -> State WriterState a -inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> - modify (\s -> s { stTight = False }) >> return r - -setInDefinitionList :: Bool -> State WriterState () -setInDefinitionList b = modify $ \s -> s { stInDefinition = b } - -inParagraphTags :: Doc -> Doc -inParagraphTags = inTags False "text:p" [("text:style-name", "Text_20_body")] - -inParagraphTagsWithStyle :: String -> Doc -> Doc -inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] - -inSpanTags :: String -> Doc -> Doc -inSpanTags s = inTags False "text:span" [("text:style-name",s)] - -withTextStyle :: TextStyle -> State WriterState a -> State WriterState a -withTextStyle s f = addTextStyleAttr (s,textStyleAttr s) >> - f >>= \r -> rmTextStyleAttr >> return r - -inTextStyle :: Doc -> State WriterState Doc -inTextStyle d = do - at <- gets stTextStyleAttr - if at == [] - then return d - else do - tn <- (+) 1 . length <$> gets stTextStyles - addTextStyle $ inTags False "style:style" [("style:name" , "T" ++ show tn) - ,("style:family", "text" )] - $ selfClosingTag "style:text-properties" (concatMap snd at) - return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d - -inHeaderTags :: Int -> Doc -> Doc -inHeaderTags i = inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] - -inQuotes :: QuoteType -> Doc -> Doc -inQuotes SingleQuote s = text "‘" <> s <> text "’" -inQuotes DoubleQuote s = text "“" <> s <> text "”" - -handleSpaces :: String -> Doc -handleSpaces s - | ( ' ':_) <- s = genTag s - | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x - | otherwise = rm s - where - genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>) - tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)] - rm ( ' ':xs) = char ' ' <> genTag xs - rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs - rm ( x:xs) = char x <> rm xs - rm [] = empty - --- | Convert list of authors to a docbook section -authorToOpenDocument :: [Char] -> Doc -authorToOpenDocument name = - if ',' `elem` name - then -- last name first - let (lastname, rest) = break (==',') name - firstname = removeLeadingSpace rest - in inParagraphTagsWithStyle "Author" $ - (text $ escapeStringForXML firstname) <+> - (text $ escapeStringForXML lastname) - else -- last name last - let namewords = words name - lengthname = length namewords - (firstname, lastname) = case lengthname of - 0 -> ("","") - 1 -> ("", name) - n -> (intercalate " " (take (n-1) namewords), last namewords) - in inParagraphTagsWithStyle "Author" $ - (text $ escapeStringForXML firstname) <+> - (text $ escapeStringForXML lastname) - --- | Convert Pandoc document to string in OpenDocument format. -writeOpenDocument :: WriterOptions -> Pandoc -> String -writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = - let root = inTags True "office:document-content" openDocumentNameSpaces - header = when (writerStandalone opts) $ text (writerHeader opts) - title' = case runState (wrap opts title) defaultWriterState of - (t,_) -> if isEmpty t then empty else inHeaderTags 1 t - authors' = when (authors /= []) $ vcat (map authorToOpenDocument authors) - date' = when (date /= []) $ - inParagraphTagsWithStyle "Date" (text $ escapeStringForXML date) - meta = when (writerStandalone opts) $ title' $$ authors' $$ date' - before = writerIncludeBefore opts - after = writerIncludeAfter opts - (doc, s) = runState (blocksToOpenDocument opts blocks) defaultWriterState - body = (if null before then empty else text before) $$ - doc $$ - (if null after then empty else text after) - body' = if writerStandalone opts - then inTagsIndented "office:body" $ - inTagsIndented "office:text" (meta $$ body) - else body - styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s - listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) - listStyles = map listStyle (stListStyles s) - in render $ header $$ root (generateStyles (styles ++ listStyles) $$ body' $$ text "") - -withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc -withParagraphStyle o s (b:bs) - | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where go i = ($$) i <$> withParagraphStyle o s bs -withParagraphStyle _ _ [] = return empty - -inPreformattedTags :: String -> State WriterState Doc -inPreformattedTags s = do - n <- paraStyle "Preformatted_20_Text" [] - return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s - -orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc -orderedListToOpenDocument o pn bs = - vcat . map (inTagsIndented "text:list-item") <$> - mapM (orderedItemToOpenDocument o pn . map plainToPara) bs - -orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc -orderedItemToOpenDocument o n (b:bs) - | OrderedList a l <- b = newLevel a l - | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l - | otherwise = go =<< blockToOpenDocument o b - where - go i = ($$) i <$> orderedItemToOpenDocument o n bs - newLevel a l = do - nn <- length <$> gets stParaStyles - ls <- head <$> gets stListStyles - modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) } - inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l -orderedItemToOpenDocument _ _ [] = return empty - -isTightList :: [[Block]] -> Bool -isTightList [] = False -isTightList (b:_) - | Plain {} : _ <- b = True - | otherwise = False - -newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int) -newOrderedListStyle b a = do - ln <- (+) 1 . length <$> gets stListStyles - let nbs = orderedListLevelStyle a (ln, []) - pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln - modify $ \s -> s { stListStyles = nbs : stListStyles s } - return (ln,pn) - -bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc -bulletListToOpenDocument o b = do - ln <- (+) 1 . length <$> gets stListStyles - (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln - modify $ \s -> s { stListStyles = ns : stListStyles s } - is <- listItemsToOpenDocument ("P" ++ show pn) o b - return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is - -listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc -listItemsToOpenDocument s o is = - vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is - -deflistItemToOpenDocument :: WriterOptions -> ([Inline],[Block]) -> State WriterState Doc -deflistItemToOpenDocument o (t,d) = do - let ts = if isTightList [d] - then "Definition_20_Term_20_Tight" else "Definition_20_Term" - ds = if isTightList [d] - then "Definition_20_Definition_20_Tight" else "Definition_20_Definition" - t' <- withParagraphStyle o ts [Para t] - d' <- withParagraphStyle o ds (map plainToPara d) - return $ t' $$ d' - -inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc -inBlockQuote o i (b:bs) - | BlockQuote l <- b = do increaseIndent - ni <- paraStyle "Quotations" [] - go =<< inBlockQuote o ni (map plainToPara l) - | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l - | otherwise = do go =<< blockToOpenDocument o b - where go block = ($$) block <$> inBlockQuote o i bs -inBlockQuote _ _ [] = resetIndent >> return empty - --- | Convert a list of Pandoc blocks to OpenDocument. -blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc -blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b - --- | Convert a Pandoc block element to OpenDocument. -blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc -blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags <$> wrap o b - | Para b <- bs = inParagraphTags <$> wrap o b - | Header i b <- bs = inHeaderTags i <$> wrap o b - | BlockQuote b <- bs = mkBlockQuote b - | CodeBlock _ s <- bs = preformatted s - | RawHtml _ <- bs = return empty - | DefinitionList b <- bs = defList b - | BulletList b <- bs = bulletListToOpenDocument o b - | OrderedList a b <- bs = orderedList a b - | Table c a w h r <- bs = table c a w h r - | Null <- bs = return empty - | HorizontalRule <- bs = return $ selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ] - | otherwise = return empty - where - defList b = do setInDefinitionList True - r <- vcat <$> mapM (deflistItemToOpenDocument o) b - setInDefinitionList False - return r - preformatted s = vcat <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) - mkBlockQuote b = do increaseIndent - i <- paraStyle "Quotations" [] - inBlockQuote o i (map plainToPara b) - orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a - inTags True "text:list" [ ("text:style-name", "L" ++ show ln)] - <$> orderedListToOpenDocument o pn b - table c a w h r = do - tn <- length <$> gets stTableStyles - pn <- length <$> gets stParaStyles - let genIds = map chr [65..] - name = "Table" ++ show (tn + 1) - columnIds = zip genIds w - mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])] - columns = map mkColumn columnIds - paraHStyles = paraTableStyles "Heading" pn a - paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a - newPara = map snd . filter (not . isEmpty . snd) - addTableStyle $ tableStyle tn columnIds - mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles - captionDoc <- if null c - then return empty - else withParagraphStyle o "Caption" [Para c] - th <- colHeadsToOpenDocument o name (map fst paraHStyles) h - tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r - return $ inTags True "table:table" [ ("table:name" , name) - , ("table:style-name", name) - ] (vcat columns $$ th $$ vcat tr) $$ captionDoc - -colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc -colHeadsToOpenDocument o tn ns hs = - inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns hs) - -tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc -tableRowToOpenDocument o tn ns cs = - inTagsIndented "table:table-row" . vcat <$> - mapM (tableItemToOpenDocument o tn) (zip ns cs) - -tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc -tableItemToOpenDocument o tn (n,i) = - let a = [ ("table:style-name" , tn ++ ".A1" ) - , ("office:value-type", "string" ) - ] - in inTags True "table:table-cell" a <$> - withParagraphStyle o n (map plainToPara i) - --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> State WriterState Doc -wrap o l = if writerWrapText o - then fsep <$> mapM (inlinesToOpenDocument o) (splitBy Space l) - else inlinesToOpenDocument o l - --- | Convert a list of inline elements to OpenDocument. -inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc -inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l - --- | Convert an inline element to OpenDocument. -inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc -inlineToOpenDocument o ils - | Ellipses <- ils = inTextStyle $ text "…" - | EmDash <- ils = inTextStyle $ text "—" - | EnDash <- ils = inTextStyle $ text "–" - | Apostrophe <- ils = inTextStyle $ text "’" - | Space <- ils = inTextStyle $ char ' ' - | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] - | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s - | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l - | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l - | Strikeout l <- ils = withTextStyle Strike $ inlinesToOpenDocument o l - | Superscript l <- ils = withTextStyle Sup $ inlinesToOpenDocument o l - | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l - | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l - | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code s <- ils = preformatted s - | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) - | Cite _ l <- ils = inlinesToOpenDocument o l - | TeX s <- ils = preformatted s - | HtmlInline s <- ils = preformatted s - | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,_) <- ils = return $ mkImg s - | Note l <- ils = mkNote l - | otherwise = return empty - where - preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML - mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") - , ("xlink:href" , s ) - , ("office:name", t ) - ] . inSpanTags "Definition" - mkImg s = inTags False "draw:frame" [] $ - selfClosingTag "draw:image" [ ("xlink:href" , s ) - , ("xlink:type" , "simple") - , (" xlink:show" , "embed" ) - , ("xlink:actuate", "onLoad")] - mkNote l = do - n <- length <$> gets stNotes - let footNote t = inTags False "text:note" - [ ("text:id" , "ftn" ++ show n) - , ("text:note-class", "footnote" )] $ - inTagsSimple "text:note-citation" (text . show $ n + 1) $$ - inTagsSimple "text:note-body" t - nn <- footNote <$> withParagraphStyle o "Footnote" l - addNote nn - return nn - -generateStyles :: [Doc] -> Doc -generateStyles acc = - let scripts = selfClosingTag "office:scripts" [] - fonts = inTagsIndented "office:font-face-decls" - (vcat $ map font ["Lucida Sans Unicode", "Tahoma", "Times New Roman"]) - font fn = selfClosingTag "style:font-face" - [ ("style:name" , "'" ++ fn ++ "'") - , ("svg:font-family", fn )] - in scripts $$ fonts $$ inTagsIndented "office:automatic-styles" (vcat $ reverse acc) - -bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) -bulletListStyle l = - let doStyles i = inTags True "text:list-level-style-bullet" - [ ("text:level" , show (i + 1) ) - , ("text:style-name" , "Bullet_20_Symbols") - , ("style:num-suffix", "." ) - , ("text:bullet-char", [bulletList !! i] ) - ] (listLevelStyle (1 + i)) - bulletList = map chr $ cycle [8226,8227,8259] - listElStyle = map doStyles [0..9] - in do pn <- paraListStyle l - return (pn, (l, listElStyle)) - -orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) -orderedListLevelStyle (s,n, d) (l,ls) = - let suffix = case d of - OneParen -> [("style:num-suffix", ")")] - TwoParens -> [("style:num-prefix", "(") - ,("style:num-suffix", ")")] - _ -> [("style:num-suffix", ".")] - format = case n of - UpperAlpha -> "A" - LowerAlpha -> "a" - UpperRoman -> "I" - LowerRoman -> "i" - _ -> "1" - listStyle = inTags True "text:list-level-style-number" - ([ ("text:level" , show $ 1 + length ls ) - , ("text:style-name" , "Numbering_20_Symbols") - , ("style:num-format", format ) - , ("text:start-value", show s ) - ] ++ suffix) (listLevelStyle (1 + length ls)) - in (l, ls ++ [listStyle]) - -listLevelStyle :: Int -> Doc -listLevelStyle i = - let indent = show (0.25 * fromIntegral i :: Double) in - selfClosingTag "style:list-level-properties" - [ ("text:space-before" , indent ++ "in") - , ("text:min-label-width", "0.25in")] - -tableStyle :: Int -> [(Char,Double)] -> Doc -tableStyle num wcs = - let tableId = "Table" ++ show (num + 1) - table = inTags True "style:style" - [("style:name", tableId)] $ - selfClosingTag "style:table-properties" - [ ("style:rel-width", "100%" ) - , ("table:align" , "center")] - colStyle (c,w) = inTags True "style:style" - [ ("style:name" , tableId ++ "." ++ [c]) - , ("style:family", "table-column" )] $ - selfClosingTag "style:table-column-properties" - [("style:column-width", printf "%.2f" (7 * w) ++ "in")] - cellStyle = inTags True "style:style" - [ ("style:name" , tableId ++ ".A1") - , ("style:family", "table-cell" )] $ - selfClosingTag "style:table-cell-properties" - [ ("fo:border", "none")] - columnStyles = map colStyle wcs - in table $$ vcat columnStyles $$ cellStyle - -paraStyle :: String -> [(String,String)] -> State WriterState Int -paraStyle parent attrs = do - pn <- (+) 1 . length <$> gets stParaStyles - i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double - b <- gets stInDefinition - t <- gets stTight - let styleAttr = [ ("style:name" , "P" ++ show pn) - , ("style:family" , "paragraph" ) - , ("style:parent-style-name", parent )] - indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i - tight = if t then [ ("fo:margin-top" , "0in" ) - , ("fo:margin-bottom" , "0in" )] - else [] - indent = when (i /= 0 || b || t) $ - selfClosingTag "style:paragraph-properties" $ - [ ("fo:margin-left" , indentVal) - , ("fo:margin-right" , "0in" ) - , ("fo:text-indent" , "0in" ) - , ("style:auto-text-indent" , "false" )] - ++ tight - addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent - return pn - -paraListStyle :: Int -> State WriterState Int -paraListStyle l = paraStyle "Text_20_body" [("style:list-style-name", "L" ++ show l )] - -paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)] -paraTableStyles _ _ [] = [] -paraTableStyles t s (a:xs) - | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs - | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs - | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs - where pName sn = "P" ++ show (sn + 1) - res sn x = inTags True "style:style" - [ ("style:name" , pName sn ) - , ("style:family" , "paragraph" ) - , ("style:parent-style-name", "Table_20_" ++ t)] $ - selfClosingTag "style:paragraph-properties" - [ ("fo:text-align", x) - , ("style:justify-single-word", "false")] - -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq ) - -textStyleAttr :: TextStyle -> [(String,String)] -textStyleAttr s - | Italic <- s = [("fo:font-style" ,"italic" ) - ,("style:font-style-asian" ,"italic" ) - ,("style:font-style-complex" ,"italic" )] - | Bold <- s = [("fo:font-weight" ,"bold" ) - ,("style:font-weight-asian" ,"bold" ) - ,("style:font-weight-complex" ,"bold" )] - | Strike <- s = [("style:text-line-through-style", "solid" )] - | Sub <- s = [("style:text-position" ,"sub 58%" )] - | Sup <- s = [("style:text-position" ,"super 58%" )] - | SmallC <- s = [("fo:font-variant" ,"small-caps")] - | otherwise = [] - -openDocumentNameSpaces :: [(String, String)] -openDocumentNameSpaces = - [ ("xmlns:office" , "urn:oasis:names:tc:opendocument:xmlns:office:1.0" ) - , ("xmlns:style" , "urn:oasis:names:tc:opendocument:xmlns:style:1.0" ) - , ("xmlns:text" , "urn:oasis:names:tc:opendocument:xmlns:text:1.0" ) - , ("xmlns:table" , "urn:oasis:names:tc:opendocument:xmlns:table:1.0" ) - , ("xmlns:draw" , "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" ) - , ("xmlns:fo" , "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0") - , ("xmlns:xlink" , "http://www.w3.org/1999/xlink" ) - , ("xmlns:dc" , "http://purl.org/dc/elements/1.1/" ) - , ("xmlns:meta" , "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" ) - , ("xmlns:number" , "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" ) - , ("xmlns:svg" , "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" ) - , ("xmlns:chart" , "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" ) - , ("xmlns:dr3d" , "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" ) - , ("xmlns:math" , "http://www.w3.org/1998/Math/MathML" ) - , ("xmlns:form" , "urn:oasis:names:tc:opendocument:xmlns:form:1.0" ) - , ("xmlns:script" , "urn:oasis:names:tc:opendocument:xmlns:script:1.0" ) - , ("xmlns:ooo" , "http://openoffice.org/2004/office" ) - , ("xmlns:ooow" , "http://openoffice.org/2004/writer" ) - , ("xmlns:oooc" , "http://openoffice.org/2004/calc" ) - , ("xmlns:dom" , "http://www.w3.org/2001/xml-events" ) - , ("xmlns:xforms" , "http://www.w3.org/2002/xforms" ) - , ("xmlns:xsd" , "http://www.w3.org/2001/XMLSchema" ) - , ("xmlns:xsi" , "http://www.w3.org/2001/XMLSchema-instance" ) - , ("office:version", "1.0" ) - ] diff --git a/Text/Pandoc/Writers/RST.hs b/Text/Pandoc/Writers/RST.hs deleted file mode 100644 index 91826cbcd..000000000 --- a/Text/Pandoc/Writers/RST.hs +++ /dev/null @@ -1,346 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to reStructuredText. - -reStructuredText: --} -module Text.Pandoc.Writers.RST ( writeRST) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Blocks -import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) -import Control.Monad.State -import Control.Applicative ( (<$>) ) - -data WriterState = - WriterState { stNotes :: [[Block]] - , stLinks :: KeyTable - , stImages :: KeyTable - , stIncludes :: [String] - , stOptions :: WriterOptions - } - --- | Convert Pandoc to RST. -writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = - let st = WriterState { stNotes = [], stLinks = [], - stImages = [], stIncludes = [], - stOptions = opts } - in render $ evalState (pandocToRST document) st - --- | Return RST representation of document. -pandocToRST :: Pandoc -> State WriterState Doc -pandocToRST (Pandoc meta blocks) = do - opts <- get >>= (return . stOptions) - let before = writerIncludeBefore opts - let after = writerIncludeAfter opts - before' = if null before then empty else text before - after' = if null after then empty else text after - metaBlock <- metaToRST opts meta - let head' = if (writerStandalone opts) - then metaBlock $+$ text (writerHeader opts) - else empty - body <- blockListToRST blocks - includes <- get >>= (return . concat . stIncludes) - let includes' = if null includes then empty else text includes - notes <- get >>= (notesToRST . reverse . stNotes) - -- note that the notes may contain refs, so we do them first - refs <- get >>= (keyTableToRST . reverse . stLinks) - pics <- get >>= (pictTableToRST . reverse . stImages) - return $ head' $+$ before' $+$ includes' $+$ body $+$ notes $+$ text "" $+$ - refs $+$ pics $+$ after' - --- | Return RST representation of reference key table. -keyTableToRST :: KeyTable -> State WriterState Doc -keyTableToRST refs = mapM keyToRST refs >>= return . vcat - --- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) - -> State WriterState Doc -keyToRST (label, (src, _)) = do - label' <- inlineListToRST label - let label'' = if ':' `elem` (render label') - then char '`' <> label' <> char '`' - else label' - return $ text ".. _" <> label'' <> text ": " <> text src - --- | Return RST representation of notes. -notesToRST :: [[Block]] -> State WriterState Doc -notesToRST notes = - mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= - return . vcat - --- | Return RST representation of a note. -noteToRST :: Int -> [Block] -> State WriterState Doc -noteToRST num note = do - contents <- blockListToRST note - let marker = text ".. [" <> text (show num) <> text "]" - return $ marker $$ nest 3 contents - --- | Return RST representation of picture reference table. -pictTableToRST :: KeyTable -> State WriterState Doc -pictTableToRST refs = mapM pictToRST refs >>= return . vcat - --- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (String, String)) - -> State WriterState Doc -pictToRST (label, (src, _)) = do - label' <- inlineListToRST label - return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> - text src - --- | Take list of inline elements and return wrapped doc. -wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedRST opts inlines = do - lineBreakDoc <- inlineToRST LineBreak - chunks <- mapM (wrapIfNeeded opts inlineListToRST) - (splitBy LineBreak inlines) - return $ vcat $ intersperse lineBreakDoc chunks - --- | Escape special characters for RST. -escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "`\\|*_") - --- | Convert bibliographic information into RST header. -metaToRST :: WriterOptions -> Meta -> State WriterState Doc -metaToRST opts (Meta title authors date) = do - title' <- titleToRST title - authors' <- authorsToRST authors - date' <- dateToRST date - let toc = if writerTableOfContents opts - then text "" $+$ text ".. contents::" - else empty - return $ title' $+$ authors' $+$ date' $+$ toc - -titleToRST :: [Inline] -> State WriterState Doc -titleToRST [] = return empty -titleToRST lst = do - contents <- inlineListToRST lst - let titleLength = length $ render contents - let border = text (replicate titleLength '=') - return $ border $+$ contents $+$ border <> text "\n" - -authorsToRST :: [String] -> State WriterState Doc -authorsToRST [] = return empty -authorsToRST (first:rest) = do - rest' <- authorsToRST rest - return $ (text ":Author: " <> text first) $+$ rest' - -dateToRST :: String -> State WriterState Doc -dateToRST [] = return empty -dateToRST str = return $ text ":Date: " <> text (escapeString str) - --- | Convert Pandoc block element to RST. -blockToRST :: Block -- ^ Block element - -> State WriterState Doc -blockToRST Null = return empty -blockToRST (Plain inlines) = do - opts <- get >>= (return . stOptions) - wrappedRST opts inlines -blockToRST (Para inlines) = do - opts <- get >>= (return . stOptions) - contents <- wrappedRST opts inlines - return $ contents <> text "\n" -blockToRST (RawHtml str) = - let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in - return $ (text "\n.. raw:: html\n") $$ (nest 3 $ vcat $ map text (lines str')) -blockToRST HorizontalRule = return $ text "--------------\n" -blockToRST (Header level inlines) = do - contents <- inlineListToRST inlines - let headerLength = length $ render contents - let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate headerLength headerChar - return $ contents $+$ border <> text "\n" -blockToRST (CodeBlock (_,classes,_) str) = do - opts <- stOptions <$> get - let tabstop = writerTabStop opts - if "haskell" `elem` classes && writerLiterateHaskell opts - then return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" - else return $ (text "::\n") $+$ - (nest tabstop $ vcat $ map text (lines str)) <> text "\n" -blockToRST (BlockQuote blocks) = do - tabstop <- get >>= (return . writerTabStop . stOptions) - contents <- blockListToRST blocks - return $ (nest tabstop contents) <> text "\n" -blockToRST (Table caption _ widths headers rows) = do - caption' <- inlineListToRST caption - let caption'' = if null caption - then empty - else text "" $+$ (text "Table: " <> caption') - headers' <- mapM blockListToRST headers - let widthsInChars = map (floor . (78 *)) widths - let hpipeBlocks blocks = hcatBlocks [beg, middle, end] - where height = maximum (map heightOfBlock blocks) - sep' = TextBlock 3 height (replicate height " | ") - beg = TextBlock 2 height (replicate height "| ") - end = TextBlock 2 height (replicate height " |") - middle = hcatBlocks $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars - let head' = makeRow headers' - rows' <- mapM (\row -> do cols <- mapM blockListToRST row - return $ makeRow cols) rows - let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ - map (\l -> text $ replicate l ch) widthsInChars) <> - char ch <> char '+' - let body = vcat $ intersperse (border '-') $ map blockToDoc rows' - return $ border '-' $+$ blockToDoc head' $+$ border '=' $+$ body $+$ - border '-' $$ caption'' $$ text "" -blockToRST (BulletList items) = do - contents <- mapM bulletListItemToRST items - -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" -blockToRST (OrderedList (start, style', delim) items) = do - let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim - then take (length items) $ repeat "#." - else take (length items) $ orderedListMarkers - (start, style', delim) - let maxMarkerLength = maximum $ map length markers - let markers' = map (\m -> let s = maxMarkerLength - length m - in m ++ replicate s ' ') markers - contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ - zip markers' items - -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" -blockToRST (DefinitionList items) = do - contents <- mapM definitionListItemToRST items - return $ (vcat contents) <> text "\n" - --- | Convert bullet list item (list of blocks) to RST. -bulletListItemToRST :: [Block] -> State WriterState Doc -bulletListItemToRST items = do - contents <- blockListToRST items - return $ (text "- ") <> contents - --- | Convert ordered list item (a list of blocks) to RST. -orderedListItemToRST :: String -- ^ marker for list item - -> [Block] -- ^ list item (list of blocks) - -> State WriterState Doc -orderedListItemToRST marker items = do - contents <- blockListToRST items - return $ (text marker <> char ' ') <> contents - --- | Convert defintion list item (label, list of blocks) to RST. -definitionListItemToRST :: ([Inline], [Block]) -> State WriterState Doc -definitionListItemToRST (label, items) = do - label' <- inlineListToRST label - contents <- blockListToRST items - tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $+$ nest tabstop contents - --- | Convert list of Pandoc block elements to RST. -blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc -blockListToRST blocks = mapM blockToRST blocks >>= return . vcat - --- | Convert list of Pandoc inline elements to RST. -inlineListToRST :: [Inline] -> State WriterState Doc -inlineListToRST lst = mapM inlineToRST lst >>= return . hcat - --- | Convert Pandoc inline element to RST. -inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Emph lst) = do - contents <- inlineListToRST lst - return $ char '*' <> contents <> char '*' -inlineToRST (Strong lst) = do - contents <- inlineListToRST lst - return $ text "**" <> contents <> text "**" -inlineToRST (Strikeout lst) = do - contents <- inlineListToRST lst - return $ text "[STRIKEOUT:" <> contents <> char ']' -inlineToRST (Superscript lst) = do - contents <- inlineListToRST lst - return $ text "\\ :sup:`" <> contents <> text "`\\ " -inlineToRST (Subscript lst) = do - contents <- inlineListToRST lst - return $ text "\\ :sub:`" <> contents <> text "`\\ " -inlineToRST (SmallCaps lst) = inlineListToRST lst -inlineToRST (Quoted SingleQuote lst) = do - contents <- inlineListToRST lst - return $ char '\'' <> contents <> char '\'' -inlineToRST (Quoted DoubleQuote lst) = do - contents <- inlineListToRST lst - return $ char '"' <> contents <> char '"' -inlineToRST (Cite _ lst) = - inlineListToRST lst -inlineToRST EmDash = return $ text "--" -inlineToRST EnDash = return $ char '-' -inlineToRST Apostrophe = return $ char '\'' -inlineToRST Ellipses = return $ text "..." -inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" -inlineToRST (Str str) = return $ text $ escapeString str -inlineToRST (Math t str) = do - includes <- get >>= (return . stIncludes) - let rawMathRole = ".. role:: math(raw)\n" ++ - " :format: html latex\n" - if not (rawMathRole `elem` includes) - then modify $ \st -> st { stIncludes = rawMathRole : includes } - else return () - return $ if t == InlineMath - then text $ ":math:`$" ++ str ++ "$`" - else text $ ":math:`$$" ++ str ++ "$$`" -inlineToRST (TeX _) = return empty -inlineToRST (HtmlInline _) = return empty -inlineToRST (LineBreak) = do - return $ empty -- there's no line break in RST -inlineToRST Space = return $ char ' ' -inlineToRST (Link [Code str] (src, _)) | src == str || - src == "mailto:" ++ str = do - let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text srcSuffix -inlineToRST (Link txt (src, tit)) = do - useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions) - linktext <- inlineListToRST $ normalizeSpaces txt - if useReferenceLinks - then do refs <- get >>= (return . stLinks) - let refs' = if (txt, (src, tit)) `elem` refs - then refs - else (txt, (src, tit)):refs - modify $ \st -> st { stLinks = refs' } - return $ char '`' <> linktext <> text "`_" - else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" -inlineToRST (Image alternate (source, tit)) = do - pics <- get >>= (return . stImages) - let labelsUsed = map fst pics - let txt = if null alternate || alternate == [Str ""] || - alternate `elem` labelsUsed - then [Str $ "image" ++ show (length pics)] - else alternate - let pics' = if (txt, (source, tit)) `elem` pics - then pics - else (txt, (source, tit)):pics - modify $ \st -> st { stImages = pics' } - label <- inlineListToRST txt - return $ char '|' <> label <> char '|' -inlineToRST (Note contents) = do - -- add to notes in state - notes <- get >>= (return . stNotes) - modify $ \st -> st { stNotes = contents:notes } - let ref = show $ (length notes) + 1 - return $ text " [" <> text ref <> text "]_" diff --git a/Text/Pandoc/Writers/RTF.hs b/Text/Pandoc/Writers/RTF.hs deleted file mode 100644 index fc6cd1bf0..000000000 --- a/Text/Pandoc/Writers/RTF.hs +++ /dev/null @@ -1,291 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' documents to RTF (rich text format). --} -module Text.Pandoc.Writers.RTF ( writeRTF ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Readers.TeXMath -import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, isDigit ) - --- | Convert Pandoc to a string in rich text format. -writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc meta blocks) = - let head' = if writerStandalone options - then rtfHeader (writerHeader options) meta - else "" - toc = if writerTableOfContents options - then tableOfContents $ filter isHeaderBlock blocks - else "" - foot = if writerStandalone options then "\n}\n" else "" - body = writerIncludeBefore options ++ - concatMap (blockToRTF 0 AlignDefault) blocks ++ - writerIncludeAfter options - in head' ++ toc ++ body ++ foot - --- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String -tableOfContents headers = - let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 [Str "Contents"], - BulletList (map elementToListItem contentsTree)] - -elementToListItem :: Element -> [Block] -elementToListItem (Blk _) = [] -elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ - if null subsecs - then [] - else [BulletList (map elementToListItem subsecs)] - --- | Convert unicode characters (> 127) into rich text format representation. -handleUnicode :: String -> String -handleUnicode [] = [] -handleUnicode (c:cs) = - if ord c > 127 - then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs - else c:(handleUnicode cs) - --- | Escape special characters. -escapeSpecial :: String -> String -escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) - --- | Escape strings as needed for rich text format. -stringToRTF :: String -> String -stringToRTF = handleUnicode . escapeSpecial - --- | Escape things as needed for code block in RTF. -codeStringToRTF :: String -> String -codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str) - --- | Make a paragraph with first-line indent, block indent, and space after. -rtfParSpaced :: Int -- ^ space after (in twips) - -> Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfParSpaced spaceAfter indent firstLineIndent alignment content = - let alignString = case alignment of - AlignLeft -> "\\ql " - AlignRight -> "\\qr " - AlignCenter -> "\\qc " - AlignDefault -> "\\ql " - in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ - " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" - --- | Default paragraph. -rtfPar :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfPar = rtfParSpaced 180 - --- | Compact paragraph (e.g. for compact list items). -rtfCompact :: Int -- ^ block indent (in twips) - -> Int -- ^ first line indent (relative to block) (in twips) - -> Alignment -- ^ alignment - -> String -- ^ string with content - -> String -rtfCompact = rtfParSpaced 0 - --- number of twips to indent -indentIncrement :: Int -indentIncrement = 720 - -listIncrement :: Int -listIncrement = 360 - --- | Returns appropriate bullet list marker for indent level. -bulletMarker :: Int -> String -bulletMarker indent = case indent `mod` 720 of - 0 -> "\\bullet " - _ -> "\\endash " - --- | Returns appropriate (list of) ordered list markers for indent level. -orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) = - if style == DefaultStyle && delim == DefaultDelim - then case indent `mod` 720 of - 0 -> orderedListMarkers (start, Decimal, Period) - _ -> orderedListMarkers (start, LowerAlpha, Period) - else orderedListMarkers (start, style, delim) - --- | Returns RTF header. -rtfHeader :: String -- ^ header text - -> Meta -- ^ bibliographic information - -> String -rtfHeader headerText (Meta title authors date) = - let titletext = if null title - then "" - else rtfPar 0 0 AlignCenter $ - "\\b \\fs36 " ++ inlineListToRTF title - authorstext = if null authors - then "" - else rtfPar 0 0 AlignCenter (" " ++ (intercalate "\\" $ - map stringToRTF authors)) - datetext = if date == "" - then "" - else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in - let spacer = if null (titletext ++ authorstext ++ datetext) - then "" - else rtfPar 0 0 AlignDefault "" in - headerText ++ titletext ++ authorstext ++ datetext ++ spacer - --- | Convert Pandoc block element to RTF. -blockToRTF :: Int -- ^ indent level - -> Alignment -- ^ alignment - -> Block -- ^ block to convert - -> String -blockToRTF _ _ Null = "" -blockToRTF indent alignment (Plain lst) = - rtfCompact indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (Para lst) = - rtfPar indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst -blockToRTF indent _ (CodeBlock _ str) = - rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawHtml _) = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ - concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ - zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ - concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = - rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" -blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ - "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = - tableRowToRTF True indent aligns sizes headers ++ - concatMap (tableRowToRTF False indent aligns sizes) rows ++ - rtfPar indent 0 alignment (inlineListToRTF caption) - -tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String -tableRowToRTF header indent aligns sizes cols = - let columns = concat $ zipWith (tableItemToRTF indent) aligns cols - totalTwips = 6 * 1440 -- 6 inches - rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) - (0 :: Integer) sizes - cellDefs = map (\edge -> (if header - then "\\clbrdrb\\brdrs" - else "") ++ "\\cellx" ++ show edge) - rightEdges - start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ - "\\trkeep\\intbl\n{\n" - end = "}\n\\intbl\\row}\n" - in start ++ columns ++ end - -tableItemToRTF :: Int -> Alignment -> [Block] -> String -tableItemToRTF indent alignment item = - let contents = concatMap (blockToRTF indent alignment) item - in "{\\intbl " ++ contents ++ "\\cell}\n" - --- | Ensure that there's the same amount of space after compact --- lists as after regular lists. -spaceAtEnd :: String -> String -spaceAtEnd str = - if isSuffixOf "\\par}\n" str - then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" - else str - --- | Convert list item (list of blocks) to RTF. -listItemToRTF :: Alignment -- ^ alignment - -> Int -- ^ indent level - -> String -- ^ list start marker - -> [Block] -- ^ list item (list of blocks) - -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = - let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list - listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ - show listIncrement ++ "\\tab" - insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker ('\\':'f':'i':d:xs) | isDigit d = - listMarker ++ dropWhile isDigit xs - insertListMarker (x:xs) = - x : insertListMarker xs - insertListMarker [] = [] - -- insert the list marker into the (processed) first block - in insertListMarker first ++ concat rest - --- | Convert definition list item (label, list of blocks) to RTF. -definitionListItemToRTF :: Alignment -- ^ alignment - -> Int -- ^ indent level - -> ([Inline],[Block]) -- ^ list item (list of blocks) - -> [Char] -definitionListItemToRTF alignment indent (label, items) = - let labelText = blockToRTF indent alignment (Plain label) - itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items - in labelText ++ itemsText - --- | Convert list of inline items to RTF. -inlineListToRTF :: [Inline] -- ^ list of inlines to convert - -> String -inlineListToRTF lst = concatMap inlineToRTF lst - --- | Convert inline item to RTF. -inlineToRTF :: Inline -- ^ inline to convert - -> String -inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = - "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = - "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF Apostrophe = "\\u8217'" -inlineToRTF Ellipses = "\\u8230?" -inlineToRTF EmDash = "\\u8212-" -inlineToRTF EnDash = "\\u8211-" -inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" -inlineToRTF (Str str) = stringToRTF str -inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str -inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (TeX _) = "" -inlineToRTF (HtmlInline _) = "" -inlineToRTF (LineBreak) = "\\line " -inlineToRTF Space = " " -inlineToRTF (Link text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ - "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" -inlineToRTF (Image _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" -inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ - (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" diff --git a/Text/Pandoc/Writers/S5.hs b/Text/Pandoc/Writers/S5.hs deleted file mode 100644 index 6f528503a..000000000 --- a/Text/Pandoc/Writers/S5.hs +++ /dev/null @@ -1,157 +0,0 @@ -{-# LANGUAGE CPP, TemplateHaskell #-} -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.S5 - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Definitions for creation of S5 powerpoint-like HTML. -(See .) --} -module Text.Pandoc.Writers.S5 ( - -- * Strings - s5Meta, - s5Javascript, - s5CSS, - s5Links, - -- * Functions - writeS5, - writeS5String, - insertS5Structure - ) where -import Text.Pandoc.Shared ( WriterOptions ) -import Text.Pandoc.TH ( contentsOf ) -import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) -import Text.Pandoc.Definition -import Text.XHtml.Strict -import System.FilePath ( () ) -import Data.List ( intercalate ) - -s5Meta :: String -s5Meta = "\n\n\n" - -s5Javascript :: String -#ifndef __HADDOCK__ -s5Javascript = "\n" -#endif - -s5CoreCSS :: String -#ifndef __HADDOCK__ -s5CoreCSS = $(contentsOf $ "data" "ui" "default" "s5-core.css") -#endif - -s5FramingCSS :: String -#ifndef __HADDOCK__ -s5FramingCSS = $(contentsOf $ "data" "ui" "default" "framing.css") -#endif - -s5PrettyCSS :: String -#ifndef __HADDOCK__ -s5PrettyCSS = $(contentsOf $ "data" "ui" "default" "pretty.css") -#endif - -s5OperaCSS :: String -#ifndef __HADDOCK__ -s5OperaCSS = $(contentsOf $ "data" "ui" "default" "opera.css") -#endif - -s5OutlineCSS :: String -#ifndef __HADDOCK__ -s5OutlineCSS = $(contentsOf $ "data" "ui" "default" "outline.css") -#endif - -s5PrintCSS :: String -#ifndef __HADDOCK__ -s5PrintCSS = $(contentsOf $ "data" "ui" "default" "print.css") -#endif - -s5CSS :: String -s5CSS = "\n\n\n\n" - -s5Links :: String -s5Links = "\n\n\n\n\n\n\n" - --- | Converts Pandoc document to an S5 HTML presentation (Html structure). -writeS5 :: WriterOptions -> Pandoc -> Html -writeS5 options = (writeHtml options) . insertS5Structure - --- | Converts Pandoc document to an S5 HTML presentation (string). -writeS5String :: WriterOptions -> Pandoc -> String -writeS5String options = (writeHtmlString options) . insertS5Structure - --- | Inserts HTML needed for an S5 presentation (e.g. around slides). -layoutDiv :: [Inline] -- ^ Title of document (for header or footer) - -> String -- ^ Date of document (for header or footer) - -> [Block] -- ^ List of block elements returned -layoutDiv title' date = [(RawHtml "
    \n
    \n
    \n
    \n
    \n"), (Header 1 [Str date]), (Header 2 title'), (RawHtml "
    \n
    \n")] - -presentationStart :: Block -presentationStart = RawHtml "
    \n\n" - -presentationEnd :: Block -presentationEnd = RawHtml "
    \n" - -slideStart :: Block -slideStart = RawHtml "
    \n" - -slideEnd :: Block -slideEnd = RawHtml "
    \n" - --- | Returns 'True' if block is a Header 1. -isH1 :: Block -> Bool -isH1 (Header 1 _) = True -isH1 _ = False - --- | Insert HTML around sections to make individual slides. -insertSlides :: Bool -> [Block] -> [Block] -insertSlides beginning blocks = - let (beforeHead, rest) = break isH1 blocks in - if (null rest) then - if beginning then - beforeHead - else - beforeHead ++ [slideEnd] - else - if beginning then - beforeHead ++ - slideStart:(head rest):(insertSlides False (tail rest)) - else - beforeHead ++ - slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) - --- | Insert blocks into 'Pandoc' for slide structure. -insertS5Structure :: Pandoc -> Pandoc -insertS5Structure (Pandoc meta' []) = Pandoc meta' [] -insertS5Structure (Pandoc (Meta title' authors date) blocks) = - let slides = insertSlides True blocks - firstSlide = if not (null title') - then [slideStart, (Header 1 title'), - (Header 3 [Str (intercalate ", " authors)]), - (Header 4 [Str date]), slideEnd] - else [] - newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++ - slides ++ [presentationEnd] - in Pandoc (Meta title' authors date) newBlocks diff --git a/Text/Pandoc/Writers/Texinfo.hs b/Text/Pandoc/Writers/Texinfo.hs deleted file mode 100644 index 305a1a8d0..000000000 --- a/Text/Pandoc/Writers/Texinfo.hs +++ /dev/null @@ -1,474 +0,0 @@ -{- -Copyright (C) 2008 John MacFarlane and Peter Wang - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008 John MacFarlane and Peter Wang - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Conversion of 'Pandoc' format into Texinfo. --} -module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Text.Pandoc.Readers.TeXMath -import Text.Printf ( printf ) -import Data.List ( isSuffixOf ) -import Data.Char ( chr, ord ) -import qualified Data.Set as S -import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) - -data WriterState = - WriterState { stIncludes :: S.Set String -- strings to include in header - } - -{- TODO: - - internal cross references a la HTML - - generated .texi files don't work when run through texi2dvi - -} - --- | Add line to header. -addToHeader :: String -> State WriterState () -addToHeader str = do - st <- get - let includes = stIncludes st - put st {stIncludes = S.insert str includes} - --- | Convert Pandoc to Texinfo. -writeTexinfo :: WriterOptions -> Pandoc -> String -writeTexinfo options document = - render $ evalState (pandocToTexinfo options $ wrapTop document) $ - WriterState { stIncludes = S.empty } - --- | Add a "Top" node around the document, needed by Texinfo. -wrapTop :: Pandoc -> Pandoc -wrapTop (Pandoc (Meta title authors date) blocks) = - Pandoc (Meta title authors date) (Header 0 title : blocks) - -pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState Doc -pandocToTexinfo options (Pandoc meta blocks) = do - main <- blockListToTexinfo blocks - head' <- if writerStandalone options - then texinfoHeader options meta - else return empty - let before = if null (writerIncludeBefore options) - then empty - else text (writerIncludeBefore options) - let after = if null (writerIncludeAfter options) - then empty - else text (writerIncludeAfter options) - let body = before $$ main $$ after - -- XXX toc untested - let toc = if writerTableOfContents options - then text "@contents" - else empty - let foot = if writerStandalone options - then text "@bye" - else empty - return $ head' $$ toc $$ body $$ foot - --- | Insert bibliographic information into Texinfo header. -texinfoHeader :: WriterOptions -- ^ Options, including Texinfo header - -> Meta -- ^ Meta with bibliographic information - -> State WriterState Doc -texinfoHeader options (Meta title authors date) = do - titletext <- if null title - then return empty - else do - t <- inlineListToTexinfo title - return $ text "@title " <> t - headerIncludes <- get >>= return . S.toList . stIncludes - let extras = text $ unlines headerIncludes - let authorstext = map makeAuthor authors - let datetext = if date == "" - then empty - else text $ stringToTexinfo date - - let baseHeader = text $ writerHeader options - let header = baseHeader $$ extras - return $ text "\\input texinfo" $$ - header $$ - text "@ifnottex" $$ - text "@paragraphindent 0" $$ - text "@end ifnottex" $$ - text "@titlepage" $$ - titletext $$ vcat authorstext $$ - datetext $$ - text "@end titlepage" - -makeAuthor :: String -> Doc -makeAuthor author = text $ "@author " ++ (stringToTexinfo author) - --- | Escape things as needed for Texinfo. -stringToTexinfo :: String -> String -stringToTexinfo = escapeStringUsing texinfoEscapes - where texinfoEscapes = [ ('{', "@{") - , ('}', "@}") - , ('@', "@@") - , (',', "@comma{}") -- only needed in argument lists - , ('\160', "@ ") - ] - --- | Puts contents into Texinfo command. -inCmd :: String -> Doc -> Doc -inCmd cmd contents = char '@' <> text cmd <> braces contents - --- | Convert Pandoc block element to Texinfo. -blockToTexinfo :: Block -- ^ Block to convert - -> State WriterState Doc - -blockToTexinfo Null = return empty - -blockToTexinfo (Plain lst) = - inlineListToTexinfo lst - -blockToTexinfo (Para lst) = - inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo - -blockToTexinfo (BlockQuote lst) = do - contents <- blockListToTexinfo lst - return $ text "@quotation" $$ - contents $$ - text "@end quotation" - -blockToTexinfo (CodeBlock _ str) = do - return $ text "@verbatim" $$ - vcat (map text (lines str)) $$ - text "@end verbatim\n" - -blockToTexinfo (RawHtml _) = return empty - -blockToTexinfo (BulletList lst) = do - items <- mapM listItemToTexinfo lst - return $ text "@itemize" $$ - vcat items $$ - text "@end itemize\n" - -blockToTexinfo (OrderedList (start, numstyle, _) lst) = do - items <- mapM listItemToTexinfo lst - return $ text "@enumerate " <> exemplar $$ - vcat items $$ - text "@end enumerate\n" - where - exemplar = case numstyle of - DefaultStyle -> decimal - Decimal -> decimal - UpperRoman -> decimal -- Roman numerals not supported - LowerRoman -> decimal - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - decimal = if start == 1 - then empty - else text (show start) - upperAlpha = text [chr $ ord 'A' + start - 1] - lowerAlpha = text [chr $ ord 'a' + start - 1] - -blockToTexinfo (DefinitionList lst) = do - items <- mapM defListItemToTexinfo lst - return $ text "@table @asis" $$ - vcat items $$ - text "@end table\n" - -blockToTexinfo HorizontalRule = - -- XXX can't get the equivalent from LaTeX.hs to work - return $ text "@iftex" $$ - text "@bigskip@hrule@bigskip" $$ - text "@end iftex" $$ - text "@ifnottex" $$ - text (take 72 $ repeat '-') $$ - text "@end ifnottex" - -blockToTexinfo (Header 0 lst) = do - txt <- if null lst - then return $ text "Top" - else inlineListToTexinfo lst - return $ text "@node Top" $$ - text "@top " <> txt <> char '\n' - -blockToTexinfo (Header level lst) = do - node <- inlineListForNode lst - txt <- inlineListToTexinfo lst - return $ if (level > 0) && (level <= 4) - then text "\n@node " <> node <> char '\n' <> - text (seccmd level) <> txt - else txt - where - seccmd 1 = "@chapter " - seccmd 2 = "@section " - seccmd 3 = "@subsection " - seccmd 4 = "@subsubsection " - seccmd _ = error "illegal seccmd level" - -blockToTexinfo (Table caption aligns widths heads rows) = do - headers <- tableHeadToTexinfo aligns heads - captionText <- inlineListToTexinfo caption - rowsText <- mapM (tableRowToTexinfo aligns) rows - let colWidths = map (printf "%.2f ") widths - let colDescriptors = concat colWidths - let tableBody = text ("@multitable @columnfractions " ++ colDescriptors) $$ - headers $$ - vcat rowsText $$ - text "@end multitable" - return $ if isEmpty captionText - then tableBody <> char '\n' - else text "@float" $$ - tableBody $$ - inCmd "caption" captionText $$ - text "@end float" - -tableHeadToTexinfo :: [Alignment] - -> [[Block]] - -> State WriterState Doc -tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " - -tableRowToTexinfo :: [Alignment] - -> [[Block]] - -> State WriterState Doc -tableRowToTexinfo = tableAnyRowToTexinfo "@item " - -tableAnyRowToTexinfo :: String - -> [Alignment] - -> [[Block]] - -> State WriterState Doc -tableAnyRowToTexinfo itemtype aligns cols = - zipWithM alignedBlock aligns cols >>= - return . (text itemtype $$) . foldl (\row item -> row $$ - (if isEmpty row then empty else text " @tab ") <> item) empty - -alignedBlock :: Alignment - -> [Block] - -> State WriterState Doc --- XXX @flushleft and @flushright text won't get word wrapped. Since word --- wrapping is more important than alignment, we ignore the alignment. -alignedBlock _ = blockListToTexinfo -{- -alignedBlock AlignLeft col = do - b <- blockListToTexinfo col - return $ text "@flushleft" $$ b $$ text "@end flushleft" -alignedBlock AlignRight col = do - b <- blockListToTexinfo col - return $ text "@flushright" $$ b $$ text "@end flushright" -alignedBlock _ col = blockListToTexinfo col --} - --- | Convert Pandoc block elements to Texinfo. -blockListToTexinfo :: [Block] - -> State WriterState Doc -blockListToTexinfo [] = return $ empty -blockListToTexinfo (x:xs) = do - x' <- blockToTexinfo x - case x of - Header level _ -> do - -- We need need to insert a menu for this node. - let (before, after) = break isHeader xs - before' <- blockListToTexinfo before - let menu = if level < 4 - then collectNodes (level + 1) after - else [] - lines' <- mapM makeMenuLine menu - let menu' = if null lines' - then empty - else text "@menu" $$ - vcat lines' $$ - text "@end menu" - after' <- blockListToTexinfo after - return $ x' $$ before' $$ menu' $$ after' - Para _ -> do - xs' <- blockListToTexinfo xs - case xs of - ((CodeBlock _ _):_) -> return $ x' $$ xs' - _ -> return $ x' $$ text "" $$ xs' - _ -> do - xs' <- blockListToTexinfo xs - return $ x' $$ xs' - -isHeader :: Block -> Bool -isHeader (Header _ _) = True -isHeader _ = False - -collectNodes :: Int -> [Block] -> [Block] -collectNodes _ [] = [] -collectNodes level (x:xs) = - case x of - (Header hl _) -> - if hl < level - then [] - else if hl == level - then x : collectNodes level xs - else collectNodes level xs - _ -> - collectNodes level xs - -makeMenuLine :: Block - -> State WriterState Doc -makeMenuLine (Header _ lst) = do - txt <- inlineListForNode lst - return $ text "* " <> txt <> text "::" -makeMenuLine _ = error "makeMenuLine called with non-Header block" - -listItemToTexinfo :: [Block] - -> State WriterState Doc -listItemToTexinfo lst = blockListToTexinfo lst >>= - return . (text "@item" $$) - -defListItemToTexinfo :: ([Inline], [Block]) - -> State WriterState Doc -defListItemToTexinfo (term, def) = do - term' <- inlineListToTexinfo term - def' <- blockListToTexinfo def - return $ text "@item " <> term' <> text "\n" $$ def' - --- | Convert list of inline elements to Texinfo. -inlineListToTexinfo :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat - --- | Convert list of inline elements to Texinfo acceptable for a node name. -inlineListForNode :: [Inline] -- ^ Inlines to convert - -> State WriterState Doc -inlineListForNode lst = mapM inlineForNode lst >>= return . hcat - -inlineForNode :: Inline -> State WriterState Doc -inlineForNode (Str str) = return $ text $ filter (not.disallowedInNode) str -inlineForNode (Emph lst) = inlineListForNode lst -inlineForNode (Strong lst) = inlineListForNode lst -inlineForNode (Strikeout lst) = inlineListForNode lst -inlineForNode (Superscript lst) = inlineListForNode lst -inlineForNode (Subscript lst) = inlineListForNode lst -inlineForNode (SmallCaps lst) = inlineListForNode lst -inlineForNode (Quoted _ lst) = inlineListForNode lst -inlineForNode (Cite _ lst) = inlineListForNode lst -inlineForNode (Code str) = inlineForNode (Str str) -inlineForNode Space = return $ char ' ' -inlineForNode EmDash = return $ text "---" -inlineForNode EnDash = return $ text "--" -inlineForNode Apostrophe = return $ char '\'' -inlineForNode Ellipses = return $ text "..." -inlineForNode LineBreak = return empty -inlineForNode (Math _ str) = inlineListForNode $ readTeXMath str -inlineForNode (TeX _) = return empty -inlineForNode (HtmlInline _) = return empty -inlineForNode (Link lst _) = inlineListForNode lst -inlineForNode (Image lst _) = inlineListForNode lst -inlineForNode (Note _) = return empty - --- periods, commas, colons, and parentheses are disallowed in node names -disallowedInNode :: Char -> Bool -disallowedInNode c = c `elem` ".,:()" - --- | Convert inline element to Texinfo -inlineToTexinfo :: Inline -- ^ Inline to convert - -> State WriterState Doc - -inlineToTexinfo (Emph lst) = - inlineListToTexinfo lst >>= return . inCmd "emph" - -inlineToTexinfo (Strong lst) = - inlineListToTexinfo lst >>= return . inCmd "strong" - -inlineToTexinfo (Strikeout lst) = do - addToHeader $ "@macro textstrikeout{text}\n" ++ - "~~\\text\\~~\n" ++ - "@end macro\n" - contents <- inlineListToTexinfo lst - return $ text "@textstrikeout{" <> contents <> text "}" - -inlineToTexinfo (Superscript lst) = do - addToHeader $ "@macro textsuperscript{text}\n" ++ - "@iftex\n" ++ - "@textsuperscript{\\text\\}\n" ++ - "@end iftex\n" ++ - "@ifnottex\n" ++ - "^@{\\text\\@}\n" ++ - "@end ifnottex\n" ++ - "@end macro\n" - contents <- inlineListToTexinfo lst - return $ text "@textsuperscript{" <> contents <> char '}' - -inlineToTexinfo (Subscript lst) = do - addToHeader $ "@macro textsubscript{text}\n" ++ - "@iftex\n" ++ - "@textsubscript{\\text\\}\n" ++ - "@end iftex\n" ++ - "@ifnottex\n" ++ - "_@{\\text\\@}\n" ++ - "@end ifnottex\n" ++ - "@end macro\n" - contents <- inlineListToTexinfo lst - return $ text "@textsubscript{" <> contents <> char '}' - -inlineToTexinfo (SmallCaps lst) = - inlineListToTexinfo lst >>= return . inCmd "sc" - -inlineToTexinfo (Code str) = do - return $ text $ "@code{" ++ stringToTexinfo str ++ "}" - -inlineToTexinfo (Quoted SingleQuote lst) = do - contents <- inlineListToTexinfo lst - return $ char '`' <> contents <> char '\'' - -inlineToTexinfo (Quoted DoubleQuote lst) = do - contents <- inlineListToTexinfo lst - return $ text "``" <> contents <> text "''" - -inlineToTexinfo (Cite _ lst) = - inlineListToTexinfo lst -inlineToTexinfo Apostrophe = return $ char '\'' -inlineToTexinfo EmDash = return $ text "---" -inlineToTexinfo EnDash = return $ text "--" -inlineToTexinfo Ellipses = return $ text "@dots{}" -inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) -inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (TeX str) = return $ text "@tex" $$ text str $$ text "@end tex" -inlineToTexinfo (HtmlInline _) = return empty -inlineToTexinfo (LineBreak) = return $ text "@*" -inlineToTexinfo Space = return $ char ' ' - -inlineToTexinfo (Link txt (src, _)) = do - case txt of - [Code x] | x == src -> -- autolink - do return $ text $ "@url{" ++ x ++ "}" - _ -> do contents <- inlineListToTexinfo txt - let src1 = stringToTexinfo src - return $ text ("@uref{" ++ src1 ++ ",") <> contents <> - char '}' - -inlineToTexinfo (Image alternate (source, _)) = do - content <- inlineListToTexinfo alternate - return $ text ("@image{" ++ base ++ ",,,") <> content <> text "," <> - text (ext ++ "}") - where - (revext, revbase) = break (=='.') (reverse source) - ext = reverse revext - base = case revbase of - ('.' : rest) -> reverse rest - _ -> reverse revbase - -inlineToTexinfo (Note contents) = do - contents' <- blockListToTexinfo contents - let rawnote = stripTrailingNewlines $ render contents' - let optNewline = "@end verbatim" `isSuffixOf` rawnote - return $ text "@footnote{" <> - text rawnote <> - (if optNewline then char '\n' else empty) <> - char '}' diff --git a/Text/Pandoc/XML.hs b/Text/Pandoc/XML.hs deleted file mode 100644 index 14e2eebbb..000000000 --- a/Text/Pandoc/XML.hs +++ /dev/null @@ -1,88 +0,0 @@ -{- -Copyright (C) 2006-7 John MacFarlane - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-7 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane - Stability : alpha - Portability : portable - -Functions for escaping and formatting XML. --} -module Text.Pandoc.XML ( escapeCharForXML, - escapeStringForXML, - inTags, - selfClosingTag, - inTagsSimple, - inTagsIndented ) where -import Text.PrettyPrint.HughesPJ - --- | Escape one character as needed for XML. -escapeCharForXML :: Char -> String -escapeCharForXML x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - '\160' -> " " - c -> [c] - --- | True if the character needs to be escaped. -needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"\160" - --- | Escape string as needed for XML. Entity references are not preserved. -escapeStringForXML :: String -> String -escapeStringForXML "" = "" -escapeStringForXML str = - case break needsEscaping str of - (okay, "") -> okay - (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs - --- | Return a text object with a string of formatted XML attributes. -attributeList :: [(String, String)] -> Doc -attributeList = text . concatMap - (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ - escapeStringForXML b ++ "\"") - --- | Put the supplied contents between start and end tags of tagType, --- with specified attributes and (if specified) indentation. -inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc -inTags isIndented tagType attribs contents = - let openTag = char '<' <> text tagType <> attributeList attribs <> - char '>' - closeTag = text " text tagType <> char '>' - in if isIndented - then openTag $$ nest 2 contents $$ closeTag - else openTag <> contents <> closeTag - --- | Return a self-closing tag of tagType with specified attributes -selfClosingTag :: String -> [(String, String)] -> Doc -selfClosingTag tagType attribs = - char '<' <> text tagType <> attributeList attribs <> text " />" - --- | Put the supplied contents between start and end tags of tagType. -inTagsSimple :: String -> Doc -> Doc -inTagsSimple tagType = inTags False tagType [] - --- | Put the supplied contents in indented block btw start and end tags. -inTagsIndented :: String -> Doc -> Doc -inTagsIndented tagType = inTags True tagType [] diff --git a/pandoc.cabal b/pandoc.cabal index 011d6814f..13cdb4b3c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -158,7 +158,7 @@ Library Build-depends: citeproc-hs Exposed-Modules: Text.Pandoc.Biblio cpp-options: -D_CITEPROC - Hs-Source-Dirs: . + Hs-Source-Dirs: src Exposed-Modules: Text.Pandoc, Text.Pandoc.Blocks, Text.Pandoc.Definition, @@ -198,7 +198,7 @@ Library Buildable: False Executable pandoc - Hs-Source-Dirs: . + Hs-Source-Dirs: src Main-Is: Main.hs Ghc-Options: -O2 -Wall -threaded Ghc-Prof-Options: -auto-all diff --git a/src/Main.hs b/src/Main.hs new file mode 100644 index 000000000..e498b3c0a --- /dev/null +++ b/src/Main.hs @@ -0,0 +1,681 @@ +{-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Main + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Parses command-line options and calls the appropriate readers and +writers. +-} +module Main where +import Text.Pandoc +import Text.Pandoc.ODT +import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) ) +import Text.Pandoc.Highlighting ( languages ) +import System.Environment ( getArgs, getProgName, getEnvironment ) +import System.Exit ( exitWith, ExitCode (..) ) +import System.FilePath +import System.Console.GetOpt +import Data.Maybe ( fromMaybe ) +import Data.Char ( toLower ) +import Data.List ( intercalate, isSuffixOf ) +import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) +import System.IO ( stdout, stderr ) +import System.IO.UTF8 +#ifdef _CITEPROC +import Text.CSL +import Text.Pandoc.Biblio +#endif +import Text.Pandoc.Plugins (getPlugin) +import Control.Monad (foldM, when, unless) + +copyrightMessage :: String +copyrightMessage = "\nCopyright (C) 2006-8 John MacFarlane\n" ++ + "Web: http://johnmacfarlane.net/pandoc\n" ++ + "This is free software; see the source for copying conditions. There is no\n" ++ + "warranty, not even for merchantability or fitness for a particular purpose." + +compileInfo :: String +compileInfo = +#ifdef _CITEPROC + " +citeproc" ++ +#else + " -citeproc" ++ +#endif +#ifdef _HIGHLIGHTING + " +highlighting" ++ +#else + " -highlighting" ++ +#endif + if null languages + then "\n" + else "\nCompiled with syntax highlighting support for:\n" ++ wrapWords 78 languages + +-- | Converts a list of strings into a single string with the items printed as +-- comma separated words in lines with a maximum line length. +wrapWords :: Int -> [String] -> String +wrapWords c = wrap' c c where + wrap' _ _ [] = "" + wrap' cols remaining (x:xs) = if remaining == cols + then x ++ wrap' cols (remaining - length x) xs + else if (length x + 1) > remaining + then ",\n" ++ x ++ wrap' cols (cols - length x) xs + else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native" , readPandoc) + ,("markdown" , readMarkdown) + ,("markdown+lhs" , readMarkdown) + ,("rst" , readRST) + ,("rst+lhs" , readRST) + ,("html" , readHtml) + ,("latex" , readLaTeX) + ,("latex+lhs" , readLaTeX) + ] + +-- | Reader for native Pandoc format. +readPandoc :: ParserState -> String -> Pandoc +readPandoc _ = read + +-- | Association list of formats and pairs of writers and default headers. +writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] +writers = [("native" , (writeDoc, "")) + ,("html" , (writeHtmlString, "")) + ,("html+lhs" , (writeHtmlString, "")) + ,("s5" , (writeS5String, defaultS5Header)) + ,("docbook" , (writeDocbook, defaultDocbookHeader)) + ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader)) + ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader)) + ,("latex" , (writeLaTeX, defaultLaTeXHeader)) + ,("latex+lhs" , (writeLaTeX, defaultLaTeXHeader)) + ,("context" , (writeConTeXt, defaultConTeXtHeader)) + ,("texinfo" , (writeTexinfo, "")) + ,("man" , (writeMan, "")) + ,("markdown" , (writeMarkdown, "")) + ,("markdown+lhs" , (writeMarkdown, "")) + ,("rst" , (writeRST, "")) + ,("rst+lhs" , (writeRST, "")) + ,("mediawiki" , (writeMediaWiki, "")) + ,("rtf" , (writeRTF, defaultRTFHeader)) + ] + +isNonTextOutput :: String -> Bool +isNonTextOutput = (`elem` ["odt"]) + +-- | Writer for Pandoc native format. +writeDoc :: WriterOptions -> Pandoc -> String +writeDoc _ = prettyPandoc + +-- | Data structure for command line options. +data Opt = Opt + { optPreserveTabs :: Bool -- ^ Convert tabs to spaces + , optTabStop :: Int -- ^ Number of spaces per tab + , optStandalone :: Bool -- ^ Include header, footer + , optReader :: String -- ^ Reader format + , optWriter :: String -- ^ Writer format + , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX + , optCSS :: [String] -- ^ CSS file to link to + , optTableOfContents :: Bool -- ^ Include table of contents + , optIncludeInHeader :: String -- ^ File to include in header + , optIncludeBeforeBody :: String -- ^ File to include at top of body + , optIncludeAfterBody :: String -- ^ File to include at end of body + , optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT" + , optTitlePrefix :: String -- ^ Optional prefix for HTML title + , optOutputFile :: String -- ^ Name of output file + , optNumberSections :: Bool -- ^ Number sections in LaTeX + , optIncremental :: Bool -- ^ Use incremental lists in S5 + , optSmart :: Bool -- ^ Use smart typography + , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math + , optDumpArgs :: Bool -- ^ Output command-line arguments + , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments + , optStrict :: Bool -- ^ Use strict markdown syntax + , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , optWrapText :: Bool -- ^ Wrap text + , optSanitizeHTML :: Bool -- ^ Sanitize HTML + , optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply + , optEmailObfuscation :: ObfuscationMethod +#ifdef _CITEPROC + , optBiblioFile :: String + , optBiblioFormat :: String + , optCslFile :: String +#endif + } + +-- | Defaults for command-line options. +defaultOpts :: Opt +defaultOpts = Opt + { optPreserveTabs = False + , optTabStop = 4 + , optStandalone = False + , optReader = "" -- null for default reader + , optWriter = "" -- null for default writer + , optParseRaw = False + , optCSS = [] + , optTableOfContents = False + , optIncludeInHeader = "" + , optIncludeBeforeBody = "" + , optIncludeAfterBody = "" + , optCustomHeader = "DEFAULT" + , optTitlePrefix = "" + , optOutputFile = "-" -- "-" means stdout + , optNumberSections = False + , optIncremental = False + , optSmart = False + , optHTMLMathMethod = PlainMath + , optDumpArgs = False + , optIgnoreArgs = False + , optStrict = False + , optReferenceLinks = False + , optWrapText = True + , optSanitizeHTML = False + , optPlugins = [] + , optEmailObfuscation = JavascriptObfuscation +#ifdef _CITEPROC + , optBiblioFile = [] + , optBiblioFormat = [] + , optCslFile = [] +#endif + } + +-- | A list of functions, each transforming the options data structure +-- in response to a command-line option. +options :: [OptDescr (Opt -> IO Opt)] +options = + [ Option "fr" ["from","read"] + (ReqArg + (\arg opt -> return opt { optReader = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")") + + , Option "tw" ["to","write"] + (ReqArg + (\arg opt -> return opt { optWriter = map toLower arg }) + "FORMAT") + "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")") + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" + + , Option "o" ["output"] + (ReqArg + (\arg opt -> return opt { optOutputFile = arg }) + "FILENAME") + "" -- "Name of output file" + + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] + (ReqArg + (\arg opt -> return opt { optTabStop = (read arg) } ) + "TABSTOP") + "" -- "Tab stop (default 4)" + + , Option "" ["strict"] + (NoArg + (\opt -> return opt { optStrict = True } )) + "" -- "Disable markdown syntax extensions" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "R" ["parse-raw"] + (NoArg + (\opt -> return opt { optParseRaw = True })) + "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" + + , Option "S" ["smart"] + (NoArg + (\opt -> return opt { optSmart = True })) + "" -- "Use smart quotes, dashes, and ellipses" + + , Option "m" ["latexmathml", "asciimathml"] + (OptArg + (\arg opt -> return opt { optHTMLMathMethod = + LaTeXMathML arg }) + "URL") + "" -- "Use LaTeXMathML script in html output" + + , Option "" ["mimetex"] + (OptArg + (\arg opt -> return opt { optHTMLMathMethod = MimeTeX + (fromMaybe "/cgi-bin/mimetex.cgi" arg)}) + "URL") + "" -- "Use mimetex for HTML math" + + , Option "" ["jsmath"] + (OptArg + (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) + "URL") + "" -- "Use jsMath for HTML math" + + , Option "" ["gladtex"] + (NoArg + (\opt -> return opt { optHTMLMathMethod = GladTeX })) + "" -- "Use gladtex for HTML math" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in S5" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["no-wrap"] + (NoArg + (\opt -> return opt { optWrapText = False })) + "" -- "Do not wrap text in output" + + , Option "" ["sanitize-html"] + (NoArg + (\opt -> return opt { optSanitizeHTML = True })) + "" -- "Sanitize HTML" + + , Option "" ["email-obfuscation"] + (ReqArg + (\arg opt -> do + method <- case arg of + "references" -> return ReferenceObfuscation + "javascript" -> return JavascriptObfuscation + "none" -> return NoObfuscation + _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >> + exitWith (ExitFailure 6) + return opt { optEmailObfuscation = method }) + "none|javascript|references") + "" -- "Method for obfuscating email in HTML" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "c" ["css"] + (ReqArg + (\arg opt -> do + let old = optCSS opt + return opt { optCSS = old ++ [arg], + optStandalone = True }) + "CSS") + "" -- "Link to CSS style sheet" + + , Option "H" ["include-in-header"] + (ReqArg + (\arg opt -> do + let old = optIncludeInHeader opt + text <- readFile arg + return opt { optIncludeInHeader = old ++ text, + optStandalone = True }) + "FILENAME") + "" -- "File to include at end of header (implies -s)" + + , Option "B" ["include-before-body"] + (ReqArg + (\arg opt -> do + let old = optIncludeBeforeBody opt + text <- readFile arg + return opt { optIncludeBeforeBody = old ++ text }) + "FILENAME") + "" -- "File to include before document body" + + , Option "A" ["include-after-body"] + (ReqArg + (\arg opt -> do + let old = optIncludeAfterBody opt + text <- readFile arg + return opt { optIncludeAfterBody = old ++ text }) + "FILENAME") + "" -- "File to include after document body" + + , Option "C" ["custom-header"] + (ReqArg + (\arg opt -> do + text <- readFile arg + return opt { optCustomHeader = text, + optStandalone = True }) + "FILENAME") + "" -- "File to use for custom header (implies -s)" + + , Option "P" ["plugins"] + (ReqArg + (\arg opt -> do + let pluginModules = splitBy ',' arg + plugins <- mapM getPlugin pluginModules + return opt { optPlugins = plugins }) + "MODULE[,MODULE...]") + "" -- "Haskell modules" + + , Option "T" ["title-prefix"] + (ReqArg + (\arg opt -> return opt { optTitlePrefix = arg, + optStandalone = True }) + "STRING") + "" -- "String to prefix to HTML window title" + + , Option "D" ["print-default-header"] + (ReqArg + (\arg _ -> do + let header = case (lookup arg writers) of + Just (_, h) -> h + Nothing -> error ("Unknown reader: " ++ arg) + hPutStr stdout header + exitWith ExitSuccess) + "FORMAT") + "" -- "Print default header for FORMAT" +#ifdef _CITEPROC + , Option "" ["biblio"] + (ReqArg + (\arg opt -> return opt { optBiblioFile = arg} ) + "FILENAME") + "" + , Option "" ["biblio-format"] + (ReqArg + (\arg opt -> return opt { optBiblioFormat = arg} ) + "STRING") + "" + , Option "" ["csl"] + (ReqArg + (\arg opt -> return opt { optCslFile = arg} ) + "FILENAME") + "" +#endif + , Option "" ["dump-args"] + (NoArg + (\opt -> return opt { optDumpArgs = True })) + "" -- "Print output filename and arguments to stdout." + + , Option "" ["ignore-args"] + (NoArg + (\opt -> return opt { optIgnoreArgs = True })) + "" -- "Ignore command-line arguments." + + , Option "v" ["version"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStrLn stderr (prg ++ " " ++ pandocVersion ++ compileInfo ++ + copyrightMessage) + exitWith $ ExitFailure 4)) + "" -- "Print version" + + , Option "h" ["help"] + (NoArg + (\_ -> do + prg <- getProgName + hPutStr stderr (usageMessage prg options) + exitWith $ ExitFailure 2)) + "" -- "Show help" + ] + +-- Returns usage message +usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String +usageMessage programName = usageInfo + (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ + (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++ + (intercalate ", " $ map fst writers) ++ "\nOptions:") + +-- Determine default reader based on source file extensions +defaultReaderName :: [FilePath] -> String +defaultReaderName [] = "markdown" +defaultReaderName (x:xs) = + case takeExtension (map toLower x) of + ".xhtml" -> "html" + ".html" -> "html" + ".htm" -> "html" + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".rst" -> "rst" + ".lhs" -> "markdown+lhs" + ".native" -> "native" + _ -> defaultReaderName xs + +-- Returns True if extension of first source is .lhs +lhsExtension :: [FilePath] -> Bool +lhsExtension (x:_) = takeExtension x == ".lhs" +lhsExtension _ = False + +-- Determine default writer based on output file extension +defaultWriterName :: FilePath -> String +defaultWriterName "-" = "html" -- no output file +defaultWriterName x = + case takeExtension (map toLower x) of + "" -> "markdown" -- empty extension + ".tex" -> "latex" + ".latex" -> "latex" + ".ltx" -> "latex" + ".context" -> "context" + ".ctx" -> "context" + ".rtf" -> "rtf" + ".rst" -> "rst" + ".s5" -> "s5" + ".native" -> "native" + ".txt" -> "markdown" + ".text" -> "markdown" + ".md" -> "markdown" + ".markdown" -> "markdown" + ".lhs" -> "markdown+lhs" + ".texi" -> "texinfo" + ".texinfo" -> "texinfo" + ".db" -> "docbook" + ".odt" -> "odt" + ['.',y] | y `elem` ['1'..'9'] -> "man" + _ -> "html" + +main :: IO () +main = do + + rawArgs <- getArgs + prg <- getProgName + let compatMode = (prg == "hsmarkdown") + + let (actions, args, errors) = if compatMode + then ([], rawArgs, []) + else getOpt Permute options rawArgs + + unless (null errors) $ + do name <- getProgName + mapM_ (\e -> hPutStrLn stderr e) errors + hPutStr stderr (usageMessage name options) + exitWith $ ExitFailure 2 + + let defaultOpts' = if compatMode + then defaultOpts { optReader = "markdown" + , optWriter = "html" + , optStrict = True } + else defaultOpts + + -- thread option data structure through all supplied option actions + opts <- foldl (>>=) (return defaultOpts') actions + + let Opt { optPreserveTabs = preserveTabs + , optTabStop = tabStop + , optStandalone = standalone + , optReader = readerName + , optWriter = writerName + , optParseRaw = parseRaw + , optCSS = css + , optTableOfContents = toc + , optIncludeInHeader = includeHeader + , optIncludeBeforeBody = includeBefore + , optIncludeAfterBody = includeAfter + , optCustomHeader = customHeader + , optTitlePrefix = titlePrefix + , optOutputFile = outputFile + , optNumberSections = numberSections + , optIncremental = incremental + , optSmart = smart + , optHTMLMathMethod = mathMethod + , optDumpArgs = dumpArgs + , optIgnoreArgs = ignoreArgs + , optStrict = strict + , optReferenceLinks = referenceLinks + , optWrapText = wrap + , optSanitizeHTML = sanitize + , optPlugins = plugins + , optEmailObfuscation = obfuscationMethod +#ifdef _CITEPROC + , optBiblioFile = biblioFile + , optBiblioFormat = biblioFormat + , optCslFile = cslFile +#endif + } = opts + + when dumpArgs $ + do hPutStrLn stdout outputFile + mapM_ (\arg -> hPutStrLn stdout arg) args + exitWith ExitSuccess + + let sources = if ignoreArgs then [] else args + + -- assign reader and writer based on options and filenames + let readerName' = if null readerName + then defaultReaderName sources + else readerName + + let writerName' = if null writerName + then defaultWriterName outputFile + else writerName + + reader <- case (lookup readerName' readers) of + Just r -> return r + Nothing -> error ("Unknown reader: " ++ readerName') + + (writer, defaultHeader) <- case (lookup writerName' writers) of + Just (w,h) -> return (w, h) + Nothing -> error ("Unknown writer: " ++ writerName') + + environment <- getEnvironment + let columns = case lookup "COLUMNS" environment of + Just cols -> read cols + Nothing -> stateColumns defaultParserState + + let tabFilter _ [] = "" + tabFilter _ ('\n':xs) = '\n' : tabFilter tabStop xs + -- remove DOS line endings + tabFilter _ ('\r':'\n':xs) = '\n' : tabFilter tabStop xs + tabFilter _ ('\r':xs) = '\n' : tabFilter tabStop xs + tabFilter spsToNextStop ('\t':xs) = + if preserveTabs + then '\t' : tabFilter tabStop xs + else replicate spsToNextStop ' ' ++ tabFilter tabStop xs + tabFilter 1 (x:xs) = + x : tabFilter tabStop xs + tabFilter spsToNextStop (x:xs) = + x : tabFilter (spsToNextStop - 1) xs + + let standalone' = (standalone && not strict) || isNonTextOutput writerName' + +#ifdef _CITEPROC + refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat +#endif + + let startParserState = + defaultParserState { stateParseRaw = parseRaw, + stateTabStop = tabStop, + stateSanitizeHTML = sanitize, + stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || + lhsExtension sources, + stateStandalone = standalone', +#ifdef _CITEPROC + stateCitations = map citeKey refs, +#endif + stateSmart = smart || writerName' `elem` + ["latex", "context"], + stateColumns = columns, + stateStrict = strict } + let csslink = if null css + then "" + else concatMap + (\f -> "\n") + css + let header = (if customHeader == "DEFAULT" + then defaultHeader + else customHeader) ++ csslink ++ includeHeader + let writerOptions = WriterOptions { writerStandalone = standalone', + writerHeader = header, + writerTitlePrefix = titlePrefix, + writerTabStop = tabStop, + writerTableOfContents = toc && + not strict && + writerName' /= "s5", + writerHTMLMathMethod = mathMethod, + writerS5 = (writerName' == "s5"), + writerIgnoreNotes = False, + writerIncremental = incremental, + writerNumberSections = numberSections, + writerIncludeBefore = includeBefore, + writerIncludeAfter = includeAfter, + writerStrictMarkdown = strict, + writerReferenceLinks = referenceLinks, + writerWrapText = wrap, + writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' || + lhsExtension [outputFile], + writerEmailObfuscation = if strict + then ReferenceObfuscation + else obfuscationMethod } + + when (isNonTextOutput writerName' && outputFile == "-") $ + do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ + "Specify an output file using the -o option.") + exitWith $ ExitFailure 5 + + let sourceDirRelative = if null sources + then "" + else takeDirectory (head sources) + + let readSources [] = mapM readSource ["-"] + readSources srcs = mapM readSource srcs + readSource "-" = getContents + readSource src = readFile src + + doc <- fmap (reader startParserState . tabFilter tabStop . intercalate "\n") (readSources sources) + + doc' <- do +#ifdef _CITEPROC + processBiblio cslFile refs doc +#else + return doc +#endif + + doc'' <- foldM (flip ($)) doc' plugins + + let writerOutput = writer writerOptions doc'' ++ "\n" + + case writerName' of + "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative writerOutput + _ -> if outputFile == "-" + then putStr writerOutput + else writeFile outputFile writerOutput diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs new file mode 100644 index 000000000..e97103f97 --- /dev/null +++ b/src/Text/Pandoc.hs @@ -0,0 +1,114 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +This helper module exports the main writers, readers, and data +structure definitions from the Pandoc libraries. + +A typical application will chain together a reader and a writer +to convert strings from one format to another. For example, the +following simple program will act as a filter converting markdown +fragments to reStructuredText, using reference-style links instead of +inline links: + +> module Main where +> import Text.Pandoc +> import qualified System.IO.UTF8 as U +> +> markdownToRST :: String -> String +> markdownToRST = +> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . +> readMarkdown defaultParserState +> +> main = U.getContents >>= U.putStrLn . markdownToRST + +-} + +module Text.Pandoc + ( + -- * Definitions + module Text.Pandoc.Definition + -- * Readers: converting /to/ Pandoc format + , readMarkdown + , readRST + , readLaTeX + , readHtml + -- * Parser state used in readers + , ParserState (..) + , defaultParserState + , ParserContext (..) + , QuoteContext (..) + , KeyTable + , NoteTable + , HeaderType (..) + -- * Writers: converting /from/ Pandoc format + , writeMarkdown + , writeRST + , writeLaTeX + , writeConTeXt + , writeTexinfo + , writeHtml + , writeHtmlString + , writeS5 + , writeS5String + , writeDocbook + , writeOpenDocument + , writeMan + , writeMediaWiki + , writeRTF + , prettyPandoc + -- * Writer options used in writers + , WriterOptions (..) + , defaultWriterOptions + -- * Default headers for various output formats + , module Text.Pandoc.DefaultHeaders + -- * Version + , pandocVersion + ) where + +import Text.Pandoc.Definition +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.LaTeX +import Text.Pandoc.Readers.HTML +import Text.Pandoc.Writers.Markdown +import Text.Pandoc.Writers.RST +import Text.Pandoc.Writers.LaTeX +import Text.Pandoc.Writers.ConTeXt +import Text.Pandoc.Writers.Texinfo +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.S5 +import Text.Pandoc.Writers.Docbook +import Text.Pandoc.Writers.OpenDocument +import Text.Pandoc.Writers.Man +import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.MediaWiki +import Text.Pandoc.DefaultHeaders +import Text.Pandoc.Shared + +-- | Version number of pandoc library. +pandocVersion :: String +pandocVersion = "1.1" diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs new file mode 100644 index 000000000..1d93f19c1 --- /dev/null +++ b/src/Text/Pandoc/Biblio.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE PatternGuards #-} +{- +Copyright (C) 2008 Andrea Rossato + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Biblio + Copyright : Copyright (C) 2008 Andrea Rossato + License : GNU GPL, version 2 or above + + Maintainer : Andrea Rossato + Stability : alpha + Portability : portable +-} + +module Text.Pandoc.Biblio ( processBiblio ) where + +import Control.Monad ( when ) +import Data.List +import Text.CSL +import Text.Pandoc.Definition + +-- | Process a 'Pandoc' document by adding citations formatted +-- according to a CSL style, using 'citeproc' from citeproc-hs. +processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc +processBiblio cf r p + = if null r then return p + else do + when (null cf) $ error "Missing the needed citation style file" + csl <- readCSLFile cf + let groups = queryPandoc getCite p + result = citeproc csl r groups + cits_map = zip groups (citations result) + biblioList = map (read . renderPandoc' csl) (bibliography result) + Pandoc m b = processPandoc (processCite csl cits_map) p + return $ Pandoc m $ b ++ biblioList + +-- | Substitute 'Cite' elements with formatted citations. +processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline +processCite s cs il + | Cite t _ <- il = Cite t (process t) + | otherwise = il + where + process t = case elemIndex t (map fst cs) of + Just i -> read . renderPandoc s $ snd (cs !! i) + Nothing -> [Str ("Error processing " ++ show t)] + +-- | Retrieve all citations from a 'Pandoc' docuument. To be used with +-- 'queryPandoc'. +getCite :: Inline -> [[(String,String)]] +getCite i | Cite t _ <- i = [t] + | otherwise = [] diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs new file mode 100644 index 000000000..122931773 --- /dev/null +++ b/src/Text/Pandoc/Blocks.hs @@ -0,0 +1,146 @@ +{- +Copyright (C) 2007 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Blocks + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for the manipulation of fixed-width blocks of text. +These are used in the construction of plain-text tables. +-} + +module Text.Pandoc.Blocks + ( + TextBlock (..), + docToBlock, + blockToDoc, + widthOfBlock, + heightOfBlock, + hcatBlocks, + hsepBlocks, + centerAlignBlock, + leftAlignBlock, + rightAlignBlock + ) +where +import Text.PrettyPrint +import Data.List ( intersperse ) + +-- | A fixed-width block of text. Parameters are width of block, +-- height of block, and list of lines. +data TextBlock = TextBlock Int Int [String] +instance Show TextBlock where + show x = show $ blockToDoc x + +-- | Break lines in a list of lines so that none are greater than +-- a given width. +breakLines :: Int -- ^ Maximum length of lines. + -> [String] -- ^ List of lines. + -> [String] +breakLines _ [] = [] +breakLines width (l:ls) = + if length l > width + then (take width l):(breakLines width ((drop width l):ls)) + else l:(breakLines width ls) + +-- | Convert a @Doc@ element into a @TextBlock@ with a specified width. +docToBlock :: Int -- ^ Width of text block. + -> Doc -- ^ @Doc@ to convert. + -> TextBlock +docToBlock width doc = + let rendered = renderStyle (style {lineLength = width, + ribbonsPerLine = 1}) doc + lns = breakLines width $ lines rendered + in TextBlock width (length lns) lns + +-- | Convert a @TextBlock@ to a @Doc@ element. +blockToDoc :: TextBlock -> Doc +blockToDoc (TextBlock _ _ lns) = + if null lns + then empty + else vcat $ map text lns + +-- | Returns width of a @TextBlock@ (number of columns). +widthOfBlock :: TextBlock -> Int +widthOfBlock (TextBlock width _ _) = width + +-- | Returns height of a @TextBlock@ (number of rows). +heightOfBlock :: TextBlock -> Int +heightOfBlock (TextBlock _ height _) = height + +-- | Pads a string out to a given width using spaces. +hPad :: Int -- ^ Desired width. + -> String -- ^ String to pad. + -> String +hPad width line = + let linelen = length line + in if linelen <= width + then line ++ replicate (width - linelen) ' ' + else take width line + +-- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in +-- which they appear side by side. +hcatBlocks :: [TextBlock] -> TextBlock +hcatBlocks [] = TextBlock 0 0 [] +hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd. +hcatBlocks ((TextBlock width1 height1 lns1):xs) = + let (TextBlock width2 height2 lns2) = hcatBlocks xs + height = max height1 height2 + width = width1 + width2 + lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) "" + lns2' = lns2 ++ replicate (height - height2) "" + lns = zipWith (++) lns1' lns2' + in TextBlock width height lns + +-- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. +hsepBlocks :: [TextBlock] -> TextBlock +hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) + +isWhitespace :: Char -> Bool +isWhitespace x = x `elem` " \t" + +-- | Left-aligns the contents of a @TextBlock@ within the block. +leftAlignBlock :: TextBlock -> TextBlock +leftAlignBlock (TextBlock width height lns) = + TextBlock width height $ map (dropWhile isWhitespace) lns + +-- | Right-aligns the contents of a @TextBlock@ within the block. +rightAlignBlock :: TextBlock -> TextBlock +rightAlignBlock (TextBlock width height lns) = + let rightAlignLine ln = + let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln + in reverse (rest ++ spaces) + in TextBlock width height $ map rightAlignLine lns + +-- | Centers the contents of a @TextBlock@ within the block. +centerAlignBlock :: TextBlock -> TextBlock +centerAlignBlock (TextBlock width height lns) = + let centerAlignLine ln = + let ln' = hPad width ln + (startSpaces, rest) = span isWhitespace ln' + endSpaces = takeWhile isWhitespace (reverse ln') + numSpaces = length (startSpaces ++ endSpaces) + startSpaces' = replicate (quot numSpaces 2) ' ' + in startSpaces' ++ rest + in TextBlock width height $ map centerAlignLine lns + diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs new file mode 100644 index 000000000..b0f4f6019 --- /dev/null +++ b/src/Text/Pandoc/CharacterReferences.hs @@ -0,0 +1,327 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.CharacterReferences + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for parsing character references. +-} +module Text.Pandoc.CharacterReferences ( + characterReference, + decodeCharacterReferences, + ) where +import Data.Char ( chr ) +import Text.ParserCombinators.Parsec +import qualified Data.Map as Map + +-- | Parse character entity. +characterReference :: GenParser Char st Char +characterReference = try $ do + char '&' + character <- numRef <|> entity + char ';' + return character + +numRef :: GenParser Char st Char +numRef = do + char '#' + num <- hexNum <|> decNum + return $ chr $ num + +hexNum :: GenParser Char st Int +hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . ("0x" ++) + +decNum :: GenParser Char st Int +decNum = many1 digit >>= return . read + +entity :: GenParser Char st Char +entity = do + body <- many1 alphaNum + return $ Map.findWithDefault '?' body entityTable + +-- | Convert entities in a string to characters. +decodeCharacterReferences :: String -> String +decodeCharacterReferences str = + case parse (many (characterReference <|> anyChar)) str str of + Left err -> error $ "\nError: " ++ show err + Right result -> result + +entityTable :: Map.Map String Char +entityTable = Map.fromList entityTableList + +entityTableList :: [(String, Char)] +entityTableList = [ + ("quot", chr 34), + ("amp", chr 38), + ("lt", chr 60), + ("gt", chr 62), + ("nbsp", chr 160), + ("iexcl", chr 161), + ("cent", chr 162), + ("pound", chr 163), + ("curren", chr 164), + ("yen", chr 165), + ("brvbar", chr 166), + ("sect", chr 167), + ("uml", chr 168), + ("copy", chr 169), + ("ordf", chr 170), + ("laquo", chr 171), + ("not", chr 172), + ("shy", chr 173), + ("reg", chr 174), + ("macr", chr 175), + ("deg", chr 176), + ("plusmn", chr 177), + ("sup2", chr 178), + ("sup3", chr 179), + ("acute", chr 180), + ("micro", chr 181), + ("para", chr 182), + ("middot", chr 183), + ("cedil", chr 184), + ("sup1", chr 185), + ("ordm", chr 186), + ("raquo", chr 187), + ("frac14", chr 188), + ("frac12", chr 189), + ("frac34", chr 190), + ("iquest", chr 191), + ("Agrave", chr 192), + ("Aacute", chr 193), + ("Acirc", chr 194), + ("Atilde", chr 195), + ("Auml", chr 196), + ("Aring", chr 197), + ("AElig", chr 198), + ("Ccedil", chr 199), + ("Egrave", chr 200), + ("Eacute", chr 201), + ("Ecirc", chr 202), + ("Euml", chr 203), + ("Igrave", chr 204), + ("Iacute", chr 205), + ("Icirc", chr 206), + ("Iuml", chr 207), + ("ETH", chr 208), + ("Ntilde", chr 209), + ("Ograve", chr 210), + ("Oacute", chr 211), + ("Ocirc", chr 212), + ("Otilde", chr 213), + ("Ouml", chr 214), + ("times", chr 215), + ("Oslash", chr 216), + ("Ugrave", chr 217), + ("Uacute", chr 218), + ("Ucirc", chr 219), + ("Uuml", chr 220), + ("Yacute", chr 221), + ("THORN", chr 222), + ("szlig", chr 223), + ("agrave", chr 224), + ("aacute", chr 225), + ("acirc", chr 226), + ("atilde", chr 227), + ("auml", chr 228), + ("aring", chr 229), + ("aelig", chr 230), + ("ccedil", chr 231), + ("egrave", chr 232), + ("eacute", chr 233), + ("ecirc", chr 234), + ("euml", chr 235), + ("igrave", chr 236), + ("iacute", chr 237), + ("icirc", chr 238), + ("iuml", chr 239), + ("eth", chr 240), + ("ntilde", chr 241), + ("ograve", chr 242), + ("oacute", chr 243), + ("ocirc", chr 244), + ("otilde", chr 245), + ("ouml", chr 246), + ("divide", chr 247), + ("oslash", chr 248), + ("ugrave", chr 249), + ("uacute", chr 250), + ("ucirc", chr 251), + ("uuml", chr 252), + ("yacute", chr 253), + ("thorn", chr 254), + ("yuml", chr 255), + ("OElig", chr 338), + ("oelig", chr 339), + ("Scaron", chr 352), + ("scaron", chr 353), + ("Yuml", chr 376), + ("fnof", chr 402), + ("circ", chr 710), + ("tilde", chr 732), + ("Alpha", chr 913), + ("Beta", chr 914), + ("Gamma", chr 915), + ("Delta", chr 916), + ("Epsilon", chr 917), + ("Zeta", chr 918), + ("Eta", chr 919), + ("Theta", chr 920), + ("Iota", chr 921), + ("Kappa", chr 922), + ("Lambda", chr 923), + ("Mu", chr 924), + ("Nu", chr 925), + ("Xi", chr 926), + ("Omicron", chr 927), + ("Pi", chr 928), + ("Rho", chr 929), + ("Sigma", chr 931), + ("Tau", chr 932), + ("Upsilon", chr 933), + ("Phi", chr 934), + ("Chi", chr 935), + ("Psi", chr 936), + ("Omega", chr 937), + ("alpha", chr 945), + ("beta", chr 946), + ("gamma", chr 947), + ("delta", chr 948), + ("epsilon", chr 949), + ("zeta", chr 950), + ("eta", chr 951), + ("theta", chr 952), + ("iota", chr 953), + ("kappa", chr 954), + ("lambda", chr 955), + ("mu", chr 956), + ("nu", chr 957), + ("xi", chr 958), + ("omicron", chr 959), + ("pi", chr 960), + ("rho", chr 961), + ("sigmaf", chr 962), + ("sigma", chr 963), + ("tau", chr 964), + ("upsilon", chr 965), + ("phi", chr 966), + ("chi", chr 967), + ("psi", chr 968), + ("omega", chr 969), + ("thetasym", chr 977), + ("upsih", chr 978), + ("piv", chr 982), + ("ensp", chr 8194), + ("emsp", chr 8195), + ("thinsp", chr 8201), + ("zwnj", chr 8204), + ("zwj", chr 8205), + ("lrm", chr 8206), + ("rlm", chr 8207), + ("ndash", chr 8211), + ("mdash", chr 8212), + ("lsquo", chr 8216), + ("rsquo", chr 8217), + ("sbquo", chr 8218), + ("ldquo", chr 8220), + ("rdquo", chr 8221), + ("bdquo", chr 8222), + ("dagger", chr 8224), + ("Dagger", chr 8225), + ("bull", chr 8226), + ("hellip", chr 8230), + ("permil", chr 8240), + ("prime", chr 8242), + ("Prime", chr 8243), + ("lsaquo", chr 8249), + ("rsaquo", chr 8250), + ("oline", chr 8254), + ("frasl", chr 8260), + ("euro", chr 8364), + ("image", chr 8465), + ("weierp", chr 8472), + ("real", chr 8476), + ("trade", chr 8482), + ("alefsym", chr 8501), + ("larr", chr 8592), + ("uarr", chr 8593), + ("rarr", chr 8594), + ("darr", chr 8595), + ("harr", chr 8596), + ("crarr", chr 8629), + ("lArr", chr 8656), + ("uArr", chr 8657), + ("rArr", chr 8658), + ("dArr", chr 8659), + ("hArr", chr 8660), + ("forall", chr 8704), + ("part", chr 8706), + ("exist", chr 8707), + ("empty", chr 8709), + ("nabla", chr 8711), + ("isin", chr 8712), + ("notin", chr 8713), + ("ni", chr 8715), + ("prod", chr 8719), + ("sum", chr 8721), + ("minus", chr 8722), + ("lowast", chr 8727), + ("radic", chr 8730), + ("prop", chr 8733), + ("infin", chr 8734), + ("ang", chr 8736), + ("and", chr 8743), + ("or", chr 8744), + ("cap", chr 8745), + ("cup", chr 8746), + ("int", chr 8747), + ("there4", chr 8756), + ("sim", chr 8764), + ("cong", chr 8773), + ("asymp", chr 8776), + ("ne", chr 8800), + ("equiv", chr 8801), + ("le", chr 8804), + ("ge", chr 8805), + ("sub", chr 8834), + ("sup", chr 8835), + ("nsub", chr 8836), + ("sube", chr 8838), + ("supe", chr 8839), + ("oplus", chr 8853), + ("otimes", chr 8855), + ("perp", chr 8869), + ("sdot", chr 8901), + ("lceil", chr 8968), + ("rceil", chr 8969), + ("lfloor", chr 8970), + ("rfloor", chr 8971), + ("lang", chr 9001), + ("rang", chr 9002), + ("loz", chr 9674), + ("spades", chr 9824), + ("clubs", chr 9827), + ("hearts", chr 9829), + ("diams", chr 9830) + ] diff --git a/src/Text/Pandoc/DefaultHeaders.hs b/src/Text/Pandoc/DefaultHeaders.hs new file mode 100644 index 000000000..e9c1f17e5 --- /dev/null +++ b/src/Text/Pandoc/DefaultHeaders.hs @@ -0,0 +1,69 @@ +{-# LANGUAGE CPP, TemplateHaskell #-} +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.DefaultHeaders + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Default headers for Pandoc writers. +-} +module Text.Pandoc.DefaultHeaders ( + defaultLaTeXHeader, + defaultConTeXtHeader, + defaultDocbookHeader, + defaultOpenDocumentHeader, + defaultS5Header, + defaultRTFHeader + ) where +import Text.Pandoc.Writers.S5 +import System.FilePath ( () ) +import Text.Pandoc.TH ( contentsOf ) + +defaultLaTeXHeader :: String +#ifndef __HADDOCK__ +defaultLaTeXHeader = $(contentsOf $ "data" "headers" "LaTeX.header") +#endif + +defaultConTeXtHeader :: String +#ifndef __HADDOCK__ +defaultConTeXtHeader = $(contentsOf $ "data" "headers" "ConTeXt.header") +#endif + +defaultDocbookHeader :: String +#ifndef __HADDOCK__ +defaultDocbookHeader = $(contentsOf $ "data" "headers" "Docbook.header") +#endif + +defaultOpenDocumentHeader :: String +#ifndef __HADDOCK__ +defaultOpenDocumentHeader = $(contentsOf $ "data" "headers" "OpenDocument.header") +#endif + +defaultS5Header :: String +defaultS5Header = s5Meta ++ s5CSS ++ s5Javascript + +defaultRTFHeader :: String +#ifndef __HADDOCK__ +defaultRTFHeader = $(contentsOf $ "data" "headers" "RTF.header") +#endif diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs new file mode 100644 index 000000000..92ce094d4 --- /dev/null +++ b/src/Text/Pandoc/Definition.hs @@ -0,0 +1,150 @@ +{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Definition + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Definition of 'Pandoc' data structure for format-neutral representation +of documents. +-} +module Text.Pandoc.Definition where + +import Data.Generics + +data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data) + +-- | Bibliographic information for the document: title (list of 'Inline'), +-- authors (list of strings), date (string). +data Meta = Meta [Inline] -- title + [String] -- authors + String -- date + deriving (Eq, Show, Read, Typeable, Data) + +-- | Alignment of a table column. +data Alignment = AlignLeft + | AlignRight + | AlignCenter + | AlignDefault deriving (Eq, Show, Read, Typeable, Data) + +-- | List attributes. +type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) + +-- | Style of list numbers. +data ListNumberStyle = DefaultStyle + | Decimal + | LowerRoman + | UpperRoman + | LowerAlpha + | UpperAlpha deriving (Eq, Show, Read, Typeable, Data) + +-- | Delimiter of list numbers. +data ListNumberDelim = DefaultDelim + | Period + | OneParen + | TwoParens deriving (Eq, Show, Read, Typeable, Data) + +-- | Attributes: identifier, classes, key-value pairs +type Attr = (String, [String], [(String, String)]) + +-- | Block element. +data Block + = Plain [Inline] -- ^ Plain text, not a paragraph + | Para [Inline] -- ^ Paragraph + | CodeBlock Attr String -- ^ Code block (literal) with attributes + | RawHtml String -- ^ Raw HTML block (literal) + | BlockQuote [Block] -- ^ Block quote (list of blocks) + | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes + -- and a list of items, each a list of blocks) + | BulletList [[Block]] -- ^ Bullet list (list of items, each + -- a list of blocks) + | DefinitionList [([Inline],[Block])] -- ^ Definition list + -- (list of items, each a pair of an inline list, + -- the term, and a block list) + | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) + | HorizontalRule -- ^ Horizontal rule + | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table, + -- with caption, column alignments, + -- relative column widths, column headers + -- (each a list of blocks), and rows + -- (each a list of lists of blocks) + | Null -- ^ Nothing + deriving (Eq, Read, Show, Typeable, Data) + +-- | Type of quotation marks to use in Quoted inline. +data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data) + +-- | Link target (URL, title). +type Target = (String, String) + +-- | Type of math element (display or inline). +data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data) + +-- | Inline elements. +data Inline + = Str String -- ^ Text (string) + | Emph [Inline] -- ^ Emphasized text (list of inlines) + | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) + | Strikeout [Inline] -- ^ Strikeout text (list of inlines) + | Superscript [Inline] -- ^ Superscripted text (list of inlines) + | Subscript [Inline] -- ^ Subscripted text (list of inlines) + | SmallCaps [Inline] -- ^ Small caps text (list of inlines) + | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) + | Cite [Target] [Inline] -- ^ Citation (list of inlines) + | Code String -- ^ Inline code (literal) + | Space -- ^ Inter-word space + | EmDash -- ^ Em dash + | EnDash -- ^ En dash + | Apostrophe -- ^ Apostrophe + | Ellipses -- ^ Ellipses + | LineBreak -- ^ Hard line break + | Math MathType String -- ^ TeX math (literal) + | TeX String -- ^ LaTeX code (literal) + | HtmlInline String -- ^ HTML code (literal) + | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target + | Image [Inline] Target -- ^ Image: alt text (list of inlines), target + -- and target + | Note [Block] -- ^ Footnote or endnote + deriving (Show, Eq, Read, Typeable, Data) + +-- | Applies a transformation on @a@s to matching elements in a @b@. +processWith :: (Data a, Data b) => (a -> a) -> b -> b +processWith f = everywhere (mkT f) + +-- | Like 'processWith', but with monadic transformations. +processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b +processWithM f = everywhereM (mkM f) + +-- | Runs a query on matching @a@ elements in a @c@. +queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b] +queryWith f = everything (++) ([] `mkQ` f) + +{-# DEPRECATED processPandoc "Use processWith instead" #-} +processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc +processPandoc = processWith + +{-# DEPRECATED queryPandoc "Use queryWith instead" #-} +queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] +queryPandoc = queryWith + diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs new file mode 100644 index 000000000..6726f1a42 --- /dev/null +++ b/src/Text/Pandoc/Highlighting.hs @@ -0,0 +1,64 @@ +{-# LANGUAGE CPP #-} +{- +Copyright (C) 2008 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Highlighting + Copyright : Copyright (C) 2008 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Exports functions for syntax highlighting. +-} + +module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss ) where +import Text.XHtml +import Text.Pandoc.Definition +#ifdef _HIGHLIGHTING +import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss ) +import Data.List (find, lookup) +import Data.Maybe (fromMaybe) +import Data.Char (toLower) + +highlightHtml :: Attr -> String -> Either String Html +highlightHtml (_, classes, keyvals) rawCode = + let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals + fmtOpts = [OptNumberFrom firstNum] ++ + case find (`elem` ["number","numberLines","number-lines"]) classes of + Nothing -> [] + Just _ -> [OptNumberLines] + lcLanguages = map (map toLower) languages + in case find (\c -> (map toLower c) `elem` lcLanguages) classes of + Nothing -> Left "Unknown or unsupported language" + Just language -> case highlightAs language rawCode of + Left err -> Left err + Right hl -> Right $ formatAsXHtml fmtOpts language hl + +#else +defaultHighlightingCss :: String +defaultHighlightingCss = "" + +languages :: [String] +languages = [] + +highlightHtml :: Attr -> String -> Either String Html +highlightHtml _ _ = Left "Pandoc was not compiled with support for highlighting" +#endif diff --git a/src/Text/Pandoc/LaTeXMathML.hs b/src/Text/Pandoc/LaTeXMathML.hs new file mode 100644 index 000000000..1eb3c23cc --- /dev/null +++ b/src/Text/Pandoc/LaTeXMathML.hs @@ -0,0 +1,14 @@ +{-# LANGUAGE CPP, TemplateHaskell #-} +-- | Definitions for use of LaTeXMathML in HTML. +-- (See ) +module Text.Pandoc.LaTeXMathML ( latexMathMLScript ) where +import Text.Pandoc.TH ( contentsOf ) +import System.FilePath ( () ) + +-- | String containing LaTeXMathML javascript. +latexMathMLScript :: String +#ifndef __HADDOCK__ +latexMathMLScript = "\n" +#endif diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs new file mode 100644 index 000000000..f9e4dd8f1 --- /dev/null +++ b/src/Text/Pandoc/ODT.hs @@ -0,0 +1,88 @@ +{-# LANGUAGE TemplateHaskell #-} +{- +Copyright (C) 2008 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.ODT + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for producing an ODT file from OpenDocument XML. +-} +module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where +import Text.Pandoc.TH ( makeZip ) +import Data.List ( find ) +import System.FilePath ( (), takeFileName ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Prelude hiding ( writeFile, readFile ) +import Codec.Archive.Zip +import Control.Applicative ( (<$>) ) +import Text.ParserCombinators.Parsec +import System.Time + +-- | Produce an ODT file from OpenDocument XML. +saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. + -> FilePath -- ^ Relative directory of source file. + -> String -- ^ OpenDocument XML contents. + -> IO () +saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do + let refArchive = read $(makeZip $ "data" "odt-styles") + -- handle pictures + let (newContents, pics) = + case runParser pPictures [] "OpenDocument XML contents" xml of + Left err -> error $ show err + Right x -> x + picEntries <- mapM (makePictureEntry sourceDirRelative) pics + (TOD epochTime _) <- getClockTime + let contentEntry = toEntry "content.xml" epochTime $ fromString newContents + let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) + B.writeFile destinationODTPath $ fromArchive archive + +makePictureEntry :: FilePath -- ^ Relative directory of source file + -> (FilePath, String) -- ^ Path and new path of picture + -> IO Entry +makePictureEntry sourceDirRelative (path, newPath) = do + entry <- readEntry [] $ sourceDirRelative path + return (entry { eRelativePath = newPath }) + +pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) +pPictures = do + contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") + pics <- getState + return (contents, pics) + +pPicture :: GenParser Char [(FilePath, String)] [Char] +pPicture = try $ do + string " o == path) pics of + Just (_, new) -> return new + Nothing -> do + -- get a unique name + let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics + let new = "Pictures/" ++ replicate dups '0' ++ filename + updateState ((path, new) :) + return new + return $ " + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Pluigns + Copyright : Copyright (C) 2008 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Support for plugins. +-} + +module Text.Pandoc.Plugins (getPlugin) +where + +import Language.Haskell.Interpreter +import Text.Pandoc +import Control.Monad (unless, liftM) +import Control.Monad.Error (throwError) +import Data.List (isInfixOf) + +-- | Returns the function named @transform@ in the specified +-- module. The module may be identified either by module name +-- or by path name. The @transform@ function should have type +-- @a -> a@ or @a -> IO a@, where @a@ is an instance of 'Data': +-- for example, @Pandoc -> Pandoc@, @Inline -> IO Inline@, +-- @Block -> Block@, or @[Inline] -> IO [Inline]@. +getPlugin :: String -> IO (Pandoc -> IO Pandoc) +getPlugin modsrc = do + res <- runInterpreter (evaluatePlugin modsrc) + case res of + Right func -> return func + Left (WontCompile xs) -> error $ "WontCompile error for plugin '" ++ modsrc ++ "'\n" ++ unlines (map errMsg xs) + Left (NotAllowed x) -> error $ "NotAllowed error for plugin '" ++ modsrc ++ "'\n" ++ x + Left (UnknownError x) -> error $ "UnknownError for plugin '" ++ modsrc ++ "'\n" ++ x + Left (GhcException x) -> error $ "GhcException for plugin '" ++ modsrc ++ "'\n" ++ x + +evaluatePlugin :: String -> Interpreter (Pandoc -> IO Pandoc) +evaluatePlugin modsrc = do + set [installedModulesInScope := False] + loadModules [modsrc] + modnames <- getLoadedModules + setTopLevelModules modnames + setImports ["Prelude", "Text.Pandoc", "Text.Pandoc.Definition"] + exports <- liftM concat $ mapM getModuleExports modnames + unless ((Fun "transform") `elem` exports) $ + throwError $ UnknownError $ "The plugin module must define a function 'transform'." + transformType <- typeOf "transform" + if "-> IO" `isInfixOf` transformType + then interpret "processWithM transform" (as :: Pandoc -> IO Pandoc) + else interpret "return . (processWith transform)" (as :: Pandoc -> IO Pandoc) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs new file mode 100644 index 000000000..65e512b5e --- /dev/null +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -0,0 +1,675 @@ +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.HTML + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of HTML to 'Pandoc' document. +-} +module Text.Pandoc.Readers.HTML ( + readHtml, + rawHtmlInline, + rawHtmlBlock, + anyHtmlBlockTag, + anyHtmlInlineTag, + anyHtmlTag, + anyHtmlEndTag, + htmlEndTag, + extractTagType, + htmlBlockElement, + unsanitaryURI + ) where + +import Text.ParserCombinators.Parsec +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Data.Maybe ( fromMaybe ) +import Data.List ( takeWhile, dropWhile, isPrefixOf, isSuffixOf, intercalate ) +import Data.Char ( toLower, isAlphaNum ) +import Network.URI ( parseURIReference, URI (..) ) + +-- | Convert HTML-formatted string to 'Pandoc' document. +readHtml :: ParserState -- ^ Parser state + -> String -- ^ String to parse + -> Pandoc +readHtml = readWith parseHtml + +-- +-- Constants +-- + +eitherBlockOrInline :: [[Char]] +eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", + "map", "area", "object"] + +{- +inlineHtmlTags :: [[Char]] +inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", + "br", "cite", "code", "dfn", "em", "font", "i", "img", + "input", "kbd", "label", "q", "s", "samp", "select", + "small", "span", "strike", "strong", "sub", "sup", + "textarea", "tt", "u", "var"] ++ eitherBlockOrInline +-} + +blockHtmlTags :: [[Char]] +blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", + "dl", "fieldset", "form", "h1", "h2", "h3", "h4", + "h5", "h6", "hr", "html", "isindex", "menu", "noframes", + "noscript", "ol", "p", "pre", "table", "ul", "dd", + "dt", "frameset", "li", "tbody", "td", "tfoot", + "th", "thead", "tr", "script"] ++ eitherBlockOrInline + +sanitaryTags :: [[Char]] +sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", + "blockquote", "br", "button", "caption", "center", + "cite", "code", "col", "colgroup", "dd", "del", "dfn", + "dir", "div", "dl", "dt", "em", "fieldset", "font", + "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", + "i", "img", "input", "ins", "kbd", "label", "legend", + "li", "map", "menu", "ol", "optgroup", "option", "p", + "pre", "q", "s", "samp", "select", "small", "span", + "strike", "strong", "sub", "sup", "table", "tbody", + "td", "textarea", "tfoot", "th", "thead", "tr", "tt", + "u", "ul", "var"] + +sanitaryAttributes :: [[Char]] +sanitaryAttributes = ["abbr", "accept", "accept-charset", + "accesskey", "action", "align", "alt", "axis", + "border", "cellpadding", "cellspacing", "char", + "charoff", "charset", "checked", "cite", "class", + "clear", "cols", "colspan", "color", "compact", + "coords", "datetime", "dir", "disabled", + "enctype", "for", "frame", "headers", "height", + "href", "hreflang", "hspace", "id", "ismap", + "label", "lang", "longdesc", "maxlength", "media", + "method", "multiple", "name", "nohref", "noshade", + "nowrap", "prompt", "readonly", "rel", "rev", + "rows", "rowspan", "rules", "scope", "selected", + "shape", "size", "span", "src", "start", + "summary", "tabindex", "target", "title", "type", + "usemap", "valign", "value", "vspace", "width"] + +-- +-- HTML utility functions +-- + +-- | Returns @True@ if sanitization is specified and the specified tag is +-- not on the sanitized tag list. +unsanitaryTag :: [Char] + -> GenParser tok ParserState Bool +unsanitaryTag tag = do + st <- getState + return $ stateSanitizeHTML st && tag `notElem` sanitaryTags + +-- | returns @True@ if sanitization is specified and the specified attribute +-- is not on the sanitized attribute list. +unsanitaryAttribute :: ([Char], String, t) + -> GenParser tok ParserState Bool +unsanitaryAttribute (attr, val, _) = do + st <- getState + return $ stateSanitizeHTML st && + (attr `notElem` sanitaryAttributes || + (attr `elem` ["href","src"] && unsanitaryURI val)) + +-- | Returns @True@ if the specified URI is potentially a security risk. +unsanitaryURI :: String -> Bool +unsanitaryURI u = + let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", + "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", + "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", + "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", + "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", + "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", + "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", + "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", + "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", + "snews:", "webcal:", "ymsgr:"] + in case parseURIReference u of + Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes + Nothing -> True + +-- | Read blocks until end tag. +blocksTilEnd :: String -> GenParser Char ParserState [Block] +blocksTilEnd tag = do + blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) + return $ filter (/= Null) blocks + +-- | Read inlines until end tag. +inlinesTilEnd :: String -> GenParser Char ParserState [Inline] +inlinesTilEnd tag = manyTill inline (htmlEndTag tag) + +-- | Parse blocks between open and close tag. +blocksIn :: String -> GenParser Char ParserState [Block] +blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag + +-- | Parse inlines between open and close tag. +inlinesIn :: String -> GenParser Char ParserState [Inline] +inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag + +-- | Extract type from a tag: e.g. @br@ from @\@ +extractTagType :: String -> String +extractTagType ('<':rest) = + let isSpaceOrSlash c = c `elem` "/ \n\t" in + map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest +extractTagType _ = "" + +-- | Parse any HTML tag (opening or self-closing) and return text of tag +anyHtmlTag :: GenParser Char ParserState [Char] +anyHtmlTag = try $ do + char '<' + spaces + tag <- many1 alphaNum + attribs <- many htmlAttribute + spaces + ender <- option "" (string "/") + let ender' = if null ender then "" else " /" + spaces + char '>' + let result = "<" ++ tag ++ + concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" + unsanitary <- unsanitaryTag tag + if unsanitary + then return $ "" + else return result + +anyHtmlEndTag :: GenParser Char ParserState [Char] +anyHtmlEndTag = try $ do + char '<' + spaces + char '/' + spaces + tag <- many1 alphaNum + spaces + char '>' + let result = "" + unsanitary <- unsanitaryTag tag + if unsanitary + then return $ "" + else return result + +htmlTag :: String -> GenParser Char ParserState (String, [(String, String)]) +htmlTag tag = try $ do + char '<' + spaces + stringAnyCase tag + attribs <- many htmlAttribute + spaces + optional (string "/") + spaces + char '>' + return (tag, (map (\(name, content, _) -> (name, content)) attribs)) + +-- parses a quoted html attribute value +quoted :: Char -> GenParser Char st (String, String) +quoted quoteChar = do + result <- between (char quoteChar) (char quoteChar) + (many (noneOf [quoteChar])) + return (result, [quoteChar]) + +nullAttribute :: ([Char], [Char], [Char]) +nullAttribute = ("", "", "") + +htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char]) +htmlAttribute = do + attr <- htmlRegularAttribute <|> htmlMinimizedAttribute + unsanitary <- unsanitaryAttribute attr + if unsanitary + then return nullAttribute + else return attr + +-- minimized boolean attribute +htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char]) +htmlMinimizedAttribute = try $ do + many1 space + name <- many1 (choice [letter, oneOf ".-_:"]) + return (name, name, name) + +htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char]) +htmlRegularAttribute = try $ do + many1 space + name <- many1 (choice [letter, oneOf ".-_:"]) + spaces + char '=' + spaces + (content, quoteStr) <- choice [ (quoted '\''), + (quoted '"'), + (do + a <- many (alphaNum <|> (oneOf "-._:")) + return (a,"")) ] + return (name, content, + (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) + +-- | Parse an end tag of type 'tag' +htmlEndTag :: [Char] -> GenParser Char st [Char] +htmlEndTag tag = try $ do + char '<' + spaces + char '/' + spaces + stringAnyCase tag + spaces + char '>' + return $ "" + +{- +-- | Returns @True@ if the tag is (or can be) an inline tag. +isInline :: String -> Bool +isInline tag = (extractTagType tag) `elem` inlineHtmlTags +-} + +-- | Returns @True@ if the tag is (or can be) a block tag. +isBlock :: String -> Bool +isBlock tag = (extractTagType tag) `elem` blockHtmlTags + +anyHtmlBlockTag :: GenParser Char ParserState [Char] +anyHtmlBlockTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if isBlock tag then return tag else fail "not a block tag" + +anyHtmlInlineTag :: GenParser Char ParserState [Char] +anyHtmlInlineTag = try $ do + tag <- anyHtmlTag <|> anyHtmlEndTag + if not (isBlock tag) then return tag else fail "not an inline tag" + +-- | Parses material between script tags. +-- Scripts must be treated differently, because they can contain '<>' etc. +htmlScript :: GenParser Char ParserState [Char] +htmlScript = try $ do + open <- string "" + else return $ open ++ rest ++ "" + +-- | Parses material between style tags. +-- Style tags must be treated differently, because they can contain CSS +htmlStyle :: GenParser Char ParserState [Char] +htmlStyle = try $ do + open <- string "" + else return $ open ++ rest ++ "" + +htmlBlockElement :: GenParser Char ParserState [Char] +htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ] + +rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock = try $ do + body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag + state <- getState + if stateParseRaw state then return (RawHtml body) else return Null + +-- This is a block whose contents should be passed through verbatim, not interpreted. +rawVerbatimBlock :: GenParser Char ParserState [Char] +rawVerbatimBlock = try $ do + start <- anyHtmlBlockTag + let tagtype = extractTagType start + if tagtype `elem` ["pre"] + then do + contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar) + end <- htmlEndTag tagtype + return $ start ++ contents ++ end + else fail "Not a verbatim block" + +-- We don't want to parse or as raw HTML, since these +-- are handled in parseHtml. +rawHtmlBlock' :: GenParser Char ParserState Block +rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") + rawHtmlBlock + +-- | Parses an HTML comment. +htmlComment :: GenParser Char st [Char] +htmlComment = try $ do + string "")) + return $ "" + +-- +-- parsing documents +-- + +xmlDec :: GenParser Char st [Char] +xmlDec = try $ do + string "') + return $ "" + +definition :: GenParser Char st [Char] +definition = try $ do + string "') + return $ "" + +nonTitleNonHead :: GenParser Char ParserState Char +nonTitleNonHead = try $ do + notFollowedBy $ (htmlTag "title" >> return ' ') <|> + (htmlEndTag "head" >> return ' ') + (rawHtmlBlock >> return ' ') <|> anyChar + +parseTitle :: GenParser Char ParserState [Inline] +parseTitle = try $ do + (tag, _) <- htmlTag "title" + contents <- inlinesTilEnd tag + spaces + return contents + +-- parse header and return meta-information (for now, just title) +parseHead :: GenParser Char ParserState ([Inline], [a], [Char]) +parseHead = try $ do + htmlTag "head" + spaces + skipMany nonTitleNonHead + contents <- option [] parseTitle + skipMany nonTitleNonHead + htmlEndTag "head" + return (contents, [], "") + +skipHtmlTag :: String -> GenParser Char ParserState () +skipHtmlTag tag = optional (htmlTag tag) + +-- h1 class="title" representation of title in body +bodyTitle :: GenParser Char ParserState [Inline] +bodyTitle = try $ do + (_, attribs) <- htmlTag "h1" + case (extractAttribute "class" attribs) of + Just "title" -> return "" + _ -> fail "not title" + inlinesTilEnd "h1" + +parseHtml :: GenParser Char ParserState Pandoc +parseHtml = do + sepEndBy (choice [xmlDec, definition, htmlComment]) spaces + skipHtmlTag "html" + spaces + (title, authors, date) <- option ([], [], "") parseHead + spaces + skipHtmlTag "body" + spaces + optional bodyTitle -- skip title in body, because it's represented in meta + blocks <- parseBlocks + spaces + optional (htmlEndTag "body") + spaces + optional (htmlEndTag "html" >> many anyChar) -- ignore anything after + eof + return $ Pandoc (Meta title authors date) blocks + +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) + +block :: GenParser Char ParserState Block +block = choice [ codeBlock + , header + , hrule + , list + , blockQuote + , para + , plain + , rawHtmlBlock' + ] "block" + +-- +-- header blocks +-- + +header :: GenParser Char ParserState Block +header = choice (map headerLevel (enumFromTo 1 5)) "header" + +headerLevel :: Int -> GenParser Char ParserState Block +headerLevel n = try $ do + let level = "h" ++ show n + htmlTag level + contents <- inlinesTilEnd level + return $ Header n (normalizeSpaces contents) + +-- +-- hrule block +-- + +hrule :: GenParser Char ParserState Block +hrule = try $ do + (_, attribs) <- htmlTag "hr" + state <- getState + if not (null attribs) && stateParseRaw state + then unexpected "attributes in hr" -- parse as raw in this case + else return HorizontalRule + +-- +-- code blocks +-- + +-- Note: HTML tags in code blocks (e.g. for syntax highlighting) are +-- skipped, because they are not portable to output formats other than HTML. +codeBlock :: GenParser Char ParserState Block +codeBlock = try $ do + htmlTag "pre" + result <- manyTill + (many1 (satisfy (/= '<')) <|> + ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) + (htmlEndTag "pre") + let result' = concat result + -- drop leading newline if any + let result'' = if "\n" `isPrefixOf` result' + then drop 1 result' + else result' + -- drop trailing newline if any + let result''' = if "\n" `isSuffixOf` result'' + then init result'' + else result'' + return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result''' + +-- +-- block quotes +-- + +blockQuote :: GenParser Char ParserState Block +blockQuote = try $ htmlTag "blockquote" >> spaces >> + blocksTilEnd "blockquote" >>= (return . BlockQuote) + +-- +-- list blocks +-- + +list :: GenParser Char ParserState Block +list = choice [ bulletList, orderedList, definitionList ] "list" + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + (_, attribs) <- htmlTag "ol" + (start, style) <- option (1, DefaultStyle) $ + do failIfStrict + let sta = fromMaybe "1" $ + lookup "start" attribs + let sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + let sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle + return (read sta, sty') + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ol" + return $ OrderedList (start, style, DefaultDelim) items + +bulletList :: GenParser Char ParserState Block +bulletList = try $ do + htmlTag "ul" + spaces + items <- sepEndBy1 (blocksIn "li") spaces + htmlEndTag "ul" + return $ BulletList items + +definitionList :: GenParser Char ParserState Block +definitionList = try $ do + failIfStrict -- def lists not part of standard markdown + htmlTag "dl" + spaces + items <- sepEndBy1 definitionListItem spaces + htmlEndTag "dl" + return $ DefinitionList items + +definitionListItem :: GenParser Char ParserState ([Inline], [Block]) +definitionListItem = try $ do + terms <- sepEndBy1 (inlinesIn "dt") spaces + defs <- sepEndBy1 (blocksIn "dd") spaces + let term = intercalate [LineBreak] terms + return (term, concat defs) + +-- +-- paragraph block +-- + +para :: GenParser Char ParserState Block +para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= + return . Para . normalizeSpaces + +-- +-- plain block +-- + +plain :: GenParser Char ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- inline +-- + +inline :: GenParser Char ParserState Inline +inline = choice [ charRef + , strong + , emph + , superscript + , subscript + , strikeout + , spanStrikeout + , code + , str + , linebreak + , whitespace + , link + , image + , rawHtmlInline + ] "inline" + +code :: GenParser Char ParserState Inline +code = try $ do + htmlTag "code" + result <- manyTill anyChar (htmlEndTag "code") + -- remove internal line breaks, leading and trailing space, + -- and decode character references + return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ + intercalate " " $ lines result + +rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline = do + result <- htmlScript <|> htmlStyle <|> htmlComment <|> anyHtmlInlineTag + state <- getState + if stateParseRaw state then return (HtmlInline result) else return (Str "") + +betweenTags :: [Char] -> GenParser Char ParserState [Inline] +betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= + return . normalizeSpaces + +emph :: GenParser Char ParserState Inline +emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph + +strong :: GenParser Char ParserState Inline +strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong + +superscript :: GenParser Char ParserState Inline +superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript + +subscript :: GenParser Char ParserState Inline +subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript + +strikeout :: GenParser Char ParserState Inline +strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= + return . Strikeout + +spanStrikeout :: GenParser Char ParserState Inline +spanStrikeout = try $ do + failIfStrict -- strict markdown has no strikeout, so treat as raw HTML + (_, attributes) <- htmlTag "span" + result <- case (extractAttribute "class" attributes) of + Just "strikeout" -> inlinesTilEnd "span" + _ -> fail "not a strikeout" + return $ Strikeout result + +whitespace :: GenParser Char st Inline +whitespace = many1 space >> return Space + +-- hard line break +linebreak :: GenParser Char ParserState Inline +linebreak = htmlTag "br" >> optional newline >> return LineBreak + +str :: GenParser Char st Inline +str = many1 (noneOf "<& \t\n") >>= return . Str + +-- +-- links and images +-- + +-- extract contents of attribute (attribute names are case-insensitive) +extractAttribute :: [Char] -> [([Char], String)] -> Maybe String +extractAttribute _ [] = Nothing +extractAttribute name ((attrName, contents):rest) = + let name' = map toLower name + attrName' = map toLower attrName + in if attrName' == name' + then Just (decodeCharacterReferences contents) + else extractAttribute name rest + +link :: GenParser Char ParserState Inline +link = try $ do + (_, attributes) <- htmlTag "a" + url <- case (extractAttribute "href" attributes) of + Just url -> return url + Nothing -> fail "no href" + let title = fromMaybe "" $ extractAttribute "title" attributes + lab <- inlinesTilEnd "a" + return $ Link (normalizeSpaces lab) (url, title) + +image :: GenParser Char ParserState Inline +image = try $ do + (_, attributes) <- htmlTag "img" + url <- case (extractAttribute "src" attributes) of + Just url -> return url + Nothing -> fail "no src" + let title = fromMaybe "" $ extractAttribute "title" attributes + let alt = fromMaybe "" (extractAttribute "alt" attributes) + return $ Image [Str alt] (url, title) + diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs new file mode 100644 index 000000000..f35ab4f29 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -0,0 +1,774 @@ +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.LaTeX + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of LaTeX to 'Pandoc' document. +-} +module Text.Pandoc.Readers.LaTeX ( + readLaTeX, + rawLaTeXInline, + rawLaTeXEnvironment' + ) where + +import Text.ParserCombinators.Parsec +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Data.Maybe ( fromMaybe ) +import Data.Char ( chr ) +import Data.List ( isPrefixOf, isSuffixOf ) + +-- | Parse LaTeX from string and return 'Pandoc' document. +readLaTeX :: ParserState -- ^ Parser state, including options for parser + -> String -- ^ String to parse + -> Pandoc +readLaTeX = readWith parseLaTeX + +-- characters with special meaning +specialChars :: [Char] +specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" + +-- +-- utility functions +-- + +-- | Returns text between brackets and its matching pair. +bracketedText :: Char -> Char -> GenParser Char st [Char] +bracketedText openB closeB = do + result <- charsInBalanced' openB closeB + return $ [openB] ++ result ++ [closeB] + +-- | Returns an option or argument of a LaTeX command. +optOrArg :: GenParser Char st [Char] +optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' + +-- | True if the string begins with '{'. +isArg :: [Char] -> Bool +isArg ('{':_) = True +isArg _ = False + +-- | Returns list of options and arguments of a LaTeX command. +commandArgs :: GenParser Char st [[Char]] +commandArgs = many optOrArg + +-- | Parses LaTeX command, returns (name, star, list of options or arguments). +command :: GenParser Char st ([Char], [Char], [[Char]]) +command = do + char '\\' + name <- many1 letter + star <- option "" (string "*") -- some commands have starred versions + args <- commandArgs + return (name, star, args) + +begin :: [Char] -> GenParser Char st [Char] +begin name = try $ do + string $ "\\begin{" ++ name ++ "}" + optional commandArgs + spaces + return name + +end :: [Char] -> GenParser Char st [Char] +end name = try $ do + string $ "\\end{" ++ name ++ "}" + return name + +-- | Returns a list of block elements containing the contents of an +-- environment. +environment :: [Char] -> GenParser Char ParserState [Block] +environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces + +anyEnvironment :: GenParser Char ParserState Block +anyEnvironment = try $ do + string "\\begin{" + name <- many letter + star <- option "" (string "*") -- some environments have starred variants + char '}' + optional commandArgs + spaces + contents <- manyTill block (end (name ++ star)) + spaces + return $ BlockQuote contents + +-- +-- parsing documents +-- + +-- | Process LaTeX preamble, extracting metadata. +processLaTeXPreamble :: GenParser Char ParserState () +processLaTeXPreamble = try $ manyTill + (choice [bibliographic, comment, unknownCommand, nullBlock]) + (try (string "\\begin{document}")) >> + spaces + +-- | Parse LaTeX and return 'Pandoc'. +parseLaTeX :: GenParser Char ParserState Pandoc +parseLaTeX = do + optional processLaTeXPreamble -- preamble might not be present (fragment) + spaces + blocks <- parseBlocks + spaces + optional $ try (string "\\end{document}" >> many anyChar) + -- might not be present (fragment) + spaces + eof + state <- getState + let blocks' = filter (/= Null) blocks + let title' = stateTitle state + let authors' = stateAuthors state + let date' = stateDate state + return $ Pandoc (Meta title' authors' date') blocks' + +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = spaces >> many block + +block :: GenParser Char ParserState Block +block = choice [ hrule + , codeBlock + , header + , list + , blockQuote + , comment + , bibliographic + , para + , itemBlock + , unknownEnvironment + , ignore + , unknownCommand ] "block" + +-- +-- header blocks +-- + +header :: GenParser Char ParserState Block +header = try $ do + char '\\' + subs <- many (try (string "sub")) + string "section" + optional (char '*') + char '{' + title' <- manyTill inline (char '}') + spaces + return $ Header (length subs + 1) (normalizeSpaces title') + +-- +-- hrule block +-- + +hrule :: GenParser Char st Block +hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", + "\\newpage" ] >> spaces >> return HorizontalRule + +-- +-- code blocks +-- + +codeBlock :: GenParser Char ParserState Block +codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock +-- Note: Verbatim is from fancyvrb. + +codeBlockWith :: String -> GenParser Char st Block +codeBlockWith env = try $ do + string ("\\begin{" ++ env ++ "}") -- don't use begin function because it + -- gobbles whitespace + optional blanklines -- we want to gobble blank lines, but not + -- leading space + contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}")) + spaces + let classes = if env == "code" then ["haskell"] else [] + return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents) + +lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock = do + failUnlessLHS + (CodeBlock (_,_,_) cont) <- codeBlockWith "code" + return $ CodeBlock ("", ["sourceCode","haskell"], []) cont + +-- +-- block quotes +-- + +blockQuote :: GenParser Char ParserState Block +blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= + return . BlockQuote + +-- +-- list blocks +-- + +list :: GenParser Char ParserState Block +list = bulletList <|> orderedList <|> definitionList "list" + +listItem :: GenParser Char ParserState ([Inline], [Block]) +listItem = try $ do + ("item", _, args) <- command + spaces + state <- getState + let oldParserContext = stateParserContext state + updateState (\s -> s {stateParserContext = ListItemState}) + blocks <- many block + updateState (\s -> s {stateParserContext = oldParserContext}) + opt <- case args of + ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> + parseFromString (many inline) $ tail $ init x + _ -> return [] + return (opt, blocks) + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + string "\\begin{enumerate}" + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ + try $ do failIfStrict + char '[' + res <- anyOrderedListMarker + char ']' + return res + spaces + option "" $ try $ do string "\\setlength{\\itemindent}" + char '{' + manyTill anyChar (char '}') + spaces + start <- option 1 $ try $ do failIfStrict + string "\\setcounter{enum" + many1 (oneOf "iv") + string "}{" + num <- many1 digit + char '}' + spaces + return $ (read num) + 1 + items <- many listItem + end "enumerate" + spaces + return $ OrderedList (start, style, delim) $ map snd items + +bulletList :: GenParser Char ParserState Block +bulletList = try $ do + begin "itemize" + spaces + items <- many listItem + end "itemize" + spaces + return (BulletList $ map snd items) + +definitionList :: GenParser Char ParserState Block +definitionList = try $ do + begin "description" + spaces + items <- many listItem + end "description" + spaces + return (DefinitionList items) + +-- +-- paragraph block +-- + +para :: GenParser Char ParserState Block +para = do + res <- many1 inline + spaces + return $ if null (filter (`notElem` [Str "", Space]) res) + then Null + else Para $ normalizeSpaces res + +-- +-- title authors date +-- + +bibliographic :: GenParser Char ParserState Block +bibliographic = choice [ maketitle, title, authors, date ] + +maketitle :: GenParser Char st Block +maketitle = try (string "\\maketitle") >> spaces >> return Null + +title :: GenParser Char ParserState Block +title = try $ do + string "\\title{" + tit <- manyTill inline (char '}') + spaces + updateState (\state -> state { stateTitle = tit }) + return Null + +authors :: GenParser Char ParserState Block +authors = try $ do + string "\\author{" + authors' <- manyTill anyChar (char '}') + spaces + let authors'' = map removeLeadingTrailingSpace $ lines $ + substitute "\\\\" "\n" authors' + updateState (\s -> s { stateAuthors = authors'' }) + return Null + +date :: GenParser Char ParserState Block +date = try $ do + string "\\date{" + date' <- manyTill anyChar (char '}') + spaces + updateState (\state -> state { stateDate = date' }) + return Null + +-- +-- item block +-- for use in unknown environments that aren't being parsed as raw latex +-- + +-- this forces items to be parsed in different blocks +itemBlock :: GenParser Char ParserState Block +itemBlock = try $ do + ("item", _, args) <- command + state <- getState + if stateParserContext state == ListItemState + then fail "item should be handled by list block" + else if null args + then return Null + else return $ Plain [Str (stripFirstAndLast (head args))] + +-- +-- raw LaTeX +-- + +-- | Parse any LaTeX environment and return a Para block containing +-- the whole literal environment as raw TeX. +rawLaTeXEnvironment :: GenParser Char st Block +rawLaTeXEnvironment = do + contents <- rawLaTeXEnvironment' + spaces + return $ Para [TeX contents] + +-- | Parse any LaTeX environment and return a string containing +-- the whole literal environment as raw TeX. +rawLaTeXEnvironment' :: GenParser Char st String +rawLaTeXEnvironment' = try $ do + string "\\begin{" + name <- many1 letter + star <- option "" (string "*") -- for starred variants + let name' = name ++ star + char '}' + args <- option [] commandArgs + let argStr = concat args + contents <- manyTill (choice [ (many1 (noneOf "\\")), + rawLaTeXEnvironment', + string "\\" ]) + (end name') + return $ "\\begin{" ++ name' ++ "}" ++ argStr ++ + concat contents ++ "\\end{" ++ name' ++ "}" + +unknownEnvironment :: GenParser Char ParserState Block +unknownEnvironment = try $ do + state <- getState + result <- if stateParseRaw state -- check whether we should include raw TeX + then rawLaTeXEnvironment -- if so, get whole raw environment + else anyEnvironment -- otherwise just the contents + return result + +-- \ignore{} is used conventionally in literate haskell for definitions +-- that are to be processed by the compiler but not printed. +ignore :: GenParser Char ParserState Block +ignore = try $ do + ("ignore", _, _) <- command + spaces + return Null + +unknownCommand :: GenParser Char ParserState Block +unknownCommand = try $ do + notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", + "document"] + state <- getState + if stateParserContext state == ListItemState + then notFollowedBy' $ string "\\item" + else return () + if stateParseRaw state + then do + (name, star, args) <- command + spaces + return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)] + else do -- skip unknown command, leaving arguments to be parsed + char '\\' + letter + many (letter <|> digit) + optional (try $ string "{}") + spaces + return Null + +-- latex comment +comment :: GenParser Char st Block +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null + +-- +-- inline +-- + +inline :: GenParser Char ParserState Inline +inline = choice [ str + , endline + , whitespace + , quoted + , apostrophe + , spacer + , strong + , math + , ellipses + , emDash + , enDash + , hyphen + , emph + , strikeout + , superscript + , subscript + , ref + , lab + , code + , url + , link + , image + , footnote + , linebreak + , accentedChar + , specialChar + , rawLaTeXInline + , escapedChar + , unescapedChar + ] "inline" + +accentedChar :: GenParser Char st Inline +accentedChar = normalAccentedChar <|> specialAccentedChar + +normalAccentedChar :: GenParser Char st Inline +normalAccentedChar = try $ do + char '\\' + accent <- oneOf "'`^\"~" + character <- (try $ char '{' >> letter >>~ char '}') <|> letter + let table = fromMaybe [] $ lookup character accentTable + let result = case lookup accent table of + Just num -> chr num + Nothing -> '?' + return $ Str [result] + +-- an association list of letters and association list of accents +-- and decimal character numbers. +accentTable :: [(Char, [(Char, Int)])] +accentTable = + [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), + ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), + ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), + ('N', [('~', 209)]), + ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), + ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), + ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), + ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), + ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), + ('n', [('~', 241)]), + ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), + ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] + +specialAccentedChar :: GenParser Char st Inline +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, + oslash, pound, euro, copyright, sect ] + +ccedil :: GenParser Char st Inline +ccedil = try $ do + char '\\' + letter' <- oneOfStrings ["cc", "cC"] + let num = if letter' == "cc" then 231 else 199 + return $ Str [chr num] + +aring :: GenParser Char st Inline +aring = try $ do + char '\\' + letter' <- oneOfStrings ["aa", "AA"] + let num = if letter' == "aa" then 229 else 197 + return $ Str [chr num] + +iuml :: GenParser Char st Inline +iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> + return (Str [chr 239]) + +szlig :: GenParser Char st Inline +szlig = try (string "\\ss") >> return (Str [chr 223]) + +oslash :: GenParser Char st Inline +oslash = try $ do + char '\\' + letter' <- choice [char 'o', char 'O'] + let num = if letter' == 'o' then 248 else 216 + return $ Str [chr num] + +aelig :: GenParser Char st Inline +aelig = try $ do + char '\\' + letter' <- oneOfStrings ["ae", "AE"] + let num = if letter' == "ae" then 230 else 198 + return $ Str [chr num] + +pound :: GenParser Char st Inline +pound = try (string "\\pounds") >> return (Str [chr 163]) + +euro :: GenParser Char st Inline +euro = try (string "\\euro") >> return (Str [chr 8364]) + +copyright :: GenParser Char st Inline +copyright = try (string "\\copyright") >> return (Str [chr 169]) + +sect :: GenParser Char st Inline +sect = try (string "\\S") >> return (Str [chr 167]) + +escapedChar :: GenParser Char st Inline +escapedChar = do + result <- escaped (oneOf " $%&_#{}\n") + return $ if result == Str "\n" then Str " " else result + +-- nonescaped special characters +unescapedChar :: GenParser Char st Inline +unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c]) + +specialChar :: GenParser Char st Inline +specialChar = choice [ backslash, tilde, caret, bar, lt, gt, doubleQuote ] + +backslash :: GenParser Char st Inline +backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\") + +tilde :: GenParser Char st Inline +tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") + +caret :: GenParser Char st Inline +caret = try (string "\\^{}") >> return (Str "^") + +bar :: GenParser Char st Inline +bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\") + +lt :: GenParser Char st Inline +lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<") + +gt :: GenParser Char st Inline +gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">") + +doubleQuote :: GenParser Char st Inline +doubleQuote = char '"' >> return (Str "\"") + +code :: GenParser Char ParserState Inline +code = code1 <|> code2 <|> lhsInlineCode + +code1 :: GenParser Char st Inline +code1 = try $ do + string "\\verb" + marker <- anyChar + result <- manyTill anyChar (char marker) + return $ Code $ removeLeadingTrailingSpace result + +code2 :: GenParser Char st Inline +code2 = try $ do + string "\\texttt{" + result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') + return $ Code result + +lhsInlineCode :: GenParser Char ParserState Inline +lhsInlineCode = try $ do + failUnlessLHS + char '|' + result <- manyTill (noneOf "|\n") (char '|') + return $ Code result + +emph :: GenParser Char ParserState Inline +emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> + manyTill inline (char '}') >>= return . Emph + +strikeout :: GenParser Char ParserState Inline +strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= + return . Strikeout + +superscript :: GenParser Char ParserState Inline +superscript = try $ string "\\textsuperscript{" >> + manyTill inline (char '}') >>= return . Superscript + +-- note: \textsubscript isn't a standard latex command, but we use +-- a defined version in pandoc. +subscript :: GenParser Char ParserState Inline +subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= + return . Subscript + +apostrophe :: GenParser Char ParserState Inline +apostrophe = char '\'' >> return Apostrophe + +quoted :: GenParser Char ParserState Inline +quoted = doubleQuoted <|> singleQuoted + +singleQuoted :: GenParser Char ParserState Inline +singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline +doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= + return . Quoted DoubleQuote . normalizeSpaces + +singleQuoteStart :: GenParser Char st Char +singleQuoteStart = char '`' + +singleQuoteEnd :: GenParser Char st () +singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum + +doubleQuoteStart :: CharParser st String +doubleQuoteStart = string "``" + +doubleQuoteEnd :: CharParser st String +doubleQuoteEnd = try $ string "''" + +ellipses :: GenParser Char st Inline +ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >> + return Ellipses + +enDash :: GenParser Char st Inline +enDash = try (string "--") >> return EnDash + +emDash :: GenParser Char st Inline +emDash = try (string "---") >> return EmDash + +hyphen :: GenParser Char st Inline +hyphen = char '-' >> return (Str "-") + +lab :: GenParser Char st Inline +lab = try $ do + string "\\label{" + result <- manyTill anyChar (char '}') + return $ Str $ "(" ++ result ++ ")" + +ref :: GenParser Char st Inline +ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str + +strong :: GenParser Char ParserState Inline +strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= + return . Strong + +whitespace :: GenParser Char st Inline +whitespace = many1 (oneOf "~ \t") >> return Space + +-- hard line break +linebreak :: GenParser Char st Inline +linebreak = try (string "\\\\") >> return LineBreak + +spacer :: GenParser Char st Inline +spacer = try (string "\\,") >> return (Str "") + +str :: GenParser Char st Inline +str = many1 (noneOf specialChars) >>= return . Str + +-- endline internal to paragraph +endline :: GenParser Char st Inline +endline = try $ newline >> notFollowedBy blankline >> return Space + +-- math +math :: GenParser Char st Inline +math = (math3 >>= return . Math DisplayMath) + <|> (math1 >>= return . Math InlineMath) + <|> (math2 >>= return . Math InlineMath) + <|> (math4 >>= return . Math DisplayMath) + <|> (math5 >>= return . Math DisplayMath) + <|> (math6 >>= return . Math DisplayMath) + "math" + +math1 :: GenParser Char st String +math1 = try $ char '$' >> manyTill anyChar (char '$') + +math2 :: GenParser Char st String +math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)") + +math3 :: GenParser Char st String +math3 = try $ char '$' >> math1 >>~ char '$' + +math4 :: GenParser Char st String +math4 = try $ do + name <- begin "equation" <|> begin "equation*" <|> begin "displaymath" <|> begin "displaymath*" + spaces + manyTill anyChar (end name) + +math5 :: GenParser Char st String +math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]") + +math6 :: GenParser Char st String +math6 = try $ do + name <- begin "eqnarray" <|> begin "eqnarray*" + spaces + res <- manyTill anyChar (end name) + return $ filter (/= '&') res -- remove eqnarray alignment codes + +-- +-- links and images +-- + +url :: GenParser Char ParserState Inline +url = try $ do + string "\\url" + url' <- charsInBalanced '{' '}' + return $ Link [Code url'] (url', "") + +link :: GenParser Char ParserState Inline +link = try $ do + string "\\href{" + url' <- manyTill anyChar (char '}') + char '{' + label' <- manyTill inline (char '}') + return $ Link (normalizeSpaces label') (url', "") + +image :: GenParser Char ParserState Inline +image = try $ do + ("includegraphics", _, args) <- command + let args' = filter isArg args -- filter out options + let src = if null args' then + ("", "") + else + (stripFirstAndLast (head args'), "") + return $ Image [Str "image"] src + +footnote :: GenParser Char ParserState Inline +footnote = try $ do + (name, _, (contents:[])) <- command + if ((name == "footnote") || (name == "thanks")) + then string "" + else fail "not a footnote or thanks command" + let contents' = stripFirstAndLast contents + -- parse the extracted block, which may contain various block elements: + rest <- getInput + setInput $ contents' + blocks <- parseBlocks + setInput rest + return $ Note blocks + +-- | Parse any LaTeX command and return it in a raw TeX inline element. +rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline = try $ do + notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore"] + state <- getState + if stateParseRaw state + then do + (name, star, args) <- command + return $ TeX ("\\" ++ name ++ star ++ concat args) + else do -- skip unknown command, leaving arguments to be parsed + char '\\' + letter + many (letter <|> digit) + optional (try $ string "{}") + return $ Str "" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs new file mode 100644 index 000000000..896f5832e --- /dev/null +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -0,0 +1,1243 @@ +{-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Markdown + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of markdown-formatted plain text to 'Pandoc' document. +-} +module Text.Pandoc.Readers.Markdown ( + readMarkdown + ) where + +import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex, intercalate ) +import Data.Ord ( comparing ) +import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit, isUpper ) +import Data.Maybe +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) +import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, + anyHtmlInlineTag, anyHtmlTag, + anyHtmlEndTag, htmlEndTag, extractTagType, + htmlBlockElement, unsanitaryURI ) +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.ParserCombinators.Parsec +import Control.Monad (when) + +-- | Read markdown from an input string and return a Pandoc document. +readMarkdown :: ParserState -> String -> Pandoc +readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") + +-- +-- Constants and data structure definitions +-- + +spaceChars :: [Char] +spaceChars = " \t" + +bulletListMarkers :: [Char] +bulletListMarkers = "*+-" + +hruleChars :: [Char] +hruleChars = "*-_" + +setextHChars :: [Char] +setextHChars = "=-" + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221" + +-- +-- auxiliary functions +-- + +indentSpaces :: GenParser Char ParserState [Char] +indentSpaces = try $ do + state <- getState + let tabStop = stateTabStop state + try (count tabStop (char ' ')) <|> + (many (char ' ') >> string "\t") "indentation" + +nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces = do + state <- getState + let tabStop = stateTabStop state + sps <- many (char ' ') + if length sps < tabStop + then return sps + else unexpected "indented line" + +-- | Fail unless we're at beginning of a line. +failUnlessBeginningOfLine :: GenParser tok st () +failUnlessBeginningOfLine = do + pos <- getPosition + if sourceColumn pos == 1 then return () else fail "not beginning of line" + +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart = do + state <- getState + if stateSmart state then return () else fail "Smart typography feature" + +-- | Parse a sequence of inline elements between square brackets, +-- including inlines between balanced pairs of square brackets. +inlinesInBalancedBrackets :: GenParser Char ParserState Inline + -> GenParser Char ParserState [Inline] +inlinesInBalancedBrackets parser = try $ do + char '[' + result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser + if res == "[" + then return () + else pzero + bal <- inlinesInBalancedBrackets parser + return $ [Str "["] ++ bal ++ [Str "]"]) + <|> (count 1 parser)) + (char ']') + return $ concat result + +-- +-- document structure +-- + +titleLine :: GenParser Char ParserState [Inline] +titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline + +authorsLine :: GenParser Char st [String] +authorsLine = try $ do + char '%' + skipSpaces + authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") + newline + return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors + +dateLine :: GenParser Char st String +dateLine = try $ do + char '%' + skipSpaces + date <- many (noneOf "\n") + newline + return $ decodeCharacterReferences $ removeTrailingSpace date + +titleBlock :: GenParser Char ParserState ([Inline], [String], [Char]) +titleBlock = try $ do + failIfStrict + title <- option [] titleLine + author <- option [] authorsLine + date <- option "" dateLine + optional blanklines + return (title, author, date) + +parseMarkdown :: GenParser Char ParserState Pandoc +parseMarkdown = do + -- markdown allows raw HTML + updateState (\state -> state { stateParseRaw = True }) + startPos <- getPosition + -- go through once just to get list of reference keys + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= + return . concat + setInput docMinusKeys + setPosition startPos + st <- getState + -- go through again for notes unless strict... + if stateStrict st + then return () + else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= + return . concat + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } + setInput docMinusNotes + setPosition startPos + -- now parse it for real... + (title, author, date) <- option ([],[],"") titleBlock + blocks <- parseBlocks + return $ Pandoc (Meta title author date) $ filter (/= Null) blocks + +-- +-- initial pass for references and notes +-- + +referenceKey :: GenParser Char ParserState [Char] +referenceKey = try $ do + startPos <- getPosition + nonindentSpaces + lab <- reference + char ':' + skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') + let sourceURL excludes = many $ + optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' ')) + src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + tit <- option "" referenceTitle + blanklines + endPos <- getPosition + let newkey = (lab, (intercalate "%20" $ words $ removeTrailingSpace src, tit)) + st <- getState + let oldkeys = stateKeys st + updateState $ \s -> s { stateKeys = newkey : oldkeys } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +referenceTitle :: GenParser Char st String +referenceTitle = try $ do + skipSpaces >> optional newline >> skipSpaces + tit <- (charsInBalanced '(' ')' >>= return . unwords . words) + <|> do delim <- char '\'' <|> char '"' + manyTill anyChar (try (char delim >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit + +noteMarker :: GenParser Char st [Char] +noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']') + +rawLine :: GenParser Char ParserState [Char] +rawLine = do + notFollowedBy blankline + notFollowedBy' noteMarker + contents <- many1 nonEndline + end <- option "" (newline >> optional indentSpaces >> return "\n") + return $ contents ++ end + +rawLines :: GenParser Char ParserState [Char] +rawLines = many1 rawLine >>= return . concat + +noteBlock :: GenParser Char ParserState [Char] +noteBlock = try $ do + startPos <- getPosition + ref <- noteMarker + char ':' + optional blankline + optional indentSpaces + raw <- sepBy rawLines (try (blankline >> indentSpaces)) + optional blanklines + endPos <- getPosition + -- parse the extracted text, which may contain various block elements: + contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + let newnote = (ref, contents) + st <- getState + let oldnotes = stateNotes st + updateState $ \s -> s { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +block :: GenParser Char ParserState Block +block = do + st <- getState + choice (if stateStrict st + then [ header + , codeBlockIndented + , blockQuote + , hrule + , bulletList + , orderedList + , htmlBlock + , para + , plain + , nullBlock ] + else [ codeBlockDelimited + , header + , table + , codeBlockIndented + , lhsCodeBlock + , blockQuote + , hrule + , bulletList + , orderedList + , definitionList + , para + , rawHtmlBlocks + , plain + , nullBlock ]) "block" + +-- +-- header blocks +-- + +header :: GenParser Char ParserState Block +header = setextHeader <|> atxHeader "header" + +atxHeader :: GenParser Char ParserState Block +atxHeader = try $ do + level <- many1 (char '#') >>= return . length + notFollowedBy (char '.' <|> char ')') -- this would be a list + skipSpaces + text <- manyTill inline atxClosing >>= return . normalizeSpaces + return $ Header level text + +atxClosing :: GenParser Char st [Char] +atxClosing = try $ skipMany (char '#') >> blanklines + +setextHeader :: GenParser Char ParserState Block +setextHeader = try $ do + text <- many1Till inline newline + underlineChar <- oneOf setextHChars + many (char underlineChar) + blanklines + let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + return $ Header level (normalizeSpaces text) + +-- +-- hrule block +-- + +hrule :: GenParser Char st Block +hrule = try $ do + skipSpaces + start <- oneOf hruleChars + count 2 (skipSpaces >> char start) + skipMany (oneOf spaceChars <|> char start) + newline + optional blanklines + return HorizontalRule + +-- +-- code blocks +-- + +indentedLine :: GenParser Char ParserState [Char] +indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") + +codeBlockDelimiter :: Maybe Int + -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])])) +codeBlockDelimiter len = try $ do + size <- case len of + Just l -> count l (char '~') >> many (char '~') >> return l + Nothing -> count 3 (char '~') >> many (char '~') >>= + return . (+ 3) . length + many spaceChar + attr <- option ([],[],[]) attributes + blankline + return (size, attr) + +attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attributes = try $ do + char '{' + many spaceChar + attrs <- many (attribute >>~ many spaceChar) + char '}' + let (ids, classes, keyvals) = unzip3 attrs + let id' = if null ids then "" else head ids + return (id', concat classes, concat keyvals) + +attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attribute = identifierAttr <|> classAttr <|> keyValAttr + +identifier :: GenParser Char st [Char] +identifier = do + first <- letter + rest <- many alphaNum + return (first:rest) + +identifierAttr :: GenParser Char st ([Char], [a], [a1]) +identifierAttr = try $ do + char '#' + result <- identifier + return (result,[],[]) + +classAttr :: GenParser Char st ([Char], [[Char]], [a]) +classAttr = try $ do + char '.' + result <- identifier + return ("",[result],[]) + +keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) +keyValAttr = try $ do + key <- identifier + char '=' + char '"' + val <- manyTill (noneOf "\n") (char '"') + return ("",[],[(key,val)]) + +codeBlockDelimited :: GenParser Char st Block +codeBlockDelimited = try $ do + (size, attr) <- codeBlockDelimiter Nothing + contents <- manyTill anyLine (codeBlockDelimiter (Just size)) + blanklines + return $ CodeBlock attr $ intercalate "\n" contents + +codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented = do + contents <- many1 (indentedLine <|> + try (do b <- blanklines + l <- indentedLine + return $ b ++ l)) + optional blanklines + return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents + +lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock = do + failUnlessLHS + contents <- lhsCodeBlockBird <|> lhsCodeBlockLaTeX + return $ CodeBlock ("",["sourceCode","haskell"],[]) contents + +lhsCodeBlockLaTeX :: GenParser Char ParserState String +lhsCodeBlockLaTeX = try $ do + string "\\begin{code}" + manyTill spaceChar newline + contents <- many1Till anyChar (try $ string "\\end{code}") + blanklines + return $ stripTrailingNewlines contents + +lhsCodeBlockBird :: GenParser Char ParserState String +lhsCodeBlockBird = try $ do + pos <- getPosition + when (sourceColumn pos /= 1) $ fail "Not in first column" + lns <- many1 birdTrackLine + -- if (as is normal) there is always a space after >, drop it + let lns' = if all (\ln -> null ln || take 1 ln == " ") lns + then map (drop 1) lns + else lns + blanklines + return $ intercalate "\n" lns' + +birdTrackLine :: GenParser Char st [Char] +birdTrackLine = do + char '>' + manyTill anyChar newline + + +-- +-- block quotes +-- + +emailBlockQuoteStart :: GenParser Char ParserState Char +emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') + +emailBlockQuote :: GenParser Char ParserState [[Char]] +emailBlockQuote = try $ do + emailBlockQuoteStart + raw <- sepBy (many (nonEndline <|> + (try (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n')))) + (try (newline >> emailBlockQuoteStart)) + newline <|> (eof >> return '\n') + optional blanklines + return raw + +blockQuote :: GenParser Char ParserState Block +blockQuote = do + raw <- emailBlockQuote + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" + return $ BlockQuote contents + +-- +-- list blocks +-- + +bulletListStart :: GenParser Char ParserState () +bulletListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy' hrule -- because hrules start out just like lists + oneOf bulletListMarkers + spaceChar + skipSpaces + +anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart = try $ do + optional newline -- if preceded by a Plain block in a list context + nonindentSpaces + notFollowedBy $ string "p." >> spaceChar >> digit -- page number + state <- getState + if stateStrict state + then do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim) + else do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (char ' ' >>~ notFollowedBy (satisfy isUpper)) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: GenParser Char ParserState () +listStart = bulletListStart <|> (anyOrderedListStart >> return ()) + +-- parse a line of a list item (start = parser for beginning of list item) +listLine :: GenParser Char ParserState [Char] +listLine = try $ do + notFollowedBy' listStart + notFollowedBy blankline + notFollowedBy' (do indentSpaces + many (spaceChar) + listStart) + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState [Char] +rawListItem = try $ do + listStart + result <- many1 listLine + blanks <- many blankline + return $ concat result ++ blanks + +-- continuation of a list item - indented and separated by blankline +-- or (in compact lists) endline. +-- note: nested lists are parsed as continuations +listContinuation :: GenParser Char ParserState [Char] +listContinuation = try $ do + lookAhead indentSpaces + result <- many1 listContinuationLine + blanks <- many blankline + return $ concat result ++ blanks + +listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine = try $ do + notFollowedBy blankline + notFollowedBy' listStart + optional indentSpaces + result <- manyTill anyChar newline + return $ result ++ "\n" + +listItem :: GenParser Char ParserState [Block] +listItem = try $ do + first <- rawListItem + continuations <- many listContinuation + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may contain various block elements: + let raw = concat (first:continuations) + contents <- parseFromString parseBlocks raw + updateState (\st -> st {stateParserContext = oldContext}) + return contents + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + (start, style, delim) <- lookAhead anyOrderedListStart + items <- many1 listItem + return $ OrderedList (start, style, delim) $ compactify items + +bulletList :: GenParser Char ParserState Block +bulletList = try $ do + lookAhead bulletListStart + many1 listItem >>= return . BulletList . compactify + +-- definition lists + +definitionListItem :: GenParser Char ParserState ([Inline], [Block]) +definitionListItem = try $ do + notFollowedBy blankline + notFollowedBy' indentSpaces + -- first, see if this has any chance of being a definition list: + lookAhead (anyLine >> char ':') + term <- manyTill inline newline + raw <- many1 defRawBlock + state <- getState + let oldContext = stateParserContext state + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ concat raw + updateState (\st -> st {stateParserContext = oldContext}) + return ((normalizeSpaces term), contents) + +defRawBlock :: GenParser Char ParserState [Char] +defRawBlock = try $ do + char ':' + state <- getState + let tabStop = stateTabStop state + try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") + firstline <- anyLine + rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) + trailing <- option "" blanklines + return $ firstline ++ "\n" ++ unlines rawlines ++ trailing + +definitionList :: GenParser Char ParserState Block +definitionList = do + items <- many1 definitionListItem + let (terms, defs) = unzip items + let defs' = compactify defs + let items' = zip terms defs' + return $ DefinitionList items' + +-- +-- paragraph block +-- + +isHtmlOrBlank :: Inline -> Bool +isHtmlOrBlank (HtmlInline _) = True +isHtmlOrBlank (Space) = True +isHtmlOrBlank (LineBreak) = True +isHtmlOrBlank _ = False + +para :: GenParser Char ParserState Block +para = try $ do + result <- many1 inline + if all isHtmlOrBlank result + then fail "treat as raw HTML" + else return () + newline + blanklines <|> do st <- getState + if stateStrict st + then lookAhead (blockQuote <|> header) >> return "" + else pzero + return $ Para $ normalizeSpaces result + +plain :: GenParser Char ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- raw html +-- + +htmlElement :: GenParser Char ParserState [Char] +htmlElement = strictHtmlBlock <|> htmlBlockElement "html element" + +htmlBlock :: GenParser Char ParserState Block +htmlBlock = try $ do + failUnlessBeginningOfLine + first <- htmlElement + finalSpace <- many (oneOf spaceChars) + finalNewlines <- many newline + return $ RawHtml $ first ++ finalSpace ++ finalNewlines + +-- True if tag is self-closing +isSelfClosing :: [Char] -> Bool +isSelfClosing tag = + isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag + +strictHtmlBlock :: GenParser Char ParserState [Char] +strictHtmlBlock = try $ do + tag <- anyHtmlBlockTag + let tag' = extractTagType tag + if isSelfClosing tag || tag' == "hr" + then return tag + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + (htmlElement <|> (count 1 anyChar))) + end <- htmlEndTag tag' + return $ tag ++ concat contents ++ end + +rawHtmlBlocks :: GenParser Char ParserState Block +rawHtmlBlocks = do + htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock + sps <- do sp1 <- many spaceChar + sp2 <- option "" (blankline >> return "\n") + sp3 <- many spaceChar + sp4 <- option "" blanklines + return $ sp1 ++ sp2 ++ sp3 ++ sp4 + -- note: we want raw html to be able to + -- precede a code block, when separated + -- by a blank line + return $ blk ++ sps + let combined = concat htmlBlocks + let combined' = if last combined == '\n' then init combined else combined + return $ RawHtml combined' + +-- +-- Tables +-- + +-- Parse a dashed line with optional trailing spaces; return its length +-- and the length including trailing space. +dashedLine :: Char + -> GenParser Char st (Int, Int) +dashedLine ch = do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) + +-- Parse a table header with dashed lines of '-' preceded by +-- one line of text. +simpleTableHeader :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) +simpleTableHeader = try $ do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines') = unzip dashes + let indices = scanl (+) (length initSp) lines' + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return (rawHeads, aligns, indices) + +-- Parse a table footer - dashed lines followed by blank line. +tableFooter :: GenParser Char ParserState [Char] +tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines + +-- Parse a table separator - dashed line. +tableSep :: GenParser Char ParserState String +tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" + +-- Parse a raw line and split it into chunks by indices. +rawTableLine :: [Int] + -> GenParser Char ParserState [String] +rawTableLine indices = do + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line + +-- Parse a table line and return a list of lists of blocks (columns). +tableLine :: [Int] + -> GenParser Char ParserState [[Block]] +tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) + +-- Parse a multiline table row and return a list of blocks (columns). +multilineRow :: [Int] + -> GenParser Char ParserState [[Block]] +multilineRow indices = do + colLines <- many1 (rawTableLine indices) + optional blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Double] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +-- Parses a table caption: inlines beginning with 'Table:' +-- and followed by blank lines. +tableCaption :: GenParser Char ParserState [Inline] +tableCaption = try $ do + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result + +-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState end + -> GenParser Char ParserState Block +tableWith headerParser lineParser footerParser = try $ do + (rawHeads, aligns, indices) <- headerParser + lines' <- many1Till (lineParser indices) footerParser + caption <- option [] tableCaption + heads <- mapM (parseFromString (many plain)) rawHeads + state <- getState + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table caption aligns widths heads lines' + +-- Parse a simple table with '---' header and one line per row. +simpleTable :: GenParser Char ParserState Block +simpleTable = tableWith simpleTableHeader tableLine blanklines + +-- Parse a multiline table: starts with row of '-' on top, then header +-- (which may be multiline), then the rows, +-- which may be multiline, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +multilineTable :: GenParser Char ParserState Block +multilineTable = tableWith multilineTableHeader multilineRow tableFooter + +multilineTableHeader :: GenParser Char ParserState ([String], [Alignment], [Int]) +multilineTableHeader = try $ do + tableSep + rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines') = unzip dashes + let indices = scanl (+) (length initSp) lines' + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (intercalate " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) + +-- Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows. +alignType :: [String] + -> Int + -> Alignment +alignType [] _ = AlignDefault +alignType strLst len = + let s = head $ sortBy (comparing length) $ + map removeTrailingSpace strLst + leftSpace = if null s then False else (s !! 0) `elem` " \t" + rightSpace = length s < len || (s !! (len - 1)) `elem` " \t" + in case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + +table :: GenParser Char ParserState Block +table = simpleTable <|> multilineTable "table" + +-- +-- inline +-- + +inline :: GenParser Char ParserState Inline +inline = choice inlineParsers "inline" + +inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers = [ abbrev + , str + , smartPunctuation + , whitespace + , endline + , code + , charRef + , strong + , emph + , note + , inlineNote + , link +#ifdef _CITEPROC + , inlineCitation +#endif + , image + , math + , strikeout + , superscript + , subscript + , autoLink + , rawHtmlInline' + , rawLaTeXInline' + , escapedChar + , symbol + , ltSign ] + +inlineNonLink :: GenParser Char ParserState Inline +inlineNonLink = (choice $ + map (\parser -> try (parser >>= failIfLink)) inlineParsers) + "inline (non-link)" + +failIfLink :: Inline -> GenParser tok st Inline +failIfLink (Link _ _) = pzero +failIfLink elt = return elt + +escapedChar :: GenParser Char ParserState Inline +escapedChar = do + char '\\' + state <- getState + result <- option '\\' $ if stateStrict state + then oneOf "\\`*_{}[]()>#+-.!~" + else satisfy (not . isAlphaNum) + let result' = if result == ' ' + then '\160' -- '\ ' is a nonbreaking space + else result + return $ Str [result'] + +ltSign :: GenParser Char ParserState Inline +ltSign = do + st <- getState + if stateStrict st + then char '<' + else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html + return $ Str ['<'] + +specialCharsMinusLt :: [Char] +specialCharsMinusLt = filter (/= '<') specialChars + +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialCharsMinusLt + return $ Str [result] + +-- parses inline code, between n `s and n `s +code :: GenParser Char ParserState Inline +code = try $ do + starts <- many1 (char '`') + skipSpaces + result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (char '\n' >> return " ")) + (try (skipSpaces >> count (length starts) (char '`') >> + notFollowedBy (char '`'))) + return $ Code $ removeLeadingTrailingSpace $ concat result + +mathWord :: GenParser Char st [Char] +mathWord = many1 ((noneOf " \t\n\\$") <|> + (try (char '\\') >>~ notFollowedBy (char '$'))) + +math :: GenParser Char ParserState Inline +math = (mathDisplay >>= return . Math DisplayMath) + <|> (mathInline >>= return . Math InlineMath) + +mathDisplay :: GenParser Char ParserState String +mathDisplay = try $ do + failIfStrict + string "$$" + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") + +mathInline :: GenParser Char ParserState String +mathInline = try $ do + failIfStrict + char '$' + notFollowedBy space + words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline))) + char '$' + notFollowedBy digit + return $ intercalate " " words' + +emph :: GenParser Char ParserState Inline +emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|> + (enclosed (char '_') (notFollowedBy' strong >> char '_' >> + notFollowedBy alphaNum) inline)) >>= + return . Emph . normalizeSpaces + +strong :: GenParser Char ParserState Inline +strong = ((enclosed (string "**") (try $ string "**") inline) <|> + (enclosed (string "__") (try $ string "__") inline)) >>= + return . Strong . normalizeSpaces + +strikeout :: GenParser Char ParserState Inline +strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= + return . Strikeout . normalizeSpaces + +superscript :: GenParser Char ParserState Inline +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript + +subscript :: GenParser Char ParserState Inline +subscript = failIfStrict >> enclosed (char '~') (char '~') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Subscript + +abbrev :: GenParser Char ParserState Inline +abbrev = failUnlessSmart >> + (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160") + +-- an string of letters followed by a period that does not end a sentence +-- is assumed to be an abbreviation. It is assumed that sentences don't +-- start with lowercase letters or numerals. +assumedAbbrev :: GenParser Char ParserState [Char] +assumedAbbrev = try $ do + result <- many1 $ satisfy isAlpha + string ". " + lookAhead $ satisfy (\x -> isLower x || isDigit x) + return result + +-- these strings are treated as abbreviations even if they are followed +-- by a capital letter (such as a name). +knownAbbrev :: GenParser Char ParserState [Char] +knownAbbrev = try $ do + result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen", + "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs", + "Sen", "Rep", "Pres", "Hon", "Rev" ] + string ". " + return result + +smartPunctuation :: GenParser Char ParserState Inline +smartPunctuation = failUnlessSmart >> + choice [ quoted, apostrophe, dash, ellipses ] + +apostrophe :: GenParser Char ParserState Inline +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted :: GenParser Char ParserState Inline +quoted = doubleQuoted <|> singleQuoted + +withQuoteContext :: QuoteContext + -> (GenParser Char ParserState Inline) + -> GenParser Char ParserState Inline +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted :: GenParser Char ParserState Inline +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= + return . Quoted DoubleQuote . normalizeSpaces + +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext context = do + st <- getState + if stateQuoteContext st == context + then fail "already inside quotes" + else return () + +singleQuoteStart :: GenParser Char ParserState Char +singleQuoteStart = do + failIfInQuoteContext InSingleQuote + char '\8216' <|> + (try $ do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) + -- possess/contraction + return '\'') + +singleQuoteEnd :: GenParser Char st Char +singleQuoteEnd = try $ do + char '\8217' <|> char '\'' + notFollowedBy alphaNum + return '\'' + +doubleQuoteStart :: GenParser Char ParserState Char +doubleQuoteStart = do + failIfInQuoteContext InDoubleQuote + char '\8220' <|> + (try $ do char '"' + notFollowedBy (oneOf " \t\n") + return '"') + +doubleQuoteEnd :: GenParser Char st Char +doubleQuoteEnd = char '\8221' <|> char '"' + +ellipses :: GenParser Char st Inline +ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses + +dash :: GenParser Char st Inline +dash = enDash <|> emDash + +enDash :: GenParser Char st Inline +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash + +emDash :: GenParser Char st Inline +emDash = oneOfStrings ["---", "--"] >> return EmDash + +whitespace :: GenParser Char ParserState Inline +whitespace = do + sps <- many1 (oneOf spaceChars) + if length sps >= 2 + then option Space (endline >> return LineBreak) + else return Space "whitespace" + +nonEndline :: GenParser Char st Char +nonEndline = satisfy (/='\n') + +strChar :: GenParser Char st Char +strChar = noneOf (specialChars ++ spaceChars ++ "\n") + +str :: GenParser Char st Inline +str = many1 strChar >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline + notFollowedBy blankline + st <- getState + if stateStrict st + then do notFollowedBy emailBlockQuoteStart + notFollowedBy (char '#') -- atx header + else return () + -- parse potential list-starts differently if in a list: + if stateParserContext st == ListItemState + then notFollowedBy' (bulletListStart <|> + (anyOrderedListStart >> return ())) + else return () + return Space + +-- +-- links +-- + +-- a reference label for a link +reference :: GenParser Char ParserState [Inline] +reference = do notFollowedBy' (string "[^") -- footnote reference + result <- inlinesInBalancedBrackets inlineNonLink + return $ normalizeSpaces result + +-- source for a link, with optional title +source :: GenParser Char st (String, [Char]) +source = + (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> + -- the following is needed for cases like: [ref](/url(a). + (enclosed (char '(') (char ')') anyChar >>= + parseFromString source') + +-- auxiliary function for source +source' :: GenParser Char st (String, [Char]) +source' = do + skipSpaces + let sourceURL excludes = many $ + optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) + src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + tit <- option "" linkTitle + skipSpaces + eof + return (intercalate "%20" $ words $ removeTrailingSpace src, tit) + +linkTitle :: GenParser Char st String +linkTitle = try $ do + (many1 spaceChar >> option '\n' newline) <|> newline + skipSpaces + delim <- oneOf "'\"" + tit <- manyTill (optional (char '\\') >> anyChar) + (try (char delim >> skipSpaces >> eof)) + return $ decodeCharacterReferences tit + +link :: GenParser Char ParserState Inline +link = try $ do + lab <- reference + src <- source <|> referenceLink lab + sanitize <- getState >>= return . stateSanitizeHTML + if sanitize && unsanitaryURI (fst src) + then fail "Unsanitary URI" + else return $ Link lab src + +-- a link like [this][ref] or [this][] or [this] +referenceLink :: [Inline] + -> GenParser Char ParserState (String, [Char]) +referenceLink lab = do + ref <- option [] (try (optional (char ' ') >> + optional (newline >> skipSpaces) >> reference)) + let ref' = if null ref then lab else ref + state <- getState + case lookupKeySrc (stateKeys state) ref' of + Nothing -> fail "no corresponding key" + Just target -> return target + +autoLink :: GenParser Char ParserState Inline +autoLink = try $ do + char '<' + src <- uri <|> (emailAddress >>= (return . ("mailto:" ++))) + char '>' + let src' = if "mailto:" `isPrefixOf` src + then drop 7 src + else src + st <- getState + let sanitize = stateSanitizeHTML st + if sanitize && unsanitaryURI src + then fail "Unsanitary URI" + else return $ if stateStrict st + then Link [Str src'] (src, "") + else Link [Code src'] (src, "") + +image :: GenParser Char ParserState Inline +image = try $ do + char '!' + (Link lab src) <- link + return $ Image lab src + +note :: GenParser Char ParserState Inline +note = try $ do + failIfStrict + ref <- noteMarker + state <- getState + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just contents -> return $ Note contents + +inlineNote :: GenParser Char ParserState Inline +inlineNote = try $ do + failIfStrict + char '^' + contents <- inlinesInBalancedBrackets inline + return $ Note [Para contents] + +rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' = do + failIfStrict + (rawConTeXtEnvironment' >>= return . TeX) + <|> (rawLaTeXEnvironment' >>= return . TeX) + <|> rawLaTeXInline + +rawConTeXtEnvironment' :: GenParser Char st String +rawConTeXtEnvironment' = try $ do + string "\\start" + completion <- inBrackets (letter <|> digit <|> spaceChar) + <|> (many1 letter) + contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar)) + (try $ string "\\stop" >> string completion) + return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion + +inBrackets :: (GenParser Char st Char) -> GenParser Char st String +inBrackets parser = do + char '[' + contents <- many parser + char ']' + return $ "[" ++ contents ++ "]" + +rawHtmlInline' :: GenParser Char ParserState Inline +rawHtmlInline' = do + st <- getState + result <- if stateStrict st + then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + else anyHtmlInlineTag + return $ HtmlInline result + +#ifdef _CITEPROC +inlineCitation :: GenParser Char ParserState Inline +inlineCitation = try $ do + failIfStrict + cit <- citeMarker + let citations = readWith parseCitation defaultParserState cit + mr <- mapM chkCit citations + if catMaybes mr /= [] + then return $ Cite citations [] + else fail "no citation found" + +chkCit :: Target -> GenParser Char ParserState (Maybe Target) +chkCit t = do + st <- getState + case lookupKeySrc (stateKeys st) [Str $ fst t] of + Just _ -> fail "This is a link" + Nothing -> if elem (fst t) $ stateCitations st + then return $ Just t + else return $ Nothing + +citeMarker :: GenParser Char ParserState String +citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') + +parseCitation :: GenParser Char ParserState [(String,String)] +parseCitation = try $ sepBy (parseLabel) (oneOf ";") + +parseLabel :: GenParser Char ParserState (String,String) +parseLabel = try $ do + res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@") + case res of + [lab,loc] -> return (lab, loc) + [lab] -> return (lab, "" ) + _ -> return ("" , "" ) + +#endif diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs new file mode 100644 index 000000000..255054c10 --- /dev/null +++ b/src/Text/Pandoc/Readers/RST.hs @@ -0,0 +1,707 @@ +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.RST + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion from reStructuredText to 'Pandoc' document. +-} +module Text.Pandoc.Readers.RST ( + readRST + ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.ParserCombinators.Parsec +import Control.Monad ( when ) +import Data.List ( findIndex, delete, intercalate ) + +-- | Parse reStructuredText string and return Pandoc document. +readRST :: ParserState -> String -> Pandoc +readRST state s = (readWith parseRST) state (s ++ "\n\n") + +-- +-- Constants and data structure definitions +--- + +bulletListMarkers :: [Char] +bulletListMarkers = "*+-" + +underlineChars :: [Char] +underlineChars = "!\"#$&'()*+,-./:;<=>?@[\\]^_`{|}~" + +-- treat these as potentially non-text when parsing inline: +specialChars :: [Char] +specialChars = "\\`|*_<>$:[-" + +-- +-- parsing documents +-- + +isHeader :: Int -> Block -> Bool +isHeader n (Header x _) = x == n +isHeader _ _ = False + +-- | Promote all headers in a list of blocks. (Part of +-- title transformation for RST.) +promoteHeaders :: Int -> [Block] -> [Block] +promoteHeaders num ((Header level text):rest) = + (Header (level - num) text):(promoteHeaders num rest) +promoteHeaders num (other:rest) = other:(promoteHeaders num rest) +promoteHeaders _ [] = [] + +-- | If list of blocks starts with a header (or a header and subheader) +-- of level that are not found elsewhere, return it as a title and +-- promote all the other headers. +titleTransform :: [Block] -- ^ list of blocks + -> ([Block], [Inline]) -- ^ modified list of blocks, title +titleTransform ((Header 1 head1):(Header 2 head2):rest) = -- title subtitle + if (any (isHeader 1) rest) || (any (isHeader 2) rest) + then ((Header 1 head1):(Header 2 head2):rest, []) + else ((promoteHeaders 2 rest), head1 ++ [Str ":", Space] ++ head2) +titleTransform ((Header 1 head1):rest) = -- title, no subtitle + if (any (isHeader 1) rest) + then ((Header 1 head1):rest, []) + else ((promoteHeaders 1 rest), head1) +titleTransform blocks = (blocks, []) + +parseRST :: GenParser Char ParserState Pandoc +parseRST = do + startPos <- getPosition + -- go through once just to get list of reference keys + -- docMinusKeys is the raw document with blanks where the keys were... + docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat + setInput docMinusKeys + setPosition startPos + st <- getState + let reversedKeys = stateKeys st + updateState $ \s -> s { stateKeys = reverse reversedKeys } + -- now parse it for real... + blocks <- parseBlocks + let blocks' = filter (/= Null) blocks + state <- getState + let (blocks'', title) = if stateStandalone state + then titleTransform blocks' + else (blocks', []) + let authors = stateAuthors state + let date = stateDate state + let title' = if (null title) then (stateTitle state) else title + return $ Pandoc (Meta title' authors date) blocks'' + +-- +-- parsing blocks +-- + +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +block :: GenParser Char ParserState Block +block = choice [ codeBlock + , rawHtmlBlock + , rawLaTeXBlock + , fieldList + , blockQuote + , imageBlock + , unknownDirective + , header + , hrule + , list + , lineBlock + , lhsCodeBlock + , para + , plain + , nullBlock ] "block" + +-- +-- field list +-- + +fieldListItem :: String -> GenParser Char st ([Char], [Char]) +fieldListItem indent = try $ do + string indent + char ':' + name <- many1 alphaNum + string ": " + skipSpaces + first <- manyTill anyChar newline + rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> + indentedBlock + return (name, intercalate " " (first:(lines rest))) + +fieldList :: GenParser Char ParserState Block +fieldList = try $ do + indent <- lookAhead $ many (oneOf " \t") + items <- many1 $ fieldListItem indent + blanklines + let authors = case lookup "Authors" items of + Just auth -> [auth] + Nothing -> map snd (filter (\(x,_) -> x == "Author") items) + if null authors + then return () + else updateState $ \st -> st {stateAuthors = authors} + case (lookup "Date" items) of + Just dat -> updateState $ \st -> st {stateDate = dat} + Nothing -> return () + case (lookup "Title" items) of + Just tit -> parseFromString (many inline) tit >>= + \t -> updateState $ \st -> st {stateTitle = t} + Nothing -> return () + let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") && + (x /= "Date") && (x /= "Title")) items + if null remaining + then return Null + else do terms <- mapM (return . (:[]) . Str . fst) remaining + defs <- mapM (parseFromString (many block) . snd) + remaining + return $ DefinitionList $ zip terms defs + +-- +-- line block +-- + +lineBlockLine :: GenParser Char ParserState [Inline] +lineBlockLine = try $ do + string "| " + white <- many (oneOf " \t") + line <- manyTill inline newline + return $ (if null white then [] else [Str white]) ++ line ++ [LineBreak] + +lineBlock :: GenParser Char ParserState Block +lineBlock = try $ do + lines' <- many1 lineBlockLine + blanklines + return $ Para (concat lines') + +-- +-- paragraph block +-- + +para :: GenParser Char ParserState Block +para = paraBeforeCodeBlock <|> paraNormal "paragraph" + +codeBlockStart :: GenParser Char st Char +codeBlockStart = string "::" >> blankline >> blankline + +-- paragraph that ends in a :: starting a code block +paraBeforeCodeBlock :: GenParser Char ParserState Block +paraBeforeCodeBlock = try $ do + result <- many1 (notFollowedBy' codeBlockStart >> inline) + lookAhead (string "::") + return $ Para $ if last result == Space + then normalizeSpaces result + else (normalizeSpaces result) ++ [Str ":"] + +-- regular paragraph +paraNormal :: GenParser Char ParserState Block +paraNormal = try $ do + result <- many1 inline + newline + blanklines + return $ Para $ normalizeSpaces result + +plain :: GenParser Char ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces + +-- +-- image block +-- + +imageBlock :: GenParser Char st Block +imageBlock = try $ do + string ".. image:: " + src <- manyTill anyChar newline + fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") + many1 $ fieldListItem indent + optional blanklines + case lookup "alt" fields of + Just alt -> return $ Plain [Image [Str alt] (src, alt)] + Nothing -> return $ Plain [Image [Str "image"] (src, "")] +-- +-- header blocks +-- + +header :: GenParser Char ParserState Block +header = doubleHeader <|> singleHeader "header" + +-- a header with lines on top and bottom +doubleHeader :: GenParser Char ParserState Block +doubleHeader = try $ do + c <- oneOf underlineChars + rest <- many (char c) -- the top line + let lenTop = length (c:rest) + skipSpaces + newline + txt <- many1 (notFollowedBy blankline >> inline) + pos <- getPosition + let len = (sourceColumn pos) - 1 + if (len > lenTop) then fail "title longer than border" else return () + blankline -- spaces and newline + count lenTop (char c) -- the bottom line + blanklines + -- check to see if we've had this kind of header before. + -- if so, get appropriate level. if not, add to list. + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== DoubleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [DoubleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return $ Header level (normalizeSpaces txt) + +-- a header with line on the bottom only +singleHeader :: GenParser Char ParserState Block +singleHeader = try $ do + notFollowedBy' whitespace + txt <- many1 (do {notFollowedBy blankline; inline}) + pos <- getPosition + let len = (sourceColumn pos) - 1 + blankline + c <- oneOf underlineChars + count (len - 1) (char c) + many (char c) + blanklines + state <- getState + let headerTable = stateHeaderTable state + let (headerTable',level) = case findIndex (== SingleHeader c) headerTable of + Just ind -> (headerTable, ind + 1) + Nothing -> (headerTable ++ [SingleHeader c], (length headerTable) + 1) + setState (state { stateHeaderTable = headerTable' }) + return $ Header level (normalizeSpaces txt) + +-- +-- hrule block +-- + +hrule :: GenParser Char st Block +hrule = try $ do + chr <- oneOf underlineChars + count 3 (char chr) + skipMany (char chr) + blankline + blanklines + return HorizontalRule + +-- +-- code blocks +-- + +-- read a line indented by a given string +indentedLine :: String -> GenParser Char st [Char] +indentedLine indents = try $ do + string indents + result <- manyTill anyChar newline + return $ result ++ "\n" + +-- two or more indented lines, possibly separated by blank lines. +-- any amount of indentation will work. +indentedBlock :: GenParser Char st [Char] +indentedBlock = do + indents <- lookAhead $ many1 (oneOf " \t") + lns <- many $ choice $ [ indentedLine indents, + try $ do b <- blanklines + l <- indentedLine indents + return (b ++ l) ] + optional blanklines + return $ concat lns + +codeBlock :: GenParser Char st Block +codeBlock = try $ do + codeBlockStart + result <- indentedBlock + return $ CodeBlock ("",[],[]) $ stripTrailingNewlines result + +lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock = try $ do + failUnlessLHS + pos <- getPosition + when (sourceColumn pos /= 1) $ fail "Not in first column" + lns <- many1 birdTrackLine + -- if (as is normal) there is always a space after >, drop it + let lns' = if all (\ln -> null ln || take 1 ln == " ") lns + then map (drop 1) lns + else lns + blanklines + return $ CodeBlock ("", ["sourceCode", "haskell"], []) $ intercalate "\n" lns' + +birdTrackLine :: GenParser Char st [Char] +birdTrackLine = do + char '>' + manyTill anyChar newline + +-- +-- raw html +-- + +rawHtmlBlock :: GenParser Char st Block +rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> + indentedBlock >>= return . RawHtml + +-- +-- raw latex +-- + +rawLaTeXBlock :: GenParser Char st Block +rawLaTeXBlock = try $ do + string ".. raw:: latex" + blanklines + result <- indentedBlock + return $ Para [(TeX result)] + +-- +-- block quotes +-- + +blockQuote :: GenParser Char ParserState Block +blockQuote = do + raw <- indentedBlock + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ raw ++ "\n\n" + return $ BlockQuote contents + +-- +-- list blocks +-- + +list :: GenParser Char ParserState Block +list = choice [ bulletList, orderedList, definitionList ] "list" + +definitionListItem :: GenParser Char ParserState ([Inline], [Block]) +definitionListItem = try $ do + -- avoid capturing a directive or comment + notFollowedBy (try $ char '.' >> char '.') + term <- many1Till inline endline + raw <- indentedBlock + -- parse the extracted block, which may contain various block elements: + contents <- parseFromString parseBlocks $ raw ++ "\n\n" + return (normalizeSpaces term, contents) + +definitionList :: GenParser Char ParserState Block +definitionList = many1 definitionListItem >>= return . DefinitionList + +-- parses bullet list start and returns its length (inc. following whitespace) +bulletListStart :: GenParser Char st Int +bulletListStart = try $ do + notFollowedBy' hrule -- because hrules start out just like lists + marker <- oneOf bulletListMarkers + white <- many1 spaceChar + return $ length (marker:white) + +-- parses ordered list start and returns its length (inc following whitespace) +orderedListStart :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char st Int +orderedListStart style delim = try $ do + (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) + white <- many1 spaceChar + return $ markerLen + length white + +-- parse a line of a list item +listLine :: Int -> GenParser Char ParserState [Char] +listLine markerLength = try $ do + notFollowedBy blankline + indentWith markerLength + line <- manyTill anyChar newline + return $ line ++ "\n" + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> GenParser Char ParserState [Char] +indentWith num = do + state <- getState + let tabStop = stateTabStop state + if (num < tabStop) + then count num (char ' ') + else choice [ try (count num (char ' ')), + (try (char '\t' >> count (num - tabStop) (char ' '))) ] + +-- parse raw text for one list item, excluding start marker and continuations +rawListItem :: GenParser Char ParserState Int + -> GenParser Char ParserState (Int, [Char]) +rawListItem start = try $ do + markerLength <- start + firstLine <- manyTill anyChar newline + restLines <- many (listLine markerLength) + return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) + +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int -> GenParser Char ParserState [Char] +listContinuation markerLength = try $ do + blanks <- many1 blankline + result <- many1 (listLine markerLength) + return $ blanks ++ concat result + +listItem :: GenParser Char ParserState Int + -> GenParser Char ParserState [Block] +listItem start = try $ do + (markerLength, first) <- rawListItem start + rest <- many (listContinuation markerLength) + blanks <- choice [ try (many blankline >>~ lookAhead start), + many1 blankline ] -- whole list must end with blank. + -- parsing with ListItemState forces markers at beginning of lines to + -- count as list item markers, even if not separated by blank space. + -- see definition of "endline" + state <- getState + let oldContext = stateParserContext state + setState $ state {stateParserContext = ListItemState} + -- parse the extracted block, which may itself contain block elements + parsed <- parseFromString parseBlocks $ concat (first:rest) ++ blanks + updateState (\st -> st {stateParserContext = oldContext}) + return parsed + +orderedList :: GenParser Char ParserState Block +orderedList = try $ do + (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) + items <- many1 (listItem (orderedListStart style delim)) + let items' = compactify items + return $ OrderedList (start, style, delim) items' + +bulletList :: GenParser Char ParserState Block +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify + +-- +-- unknown directive (e.g. comment) +-- + +unknownDirective :: GenParser Char st Block +unknownDirective = try $ do + string ".." + notFollowedBy (noneOf " \t\n") + manyTill anyChar newline + many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline) + return Null + +-- +-- reference key +-- + +quotedReferenceName :: GenParser Char ParserState [Inline] +quotedReferenceName = try $ do + char '`' >> notFollowedBy (char '`') -- `` means inline code! + label' <- many1Till inline (char '`') + return label' + +unquotedReferenceName :: GenParser Char ParserState [Inline] +unquotedReferenceName = try $ do + label' <- many1Till inline (lookAhead $ char ':') + return label' + +isolated :: Char -> GenParser Char st Char +isolated ch = try $ char ch >>~ notFollowedBy (char ch) + +simpleReferenceName :: GenParser Char st [Inline] +simpleReferenceName = do + raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|> + (try $ char '_' >>~ lookAhead alphaNum)) + return [Str raw] + +referenceName :: GenParser Char ParserState [Inline] +referenceName = quotedReferenceName <|> + (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> + unquotedReferenceName + +referenceKey :: GenParser Char ParserState [Char] +referenceKey = do + startPos <- getPosition + key <- choice [imageKey, anonymousKey, regularKey] + st <- getState + let oldkeys = stateKeys st + updateState $ \s -> s { stateKeys = key : oldkeys } + optional blanklines + endPos <- getPosition + -- return enough blanks to replace key + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +targetURI :: GenParser Char st [Char] +targetURI = do + skipSpaces + optional newline + contents <- many1 (try (many spaceChar >> newline >> + many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") + blanklines + return contents + +imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) +imageKey = try $ do + string ".. |" + ref <- manyTill inline (char '|') + skipSpaces + string "image::" + src <- targetURI + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + +anonymousKey :: GenParser Char st ([Inline], (String, [Char])) +anonymousKey = try $ do + oneOfStrings [".. __:", "__"] + src <- targetURI + return ([Str "_"], (removeLeadingTrailingSpace src, "")) + +regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) +regularKey = try $ do + string ".. _" + ref <- referenceName + char ':' + src <- targetURI + return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + + -- + -- inline + -- + +inline :: GenParser Char ParserState Inline +inline = choice [ link + , str + , whitespace + , endline + , strong + , emph + , code + , image + , hyphens + , superscript + , subscript + , escapedChar + , symbol ] "inline" + +hyphens :: GenParser Char ParserState Inline +hyphens = do + result <- many1 (char '-') + option Space endline + -- don't want to treat endline after hyphen or dash as a space + return $ Str result + +escapedChar :: GenParser Char st Inline +escapedChar = escaped anyChar + +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- parses inline code, between codeStart and codeEnd +code :: GenParser Char ParserState Inline +code = try $ do + string "``" + result <- manyTill anyChar (try (string "``")) + return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result + +emph :: GenParser Char ParserState Inline +emph = enclosed (char '*') (char '*') inline >>= + return . Emph . normalizeSpaces + +strong :: GenParser Char ParserState Inline +strong = enclosed (string "**") (try $ string "**") inline >>= + return . Strong . normalizeSpaces + +interpreted :: [Char] -> GenParser Char st [Inline] +interpreted role = try $ do + optional $ try $ string "\\ " + result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar + try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") + return [Str result] + +superscript :: GenParser Char ParserState Inline +superscript = interpreted "sup" >>= (return . Superscript) + +subscript :: GenParser Char ParserState Inline +subscript = interpreted "sub" >>= (return . Subscript) + +whitespace :: GenParser Char ParserState Inline +whitespace = many1 spaceChar >> return Space "whitespace" + +str :: GenParser Char ParserState Inline +str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str + +-- an endline character that can be treated as a space, not a structural break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline + notFollowedBy blankline + -- parse potential list-starts at beginning of line differently in a list: + st <- getState + if (stateParserContext st) == ListItemState + then notFollowedBy (anyOrderedListMarker >> spaceChar) >> + notFollowedBy' bulletListStart + else return () + return Space + +-- +-- links +-- + +link :: GenParser Char ParserState Inline +link = choice [explicitLink, referenceLink, autoLink] "link" + +explicitLink :: GenParser Char ParserState Inline +explicitLink = try $ do + char '`' + notFollowedBy (char '`') -- `` marks start of inline code + label' <- manyTill (notFollowedBy (char '`') >> inline) + (try (spaces >> char '<')) + src <- manyTill (noneOf ">\n ") (char '>') + skipSpaces + string "`_" + return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "") + +referenceLink :: GenParser Char ParserState Inline +referenceLink = try $ do + label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' + key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable key of + Nothing -> fail "no corresponding key" + Just target -> return target + -- if anonymous link, remove first anon key so it won't be used again + let keyTable' = if (key == [Str "_"]) -- anonymous link? + then delete ([Str "_"], src) keyTable -- remove first anon key + else keyTable + setState $ state { stateKeys = keyTable' } + return $ Link (normalizeSpaces label') src + +autoURI :: GenParser Char ParserState Inline +autoURI = do + src <- uri + return $ Link [Str src] (src, "") + +autoEmail :: GenParser Char ParserState Inline +autoEmail = do + src <- emailAddress + return $ Link [Str src] ("mailto:" ++ src, "") + +autoLink :: GenParser Char ParserState Inline +autoLink = autoURI <|> autoEmail + +-- For now, we assume that all substitution references are for images. +image :: GenParser Char ParserState Inline +image = try $ do + char '|' + ref <- manyTill inline (char '|') + state <- getState + let keyTable = stateKeys state + src <- case lookupKeySrc keyTable ref of + Nothing -> fail "no corresponding key" + Just target -> return target + return $ Image (normalizeSpaces ref) src + diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs new file mode 100644 index 000000000..04b0f3b8f --- /dev/null +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -0,0 +1,233 @@ +{- +Copyright (C) 2007 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.TeXMath + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of TeX math to a list of 'Pandoc' inline elements. +-} +module Text.Pandoc.Readers.TeXMath ( + readTeXMath + ) where + +import Text.ParserCombinators.Parsec +import Text.Pandoc.Definition + +-- | Converts a string of raw TeX math to a list of 'Pandoc' inlines. +readTeXMath :: String -> [Inline] +readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of + Left _ -> [Str inp] -- if unparseable, just include original + Right res -> res + +teXMath :: GenParser Char st [Inline] +teXMath = manyTill mathPart eof >>= return . concat + +mathPart :: GenParser Char st [Inline] +mathPart = whitespace <|> superscript <|> subscript <|> symbol <|> + argument <|> digits <|> letters <|> misc + +whitespace :: GenParser Char st [Inline] +whitespace = many1 space >> return [] + +symbol :: GenParser Char st [Inline] +symbol = try $ do + char '\\' + res <- many1 letter + case lookup res teXsymbols of + Just m -> return [Str m] + Nothing -> return [Str $ "\\" ++ res] + +argument :: GenParser Char st [Inline] +argument = try $ do + char '{' + res <- many mathPart + char '}' + return $ if null res + then [Str " "] + else [Str "{"] ++ concat res ++ [Str "}"] + +digits :: GenParser Char st [Inline] +digits = do + res <- many1 digit + return [Str res] + +letters :: GenParser Char st [Inline] +letters = do + res <- many1 letter + return [Emph [Str res]] + +misc :: GenParser Char st [Inline] +misc = do + res <- noneOf "}" + return [Str [res]] + +scriptArg :: GenParser Char st [Inline] +scriptArg = try $ do + (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r})) + <|> symbol + <|> (do{c <- (letter <|> digit); return [Str [c]]}) + +superscript :: GenParser Char st [Inline] +superscript = try $ do + char '^' + arg <- scriptArg + return [Superscript arg] + +subscript :: GenParser Char st [Inline] +subscript = try $ do + char '_' + arg <- scriptArg + return [Subscript arg] + +withThinSpace :: String -> String +withThinSpace str = "\x2009" ++ str ++ "\x2009" + +teXsymbols :: [(String, String)] +teXsymbols = + [("alpha","\x3B1") + ,("beta", "\x3B2") + ,("chi", "\x3C7") + ,("delta", "\x3B4") + ,("Delta", "\x394") + ,("epsilon", "\x3B5") + ,("varepsilon", "\x25B") + ,("eta", "\x3B7") + ,("gamma", "\x3B3") + ,("Gamma", "\x393") + ,("iota", "\x3B9") + ,("kappa", "\x3BA") + ,("lambda", "\x3BB") + ,("Lambda", "\x39B") + ,("mu", "\x3BC") + ,("nu", "\x3BD") + ,("omega", "\x3C9") + ,("Omega", "\x3A9") + ,("phi", "\x3C6") + ,("varphi", "\x3D5") + ,("Phi", "\x3A6") + ,("pi", "\x3C0") + ,("Pi", "\x3A0") + ,("psi", "\x3C8") + ,("Psi", "\x3A8") + ,("rho", "\x3C1") + ,("sigma", "\x3C3") + ,("Sigma", "\x3A3") + ,("tau", "\x3C4") + ,("theta", "\x3B8") + ,("vartheta", "\x3D1") + ,("Theta", "\x398") + ,("upsilon", "\x3C5") + ,("xi", "\x3BE") + ,("Xi", "\x39E") + ,("zeta", "\x3B6") + ,("ne", "\x2260") + ,("lt", withThinSpace "<") + ,("le", withThinSpace "\x2264") + ,("leq", withThinSpace "\x2264") + ,("ge", withThinSpace "\x2265") + ,("geq", withThinSpace "\x2265") + ,("prec", withThinSpace "\x227A") + ,("succ", withThinSpace "\x227B") + ,("preceq", withThinSpace "\x2AAF") + ,("succeq", withThinSpace "\x2AB0") + ,("in", withThinSpace "\x2208") + ,("notin", withThinSpace "\x2209") + ,("subset", withThinSpace "\x2282") + ,("supset", withThinSpace "\x2283") + ,("subseteq", withThinSpace "\x2286") + ,("supseteq", withThinSpace "\x2287") + ,("equiv", withThinSpace "\x2261") + ,("cong", withThinSpace "\x2245") + ,("approx", withThinSpace "\x2248") + ,("propto", withThinSpace "\x221D") + ,("cdot", withThinSpace "\x22C5") + ,("star", withThinSpace "\x22C6") + ,("backslash", "\\") + ,("times", withThinSpace "\x00D7") + ,("divide", withThinSpace "\x00F7") + ,("circ", withThinSpace "\x2218") + ,("oplus", withThinSpace "\x2295") + ,("otimes", withThinSpace "\x2297") + ,("odot", withThinSpace "\x2299") + ,("sum", "\x2211") + ,("prod", "\x220F") + ,("wedge", withThinSpace "\x2227") + ,("bigwedge", withThinSpace "\x22C0") + ,("vee", withThinSpace "\x2228") + ,("bigvee", withThinSpace "\x22C1") + ,("cap", withThinSpace "\x2229") + ,("bigcap", withThinSpace "\x22C2") + ,("cup", withThinSpace "\x222A") + ,("bigcup", withThinSpace "\x22C3") + ,("neg", "\x00AC") + ,("implies", withThinSpace "\x21D2") + ,("iff", withThinSpace "\x21D4") + ,("forall", "\x2200") + ,("exists", "\x2203") + ,("bot", "\x22A5") + ,("top", "\x22A4") + ,("vdash", "\x22A2") + ,("models", withThinSpace "\x22A8") + ,("uparrow", "\x2191") + ,("downarrow", "\x2193") + ,("rightarrow", withThinSpace "\x2192") + ,("to", withThinSpace "\x2192") + ,("rightarrowtail", "\x21A3") + ,("twoheadrightarrow", withThinSpace "\x21A0") + ,("twoheadrightarrowtail", withThinSpace "\x2916") + ,("mapsto", withThinSpace "\x21A6") + ,("leftarrow", withThinSpace "\x2190") + ,("leftrightarrow", withThinSpace "\x2194") + ,("Rightarrow", withThinSpace "\x21D2") + ,("Leftarrow", withThinSpace "\x21D0") + ,("Leftrightarrow", withThinSpace "\x21D4") + ,("partial", "\x2202") + ,("nabla", "\x2207") + ,("pm", "\x00B1") + ,("emptyset", "\x2205") + ,("infty", "\x221E") + ,("aleph", "\x2135") + ,("ldots", "...") + ,("therefore", "\x2234") + ,("angle", "\x2220") + ,("quad", "\x00A0\x00A0") + ,("cdots", "\x22EF") + ,("vdots", "\x22EE") + ,("ddots", "\x22F1") + ,("diamond", "\x22C4") + ,("Box", "\x25A1") + ,("lfloor", "\x230A") + ,("rfloor", "\x230B") + ,("lceiling", "\x2308") + ,("rceiling", "\x2309") + ,("langle", "\x2329") + ,("rangle", "\x232A") + ,("{", "{") + ,("}", "}") + ,("[", "[") + ,("]", "]") + ,("|", "|") + ,("||", "||") + ] + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs new file mode 100644 index 000000000..6854e5ae6 --- /dev/null +++ b/src/Text/Pandoc/Shared.hs @@ -0,0 +1,953 @@ +{-# LANGUAGE CPP #-} +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Shared + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Utility functions and definitions used by the various Pandoc modules. +-} +module Text.Pandoc.Shared ( + -- * List processing + splitBy, + splitByIndices, + substitute, + -- * Text processing + backslashEscapes, + escapeStringUsing, + stripTrailingNewlines, + removeLeadingTrailingSpace, + removeLeadingSpace, + removeTrailingSpace, + stripFirstAndLast, + camelCaseToHyphenated, + toRomanNumeral, + wrapped, + wrapIfNeeded, + wrappedTeX, + wrapTeXIfNeeded, + BlockWrapper (..), + wrappedBlocksToDoc, + -- * Parsing + (>>~), + anyLine, + many1Till, + notFollowedBy', + oneOfStrings, + spaceChar, + skipSpaces, + blankline, + blanklines, + enclosed, + stringAnyCase, + parseFromString, + lineClump, + charsInBalanced, + charsInBalanced', + romanNumeral, + emailAddress, + uri, + withHorizDisplacement, + nullBlock, + failIfStrict, + failUnlessLHS, + escaped, + anyOrderedListMarker, + orderedListMarker, + charRef, + readWith, + testStringWith, + ParserState (..), + defaultParserState, + HeaderType (..), + ParserContext (..), + QuoteContext (..), + NoteTable, + KeyTable, + lookupKeySrc, + refsMatch, + -- * Prettyprinting + hang', + prettyPandoc, + -- * Pandoc block and inline list processing + orderedListMarkers, + normalizeSpaces, + compactify, + Element (..), + hierarchicalize, + isHeaderBlock, + -- * Writer options + HTMLMathMethod (..), + ObfuscationMethod (..), + WriterOptions (..), + defaultWriterOptions, + -- * File handling + inDirectory + ) where + +import Text.Pandoc.Definition +import Text.ParserCombinators.Parsec +import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) +import qualified Text.PrettyPrint.HughesPJ as PP +import Text.Pandoc.CharacterReferences ( characterReference ) +import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) +import Data.List ( find, isPrefixOf, intercalate ) +import Control.Monad ( join ) +import Network.URI ( parseURI, URI (..), isAllowedInURI ) +import System.Directory +import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) +import System.IO.UTF8 + +-- +-- List processing +-- + +-- | Split list by groups of one or more sep. +splitBy :: (Eq a) => a -> [a] -> [[a]] +splitBy _ [] = [] +splitBy sep lst = + let (first, rest) = break (== sep) lst + rest' = dropWhile (== sep) rest + in first:(splitBy sep rest') + +-- | Split list into chunks divided at specified indices. +splitByIndices :: [Int] -> [a] -> [[a]] +splitByIndices [] lst = [lst] +splitByIndices (x:xs) lst = + let (first, rest) = splitAt x lst in + first:(splitByIndices (map (\y -> y - x) xs) rest) + +-- | Replace each occurrence of one sublist in a list with another. +substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] +substitute _ _ [] = [] +substitute [] _ lst = lst +substitute target replacement lst = + if target `isPrefixOf` lst + then replacement ++ (substitute target replacement $ drop (length target) lst) + else (head lst):(substitute target replacement $ tail lst) + +-- +-- Text processing +-- + +-- | Returns an association list of backslash escapes for the +-- designated characters. +backslashEscapes :: [Char] -- ^ list of special characters to escape + -> [(Char, String)] +backslashEscapes = map (\ch -> (ch, ['\\',ch])) + +-- | Escape a string of characters, using an association list of +-- characters and strings. +escapeStringUsing :: [(Char, String)] -> String -> String +escapeStringUsing _ [] = "" +escapeStringUsing escapeTable (x:xs) = + case (lookup x escapeTable) of + Just str -> str ++ rest + Nothing -> x:rest + where rest = escapeStringUsing escapeTable xs + +-- | Strip trailing newlines from string. +stripTrailingNewlines :: String -> String +stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse + +-- | Remove leading and trailing space (including newlines) from string. +removeLeadingTrailingSpace :: String -> String +removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace + +-- | Remove leading space (including newlines) from string. +removeLeadingSpace :: String -> String +removeLeadingSpace = dropWhile (`elem` " \n\t") + +-- | Remove trailing space (including newlines) from string. +removeTrailingSpace :: String -> String +removeTrailingSpace = reverse . removeLeadingSpace . reverse + +-- | Strip leading and trailing characters from string +stripFirstAndLast :: String -> String +stripFirstAndLast str = + drop 1 $ take ((length str) - 1) str + +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +camelCaseToHyphenated :: String -> String +camelCaseToHyphenated [] = "" +camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = + a:'-':(toLower b):(camelCaseToHyphenated rest) +camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest) + +-- | Convert number < 4000 to uppercase roman numeral. +toRomanNumeral :: Int -> String +toRomanNumeral x = + if x >= 4000 || x < 0 + then "?" + else case x of + _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000) + _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900) + _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500) + _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400) + _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100) + _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90) + _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50) + _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40) + _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10) + _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5) + _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5) + _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4) + _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) + _ -> "" + +-- | Wrap inlines to line length. +wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc +wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= + return . fsep + +-- | Wrap inlines if the text wrap option is selected. +wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) -> + [Inline] -> m Doc +wrapIfNeeded opts = if writerWrapText opts + then wrapped + else ($) + +-- auxiliary function for wrappedTeX +isNote :: Inline -> Bool +isNote (Note _) = True +isNote _ = False + +-- | Wrap inlines to line length, treating footnotes in a way that +-- makes sense in LaTeX and ConTeXt. +wrappedTeX :: Monad m + => Bool + -> ([Inline] -> m Doc) + -> [Inline] + -> m Doc +wrappedTeX includePercent listWriter sect = do + let (firstpart, rest) = break isNote sect + firstpartWrapped <- wrapped listWriter firstpart + if null rest + then return firstpartWrapped + else do let (note:rest') = rest + let (rest1, rest2) = break (== Space) rest' + -- rest1 is whatever comes between the note and a Space. + -- if the note is followed directly by a Space, rest1 is null. + -- rest1 is printed after the note but before the line break, + -- to avoid spurious blank space the note and immediately + -- following punctuation. + rest1Out <- if null rest1 + then return empty + else listWriter rest1 + rest2Wrapped <- if null rest2 + then return empty + else wrappedTeX includePercent listWriter (tail rest2) + noteText <- listWriter [note] + return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$ + (noteText <> rest1Out) $$ + rest2Wrapped + +-- | Wrap inlines if the text wrap option is selected, specialized +-- for LaTeX and ConTeXt. +wrapTeXIfNeeded :: Monad m + => WriterOptions + -> Bool + -> ([Inline] -> m Doc) + -> [Inline] + -> m Doc +wrapTeXIfNeeded opts includePercent = if writerWrapText opts + then wrappedTeX includePercent + else ($) + +-- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@). +data BlockWrapper = Pad Doc | Reg Doc + +-- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks. +wrappedBlocksToDoc :: [BlockWrapper] -> Doc +wrappedBlocksToDoc = foldr addBlock empty + where addBlock (Pad d) accum | isEmpty accum = d + addBlock (Pad d) accum = d $$ text "" $$ accum + addBlock (Reg d) accum = d $$ accum + +-- +-- Parsing +-- + +-- | Like >>, but returns the operation on the left. +-- (Suggested by Tillmann Rendel on Haskell-cafe list.) +(>>~) :: (Monad m) => m a -> m b -> m a +a >>~ b = a >>= \x -> b >> return x + +-- | Parse any line of text +anyLine :: GenParser Char st [Char] +anyLine = manyTill anyChar newline + +-- | Like @manyTill@, but reads at least one item. +many1Till :: GenParser tok st a + -> GenParser tok st end + -> GenParser tok st [a] +many1Till p end = do + first <- p + rest <- manyTill p end + return (first:rest) + +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. +notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' p = try $ join $ do a <- try p + return (unexpected (show a)) + <|> + return (return ()) +-- (This version due to Andrew Pimlott on the Haskell mailing list.) + +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings + +-- | Parses a space or tab. +spaceChar :: CharParser st Char +spaceChar = char ' ' <|> char '\t' + +-- | Skips zero or more spaces or tabs. +skipSpaces :: GenParser Char st () +skipSpaces = skipMany spaceChar + +-- | Skips zero or more spaces or tabs, then reads a newline. +blankline :: GenParser Char st Char +blankline = try $ skipSpaces >> newline + +-- | Parses one or more blank lines and returns a string of newlines. +blanklines :: GenParser Char st [Char] +blanklines = many1 blankline + +-- | Parses material enclosed between start and end parsers. +enclosed :: GenParser Char st t -- ^ start parser + -> GenParser Char st end -- ^ end parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +enclosed start end parser = try $ + start >> notFollowedBy space >> many1Till parser end + +-- | Parse string, case insensitive. +stringAnyCase :: [Char] -> CharParser st String +stringAnyCase [] = string "" +stringAnyCase (x:xs) = do + firstChar <- char (toUpper x) <|> char (toLower x) + rest <- stringAnyCase xs + return (firstChar:rest) + +-- | Parse contents of 'str' using 'parser' and return result. +parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString parser str = do + oldPos <- getPosition + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + setPosition oldPos + return result + +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = blanklines + <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + +-- | Parse a string of characters between an open character +-- and a close character, including text between balanced +-- pairs of open and close, which must be different. For example, +-- @charsInBalanced '(' ')'@ will parse "(hello (there))" +-- and return "hello (there)". Stop if a blank line is +-- encountered. +charsInBalanced :: Char -> Char -> GenParser Char st String +charsInBalanced open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close, '\n'])) + <|> (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> try (string "\n" >>~ notFollowedBy' blanklines) + char close + return $ concat raw + +-- | Like @charsInBalanced@, but allow blank lines in the content. +charsInBalanced' :: Char -> Char -> GenParser Char st String +charsInBalanced' open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close])) + <|> (do res <- charsInBalanced' open close + return $ [open] ++ res ++ [close]) + char close + return $ concat raw + +-- Auxiliary functions for romanNumeral: + +lowercaseRomanDigits :: [Char] +lowercaseRomanDigits = ['i','v','x','l','c','d','m'] + +uppercaseRomanDigits :: [Char] +uppercaseRomanDigits = map toUpper lowercaseRomanDigits + +-- | Parses a roman numeral (uppercase or lowercase), returns number. +romanNumeral :: Bool -- ^ Uppercase if true + -> GenParser Char st Int +romanNumeral upperCase = do + let romanDigits = if upperCase + then uppercaseRomanDigits + else lowercaseRomanDigits + lookAhead $ oneOf romanDigits + let [one, five, ten, fifty, hundred, fivehundred, thousand] = + map char romanDigits + thousands <- many thousand >>= (return . (1000 *) . length) + ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 + fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 + hundreds <- many hundred >>= (return . (100 *) . length) + nineties <- option 0 $ try $ ten >> hundred >> return 90 + fifties <- many fifty >>= (return . (50 *) . length) + forties <- option 0 $ try $ ten >> fifty >> return 40 + tens <- many ten >>= (return . (10 *) . length) + nines <- option 0 $ try $ one >> ten >> return 9 + fives <- many five >>= (return . (5 *) . length) + fours <- option 0 $ try $ one >> five >> return 4 + ones <- many one >>= (return . length) + let total = thousands + ninehundreds + fivehundreds + fourhundreds + + hundreds + nineties + fifties + forties + tens + nines + + fives + fours + ones + if total == 0 + then fail "not a roman numeral" + else return total + +-- Parsers for email addresses and URIs + +emailChar :: GenParser Char st Char +emailChar = alphaNum <|> oneOf "-+_." + +domainChar :: GenParser Char st Char +domainChar = alphaNum <|> char '-' + +domain :: GenParser Char st [Char] +domain = do + first <- many1 domainChar + dom <- many1 $ try (char '.' >> many1 domainChar ) + return $ intercalate "." (first:dom) + +-- | Parses an email address; returns string. +emailAddress :: GenParser Char st [Char] +emailAddress = try $ do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + return $ addr ++ '@':dom + +-- | Parses a URI. +uri :: GenParser Char st String +uri = try $ do + str <- many1 $ satisfy isAllowedInURI + case parseURI str of + Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:", + "file:", "mailto:", + "news:", "telnet:" ] + then return $ show uri' + else fail "not a URI" + Nothing -> fail "not a URI" + +-- | Applies a parser, returns tuple of its results and its horizontal +-- displacement (the difference between the source column at the end +-- and the source column at the beginning). Vertical displacement +-- (source row) is ignored. +withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply + -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement parser = do + pos1 <- getPosition + result <- parser + pos2 <- getPosition + return (result, sourceColumn pos2 - sourceColumn pos1) + +-- | Parses a character and returns 'Null' (so that the parser can move on +-- if it gets stuck). +nullBlock :: GenParser Char st Block +nullBlock = anyChar >> return Null + +-- | Fail if reader is in strict markdown syntax mode. +failIfStrict :: GenParser Char ParserState () +failIfStrict = do + state <- getState + if stateStrict state then fail "strict mode" else return () + +-- | Fail unless we're in literate haskell mode. +failUnlessLHS :: GenParser tok ParserState () +failUnlessLHS = do + state <- getState + if stateLiterateHaskell state then return () else fail "Literate haskell feature" + +-- | Parses backslash, then applies character parser. +escaped :: GenParser Char st Char -- ^ Parser for character to escape + -> GenParser Char st Inline +escaped parser = try $ do + char '\\' + result <- parser + return (Str [result]) + +-- | Parses an uppercase roman numeral and returns (UpperRoman, number). +upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman = do + num <- romanNumeral True + return (UpperRoman, num) + +-- | Parses a lowercase roman numeral and returns (LowerRoman, number). +lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman = do + num <- romanNumeral False + return (LowerRoman, num) + +-- | Parses a decimal numeral and returns (Decimal, number). +decimal :: GenParser Char st (ListNumberStyle, Int) +decimal = do + num <- many1 digit + return (Decimal, read num) + +-- | Parses a '#' returns (DefaultStyle, 1). +defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum = do + char '#' + return (DefaultStyle, 1) + +-- | Parses a lowercase letter and returns (LowerAlpha, number). +lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha = do + ch <- oneOf ['a'..'z'] + return (LowerAlpha, ord ch - ord 'a' + 1) + +-- | Parses an uppercase letter and returns (UpperAlpha, number). +upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha = do + ch <- oneOf ['A'..'Z'] + return (UpperAlpha, ord ch - ord 'A' + 1) + +-- | Parses a roman numeral i or I +romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne = (char 'i' >> return (LowerRoman, 1)) <|> + (char 'I' >> return (UpperRoman, 1)) + +-- | Parses an ordered list marker and returns list attributes. +anyOrderedListMarker :: GenParser Char st ListAttributes +anyOrderedListMarker = choice $ + [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], + numParser <- [decimal, defaultNum, romanOne, + lowerAlpha, lowerRoman, upperAlpha, upperRoman]] + +-- | Parses a list number (num) followed by a period, returns list attributes. +inPeriod :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inPeriod num = try $ do + (style, start) <- num + char '.' + let delim = if style == DefaultStyle + then DefaultDelim + else Period + return (start, style, delim) + +-- | Parses a list number (num) followed by a paren, returns list attributes. +inOneParen :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inOneParen num = try $ do + (style, start) <- num + char ')' + return (start, style, OneParen) + +-- | Parses a list number (num) enclosed in parens, returns list attributes. +inTwoParens :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inTwoParens num = try $ do + char '(' + (style, start) <- num + char ')' + return (start, style, TwoParens) + +-- | Parses an ordered list marker with a given style and delimiter, +-- returns number. +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char st Int +orderedListMarker style delim = do + let num = case style of + DefaultStyle -> decimal <|> defaultNum + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + let context = case delim of + DefaultDelim -> inPeriod + Period -> inPeriod + OneParen -> inOneParen + TwoParens -> inTwoParens + (start, _, _) <- context num + return start + +-- | Parses a character reference and returns a Str element. +charRef :: GenParser Char st Inline +charRef = do + c <- characterReference + return $ Str [c] + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = putStrLn $ show $ + readWith parser defaultParserState str + +-- | Parsing options. +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? + stateKeys :: KeyTable, -- ^ List of reference keys +#ifdef _CITEPROC + stateCitations :: [String], -- ^ List of available citations +#endif + stateNotes :: NoteTable, -- ^ List of notes + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [String], -- ^ Authors of document + stateDate :: String, -- ^ Date of document + stateStrict :: Bool, -- ^ Use strict markdown syntax? + stateSmart :: Bool, -- ^ Use smart typography? + stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell + stateColumns :: Int, -- ^ Number of columns in terminal + stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used + } + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateQuoteContext = NoQuote, + stateSanitizeHTML = False, + stateKeys = [], +#ifdef _CITEPROC + stateCitations = [], +#endif + stateNotes = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateStrict = False, + stateSmart = False, + stateLiterateHaskell = False, + stateColumns = 80, + stateHeaderTable = [] } + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data QuoteContext + = InSingleQuote -- ^ Used when parsing inside single quotes + | InDoubleQuote -- ^ Used when parsing inside double quotes + | NoQuote -- ^ Used when not parsing inside quotes + deriving (Eq, Show) + +type NoteTable = [(String, [Block])] + +type KeyTable = [([Inline], Target)] + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> [Inline] -- ^ Key + -> Maybe Target +lookupKeySrc table key = case find (refsMatch key . fst) table of + Nothing -> Nothing + Just (_, src) -> Just src + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Superscript x):restx) ((Superscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Subscript x):restx) ((Subscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = + t == u && refsMatch x y && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Math t x):restx) ((Math u y):resty) = + ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + +-- +-- Prettyprinting +-- + +-- | A version of hang that works like the version in pretty-1.0.0.0 +hang' :: Doc -> Int -> Doc -> Doc +hang' d1 n d2 = d1 $$ (nest n d2) + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces (rel to block) to indent first line + -> String -- ^ Contents of block to indent + -> String +indentBy _ _ [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first + in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ + (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ + (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (OrderedList attribs blockLists) = + "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ + (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) + blockLists)) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ + indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ + indentBy 2 0 ("[" ++ (intercalate ",\n" + (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ + indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" +prettyBlock (Table caption aligns widths header rows) = + "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ + show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ + (intercalate ",\n" (map prettyRow rows)) ++ " ]" + where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) + cols))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +prettyPandoc :: Pandoc -> String +prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" + +-- +-- Pandoc block and inline list processing +-- + +-- | Generate infinite lazy list of markers for an ordered list, +-- depending on list attributes. +orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] +orderedListMarkers (start, numstyle, numdelim) = + let singleton c = [c] + nums = case numstyle of + DefaultStyle -> map show [start..] + Decimal -> map show [start..] + UpperAlpha -> drop (start - 1) $ cycle $ + map singleton ['A'..'Z'] + LowerAlpha -> drop (start - 1) $ cycle $ + map singleton ['a'..'z'] + UpperRoman -> map toRomanNumeral [start..] + LowerRoman -> map (map toLower . toRomanNumeral) [start..] + inDelim str = case numdelim of + DefaultDelim -> str ++ "." + Period -> str ++ "." + OneParen -> str ++ ")" + TwoParens -> "(" ++ str ++ ")" + in map inDelim nums + +-- | Normalize a list of inline elements: remove leading and trailing +-- @Space@ elements, collapse double @Space@s into singles, and +-- remove empty Str elements. +normalizeSpaces :: [Inline] -> [Inline] +normalizeSpaces [] = [] +normalizeSpaces list = + let removeDoubles [] = [] + removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) + removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) + removeDoubles ((Str ""):rest) = removeDoubles rest + removeDoubles (x:rest) = x:(removeDoubles rest) + removeLeading (Space:xs) = removeLeading xs + removeLeading x = x + removeTrailing [] = [] + removeTrailing lst = if (last lst == Space) + then init lst + else lst + in removeLeading $ removeTrailing $ removeDoubles list + +-- | Change final list item from @Para@ to @Plain@ if the list should +-- be compact. +compactify :: [[Block]] -- ^ List of list items (each a list of blocks) + -> [[Block]] +compactify [] = [] +compactify items = + let final = last items + others = init items + in case last final of + Para a -> if all endsWithPlain others && not (null final) + then others ++ [init final ++ [Plain a]] + else items + _ -> items + +endsWithPlain :: [Block] -> Bool +endsWithPlain [] = False +endsWithPlain blocks = + case last blocks of + Plain _ -> True + (BulletList (x:xs)) -> endsWithPlain $ last (x:xs) + (OrderedList _ (x:xs)) -> endsWithPlain $ last (x:xs) + (DefinitionList (x:xs)) -> endsWithPlain $ last $ map snd (x:xs) + _ -> False + +-- | Data structure for defining hierarchical Pandoc documents +data Element = Blk Block + | Sec [Inline] [Element] deriving (Eq, Read, Show) + +-- | Returns @True@ on Header block with at least the specified level +headerAtLeast :: Int -> Block -> Bool +headerAtLeast level (Header x _) = x <= level +headerAtLeast _ _ = False + +-- | Convert list of Pandoc blocks into (hierarchical) list of Elements +hierarchicalize :: [Block] -> [Element] +hierarchicalize [] = [] +hierarchicalize (block:rest) = + case block of + (Header level title) -> + let (thisSection, rest') = break (headerAtLeast level) rest + in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') + x -> (Blk x):(hierarchicalize rest) + +-- | True if block is a Header block. +isHeaderBlock :: Block -> Bool +isHeaderBlock (Header _ _) = True +isHeaderBlock _ = False + +-- +-- Writer options +-- + +data HTMLMathMethod = PlainMath + | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js + | JsMath (Maybe String) -- url of jsMath load script + | GladTeX + | MimeTeX String -- url of mimetex.cgi + deriving (Show, Read, Eq) + +-- | Methods for obfuscating email addresses in HTML. +data ObfuscationMethod = NoObfuscation + | ReferenceObfuscation + | JavascriptObfuscation + deriving (Show, Read, Eq) + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ Include header and footer + , writerHeader :: String -- ^ Header for the document + , writerTitlePrefix :: String -- ^ Prefix for HTML titles + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerS5 :: Bool -- ^ We're writing S5 + , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML + , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) + , writerIncremental :: Bool -- ^ Incremental S5 lists + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerIncludeBefore :: String -- ^ String to include before the body + , writerIncludeAfter :: String -- ^ String to include after the body + , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , writerWrapText :: Bool -- ^ Wrap text to line length + , writerLiterateHaskell :: Bool -- ^ Write as literate haskell + , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails + } deriving Show + +-- | Default writer options. +defaultWriterOptions :: WriterOptions +defaultWriterOptions = + WriterOptions { writerStandalone = False + , writerHeader = "" + , writerTitlePrefix = "" + , writerTabStop = 4 + , writerTableOfContents = False + , writerS5 = False + , writerHTMLMathMethod = PlainMath + , writerIgnoreNotes = False + , writerIncremental = False + , writerNumberSections = False + , writerIncludeBefore = "" + , writerIncludeAfter = "" + , writerStrictMarkdown = False + , writerReferenceLinks = False + , writerWrapText = True + , writerLiterateHaskell = False + , writerEmailObfuscation = JavascriptObfuscation + } + +-- +-- File handling +-- + +-- | Perform an IO action in a directory, returning to starting directory. +inDirectory :: FilePath -> IO a -> IO a +inDirectory path action = do + oldDir <- getCurrentDirectory + setCurrentDirectory path + result <- action + setCurrentDirectory oldDir + return result diff --git a/src/Text/Pandoc/TH.hs b/src/Text/Pandoc/TH.hs new file mode 100644 index 000000000..0dc5a6719 --- /dev/null +++ b/src/Text/Pandoc/TH.hs @@ -0,0 +1,65 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +{- +Copyright (C) 2008 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.TH + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Template haskell functions used by Pandoc modules. +-} +module Text.Pandoc.TH ( + contentsOf, + binaryContentsOf, + makeZip + ) where + +import Language.Haskell.TH +import Language.Haskell.TH.Syntax (Lift (..)) +import qualified Data.ByteString as B +import Data.ByteString.Internal ( w2c ) +import Prelude hiding ( readFile ) +import System.IO.UTF8 +import Codec.Archive.Zip +import Text.Pandoc.Shared ( inDirectory ) + +-- | Insert contents of text file into a template. +contentsOf :: FilePath -> ExpQ +contentsOf p = lift =<< (runIO $ readFile p) + +-- | Insert contents of binary file into a template. +-- Note that @Data.ByteString.readFile@ uses binary mode on windows. +binaryContentsOf :: FilePath -> ExpQ +binaryContentsOf p = lift =<< (runIO $ B.readFile p) + +instance Lift B.ByteString where + lift x = return (LitE (StringL $ map w2c $ B.unpack x)) + +instance Lift Archive where + lift x = return (LitE (StringL $ show x )) + +-- | Construct zip file from files in a directory, and +-- insert into a template. +makeZip :: FilePath -> ExpQ +makeZip path = lift =<< (runIO $ inDirectory path $ addFilesToArchive [OptRecursive] emptyArchive ["."]) + diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs new file mode 100644 index 000000000..014751968 --- /dev/null +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -0,0 +1,302 @@ +{- +Copyright (C) 2007-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.ConTeXt + Copyright : Copyright (C) 2007-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' format into ConTeXt. +-} +module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( isSuffixOf, intercalate ) +import Control.Monad.State +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +data WriterState = + WriterState { stNextRef :: Int -- number of next URL reference + , stOrderedListLevel :: Int -- level of ordered list + , stOptions :: WriterOptions -- writer options + } + +orderedListStyles :: [[Char]] +orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] + +-- | Convert Pandoc to ConTeXt. +writeConTeXt :: WriterOptions -> Pandoc -> String +writeConTeXt options document = + let defaultWriterState = WriterState { stNextRef = 1 + , stOrderedListLevel = 0 + , stOptions = options + } + in render $ + evalState (pandocToConTeXt options document) defaultWriterState + +pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToConTeXt options (Pandoc meta blocks) = do + main <- blockListToConTeXt blocks + let before = if null (writerIncludeBefore options) + then empty + else text $ writerIncludeBefore options + let after = if null (writerIncludeAfter options) + then empty + else text $ writerIncludeAfter options + let body = before $$ main $$ after + head' <- if writerStandalone options + then contextHeader options meta + else return empty + let toc = if writerTableOfContents options + then text "\\placecontent\n" + else empty + let foot = if writerStandalone options + then text "\\stoptext\n" + else empty + return $ head' $$ toc $$ body $$ foot + +-- | Insert bibliographic information into ConTeXt header. +contextHeader :: WriterOptions -- ^ Options, including ConTeXt header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState Doc +contextHeader options (Meta title authors date) = do + titletext <- if null title + then return empty + else inlineListToConTeXt title + let authorstext = if null authors + then "" + else if length authors == 1 + then stringToConTeXt $ head authors + else stringToConTeXt $ (intercalate ", " $ + init authors) ++ " & " ++ last authors + let datetext = if date == "" + then "" + else stringToConTeXt date + let titleblock = text "\\doctitle{" <> titletext <> char '}' $$ + text ("\\author{" ++ authorstext ++ "}") $$ + text ("\\date{" ++ datetext ++ "}") + let header = text $ writerHeader options + return $ header $$ titleblock $$ text "\\starttext\n\\maketitle\n" + +-- escape things as needed for ConTeXt + +escapeCharForConTeXt :: Char -> String +escapeCharForConTeXt ch = + case ch of + '{' -> "\\letteropenbrace{}" + '}' -> "\\letterclosebrace{}" + '\\' -> "\\letterbackslash{}" + '$' -> "\\$" + '|' -> "\\letterbar{}" + '^' -> "\\letterhat{}" + '%' -> "\\%" + '~' -> "\\lettertilde{}" + '&' -> "\\&" + '#' -> "\\#" + '<' -> "\\letterless{}" + '>' -> "\\lettermore{}" + '_' -> "\\letterunderscore{}" + '\160' -> "~" + x -> [x] + +-- | Escape string for ConTeXt +stringToConTeXt :: String -> String +stringToConTeXt = concatMap escapeCharForConTeXt + +-- | Convert Pandoc block element to ConTeXt. +blockToConTeXt :: Block + -> State WriterState BlockWrapper +blockToConTeXt Null = return $ Reg empty +blockToConTeXt (Plain lst) = do + st <- get + let options = stOptions st + contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst + return $ Reg contents +blockToConTeXt (Para lst) = do + st <- get + let options = stOptions st + contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst + return $ Pad contents +blockToConTeXt (BlockQuote lst) = do + contents <- blockListToConTeXt lst + return $ Pad $ text "\\startblockquote" $$ contents $$ text "\\stopblockquote" +blockToConTeXt (CodeBlock _ str) = + return $ Reg $ text $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" + -- \n because \stoptyping can't have anything after it, inc. } +blockToConTeXt (RawHtml _) = return $ Reg empty +blockToConTeXt (BulletList lst) = do + contents <- mapM listItemToConTeXt lst + return $ Pad $ text "\\startitemize" $$ vcat contents $$ text "\\stopitemize" +blockToConTeXt (OrderedList (start, style', delim) lst) = do + st <- get + let level = stOrderedListLevel st + put $ st {stOrderedListLevel = level + 1} + contents <- mapM listItemToConTeXt lst + put $ st {stOrderedListLevel = level} + let start' = if start == 1 then "" else "start=" ++ show start + let delim' = case delim of + DefaultDelim -> "" + Period -> "stopper=." + OneParen -> "stopper=)" + TwoParens -> "left=(,stopper=)" + let width = maximum $ map length $ take (length contents) + (orderedListMarkers (start, style', delim)) + let width' = (toEnum width + 1) / 2 + let width'' = if width' > (1.5 :: Double) + then "width=" ++ show width' ++ "em" + else "" + let specs2Items = filter (not . null) [start', delim', width''] + let specs2 = if null specs2Items + then "" + else "[" ++ intercalate "," specs2Items ++ "]" + let style'' = case style' of + DefaultStyle -> orderedListStyles !! level + Decimal -> "[n]" + LowerRoman -> "[r]" + UpperRoman -> "[R]" + LowerAlpha -> "[a]" + UpperAlpha -> "[A]" + let specs = style'' ++ specs2 + return $ Pad $ text ("\\startitemize" ++ specs) $$ vcat contents $$ + text "\\stopitemize" +blockToConTeXt (DefinitionList lst) = + mapM defListItemToConTeXt lst >>= return . Pad . wrappedBlocksToDoc +blockToConTeXt HorizontalRule = return $ Pad $ text "\\thinrule" +blockToConTeXt (Header level lst) = do + contents <- inlineListToConTeXt lst + st <- get + let opts = stOptions st + let base = if writerNumberSections opts then "section" else "subject" + return $ Pad $ if level >= 1 && level <= 5 + then char '\\' <> text (concat (replicate (level - 1) "sub")) <> + text base <> char '{' <> contents <> char '}' + else contents +blockToConTeXt (Table caption aligns widths heads rows) = do + let colWidths = map printDecimal widths + let colDescriptor colWidth alignment = (case alignment of + AlignLeft -> 'l' + AlignRight -> 'r' + AlignCenter -> 'c' + AlignDefault -> 'l'): + "p(" ++ colWidth ++ "\\textwidth)|" + let colDescriptors = "|" ++ (concat $ + zipWith colDescriptor colWidths aligns) + headers <- tableRowToConTeXt heads + captionText <- inlineListToConTeXt caption + let captionText' = if null caption then text "none" else captionText + rows' <- mapM tableRowToConTeXt rows + return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$ + text "\\starttable[" <> text colDescriptors <> char ']' $$ + text "\\HL" $$ headers $$ text "\\HL" $$ + vcat rows' $$ text "\\HL\n\\stoptable" + +printDecimal :: Double -> String +printDecimal = printf "%.2f" + +tableRowToConTeXt :: [[Block]] -> State WriterState Doc +tableRowToConTeXt cols = do + cols' <- mapM blockListToConTeXt cols + return $ (vcat (map (text "\\NC " <>) cols')) $$ + text "\\NC\\AR" + +listItemToConTeXt :: [Block] -> State WriterState Doc +listItemToConTeXt list = blockListToConTeXt list >>= + return . (text "\\item" $$) . (nest 2) + +defListItemToConTeXt :: ([Inline], [Block]) -> State WriterState BlockWrapper +defListItemToConTeXt (term, def) = do + term' <- inlineListToConTeXt term + def' <- blockListToConTeXt def + return $ Pad $ text "\\startdescr{" <> term' <> char '}' $$ def' $$ text "\\stopdescr" + +-- | Convert list of block elements to ConTeXt. +blockListToConTeXt :: [Block] -> State WriterState Doc +blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . wrappedBlocksToDoc + +-- | Convert list of inline elements to ConTeXt. +inlineListToConTeXt :: [Inline] -- ^ Inlines to convert + -> State WriterState Doc +inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . hcat + +-- | Convert inline element to ConTeXt +inlineToConTeXt :: Inline -- ^ Inline to convert + -> State WriterState Doc +inlineToConTeXt (Emph lst) = do + contents <- inlineListToConTeXt lst + return $ text "{\\em " <> contents <> char '}' +inlineToConTeXt (Strong lst) = do + contents <- inlineListToConTeXt lst + return $ text "{\\bf " <> contents <> char '}' +inlineToConTeXt (Strikeout lst) = do + contents <- inlineListToConTeXt lst + return $ text "\\overstrikes{" <> contents <> char '}' +inlineToConTeXt (Superscript lst) = do + contents <- inlineListToConTeXt lst + return $ text "\\high{" <> contents <> char '}' +inlineToConTeXt (Subscript lst) = do + contents <- inlineListToConTeXt lst + return $ text "\\low{" <> contents <> char '}' +inlineToConTeXt (SmallCaps lst) = do + contents <- inlineListToConTeXt lst + return $ text "{\\sc " <> contents <> char '}' +inlineToConTeXt (Code str) = return $ text $ "\\type{" ++ str ++ "}" +inlineToConTeXt (Quoted SingleQuote lst) = do + contents <- inlineListToConTeXt lst + return $ text "\\quote{" <> contents <> char '}' +inlineToConTeXt (Quoted DoubleQuote lst) = do + contents <- inlineListToConTeXt lst + return $ text "\\quotation{" <> contents <> char '}' +inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst +inlineToConTeXt Apostrophe = return $ char '\'' +inlineToConTeXt EmDash = return $ text "---" +inlineToConTeXt EnDash = return $ text "--" +inlineToConTeXt Ellipses = return $ text "\\ldots{}" +inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str +inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' +inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" +inlineToConTeXt (TeX str) = return $ text str +inlineToConTeXt (HtmlInline _) = return empty +inlineToConTeXt (LineBreak) = return $ text "\\crlf\n" +inlineToConTeXt Space = return $ char ' ' +inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own + inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... +inlineToConTeXt (Link txt (src, _)) = do + st <- get + let next = stNextRef st + put $ st {stNextRef = next + 1} + let ref = show next + label <- inlineListToConTeXt txt + return $ text "\\useURL[" <> text ref <> text "][" <> text src <> + text "][][" <> label <> text "]\\from[" <> text ref <> char ']' +inlineToConTeXt (Image alternate (src, tit)) = do + alt <- inlineListToConTeXt alternate + return $ text "\\placefigure\n[]\n[fig:" <> alt <> text "]\n{" <> + text tit <> text "}\n{\\externalfigure[" <> text src <> text "]}" +inlineToConTeXt (Note contents) = do + contents' <- blockListToConTeXt contents + let rawnote = stripTrailingNewlines $ render contents' + -- note: a \n before } is needed when note ends with a \stoptyping + let optNewline = "\\stoptyping" `isSuffixOf` rawnote + return $ text "\\footnote{" <> + text rawnote <> (if optNewline then char '\n' else empty) <> char '}' + diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs new file mode 100644 index 000000000..3e535a87e --- /dev/null +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -0,0 +1,262 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docbook + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Docbook XML. +-} +module Text.Pandoc.Writers.Docbook ( writeDocbook) where +import Text.Pandoc.Definition +import Text.Pandoc.XML +import Text.Pandoc.Shared +import Text.Pandoc.Readers.TeXMath +import Data.List ( isPrefixOf, drop, intercalate ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +-- | Convert list of authors to a docbook section +authorToDocbook :: [Char] -> Doc +authorToDocbook name = inTagsIndented "author" $ + if ',' `elem` name + then -- last name first + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest in + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) + else -- last name last + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (intercalate " " (take (n-1) namewords), last namewords) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) + +-- | Convert Pandoc document to string in Docbook format. +writeDocbook :: WriterOptions -> Pandoc -> String +writeDocbook opts (Pandoc (Meta title authors date) blocks) = + let head' = if writerStandalone opts + then text (writerHeader opts) + else empty + meta = if writerStandalone opts + then inTagsIndented "articleinfo" $ + (inTagsSimple "title" (wrap opts title)) $$ + (vcat (map authorToDocbook authors)) $$ + (inTagsSimple "date" (text $ escapeStringForXML date)) + else empty + elements = hierarchicalize blocks + before = writerIncludeBefore opts + after = writerIncludeAfter opts + body = (if null before then empty else text before) $$ + vcat (map (elementToDocbook opts) elements) $$ + (if null after then empty else text after) + body' = if writerStandalone opts + then inTagsIndented "article" (meta $$ body) + else body + in render $ head' $$ body' $$ text "" + +-- | Convert an Element to Docbook. +elementToDocbook :: WriterOptions -> Element -> Doc +elementToDocbook opts (Blk block) = blockToDocbook opts block +elementToDocbook opts (Sec title elements) = + -- Docbook doesn't allow sections with no content, so insert some if needed + let elements' = if null elements + then [Blk (Para [])] + else elements + in inTagsIndented "section" $ + inTagsSimple "title" (wrap opts title) $$ + vcat (map (elementToDocbook opts) elements') + +-- | Convert a list of Pandoc blocks to Docbook. +blocksToDocbook :: WriterOptions -> [Block] -> Doc +blocksToDocbook opts = vcat . map (blockToDocbook opts) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara :: Block -> Block +plainToPara (Plain x) = Para x +plainToPara x = x + +-- | Convert a list of pairs of terms and definitions into a list of +-- Docbook varlistentrys. +deflistItemsToDocbook :: WriterOptions -> [([Inline],[Block])] -> Doc +deflistItemsToDocbook opts items = + vcat $ map (\(term, def) -> deflistItemToDocbook opts term def) items + +-- | Convert a term and a list of blocks into a Docbook varlistentry. +deflistItemToDocbook :: WriterOptions -> [Inline] -> [Block] -> Doc +deflistItemToDocbook opts term def = + let def' = map plainToPara def + in inTagsIndented "varlistentry" $ + inTagsIndented "term" (inlinesToDocbook opts term) $$ + inTagsIndented "listitem" (blocksToDocbook opts def') + +-- | Convert a list of lists of blocks to a list of Docbook list items. +listItemsToDocbook :: WriterOptions -> [[Block]] -> Doc +listItemsToDocbook opts items = vcat $ map (listItemToDocbook opts) items + +-- | Convert a list of blocks into a Docbook list item. +listItemToDocbook :: WriterOptions -> [Block] -> Doc +listItemToDocbook opts item = + inTagsIndented "listitem" $ blocksToDocbook opts $ map plainToPara item + +-- | Convert a Pandoc block element to Docbook. +blockToDocbook :: WriterOptions -> Block -> Doc +blockToDocbook _ Null = empty +blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize +blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst +blockToDocbook opts (BlockQuote blocks) = + inTagsIndented "blockquote" $ blocksToDocbook opts blocks +blockToDocbook _ (CodeBlock _ str) = + text "\n" <> text (escapeStringForXML str) <> text "\n" +blockToDocbook opts (BulletList lst) = + inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = empty +blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = + let attribs = case numstyle of + DefaultStyle -> [] + Decimal -> [("numeration", "arabic")] + UpperAlpha -> [("numeration", "upperalpha")] + LowerAlpha -> [("numeration", "loweralpha")] + UpperRoman -> [("numeration", "upperroman")] + LowerRoman -> [("numeration", "lowerroman")] + items = if start == 1 + then listItemsToDocbook opts (first:rest) + else (inTags True "listitem" [("override",show start)] + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest + in inTags True "orderedlist" attribs items +blockToDocbook opts (DefinitionList lst) = + inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst +blockToDocbook _ (RawHtml str) = text str -- raw XML block +blockToDocbook _ HorizontalRule = empty -- not semantic +blockToDocbook opts (Table caption aligns widths headers rows) = + let alignStrings = map alignmentToString aligns + captionDoc = if null caption + then empty + else inTagsIndented "caption" + (inlinesToDocbook opts caption) + tableType = if isEmpty captionDoc then "informaltable" else "table" + in inTagsIndented tableType $ captionDoc $$ + (colHeadsToDocbook opts alignStrings widths headers) $$ + (vcat $ map (tableRowToDocbook opts alignStrings) rows) + +colHeadsToDocbook :: WriterOptions + -> [[Char]] + -> [Double] + -> [[Block]] + -> Doc +colHeadsToDocbook opts alignStrings widths headers = + let heads = zipWith3 (\align width item -> + tableItemToDocbook opts "th" align width item) + alignStrings widths headers + in inTagsIndented "tr" $ vcat heads + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToDocbook :: WriterOptions -> [[Char]] -> [[Block]] -> Doc +tableRowToDocbook opts aligns cols = inTagsIndented "tr" $ + vcat $ zipWith3 (tableItemToDocbook opts "td") aligns (repeat 0) cols + +tableItemToDocbook :: WriterOptions + -> [Char] + -> [Char] + -> Double + -> [Block] + -> Doc +tableItemToDocbook opts tag align width item = + let attrib = [("align", align)] ++ + if width /= 0 + then [("style", "{width: " ++ + show (truncate (100*width) :: Integer) ++ "%;}")] + else [] + in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item + +-- | Take list of inline elements and return wrapped doc. +wrap :: WriterOptions -> [Inline] -> Doc +wrap opts lst = if writerWrapText opts + then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) + else inlinesToDocbook opts lst + +-- | Convert a list of inline elements to Docbook. +inlinesToDocbook :: WriterOptions -> [Inline] -> Doc +inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst + +-- | Convert an inline element to Docbook. +inlineToDocbook :: WriterOptions -> Inline -> Doc +inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook opts (Emph lst) = + inTagsSimple "emphasis" $ inlinesToDocbook opts lst +inlineToDocbook opts (Strong lst) = + inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst +inlineToDocbook opts (Strikeout lst) = + inTags False "emphasis" [("role", "strikethrough")] $ + inlinesToDocbook opts lst +inlineToDocbook opts (Superscript lst) = + inTagsSimple "superscript" $ inlinesToDocbook opts lst +inlineToDocbook opts (Subscript lst) = + inTagsSimple "subscript" $ inlinesToDocbook opts lst +inlineToDocbook opts (SmallCaps lst) = + inTags False "emphasis" [("role", "smallcaps")] $ + inlinesToDocbook opts lst +inlineToDocbook opts (Quoted _ lst) = + inTagsSimple "quote" $ inlinesToDocbook opts lst +inlineToDocbook opts (Cite _ lst) = + inlinesToDocbook opts lst +inlineToDocbook _ Apostrophe = char '\'' +inlineToDocbook _ Ellipses = text "…" +inlineToDocbook _ EmDash = text "—" +inlineToDocbook _ EnDash = text "–" +inlineToDocbook _ (Code str) = + inTagsSimple "literal" $ text (escapeStringForXML str) +inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str +inlineToDocbook _ (TeX _) = empty +inlineToDocbook _ (HtmlInline _) = empty +inlineToDocbook _ LineBreak = text $ "" +inlineToDocbook _ Space = char ' ' +inlineToDocbook opts (Link txt (src, _)) = + if isPrefixOf "mailto:" src + then let src' = drop 7 src + emailLink = inTagsSimple "email" $ text $ + escapeStringForXML $ src' + in if txt == [Code src'] + then emailLink + else inlinesToDocbook opts txt <+> char '(' <> emailLink <> + char ')' + else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt +inlineToDocbook _ (Image _ (src, tit)) = + let titleDoc = if null tit + then empty + else inTagsIndented "objectinfo" $ + inTagsIndented "title" (text $ escapeStringForXML tit) + in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ + titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] +inlineToDocbook opts (Note contents) = + inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs new file mode 100644 index 000000000..fb7320e92 --- /dev/null +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -0,0 +1,557 @@ +{-# OPTIONS_GHC -fno-warn-deprecations #-} +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.HTML + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to HTML. +-} +module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where +import Text.Pandoc.Definition +import Text.Pandoc.LaTeXMathML +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.Pandoc.Shared +import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) +import Numeric ( showHex ) +import Data.Char ( ord, toLower, isAlpha ) +import Data.List ( isPrefixOf, intercalate ) +import qualified Data.Set as S +import Control.Monad.State +import Text.XHtml.Transitional hiding ( stringToHtml ) + +data WriterState = WriterState + { stNotes :: [Html] -- ^ List of notes + , stIds :: [String] -- ^ List of header identifiers + , stMath :: Bool -- ^ Math is used in document + , stCSS :: S.Set String -- ^ CSS to include in header + } deriving Show + +defaultWriterState :: WriterState +defaultWriterState = WriterState {stNotes= [], stIds = [], + stMath = False, stCSS = S.empty} + +-- Helpers to render HTML with the appropriate function. + +render :: (HTML html) => WriterOptions -> html -> String +render opts = if writerWrapText opts then renderHtml else showHtml + +renderFragment :: (HTML html) => WriterOptions -> html -> String +renderFragment opts = if writerWrapText opts + then renderHtmlFragment + else showHtmlFragment + +-- | Slightly modified version of Text.XHtml's stringToHtml. +-- Only uses numerical entities for 0xff and greater. +-- Adds  . +stringToHtml :: String -> Html +stringToHtml = primHtml . concatMap fixChar + where + fixChar '<' = "<" + fixChar '>' = ">" + fixChar '&' = "&" + fixChar '"' = """ + fixChar '\160' = " " + fixChar c | ord c < 0xff = [c] + fixChar c = "&#" ++ show (ord c) ++ ";" + +-- | Convert Pandoc document to Html string. +writeHtmlString :: WriterOptions -> Pandoc -> String +writeHtmlString opts = + if writerStandalone opts + then render opts . writeHtml opts + else renderFragment opts . writeHtml opts + +-- | Convert Pandoc document to Html structure. +writeHtml :: WriterOptions -> Pandoc -> Html +writeHtml opts (Pandoc (Meta tit authors date) blocks) = + let titlePrefix = writerTitlePrefix opts + topTitle = evalState (inlineListToHtml opts tit) defaultWriterState + topTitle' = if null titlePrefix + then topTitle + else if null tit + then stringToHtml titlePrefix + else titlePrefix +++ " - " +++ topTitle + metadata = thetitle topTitle' +++ + meta ! [httpequiv "Content-Type", + content "text/html; charset=UTF-8"] +++ + meta ! [name "generator", content "pandoc"] +++ + (toHtmlFromList $ + map (\a -> meta ! [name "author", content a]) authors) +++ + (if null date + then noHtml + else meta ! [name "date", content date]) + titleHeader = if writerStandalone opts && not (null tit) && + not (writerS5 opts) + then h1 ! [theclass "title"] $ topTitle + else noHtml + headerBlocks = filter isHeaderBlock blocks + ids = uniqueIdentifiers $ + map (\(Header _ lst) -> lst) headerBlocks + toc = if writerTableOfContents opts + then tableOfContents opts headerBlocks ids + else noHtml + (blocks', newstate) = + runState (blockListToHtml opts blocks) + (defaultWriterState {stIds = ids}) + cssLines = stCSS newstate + css = if S.null cssLines + then noHtml + else style ! [thetype "text/css"] $ primHtml $ + '\n':(unlines $ S.toList cssLines) + math = if stMath newstate + then case writerHTMLMathMethod opts of + LaTeXMathML Nothing -> + primHtml latexMathMLScript + LaTeXMathML (Just url) -> + script ! + [src url, thetype "text/javascript"] $ + noHtml + JsMath (Just url) -> + script ! + [src url, thetype "text/javascript"] $ + noHtml + _ -> noHtml + else noHtml + head' = header $ metadata +++ math +++ css +++ + primHtml (writerHeader opts) + notes = reverse (stNotes newstate) + before = primHtml $ writerIncludeBefore opts + after = primHtml $ writerIncludeAfter opts + thebody = before +++ titleHeader +++ toc +++ blocks' +++ + footnoteSection notes +++ after + in if writerStandalone opts + then head' +++ body thebody + else thebody + +-- | Construct table of contents from list of header blocks and identifiers. +-- Assumes there are as many identifiers as header blocks. +tableOfContents :: WriterOptions -> [Block] -> [String] -> Html +tableOfContents _ [] _ = noHtml +tableOfContents opts headers ids = + let opts' = opts { writerIgnoreNotes = True } + contentsTree = hierarchicalize headers + contents = evalState (mapM (elementToListItem opts') contentsTree) + (defaultWriterState {stIds = ids}) + in thediv ! [identifier "toc"] $ unordList contents + +-- | Converts an Element to a list item for a table of contents, +-- retrieving the appropriate identifier from state. +elementToListItem :: WriterOptions -> Element -> State WriterState Html +elementToListItem _ (Blk _) = return noHtml +elementToListItem opts (Sec headerText subsecs) = do + st <- get + let ids = stIds st + let (id', rest) = if null ids + then ("", []) + else (head ids, tail ids) + put $ st {stIds = rest} + txt <- inlineListToHtml opts headerText + subHeads <- mapM (elementToListItem opts) subsecs + let subList = if null subHeads + then noHtml + else unordList subHeads + return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++ + subList + +-- | Convert list of Note blocks to a footnote
    . +-- Assumes notes are sorted. +footnoteSection :: [Html] -> Html +footnoteSection notes = + if null notes + then noHtml + else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) + + +-- | Parse a mailto link; return Just (name, domain) or Nothing. +parseMailto :: String -> Maybe (String, String) +parseMailto ('m':'a':'i':'l':'t':'o':':':addr) = + let (name', rest) = span (/='@') addr + domain = drop 1 rest + in Just (name', domain) +parseMailto _ = Nothing + +-- | Obfuscate a "mailto:" link. +obfuscateLink :: WriterOptions -> String -> String -> Html +obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = + anchor ! [href s] << txt +obfuscateLink opts txt s = + let meth = writerEmailObfuscation opts + s' = map toLower s + in case parseMailto s' of + (Just (name', domain)) -> + let domain' = substitute "." " dot " domain + at' = obfuscateChar '@' + (linkText, altText) = + if txt == drop 7 s' -- autolink + then ("''+e+''", name' ++ " at " ++ domain') + else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ + domain' ++ ")") + in case meth of + ReferenceObfuscation -> + -- need to use primHtml or &'s are escaped to & in URL + primHtml $ "" ++ (obfuscateString txt) ++ "" + JavascriptObfuscation -> + (script ! [thetype "text/javascript"] $ + primHtml ("\n\n")) +++ + noscript (primHtml $ obfuscateString altText) + _ -> error $ "Unknown obfuscation method: " ++ show meth + _ -> anchor ! [href s] $ primHtml txt -- malformed email + +-- | Obfuscate character as entity. +obfuscateChar :: Char -> String +obfuscateChar char = + let num = ord char + numstr = if even num then show num else "x" ++ showHex num "" + in "&#" ++ numstr ++ ";" + +-- | Obfuscate string using entities. +obfuscateString :: String -> String +obfuscateString = concatMap obfuscateChar . decodeCharacterReferences + +-- | True if character is a punctuation character (unicode). +isPunctuation :: Char -> Bool +isPunctuation c = + let c' = ord c + in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || + c' >= 0xE000 && c' <= 0xE0FF + then True + else False + +-- | Add CSS for document header. +addToCSS :: String -> State WriterState () +addToCSS item = do + st <- get + let current = stCSS st + put $ st {stCSS = S.insert item current} + +-- | Convert Pandoc inline list to plain text identifier. +inlineListToIdentifier :: [Inline] -> String +inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' + +inlineListToIdentifier' :: [Inline] -> [Char] +inlineListToIdentifier' [] = "" +inlineListToIdentifier' (x:xs) = + xAsText ++ inlineListToIdentifier' xs + where xAsText = case x of + Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ + intercalate "-" $ words $ map toLower s + Emph lst -> inlineListToIdentifier' lst + Strikeout lst -> inlineListToIdentifier' lst + Superscript lst -> inlineListToIdentifier' lst + SmallCaps lst -> inlineListToIdentifier' lst + Subscript lst -> inlineListToIdentifier' lst + Strong lst -> inlineListToIdentifier' lst + Quoted _ lst -> inlineListToIdentifier' lst + Cite _ lst -> inlineListToIdentifier' lst + Code s -> s + Space -> "-" + EmDash -> "-" + EnDash -> "-" + Apostrophe -> "" + Ellipses -> "" + LineBreak -> "-" + Math _ _ -> "" + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> inlineListToIdentifier' lst + Image lst _ -> inlineListToIdentifier' lst + Note _ -> "" + +-- | Return unique identifiers for list of inline lists. +uniqueIdentifiers :: [[Inline]] -> [String] +uniqueIdentifiers ls = + let addIdentifier (nonuniqueIds, uniqueIds) l = + let new = inlineListToIdentifier l + matches = length $ filter (== new) nonuniqueIds + new' = (if null new then "section" else new) ++ + if matches > 0 then ("-" ++ show matches) else "" + in (new:nonuniqueIds, new':uniqueIds) + in reverse $ snd $ foldl addIdentifier ([],[]) ls + +-- | Convert Pandoc block element to HTML. +blockToHtml :: WriterOptions -> Block -> State WriterState Html +blockToHtml _ Null = return $ noHtml +blockToHtml opts (Plain lst) = inlineListToHtml opts lst +blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) +blockToHtml _ (RawHtml str) = return $ primHtml str +blockToHtml _ (HorizontalRule) = return $ hr +blockToHtml opts (CodeBlock (_,classes,_) rawCode) | "haskell" `elem` classes && + writerLiterateHaskell opts = + let classes' = map (\c -> if c == "haskell" then "literatehaskell" else c) classes + in blockToHtml opts $ CodeBlock ("",classes',[]) $ intercalate "\n" $ map ("> " ++) $ lines rawCode +blockToHtml _ (CodeBlock attr@(_,classes,_) rawCode) = do + case highlightHtml attr rawCode of + Left _ -> -- change leading newlines into
    tags, because some + -- browsers ignore leading newlines in pre blocks + let (leadingBreaks, rawCode') = span (=='\n') rawCode + in return $ pre ! (if null classes + then [] + else [theclass $ unwords classes]) $ thecode << + (replicate (length leadingBreaks) br +++ + [stringToHtml $ rawCode' ++ "\n"]) + Right h -> addToCSS defaultHighlightingCss >> return h +blockToHtml opts (BlockQuote blocks) = + -- in S5, treat list in blockquote specially + -- if default is incremental, make it nonincremental; + -- otherwise incremental + if writerS5 opts + then let inc = not (writerIncremental opts) in + case blocks of + [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) + (BulletList lst) + [OrderedList attribs lst] -> + blockToHtml (opts {writerIncremental = inc}) + (OrderedList attribs lst) + _ -> blockListToHtml opts blocks >>= + (return . blockquote) + else blockListToHtml opts blocks >>= (return . blockquote) +blockToHtml opts (Header level lst) = do + contents <- inlineListToHtml opts lst + st <- get + let ids = stIds st + let (id', rest) = if null ids + then ("", []) + else (head ids, tail ids) + put $ st {stIds = rest} + let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) + then [] + else [identifier id'] + let contents' = if writerTableOfContents opts + then anchor ! [href ("#TOC-" ++ id')] $ contents + else contents + return $ case level of + 1 -> h1 contents' ! attribs + 2 -> h2 contents' ! attribs + 3 -> h3 contents' ! attribs + 4 -> h4 contents' ! attribs + 5 -> h5 contents' ! attribs + 6 -> h6 contents' ! attribs + _ -> paragraph contents' ! attribs +blockToHtml opts (BulletList lst) = do + contents <- mapM (blockListToHtml opts) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ unordList ! attribs $ contents +blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do + contents <- mapM (blockListToHtml opts) lst + let numstyle' = camelCaseToHyphenated $ show numstyle + let attribs = (if writerIncremental opts + then [theclass "incremental"] + else []) ++ + (if startnum /= 1 + then [start startnum] + else []) ++ + (if numstyle /= DefaultStyle + then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"] + else []) + return $ ordList ! attribs $ contents +blockToHtml opts (DefinitionList lst) = do + contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term + def' <- blockListToHtml opts def + return $ (term', def')) lst + let attribs = if writerIncremental opts + then [theclass "incremental"] + else [] + return $ defList ! attribs $ contents +blockToHtml opts (Table capt aligns widths headers rows') = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return noHtml + else inlineListToHtml opts capt >>= return . caption + colHeads <- colHeadsToHtml opts alignStrings + widths headers + rows'' <- zipWithM (tableRowToHtml opts alignStrings) (cycle ["odd", "even"]) rows' + return $ table $ captionDoc +++ colHeads +++ rows'' + +colHeadsToHtml :: WriterOptions + -> [[Char]] + -> [Double] + -> [[Block]] + -> State WriterState Html +colHeadsToHtml opts alignStrings widths headers = do + heads <- sequence $ zipWith3 + (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item) + alignStrings widths headers + return $ tr ! [theclass "header"] $ toHtmlFromList heads + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToHtml :: WriterOptions + -> [[Char]] + -> String + -> [[Block]] + -> State WriterState Html +tableRowToHtml opts aligns rowclass columns = + (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>= + return . (tr ! [theclass rowclass]) . toHtmlFromList + +tableItemToHtml :: WriterOptions + -> (Html -> Html) + -> [Char] + -> Double + -> [Block] + -> State WriterState Html +tableItemToHtml opts tag' align' width' item = do + contents <- blockListToHtml opts item + let attrib = [align align'] ++ + if width' /= 0 + then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] + else [] + return $ tag' ! attrib $ contents + +blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html +blockListToHtml opts lst = + mapM (blockToHtml opts) lst >>= return . toHtmlFromList + +-- | Convert list of Pandoc inline elements to HTML. +inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html +inlineListToHtml opts lst = + mapM (inlineToHtml opts) lst >>= return . toHtmlFromList + +-- | Convert Pandoc inline element to HTML. +inlineToHtml :: WriterOptions -> Inline -> State WriterState Html +inlineToHtml opts inline = + case inline of + (Str str) -> return $ stringToHtml str + (Space) -> return $ stringToHtml " " + (LineBreak) -> return $ br + (EmDash) -> return $ primHtmlChar "mdash" + (EnDash) -> return $ primHtmlChar "ndash" + (Ellipses) -> return $ primHtmlChar "hellip" + (Apostrophe) -> return $ primHtmlChar "rsquo" + (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize + (Strong lst) -> inlineListToHtml opts lst >>= return . strong + (Code str) -> return $ thecode << str + (Strikeout lst) -> inlineListToHtml opts lst >>= + return . (thespan ! [thestyle "text-decoration: line-through;"]) + (SmallCaps lst) -> inlineListToHtml opts lst >>= + return . (thespan ! [thestyle "font-variant: small-caps;"]) + (Superscript lst) -> inlineListToHtml opts lst >>= return . sup + (Subscript lst) -> inlineListToHtml opts lst >>= return . sub + (Quoted quoteType lst) -> + let (leftQuote, rightQuote) = case quoteType of + SingleQuote -> (primHtmlChar "lsquo", + primHtmlChar "rsquo") + DoubleQuote -> (primHtmlChar "ldquo", + primHtmlChar "rdquo") + in do contents <- inlineListToHtml opts lst + return $ leftQuote +++ contents +++ rightQuote + (Math t str) -> + modify (\st -> st {stMath = True}) >> + (case writerHTMLMathMethod opts of + LaTeXMathML _ -> + -- putting LaTeXMathML in container with class "LaTeX" prevents + -- non-math elements on the page from being treated as math by + -- the javascript + return $ thespan ! [theclass "LaTeX"] $ + if t == InlineMath + then primHtml ("$" ++ str ++ "$") + else primHtml ("$$" ++ str ++ "$$") + JsMath _ -> + return $ if t == InlineMath + then thespan ! [theclass "math"] $ primHtml str + else thediv ! [theclass "math"] $ primHtml str + MimeTeX url -> + return $ image ! [src (url ++ "?" ++ str), + alt str, title str] + GladTeX -> + return $ primHtml $ "" ++ str ++ "" + PlainMath -> + inlineListToHtml opts (readTeXMath str) >>= + return . (thespan ! [theclass "math"])) + (TeX str) -> case writerHTMLMathMethod opts of + LaTeXMathML _ -> do modify (\st -> st {stMath = True}) + return $ primHtml str + _ -> return noHtml + (HtmlInline str) -> return $ primHtml str + (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s -> + return $ obfuscateLink opts str s + (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do + linkText <- inlineListToHtml opts txt + return $ obfuscateLink opts (show linkText) s + (Link txt (s,tit)) -> do + linkText <- inlineListToHtml opts txt + return $ anchor ! ([href s] ++ + if null tit then [] else [title tit]) $ + linkText + (Image txt (s,tit)) -> do + alternate <- inlineListToHtml opts txt + let alternate' = renderFragment opts alternate + let attributes = [src s] ++ + (if null tit + then [] + else [title tit]) ++ + if null txt + then [] + else [alt alternate'] + return $ image ! attributes + -- note: null title included, as in Markdown.pl + (Note contents) -> do + st <- get + let notes = stNotes st + let number = (length notes) + 1 + let ref = show number + htmlContents <- blockListToNote opts ref contents + -- push contents onto front of notes + put $ st {stNotes = (htmlContents:notes)} + return $ anchor ! [href ("#fn" ++ ref), + theclass "footnoteRef", + identifier ("fnref" ++ ref)] << + sup << ref + (Cite _ il) -> inlineListToHtml opts il + +blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html +blockListToNote opts ref blocks = + -- If last block is Para or Plain, include the backlink at the end of + -- that block. Otherwise, insert a new Plain block with the backlink. + let backlink = [HtmlInline $ " "] + blocks' = if null blocks + then [] + else let lastBlock = last blocks + otherBlocks = init blocks + in case lastBlock of + (Para lst) -> otherBlocks ++ + [Para (lst ++ backlink)] + (Plain lst) -> otherBlocks ++ + [Plain (lst ++ backlink)] + _ -> otherBlocks ++ [lastBlock, + Plain backlink] + in do contents <- blockListToHtml opts blocks' + return $ li ! [identifier ("fn" ++ ref)] $ contents + diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs new file mode 100644 index 000000000..f3cbf1acb --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -0,0 +1,331 @@ +{- +Copyright (C) 2006-8 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.LaTeX + Copyright : Copyright (C) 2006-8 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' format into LaTeX. +-} +module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( (\\), isSuffixOf, intercalate ) +import Data.Char ( toLower ) +import qualified Data.Set as S +import Control.Monad.State +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +data WriterState = + WriterState { stIncludes :: S.Set String -- strings to include in header + , stInNote :: Bool -- @True@ if we're in a note + , stOLLevel :: Int -- level of ordered list nesting + , stOptions :: WriterOptions -- writer options, so they don't have to be parameter + } + +-- | Add line to header. +addToHeader :: String -> State WriterState () +addToHeader str = do + st <- get + let includes = stIncludes st + put st {stIncludes = S.insert str includes} + +-- | Convert Pandoc to LaTeX. +writeLaTeX :: WriterOptions -> Pandoc -> String +writeLaTeX options document = + render $ evalState (pandocToLaTeX options document) $ + WriterState { stIncludes = S.empty, stInNote = False, stOLLevel = 1, stOptions = options } + +pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToLaTeX options (Pandoc meta blocks) = do + main <- blockListToLaTeX blocks + head' <- if writerStandalone options + then latexHeader options meta + else return empty + let before = if null (writerIncludeBefore options) + then empty + else text (writerIncludeBefore options) + let after = if null (writerIncludeAfter options) + then empty + else text (writerIncludeAfter options) + let body = before $$ main $$ after + let toc = if writerTableOfContents options + then text "\\tableofcontents\n" + else empty + let foot = if writerStandalone options + then text "\\end{document}" + else empty + return $ head' $$ toc $$ body $$ foot + +-- | Insert bibliographic information into LaTeX header. +latexHeader :: WriterOptions -- ^ Options, including LaTeX header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState Doc +latexHeader options (Meta title authors date) = do + titletext <- if null title + then return empty + else inlineListToLaTeX title >>= return . inCmd "title" + headerIncludes <- get >>= return . S.toList . stIncludes + let extras = text $ unlines headerIncludes + let verbatim = if "\\usepackage{fancyvrb}" `elem` headerIncludes + then text "\\VerbatimFootnotes % allows verbatim text in footnotes" + else empty + let authorstext = text $ "\\author{" ++ + intercalate "\\\\" (map stringToLaTeX authors) ++ "}" + let datetext = if date == "" + then empty + else text $ "\\date{" ++ stringToLaTeX date ++ "}" + let maketitle = if null title then empty else text "\\maketitle" + let secnumline = if (writerNumberSections options) + then empty + else text "\\setcounter{secnumdepth}{0}" + let baseHeader = text $ writerHeader options + let header = baseHeader $$ extras + return $ header $$ secnumline $$ verbatim $$ titletext $$ authorstext $$ + datetext $$ text "\\begin{document}" $$ maketitle $$ text "" + +-- escape things as needed for LaTeX + +stringToLaTeX :: String -> String +stringToLaTeX = escapeStringUsing latexEscapes + where latexEscapes = backslashEscapes "{}$%&_#" ++ + [ ('^', "\\^{}") + , ('\\', "\\textbackslash{}") + , ('~', "\\ensuremath{\\sim}") + , ('|', "\\textbar{}") + , ('<', "\\textless{}") + , ('>', "\\textgreater{}") + , ('\160', "~") + ] + +-- | Puts contents into LaTeX command. +inCmd :: String -> Doc -> Doc +inCmd cmd contents = char '\\' <> text cmd <> braces contents + +-- | Remove all code elements from list of inline elements +-- (because it's illegal to have verbatim inside some command arguments) +deVerb :: [Inline] -> [Inline] +deVerb [] = [] +deVerb ((Code str):rest) = + (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) +deVerb (other:rest) = other:(deVerb rest) + +-- | Convert Pandoc block element to LaTeX. +blockToLaTeX :: Block -- ^ Block to convert + -> State WriterState Doc +blockToLaTeX Null = return empty +blockToLaTeX (Plain lst) = do + st <- get + let opts = stOptions st + wrapTeXIfNeeded opts True inlineListToLaTeX lst +blockToLaTeX (Para lst) = do + st <- get + let opts = stOptions st + result <- wrapTeXIfNeeded opts True inlineListToLaTeX lst + return $ result <> char '\n' +blockToLaTeX (BlockQuote lst) = do + contents <- blockListToLaTeX lst + return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" +blockToLaTeX (CodeBlock (_,classes,_) str) = do + st <- get + env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes + then return "code" + else if stInNote st + then do addToHeader "\\usepackage{fancyvrb}" + return "Verbatim" + else return "verbatim" + return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> + text ("\n\\end{" ++ env ++ "}") +blockToLaTeX (RawHtml _) = return empty +blockToLaTeX (BulletList lst) = do + items <- mapM listItemToLaTeX lst + return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" +blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do + st <- get + let oldlevel = stOLLevel st + put $ st {stOLLevel = oldlevel + 1} + items <- mapM listItemToLaTeX lst + modify (\s -> s {stOLLevel = oldlevel}) + exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim + then do addToHeader "\\usepackage{enumerate}" + return $ char '[' <> + text (head (orderedListMarkers (1, numstyle, + numdelim))) <> char ']' + else return empty + let resetcounter = if start /= 1 && oldlevel <= 4 + then text $ "\\setcounter{enum" ++ + map toLower (toRomanNumeral oldlevel) ++ + "}{" ++ show (start - 1) ++ "}" + else empty + return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ + vcat items $$ text "\\end{enumerate}" +blockToLaTeX (DefinitionList lst) = do + items <- mapM defListItemToLaTeX lst + return $ text "\\begin{description}" $$ vcat items $$ + text "\\end{description}" +blockToLaTeX HorizontalRule = return $ text $ + "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" +blockToLaTeX (Header level lst) = do + txt <- inlineListToLaTeX (deVerb lst) + return $ if (level > 0) && (level <= 3) + then text ("\\" ++ (concat (replicate (level - 1) "sub")) ++ + "section{") <> txt <> text "}\n" + else txt <> char '\n' +blockToLaTeX (Table caption aligns widths heads rows) = do + headers <- tableRowToLaTeX heads + captionText <- inlineListToLaTeX caption + rows' <- mapM tableRowToLaTeX rows + let colWidths = map (printf "%.2f") widths + let colDescriptors = concat $ zipWith + (\width align -> ">{\\PBS" ++ + (case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright") ++ + "\\hspace{0pt}}p{" ++ width ++ + "\\columnwidth}") + colWidths aligns + let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ + headers $$ text "\\hline" $$ vcat rows' $$ + text "\\end{tabular}" + let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" + addToHeader $ "\\usepackage{array}\n" ++ + "% This is needed because raggedright in table elements redefines \\\\:\n" ++ + "\\newcommand{\\PreserveBackslash}[1]{\\let\\temp=\\\\#1\\let\\\\=\\temp}\n" ++ + "\\let\\PBS=\\PreserveBackslash" + return $ if isEmpty captionText + then centered tableBody <> char '\n' + else text "\\begin{table}[h]" $$ centered tableBody $$ + inCmd "caption" captionText $$ text "\\end{table}\n" + +blockListToLaTeX :: [Block] -> State WriterState Doc +blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat + +tableRowToLaTeX :: [[Block]] -> State WriterState Doc +tableRowToLaTeX cols = mapM blockListToLaTeX cols >>= + return . ($$ text "\\\\") . foldl (\row item -> row $$ + (if isEmpty row then text "" else text " & ") <> item) empty + +listItemToLaTeX :: [Block] -> State WriterState Doc +listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . + (nest 2) + +defListItemToLaTeX :: ([Inline], [Block]) -> State WriterState Doc +defListItemToLaTeX (term, def) = do + term' <- inlineListToLaTeX $ deVerb term + def' <- blockListToLaTeX def + return $ text "\\item[" <> term' <> text "]" $$ def' + +-- | Convert list of inline elements to LaTeX. +inlineListToLaTeX :: [Inline] -- ^ Inlines to convert + -> State WriterState Doc +inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat + +isQuoted :: Inline -> Bool +isQuoted (Quoted _ _) = True +isQuoted Apostrophe = True +isQuoted _ = False + +-- | Convert inline element to LaTeX +inlineToLaTeX :: Inline -- ^ Inline to convert + -> State WriterState Doc +inlineToLaTeX (Emph lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" +inlineToLaTeX (Strong lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" +inlineToLaTeX (Strikeout lst) = do + contents <- inlineListToLaTeX $ deVerb lst + addToHeader "\\usepackage[normalem]{ulem}" + return $ inCmd "sout" contents +inlineToLaTeX (Superscript lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" +inlineToLaTeX (Subscript lst) = do + contents <- inlineListToLaTeX $ deVerb lst + -- oddly, latex includes \textsuperscript but not \textsubscript + -- so we have to define it (using a different name so as not to conflict with memoir class): + addToHeader "\\newcommand{\\textsubscr}[1]{\\ensuremath{_{\\scriptsize\\textrm{#1}}}}" + return $ inCmd "textsubscr" contents +inlineToLaTeX (SmallCaps lst) = + inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc" +inlineToLaTeX (Cite _ lst) = + inlineListToLaTeX lst +inlineToLaTeX (Code str) = do + st <- get + if stInNote st + then do addToHeader "\\usepackage{fancyvrb}" + else return () + let chr = ((enumFromTo '!' '~') \\ str) !! 0 + return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] +inlineToLaTeX (Quoted SingleQuote lst) = do + contents <- inlineListToLaTeX lst + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then text "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then text "\\," + else empty + return $ char '`' <> s1 <> contents <> s2 <> char '\'' +inlineToLaTeX (Quoted DoubleQuote lst) = do + contents <- inlineListToLaTeX lst + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then text "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then text "\\," + else empty + return $ text "``" <> s1 <> contents <> s2 <> text "''" +inlineToLaTeX Apostrophe = return $ char '\'' +inlineToLaTeX EmDash = return $ text "---" +inlineToLaTeX EnDash = return $ text "--" +inlineToLaTeX Ellipses = return $ text "\\ldots{}" +inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str +inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' +inlineToLaTeX (Math DisplayMath str) = return $ text "\\[" <> text str <> text "\\]" +inlineToLaTeX (TeX str) = return $ text str +inlineToLaTeX (HtmlInline _) = return empty +inlineToLaTeX (LineBreak) = return $ text "\\\\" +inlineToLaTeX Space = return $ char ' ' +inlineToLaTeX (Link txt (src, _)) = do + addToHeader "\\usepackage[breaklinks=true]{hyperref}" + case txt of + [Code x] | x == src -> -- autolink + do addToHeader "\\usepackage{url}" + return $ text $ "\\url{" ++ x ++ "}" + _ -> do contents <- inlineListToLaTeX $ deVerb txt + return $ text ("\\href{" ++ src ++ "}{") <> contents <> + char '}' +inlineToLaTeX (Image _ (source, _)) = do + addToHeader "\\usepackage{graphicx}" + return $ text $ "\\includegraphics{" ++ source ++ "}" +inlineToLaTeX (Note contents) = do + st <- get + put (st {stInNote = True}) + contents' <- blockListToLaTeX contents + modify (\s -> s {stInNote = False}) + let rawnote = stripTrailingNewlines $ render contents' + -- note: a \n before } is needed when note ends with a Verbatim environment + let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote + return $ text "\\footnote{" <> + text rawnote <> (if optNewline then char '\n' else empty) <> char '}' diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs new file mode 100644 index 000000000..210c7ed07 --- /dev/null +++ b/src/Text/Pandoc/Writers/Man.hs @@ -0,0 +1,301 @@ +{- +Copyright (C) 2007 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Man + Copyright : Copyright (C) 2007 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to groff man page format. + +-} +module Text.Pandoc.Writers.Man ( writeMan) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Printf ( printf ) +import Data.List ( isPrefixOf, drop, nub, intersperse, intercalate ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Preprocessors = [String] -- e.g. "t" for tbl +type WriterState = (Notes, Preprocessors) + +-- | Convert Pandoc to Man. +writeMan :: WriterOptions -> Pandoc -> String +writeMan opts document = render $ evalState (pandocToMan opts document) ([],[]) + +-- | Return groff man representation of document. +pandocToMan :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToMan opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let before' = if null before then empty else text before + let after' = if null after then empty else text after + (head', foot) <- metaToMan opts meta + body <- blockListToMan opts blocks + (notes, preprocessors) <- get + let preamble = if null preprocessors || not (writerStandalone opts) + then empty + else text $ ".\\\" " ++ concat (nub preprocessors) + notes' <- notesToMan opts (reverse notes) + return $ preamble $$ head' $$ before' $$ body $$ notes' $$ foot $$ after' + +-- | Insert bibliographic information into Man header and footer. +metaToMan :: WriterOptions -- ^ Options, including Man header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState (Doc, Doc) +metaToMan options (Meta title authors date) = do + titleText <- inlineListToMan options title + let (cmdName, rest) = break (== ' ') $ render titleText + let (title', section) = case reverse cmdName of + (')':d:'(':xs) | d `elem` ['0'..'9'] -> + (text (reverse xs), char d) + xs -> (text (reverse xs), doubleQuotes empty) + let extras = map (doubleQuotes . text . removeLeadingTrailingSpace) $ + splitBy '|' rest + let head' = (text ".TH") <+> title' <+> section <+> + doubleQuotes (text date) <+> hsep extras + let foot = case length authors of + 0 -> empty + 1 -> text ".SH AUTHOR" $$ (text $ intercalate ", " authors) + _ -> text ".SH AUTHORS" $$ (text $ intercalate ", " authors) + return $ if writerStandalone options + then (head', foot) + else (empty, empty) + +-- | Return man representation of notes. +notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMan opts notes = + if null notes + then return empty + else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + return . (text ".SH NOTES" $$) . vcat + +-- | Return man representation of a note. +noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMan opts num note = do + contents <- blockListToMan opts note + let marker = text "\n.SS [" <> text (show num) <> char ']' + return $ marker $$ contents + +-- | Association list of characters to escape. +manEscapes :: [(Char, String)] +manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes ".@\\" + +-- | Escape special characters for Man. +escapeString :: String -> String +escapeString = escapeStringUsing manEscapes + +-- | Escape a literal (code) section for Man. +escapeCode :: String -> String +escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") + +-- | Convert Pandoc block element to man. +blockToMan :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToMan _ Null = return empty +blockToMan opts (Plain inlines) = + wrapIfNeeded opts (inlineListToMan opts) inlines +blockToMan opts (Para inlines) = do + contents <- wrapIfNeeded opts (inlineListToMan opts) inlines + return $ text ".PP" $$ contents +blockToMan _ (RawHtml str) = return $ text str +blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *" +blockToMan opts (Header level inlines) = do + contents <- inlineListToMan opts inlines + let heading = case level of + 1 -> ".SH " + _ -> ".SS " + return $ text heading <> contents +blockToMan _ (CodeBlock _ str) = return $ + text ".PP" $$ text "\\f[CR]" $$ + text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" +blockToMan opts (BlockQuote blocks) = do + contents <- blockListToMan opts blocks + return $ text ".RS" $$ contents $$ text ".RE" +blockToMan opts (Table caption alignments widths headers rows) = + let aligncode AlignLeft = "l" + aligncode AlignRight = "r" + aligncode AlignCenter = "c" + aligncode AlignDefault = "l" + in do + caption' <- inlineListToMan opts caption + modify (\(notes, preprocessors) -> (notes, "t":preprocessors)) + let iwidths = map (printf "w(%0.2fn)" . (70 *)) widths + -- 78n default width - 8n indent = 70n + let coldescriptions = text $ intercalate " " + (zipWith (\align width -> aligncode align ++ width) + alignments iwidths) ++ "." + colheadings <- mapM (blockListToMan opts) headers + let makeRow cols = text "T{" $$ + (vcat $ intersperse (text "T}@T{") cols) $$ + text "T}" + let colheadings' = makeRow colheadings + body <- mapM (\row -> do + cols <- mapM (blockListToMan opts) row + return $ makeRow cols) rows + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "tab(@);" $$ coldescriptions $$ + colheadings' $$ char '_' $$ vcat body $$ text ".TE" + +blockToMan opts (BulletList items) = do + contents <- mapM (bulletListItemToMan opts) items + return (vcat contents) +blockToMan opts (OrderedList attribs items) = do + let markers = take (length items) $ orderedListMarkers attribs + let indent = 1 + (maximum $ map length markers) + contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ + zip markers items + return (vcat contents) +blockToMan opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMan opts) items + return (vcat contents) + +-- | Convert bullet list item (list of blocks) to man. +bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMan _ [] = return empty +bulletListItemToMan opts ((Para first):rest) = + bulletListItemToMan opts ((Plain first):rest) +bulletListItemToMan opts ((Plain first):rest) = do + first' <- blockToMan opts (Plain first) + rest' <- blockListToMan opts rest + let first'' = text ".IP \\[bu] 2" $$ first' + let rest'' = if null rest + then empty + else text ".RS 2" $$ rest' $$ text ".RE" + return (first'' $$ rest'') +bulletListItemToMan opts (first:rest) = do + first' <- blockToMan opts first + rest' <- blockListToMan opts rest + return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" + +-- | Convert ordered list item (a list of blocks) to man. +orderedListItemToMan :: WriterOptions -- ^ options + -> String -- ^ order marker for list item + -> Int -- ^ number of spaces to indent + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToMan _ _ _ [] = return empty +orderedListItemToMan opts num indent ((Para first):rest) = + orderedListItemToMan opts num indent ((Plain first):rest) +orderedListItemToMan opts num indent (first:rest) = do + first' <- blockToMan opts first + rest' <- blockListToMan opts rest + let num' = printf ("%" ++ show (indent - 1) ++ "s") num + let first'' = text (".IP \"" ++ num' ++ "\" " ++ show indent) $$ first' + let rest'' = if null rest + then empty + else text ".RS 4" $$ rest' $$ text ".RE" + return $ first'' $$ rest'' + +-- | Convert definition list item (label, list of blocks) to man. +definitionListItemToMan :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState Doc +definitionListItemToMan opts (label, items) = do + labelText <- inlineListToMan opts label + contents <- if null items + then return empty + else do + let (first, rest) = case items of + ((Para x):y) -> (Plain x,y) + (x:y) -> (x,y) + [] -> error "items is null" + rest' <- mapM (\item -> blockToMan opts item) + rest >>= (return . vcat) + first' <- blockToMan opts first + return $ first' $$ text ".RS" $$ rest' $$ text ".RE" + return $ text ".TP\n.B " <> labelText $+$ contents + +-- | Convert list of Pandoc block elements to man. +blockListToMan :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToMan opts blocks = + mapM (blockToMan opts) blocks >>= (return . vcat) + +-- | Convert list of Pandoc inline elements to man. +inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) + +-- | Convert Pandoc inline element to man. +inlineToMan :: WriterOptions -> Inline -> State WriterState Doc +inlineToMan opts (Emph lst) = do + contents <- inlineListToMan opts lst + return $ text "\\f[I]" <> contents <> text "\\f[]" +inlineToMan opts (Strong lst) = do + contents <- inlineListToMan opts lst + return $ text "\\f[B]" <> contents <> text "\\f[]" +inlineToMan opts (Strikeout lst) = do + contents <- inlineListToMan opts lst + return $ text "[STRIKEOUT:" <> contents <> char ']' +inlineToMan opts (Superscript lst) = do + contents <- inlineListToMan opts lst + return $ char '^' <> contents <> char '^' +inlineToMan opts (Subscript lst) = do + contents <- inlineListToMan opts lst + return $ char '~' <> contents <> char '~' +inlineToMan opts (SmallCaps lst) = inlineListToMan opts lst -- not supported +inlineToMan opts (Quoted SingleQuote lst) = do + contents <- inlineListToMan opts lst + return $ char '`' <> contents <> char '\'' +inlineToMan opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMan opts lst + return $ text "\\[lq]" <> contents <> text "\\[rq]" +inlineToMan opts (Cite _ lst) = + inlineListToMan opts lst +inlineToMan _ EmDash = return $ text "\\[em]" +inlineToMan _ EnDash = return $ text "\\[en]" +inlineToMan _ Apostrophe = return $ char '\'' +inlineToMan _ Ellipses = return $ text "\\&..." +inlineToMan _ (Code str) = + return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" +inlineToMan _ (Str str) = return $ text $ escapeString str +inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str) +inlineToMan opts (Math DisplayMath str) = do + contents <- inlineToMan opts (Code str) + return $ text ".RS" $$ contents $$ text ".RE" +inlineToMan _ (TeX _) = return empty +inlineToMan _ (HtmlInline str) = return $ text $ escapeCode str +inlineToMan _ (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" +inlineToMan _ Space = return $ char ' ' +inlineToMan opts (Link txt (src, _)) = do + linktext <- inlineListToMan opts txt + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + return $ if txt == [Code srcSuffix] + then char '<' <> text srcSuffix <> char '>' + else linktext <> text " (" <> text src <> char ')' +inlineToMan opts (Image alternate (source, tit)) = do + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate == [Str source]) -- to prevent autolinks + then [Str "image"] + else alternate + linkPart <- inlineToMan opts (Link txt (source, tit)) + return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' +inlineToMan _ (Note contents) = do + modify (\(notes, prep) -> (contents:notes, prep)) -- add to notes in state + (notes, _) <- get + let ref = show $ (length notes) + return $ char '[' <> text ref <> char ']' + diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs new file mode 100644 index 000000000..70d1f0c91 --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -0,0 +1,396 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Markdown + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to markdown-formatted plain text. + +Markdown: +-} +module Text.Pandoc.Writers.Markdown ( writeMarkdown) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Blocks +import Text.ParserCombinators.Parsec ( parse, GenParser ) +import Data.List ( group, isPrefixOf, drop, find, intersperse, intercalate ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State + +type Notes = [[Block]] +type Refs = KeyTable +type WriterState = (Notes, Refs) + +-- | Convert Pandoc to Markdown. +writeMarkdown :: WriterOptions -> Pandoc -> String +writeMarkdown opts document = + render $ evalState (pandocToMarkdown opts document) ([],[]) + +-- | Return markdown representation of document. +pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToMarkdown opts (Pandoc meta blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let before' = if null before then empty else text before + let after' = if null after then empty else text after + metaBlock <- metaToMarkdown opts meta + let head' = if writerStandalone opts + then metaBlock $+$ text (writerHeader opts) + else empty + let headerBlocks = filter isHeaderBlock blocks + let toc = if writerTableOfContents opts + then tableOfContents opts headerBlocks + else empty + body <- blockListToMarkdown opts blocks + (notes, _) <- get + notes' <- notesToMarkdown opts (reverse notes) + (_, refs) <- get -- note that the notes may contain refs + refs' <- keyTableToMarkdown opts (reverse refs) + return $ head' $+$ before' $+$ toc $+$ body $+$ text "" $+$ + notes' $+$ text "" $+$ refs' $+$ after' + +-- | Return markdown representation of reference key table. +keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc +keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat + +-- | Return markdown representation of a reference key. +keyToMarkdown :: WriterOptions + -> ([Inline], (String, String)) + -> State WriterState Doc +keyToMarkdown opts (label, (src, tit)) = do + label' <- inlineListToMarkdown opts label + let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" + return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> + text src <> tit' + +-- | Return markdown representation of notes. +notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc +notesToMarkdown opts notes = + mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= + return . vcat + +-- | Return markdown representation of a note. +noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc +noteToMarkdown opts num blocks = do + contents <- blockListToMarkdown opts blocks + let marker = text "[^" <> text (show num) <> text "]:" + return $ hang' marker (writerTabStop opts) contents + +-- | Escape special characters for Markdown. +escapeString :: String -> String +escapeString = escapeStringUsing markdownEscapes + where markdownEscapes = backslashEscapes "`<\\*_^~" + +-- | Convert bibliographic information into Markdown header. +metaToMarkdown :: WriterOptions -> Meta -> State WriterState Doc +metaToMarkdown opts (Meta title authors date) = do + title' <- titleToMarkdown opts title + authors' <- authorsToMarkdown authors + date' <- dateToMarkdown date + return $ title' $+$ authors' $+$ date' + +titleToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +titleToMarkdown _ [] = return empty +titleToMarkdown opts lst = do + contents <- inlineListToMarkdown opts lst + return $ text "% " <> contents + +authorsToMarkdown :: [String] -> State WriterState Doc +authorsToMarkdown [] = return empty +authorsToMarkdown lst = return $ + text "% " <> text (intercalate ", " (map escapeString lst)) + +dateToMarkdown :: String -> State WriterState Doc +dateToMarkdown [] = return empty +dateToMarkdown str = return $ text "% " <> text (escapeString str) + +-- | Construct table of contents from list of header blocks. +tableOfContents :: WriterOptions -> [Block] -> Doc +tableOfContents opts headers = + let opts' = opts { writerIgnoreNotes = True } + contents = BulletList $ map elementToListItem $ hierarchicalize headers + in evalState (blockToMarkdown opts' contents) ([],[]) + +-- | Converts an Element to a list item for a table of contents, +elementToListItem :: Element -> [Block] +elementToListItem (Blk _) = [] +elementToListItem (Sec headerText subsecs) = [Plain headerText] ++ + if null subsecs + then [] + else [BulletList $ map elementToListItem subsecs] + +-- | Ordered list start parser for use in Para below. +olMarker :: GenParser Char st Char +olMarker = do (start, style', delim) <- anyOrderedListMarker + if delim == Period && + (style' == UpperAlpha || (style' == UpperRoman && + start `elem` [1, 5, 10, 50, 100, 500, 1000])) + then spaceChar >> spaceChar + else spaceChar + +-- | True if string begins with an ordered list marker +beginsWithOrderedListMarker :: String -> Bool +beginsWithOrderedListMarker str = + case parse olMarker "para start" str of + Left _ -> False + Right _ -> True + +wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedMarkdown opts inlines = do + let chunks = splitBy LineBreak inlines + let chunks' = if null chunks + then [] + else (map (++ [Str " "]) $ init chunks) ++ [last chunks] + lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' + return $ vcat lns + +-- | Convert Pandoc block element to markdown. +blockToMarkdown :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToMarkdown _ Null = return empty +blockToMarkdown opts (Plain inlines) = + wrappedMarkdown opts inlines +blockToMarkdown opts (Para inlines) = do + contents <- wrappedMarkdown opts inlines + -- escape if para starts with ordered list marker + let esc = if (not (writerStrictMarkdown opts)) && + beginsWithOrderedListMarker (render contents) + then char '\\' + else empty + return $ esc <> contents <> text "\n" +blockToMarkdown _ (RawHtml str) = return $ text str +blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n" +blockToMarkdown opts (Header level inlines) = do + contents <- inlineListToMarkdown opts inlines + -- use setext style headers if in literate haskell mode. + -- ghc interprets '#' characters in column 1 as line number specifiers. + if writerLiterateHaskell opts + then let len = length $ render contents + in return $ contents <> text "\n" <> + case level of + 1 -> text $ replicate len '=' ++ "\n" + 2 -> text $ replicate len '-' ++ "\n" + _ -> empty + else return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" +blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && + writerLiterateHaskell opts = + return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" +blockToMarkdown opts (CodeBlock _ str) = return $ + (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" +blockToMarkdown opts (BlockQuote blocks) = do + -- if we're writing literate haskell, put a space before the bird tracks + -- so they won't be interpreted as lhs... + let leader = if writerLiterateHaskell opts + then text . (" > " ++) + else text . ("> " ++) + contents <- blockListToMarkdown opts blocks + return $ (vcat $ map leader $ lines $ render contents) <> + text "\n" +blockToMarkdown opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToMarkdown opts caption + let caption'' = if null caption + then empty + else text "" $+$ (text "Table: " <> caption') + headers' <- mapM (blockListToMarkdown opts) headers + let widthsInChars = map (floor . (78 *)) widths + let alignHeader alignment = case alignment of + AlignLeft -> leftAlignBlock + AlignCenter -> centerAlignBlock + AlignRight -> rightAlignBlock + AlignDefault -> leftAlignBlock + let makeRow = hsepBlocks . (zipWith alignHeader aligns) . + (zipWith docToBlock widthsInChars) + let head' = makeRow headers' + rows' <- mapM (\row -> do cols <- mapM (blockListToMarkdown opts) row + return $ makeRow cols) rows + let maxRowHeight = maximum $ map heightOfBlock (head':rows') + let isMultilineTable = maxRowHeight > 1 + let underline = hsep $ + map (\width -> text $ replicate width '-') widthsInChars + let border = if isMultilineTable + then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' + else empty + let spacer = if isMultilineTable + then text "" + else empty + let body = vcat $ intersperse spacer $ map blockToDoc rows' + return $ (nest 2 $ border $+$ (blockToDoc head') $+$ underline $+$ body $+$ + border $+$ caption'') <> text "\n" +blockToMarkdown opts (BulletList items) = do + contents <- mapM (bulletListItemToMarkdown opts) items + return $ (vcat contents) <> text "\n" +blockToMarkdown opts (OrderedList attribs items) = do + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + zip markers' items + return $ (vcat contents) <> text "\n" +blockToMarkdown opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMarkdown opts) items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to markdown. +bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToMarkdown opts items = do + contents <- blockListToMarkdown opts items + return $ hang' (text "- ") (writerTabStop opts) contents + +-- | Convert ordered list item (a list of blocks) to markdown. +orderedListItemToMarkdown :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToMarkdown opts marker items = do + contents <- blockListToMarkdown opts items + return $ hsep [nest (min (3 - length marker) 0) (text marker), + nest (writerTabStop opts) contents] + +-- | Convert definition list item (label, list of blocks) to markdown. +definitionListItemToMarkdown :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState Doc +definitionListItemToMarkdown opts (label, items) = do + labelText <- inlineListToMarkdown opts label + let tabStop = writerTabStop opts + let leader = char ':' + contents <- mapM (\item -> blockToMarkdown opts item >>= + (\txt -> return (leader $$ nest tabStop txt))) + items >>= return . vcat + return $ labelText $+$ contents + +-- | Convert list of Pandoc block elements to markdown. +blockListToMarkdown :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToMarkdown opts blocks = + mapM (blockToMarkdown opts) blocks >>= return . vcat + +-- | Get reference for target; if none exists, create unique one and return. +-- Prefer label if possible; otherwise, generate a unique key. +getReference :: [Inline] -> Target -> State WriterState [Inline] +getReference label (src, tit) = do + (_,refs) <- get + case find ((== (src, tit)) . snd) refs of + Just (ref, _) -> return ref + Nothing -> do + let label' = case find ((== label) . fst) refs of + Just _ -> -- label is used; generate numerical label + case find (\n -> not (any (== [Str (show n)]) + (map fst refs))) [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" + Nothing -> label + modify (\(notes, refs') -> (notes, (label', (src,tit)):refs')) + return label' + +-- | Convert list of Pandoc inline elements to markdown. +inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToMarkdown opts lst = + mapM (inlineToMarkdown opts) lst >>= return . hcat + +-- | Convert Pandoc inline element to markdown. +inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc +inlineToMarkdown opts (Emph lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '*' <> contents <> char '*' +inlineToMarkdown opts (Strong lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "**" <> contents <> text "**" +inlineToMarkdown opts (Strikeout lst) = do + contents <- inlineListToMarkdown opts lst + return $ text "~~" <> contents <> text "~~" +inlineToMarkdown opts (Superscript lst) = do + contents <- inlineListToMarkdown opts lst + let contents' = text $ substitute " " "\\ " $ render contents + return $ char '^' <> contents' <> char '^' +inlineToMarkdown opts (Subscript lst) = do + contents <- inlineListToMarkdown opts lst + let contents' = text $ substitute " " "\\ " $ render contents + return $ char '~' <> contents' <> char '~' +inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Quoted SingleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '\'' <> contents <> char '\'' +inlineToMarkdown opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ char '"' <> contents <> char '"' +inlineToMarkdown _ EmDash = return $ text "--" +inlineToMarkdown _ EnDash = return $ char '-' +inlineToMarkdown _ Apostrophe = return $ char '\'' +inlineToMarkdown _ Ellipses = return $ text "..." +inlineToMarkdown _ (Code str) = + let tickGroups = filter (\s -> '`' `elem` s) $ group str + longest = if null tickGroups + then 0 + else maximum $ map length tickGroups + marker = replicate (longest + 1) '`' + spacer = if (longest == 0) then "" else " " in + return $ text (marker ++ spacer ++ str ++ spacer ++ marker) +inlineToMarkdown _ (Str str) = return $ text $ escapeString str +inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$' +inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$" +inlineToMarkdown _ (TeX str) = return $ text str +inlineToMarkdown _ (HtmlInline str) = return $ text str +inlineToMarkdown _ (LineBreak) = return $ text " \n" +inlineToMarkdown _ Space = return $ char ' ' +inlineToMarkdown _ (Cite cits _ ) = do + let format (a,b) xs = text a <> + (if b /= [] then char '@' else empty) <> + text b <> + (if isEmpty xs then empty else text "; ") <> + xs + return $ char '[' <> foldr format empty cits <> char ']' +inlineToMarkdown opts (Link txt (src, tit)) = do + linktext <- inlineListToMarkdown opts txt + let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let useRefLinks = writerReferenceLinks opts + let useAuto = null tit && txt == [Code srcSuffix] + ref <- if useRefLinks then getReference txt (src, tit) else return [] + reftext <- inlineListToMarkdown opts ref + return $ if useAuto + then char '<' <> text srcSuffix <> char '>' + else if useRefLinks + then let first = char '[' <> linktext <> char ']' + second = if txt == ref + then text "[]" + else char '[' <> reftext <> char ']' + in first <> second + else char '[' <> linktext <> char ']' <> + char '(' <> text src <> linktitle <> char ')' +inlineToMarkdown opts (Image alternate (source, tit)) = do + let txt = if (null alternate) || (alternate == [Str ""]) || + (alternate == [Str source]) -- to prevent autolinks + then [Str "image"] + else alternate + linkPart <- inlineToMarkdown opts (Link txt (source, tit)) + return $ char '!' <> linkPart +inlineToMarkdown _ (Note contents) = do + modify (\(notes, refs) -> (contents:notes, refs)) -- add to notes in state + (notes, _) <- get + let ref = show $ (length notes) + return $ text "[^" <> text ref <> char ']' diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs new file mode 100644 index 000000000..c5f6b3bf1 --- /dev/null +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -0,0 +1,396 @@ +{- +Copyright (C) 2008 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.MediaWiki + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to MediaWiki markup. + +MediaWiki: +-} +module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.XML ( escapeStringForXML ) +import Data.List ( intersect ) +import Network.URI ( isURI ) +import Control.Monad.State + +data WriterState = WriterState { + stNotes :: Bool -- True if there are notes + , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + } + +-- | Convert Pandoc to MediaWiki. +writeMediaWiki :: WriterOptions -> Pandoc -> String +writeMediaWiki opts document = + evalState (pandocToMediaWiki opts document) + (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) + +-- | Return MediaWiki representation of document. +pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String +pandocToMediaWiki opts (Pandoc _ blocks) = do + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + let head' = if writerStandalone opts + then writerHeader opts + else "" + let toc = if writerTableOfContents opts + then "__TOC__\n" + else "" + body <- blockListToMediaWiki opts blocks + notesExist <- get >>= return . stNotes + let notes = if notesExist + then "\n== Notes ==\n" + else "" + return $ head' ++ before ++ toc ++ body ++ after ++ notes + +-- | Escape special characters for MediaWiki. +escapeString :: String -> String +escapeString = escapeStringForXML + +-- | Convert Pandoc block element to MediaWiki. +blockToMediaWiki :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState String + +blockToMediaWiki _ Null = return "" + +blockToMediaWiki opts (Plain inlines) = + inlineListToMediaWiki opts inlines + +blockToMediaWiki opts (Para inlines) = do + useTags <- get >>= return . stUseTags + listLevel <- get >>= return . stListLevel + contents <- inlineListToMediaWiki opts inlines + return $ if useTags + then "

    " ++ contents ++ "

    " + else contents ++ if null listLevel then "\n" else "" + +blockToMediaWiki _ (RawHtml str) = return str + +blockToMediaWiki _ HorizontalRule = return "\n-----\n" + +blockToMediaWiki opts (Header level inlines) = do + contents <- inlineListToMediaWiki opts inlines + let eqs = replicate (level + 1) '=' + return $ eqs ++ " " ++ contents ++ " " ++ eqs ++ "\n" + +blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do + let at = classes `intersect` ["actionscript", "ada", "apache", "applescript", "asm", "asp", + "autoit", "bash", "blitzbasic", "bnf", "c", "c_mac", "caddcl", "cadlisp", "cfdg", "cfm", + "cpp", "cpp-qt", "csharp", "css", "d", "delphi", "diff", "div", "dos", "eiffel", "fortran", + "freebasic", "gml", "groovy", "html4strict", "idl", "ini", "inno", "io", "java", "java5", + "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", + "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", + "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", + "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", + "visualfoxpro", "winbatch", "xml", "xpp", "z80"] + let (beg, end) = if null at + then ("" else " class=\"" ++ unwords classes ++ "\">", "") + else ("", "") + return $ beg ++ escapeString str ++ end + +blockToMediaWiki opts (BlockQuote blocks) = do + contents <- blockListToMediaWiki opts blocks + return $ "
    " ++ contents ++ "
    " + +blockToMediaWiki opts (Table caption aligns widths headers rows) = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null caption + then return "" + else do + c <- inlineListToMediaWiki opts caption + return $ "" ++ c ++ "" + colHeads <- colHeadsToMediaWiki opts alignStrings widths headers + rows' <- mapM (tableRowToMediaWiki opts alignStrings) rows + return $ "\n" ++ captionDoc ++ colHeads ++ vcat rows' ++ "\n
    " + +blockToMediaWiki opts x@(BulletList items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "
      \n" ++ vcat contents ++ "
    \n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "*" } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +blockToMediaWiki opts x@(OrderedList attribs items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "\n" ++ vcat contents ++ "\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "#" } + contents <- mapM (listItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +blockToMediaWiki opts x@(DefinitionList items) = do + oldUseTags <- get >>= return . stUseTags + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + modify $ \s -> s { stUseTags = True } + contents <- mapM (definitionListItemToMediaWiki opts) items + modify $ \s -> s { stUseTags = oldUseTags } + return $ "
    \n" ++ vcat contents ++ "
    \n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ ";" } + contents <- mapM (definitionListItemToMediaWiki opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents + +-- Auxiliary functions for lists: + +-- | Convert ordered list attributes to HTML attribute string +listAttribsToString :: ListAttributes -> String +listAttribsToString (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ show numstyle + in (if startnum /= 1 + then " start=\"" ++ show startnum ++ "\"" + else "") ++ + (if numstyle /= DefaultStyle + then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + else "") + +-- | Convert bullet or ordered list item (list of blocks) to MediaWiki. +listItemToMediaWiki :: WriterOptions -> [Block] -> State WriterState String +listItemToMediaWiki opts items = do + contents <- blockListToMediaWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "
  • " ++ contents ++ "
  • " + else do + marker <- get >>= return . stListLevel + return $ marker ++ " " ++ contents + +-- | Convert definition list item (label, list of blocks) to MediaWiki. +definitionListItemToMediaWiki :: WriterOptions + -> ([Inline],[Block]) + -> State WriterState String +definitionListItemToMediaWiki opts (label, items) = do + labelText <- inlineListToMediaWiki opts label + contents <- blockListToMediaWiki opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "
    " ++ labelText ++ "
    \n
    " ++ contents ++ "
    " + else do + marker <- get >>= return . stListLevel + return $ marker ++ " " ++ labelText ++ "\n" ++ (init marker ++ ": ") ++ contents + +-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. +isSimpleList :: Block -> Bool +isSimpleList x = + case x of + BulletList items -> all isSimpleListItem items + OrderedList (num, sty, _) items -> all isSimpleListItem items && + num == 1 && sty `elem` [DefaultStyle, Decimal] + DefinitionList items -> all isSimpleListItem $ map snd items + _ -> False + +-- | True if list item can be handled with the simple wiki syntax. False if +-- HTML tags will be needed. +isSimpleListItem :: [Block] -> Bool +isSimpleListItem [] = True +isSimpleListItem [x] = + case x of + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + DefinitionList _ -> isSimpleList x + _ -> False +isSimpleListItem [x, y] | isPlainOrPara x = + case y of + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + DefinitionList _ -> isSimpleList y + _ -> False +isSimpleListItem _ = False + +isPlainOrPara :: Block -> Bool +isPlainOrPara (Plain _) = True +isPlainOrPara (Para _) = True +isPlainOrPara _ = False + +tr :: String -> String +tr x = "\n" ++ x ++ "\n" + +-- | Concatenates strings with line breaks between them. +vcat :: [String] -> String +vcat [] = "" +vcat [x] = x +vcat (x:xs) = x ++ "\n" ++ vcat xs + +-- Auxiliary functions for tables: + +colHeadsToMediaWiki :: WriterOptions + -> [[Char]] + -> [Double] + -> [[Block]] + -> State WriterState String +colHeadsToMediaWiki opts alignStrings widths headers = do + heads <- sequence $ zipWith3 + (\alignment columnwidth item -> tableItemToMediaWiki opts "th" alignment columnwidth item) + alignStrings widths headers + return $ tr $ vcat heads + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableRowToMediaWiki :: WriterOptions + -> [[Char]] + -> [[Block]] + -> State WriterState String +tableRowToMediaWiki opts aligns columns = + (sequence $ zipWith3 (tableItemToMediaWiki opts "td") aligns (repeat 0) columns) >>= + return . tr . vcat + +tableItemToMediaWiki :: WriterOptions + -> [Char] + -> [Char] + -> Double + -> [Block] + -> State WriterState String +tableItemToMediaWiki opts tag' align' width' item = do + contents <- blockListToMediaWiki opts item + let attrib = " align=\"" ++ align' ++ "\"" ++ + if width' /= 0 + then " style=\"width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;\"" + else "" + return $ "<" ++ tag' ++ attrib ++ ">" ++ contents ++ "" + +-- | Convert list of Pandoc block elements to MediaWiki. +blockListToMediaWiki :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState String +blockListToMediaWiki opts blocks = + mapM (blockToMediaWiki opts) blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to MediaWiki. +inlineListToMediaWiki :: WriterOptions -> [Inline] -> State WriterState String +inlineListToMediaWiki opts lst = + mapM (inlineToMediaWiki opts) lst >>= return . concat + +-- | Convert Pandoc inline element to MediaWiki. +inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String + +inlineToMediaWiki opts (Emph lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "''" ++ contents ++ "''" + +inlineToMediaWiki opts (Strong lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "'''" ++ contents ++ "'''" + +inlineToMediaWiki opts (Strikeout lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (Superscript lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (Subscript lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "" ++ contents ++ "" + +inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst + +inlineToMediaWiki opts (Quoted SingleQuote lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "‘" ++ contents ++ "’" + +inlineToMediaWiki opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMediaWiki opts lst + return $ "“" ++ contents ++ "”" + +inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst + +inlineToMediaWiki _ EmDash = return "—" + +inlineToMediaWiki _ EnDash = return "–" + +inlineToMediaWiki _ Apostrophe = return "’" + +inlineToMediaWiki _ Ellipses = return "…" + +inlineToMediaWiki _ (Code str) = + return $ "" ++ (escapeString str) ++ "" + +inlineToMediaWiki _ (Str str) = return $ escapeString str + +inlineToMediaWiki _ (Math _ str) = return $ "" ++ str ++ "" + -- note: str should NOT be escaped + +inlineToMediaWiki _ (TeX _) = return "" + +inlineToMediaWiki _ (HtmlInline str) = return str + +inlineToMediaWiki _ (LineBreak) = return "
    \n" + +inlineToMediaWiki _ Space = return " " + +inlineToMediaWiki opts (Link txt (src, _)) = do + link <- inlineListToMediaWiki opts txt + let useAuto = txt == [Code src] + let src' = if isURI src + then src + else if take 1 src == "/" + then "http://{{SERVERNAME}}" ++ src + else "http://{{SERVERNAME}}/" ++ src + return $ if useAuto + then src' + else "[" ++ src' ++ " " ++ link ++ "]" + +inlineToMediaWiki opts (Image alt (source, tit)) = do + alt' <- inlineListToMediaWiki opts alt + let txt = if (null tit) + then if null alt + then "" + else "|" ++ alt' + else "|" ++ tit + return $ "[[Image:" ++ source ++ txt ++ "]]" + +inlineToMediaWiki opts (Note contents) = do + contents' <- blockListToMediaWiki opts contents + modify (\s -> s { stNotes = True }) + return $ "" ++ contents' ++ "" + -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs new file mode 100644 index 000000000..52438f81e --- /dev/null +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -0,0 +1,568 @@ +{-# LANGUAGE PatternGuards #-} +{- +Copyright (C) 2008 Andrea Rossato + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.OpenDocument + Copyright : Copyright (C) 2008 Andrea Rossato + License : GNU GPL, version 2 or above + + Maintainer : Andrea Rossato + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to OpenDocument XML. +-} +module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.XML +import Text.Pandoc.Readers.TeXMath +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Printf ( printf ) +import Control.Applicative ( (<$>) ) +import Control.Arrow ( (***), (>>>) ) +import Control.Monad.State hiding ( when ) +import Data.Char (chr) +import Data.List (intercalate) + +-- | Auxiliary function to convert Plain block to Para. +plainToPara :: Block -> Block +plainToPara (Plain x) = Para x +plainToPara x = x + +-- +-- OpenDocument writer +-- + +data WriterState = + WriterState { stNotes :: [Doc] + , stTableStyles :: [Doc] + , stParaStyles :: [Doc] + , stListStyles :: [(Int, [Doc])] + , stTextStyles :: [Doc] + , stTextStyleAttr :: [(TextStyle,[(String,String)])] + , stIndentPara :: Int + , stInDefinition :: Bool + , stTight :: Bool + } + +defaultWriterState :: WriterState +defaultWriterState = + WriterState { stNotes = [] + , stTableStyles = [] + , stParaStyles = [] + , stListStyles = [] + , stTextStyles = [] + , stTextStyleAttr = [] + , stIndentPara = 0 + , stInDefinition = False + , stTight = False + } + +when :: Bool -> Doc -> Doc +when p a = if p then a else empty + +addTableStyle :: Doc -> State WriterState () +addTableStyle i = modify $ \s -> s { stTableStyles = i : stTableStyles s } + +addNote :: Doc -> State WriterState () +addNote i = modify $ \s -> s { stNotes = i : stNotes s } + +addParaStyle :: Doc -> State WriterState () +addParaStyle i = modify $ \s -> s { stParaStyles = i : stParaStyles s } + +addTextStyle :: Doc -> State WriterState () +addTextStyle i = modify $ \s -> s { stTextStyles = i : stTextStyles s } + +addTextStyleAttr :: (TextStyle, [(String,String)]) -> State WriterState () +addTextStyleAttr i = modify $ \s -> s { stTextStyleAttr = i : stTextStyleAttr s } + +rmTextStyleAttr :: State WriterState () +rmTextStyleAttr = modify $ \s -> s { stTextStyleAttr = rmHead (stTextStyleAttr s) } + where rmHead l = if l /= [] then tail l else [] + +increaseIndent :: State WriterState () +increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } + +resetIndent :: State WriterState () +resetIndent = modify $ \s -> s { stIndentPara = (stIndentPara s) - 1 } + +inTightList :: State WriterState a -> State WriterState a +inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> + modify (\s -> s { stTight = False }) >> return r + +setInDefinitionList :: Bool -> State WriterState () +setInDefinitionList b = modify $ \s -> s { stInDefinition = b } + +inParagraphTags :: Doc -> Doc +inParagraphTags = inTags False "text:p" [("text:style-name", "Text_20_body")] + +inParagraphTagsWithStyle :: String -> Doc -> Doc +inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] + +inSpanTags :: String -> Doc -> Doc +inSpanTags s = inTags False "text:span" [("text:style-name",s)] + +withTextStyle :: TextStyle -> State WriterState a -> State WriterState a +withTextStyle s f = addTextStyleAttr (s,textStyleAttr s) >> + f >>= \r -> rmTextStyleAttr >> return r + +inTextStyle :: Doc -> State WriterState Doc +inTextStyle d = do + at <- gets stTextStyleAttr + if at == [] + then return d + else do + tn <- (+) 1 . length <$> gets stTextStyles + addTextStyle $ inTags False "style:style" [("style:name" , "T" ++ show tn) + ,("style:family", "text" )] + $ selfClosingTag "style:text-properties" (concatMap snd at) + return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d + +inHeaderTags :: Int -> Doc -> Doc +inHeaderTags i = inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) + , ("text:outline-level", show i)] + +inQuotes :: QuoteType -> Doc -> Doc +inQuotes SingleQuote s = text "‘" <> s <> text "’" +inQuotes DoubleQuote s = text "“" <> s <> text "”" + +handleSpaces :: String -> Doc +handleSpaces s + | ( ' ':_) <- s = genTag s + | ('\t':x) <- s = selfClosingTag "text:tab" [] <> rm x + | otherwise = rm s + where + genTag = span (==' ') >>> tag . length *** rm >>> uncurry (<>) + tag n = when (n /= 0) $ selfClosingTag "text:s" [("text:c", show n)] + rm ( ' ':xs) = char ' ' <> genTag xs + rm ('\t':xs) = selfClosingTag "text:tab" [] <> genTag xs + rm ( x:xs) = char x <> rm xs + rm [] = empty + +-- | Convert list of authors to a docbook section +authorToOpenDocument :: [Char] -> Doc +authorToOpenDocument name = + if ',' `elem` name + then -- last name first + let (lastname, rest) = break (==',') name + firstname = removeLeadingSpace rest + in inParagraphTagsWithStyle "Author" $ + (text $ escapeStringForXML firstname) <+> + (text $ escapeStringForXML lastname) + else -- last name last + let namewords = words name + lengthname = length namewords + (firstname, lastname) = case lengthname of + 0 -> ("","") + 1 -> ("", name) + n -> (intercalate " " (take (n-1) namewords), last namewords) + in inParagraphTagsWithStyle "Author" $ + (text $ escapeStringForXML firstname) <+> + (text $ escapeStringForXML lastname) + +-- | Convert Pandoc document to string in OpenDocument format. +writeOpenDocument :: WriterOptions -> Pandoc -> String +writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = + let root = inTags True "office:document-content" openDocumentNameSpaces + header = when (writerStandalone opts) $ text (writerHeader opts) + title' = case runState (wrap opts title) defaultWriterState of + (t,_) -> if isEmpty t then empty else inHeaderTags 1 t + authors' = when (authors /= []) $ vcat (map authorToOpenDocument authors) + date' = when (date /= []) $ + inParagraphTagsWithStyle "Date" (text $ escapeStringForXML date) + meta = when (writerStandalone opts) $ title' $$ authors' $$ date' + before = writerIncludeBefore opts + after = writerIncludeAfter opts + (doc, s) = runState (blocksToOpenDocument opts blocks) defaultWriterState + body = (if null before then empty else text before) $$ + doc $$ + (if null after then empty else text after) + body' = if writerStandalone opts + then inTagsIndented "office:body" $ + inTagsIndented "office:text" (meta $$ body) + else body + styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s + listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) + listStyles = map listStyle (stListStyles s) + in render $ header $$ root (generateStyles (styles ++ listStyles) $$ body' $$ text "") + +withParagraphStyle :: WriterOptions -> String -> [Block] -> State WriterState Doc +withParagraphStyle o s (b:bs) + | Para l <- b = go =<< inParagraphTagsWithStyle s <$> inlinesToOpenDocument o l + | otherwise = go =<< blockToOpenDocument o b + where go i = ($$) i <$> withParagraphStyle o s bs +withParagraphStyle _ _ [] = return empty + +inPreformattedTags :: String -> State WriterState Doc +inPreformattedTags s = do + n <- paraStyle "Preformatted_20_Text" [] + return . inParagraphTagsWithStyle ("P" ++ show n) . handleSpaces $ s + +orderedListToOpenDocument :: WriterOptions -> Int -> [[Block]] -> State WriterState Doc +orderedListToOpenDocument o pn bs = + vcat . map (inTagsIndented "text:list-item") <$> + mapM (orderedItemToOpenDocument o pn . map plainToPara) bs + +orderedItemToOpenDocument :: WriterOptions -> Int -> [Block] -> State WriterState Doc +orderedItemToOpenDocument o n (b:bs) + | OrderedList a l <- b = newLevel a l + | Para l <- b = go =<< inParagraphTagsWithStyle ("P" ++ show n) <$> inlinesToOpenDocument o l + | otherwise = go =<< blockToOpenDocument o b + where + go i = ($$) i <$> orderedItemToOpenDocument o n bs + newLevel a l = do + nn <- length <$> gets stParaStyles + ls <- head <$> gets stListStyles + modify $ \s -> s { stListStyles = orderedListLevelStyle a ls : tail (stListStyles s) } + inTagsIndented "text:list" <$> orderedListToOpenDocument o nn l +orderedItemToOpenDocument _ _ [] = return empty + +isTightList :: [[Block]] -> Bool +isTightList [] = False +isTightList (b:_) + | Plain {} : _ <- b = True + | otherwise = False + +newOrderedListStyle :: Bool -> ListAttributes -> State WriterState (Int,Int) +newOrderedListStyle b a = do + ln <- (+) 1 . length <$> gets stListStyles + let nbs = orderedListLevelStyle a (ln, []) + pn <- if b then inTightList (paraListStyle ln) else paraListStyle ln + modify $ \s -> s { stListStyles = nbs : stListStyles s } + return (ln,pn) + +bulletListToOpenDocument :: WriterOptions -> [[Block]] -> State WriterState Doc +bulletListToOpenDocument o b = do + ln <- (+) 1 . length <$> gets stListStyles + (pn,ns) <- if isTightList b then inTightList (bulletListStyle ln) else bulletListStyle ln + modify $ \s -> s { stListStyles = ns : stListStyles s } + is <- listItemsToOpenDocument ("P" ++ show pn) o b + return $ inTags True "text:list" [("text:style-name", "L" ++ show ln)] is + +listItemsToOpenDocument :: String -> WriterOptions -> [[Block]] -> State WriterState Doc +listItemsToOpenDocument s o is = + vcat . map (inTagsIndented "text:list-item") <$> mapM (withParagraphStyle o s . map plainToPara) is + +deflistItemToOpenDocument :: WriterOptions -> ([Inline],[Block]) -> State WriterState Doc +deflistItemToOpenDocument o (t,d) = do + let ts = if isTightList [d] + then "Definition_20_Term_20_Tight" else "Definition_20_Term" + ds = if isTightList [d] + then "Definition_20_Definition_20_Tight" else "Definition_20_Definition" + t' <- withParagraphStyle o ts [Para t] + d' <- withParagraphStyle o ds (map plainToPara d) + return $ t' $$ d' + +inBlockQuote :: WriterOptions -> Int -> [Block] -> State WriterState Doc +inBlockQuote o i (b:bs) + | BlockQuote l <- b = do increaseIndent + ni <- paraStyle "Quotations" [] + go =<< inBlockQuote o ni (map plainToPara l) + | Para l <- b = do go =<< inParagraphTagsWithStyle ("P" ++ show i) <$> inlinesToOpenDocument o l + | otherwise = do go =<< blockToOpenDocument o b + where go block = ($$) block <$> inBlockQuote o i bs +inBlockQuote _ _ [] = resetIndent >> return empty + +-- | Convert a list of Pandoc blocks to OpenDocument. +blocksToOpenDocument :: WriterOptions -> [Block] -> State WriterState Doc +blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b + +-- | Convert a Pandoc block element to OpenDocument. +blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc +blockToOpenDocument o bs + | Plain b <- bs = inParagraphTags <$> wrap o b + | Para b <- bs = inParagraphTags <$> wrap o b + | Header i b <- bs = inHeaderTags i <$> wrap o b + | BlockQuote b <- bs = mkBlockQuote b + | CodeBlock _ s <- bs = preformatted s + | RawHtml _ <- bs = return empty + | DefinitionList b <- bs = defList b + | BulletList b <- bs = bulletListToOpenDocument o b + | OrderedList a b <- bs = orderedList a b + | Table c a w h r <- bs = table c a w h r + | Null <- bs = return empty + | HorizontalRule <- bs = return $ selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ] + | otherwise = return empty + where + defList b = do setInDefinitionList True + r <- vcat <$> mapM (deflistItemToOpenDocument o) b + setInDefinitionList False + return r + preformatted s = vcat <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) + mkBlockQuote b = do increaseIndent + i <- paraStyle "Quotations" [] + inBlockQuote o i (map plainToPara b) + orderedList a b = do (ln,pn) <- newOrderedListStyle (isTightList b) a + inTags True "text:list" [ ("text:style-name", "L" ++ show ln)] + <$> orderedListToOpenDocument o pn b + table c a w h r = do + tn <- length <$> gets stTableStyles + pn <- length <$> gets stParaStyles + let genIds = map chr [65..] + name = "Table" ++ show (tn + 1) + columnIds = zip genIds w + mkColumn n = selfClosingTag "table:table-column" [("table:style-name", name ++ "." ++ [fst n])] + columns = map mkColumn columnIds + paraHStyles = paraTableStyles "Heading" pn a + paraStyles = paraTableStyles "Contents" (pn + length (newPara paraHStyles)) a + newPara = map snd . filter (not . isEmpty . snd) + addTableStyle $ tableStyle tn columnIds + mapM_ addParaStyle . newPara $ paraHStyles ++ paraStyles + captionDoc <- if null c + then return empty + else withParagraphStyle o "Caption" [Para c] + th <- colHeadsToOpenDocument o name (map fst paraHStyles) h + tr <- mapM (tableRowToOpenDocument o name (map fst paraStyles)) r + return $ inTags True "table:table" [ ("table:name" , name) + , ("table:style-name", name) + ] (vcat columns $$ th $$ vcat tr) $$ captionDoc + +colHeadsToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +colHeadsToOpenDocument o tn ns hs = + inTagsIndented "table:table-header-rows" . inTagsIndented "table:table-row" . vcat <$> + mapM (tableItemToOpenDocument o tn) (zip ns hs) + +tableRowToOpenDocument :: WriterOptions -> String -> [String] -> [[Block]] -> State WriterState Doc +tableRowToOpenDocument o tn ns cs = + inTagsIndented "table:table-row" . vcat <$> + mapM (tableItemToOpenDocument o tn) (zip ns cs) + +tableItemToOpenDocument :: WriterOptions -> String -> (String,[Block])-> State WriterState Doc +tableItemToOpenDocument o tn (n,i) = + let a = [ ("table:style-name" , tn ++ ".A1" ) + , ("office:value-type", "string" ) + ] + in inTags True "table:table-cell" a <$> + withParagraphStyle o n (map plainToPara i) + +-- | Take list of inline elements and return wrapped doc. +wrap :: WriterOptions -> [Inline] -> State WriterState Doc +wrap o l = if writerWrapText o + then fsep <$> mapM (inlinesToOpenDocument o) (splitBy Space l) + else inlinesToOpenDocument o l + +-- | Convert a list of inline elements to OpenDocument. +inlinesToOpenDocument :: WriterOptions -> [Inline] -> State WriterState Doc +inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l + +-- | Convert an inline element to OpenDocument. +inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc +inlineToOpenDocument o ils + | Ellipses <- ils = inTextStyle $ text "…" + | EmDash <- ils = inTextStyle $ text "—" + | EnDash <- ils = inTextStyle $ text "–" + | Apostrophe <- ils = inTextStyle $ text "’" + | Space <- ils = inTextStyle $ char ' ' + | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] + | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s + | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l + | Strong l <- ils = withTextStyle Bold $ inlinesToOpenDocument o l + | Strikeout l <- ils = withTextStyle Strike $ inlinesToOpenDocument o l + | Superscript l <- ils = withTextStyle Sup $ inlinesToOpenDocument o l + | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l + | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l + | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l + | Code s <- ils = preformatted s + | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) + | Cite _ l <- ils = inlinesToOpenDocument o l + | TeX s <- ils = preformatted s + | HtmlInline s <- ils = preformatted s + | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l + | Image _ (s,_) <- ils = return $ mkImg s + | Note l <- ils = mkNote l + | otherwise = return empty + where + preformatted = return . inSpanTags "Teletype" . handleSpaces . escapeStringForXML + mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") + , ("xlink:href" , s ) + , ("office:name", t ) + ] . inSpanTags "Definition" + mkImg s = inTags False "draw:frame" [] $ + selfClosingTag "draw:image" [ ("xlink:href" , s ) + , ("xlink:type" , "simple") + , (" xlink:show" , "embed" ) + , ("xlink:actuate", "onLoad")] + mkNote l = do + n <- length <$> gets stNotes + let footNote t = inTags False "text:note" + [ ("text:id" , "ftn" ++ show n) + , ("text:note-class", "footnote" )] $ + inTagsSimple "text:note-citation" (text . show $ n + 1) $$ + inTagsSimple "text:note-body" t + nn <- footNote <$> withParagraphStyle o "Footnote" l + addNote nn + return nn + +generateStyles :: [Doc] -> Doc +generateStyles acc = + let scripts = selfClosingTag "office:scripts" [] + fonts = inTagsIndented "office:font-face-decls" + (vcat $ map font ["Lucida Sans Unicode", "Tahoma", "Times New Roman"]) + font fn = selfClosingTag "style:font-face" + [ ("style:name" , "'" ++ fn ++ "'") + , ("svg:font-family", fn )] + in scripts $$ fonts $$ inTagsIndented "office:automatic-styles" (vcat $ reverse acc) + +bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) +bulletListStyle l = + let doStyles i = inTags True "text:list-level-style-bullet" + [ ("text:level" , show (i + 1) ) + , ("text:style-name" , "Bullet_20_Symbols") + , ("style:num-suffix", "." ) + , ("text:bullet-char", [bulletList !! i] ) + ] (listLevelStyle (1 + i)) + bulletList = map chr $ cycle [8226,8227,8259] + listElStyle = map doStyles [0..9] + in do pn <- paraListStyle l + return (pn, (l, listElStyle)) + +orderedListLevelStyle :: ListAttributes -> (Int, [Doc]) -> (Int,[Doc]) +orderedListLevelStyle (s,n, d) (l,ls) = + let suffix = case d of + OneParen -> [("style:num-suffix", ")")] + TwoParens -> [("style:num-prefix", "(") + ,("style:num-suffix", ")")] + _ -> [("style:num-suffix", ".")] + format = case n of + UpperAlpha -> "A" + LowerAlpha -> "a" + UpperRoman -> "I" + LowerRoman -> "i" + _ -> "1" + listStyle = inTags True "text:list-level-style-number" + ([ ("text:level" , show $ 1 + length ls ) + , ("text:style-name" , "Numbering_20_Symbols") + , ("style:num-format", format ) + , ("text:start-value", show s ) + ] ++ suffix) (listLevelStyle (1 + length ls)) + in (l, ls ++ [listStyle]) + +listLevelStyle :: Int -> Doc +listLevelStyle i = + let indent = show (0.25 * fromIntegral i :: Double) in + selfClosingTag "style:list-level-properties" + [ ("text:space-before" , indent ++ "in") + , ("text:min-label-width", "0.25in")] + +tableStyle :: Int -> [(Char,Double)] -> Doc +tableStyle num wcs = + let tableId = "Table" ++ show (num + 1) + table = inTags True "style:style" + [("style:name", tableId)] $ + selfClosingTag "style:table-properties" + [ ("style:rel-width", "100%" ) + , ("table:align" , "center")] + colStyle (c,w) = inTags True "style:style" + [ ("style:name" , tableId ++ "." ++ [c]) + , ("style:family", "table-column" )] $ + selfClosingTag "style:table-column-properties" + [("style:column-width", printf "%.2f" (7 * w) ++ "in")] + cellStyle = inTags True "style:style" + [ ("style:name" , tableId ++ ".A1") + , ("style:family", "table-cell" )] $ + selfClosingTag "style:table-cell-properties" + [ ("fo:border", "none")] + columnStyles = map colStyle wcs + in table $$ vcat columnStyles $$ cellStyle + +paraStyle :: String -> [(String,String)] -> State WriterState Int +paraStyle parent attrs = do + pn <- (+) 1 . length <$> gets stParaStyles + i <- (*) 0.5 . fromIntegral <$> gets stIndentPara :: State WriterState Double + b <- gets stInDefinition + t <- gets stTight + let styleAttr = [ ("style:name" , "P" ++ show pn) + , ("style:family" , "paragraph" ) + , ("style:parent-style-name", parent )] + indentVal = flip (++) "in" . show $ if b then (max 0.5 i) else i + tight = if t then [ ("fo:margin-top" , "0in" ) + , ("fo:margin-bottom" , "0in" )] + else [] + indent = when (i /= 0 || b || t) $ + selfClosingTag "style:paragraph-properties" $ + [ ("fo:margin-left" , indentVal) + , ("fo:margin-right" , "0in" ) + , ("fo:text-indent" , "0in" ) + , ("style:auto-text-indent" , "false" )] + ++ tight + addParaStyle $ inTags True "style:style" (styleAttr ++ attrs) indent + return pn + +paraListStyle :: Int -> State WriterState Int +paraListStyle l = paraStyle "Text_20_body" [("style:list-style-name", "L" ++ show l )] + +paraTableStyles :: String -> Int -> [Alignment] -> [(String, Doc)] +paraTableStyles _ _ [] = [] +paraTableStyles t s (a:xs) + | AlignRight <- a = ( pName s, res s "end" ) : paraTableStyles t (s + 1) xs + | AlignCenter <- a = ( pName s, res s "center") : paraTableStyles t (s + 1) xs + | otherwise = ("Table_20_" ++ t, empty ) : paraTableStyles t s xs + where pName sn = "P" ++ show (sn + 1) + res sn x = inTags True "style:style" + [ ("style:name" , pName sn ) + , ("style:family" , "paragraph" ) + , ("style:parent-style-name", "Table_20_" ++ t)] $ + selfClosingTag "style:paragraph-properties" + [ ("fo:text-align", x) + , ("style:justify-single-word", "false")] + +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq ) + +textStyleAttr :: TextStyle -> [(String,String)] +textStyleAttr s + | Italic <- s = [("fo:font-style" ,"italic" ) + ,("style:font-style-asian" ,"italic" ) + ,("style:font-style-complex" ,"italic" )] + | Bold <- s = [("fo:font-weight" ,"bold" ) + ,("style:font-weight-asian" ,"bold" ) + ,("style:font-weight-complex" ,"bold" )] + | Strike <- s = [("style:text-line-through-style", "solid" )] + | Sub <- s = [("style:text-position" ,"sub 58%" )] + | Sup <- s = [("style:text-position" ,"super 58%" )] + | SmallC <- s = [("fo:font-variant" ,"small-caps")] + | otherwise = [] + +openDocumentNameSpaces :: [(String, String)] +openDocumentNameSpaces = + [ ("xmlns:office" , "urn:oasis:names:tc:opendocument:xmlns:office:1.0" ) + , ("xmlns:style" , "urn:oasis:names:tc:opendocument:xmlns:style:1.0" ) + , ("xmlns:text" , "urn:oasis:names:tc:opendocument:xmlns:text:1.0" ) + , ("xmlns:table" , "urn:oasis:names:tc:opendocument:xmlns:table:1.0" ) + , ("xmlns:draw" , "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" ) + , ("xmlns:fo" , "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0") + , ("xmlns:xlink" , "http://www.w3.org/1999/xlink" ) + , ("xmlns:dc" , "http://purl.org/dc/elements/1.1/" ) + , ("xmlns:meta" , "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" ) + , ("xmlns:number" , "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" ) + , ("xmlns:svg" , "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" ) + , ("xmlns:chart" , "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" ) + , ("xmlns:dr3d" , "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" ) + , ("xmlns:math" , "http://www.w3.org/1998/Math/MathML" ) + , ("xmlns:form" , "urn:oasis:names:tc:opendocument:xmlns:form:1.0" ) + , ("xmlns:script" , "urn:oasis:names:tc:opendocument:xmlns:script:1.0" ) + , ("xmlns:ooo" , "http://openoffice.org/2004/office" ) + , ("xmlns:ooow" , "http://openoffice.org/2004/writer" ) + , ("xmlns:oooc" , "http://openoffice.org/2004/calc" ) + , ("xmlns:dom" , "http://www.w3.org/2001/xml-events" ) + , ("xmlns:xforms" , "http://www.w3.org/2002/xforms" ) + , ("xmlns:xsd" , "http://www.w3.org/2001/XMLSchema" ) + , ("xmlns:xsi" , "http://www.w3.org/2001/XMLSchema-instance" ) + , ("office:version", "1.0" ) + ] diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs new file mode 100644 index 000000000..91826cbcd --- /dev/null +++ b/src/Text/Pandoc/Writers/RST.hs @@ -0,0 +1,346 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.RST + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to reStructuredText. + +reStructuredText: +-} +module Text.Pandoc.Writers.RST ( writeRST) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Blocks +import Data.List ( isPrefixOf, isSuffixOf, drop, intersperse ) +import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Control.Monad.State +import Control.Applicative ( (<$>) ) + +data WriterState = + WriterState { stNotes :: [[Block]] + , stLinks :: KeyTable + , stImages :: KeyTable + , stIncludes :: [String] + , stOptions :: WriterOptions + } + +-- | Convert Pandoc to RST. +writeRST :: WriterOptions -> Pandoc -> String +writeRST opts document = + let st = WriterState { stNotes = [], stLinks = [], + stImages = [], stIncludes = [], + stOptions = opts } + in render $ evalState (pandocToRST document) st + +-- | Return RST representation of document. +pandocToRST :: Pandoc -> State WriterState Doc +pandocToRST (Pandoc meta blocks) = do + opts <- get >>= (return . stOptions) + let before = writerIncludeBefore opts + let after = writerIncludeAfter opts + before' = if null before then empty else text before + after' = if null after then empty else text after + metaBlock <- metaToRST opts meta + let head' = if (writerStandalone opts) + then metaBlock $+$ text (writerHeader opts) + else empty + body <- blockListToRST blocks + includes <- get >>= (return . concat . stIncludes) + let includes' = if null includes then empty else text includes + notes <- get >>= (notesToRST . reverse . stNotes) + -- note that the notes may contain refs, so we do them first + refs <- get >>= (keyTableToRST . reverse . stLinks) + pics <- get >>= (pictTableToRST . reverse . stImages) + return $ head' $+$ before' $+$ includes' $+$ body $+$ notes $+$ text "" $+$ + refs $+$ pics $+$ after' + +-- | Return RST representation of reference key table. +keyTableToRST :: KeyTable -> State WriterState Doc +keyTableToRST refs = mapM keyToRST refs >>= return . vcat + +-- | Return RST representation of a reference key. +keyToRST :: ([Inline], (String, String)) + -> State WriterState Doc +keyToRST (label, (src, _)) = do + label' <- inlineListToRST label + let label'' = if ':' `elem` (render label') + then char '`' <> label' <> char '`' + else label' + return $ text ".. _" <> label'' <> text ": " <> text src + +-- | Return RST representation of notes. +notesToRST :: [[Block]] -> State WriterState Doc +notesToRST notes = + mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= + return . vcat + +-- | Return RST representation of a note. +noteToRST :: Int -> [Block] -> State WriterState Doc +noteToRST num note = do + contents <- blockListToRST note + let marker = text ".. [" <> text (show num) <> text "]" + return $ marker $$ nest 3 contents + +-- | Return RST representation of picture reference table. +pictTableToRST :: KeyTable -> State WriterState Doc +pictTableToRST refs = mapM pictToRST refs >>= return . vcat + +-- | Return RST representation of a picture substitution reference. +pictToRST :: ([Inline], (String, String)) + -> State WriterState Doc +pictToRST (label, (src, _)) = do + label' <- inlineListToRST label + return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> + text src + +-- | Take list of inline elements and return wrapped doc. +wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc +wrappedRST opts inlines = do + lineBreakDoc <- inlineToRST LineBreak + chunks <- mapM (wrapIfNeeded opts inlineListToRST) + (splitBy LineBreak inlines) + return $ vcat $ intersperse lineBreakDoc chunks + +-- | Escape special characters for RST. +escapeString :: String -> String +escapeString = escapeStringUsing (backslashEscapes "`\\|*_") + +-- | Convert bibliographic information into RST header. +metaToRST :: WriterOptions -> Meta -> State WriterState Doc +metaToRST opts (Meta title authors date) = do + title' <- titleToRST title + authors' <- authorsToRST authors + date' <- dateToRST date + let toc = if writerTableOfContents opts + then text "" $+$ text ".. contents::" + else empty + return $ title' $+$ authors' $+$ date' $+$ toc + +titleToRST :: [Inline] -> State WriterState Doc +titleToRST [] = return empty +titleToRST lst = do + contents <- inlineListToRST lst + let titleLength = length $ render contents + let border = text (replicate titleLength '=') + return $ border $+$ contents $+$ border <> text "\n" + +authorsToRST :: [String] -> State WriterState Doc +authorsToRST [] = return empty +authorsToRST (first:rest) = do + rest' <- authorsToRST rest + return $ (text ":Author: " <> text first) $+$ rest' + +dateToRST :: String -> State WriterState Doc +dateToRST [] = return empty +dateToRST str = return $ text ":Date: " <> text (escapeString str) + +-- | Convert Pandoc block element to RST. +blockToRST :: Block -- ^ Block element + -> State WriterState Doc +blockToRST Null = return empty +blockToRST (Plain inlines) = do + opts <- get >>= (return . stOptions) + wrappedRST opts inlines +blockToRST (Para inlines) = do + opts <- get >>= (return . stOptions) + contents <- wrappedRST opts inlines + return $ contents <> text "\n" +blockToRST (RawHtml str) = + let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in + return $ (text "\n.. raw:: html\n") $$ (nest 3 $ vcat $ map text (lines str')) +blockToRST HorizontalRule = return $ text "--------------\n" +blockToRST (Header level inlines) = do + contents <- inlineListToRST inlines + let headerLength = length $ render contents + let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) + let border = text $ replicate headerLength headerChar + return $ contents $+$ border <> text "\n" +blockToRST (CodeBlock (_,classes,_) str) = do + opts <- stOptions <$> get + let tabstop = writerTabStop opts + if "haskell" `elem` classes && writerLiterateHaskell opts + then return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" + else return $ (text "::\n") $+$ + (nest tabstop $ vcat $ map text (lines str)) <> text "\n" +blockToRST (BlockQuote blocks) = do + tabstop <- get >>= (return . writerTabStop . stOptions) + contents <- blockListToRST blocks + return $ (nest tabstop contents) <> text "\n" +blockToRST (Table caption _ widths headers rows) = do + caption' <- inlineListToRST caption + let caption'' = if null caption + then empty + else text "" $+$ (text "Table: " <> caption') + headers' <- mapM blockListToRST headers + let widthsInChars = map (floor . (78 *)) widths + let hpipeBlocks blocks = hcatBlocks [beg, middle, end] + where height = maximum (map heightOfBlock blocks) + sep' = TextBlock 3 height (replicate height " | ") + beg = TextBlock 2 height (replicate height "| ") + end = TextBlock 2 height (replicate height " |") + middle = hcatBlocks $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars + let head' = makeRow headers' + rows' <- mapM (\row -> do cols <- mapM blockListToRST row + return $ makeRow cols) rows + let border ch = char '+' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '+' + let body = vcat $ intersperse (border '-') $ map blockToDoc rows' + return $ border '-' $+$ blockToDoc head' $+$ border '=' $+$ body $+$ + border '-' $$ caption'' $$ text "" +blockToRST (BulletList items) = do + contents <- mapM bulletListItemToRST items + -- ensure that sublists have preceding blank line + return $ text "" $+$ vcat contents <> text "\n" +blockToRST (OrderedList (start, style', delim) items) = do + let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim + then take (length items) $ repeat "#." + else take (length items) $ orderedListMarkers + (start, style', delim) + let maxMarkerLength = maximum $ map length markers + let markers' = map (\m -> let s = maxMarkerLength - length m + in m ++ replicate s ' ') markers + contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ + zip markers' items + -- ensure that sublists have preceding blank line + return $ text "" $+$ vcat contents <> text "\n" +blockToRST (DefinitionList items) = do + contents <- mapM definitionListItemToRST items + return $ (vcat contents) <> text "\n" + +-- | Convert bullet list item (list of blocks) to RST. +bulletListItemToRST :: [Block] -> State WriterState Doc +bulletListItemToRST items = do + contents <- blockListToRST items + return $ (text "- ") <> contents + +-- | Convert ordered list item (a list of blocks) to RST. +orderedListItemToRST :: String -- ^ marker for list item + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToRST marker items = do + contents <- blockListToRST items + return $ (text marker <> char ' ') <> contents + +-- | Convert defintion list item (label, list of blocks) to RST. +definitionListItemToRST :: ([Inline], [Block]) -> State WriterState Doc +definitionListItemToRST (label, items) = do + label' <- inlineListToRST label + contents <- blockListToRST items + tabstop <- get >>= (return . writerTabStop . stOptions) + return $ label' $+$ nest tabstop contents + +-- | Convert list of Pandoc block elements to RST. +blockListToRST :: [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToRST blocks = mapM blockToRST blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to RST. +inlineListToRST :: [Inline] -> State WriterState Doc +inlineListToRST lst = mapM inlineToRST lst >>= return . hcat + +-- | Convert Pandoc inline element to RST. +inlineToRST :: Inline -> State WriterState Doc +inlineToRST (Emph lst) = do + contents <- inlineListToRST lst + return $ char '*' <> contents <> char '*' +inlineToRST (Strong lst) = do + contents <- inlineListToRST lst + return $ text "**" <> contents <> text "**" +inlineToRST (Strikeout lst) = do + contents <- inlineListToRST lst + return $ text "[STRIKEOUT:" <> contents <> char ']' +inlineToRST (Superscript lst) = do + contents <- inlineListToRST lst + return $ text "\\ :sup:`" <> contents <> text "`\\ " +inlineToRST (Subscript lst) = do + contents <- inlineListToRST lst + return $ text "\\ :sub:`" <> contents <> text "`\\ " +inlineToRST (SmallCaps lst) = inlineListToRST lst +inlineToRST (Quoted SingleQuote lst) = do + contents <- inlineListToRST lst + return $ char '\'' <> contents <> char '\'' +inlineToRST (Quoted DoubleQuote lst) = do + contents <- inlineListToRST lst + return $ char '"' <> contents <> char '"' +inlineToRST (Cite _ lst) = + inlineListToRST lst +inlineToRST EmDash = return $ text "--" +inlineToRST EnDash = return $ char '-' +inlineToRST Apostrophe = return $ char '\'' +inlineToRST Ellipses = return $ text "..." +inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" +inlineToRST (Str str) = return $ text $ escapeString str +inlineToRST (Math t str) = do + includes <- get >>= (return . stIncludes) + let rawMathRole = ".. role:: math(raw)\n" ++ + " :format: html latex\n" + if not (rawMathRole `elem` includes) + then modify $ \st -> st { stIncludes = rawMathRole : includes } + else return () + return $ if t == InlineMath + then text $ ":math:`$" ++ str ++ "$`" + else text $ ":math:`$$" ++ str ++ "$$`" +inlineToRST (TeX _) = return empty +inlineToRST (HtmlInline _) = return empty +inlineToRST (LineBreak) = do + return $ empty -- there's no line break in RST +inlineToRST Space = return $ char ' ' +inlineToRST (Link [Code str] (src, _)) | src == str || + src == "mailto:" ++ str = do + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + return $ text srcSuffix +inlineToRST (Link txt (src, tit)) = do + useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions) + linktext <- inlineListToRST $ normalizeSpaces txt + if useReferenceLinks + then do refs <- get >>= (return . stLinks) + let refs' = if (txt, (src, tit)) `elem` refs + then refs + else (txt, (src, tit)):refs + modify $ \st -> st { stLinks = refs' } + return $ char '`' <> linktext <> text "`_" + else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" +inlineToRST (Image alternate (source, tit)) = do + pics <- get >>= (return . stImages) + let labelsUsed = map fst pics + let txt = if null alternate || alternate == [Str ""] || + alternate `elem` labelsUsed + then [Str $ "image" ++ show (length pics)] + else alternate + let pics' = if (txt, (source, tit)) `elem` pics + then pics + else (txt, (source, tit)):pics + modify $ \st -> st { stImages = pics' } + label <- inlineListToRST txt + return $ char '|' <> label <> char '|' +inlineToRST (Note contents) = do + -- add to notes in state + notes <- get >>= (return . stNotes) + modify $ \st -> st { stNotes = contents:notes } + let ref = show $ (length notes) + 1 + return $ text " [" <> text ref <> text "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs new file mode 100644 index 000000000..fc6cd1bf0 --- /dev/null +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -0,0 +1,291 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.RTF + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to RTF (rich text format). +-} +module Text.Pandoc.Writers.RTF ( writeRTF ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Readers.TeXMath +import Data.List ( isSuffixOf, intercalate ) +import Data.Char ( ord, isDigit ) + +-- | Convert Pandoc to a string in rich text format. +writeRTF :: WriterOptions -> Pandoc -> String +writeRTF options (Pandoc meta blocks) = + let head' = if writerStandalone options + then rtfHeader (writerHeader options) meta + else "" + toc = if writerTableOfContents options + then tableOfContents $ filter isHeaderBlock blocks + else "" + foot = if writerStandalone options then "\n}\n" else "" + body = writerIncludeBefore options ++ + concatMap (blockToRTF 0 AlignDefault) blocks ++ + writerIncludeAfter options + in head' ++ toc ++ body ++ foot + +-- | Construct table of contents from list of header blocks. +tableOfContents :: [Block] -> String +tableOfContents headers = + let contentsTree = hierarchicalize headers + in concatMap (blockToRTF 0 AlignDefault) $ + [Header 1 [Str "Contents"], + BulletList (map elementToListItem contentsTree)] + +elementToListItem :: Element -> [Block] +elementToListItem (Blk _) = [] +elementToListItem (Sec sectext subsecs) = [Plain sectext] ++ + if null subsecs + then [] + else [BulletList (map elementToListItem subsecs)] + +-- | Convert unicode characters (> 127) into rich text format representation. +handleUnicode :: String -> String +handleUnicode [] = [] +handleUnicode (c:cs) = + if ord c > 127 + then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs + else c:(handleUnicode cs) + +-- | Escape special characters. +escapeSpecial :: String -> String +escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) + +-- | Escape strings as needed for rich text format. +stringToRTF :: String -> String +stringToRTF = handleUnicode . escapeSpecial + +-- | Escape things as needed for code block in RTF. +codeStringToRTF :: String -> String +codeStringToRTF str = intercalate "\\line\n" $ lines (stringToRTF str) + +-- | Make a paragraph with first-line indent, block indent, and space after. +rtfParSpaced :: Int -- ^ space after (in twips) + -> Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfParSpaced spaceAfter indent firstLineIndent alignment content = + let alignString = case alignment of + AlignLeft -> "\\ql " + AlignRight -> "\\qr " + AlignCenter -> "\\qc " + AlignDefault -> "\\ql " + in "{\\pard " ++ alignString ++ + "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" + +-- | Default paragraph. +rtfPar :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfPar = rtfParSpaced 180 + +-- | Compact paragraph (e.g. for compact list items). +rtfCompact :: Int -- ^ block indent (in twips) + -> Int -- ^ first line indent (relative to block) (in twips) + -> Alignment -- ^ alignment + -> String -- ^ string with content + -> String +rtfCompact = rtfParSpaced 0 + +-- number of twips to indent +indentIncrement :: Int +indentIncrement = 720 + +listIncrement :: Int +listIncrement = 360 + +-- | Returns appropriate bullet list marker for indent level. +bulletMarker :: Int -> String +bulletMarker indent = case indent `mod` 720 of + 0 -> "\\bullet " + _ -> "\\endash " + +-- | Returns appropriate (list of) ordered list markers for indent level. +orderedMarkers :: Int -> ListAttributes -> [String] +orderedMarkers indent (start, style, delim) = + if style == DefaultStyle && delim == DefaultDelim + then case indent `mod` 720 of + 0 -> orderedListMarkers (start, Decimal, Period) + _ -> orderedListMarkers (start, LowerAlpha, Period) + else orderedListMarkers (start, style, delim) + +-- | Returns RTF header. +rtfHeader :: String -- ^ header text + -> Meta -- ^ bibliographic information + -> String +rtfHeader headerText (Meta title authors date) = + let titletext = if null title + then "" + else rtfPar 0 0 AlignCenter $ + "\\b \\fs36 " ++ inlineListToRTF title + authorstext = if null authors + then "" + else rtfPar 0 0 AlignCenter (" " ++ (intercalate "\\" $ + map stringToRTF authors)) + datetext = if date == "" + then "" + else rtfPar 0 0 AlignCenter (" " ++ stringToRTF date) in + let spacer = if null (titletext ++ authorstext ++ datetext) + then "" + else rtfPar 0 0 AlignDefault "" in + headerText ++ titletext ++ authorstext ++ datetext ++ spacer + +-- | Convert Pandoc block element to RTF. +blockToRTF :: Int -- ^ indent level + -> Alignment -- ^ alignment + -> Block -- ^ block to convert + -> String +blockToRTF _ _ Null = "" +blockToRTF indent alignment (Plain lst) = + rtfCompact indent 0 alignment $ inlineListToRTF lst +blockToRTF indent alignment (Para lst) = + rtfPar indent 0 alignment $ inlineListToRTF lst +blockToRTF indent alignment (BlockQuote lst) = + concatMap (blockToRTF (indent + indentIncrement) alignment) lst +blockToRTF indent _ (CodeBlock _ str) = + rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) +blockToRTF _ _ (RawHtml _) = "" +blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ + concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst +blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ + zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst +blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ + concatMap (definitionListItemToRTF alignment indent) lst +blockToRTF indent _ HorizontalRule = + rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" +blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ + "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst +blockToRTF indent alignment (Table caption aligns sizes headers rows) = + tableRowToRTF True indent aligns sizes headers ++ + concatMap (tableRowToRTF False indent aligns sizes) rows ++ + rtfPar indent 0 alignment (inlineListToRTF caption) + +tableRowToRTF :: Bool -> Int -> [Alignment] -> [Double] -> [[Block]] -> String +tableRowToRTF header indent aligns sizes cols = + let columns = concat $ zipWith (tableItemToRTF indent) aligns cols + totalTwips = 6 * 1440 -- 6 inches + rightEdges = tail $ scanl (\sofar new -> sofar + floor (new * totalTwips)) + (0 :: Integer) sizes + cellDefs = map (\edge -> (if header + then "\\clbrdrb\\brdrs" + else "") ++ "\\cellx" ++ show edge) + rightEdges + start = "{\n\\trowd \\trgaph120\n" ++ concat cellDefs ++ "\n" ++ + "\\trkeep\\intbl\n{\n" + end = "}\n\\intbl\\row}\n" + in start ++ columns ++ end + +tableItemToRTF :: Int -> Alignment -> [Block] -> String +tableItemToRTF indent alignment item = + let contents = concatMap (blockToRTF indent alignment) item + in "{\\intbl " ++ contents ++ "\\cell}\n" + +-- | Ensure that there's the same amount of space after compact +-- lists as after regular lists. +spaceAtEnd :: String -> String +spaceAtEnd str = + if isSuffixOf "\\par}\n" str + then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" + else str + +-- | Convert list item (list of blocks) to RTF. +listItemToRTF :: Alignment -- ^ alignment + -> Int -- ^ indent level + -> String -- ^ list start marker + -> [Block] -- ^ list item (list of blocks) + -> [Char] +listItemToRTF alignment indent marker [] = + rtfCompact (indent + listIncrement) (0 - listIncrement) alignment + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") +listItemToRTF alignment indent marker list = + let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list + listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ + show listIncrement ++ "\\tab" + insertListMarker ('\\':'f':'i':'-':d:xs) | isDigit d = + listMarker ++ dropWhile isDigit xs + insertListMarker ('\\':'f':'i':d:xs) | isDigit d = + listMarker ++ dropWhile isDigit xs + insertListMarker (x:xs) = + x : insertListMarker xs + insertListMarker [] = [] + -- insert the list marker into the (processed) first block + in insertListMarker first ++ concat rest + +-- | Convert definition list item (label, list of blocks) to RTF. +definitionListItemToRTF :: Alignment -- ^ alignment + -> Int -- ^ indent level + -> ([Inline],[Block]) -- ^ list item (list of blocks) + -> [Char] +definitionListItemToRTF alignment indent (label, items) = + let labelText = blockToRTF indent alignment (Plain label) + itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) items + in labelText ++ itemsText + +-- | Convert list of inline items to RTF. +inlineListToRTF :: [Inline] -- ^ list of inlines to convert + -> String +inlineListToRTF lst = concatMap inlineToRTF lst + +-- | Convert inline item to RTF. +inlineToRTF :: Inline -- ^ inline to convert + -> String +inlineToRTF (Emph lst) = "{\\i " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (Strong lst) = "{\\b " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" +inlineToRTF (Quoted SingleQuote lst) = + "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" +inlineToRTF (Quoted DoubleQuote lst) = + "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" +inlineToRTF Apostrophe = "\\u8217'" +inlineToRTF Ellipses = "\\u8230?" +inlineToRTF EmDash = "\\u8212-" +inlineToRTF EnDash = "\\u8211-" +inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" +inlineToRTF (Str str) = stringToRTF str +inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str +inlineToRTF (Cite _ lst) = inlineListToRTF lst +inlineToRTF (TeX _) = "" +inlineToRTF (HtmlInline _) = "" +inlineToRTF (LineBreak) = "\\line " +inlineToRTF Space = " " +inlineToRTF (Link text (src, _)) = + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ + "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" +inlineToRTF (Image _ (source, _)) = + "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Note contents) = + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs new file mode 100644 index 000000000..6f528503a --- /dev/null +++ b/src/Text/Pandoc/Writers/S5.hs @@ -0,0 +1,157 @@ +{-# LANGUAGE CPP, TemplateHaskell #-} +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.S5 + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Definitions for creation of S5 powerpoint-like HTML. +(See .) +-} +module Text.Pandoc.Writers.S5 ( + -- * Strings + s5Meta, + s5Javascript, + s5CSS, + s5Links, + -- * Functions + writeS5, + writeS5String, + insertS5Structure + ) where +import Text.Pandoc.Shared ( WriterOptions ) +import Text.Pandoc.TH ( contentsOf ) +import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) +import Text.Pandoc.Definition +import Text.XHtml.Strict +import System.FilePath ( () ) +import Data.List ( intercalate ) + +s5Meta :: String +s5Meta = "\n\n\n" + +s5Javascript :: String +#ifndef __HADDOCK__ +s5Javascript = "\n" +#endif + +s5CoreCSS :: String +#ifndef __HADDOCK__ +s5CoreCSS = $(contentsOf $ "data" "ui" "default" "s5-core.css") +#endif + +s5FramingCSS :: String +#ifndef __HADDOCK__ +s5FramingCSS = $(contentsOf $ "data" "ui" "default" "framing.css") +#endif + +s5PrettyCSS :: String +#ifndef __HADDOCK__ +s5PrettyCSS = $(contentsOf $ "data" "ui" "default" "pretty.css") +#endif + +s5OperaCSS :: String +#ifndef __HADDOCK__ +s5OperaCSS = $(contentsOf $ "data" "ui" "default" "opera.css") +#endif + +s5OutlineCSS :: String +#ifndef __HADDOCK__ +s5OutlineCSS = $(contentsOf $ "data" "ui" "default" "outline.css") +#endif + +s5PrintCSS :: String +#ifndef __HADDOCK__ +s5PrintCSS = $(contentsOf $ "data" "ui" "default" "print.css") +#endif + +s5CSS :: String +s5CSS = "\n\n\n\n" + +s5Links :: String +s5Links = "\n\n\n\n\n\n\n" + +-- | Converts Pandoc document to an S5 HTML presentation (Html structure). +writeS5 :: WriterOptions -> Pandoc -> Html +writeS5 options = (writeHtml options) . insertS5Structure + +-- | Converts Pandoc document to an S5 HTML presentation (string). +writeS5String :: WriterOptions -> Pandoc -> String +writeS5String options = (writeHtmlString options) . insertS5Structure + +-- | Inserts HTML needed for an S5 presentation (e.g. around slides). +layoutDiv :: [Inline] -- ^ Title of document (for header or footer) + -> String -- ^ Date of document (for header or footer) + -> [Block] -- ^ List of block elements returned +layoutDiv title' date = [(RawHtml "
    \n
    \n
    \n
    \n
    \n"), (Header 1 [Str date]), (Header 2 title'), (RawHtml "
    \n
    \n")] + +presentationStart :: Block +presentationStart = RawHtml "
    \n\n" + +presentationEnd :: Block +presentationEnd = RawHtml "
    \n" + +slideStart :: Block +slideStart = RawHtml "
    \n" + +slideEnd :: Block +slideEnd = RawHtml "
    \n" + +-- | Returns 'True' if block is a Header 1. +isH1 :: Block -> Bool +isH1 (Header 1 _) = True +isH1 _ = False + +-- | Insert HTML around sections to make individual slides. +insertSlides :: Bool -> [Block] -> [Block] +insertSlides beginning blocks = + let (beforeHead, rest) = break isH1 blocks in + if (null rest) then + if beginning then + beforeHead + else + beforeHead ++ [slideEnd] + else + if beginning then + beforeHead ++ + slideStart:(head rest):(insertSlides False (tail rest)) + else + beforeHead ++ + slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) + +-- | Insert blocks into 'Pandoc' for slide structure. +insertS5Structure :: Pandoc -> Pandoc +insertS5Structure (Pandoc meta' []) = Pandoc meta' [] +insertS5Structure (Pandoc (Meta title' authors date) blocks) = + let slides = insertSlides True blocks + firstSlide = if not (null title') + then [slideStart, (Header 1 title'), + (Header 3 [Str (intercalate ", " authors)]), + (Header 4 [Str date]), slideEnd] + else [] + newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++ + slides ++ [presentationEnd] + in Pandoc (Meta title' authors date) newBlocks diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs new file mode 100644 index 000000000..305a1a8d0 --- /dev/null +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -0,0 +1,474 @@ +{- +Copyright (C) 2008 John MacFarlane and Peter Wang + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Texinfo + Copyright : Copyright (C) 2008 John MacFarlane and Peter Wang + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' format into Texinfo. +-} +module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Readers.TeXMath +import Text.Printf ( printf ) +import Data.List ( isSuffixOf ) +import Data.Char ( chr, ord ) +import qualified Data.Set as S +import Control.Monad.State +import Text.PrettyPrint.HughesPJ hiding ( Str ) + +data WriterState = + WriterState { stIncludes :: S.Set String -- strings to include in header + } + +{- TODO: + - internal cross references a la HTML + - generated .texi files don't work when run through texi2dvi + -} + +-- | Add line to header. +addToHeader :: String -> State WriterState () +addToHeader str = do + st <- get + let includes = stIncludes st + put st {stIncludes = S.insert str includes} + +-- | Convert Pandoc to Texinfo. +writeTexinfo :: WriterOptions -> Pandoc -> String +writeTexinfo options document = + render $ evalState (pandocToTexinfo options $ wrapTop document) $ + WriterState { stIncludes = S.empty } + +-- | Add a "Top" node around the document, needed by Texinfo. +wrapTop :: Pandoc -> Pandoc +wrapTop (Pandoc (Meta title authors date) blocks) = + Pandoc (Meta title authors date) (Header 0 title : blocks) + +pandocToTexinfo :: WriterOptions -> Pandoc -> State WriterState Doc +pandocToTexinfo options (Pandoc meta blocks) = do + main <- blockListToTexinfo blocks + head' <- if writerStandalone options + then texinfoHeader options meta + else return empty + let before = if null (writerIncludeBefore options) + then empty + else text (writerIncludeBefore options) + let after = if null (writerIncludeAfter options) + then empty + else text (writerIncludeAfter options) + let body = before $$ main $$ after + -- XXX toc untested + let toc = if writerTableOfContents options + then text "@contents" + else empty + let foot = if writerStandalone options + then text "@bye" + else empty + return $ head' $$ toc $$ body $$ foot + +-- | Insert bibliographic information into Texinfo header. +texinfoHeader :: WriterOptions -- ^ Options, including Texinfo header + -> Meta -- ^ Meta with bibliographic information + -> State WriterState Doc +texinfoHeader options (Meta title authors date) = do + titletext <- if null title + then return empty + else do + t <- inlineListToTexinfo title + return $ text "@title " <> t + headerIncludes <- get >>= return . S.toList . stIncludes + let extras = text $ unlines headerIncludes + let authorstext = map makeAuthor authors + let datetext = if date == "" + then empty + else text $ stringToTexinfo date + + let baseHeader = text $ writerHeader options + let header = baseHeader $$ extras + return $ text "\\input texinfo" $$ + header $$ + text "@ifnottex" $$ + text "@paragraphindent 0" $$ + text "@end ifnottex" $$ + text "@titlepage" $$ + titletext $$ vcat authorstext $$ + datetext $$ + text "@end titlepage" + +makeAuthor :: String -> Doc +makeAuthor author = text $ "@author " ++ (stringToTexinfo author) + +-- | Escape things as needed for Texinfo. +stringToTexinfo :: String -> String +stringToTexinfo = escapeStringUsing texinfoEscapes + where texinfoEscapes = [ ('{', "@{") + , ('}', "@}") + , ('@', "@@") + , (',', "@comma{}") -- only needed in argument lists + , ('\160', "@ ") + ] + +-- | Puts contents into Texinfo command. +inCmd :: String -> Doc -> Doc +inCmd cmd contents = char '@' <> text cmd <> braces contents + +-- | Convert Pandoc block element to Texinfo. +blockToTexinfo :: Block -- ^ Block to convert + -> State WriterState Doc + +blockToTexinfo Null = return empty + +blockToTexinfo (Plain lst) = + inlineListToTexinfo lst + +blockToTexinfo (Para lst) = + inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo + +blockToTexinfo (BlockQuote lst) = do + contents <- blockListToTexinfo lst + return $ text "@quotation" $$ + contents $$ + text "@end quotation" + +blockToTexinfo (CodeBlock _ str) = do + return $ text "@verbatim" $$ + vcat (map text (lines str)) $$ + text "@end verbatim\n" + +blockToTexinfo (RawHtml _) = return empty + +blockToTexinfo (BulletList lst) = do + items <- mapM listItemToTexinfo lst + return $ text "@itemize" $$ + vcat items $$ + text "@end itemize\n" + +blockToTexinfo (OrderedList (start, numstyle, _) lst) = do + items <- mapM listItemToTexinfo lst + return $ text "@enumerate " <> exemplar $$ + vcat items $$ + text "@end enumerate\n" + where + exemplar = case numstyle of + DefaultStyle -> decimal + Decimal -> decimal + UpperRoman -> decimal -- Roman numerals not supported + LowerRoman -> decimal + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + decimal = if start == 1 + then empty + else text (show start) + upperAlpha = text [chr $ ord 'A' + start - 1] + lowerAlpha = text [chr $ ord 'a' + start - 1] + +blockToTexinfo (DefinitionList lst) = do + items <- mapM defListItemToTexinfo lst + return $ text "@table @asis" $$ + vcat items $$ + text "@end table\n" + +blockToTexinfo HorizontalRule = + -- XXX can't get the equivalent from LaTeX.hs to work + return $ text "@iftex" $$ + text "@bigskip@hrule@bigskip" $$ + text "@end iftex" $$ + text "@ifnottex" $$ + text (take 72 $ repeat '-') $$ + text "@end ifnottex" + +blockToTexinfo (Header 0 lst) = do + txt <- if null lst + then return $ text "Top" + else inlineListToTexinfo lst + return $ text "@node Top" $$ + text "@top " <> txt <> char '\n' + +blockToTexinfo (Header level lst) = do + node <- inlineListForNode lst + txt <- inlineListToTexinfo lst + return $ if (level > 0) && (level <= 4) + then text "\n@node " <> node <> char '\n' <> + text (seccmd level) <> txt + else txt + where + seccmd 1 = "@chapter " + seccmd 2 = "@section " + seccmd 3 = "@subsection " + seccmd 4 = "@subsubsection " + seccmd _ = error "illegal seccmd level" + +blockToTexinfo (Table caption aligns widths heads rows) = do + headers <- tableHeadToTexinfo aligns heads + captionText <- inlineListToTexinfo caption + rowsText <- mapM (tableRowToTexinfo aligns) rows + let colWidths = map (printf "%.2f ") widths + let colDescriptors = concat colWidths + let tableBody = text ("@multitable @columnfractions " ++ colDescriptors) $$ + headers $$ + vcat rowsText $$ + text "@end multitable" + return $ if isEmpty captionText + then tableBody <> char '\n' + else text "@float" $$ + tableBody $$ + inCmd "caption" captionText $$ + text "@end float" + +tableHeadToTexinfo :: [Alignment] + -> [[Block]] + -> State WriterState Doc +tableHeadToTexinfo = tableAnyRowToTexinfo "@headitem " + +tableRowToTexinfo :: [Alignment] + -> [[Block]] + -> State WriterState Doc +tableRowToTexinfo = tableAnyRowToTexinfo "@item " + +tableAnyRowToTexinfo :: String + -> [Alignment] + -> [[Block]] + -> State WriterState Doc +tableAnyRowToTexinfo itemtype aligns cols = + zipWithM alignedBlock aligns cols >>= + return . (text itemtype $$) . foldl (\row item -> row $$ + (if isEmpty row then empty else text " @tab ") <> item) empty + +alignedBlock :: Alignment + -> [Block] + -> State WriterState Doc +-- XXX @flushleft and @flushright text won't get word wrapped. Since word +-- wrapping is more important than alignment, we ignore the alignment. +alignedBlock _ = blockListToTexinfo +{- +alignedBlock AlignLeft col = do + b <- blockListToTexinfo col + return $ text "@flushleft" $$ b $$ text "@end flushleft" +alignedBlock AlignRight col = do + b <- blockListToTexinfo col + return $ text "@flushright" $$ b $$ text "@end flushright" +alignedBlock _ col = blockListToTexinfo col +-} + +-- | Convert Pandoc block elements to Texinfo. +blockListToTexinfo :: [Block] + -> State WriterState Doc +blockListToTexinfo [] = return $ empty +blockListToTexinfo (x:xs) = do + x' <- blockToTexinfo x + case x of + Header level _ -> do + -- We need need to insert a menu for this node. + let (before, after) = break isHeader xs + before' <- blockListToTexinfo before + let menu = if level < 4 + then collectNodes (level + 1) after + else [] + lines' <- mapM makeMenuLine menu + let menu' = if null lines' + then empty + else text "@menu" $$ + vcat lines' $$ + text "@end menu" + after' <- blockListToTexinfo after + return $ x' $$ before' $$ menu' $$ after' + Para _ -> do + xs' <- blockListToTexinfo xs + case xs of + ((CodeBlock _ _):_) -> return $ x' $$ xs' + _ -> return $ x' $$ text "" $$ xs' + _ -> do + xs' <- blockListToTexinfo xs + return $ x' $$ xs' + +isHeader :: Block -> Bool +isHeader (Header _ _) = True +isHeader _ = False + +collectNodes :: Int -> [Block] -> [Block] +collectNodes _ [] = [] +collectNodes level (x:xs) = + case x of + (Header hl _) -> + if hl < level + then [] + else if hl == level + then x : collectNodes level xs + else collectNodes level xs + _ -> + collectNodes level xs + +makeMenuLine :: Block + -> State WriterState Doc +makeMenuLine (Header _ lst) = do + txt <- inlineListForNode lst + return $ text "* " <> txt <> text "::" +makeMenuLine _ = error "makeMenuLine called with non-Header block" + +listItemToTexinfo :: [Block] + -> State WriterState Doc +listItemToTexinfo lst = blockListToTexinfo lst >>= + return . (text "@item" $$) + +defListItemToTexinfo :: ([Inline], [Block]) + -> State WriterState Doc +defListItemToTexinfo (term, def) = do + term' <- inlineListToTexinfo term + def' <- blockListToTexinfo def + return $ text "@item " <> term' <> text "\n" $$ def' + +-- | Convert list of inline elements to Texinfo. +inlineListToTexinfo :: [Inline] -- ^ Inlines to convert + -> State WriterState Doc +inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat + +-- | Convert list of inline elements to Texinfo acceptable for a node name. +inlineListForNode :: [Inline] -- ^ Inlines to convert + -> State WriterState Doc +inlineListForNode lst = mapM inlineForNode lst >>= return . hcat + +inlineForNode :: Inline -> State WriterState Doc +inlineForNode (Str str) = return $ text $ filter (not.disallowedInNode) str +inlineForNode (Emph lst) = inlineListForNode lst +inlineForNode (Strong lst) = inlineListForNode lst +inlineForNode (Strikeout lst) = inlineListForNode lst +inlineForNode (Superscript lst) = inlineListForNode lst +inlineForNode (Subscript lst) = inlineListForNode lst +inlineForNode (SmallCaps lst) = inlineListForNode lst +inlineForNode (Quoted _ lst) = inlineListForNode lst +inlineForNode (Cite _ lst) = inlineListForNode lst +inlineForNode (Code str) = inlineForNode (Str str) +inlineForNode Space = return $ char ' ' +inlineForNode EmDash = return $ text "---" +inlineForNode EnDash = return $ text "--" +inlineForNode Apostrophe = return $ char '\'' +inlineForNode Ellipses = return $ text "..." +inlineForNode LineBreak = return empty +inlineForNode (Math _ str) = inlineListForNode $ readTeXMath str +inlineForNode (TeX _) = return empty +inlineForNode (HtmlInline _) = return empty +inlineForNode (Link lst _) = inlineListForNode lst +inlineForNode (Image lst _) = inlineListForNode lst +inlineForNode (Note _) = return empty + +-- periods, commas, colons, and parentheses are disallowed in node names +disallowedInNode :: Char -> Bool +disallowedInNode c = c `elem` ".,:()" + +-- | Convert inline element to Texinfo +inlineToTexinfo :: Inline -- ^ Inline to convert + -> State WriterState Doc + +inlineToTexinfo (Emph lst) = + inlineListToTexinfo lst >>= return . inCmd "emph" + +inlineToTexinfo (Strong lst) = + inlineListToTexinfo lst >>= return . inCmd "strong" + +inlineToTexinfo (Strikeout lst) = do + addToHeader $ "@macro textstrikeout{text}\n" ++ + "~~\\text\\~~\n" ++ + "@end macro\n" + contents <- inlineListToTexinfo lst + return $ text "@textstrikeout{" <> contents <> text "}" + +inlineToTexinfo (Superscript lst) = do + addToHeader $ "@macro textsuperscript{text}\n" ++ + "@iftex\n" ++ + "@textsuperscript{\\text\\}\n" ++ + "@end iftex\n" ++ + "@ifnottex\n" ++ + "^@{\\text\\@}\n" ++ + "@end ifnottex\n" ++ + "@end macro\n" + contents <- inlineListToTexinfo lst + return $ text "@textsuperscript{" <> contents <> char '}' + +inlineToTexinfo (Subscript lst) = do + addToHeader $ "@macro textsubscript{text}\n" ++ + "@iftex\n" ++ + "@textsubscript{\\text\\}\n" ++ + "@end iftex\n" ++ + "@ifnottex\n" ++ + "_@{\\text\\@}\n" ++ + "@end ifnottex\n" ++ + "@end macro\n" + contents <- inlineListToTexinfo lst + return $ text "@textsubscript{" <> contents <> char '}' + +inlineToTexinfo (SmallCaps lst) = + inlineListToTexinfo lst >>= return . inCmd "sc" + +inlineToTexinfo (Code str) = do + return $ text $ "@code{" ++ stringToTexinfo str ++ "}" + +inlineToTexinfo (Quoted SingleQuote lst) = do + contents <- inlineListToTexinfo lst + return $ char '`' <> contents <> char '\'' + +inlineToTexinfo (Quoted DoubleQuote lst) = do + contents <- inlineListToTexinfo lst + return $ text "``" <> contents <> text "''" + +inlineToTexinfo (Cite _ lst) = + inlineListToTexinfo lst +inlineToTexinfo Apostrophe = return $ char '\'' +inlineToTexinfo EmDash = return $ text "---" +inlineToTexinfo EnDash = return $ text "--" +inlineToTexinfo Ellipses = return $ text "@dots{}" +inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) +inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str +inlineToTexinfo (TeX str) = return $ text "@tex" $$ text str $$ text "@end tex" +inlineToTexinfo (HtmlInline _) = return empty +inlineToTexinfo (LineBreak) = return $ text "@*" +inlineToTexinfo Space = return $ char ' ' + +inlineToTexinfo (Link txt (src, _)) = do + case txt of + [Code x] | x == src -> -- autolink + do return $ text $ "@url{" ++ x ++ "}" + _ -> do contents <- inlineListToTexinfo txt + let src1 = stringToTexinfo src + return $ text ("@uref{" ++ src1 ++ ",") <> contents <> + char '}' + +inlineToTexinfo (Image alternate (source, _)) = do + content <- inlineListToTexinfo alternate + return $ text ("@image{" ++ base ++ ",,,") <> content <> text "," <> + text (ext ++ "}") + where + (revext, revbase) = break (=='.') (reverse source) + ext = reverse revext + base = case revbase of + ('.' : rest) -> reverse rest + _ -> reverse revbase + +inlineToTexinfo (Note contents) = do + contents' <- blockListToTexinfo contents + let rawnote = stripTrailingNewlines $ render contents' + let optNewline = "@end verbatim" `isSuffixOf` rawnote + return $ text "@footnote{" <> + text rawnote <> + (if optNewline then char '\n' else empty) <> + char '}' diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs new file mode 100644 index 000000000..14e2eebbb --- /dev/null +++ b/src/Text/Pandoc/XML.hs @@ -0,0 +1,88 @@ +{- +Copyright (C) 2006-7 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.XML + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Functions for escaping and formatting XML. +-} +module Text.Pandoc.XML ( escapeCharForXML, + escapeStringForXML, + inTags, + selfClosingTag, + inTagsSimple, + inTagsIndented ) where +import Text.PrettyPrint.HughesPJ + +-- | Escape one character as needed for XML. +escapeCharForXML :: Char -> String +escapeCharForXML x = case x of + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + '\160' -> " " + c -> [c] + +-- | True if the character needs to be escaped. +needsEscaping :: Char -> Bool +needsEscaping c = c `elem` "&<>\"\160" + +-- | Escape string as needed for XML. Entity references are not preserved. +escapeStringForXML :: String -> String +escapeStringForXML "" = "" +escapeStringForXML str = + case break needsEscaping str of + (okay, "") -> okay + (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs + +-- | Return a text object with a string of formatted XML attributes. +attributeList :: [(String, String)] -> Doc +attributeList = text . concatMap + (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ + escapeStringForXML b ++ "\"") + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes and (if specified) indentation. +inTags:: Bool -> String -> [(String, String)] -> Doc -> Doc +inTags isIndented tagType attribs contents = + let openTag = char '<' <> text tagType <> attributeList attribs <> + char '>' + closeTag = text " text tagType <> char '>' + in if isIndented + then openTag $$ nest 2 contents $$ closeTag + else openTag <> contents <> closeTag + +-- | Return a self-closing tag of tagType with specified attributes +selfClosingTag :: String -> [(String, String)] -> Doc +selfClosingTag tagType attribs = + char '<' <> text tagType <> attributeList attribs <> text " />" + +-- | Put the supplied contents between start and end tags of tagType. +inTagsSimple :: String -> Doc -> Doc +inTagsSimple tagType = inTags False tagType [] + +-- | Put the supplied contents in indented block btw start and end tags. +inTagsIndented :: String -> Doc -> Doc +inTagsIndented tagType = inTags True tagType [] -- cgit v1.2.3