summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Shared.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r--src/Text/Pandoc/Shared.hs953
1 files changed, 953 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
new file mode 100644
index 000000000..6854e5ae6
--- /dev/null
+++ b/src/Text/Pandoc/Shared.hs
@@ -0,0 +1,953 @@
+{-# LANGUAGE CPP #-}
+{-
+Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Shared
+ Copyright : Copyright (C) 2006-8 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Shared (
+ -- * List processing
+ splitBy,
+ splitByIndices,
+ substitute,
+ -- * Text processing
+ backslashEscapes,
+ escapeStringUsing,
+ stripTrailingNewlines,
+ removeLeadingTrailingSpace,
+ removeLeadingSpace,
+ removeTrailingSpace,
+ stripFirstAndLast,
+ camelCaseToHyphenated,
+ toRomanNumeral,
+ wrapped,
+ wrapIfNeeded,
+ wrappedTeX,
+ wrapTeXIfNeeded,
+ BlockWrapper (..),
+ wrappedBlocksToDoc,
+ -- * Parsing
+ (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ emailAddress,
+ uri,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ failUnlessLHS,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ lookupKeySrc,
+ refsMatch,
+ -- * Prettyprinting
+ hang',
+ prettyPandoc,
+ -- * Pandoc block and inline list processing
+ orderedListMarkers,
+ normalizeSpaces,
+ compactify,
+ Element (..),
+ hierarchicalize,
+ isHeaderBlock,
+ -- * Writer options
+ HTMLMathMethod (..),
+ ObfuscationMethod (..),
+ WriterOptions (..),
+ defaultWriterOptions,
+ -- * File handling
+ inDirectory
+ ) where
+
+import Text.Pandoc.Definition
+import Text.ParserCombinators.Parsec
+import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
+import qualified Text.PrettyPrint.HughesPJ as PP
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
+import Data.List ( find, isPrefixOf, intercalate )
+import Control.Monad ( join )
+import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import System.Directory
+import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
+import System.IO.UTF8
+
+--
+-- List processing
+--
+
+-- | Split list by groups of one or more sep.
+splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy _ [] = []
+splitBy sep lst =
+ let (first, rest) = break (== sep) lst
+ rest' = dropWhile (== sep) rest
+ in first:(splitBy sep rest')
+
+-- | Split list into chunks divided at specified indices.
+splitByIndices :: [Int] -> [a] -> [[a]]
+splitByIndices [] lst = [lst]
+splitByIndices (x:xs) lst =
+ let (first, rest) = splitAt x lst in
+ first:(splitByIndices (map (\y -> y - x) xs) rest)
+
+-- | Replace each occurrence of one sublist in a list with another.
+substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
+substitute _ _ [] = []
+substitute [] _ lst = lst
+substitute target replacement lst =
+ if target `isPrefixOf` lst
+ then replacement ++ (substitute target replacement $ drop (length target) lst)
+ else (head lst):(substitute target replacement $ tail lst)
+
+--
+-- Text processing
+--
+
+-- | Returns an association list of backslash escapes for the
+-- designated characters.
+backslashEscapes :: [Char] -- ^ list of special characters to escape
+ -> [(Char, String)]
+backslashEscapes = map (\ch -> (ch, ['\\',ch]))
+
+-- | Escape a string of characters, using an association list of
+-- characters and strings.
+escapeStringUsing :: [(Char, String)] -> String -> String
+escapeStringUsing _ [] = ""
+escapeStringUsing escapeTable (x:xs) =
+ case (lookup x escapeTable) of
+ Just str -> str ++ rest
+ Nothing -> x:rest
+ where rest = escapeStringUsing escapeTable xs
+
+-- | Strip trailing newlines from string.
+stripTrailingNewlines :: String -> String
+stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
+
+-- | Remove leading and trailing space (including newlines) from string.
+removeLeadingTrailingSpace :: String -> String
+removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace
+
+-- | Remove leading space (including newlines) from string.
+removeLeadingSpace :: String -> String
+removeLeadingSpace = dropWhile (`elem` " \n\t")
+
+-- | Remove trailing space (including newlines) from string.
+removeTrailingSpace :: String -> String
+removeTrailingSpace = reverse . removeLeadingSpace . reverse
+
+-- | Strip leading and trailing characters from string
+stripFirstAndLast :: String -> String
+stripFirstAndLast str =
+ drop 1 $ take ((length str) - 1) str
+
+-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
+camelCaseToHyphenated :: String -> String
+camelCaseToHyphenated [] = ""
+camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
+ a:'-':(toLower b):(camelCaseToHyphenated rest)
+camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
+
+-- | Convert number < 4000 to uppercase roman numeral.
+toRomanNumeral :: Int -> String
+toRomanNumeral x =
+ if x >= 4000 || x < 0
+ then "?"
+ else case x of
+ _ | x >= 1000 -> "M" ++ toRomanNumeral (x - 1000)
+ _ | x >= 900 -> "CM" ++ toRomanNumeral (x - 900)
+ _ | x >= 500 -> "D" ++ toRomanNumeral (x - 500)
+ _ | x >= 400 -> "CD" ++ toRomanNumeral (x - 400)
+ _ | x >= 100 -> "C" ++ toRomanNumeral (x - 100)
+ _ | x >= 90 -> "XC" ++ toRomanNumeral (x - 90)
+ _ | x >= 50 -> "L" ++ toRomanNumeral (x - 50)
+ _ | x >= 40 -> "XL" ++ toRomanNumeral (x - 40)
+ _ | x >= 10 -> "X" ++ toRomanNumeral (x - 10)
+ _ | x >= 9 -> "IX" ++ toRomanNumeral (x - 5)
+ _ | x >= 5 -> "V" ++ toRomanNumeral (x - 5)
+ _ | x >= 4 -> "IV" ++ toRomanNumeral (x - 4)
+ _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
+ _ -> ""
+
+-- | Wrap inlines to line length.
+wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
+wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
+ return . fsep
+
+-- | Wrap inlines if the text wrap option is selected.
+wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
+ [Inline] -> m Doc
+wrapIfNeeded opts = if writerWrapText opts
+ then wrapped
+ else ($)
+
+-- auxiliary function for wrappedTeX
+isNote :: Inline -> Bool
+isNote (Note _) = True
+isNote _ = False
+
+-- | Wrap inlines to line length, treating footnotes in a way that
+-- makes sense in LaTeX and ConTeXt.
+wrappedTeX :: Monad m
+ => Bool
+ -> ([Inline] -> m Doc)
+ -> [Inline]
+ -> m Doc
+wrappedTeX includePercent listWriter sect = do
+ let (firstpart, rest) = break isNote sect
+ firstpartWrapped <- wrapped listWriter firstpart
+ if null rest
+ then return firstpartWrapped
+ else do let (note:rest') = rest
+ let (rest1, rest2) = break (== Space) rest'
+ -- rest1 is whatever comes between the note and a Space.
+ -- if the note is followed directly by a Space, rest1 is null.
+ -- rest1 is printed after the note but before the line break,
+ -- to avoid spurious blank space the note and immediately
+ -- following punctuation.
+ rest1Out <- if null rest1
+ then return empty
+ else listWriter rest1
+ rest2Wrapped <- if null rest2
+ then return empty
+ else wrappedTeX includePercent listWriter (tail rest2)
+ noteText <- listWriter [note]
+ return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$
+ (noteText <> rest1Out) $$
+ rest2Wrapped
+
+-- | Wrap inlines if the text wrap option is selected, specialized
+-- for LaTeX and ConTeXt.
+wrapTeXIfNeeded :: Monad m
+ => WriterOptions
+ -> Bool
+ -> ([Inline] -> m Doc)
+ -> [Inline]
+ -> m Doc
+wrapTeXIfNeeded opts includePercent = if writerWrapText opts
+ then wrappedTeX includePercent
+ else ($)
+
+-- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@).
+data BlockWrapper = Pad Doc | Reg Doc
+
+-- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks.
+wrappedBlocksToDoc :: [BlockWrapper] -> Doc
+wrappedBlocksToDoc = foldr addBlock empty
+ where addBlock (Pad d) accum | isEmpty accum = d
+ addBlock (Pad d) accum = d $$ text "" $$ accum
+ addBlock (Reg d) accum = d $$ accum
+
+--
+-- Parsing
+--
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- Auxiliary functions for romanNumeral:
+
+lowercaseRomanDigits :: [Char]
+lowercaseRomanDigits = ['i','v','x','l','c','d','m']
+
+uppercaseRomanDigits :: [Char]
+uppercaseRomanDigits = map toUpper lowercaseRomanDigits
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
+ else lowercaseRomanDigits
+ lookAhead $ oneOf romanDigits
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ map char romanDigits
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- Parsers for email addresses and URIs
+
+emailChar :: GenParser Char st Char
+emailChar = alphaNum <|> oneOf "-+_."
+
+domainChar :: GenParser Char st Char
+domainChar = alphaNum <|> char '-'
+
+domain :: GenParser Char st [Char]
+domain = do
+ first <- many1 domainChar
+ dom <- many1 $ try (char '.' >> many1 domainChar )
+ return $ intercalate "." (first:dom)
+
+-- | Parses an email address; returns string.
+emailAddress :: GenParser Char st [Char]
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ return $ addr ++ '@':dom
+
+-- | Parses a URI.
+uri :: GenParser Char st String
+uri = try $ do
+ str <- many1 $ satisfy isAllowedInURI
+ case parseURI str of
+ Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:",
+ "file:", "mailto:",
+ "news:", "telnet:" ]
+ then return $ show uri'
+ else fail "not a URI"
+ Nothing -> fail "not a URI"
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Fail unless we're in literate haskell mode.
+failUnlessLHS :: GenParser tok ParserState ()
+failUnlessLHS = do
+ state <- getState
+ if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char st ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char st Int
+orderedListMarker style delim = do
+ let num = case style of
+ DefaultStyle -> decimal <|> defaultNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+#ifdef _CITEPROC
+ stateCitations :: [String], -- ^ List of available citations
+#endif
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [String], -- ^ Authors of document
+ stateDate :: String, -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateSanitizeHTML = False,
+ stateKeys = [],
+#ifdef _CITEPROC
+ stateCitations = [],
+#endif
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateLiterateHaskell = False,
+ stateColumns = 80,
+ stateHeaderTable = [] }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, [Block])]
+
+type KeyTable = [([Inline], Target)]
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> [Inline] -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case find (refsMatch key . fst) table of
+ Nothing -> Nothing
+ Just (_, src) -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Math t x):restx) ((Math u y):resty) =
+ ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
+--
+-- Prettyprinting
+--
+
+-- | A version of hang that works like the version in pretty-1.0.0.0
+hang' :: Doc -> Int -> Doc -> Doc
+hang' d1 n d2 = d1 $$ (nest n d2)
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++
+ indentBy 2 0 ("[" ++ (intercalate ",\n"
+ (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++
+ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (intercalate ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+prettyPandoc :: Pandoc -> String
+prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
+--
+-- Pandoc block and inline list processing
+--
+
+-- | Generate infinite lazy list of markers for an ordered list,
+-- depending on list attributes.
+orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
+orderedListMarkers (start, numstyle, numdelim) =
+ let singleton c = [c]
+ nums = case numstyle of
+ DefaultStyle -> map show [start..]
+ Decimal -> map show [start..]
+ UpperAlpha -> drop (start - 1) $ cycle $
+ map singleton ['A'..'Z']
+ LowerAlpha -> drop (start - 1) $ cycle $
+ map singleton ['a'..'z']
+ UpperRoman -> map toRomanNumeral [start..]
+ LowerRoman -> map (map toLower . toRomanNumeral) [start..]
+ inDelim str = case numdelim of
+ DefaultDelim -> str ++ "."
+ Period -> str ++ "."
+ OneParen -> str ++ ")"
+ TwoParens -> "(" ++ str ++ ")"
+ in map inDelim nums
+
+-- | Normalize a list of inline elements: remove leading and trailing
+-- @Space@ elements, collapse double @Space@s into singles, and
+-- remove empty Str elements.
+normalizeSpaces :: [Inline] -> [Inline]
+normalizeSpaces [] = []
+normalizeSpaces list =
+ let removeDoubles [] = []
+ removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
+ removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
+ removeDoubles ((Str ""):rest) = removeDoubles rest
+ removeDoubles (x:rest) = x:(removeDoubles rest)
+ removeLeading (Space:xs) = removeLeading xs
+ removeLeading x = x
+ removeTrailing [] = []
+ removeTrailing lst = if (last lst == Space)
+ then init lst
+ else lst
+ in removeLeading $ removeTrailing $ removeDoubles list
+
+-- | Change final list item from @Para@ to @Plain@ if the list should
+-- be compact.
+compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
+ -> [[Block]]
+compactify [] = []
+compactify items =
+ let final = last items
+ others = init items
+ in case last final of
+ Para a -> if all endsWithPlain others && not (null final)
+ then others ++ [init final ++ [Plain a]]
+ else items
+ _ -> items
+
+endsWithPlain :: [Block] -> Bool
+endsWithPlain [] = False
+endsWithPlain blocks =
+ case last blocks of
+ Plain _ -> True
+ (BulletList (x:xs)) -> endsWithPlain $ last (x:xs)
+ (OrderedList _ (x:xs)) -> endsWithPlain $ last (x:xs)
+ (DefinitionList (x:xs)) -> endsWithPlain $ last $ map snd (x:xs)
+ _ -> False
+
+-- | Data structure for defining hierarchical Pandoc documents
+data Element = Blk Block
+ | Sec [Inline] [Element] deriving (Eq, Read, Show)
+
+-- | Returns @True@ on Header block with at least the specified level
+headerAtLeast :: Int -> Block -> Bool
+headerAtLeast level (Header x _) = x <= level
+headerAtLeast _ _ = False
+
+-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
+hierarchicalize :: [Block] -> [Element]
+hierarchicalize [] = []
+hierarchicalize (block:rest) =
+ case block of
+ (Header level title) ->
+ let (thisSection, rest') = break (headerAtLeast level) rest
+ in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
+ x -> (Blk x):(hierarchicalize rest)
+
+-- | True if block is a Header block.
+isHeaderBlock :: Block -> Bool
+isHeaderBlock (Header _ _) = True
+isHeaderBlock _ = False
+
+--
+-- Writer options
+--
+
+data HTMLMathMethod = PlainMath
+ | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
+ | JsMath (Maybe String) -- url of jsMath load script
+ | GladTeX
+ | MimeTeX String -- url of mimetex.cgi
+ deriving (Show, Read, Eq)
+
+-- | Methods for obfuscating email addresses in HTML.
+data ObfuscationMethod = NoObfuscation
+ | ReferenceObfuscation
+ | JavascriptObfuscation
+ deriving (Show, Read, Eq)
+
+-- | Options for writers
+data WriterOptions = WriterOptions
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ , writerLiterateHaskell :: Bool -- ^ Write as literate haskell
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
+ } deriving Show
+
+-- | Default writer options.
+defaultWriterOptions :: WriterOptions
+defaultWriterOptions =
+ WriterOptions { writerStandalone = False
+ , writerHeader = ""
+ , writerTitlePrefix = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerS5 = False
+ , writerHTMLMathMethod = PlainMath
+ , writerIgnoreNotes = False
+ , writerIncremental = False
+ , writerNumberSections = False
+ , writerIncludeBefore = ""
+ , writerIncludeAfter = ""
+ , writerStrictMarkdown = False
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ , writerLiterateHaskell = False
+ , writerEmailObfuscation = JavascriptObfuscation
+ }
+
+--
+-- File handling
+--
+
+-- | Perform an IO action in a directory, returning to starting directory.
+inDirectory :: FilePath -> IO a -> IO a
+inDirectory path action = do
+ oldDir <- getCurrentDirectory
+ setCurrentDirectory path
+ result <- action
+ setCurrentDirectory oldDir
+ return result