{- Copyright (C) 2006-7 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-7 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Utility functions and definitions used by the various Pandoc modules. -} module Text.Pandoc.Shared ( -- * List processing splitBy, splitByIndices, substitute, -- * Text processing joinWithSep, tabsToSpaces, backslashEscape, endsWith, stripTrailingNewlines, removeLeadingTrailingSpace, removeLeadingSpace, removeTrailingSpace, stripFirstAndLast, -- * Parsing readWith, testStringWith, Reference (..), isNoteBlock, isKeyBlock, isLineClump, HeaderType (..), ParserContext (..), QuoteContext (..), ParserState (..), NoteTable, defaultParserState, nullBlock, escaped, -- * Native format prettyprinting prettyPandoc, -- * Pandoc block and inline list processing normalizeSpaces, compactify, Element (..), hierarchicalize, isHeaderBlock, -- * Writer options WriterOptions (..), defaultWriterOptions, -- * Reference key lookup functions KeyTable, lookupKeySrc, refsMatch, ) where import Text.Pandoc.Definition import Text.ParserCombinators.Parsec import Text.Pandoc.Entities ( decodeEntities, escapeStringForXML ) import Data.Char ( toLower, ord ) import Data.List ( find, groupBy, isPrefixOf ) -- | Parse a string with a given parser and state. readWith :: GenParser Char ParserState a -- ^ parser -> ParserState -- ^ initial state -> String -- ^ input string -> a readWith parser state input = case runParser parser state "source" input of Left err -> error $ "\nError:\n" ++ show err Right result -> result -- | Parse a string with @parser@ (for testing). testStringWith :: (Show a) => GenParser Char ParserState a -> String -> IO () testStringWith parser str = putStrLn $ show $ readWith parser defaultParserState str data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below deriving (Eq, Show) data ParserContext = ListItemState -- ^ Used when running parser on list item contents | NullState -- ^ Default state deriving (Eq, Show) data QuoteContext = InSingleQuote -- ^ Used when we're parsing inside single quotes | InDoubleQuote -- ^ Used when we're parsing inside double quotes | NoQuote -- ^ Used when we're not parsing inside quotes deriving (Eq, Show) type KeyTable = [([Inline], Target)] type NoteTable = [(String, [Block])] -- | References from preliminary parsing data Reference = KeyBlock [Inline] Target -- ^ Key for reference-style link (label URL title) | NoteBlock String [Block] -- ^ Footnote reference and contents | LineClump String -- ^ Raw clump of lines with blanks at end deriving (Eq, Read, Show) -- | Auxiliary functions used in preliminary parsing isNoteBlock :: Reference -> Bool isNoteBlock (NoteBlock _ _) = True isNoteBlock _ = False isKeyBlock :: Reference -> Bool isKeyBlock (KeyBlock _ _) = True isKeyBlock _ = False isLineClump :: Reference -> Bool isLineClump (LineClump _) = True isLineClump _ = False data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse untranslatable HTML -- and LaTeX? stateParserContext :: ParserContext, -- ^ What are we parsing? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateKeys :: KeyTable, -- ^ List of reference keys stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ If @True@, parse -- bibliographic info stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [String], -- ^ Authors of document stateDate :: String, -- ^ Date of document stateStrict :: Bool, -- ^ Use strict markdown syntax stateSmart :: Bool, -- ^ Use smart typography stateColumns :: Int, -- ^ Number of columns in -- terminal (used for tables) stateHeaderTable :: [HeaderType] -- ^ List of header types used, -- in what order (rst only) } deriving Show defaultParserState :: ParserState defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, stateKeys = [], stateNotes = [], stateTabStop = 4, stateStandalone = False, stateTitle = [], stateAuthors = [], stateDate = [], stateStrict = False, stateSmart = False, stateColumns = 80, stateHeaderTable = [] } -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). nullBlock :: GenParser Char st Block nullBlock = do anyChar return Null -- | Parses backslash, then applies character parser. escaped :: GenParser Char st Char -- ^ Parser for character to escape -> GenParser Char st Inline escaped parser = try (do char '\\' result <- parser return (Str [result])) -- | Indent string as a block. indentBy :: Int -- ^ Number of spaces to indent the block -> Int -- ^ Number of spaces (rel to block) to indent first line -> String -- ^ Contents of block to indent -> String indentBy num first [] = "" indentBy num first str = let (firstLine:restLines) = lines str firstLineIndent = num + first in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ (joinWithSep "\n" $ map (\line -> (replicate num ' ') ++ line) restLines) -- | Prettyprint list of Pandoc blocks elements. prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks -> [Block] -- ^ List of blocks -> String prettyBlockList indent [] = indentBy indent 0 "[]" prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ (joinWithSep "\n, " (map prettyBlock blocks)) ++ " ]" -- | Prettyprint Pandoc block element. prettyBlock :: Block -> String prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ (prettyBlockList 2 blocks) prettyBlock (OrderedList blockLists) = "OrderedList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock (BulletList blockLists) = "BulletList\n" ++ indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" prettyBlock (DefinitionList blockLists) = "DefinitionList\n" ++ indentBy 2 0 ("[" ++ (joinWithSep ",\n" (map (\(term, blocks) -> " (" ++ show term ++ ",\n" ++ indentBy 1 2 (prettyBlockList 2 blocks) ++ " )") blockLists))) ++ " ]" prettyBlock (Table caption aligns widths header rows) = "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ (joinWithSep ",\n" (map prettyRow rows)) ++ " ]" where prettyRow cols = indentBy 2 0 ("[ " ++ (joinWithSep ", " (map (\blocks -> prettyBlockList 2 blocks) cols))) ++ " ]" prettyBlock block = show block -- | Prettyprint Pandoc document. prettyPandoc :: Pandoc -> String prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ (show meta) ++ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" -- | Convert tabs to spaces (with adjustable tab stop). tabsToSpaces :: Int -- ^ Tabstop -> String -- ^ String to convert -> String tabsToSpaces tabstop str = unlines (map (tabsInLine tabstop tabstop) (lines str)) -- | Convert tabs to spaces in one line. tabsInLine :: Int -- ^ Number of spaces to next tab stop -> Int -- ^ Tabstop -> String -- ^ Line to convert -> String tabsInLine num tabstop "" = "" tabsInLine num tabstop (c:cs) = let replacement = (if (c == '\t') then (replicate num ' ') else [c]) in let nextnumraw = (num - (length replacement)) in let nextnum = if (nextnumraw < 1) then (nextnumraw + tabstop) else nextnumraw in replacement ++ (tabsInLine nextnum tabstop cs) -- | Escape designated characters with backslash. backslashEscape :: [Char] -- ^ list of special characters to escape -> String -- ^ string input -> String backslashEscape special [] = [] backslashEscape special (x:xs) = if x `elem` special then '\\':x:(backslashEscape special xs) else x:(backslashEscape special xs) -- | Returns @True@ if string ends with given character. endsWith :: Char -> [Char] -> Bool endsWith char [] = False endsWith char str = (char == last str) -- | Joins a list of lists, separated by another list. joinWithSep :: [a] -- ^ List to use as separator -> [[a]] -- ^ Lists to join -> [a] joinWithSep sep [] = [] joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst -- | Strip trailing newlines from string. stripTrailingNewlines :: String -> String stripTrailingNewlines "" = "" stripTrailingNewlines str = if (last str) == '\n' then stripTrailingNewlines (init str) else str -- | Remove leading and trailing space (including newlines) from string. removeLeadingTrailingSpace :: String -> String removeLeadingTrailingSpace = removeLeadingSpace . removeTrailingSpace -- | Remove leading space (including newlines) from string. removeLeadingSpace :: String -> String removeLeadingSpace = dropWhile (\x -> (x == ' ') || (x == '\n') || (x == '\t')) -- | Remove trailing space (including newlines) from string. removeTrailingSpace :: String -> String removeTrailingSpace = reverse . removeLeadingSpace . reverse -- | Strip leading and trailing characters from string stripFirstAndLast str = drop 1 $ take ((length str) - 1) str -- | Replace each occurrence of one sublist in a list with another. substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] substitute _ _ [] = [] substitute [] _ lst = lst substitute target replacement lst = if isPrefixOf target lst then replacement ++ (substitute target replacement $ drop (length target) lst) else (head lst):(substitute target replacement $ tail lst) -- | Split list into groups separated by sep. splitBy :: (Eq a) => a -> [a] -> [[a]] splitBy _ [] = [] splitBy sep lst = let (first, rest) = break (== sep) lst rest' = dropWhile (== sep) rest in first:(splitBy sep rest') -- | Split list into chunks divided at specified indices. splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] splitByIndices (x:xs) lst = let (first, rest) = splitAt x lst in first:(splitByIndices (map (\y -> y - x) xs) rest) -- | Normalize a list of inline elements: remove leading and trailing -- @Space@ elements, collapse double @Space@s into singles, and -- remove empty Str elements. normalizeSpaces :: [Inline] -> [Inline] normalizeSpaces [] = [] normalizeSpaces list = let removeDoubles [] = [] removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) removeDoubles ((Str ""):rest) = removeDoubles rest removeDoubles (x:rest) = x:(removeDoubles rest) in let removeLeading [] = [] removeLeading lst = if ((head lst) == Space) then tail lst else lst in let removeTrailing [] = [] removeTrailing lst = if ((last lst) == Space) then init lst else lst in removeLeading $ removeTrailing $ removeDoubles list -- | Change final list item from @Para@ to @Plain@ if the list should -- be compact. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) -> [[Block]] compactify [] = [] compactify items = let final = last items others = init items in case final of [Para a] -> if any containsPara others then items else others ++ [[Plain a]] otherwise -> items containsPara :: [Block] -> Bool containsPara [] = False containsPara ((Para a):rest) = True containsPara ((BulletList items):rest) = (any containsPara items) || (containsPara rest) containsPara ((OrderedList items):rest) = (any containsPara items) || (containsPara rest) containsPara (x:rest) = containsPara rest -- | Data structure for defining hierarchical Pandoc documents data Element = Blk Block | Sec [Inline] [Element] deriving (Eq, Read, Show) -- | Returns true on Header block with level at least 'level' headerAtLeast :: Int -> Block -> Bool headerAtLeast level (Header x _) = x <= level headerAtLeast level _ = False -- | Convert list of Pandoc blocks into list of Elements (hierarchical) hierarchicalize :: [Block] -> [Element] hierarchicalize [] = [] hierarchicalize (block:rest) = case block of (Header level title) -> let (thisSection, rest') = break (headerAtLeast level) rest in (Sec title (hierarchicalize thisSection)): (hierarchicalize rest') x -> (Blk x):(hierarchicalize rest) -- | True if block is a Header block. isHeaderBlock :: Block -> Bool isHeaderBlock (Header _ _) = True isHeaderBlock _ = False -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer , writerTitlePrefix :: String -- ^ Prefix for HTML titles , writerHeader :: String -- ^ Header for the document , writerIncludeBefore :: String -- ^ String to include before the body , writerIncludeAfter :: String -- ^ String to include after the body , writerTableOfContents :: Bool -- ^ Include table of contents , writerS5 :: Bool -- ^ We're writing S5 , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) , writerIncremental :: Bool -- ^ Incremental S5 lists , writerNumberSections :: Bool -- ^ Number sections in LaTeX , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerTabStop :: Int -- ^ Tabstop for conversion between -- spaces and tabs } deriving Show -- | Default writer options. defaultWriterOptions = WriterOptions { writerStandalone = False, writerHeader = "", writerTitlePrefix = "", writerTabStop = 4, writerTableOfContents = False, writerS5 = False, writerIgnoreNotes = False, writerIncremental = False, writerNumberSections = False, writerIncludeBefore = "", writerIncludeAfter = "", writerStrictMarkdown = False, writerReferenceLinks = False } -- -- code to lookup reference keys in key table -- -- | Look up key in key table and return target object. lookupKeySrc :: KeyTable -- ^ Key table -> [Inline] -- ^ Key -> Maybe Target lookupKeySrc table key = case table of [] -> Nothing (k, src):rest -> if (refsMatch k key) then Just src else lookupKeySrc rest key -- | Returns @True@ if keys match (case insensitive). refsMatch :: [Inline] -> [Inline] -> Bool refsMatch ((Str x):restx) ((Str y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((Code x):restx) ((Code y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((TeX x):restx) ((TeX y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = ((map toLower x) == (map toLower y)) && refsMatch restx resty refsMatch ((Emph x):restx) ((Emph y):resty) = refsMatch x y && refsMatch restx resty refsMatch ((Strong x):restx) ((Strong y):resty) = refsMatch x y && refsMatch restx resty refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = t == u && refsMatch x y && refsMatch restx resty refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty refsMatch [] x = null x refsMatch x [] = null x