summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordr@jones.dk <dr@jones.dk>2010-07-31 00:15:41 +0200
committerdr@jones.dk <dr@jones.dk>2010-07-31 00:15:41 +0200
commit1f6b4aee268fefc72c84bd305b10d4f9103901eb (patch)
tree06068a6ea16e5fcd9fce72d04c15a69089f85694 /src
parentc5408a001e497aed5733e00346bcba7e06cb65ba (diff)
Imported Upstream version 1.6
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs12
-rw-r--r--src/Text/Pandoc/Definition.hs19
-rw-r--r--src/Text/Pandoc/ODT.hs102
-rw-r--r--src/Text/Pandoc/Parsing.hs700
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs84
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs39
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs127
-rw-r--r--src/Text/Pandoc/Readers/RST.hs178
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs253
-rw-r--r--src/Text/Pandoc/S5.hs57
-rw-r--r--src/Text/Pandoc/Shared.hs668
-rw-r--r--src/Text/Pandoc/Templates.hs3
-rw-r--r--src/Text/Pandoc/UTF8.hs72
-rw-r--r--src/Text/Pandoc/UUID.hs77
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs3
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs1
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs283
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs84
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Writers/Man.hs17
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs33
-rw-r--r--src/Text/Pandoc/Writers/Native.hs86
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs83
-rw-r--r--src/Text/Pandoc/Writers/RST.hs32
-rw-r--r--src/Text/Pandoc/Writers/S5.hs136
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs1
-rw-r--r--src/markdown2pdf.hs26
-rw-r--r--src/pandoc.hs210
28 files changed, 1926 insertions, 1462 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 9cad5fb34..ad429bc93 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -71,6 +71,7 @@ module Text.Pandoc
, NoteTable
, HeaderType (..)
-- * Writers: converting /from/ Pandoc format
+ , writeNative
, writeMarkdown
, writePlain
, writeRST
@@ -79,16 +80,16 @@ module Text.Pandoc
, writeTexinfo
, writeHtml
, writeHtmlString
- , writeS5
- , writeS5String
, writeDocbook
, writeOpenDocument
, writeMan
, writeMediaWiki
, writeRTF
- , prettyPandoc
+ , writeODT
+ , writeEPUB
-- * Writer options used in writers
, WriterOptions (..)
+ , HTMLSlideVariant (..)
, HTMLMathMethod (..)
, defaultWriterOptions
-- * Rendering templates and default templates
@@ -102,19 +103,22 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
import Text.Pandoc.Writers.LaTeX
import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
-import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.ODT
+import Text.Pandoc.Writers.EPUB
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OpenDocument
import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Templates
+import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import Data.Version (showVersion)
import Paths_pandoc (version)
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index 169c4d1a6..fffca3b2e 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -33,36 +33,37 @@ module Text.Pandoc.Definition where
import Data.Generics
-data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data)
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data)
-- | Bibliographic information for the document: title, authors, date.
data Meta = Meta { docTitle :: [Inline]
, docAuthors :: [[Inline]]
, docDate :: [Inline] }
- deriving (Eq, Show, Read, Typeable, Data)
+ deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Alignment of a table column.
data Alignment = AlignLeft
| AlignRight
| AlignCenter
- | AlignDefault deriving (Eq, Show, Read, Typeable, Data)
+ | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | List attributes.
type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
-- | Style of list numbers.
data ListNumberStyle = DefaultStyle
+ | Example
| Decimal
| LowerRoman
| UpperRoman
| LowerAlpha
- | UpperAlpha deriving (Eq, Show, Read, Typeable, Data)
+ | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Delimiter of list numbers.
data ListNumberDelim = DefaultDelim
| Period
| OneParen
- | TwoParens deriving (Eq, Show, Read, Typeable, Data)
+ | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Attributes: identifier, classes, key-value pairs
type Attr = (String, [String], [(String, String)])
@@ -90,16 +91,16 @@ data Block
-- column headers (each a list of blocks), and
-- rows (each a list of lists of blocks)
| Null -- ^ Nothing
- deriving (Eq, Read, Show, Typeable, Data)
+ deriving (Eq, Ord, Read, Show, Typeable, Data)
-- | Type of quotation marks to use in Quoted inline.
-data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data)
+data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Link target (URL, title).
type Target = (String, String)
-- | Type of math element (display or inline).
-data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data)
+data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Inline elements.
data Inline
@@ -126,7 +127,7 @@ data Inline
| Image [Inline] Target -- ^ Image: alt text (list of inlines), target
-- and target
| Note [Block] -- ^ Footnote or endnote
- deriving (Show, Eq, Read, Typeable, Data)
+ deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Applies a transformation on @a@s to matching elements in a @b@.
processWith :: (Data a, Data b) => (a -> a) -> b -> b
diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs
deleted file mode 100644
index d978c0cb4..000000000
--- a/src/Text/Pandoc/ODT.hs
+++ /dev/null
@@ -1,102 +0,0 @@
-{-
-Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.ODT
- Copyright : Copyright (C) 2008-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for producing an ODT file from OpenDocument XML.
--}
-module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Data.List ( find )
-import System.FilePath ( (</>), takeFileName )
-import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 ( fromString )
-import Prelude hiding ( writeFile, readFile )
-import Codec.Archive.Zip
-import Control.Applicative ( (<$>) )
-import Text.ParserCombinators.Parsec
-import System.Time
-import Paths_pandoc ( getDataFileName )
-import System.Directory
-import Control.Monad (liftM)
-
--- | Produce an ODT file from OpenDocument XML.
-saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory
- -> FilePath -- ^ Pathname of ODT file to be produced
- -> FilePath -- ^ Relative directory of source file
- -> Maybe FilePath -- ^ Path specified by --reference-odt
- -> String -- ^ OpenDocument XML contents
- -> IO ()
-saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do
- refArchive <- liftM toArchive $
- case mbRefOdt of
- Just f -> B.readFile f
- Nothing -> do
- let defaultODT = getDataFileName "reference.odt" >>= B.readFile
- case datadir of
- Nothing -> defaultODT
- Just d -> do
- exists <- doesFileExist (d </> "reference.odt")
- if exists
- then B.readFile (d </> "reference.odt")
- else defaultODT
- -- handle pictures
- let (newContents, pics) =
- case runParser pPictures [] "OpenDocument XML contents" xml of
- Left err -> error $ show err
- Right x -> x
- picEntries <- mapM (makePictureEntry sourceDirRelative) pics
- (TOD epochTime _) <- getClockTime
- let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
- let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
- B.writeFile destinationODTPath $ fromArchive archive
-
-makePictureEntry :: FilePath -- ^ Relative directory of source file
- -> (FilePath, String) -- ^ Path and new path of picture
- -> IO Entry
-makePictureEntry sourceDirRelative (path, newPath) = do
- entry <- readEntry [] $ sourceDirRelative </> path
- return (entry { eRelativePath = newPath })
-
-pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
-pPictures = do
- contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
- pics <- getState
- return (contents, pics)
-
-pPicture :: GenParser Char [(FilePath, String)] [Char]
-pPicture = try $ do
- string "<draw:image xlink:href=\""
- path <- manyTill anyChar (char '"')
- let filename = takeFileName path
- pics <- getState
- newPath <- case find (\(o, _) -> o == path) pics of
- Just (_, new) -> return new
- Nothing -> do
- -- get a unique name
- let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
- let new = "Pictures/" ++ replicate dups '0' ++ filename
- updateState ((path, new) :)
- return new
- return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
new file mode 100644
index 000000000..3678fc22a
--- /dev/null
+++ b/src/Text/Pandoc/Parsing.hs
@@ -0,0 +1,700 @@
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Parsing
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A utility library with parsers used in pandoc readers.
+-}
+module Text.Pandoc.Parsing ( (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ emailAddress,
+ uri,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ failUnlessLHS,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ tableWith,
+ gridTableWith,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ Key (..),
+ lookupKeySrc,
+ refsMatch )
+where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isAscii )
+import Data.List ( intercalate, transpose )
+import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import Control.Monad ( join, liftM )
+import Text.Pandoc.Shared
+import qualified Data.Map as M
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- Auxiliary functions for romanNumeral:
+
+lowercaseRomanDigits :: [Char]
+lowercaseRomanDigits = ['i','v','x','l','c','d','m']
+
+uppercaseRomanDigits :: [Char]
+uppercaseRomanDigits = map toUpper lowercaseRomanDigits
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
+ else lowercaseRomanDigits
+ lookAhead $ oneOf romanDigits
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ map char romanDigits
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- Parsers for email addresses and URIs
+
+emailChar :: GenParser Char st Char
+emailChar = alphaNum <|> oneOf "-+_."
+
+domainChar :: GenParser Char st Char
+domainChar = alphaNum <|> char '-'
+
+domain :: GenParser Char st [Char]
+domain = do
+ first <- many1 domainChar
+ dom <- many1 $ try (char '.' >> many1 domainChar )
+ return $ intercalate "." (first:dom)
+
+-- | Parses an email address; returns original and corresponding
+-- escaped mailto: URI.
+emailAddress :: GenParser Char st (String, String)
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ let full = addr ++ '@':dom
+ return (full, escapeURI $ "mailto:" ++ full)
+
+-- | Parses a URI. Returns pair of original and URI-escaped version.
+uri :: GenParser Char st (String, String)
+uri = try $ do
+ let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
+ "news:", "telnet:" ]
+ lookAhead $ oneOfStrings protocols
+ -- scan non-ascii characters and ascii characters allowed in a URI
+ str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c)
+ -- now see if they amount to an absolute URI
+ case parseURI (escapeURI str) of
+ Just uri' -> if uriScheme uri' `elem` protocols
+ then return (str, show uri')
+ else fail "not a URI"
+ Nothing -> fail "not a URI"
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Fail unless we're in literate haskell mode.
+failUnlessLHS :: GenParser tok ParserState ()
+failUnlessLHS = do
+ state <- getState
+ if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '@' and optional label and
+-- returns (DefaultStyle, [next example number]). The next
+-- example number is incremented in parser state, and the label
+-- (if present) is added to the label table.
+exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
+exampleNum = do
+ char '@'
+ lab <- many (alphaNum <|> oneOf "_-")
+ st <- getState
+ let num = stateNextExample st
+ let newlabels = if null lab
+ then stateExamples st
+ else M.insert lab num $ stateExamples st
+ updateState $ \s -> s{ stateNextExample = num + 1
+ , stateExamples = newlabels }
+ return (Example, num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char ParserState ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, exampleNum, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char ParserState Int
+orderedListMarker style delim = do
+ let num = defaultNum <|> -- # can continue any kind of list
+ case style of
+ DefaultStyle -> decimal
+ Example -> exampleNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'.
+tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int])
+ -> ([Int] -> GenParser Char ParserState [[Block]])
+ -> GenParser Char ParserState sep
+ -> GenParser Char ParserState end
+ -> GenParser Char ParserState [Inline]
+ -> GenParser Char ParserState Block
+tableWith headerParser rowParser lineParser footerParser captionParser = try $ do
+ caption' <- option [] captionParser
+ (heads, aligns, indices) <- headerParser
+ lines' <- rowParser indices `sepEndBy` lineParser
+ footerParser
+ caption <- if null caption'
+ then option [] captionParser
+ else return caption'
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines'
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths' = zipWith (-) indices (0:indices)
+ lengths = reverse $
+ case reverse lengths' of
+ [] -> []
+ [x] -> [x]
+ -- compensate for the fact that intercolumn
+ -- spaces are counted in widths of all columns
+ -- but the last...
+ (x:y:zs) -> if x < y && y - x <= 2
+ then y:y:zs
+ else x:y:zs
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+gridTableWith :: GenParser Char ParserState Block -- ^ Block parser
+ -> GenParser Char ParserState [Inline] -- ^ Caption parser
+ -> Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTableWith block tableCaption headless =
+ tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line =
+ map removeFinalBar $ tail $ splitByIndices (init indices) line
+
+gridPart :: Char -> GenParser Char st (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
+
+gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+
+removeFinalBar :: String -> String
+removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
+ reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Char -> GenParser Char ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader headless block = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- mapM (parseFromString $ many block) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
+
+gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices $ removeTrailingSpace line)
+
+-- | Parse row of grid table.
+gridTableRow :: GenParser Char ParserState Block
+ -> [Int]
+ -> GenParser Char ParserState [[Block]]
+gridTableRow block indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ mapM (liftM compactifyCell . parseFromString (many block)) cols
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+compactifyCell :: [Block] -> [Block]
+compactifyCell bs = head $ compactify [bs]
+
+-- | Parse footer for a grid table.
+gridTableFooter :: GenParser Char ParserState [Char]
+gridTableFooter = blanklines
+
+---
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = UTF8.putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+#ifdef _CITEPROC
+ stateCitations :: [String], -- ^ List of available citations
+#endif
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [[Inline]], -- ^ Authors of document
+ stateDate :: [Inline], -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+ stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
+ stateNextExample :: Int, -- ^ Number of next example
+ stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
+ stateHasChapters :: Bool -- ^ True if \chapter encountered
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateSanitizeHTML = False,
+ stateKeys = M.empty,
+#ifdef _CITEPROC
+ stateCitations = [],
+#endif
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateLiterateHaskell = False,
+ stateColumns = 80,
+ stateHeaderTable = [],
+ stateIndentedCodeClasses = [],
+ stateNextExample = 1,
+ stateExamples = M.empty,
+ stateHasChapters = False }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, String)]
+
+newtype Key = Key [Inline] deriving (Show, Read)
+
+instance Eq Key where
+ Key a == Key b = refsMatch a b
+
+instance Ord Key where
+ compare (Key a) (Key b) = if a == b then EQ else compare a b
+
+type KeyTable = M.Map Key Target
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> Key -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case M.lookup key table of
+ Nothing -> Nothing
+ Just src -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Math t x):restx) ((Math u y):resty) =
+ ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5c188e3d9..f47309d3f 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,13 +44,14 @@ module Text.Pandoc.Readers.HTML (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isSuffixOf, intercalate )
import Data.Char ( toLower, isAlphaNum )
import Network.URI ( parseURIReference, URI (..) )
-import Control.Monad ( liftM )
+import Control.Monad ( liftM, when )
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ParserState -- ^ Parser state
@@ -198,11 +199,11 @@ inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-- | Parse blocks between open and close tag.
blocksIn :: String -> GenParser Char ParserState [Block]
-blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag
+blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag
-- | Parse inlines between open and close tag.
inlinesIn :: String -> GenParser Char ParserState [Inline]
-inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag
+inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag
-- | Extract type from a tag: e.g. @br@ from @\<br\>@
extractTagType :: String -> String
@@ -258,18 +259,33 @@ anyHtmlEndTag = try $ do
then return $ "<!-- unsafe HTML removed -->"
else return result
-htmlTag :: String -> GenParser Char ParserState (String, [(String, String)])
-htmlTag tag = try $ do
+htmlTag :: Bool
+ -> String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlTag selfClosing tag = try $ do
char '<'
spaces
stringAnyCase tag
attribs <- many htmlAttribute
spaces
- optional (string "/")
- spaces
+ -- note: we want to handle both HTML and XHTML,
+ -- so we don't require the /
+ when selfClosing $ optional $ char '/' >> spaces
char '>'
return (tag, (map (\(name, content, _) -> (name, content)) attribs))
+htmlOpenTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlOpenTag = htmlTag False
+
+htmlCloseTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlCloseTag = htmlTag False . ('/':)
+
+htmlSelfClosingTag :: String
+ -> GenParser Char ParserState (String, [(String, String)])
+htmlSelfClosingTag = htmlTag True
+
-- parses a quoted html attribute value
quoted :: Char -> GenParser Char st (String, String)
quoted quoteChar = do
@@ -344,7 +360,7 @@ anyHtmlInlineTag = try $ do
-- Scripts must be treated differently, because they can contain '<>' etc.
htmlScript :: GenParser Char ParserState [Char]
htmlScript = try $ do
- lookAhead $ htmlTag "script"
+ lookAhead $ htmlOpenTag "script"
open <- anyHtmlTag
rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
st <- getState
@@ -379,7 +395,7 @@ scriptChunk = jsComment <|> jsString <|> jsChars
-- Style tags must be treated differently, because they can contain CSS
htmlStyle :: GenParser Char ParserState [Char]
htmlStyle = try $ do
- lookAhead $ htmlTag "style"
+ lookAhead $ htmlOpenTag "style"
open <- anyHtmlTag
rest <- manyTill anyChar (htmlEndTag "style")
st <- getState
@@ -411,7 +427,8 @@ rawVerbatimBlock = try $ do
-- We don't want to parse </body> or </html> as raw HTML, since these
-- are handled in parseHtml.
rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html")
+rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|>
+ htmlCloseTag "html")
rawHtmlBlock
-- | Parses an HTML comment.
@@ -441,13 +458,13 @@ definition = try $ do
nonTitleNonHead :: GenParser Char ParserState Char
nonTitleNonHead = try $ do
- notFollowedBy $ (htmlTag "title" >> return ' ') <|>
+ notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|>
(htmlEndTag "head" >> return ' ')
(rawHtmlBlock >> return ' ') <|> anyChar
parseTitle :: GenParser Char ParserState [Inline]
parseTitle = try $ do
- (tag, _) <- htmlTag "title"
+ (tag, _) <- htmlOpenTag "title"
contents <- inlinesTilEnd tag
spaces
return contents
@@ -455,7 +472,7 @@ parseTitle = try $ do
-- parse header and return meta-information (for now, just title)
parseHead :: GenParser Char ParserState Meta
parseHead = try $ do
- htmlTag "head"
+ htmlOpenTag "head"
spaces
skipMany nonTitleNonHead
contents <- option [] parseTitle
@@ -463,13 +480,10 @@ parseHead = try $ do
htmlEndTag "head"
return $ Meta contents [] []
-skipHtmlTag :: String -> GenParser Char ParserState ()
-skipHtmlTag tag = optional (htmlTag tag)
-
-- h1 class="title" representation of title in body
bodyTitle :: GenParser Char ParserState [Inline]
bodyTitle = try $ do
- (_, attribs) <- htmlTag "h1"
+ (_, attribs) <- htmlOpenTag "h1"
case (extractAttribute "class" attribs) of
Just "title" -> return ""
_ -> fail "not title"
@@ -487,11 +501,11 @@ parseHtml :: GenParser Char ParserState Pandoc
parseHtml = do
sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
spaces
- skipHtmlTag "html"
+ optional $ htmlOpenTag "html"
spaces
meta <- option (Meta [] [] []) parseHead
spaces
- skipHtmlTag "body"
+ optional $ htmlOpenTag "body"
spaces
optional bodyTitle -- skip title in body, because it's represented in meta
blocks <- parseBlocks
@@ -527,7 +541,7 @@ header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
headerLevel :: Int -> GenParser Char ParserState Block
headerLevel n = try $ do
let level = "h" ++ show n
- htmlTag level
+ htmlOpenTag level
contents <- inlinesTilEnd level
return $ Header n (normalizeSpaces contents)
@@ -537,7 +551,7 @@ headerLevel n = try $ do
hrule :: GenParser Char ParserState Block
hrule = try $ do
- (_, attribs) <- htmlTag "hr"
+ (_, attribs) <- htmlSelfClosingTag "hr"
state <- getState
if not (null attribs) && stateParseRaw state
then unexpected "attributes in hr" -- parse as raw in this case
@@ -551,7 +565,7 @@ hrule = try $ do
-- skipped, because they are not portable to output formats other than HTML.
codeBlock :: GenParser Char ParserState Block
codeBlock = try $ do
- htmlTag "pre"
+ htmlOpenTag "pre"
result <- manyTill
(many1 (satisfy (/= '<')) <|>
((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
@@ -572,7 +586,7 @@ codeBlock = try $ do
--
blockQuote :: GenParser Char ParserState Block
-blockQuote = try $ htmlTag "blockquote" >> spaces >>
+blockQuote = try $ htmlOpenTag "blockquote" >> spaces >>
blocksTilEnd "blockquote" >>= (return . BlockQuote)
--
@@ -584,7 +598,7 @@ list = choice [ bulletList, orderedList, definitionList ] <?> "list"
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
- (_, attribs) <- htmlTag "ol"
+ (_, attribs) <- htmlOpenTag "ol"
(start, style) <- option (1, DefaultStyle) $
do failIfStrict
let sta = fromMaybe "1" $
@@ -609,7 +623,7 @@ orderedList = try $ do
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
- htmlTag "ul"
+ htmlOpenTag "ul"
spaces
-- note: if they have an <ol> or <ul> not in scope of a <li>,
-- treat it as a list item, though it's not valid xhtml...
@@ -620,7 +634,7 @@ bulletList = try $ do
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
failIfStrict -- def lists not part of standard markdown
- htmlTag "dl"
+ htmlOpenTag "dl"
spaces
items <- sepEndBy1 definitionListItem spaces
htmlEndTag "dl"
@@ -638,7 +652,7 @@ definitionListItem = try $ do
--
para :: GenParser Char ParserState Block
-para = try $ htmlTag "p" >> inlinesTilEnd "p" >>=
+para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>=
return . Para . normalizeSpaces
--
@@ -672,8 +686,8 @@ inline = choice [ charRef
code :: GenParser Char ParserState Inline
code = try $ do
- htmlTag "code"
- result <- manyTill anyChar (htmlEndTag "code")
+ result <- (htmlOpenTag "code" >> manyTill anyChar (htmlEndTag "code"))
+ <|> (htmlOpenTag "tt" >> manyTill anyChar (htmlEndTag "tt"))
-- remove internal line breaks, leading and trailing space,
-- and decode character references
return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
@@ -686,7 +700,7 @@ rawHtmlInline = do
if stateParseRaw state then return (HtmlInline result) else return (Str "")
betweenTags :: [Char] -> GenParser Char ParserState [Inline]
-betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>=
+betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>=
return . normalizeSpaces
emph :: GenParser Char ParserState Inline
@@ -708,7 +722,7 @@ strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
spanStrikeout :: GenParser Char ParserState Inline
spanStrikeout = try $ do
failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- (_, attributes) <- htmlTag "span"
+ (_, attributes) <- htmlOpenTag "span"
result <- case (extractAttribute "class" attributes) of
Just "strikeout" -> inlinesTilEnd "span"
_ -> fail "not a strikeout"
@@ -719,7 +733,7 @@ whitespace = many1 space >> return Space
-- hard line break
linebreak :: GenParser Char ParserState Inline
-linebreak = htmlTag "br" >> optional newline >> return LineBreak
+linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak
str :: GenParser Char st Inline
str = many1 (noneOf "< \t\n&") >>= return . Str
@@ -740,7 +754,7 @@ extractAttribute name ((attrName, contents):rest) =
link :: GenParser Char ParserState Inline
link = try $ do
- (_, attributes) <- htmlTag "a"
+ (_, attributes) <- htmlOpenTag "a"
url <- case (extractAttribute "href" attributes) of
Just url -> return url
Nothing -> fail "no href"
@@ -750,7 +764,7 @@ link = try $ do
image :: GenParser Char ParserState Inline
image = try $ do
- (_, attributes) <- htmlTag "img"
+ (_, attributes) <- htmlSelfClosingTag "img"
url <- case (extractAttribute "src" attributes) of
Just url -> return url
Nothing -> fail "no src"
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 36940fab0..406809dfc 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
import Data.Char ( chr )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -167,16 +168,37 @@ block = choice [ hrule
--
header :: GenParser Char ParserState Block
-header = try $ do
+header = section <|> chapter
+
+chapter :: GenParser Char ParserState Block
+chapter = try $ do
+ string "\\chapter"
+ result <- headerWithLevel 1
+ updateState $ \s -> s{ stateHasChapters = True }
+ return result
+
+section :: GenParser Char ParserState Block
+section = try $ do
char '\\'
subs <- many (try (string "sub"))
base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
+ st <- getState
+ let lev = if stateHasChapters st
+ then length subs + base + 1
+ else length subs + base
+ headerWithLevel lev
+
+headerWithLevel :: Int -> GenParser Char ParserState Block
+headerWithLevel lev = try $ do
+ spaces
optional (char '*')
+ spaces
optional $ bracketedText '[' ']' -- alt title
+ spaces
char '{'
title' <- manyTill inline (char '}')
spaces
- return $ Header (length subs + base) (normalizeSpaces title')
+ return $ Header lev (normalizeSpaces title')
--
-- hrule block
@@ -453,7 +475,7 @@ inline = choice [ str
, accentedChar
, nonbreakingSpace
, specialChar
- , rawLaTeXInline
+ , rawLaTeXInline'
, escapedChar
, unescapedChar
] <?> "inline"
@@ -771,11 +793,16 @@ footnote = try $ do
setInput rest
return $ Note blocks
+-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
+rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' = do
+ notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
+ "\\section"]
+ rawLaTeXInline
+
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
- "\\section"]
state <- getState
if stateParseRaw state
then do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 13edd0586..33fb3d8e6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -32,11 +32,13 @@ module Text.Pandoc.Readers.Markdown (
) where
import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag, anyHtmlTag,
@@ -67,7 +69,7 @@ setextHChars = "=-"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;"
+specialChars = "\\[]*_~`<>$!^-.&@'\";"
--
-- auxiliary functions
@@ -183,7 +185,18 @@ parseMarkdown = do
-- now parse it for real...
(title, author, date) <- option ([],[],[]) titleBlock
blocks <- parseBlocks
- return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+ let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks
+ -- if there are labeled examples, change references into numbers
+ examples <- liftM stateExamples getState
+ let handleExampleRef :: Inline -> Inline
+ handleExampleRef z@(Str ('@':xs)) =
+ case M.lookup xs examples of
+ Just n -> Str (show n)
+ Nothing -> z
+ handleExampleRef z = z
+ if M.null examples
+ then return doc
+ else return $ processWith handleExampleRef doc
--
-- initial pass for references and notes
@@ -202,10 +215,10 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
+ let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = newkey : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys }
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
@@ -715,7 +728,7 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -734,7 +747,9 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- return (rawHeads', aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads'
+ return (heads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: GenParser Char ParserState [Char]
@@ -763,65 +778,27 @@ multilineRow :: [Int]
-> GenParser Char ParserState [[Block]]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- optional blanklines
let cols = map unlines $ transpose colLines
mapM (parseFromString (many plain)) cols
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: GenParser Char ParserState [Inline]
tableCaption = try $ do
skipNonindentSpaces
- string "Table:"
+ string ":" <|> string "Table:"
result <- many1 inline
blanklines
return $ normalizeSpaces result
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
-
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
- (if headless then tableFooter else tableFooter <|> blanklines)
+ (return ())
+ (if headless then tableFooter else tableFooter <|> blanklines)
+ tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
@@ -832,10 +809,10 @@ simpleTable headless = do
multilineTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow tableFooter
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
@@ -859,7 +836,9 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -879,9 +858,14 @@ alignType strLst len =
(True, True) -> AlignCenter
(False, False) -> AlignDefault
+gridTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block tableCaption
+
table :: GenParser Char ParserState Block
table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <?> "table"
+ simpleTable False <|> multilineTable True <|>
+ gridTable False <|> gridTable True <?> "table"
--
-- inline
@@ -915,6 +899,7 @@ inlineParsers = [ str
, rawHtmlInline'
, rawLaTeXInline'
, escapedChar
+ , exampleRef
, symbol
, ltSign ]
@@ -950,6 +935,14 @@ ltSign = do
specialCharsMinusLt :: [Char]
specialCharsMinusLt = filter (/= '<') specialChars
+exampleRef :: GenParser Char ParserState Inline
+exampleRef = try $ do
+ char '@'
+ lab <- many1 (alphaNum <|> oneOf "-_")
+ -- We just return a Str. These are replaced with numbers
+ -- later. See the end of parseMarkdown.
+ return $ Str $ '@' : lab
+
symbol :: GenParser Char ParserState Inline
symbol = do
result <- oneOf specialCharsMinusLt
@@ -1070,30 +1063,28 @@ failIfInQuoteContext context = do
singleQuoteStart :: GenParser Char ParserState Char
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- (try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\'')
+ try $ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return '\''
singleQuoteEnd :: GenParser Char st Char
singleQuoteEnd = try $ do
- char '\8217' <|> char '\''
+ char '\''
notFollowedBy alphaNum
return '\''
doubleQuoteStart :: GenParser Char ParserState Char
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- char '\8220' <|>
- (try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"')
+ try $ do char '"'
+ notFollowedBy (oneOf " \t\n")
+ return '"'
doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '\8221' <|> char '"'
+doubleQuoteEnd = char '"'
ellipses :: GenParser Char st Inline
ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
@@ -1218,7 +1209,7 @@ referenceLink lab = do
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then lab else ref
state <- getState
- case lookupKeySrc (stateKeys state) ref' of
+ case lookupKeySrc (stateKeys state) (Key ref') of
Nothing -> fail "no corresponding key"
Just target -> return target
@@ -1303,7 +1294,7 @@ inlineCitation = try $ do
chkCit :: Target -> GenParser Char ParserState (Maybe Target)
chkCit t = do
st <- getState
- case lookupKeySrc (stateKeys st) [Str $ fst t] of
+ case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of
Just _ -> fail "This is a link"
Nothing -> if elem (fst t) $ stateCitations st
then return $ Just t
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 5e7ea512e..13afe5053 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,10 +31,13 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when, unless, liftM )
-import Data.List ( findIndex, delete, intercalate, transpose )
+import Control.Monad ( when, unless )
+import Data.List ( findIndex, intercalate, transpose, sort )
+import qualified Data.Map as M
+import Text.Printf ( printf )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@@ -93,9 +96,6 @@ parseRST = do
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
setInput docMinusKeys
setPosition startPos
- st <- getState
- let reversedKeys = stateKeys st
- updateState $ \s -> s { stateKeys = reverse reversedKeys }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -425,7 +425,7 @@ bulletListStart = try $ do
-- parses ordered list start and returns its length (inc following whitespace)
orderedListStart :: ListNumberStyle
-> ListNumberDelim
- -> GenParser Char st Int
+ -> GenParser Char ParserState Int
orderedListStart style delim = try $ do
(_, markerLen) <- withHorizDisplacement (orderedListMarker style delim)
white <- many1 spaceChar
@@ -540,10 +540,10 @@ referenceName = quotedReferenceName <|>
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
- key <- choice [imageKey, anonymousKey, regularKey]
+ (key, target) <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = key : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
@@ -558,28 +558,29 @@ targetURI = do
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
-imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+imageKey :: GenParser Char ParserState (Key, Target)
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
-anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
+anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- return ([Str "_"], (src, ""))
+ pos <- getPosition
+ return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
-regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
string ".. _"
ref <- referenceName
char ':'
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
--
-- tables
@@ -607,41 +608,20 @@ dashedLine ch = do
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-gridPart :: Char -> GenParser Char st (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
-
-- Parse a table row separator
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-gridTableSep :: Char -> GenParser Char ParserState Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
-- Parse a table footer
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-gridTableFooter :: GenParser Char ParserState [Char]
-gridTableFooter = blanklines
-
-- Parse a raw line and split it into chunks by indices.
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices $ removeTrailingSpace line)
-
-- Parse a table row and return a list of blocks (columns).
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
@@ -651,64 +631,13 @@ simpleTableRow indices = do
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
-gridTableRow :: [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
-
-compactifyCell :: [Block] -> [Block]
-compactifyCell bs = head $ compactify [bs]
-
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line =
- map removeFinalBar $ tail $ splitByIndices (init indices) line
-
-removeFinalBar :: String -> String
-removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
- reverse
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- return (rawHeads, aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- return (rawHeads, aligns, indices)
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
- footerParser
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let captions = [] -- no notion of captions in RST
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table captions aligns widths heads lines'
-
--- Parse a simple table with '---' header and one line per row.
+-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
@@ -889,17 +777,21 @@ explicitLink = try $ do
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
- key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
state <- getState
let keyTable = stateKeys state
+ let isAnonKey (Key [Str ('_':_)]) = True
+ isAnonKey _ = False
+ key <- option (Key label') $
+ do char '_'
+ let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
+ if null anonKeys
+ then pzero
+ else return (head anonKeys)
(src,tit) <- case lookupKeySrc keyTable key of
Nothing -> fail "no corresponding key"
Just target -> return target
- -- if anonymous link, remove first anon key so it won't be used again
- let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key
- else keyTable
- setState $ state { stateKeys = keyTable' }
+ -- if anonymous link, remove key so it won't be used again
+ when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
@@ -922,7 +814,7 @@ image = try $ do
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable ref of
+ (src,tit) <- case lookupKeySrc keyTable (Key ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 080354be1..b0c6e86d4 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -28,208 +28,67 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
module Text.Pandoc.Readers.TeXMath (
- readTeXMath
+ readTeXMath
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
+import Text.TeXMath.Parser
--- | Converts a string of raw TeX math to a list of 'Pandoc' inlines.
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted.
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of
- Left _ -> [Str inp] -- if unparseable, just include original
- Right res -> res
-
-teXMath :: GenParser Char st [Inline]
-teXMath = manyTill mathPart eof >>= return . concat
-
-mathPart :: GenParser Char st [Inline]
-mathPart = whitespace <|> superscript <|> subscript <|> symbol <|>
- argument <|> digits <|> letters <|> misc
-
-whitespace :: GenParser Char st [Inline]
-whitespace = many1 space >> return []
-
-symbol :: GenParser Char st [Inline]
-symbol = try $ do
- char '\\'
- res <- many1 letter
- case lookup res teXsymbols of
- Just m -> return [Str m]
- Nothing -> return [Str $ "\\" ++ res]
-
-argument :: GenParser Char st [Inline]
-argument = try $ do
- char '{'
- res <- many mathPart
- char '}'
- return $ if null res
- then [Str " "]
- else [Str "{"] ++ concat res ++ [Str "}"]
-
-digits :: GenParser Char st [Inline]
-digits = do
- res <- many1 digit
- return [Str res]
-
-letters :: GenParser Char st [Inline]
-letters = do
- res <- many1 letter
- return [Emph [Str res]]
-
-misc :: GenParser Char st [Inline]
-misc = do
- res <- noneOf "}"
- return [Str [res]]
-
-scriptArg :: GenParser Char st [Inline]
-scriptArg = try $ do
- (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r}))
- <|> symbol
- <|> (do{c <- (letter <|> digit); return [Str [c]]})
-
-superscript :: GenParser Char st [Inline]
-superscript = try $ do
- char '^'
- arg <- scriptArg
- return [Superscript arg]
-
-subscript :: GenParser Char st [Inline]
-subscript = try $ do
- char '_'
- arg <- scriptArg
- return [Subscript arg]
-
-withThinSpace :: String -> String
-withThinSpace str = "\x2009" ++ str ++ "\x2009"
-
-teXsymbols :: [(String, String)]
-teXsymbols =
- [("alpha","\x3B1")
- ,("beta", "\x3B2")
- ,("chi", "\x3C7")
- ,("delta", "\x3B4")
- ,("Delta", "\x394")
- ,("epsilon", "\x3B5")
- ,("varepsilon", "\x25B")
- ,("eta", "\x3B7")
- ,("gamma", "\x3B3")
- ,("Gamma", "\x393")
- ,("iota", "\x3B9")
- ,("kappa", "\x3BA")
- ,("lambda", "\x3BB")
- ,("Lambda", "\x39B")
- ,("mu", "\x3BC")
- ,("nu", "\x3BD")
- ,("omega", "\x3C9")
- ,("Omega", "\x3A9")
- ,("phi", "\x3C6")
- ,("varphi", "\x3D5")
- ,("Phi", "\x3A6")
- ,("pi", "\x3C0")
- ,("Pi", "\x3A0")
- ,("psi", "\x3C8")
- ,("Psi", "\x3A8")
- ,("rho", "\x3C1")
- ,("sigma", "\x3C3")
- ,("Sigma", "\x3A3")
- ,("tau", "\x3C4")
- ,("theta", "\x3B8")
- ,("vartheta", "\x3D1")
- ,("Theta", "\x398")
- ,("upsilon", "\x3C5")
- ,("xi", "\x3BE")
- ,("Xi", "\x39E")
- ,("zeta", "\x3B6")
- ,("ne", "\x2260")
- ,("lt", withThinSpace "<")
- ,("le", withThinSpace "\x2264")
- ,("leq", withThinSpace "\x2264")
- ,("ge", withThinSpace "\x2265")
- ,("geq", withThinSpace "\x2265")
- ,("prec", withThinSpace "\x227A")
- ,("succ", withThinSpace "\x227B")
- ,("preceq", withThinSpace "\x2AAF")
- ,("succeq", withThinSpace "\x2AB0")
- ,("in", withThinSpace "\x2208")
- ,("notin", withThinSpace "\x2209")
- ,("subset", withThinSpace "\x2282")
- ,("supset", withThinSpace "\x2283")
- ,("subseteq", withThinSpace "\x2286")
- ,("supseteq", withThinSpace "\x2287")
- ,("equiv", withThinSpace "\x2261")
- ,("cong", withThinSpace "\x2245")
- ,("approx", withThinSpace "\x2248")
- ,("propto", withThinSpace "\x221D")
- ,("cdot", withThinSpace "\x22C5")
- ,("star", withThinSpace "\x22C6")
- ,("backslash", "\\")
- ,("times", withThinSpace "\x00D7")
- ,("divide", withThinSpace "\x00F7")
- ,("circ", withThinSpace "\x2218")
- ,("oplus", withThinSpace "\x2295")
- ,("otimes", withThinSpace "\x2297")
- ,("odot", withThinSpace "\x2299")
- ,("sum", "\x2211")
- ,("prod", "\x220F")
- ,("wedge", withThinSpace "\x2227")
- ,("bigwedge", withThinSpace "\x22C0")
- ,("vee", withThinSpace "\x2228")
- ,("bigvee", withThinSpace "\x22C1")
- ,("cap", withThinSpace "\x2229")
- ,("bigcap", withThinSpace "\x22C2")
- ,("cup", withThinSpace "\x222A")
- ,("bigcup", withThinSpace "\x22C3")
- ,("neg", "\x00AC")
- ,("implies", withThinSpace "\x21D2")
- ,("iff", withThinSpace "\x21D4")
- ,("forall", "\x2200")
- ,("exists", "\x2203")
- ,("bot", "\x22A5")
- ,("top", "\x22A4")
- ,("vdash", "\x22A2")
- ,("models", withThinSpace "\x22A8")
- ,("uparrow", "\x2191")
- ,("downarrow", "\x2193")
- ,("rightarrow", withThinSpace "\x2192")
- ,("to", withThinSpace "\x2192")
- ,("rightarrowtail", "\x21A3")
- ,("twoheadrightarrow", withThinSpace "\x21A0")
- ,("twoheadrightarrowtail", withThinSpace "\x2916")
- ,("mapsto", withThinSpace "\x21A6")
- ,("leftarrow", withThinSpace "\x2190")
- ,("leftrightarrow", withThinSpace "\x2194")
- ,("Rightarrow", withThinSpace "\x21D2")
- ,("Leftarrow", withThinSpace "\x21D0")
- ,("Leftrightarrow", withThinSpace "\x21D4")
- ,("partial", "\x2202")
- ,("nabla", "\x2207")
- ,("pm", "\x00B1")
- ,("emptyset", "\x2205")
- ,("infty", "\x221E")
- ,("aleph", "\x2135")
- ,("ldots", "...")
- ,("therefore", "\x2234")
- ,("angle", "\x2220")
- ,("quad", "\x00A0\x00A0")
- ,("cdots", "\x22EF")
- ,("vdots", "\x22EE")
- ,("ddots", "\x22F1")
- ,("diamond", "\x22C4")
- ,("Box", "\x25A1")
- ,("lfloor", "\x230A")
- ,("rfloor", "\x230B")
- ,("lceiling", "\x2308")
- ,("rceiling", "\x2309")
- ,("langle", "\x2329")
- ,("rangle", "\x232A")
- ,("int", "\8747")
- ,("{", "{")
- ,("}", "}")
- ,("[", "[")
- ,("]", "]")
- ,("|", "|")
- ,("||", "||")
- ]
+readTeXMath inp = case readTeXMath' inp of
+ Nothing -> [Str ("$" ++ inp ++ "$")]
+ Just res -> res
+
+-- | Like 'readTeXMath', but without the default.
+readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> Maybe [Inline]
+readTeXMath' inp = case parse formula "formula" inp of
+ Left _ -> Just [Str inp]
+ Right exps -> expsToInlines exps
+
+expsToInlines :: [Exp] -> Maybe [Inline]
+expsToInlines xs = do
+ res <- mapM expToInlines xs
+ return (concat res)
+
+expToInlines :: Exp -> Maybe [Inline]
+expToInlines (ENumber s) = Just [Str s]
+expToInlines (EIdentifier s) = Just [Emph [Str s]]
+expToInlines (EMathOperator s) = Just [Str s]
+expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
+ where addSpace Op x = [x, thinspace]
+ addSpace Bin x = [medspace, x, medspace]
+ addSpace Rel x = [widespace, x, widespace]
+ addSpace Pun x = [x, thinspace]
+ addSpace _ x = [x]
+ thinspace = Str "\x2006"
+ medspace = Str "\x2005"
+ widespace = Str "\x2004"
+expToInlines (EStretchy x) = expToInlines x
+expToInlines (EGrouped xs) = expsToInlines xs
+expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported
+expToInlines (EBinary _ _ _) = Nothing
+expToInlines (ESub x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Subscript y']
+expToInlines (ESuper x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Superscript y']
+expToInlines (ESubsup x y z) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ z' <- expToInlines z
+ return $ x' ++ [Subscript y'] ++ [Superscript z']
+expToInlines (EDown x y) = expToInlines (ESub x y)
+expToInlines (EUp x y) = expToInlines (ESuper x y)
+expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
+expToInlines (EText _ x) = Just [Emph [Str x]]
+expToInlines _ = Nothing
diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs
new file mode 100644
index 000000000..1567a3ede
--- /dev/null
+++ b/src/Text/Pandoc/S5.hs
@@ -0,0 +1,57 @@
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.S5
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Definitions for creation of S5 powerpoint-like HTML.
+(See <http://meyerweb.com/eric/tools/s5/>.)
+-}
+module Text.Pandoc.S5 ( s5HeaderIncludes) where
+import Text.Pandoc.Shared ( readDataFile )
+import System.FilePath ( (</>) )
+
+s5HeaderIncludes :: Maybe FilePath -> IO String
+s5HeaderIncludes datadir = do
+ c <- s5CSS datadir
+ j <- s5Javascript datadir
+ return $ c ++ j
+
+s5Javascript :: Maybe FilePath -> IO String
+s5Javascript datadir = do
+ jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment"
+ jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed"
+ return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++
+ "</script>\n"
+
+s5CSS :: Maybe FilePath -> IO String
+s5CSS datadir = do
+ s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css"
+ s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css"
+ s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css"
+ s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css"
+ s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css"
+ s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css"
+ return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 26aff4250..633708046 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -45,54 +45,15 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
unescapeURI,
+ tabFilter,
+ -- * Prettyprinting
wrapped,
wrapIfNeeded,
wrappedTeX,
wrapTeXIfNeeded,
BlockWrapper (..),
wrappedBlocksToDoc,
- tabFilter,
- -- * Parsing
- (>>~),
- anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- spaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- charsInBalanced',
- romanNumeral,
- emailAddress,
- uri,
- withHorizDisplacement,
- nullBlock,
- failIfStrict,
- failUnlessLHS,
- escaped,
- anyOrderedListMarker,
- orderedListMarker,
- charRef,
- readWith,
- testStringWith,
- ParserState (..),
- defaultParserState,
- HeaderType (..),
- ParserContext (..),
- QuoteContext (..),
- NoteTable,
- KeyTable,
- lookupKeySrc,
- refsMatch,
- -- * Prettyprinting
hang',
- prettyPandoc,
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
@@ -101,9 +62,11 @@ module Text.Pandoc.Shared (
hierarchicalize,
uniqueIdent,
isHeaderBlock,
+ headerShift,
-- * Writer options
HTMLMathMethod (..),
ObfuscationMethod (..),
+ HTMLSlideVariant (..),
WriterOptions (..),
defaultWriterOptions,
-- * File handling
@@ -112,27 +75,18 @@ module Text.Pandoc.Shared (
) where
import Text.Pandoc.Definition
-import Text.ParserCombinators.Parsec
+import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
import qualified Text.PrettyPrint.HughesPJ as PP
-import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii,
+import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
isLetter, isDigit )
import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString )
+import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
import System.Directory
import System.FilePath ( (</>) )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
-import Data.Generics
+import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
-import Control.Monad (join)
import Paths_pandoc (getDataFileName)
--
@@ -157,11 +111,11 @@ splitByIndices (x:xs) lst =
-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
-substitute [] _ lst = lst
-substitute target replacement lst =
+substitute [] _ xs = xs
+substitute target replacement lst@(x:xs) =
if target `isPrefixOf` lst
- then replacement ++ (substitute target replacement $ drop (length target) lst)
- else (head lst):(substitute target replacement $ tail lst)
+ then replacement ++ substitute target replacement (drop (length target) lst)
+ else x : substitute target replacement xs
--
-- Text processing
@@ -243,6 +197,30 @@ unescapeURI :: String -> String
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
decodeString . unEscapeString
+-- | Convert tabs to spaces and filter out DOS line endings.
+-- Tabs will be preserved if tab stop is set to 0.
+tabFilter :: Int -- ^ Tab stop
+ -> String -- ^ Input
+ -> String
+tabFilter tabStop =
+ let go _ [] = ""
+ go _ ('\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':'\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':xs) = '\n' : go tabStop xs
+ go spsToNextStop ('\t':xs) =
+ if tabStop == 0
+ then '\t' : go tabStop xs
+ else replicate spsToNextStop ' ' ++ go tabStop xs
+ go 1 (x:xs) =
+ x : go tabStop xs
+ go spsToNextStop (x:xs) =
+ x : go (spsToNextStop - 1) xs
+ in go tabStop
+
+--
+-- Prettyprinting
+--
+
-- | Wrap inlines to line length.
wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
@@ -312,538 +290,10 @@ wrappedBlocksToDoc = foldr addBlock empty
addBlock (Pad d) accum = d $$ text "" $$ accum
addBlock (Reg d) accum = d $$ accum
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
-tabFilter :: Int -- ^ Tab stop
- -> String -- ^ Input
- -> String
-tabFilter tabStop =
- let go _ [] = ""
- go _ ('\n':xs) = '\n' : go tabStop xs
- go _ ('\r':'\n':xs) = '\n' : go tabStop xs
- go _ ('\r':xs) = '\n' : go tabStop xs
- go spsToNextStop ('\t':xs) =
- if tabStop == 0
- then '\t' : go tabStop xs
- else replicate spsToNextStop ' ' ++ go tabStop xs
- go 1 (x:xs) =
- x : go tabStop xs
- go spsToNextStop (x:xs) =
- x : go (spsToNextStop - 1) xs
- in go tabStop
-
---
--- Parsing
---
-
--- | Like >>, but returns the operation on the left.
--- (Suggested by Tillmann Rendel on Haskell-cafe list.)
-(>>~) :: (Monad m) => m a -> m b -> m a
-a >>~ b = a >>= \x -> b >> return x
-
--- | Parse any line of text
-anyLine :: GenParser Char st [Char]
-anyLine = manyTill anyChar newline
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
-many1Till p end = do
- first <- p
- rest <- manyTill p end
- return (first:rest)
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
-notFollowedBy' p = try $ join $ do a <- try p
- return (unexpected (show a))
- <|>
- return (return ())
--- (This version due to Andrew Pimlott on the Haskell mailing list.)
-
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-
--- | Parses a space or tab.
-spaceChar :: CharParser st Char
-spaceChar = char ' ' <|> char '\t'
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
-blankline = try $ skipSpaces >> newline
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
-blanklines = many1 blankline
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try $
- start >> notFollowedBy space >> many1Till parser end
-
--- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = do
- firstChar <- char (toUpper x) <|> char (toLower x)
- rest <- stringAnyCase xs
- return (firstChar:rest)
-
--- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
-parseFromString parser str = do
- oldPos <- getPosition
- oldInput <- getInput
- setInput str
- result <- parser
- setInput oldInput
- setPosition oldPos
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)". Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close, '\n']))
- <|> (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> try (string "\n" >>~ notFollowedBy' blanklines)
- char close
- return $ concat raw
-
--- | Like @charsInBalanced@, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close]))
- <|> (do res <- charsInBalanced' open close
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
-
--- Auxiliary functions for romanNumeral:
-
-lowercaseRomanDigits :: [Char]
-lowercaseRomanDigits = ['i','v','x','l','c','d','m']
-
-uppercaseRomanDigits :: [Char]
-uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -- ^ Uppercase if true
- -> GenParser Char st Int
-romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
- else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
- map char romanDigits
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- Parsers for email addresses and URIs
-
-emailChar :: GenParser Char st Char
-emailChar = alphaNum <|> oneOf "-+_."
-
-domainChar :: GenParser Char st Char
-domainChar = alphaNum <|> char '-'
-
-domain :: GenParser Char st [Char]
-domain = do
- first <- many1 domainChar
- dom <- many1 $ try (char '.' >> many1 domainChar )
- return $ intercalate "." (first:dom)
-
--- | Parses an email address; returns original and corresponding
--- escaped mailto: URI.
-emailAddress :: GenParser Char st (String, String)
-emailAddress = try $ do
- firstLetter <- alphaNum
- restAddr <- many emailChar
- let addr = firstLetter:restAddr
- char '@'
- dom <- domain
- let full = addr ++ '@':dom
- return (full, escapeURI $ "mailto:" ++ full)
-
--- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: GenParser Char st (String, String)
-uri = try $ do
- let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
- "news:", "telnet:" ]
- lookAhead $ oneOfStrings protocols
- -- scan non-ascii characters and ascii characters allowed in a URI
- str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c)
- -- now see if they amount to an absolute URI
- case parseURI (escapeURI str) of
- Just uri' -> if uriScheme uri' `elem` protocols
- then return (str, show uri')
- else fail "not a URI"
- Nothing -> fail "not a URI"
-
--- | Applies a parser, returns tuple of its results and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
--- | Parses a character and returns 'Null' (so that the parser can move on
--- if it gets stuck).
-nullBlock :: GenParser Char st Block
-nullBlock = anyChar >> return Null
-
--- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser Char ParserState ()
-failIfStrict = do
- state <- getState
- if stateStrict state then fail "strict mode" else return ()
-
--- | Fail unless we're in literate haskell mode.
-failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = do
- state <- getState
- if stateLiterateHaskell state then return () else fail "Literate haskell feature"
-
--- | Parses backslash, then applies character parser.
-escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Inline
-escaped parser = try $ do
- char '\\'
- result <- parser
- return (Str [result])
-
--- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: GenParser Char st (ListNumberStyle, Int)
-upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
-
--- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: GenParser Char st (ListNumberStyle, Int)
-lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
-
--- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: GenParser Char st (ListNumberStyle, Int)
-decimal = do
- num <- many1 digit
- return (Decimal, read num)
-
--- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: GenParser Char st (ListNumberStyle, Int)
-defaultNum = do
- char '#'
- return (DefaultStyle, 1)
-
--- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
-lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
-
--- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: GenParser Char st (ListNumberStyle, Int)
-upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
-
--- | Parses a roman numeral i or I
-romanOne :: GenParser Char st (ListNumberStyle, Int)
-romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
- (char 'I' >> return (UpperRoman, 1))
-
--- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: GenParser Char st ListAttributes
-anyOrderedListMarker = choice $
- [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-
--- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
-
--- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
-
--- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
-
--- | Parses an ordered list marker with a given style and delimiter,
--- returns number.
-orderedListMarker :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char st Int
-orderedListMarker style delim = do
- let num = defaultNum <|> -- # can continue any kind of list
- case style of
- DefaultStyle -> decimal
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, _, _) <- context num
- return start
-
--- | Parses a character reference and returns a Str element.
-charRef :: GenParser Char st Inline
-charRef = do
- c <- characterReference
- return $ Str [c]
-
--- | Parse a string with a given parser and state.
-readWith :: GenParser Char ParserState a -- ^ parser
- -> ParserState -- ^ initial state
- -> String -- ^ input string
- -> a
-readWith parser state input =
- case runParser parser state "source" input of
- Left err -> error $ "\nError:\n" ++ show err
- Right result -> result
-
--- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => GenParser Char ParserState a
- -> String
- -> IO ()
-testStringWith parser str = putStrLn $ show $
- readWith parser defaultParserState str
-
--- | Parsing options.
-data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
- stateKeys :: KeyTable, -- ^ List of reference keys
-#ifdef _CITEPROC
- stateCitations :: [String], -- ^ List of available citations
-#endif
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ Parse bibliographic info?
- stateTitle :: [Inline], -- ^ Title of document
- stateAuthors :: [[Inline]], -- ^ Authors of document
- stateDate :: [Inline], -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax?
- stateSmart :: Bool, -- ^ Use smart typography?
- stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
- stateColumns :: Int, -- ^ Number of columns in terminal
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateIndentedCodeClasses :: [String] -- ^ Classes to use for indented code blocks
- }
- deriving Show
-
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateSanitizeHTML = False,
- stateKeys = [],
-#ifdef _CITEPROC
- stateCitations = [],
-#endif
- stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
- stateTitle = [],
- stateAuthors = [],
- stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateLiterateHaskell = False,
- stateColumns = 80,
- stateHeaderTable = [],
- stateIndentedCodeClasses = [] }
-
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
-
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
-
-data QuoteContext
- = InSingleQuote -- ^ Used when parsing inside single quotes
- | InDoubleQuote -- ^ Used when parsing inside double quotes
- | NoQuote -- ^ Used when not parsing inside quotes
- deriving (Eq, Show)
-
-type NoteTable = [(String, String)]
-
-type KeyTable = [([Inline], Target)]
-
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> [Inline] -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case find (refsMatch key . fst) table of
- Nothing -> Nothing
- Just (_, src) -> Just src
-
--- | Returns @True@ if keys match (case insensitive).
-refsMatch :: [Inline] -> [Inline] -> Bool
-refsMatch ((Str x):restx) ((Str y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Emph x):restx) ((Emph y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strong x):restx) ((Strong y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Superscript x):restx) ((Superscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Subscript x):restx) ((Subscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
- t == u && refsMatch x y && refsMatch restx resty
-refsMatch ((Code x):restx) ((Code y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Math t x):restx) ((Math u y):resty) =
- ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
-refsMatch ((TeX x):restx) ((TeX y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
-refsMatch [] x = null x
-refsMatch x [] = null x
-
---
--- Prettyprinting
---
-
-- | A version of hang that works like the version in pretty-1.0.0.0
hang' :: Doc -> Int -> Doc -> Doc
hang' d1 n d2 = d1 $$ (nest n d2)
--- | Indent string as a block.
-indentBy :: Int -- ^ Number of spaces to indent the block
- -> Int -- ^ Number of spaces (rel to block) to indent first line
- -> String -- ^ Contents of block to indent
- -> String
-indentBy _ _ [] = ""
-indentBy num first str =
- let (firstLine:restLines) = lines str
- firstLineIndent = num + first
- in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
- (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
-
--- | Prettyprint list of Pandoc blocks elements.
-prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
- -> [Block] -- ^ List of blocks
- -> String
-prettyBlockList indent [] = indentBy indent 0 "[]"
-prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
- (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
-
--- | Prettyprint Pandoc block element.
-prettyBlock :: Block -> String
-prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
- (prettyBlockList 2 blocks)
-prettyBlock (OrderedList attribs blockLists) =
- "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
- (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
- blockLists)) ++ " ]"
-prettyBlock (BulletList blockLists) = "BulletList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
-prettyBlock (DefinitionList items) = "DefinitionList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate "\n, "
- (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++
- indentBy 3 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++
- ")") items))) ++ " ]"
-prettyBlock (Table caption aligns widths header rows) =
- "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
- show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
- (intercalate ",\n" (map prettyRow rows)) ++ " ]"
- where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks)
- cols))) ++ " ]"
-prettyBlock block = show block
-
--- | Prettyprint Pandoc document.
-prettyPandoc :: Pandoc -> String
-prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
- ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
-
--
-- Pandoc block and inline list processing
--
@@ -855,6 +305,7 @@ orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
+ Example -> map show [start..]
Decimal -> map show [start..]
UpperAlpha -> drop (start - 1) $ cycle $
map singleton ['A'..'Z']
@@ -916,11 +367,11 @@ data Element = Blk Block
-- | Convert Pandoc inline list to plain text identifier. HTML
-- identifiers must start with a letter, and may contain only
--- letters, digits, and the characters _-:.
+-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
dropWhile (not . isAlpha) . intercalate "-" . words . map toLower .
- filter (\c -> isLetter c || isDigit c || c `elem` "_-:. ") .
+ filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
concatMap extractText
where extractText x = case x of
Str s -> s
@@ -991,6 +442,13 @@ isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
+-- | Shift header levels up or down.
+headerShift :: Int -> Pandoc -> Pandoc
+headerShift n = processWith shift
+ where shift :: Block -> Block
+ shift (Header level inner) = Header (level + n) inner
+ shift x = x
+
--
-- Writer options
--
@@ -999,7 +457,7 @@ data HTMLMathMethod = PlainMath
| LaTeXMathML (Maybe String) -- url of LaTeXMathML.js
| JsMath (Maybe String) -- url of jsMath load script
| GladTeX
- | MimeTeX String -- url of mimetex.cgi
+ | WebTeX String -- url of TeX->image script.
| MathML (Maybe String) -- url of MathMLinHTML.js
deriving (Show, Read, Eq)
@@ -1009,27 +467,35 @@ data ObfuscationMethod = NoObfuscation
| JavascriptObfuscation
deriving (Show, Read, Eq)
+-- | Varieties of HTML slide shows.
+data HTMLSlideVariant = S5Slides
+ | SlidySlides
+ | NoSlides
+ deriving (Show, Read, Eq)
+
-- | Options for writers
data WriterOptions = WriterOptions
{ writerStandalone :: Bool -- ^ Include header and footer
, writerTemplate :: String -- ^ Template to use in standalone mode
, writerVariables :: [(String, String)] -- ^ Variables to set in template
- , writerIncludeBefore :: String -- ^ Text to include before the body
- , writerIncludeAfter :: String -- ^ Text to include after the body
+ , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
, writerTableOfContents :: Bool -- ^ Include table of contents
- , writerS5 :: Bool -- ^ We're writing S5
+ , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5 or Slidy?
+ , writerIncremental :: Bool -- ^ True if lists should be incremental
, writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex
, writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
, writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerIncremental :: Bool -- ^ Incremental S5 lists
, writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, writerWrapText :: Bool -- ^ Wrap text to line length
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
, writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
+ , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file
+ , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
} deriving Show
-- | Default writer options.
@@ -1038,22 +504,24 @@ defaultWriterOptions =
WriterOptions { writerStandalone = False
, writerTemplate = ""
, writerVariables = []
- , writerIncludeBefore = ""
- , writerIncludeAfter = ""
+ , writerEPUBMetadata = ""
, writerTabStop = 4
, writerTableOfContents = False
- , writerS5 = False
+ , writerSlideVariant = NoSlides
+ , writerIncremental = False
, writerXeTeX = False
, writerHTMLMathMethod = PlainMath
, writerIgnoreNotes = False
- , writerIncremental = False
, writerNumberSections = False
+ , writerSectionDivs = True
, writerStrictMarkdown = False
, writerReferenceLinks = False
, writerWrapText = True
, writerLiterateHaskell = False
, writerEmailObfuscation = JavascriptObfuscation
, writerIdentifierPrefix = ""
+ , writerSourceDirectory = "."
+ , writerUserDataDir = Nothing
}
--
@@ -1074,6 +542,6 @@ inDirectory path action = do
readDataFile :: Maybe FilePath -> FilePath -> IO String
readDataFile userDir fname =
case userDir of
- Nothing -> getDataFileName fname >>= readFile
- Just u -> catch (readFile $ u </> fname)
- (\_ -> getDataFileName fname >>= readFile)
+ Nothing -> getDataFileName fname >>= UTF8.readFile
+ Just u -> catch (UTF8.readFile $ u </> fname)
+ (\_ -> getDataFileName fname >>= UTF8.readFile)
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 2238f4da8..c8ddc3abf 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -83,7 +83,6 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
-> String -- ^ Name of writer
-> IO (Either E.IOException String)
getDefaultTemplate _ "native" = return $ Right ""
-getDefaultTemplate user "s5" = getDefaultTemplate user "html"
getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
getDefaultTemplate user writer = do
let format = takeWhile (/='+') writer -- strip off "+lhs" if present
@@ -173,7 +172,7 @@ for = try $ do
string "$for("
id' <- ident
string ")$"
- -- if newline after the "if", then a newline after "endif" will be swallowed
+ -- if newline after the "for", then a newline after "endfor" will be swallowed
multiline <- option False $ try $ skipEndline >> return True
let matches = filter (\(k,_) -> k == id') vars
let indent = replicate pos ' '
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..96d6e6218
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,72 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UTF8
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12.
+-}
+module Text.Pandoc.UTF8 ( readFile
+ , writeFile
+ , getContents
+ , putStr
+ , putStrLn
+ , hPutStr
+ , hPutStrLn
+ )
+
+where
+import qualified Data.ByteString as B
+import Data.ByteString.UTF8 (toString, fromString)
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
+import System.IO (Handle)
+import Control.Monad (liftM)
+
+bom :: B.ByteString
+bom = B.pack [0xEF, 0xBB, 0xBF]
+
+stripBOM :: B.ByteString -> B.ByteString
+stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
+stripBOM s = s
+
+readFile :: FilePath -> IO String
+readFile = liftM (toString . stripBOM) . B.readFile
+
+writeFile :: FilePath -> String -> IO ()
+writeFile f = B.writeFile f . fromString
+
+getContents :: IO String
+getContents = liftM (toString . stripBOM) B.getContents
+
+putStr :: String -> IO ()
+putStr = B.putStr . fromString
+
+putStrLn :: String -> IO ()
+putStrLn = B.putStrLn . fromString
+
+hPutStr :: Handle -> String -> IO ()
+hPutStr h = B.hPutStr h . fromString
+
+hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn h s = hPutStr h (s ++ "\n")
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
new file mode 100644
index 000000000..082644eea
--- /dev/null
+++ b/src/Text/Pandoc/UUID.hs
@@ -0,0 +1,77 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UUID
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UUID generation using Version 4 (random method) described
+in RFC4122. See http://tools.ietf.org/html/rfc4122
+-}
+
+module Text.Pandoc.UUID ( UUID, getRandomUUID ) where
+
+import Text.Printf ( printf )
+import System.Random ( randomIO )
+import Data.Word
+import Data.Bits ( setBit, clearBit )
+import Control.Monad ( liftM )
+
+data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+ Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+
+instance Show UUID where
+ show (UUID a b c d e f g h i j k l m n o p) =
+ "urn:uuid:" ++
+ printf "%02x" a ++
+ printf "%02x" b ++
+ printf "%02x" c ++
+ printf "%02x" d ++
+ "-" ++
+ printf "%02x" e ++
+ printf "%02x" f ++
+ "-" ++
+ printf "%02x" g ++
+ printf "%02x" h ++
+ "-" ++
+ printf "%02x" i ++
+ printf "%02x" j ++
+ "-" ++
+ printf "%02x" k ++
+ printf "%02x" l ++
+ printf "%02x" m ++
+ printf "%02x" n ++
+ printf "%02x" o ++
+ printf "%02x" p
+
+getRandomUUID :: IO UUID
+getRandomUUID = do
+ let getRN :: a -> IO Word8
+ getRN _ = liftM fromIntegral (randomIO :: IO Int)
+ [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int])
+ -- set variant
+ let i' = i `setBit` 7 `clearBit` 6
+ -- set version (0100 for random)
+ let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
+ return $ UUID a b c d e f g' h i' j k l m n o p
+
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 32948e292..395bc2d30 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -64,7 +64,7 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do
then return ""
else liftM render $ inlineListToConTeXt date
body <- blockListToConTeXt blocks
- let main = render body
+ let main = render $ body $$ text ""
let context = writerVariables options ++
[ ("toc", if writerTableOfContents options then "yes" else "")
, ("body", main)
@@ -153,6 +153,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
let style'' = case style' of
DefaultStyle -> orderedListStyles !! level
Decimal -> "[n]"
+ Example -> "[n]"
LowerRoman -> "[r]"
UpperRoman -> "[R]"
LowerAlpha -> "[a]"
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 3abed1610..5223259eb 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -154,6 +154,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
let attribs = case numstyle of
DefaultStyle -> []
Decimal -> [("numeration", "arabic")]
+ Example -> [("numeration", "arabic")]
UpperAlpha -> [("numeration", "upperalpha")]
LowerAlpha -> [("numeration", "loweralpha")]
UpperRoman -> [("numeration", "upperroman")]
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
new file mode 100644
index 000000000..deaa2fe33
--- /dev/null
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -0,0 +1,283 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.EPUB
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to EPUB.
+-}
+module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
+import Data.IORef
+import Data.Maybe ( fromMaybe, isNothing )
+import Data.List ( findIndices, isPrefixOf )
+import System.Environment ( getEnv )
+import System.FilePath ( (</>), takeBaseName, takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Text.Pandoc.Shared hiding ( Element )
+import Text.Pandoc.Definition
+import Control.Monad (liftM)
+import Text.XML.Light hiding (ppTopElement)
+import Text.Pandoc.UUID
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.Markdown ( writePlain )
+import Data.Char ( toLower )
+
+-- | Produce an EPUB file from a Pandoc document.
+writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> IO B.ByteString
+writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
+ (TOD epochtime _) <- getClockTime
+ let mkEntry path content = toEntry path epochtime content
+ let opts' = opts{ writerEmailObfuscation = NoObfuscation
+ , writerStandalone = True
+ , writerWrapText = False }
+ let sourceDir = writerSourceDirectory opts'
+
+ -- title page
+ let vars = writerVariables opts'
+ let tpContent = fromString $ writeHtmlString
+ opts'{writerTemplate = pageTemplate
+ ,writerVariables = ("titlepage","yes"):vars}
+ (Pandoc meta [])
+ let tpEntry = mkEntry "title_page.xhtml" tpContent
+
+ -- handle pictures
+ picsRef <- newIORef []
+ Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM
+ (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
+ pics <- readIORef picsRef
+ let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
+ return e{ eRelativePath = newsrc }
+ picEntries <- mapM readPicEntry pics
+
+ -- body pages
+ let isH1 (Header 1 _) = True
+ isH1 _ = False
+ let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
+ let chunks = splitByIndices h1Indices blocks
+ let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
+ titleize xs = Pandoc meta xs
+ let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate
+ , writerHTMLMathMethod = PlainMath }
+ let chapters = map titleize chunks
+ let chapterToEntry :: Int -> Pandoc -> Entry
+ chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
+ fromString $ chapToHtml chap
+ let chapterEntries = zipWith chapterToEntry [1..] chapters
+
+ -- contents.opf
+ lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
+ (\_ -> return "en-US")
+ uuid <- getRandomUUID
+ let chapterNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", "application/xhtml+xml")] $ ()
+ let chapterRefNode ent = unode "itemref" !
+ [("idref", takeBaseName $ eRelativePath ent)] $ ()
+ let pictureNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", fromMaybe "application/octet-stream"
+ $ imageTypeOf $ eRelativePath ent)] $ ()
+ let plainify t = removeTrailingSpace $
+ writePlain opts'{ writerStandalone = False } $
+ Pandoc meta [Plain t]
+ let plainTitle = plainify $ docTitle meta
+ let plainAuthors = map plainify $ docAuthors meta
+ let contentsData = fromString $ ppTopElement $
+ unode "package" ! [("version","2.0")
+ ,("xmlns","http://www.idpf.org/2007/opf")
+ ,("unique-identifier","BookId")] $
+ [ metadataElement (writerEPUBMetadata opts')
+ uuid lang plainTitle plainAuthors
+ , unode "manifest" $
+ [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
+ ,("media-type","application/x-dtbncx+xml")] $ ()
+ , unode "item" ! [("id","style"), ("href","stylesheet.css")
+ ,("media-type","text/css")] $ ()
+ ] ++
+ map chapterNode (tpEntry : chapterEntries) ++
+ map pictureNode picEntries
+ , unode "spine" ! [("toc","ncx")] $
+ map chapterRefNode (tpEntry : chapterEntries)
+ ]
+ let contentsEntry = mkEntry "content.opf" contentsData
+
+ -- toc.ncx
+ let navPointNode ent n tit = unode "navPoint" !
+ [("id", "navPoint-" ++ show n)
+ ,("playOrder", show n)] $
+ [ unode "navLabel" $ unode "text" tit
+ , unode "content" ! [("src",
+ eRelativePath ent)] $ ()
+ ]
+ let tocData = fromString $ ppTopElement $
+ unode "ncx" ! [("version","2005-1")
+ ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
+ [ unode "head"
+ [ unode "meta" ! [("name","dtb:uid")
+ ,("content", show uuid)] $ ()
+ , unode "meta" ! [("name","dtb:depth")
+ ,("content", "1")] $ ()
+ , unode "meta" ! [("name","dtb:totalPageCount")
+ ,("content", "0")] $ ()
+ , unode "meta" ! [("name","dtb:maxPageNumber")
+ ,("content", "0")] $ ()
+ ]
+ , unode "docTitle" $ unode "text" $ plainTitle
+ , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries)
+ [1..(length chapterEntries + 1)]
+ ("Title Page" : map (\(Pandoc m _) ->
+ plainify $ docTitle m) chapters)
+ ]
+ let tocEntry = mkEntry "toc.ncx" tocData
+
+ -- mimetype
+ let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip"
+
+ -- container.xml
+ let containerData = fromString $ ppTopElement $
+ unode "container" ! [("version","1.0")
+ ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
+ unode "rootfiles" $
+ unode "rootfile" ! [("full-path","content.opf")
+ ,("media-type","application/oebps-package+xml")] $ ()
+ let containerEntry = mkEntry "META-INF/container.xml" containerData
+
+ -- stylesheet
+ stylesheet <- case mbStylesheet of
+ Just s -> return s
+ Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
+ let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
+
+ -- construct archive
+ let archive = foldr addEntryToArchive emptyArchive
+ (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+ contentsEntry : tocEntry : (picEntries ++ chapterEntries) )
+ return $ fromArchive archive
+
+metadataElement :: String -> UUID -> String -> String -> [String] -> Element
+metadataElement metadataXML uuid lang title authors =
+ let userNodes = parseXML metadataXML
+ elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
+ filter isDublinCoreElement $ onlyElems userNodes
+ dublinElements = ["contributor","coverage","creator","date",
+ "description","format","identifier","language","publisher",
+ "relation","rights","source","subject","title","type"]
+ isDublinCoreElement e = qPrefix (elName e) == Just "dc" &&
+ qName (elName e) `elem` dublinElements
+ contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
+ newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
+ [ unode "dc:language" lang | not (elt `contains` "language") ] ++
+ [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
+ not (elt `contains` "identifier") ] ++
+ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ]
+ in elt{ elContent = elContent elt ++ map Elem newNodes }
+
+transformInlines :: HTMLMathMethod
+ -> FilePath
+ -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+ -> [Inline]
+ -> IO [Inline]
+transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) =
+ return $ Emph lab : xs
+transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
+ pics <- readIORef picsRef
+ let oldsrc = sourceDir </> src
+ let ext = takeExtension src
+ newsrc <- case lookup oldsrc pics of
+ Just n -> return n
+ Nothing -> do
+ let new = "images/img" ++ show (length pics) ++ ext
+ modifyIORef picsRef ( (oldsrc, new): )
+ return new
+ return $ Image lab (newsrc, tit) : xs
+transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
+ let writeHtmlInline opts z = removeTrailingSpace $
+ writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]]
+ mathml = writeHtmlInline defaultWriterOptions{
+ writerHTMLMathMethod = MathML Nothing } x
+ fallback = writeHtmlInline defaultWriterOptions{
+ writerHTMLMathMethod = PlainMath } x
+ inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++
+ "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++
+ mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++
+ "</ops:switch>"
+ result = if "<math" `isPrefixOf` mathml then inOps else mathml
+ return $ HtmlInline result : xs
+transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs
+transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
+transformInlines _ _ _ xs = return xs
+
+transformBlock :: Block -> Block
+transformBlock (RawHtml _) = Null
+transformBlock x = x
+
+(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
+(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
+
+-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
+ppTopElement :: Element -> String
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+
+imageTypeOf :: FilePath -> Maybe String
+imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
+ "jpg" -> Just "image/jpeg"
+ "jpeg" -> Just "image/jpeg"
+ "jfif" -> Just "image/jpeg"
+ "png" -> Just "image/png"
+ "gif" -> Just "image/gif"
+ "svg" -> Just "image/svg+xml"
+ _ -> Nothing
+
+pageTemplate :: String
+pageTemplate = unlines
+ [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
+ , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+ , "<head>"
+ , "<title>$title$</title>"
+ , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
+ , "</head>"
+ , "<body>"
+ , "$if(titlepage)$"
+ , "<h1 class=\"title\">$title$</h1>"
+ , "$for(author)$"
+ , "<h2 class=\"author\">$author$</h2>"
+ , "$endfor$"
+ , "$else$"
+ , "<h1>$title$</h1>"
+ , "$body$"
+ , "$endif$"
+ , "</body>"
+ , "</html>"
+ ]
+
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 299471328..d2a400c5c 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,8 +34,9 @@ import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
import Text.Pandoc.Templates
import Text.Pandoc.Readers.TeXMath
-import Text.Pandoc.Highlighting ( highlightHtml )
+import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
import Text.Pandoc.XML (stripTags, escapeStringForXML)
+import Network.HTTP ( urlEncode )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, intersperse )
@@ -104,7 +105,24 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
toc <- if writerTableOfContents opts
then tableOfContents opts sects
else return Nothing
- blocks' <- liftM toHtmlFromList $ mapM (elementToHtml opts) sects
+ let startSlide = RawHtml "<div class=\"slide\">\n"
+ endSlide = RawHtml "</div>\n"
+ let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs)
+ cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs
+ cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++
+ (Header 1 ys : cutUp xs)
+ cutUp (x:xs) = x : cutUp xs
+ cutUp [] = []
+ let slides = case blocks of
+ (HorizontalRule : xs) -> [startSlide] ++ cutUp xs ++ [endSlide]
+ (Header 1 ys : xs) -> [startSlide, Header 1 ys] ++
+ cutUp xs ++ [endSlide]
+ _ -> [startSlide] ++ cutUp blocks ++
+ [endSlide]
+ blocks' <- liftM toHtmlFromList $
+ if writerSlideVariant opts `elem` [SlidySlides, S5Slides]
+ then mapM (blockToHtml opts) slides
+ else mapM (elementToHtml opts) sects
st <- get
let notes = reverse (stNotes st)
let thebody = blocks' +++ footnoteSection notes
@@ -125,7 +143,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
primHtml s
Nothing -> noHtml
else noHtml
- let newvars = [("highlighting","yes") | stHighlighting st] ++
+ let newvars = [("highlighting-css", defaultHighlightingCss) |
+ stHighlighting st] ++
[("math", renderHtmlFragment math) | stMath st]
return (tit, auths, date, toc, thebody, newvars)
@@ -197,10 +216,16 @@ elementToHtml opts (Sec level num id' title' elements) = do
innerContents <- mapM (elementToHtml opts) elements
modify $ \st -> st{stSecNum = num} -- update section number
header' <- blockToHtml opts (Header level title')
- return $ if writerS5 opts || (writerStrictMarkdown opts && not (writerTableOfContents opts))
- -- S5 gets confused by the extra divs around sections
- then toHtmlFromList (header' : innerContents)
- else thediv ! [prefixedId opts id'] << (header' : innerContents)
+ let slides = writerSlideVariant opts `elem` [SlidySlides, S5Slides]
+ let header'' = header' ! [prefixedId opts id' |
+ not (writerStrictMarkdown opts ||
+ writerSectionDivs opts || slides)]
+ let stuff = header'' : innerContents
+ return $ if slides -- S5 gets confused by the extra divs around sections
+ then toHtmlFromList stuff
+ else if writerSectionDivs opts
+ then thediv ! [prefixedId opts id'] << stuff
+ else toHtmlFromList stuff
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
@@ -285,15 +310,18 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
attrs = [theclass (unwords classes') | not (null classes')] ++
[prefixedId opts id' | not (null id')] ++
map (\(x,y) -> strAttr x y) keyvals
+ addBird = if "literate" `elem` classes'
+ then unlines . map ("> " ++) . lines
+ else unlines . lines
in return $ pre ! attrs $ thecode <<
(replicate (length leadingBreaks) br +++
- [stringToHtml $ rawCode' ++ "\n"])
+ [stringToHtml $ addBird rawCode'])
Right h -> modify (\st -> st{ stHighlighting = True }) >> return h
blockToHtml opts (BlockQuote blocks) =
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
-- otherwise incremental
- if writerS5 opts
+ if writerSlideVariant opts /= NoSlides
then let inc = not (writerIncremental opts) in
case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
@@ -444,16 +472,20 @@ inlineToHtml opts inline =
-- non-math elements on the page from being treated as math by
-- the javascript
return $ thespan ! [theclass "LaTeX"] $
- if t == InlineMath
- then primHtml ("$" ++ str ++ "$")
- else primHtml ("$$" ++ str ++ "$$")
- JsMath _ ->
- return $ if t == InlineMath
- then thespan ! [theclass "math"] $ primHtml str
- else thediv ! [theclass "math"] $ primHtml str
- MimeTeX url ->
- return $ image ! [src (url ++ "?" ++ str),
- alt str, title str]
+ case t of
+ InlineMath -> primHtml ("$" ++ str ++ "$")
+ DisplayMath -> primHtml ("$$" ++ str ++ "$$")
+ JsMath _ -> do
+ let m = primHtml str
+ return $ case t of
+ InlineMath -> thespan ! [theclass "math"] $ m
+ DisplayMath -> thediv ! [theclass "math"] $ m
+ WebTeX url -> do
+ let m = image ! [src (url ++ urlEncode str),
+ alt str, title str]
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> br +++ m +++ br
GladTeX ->
return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
MathML _ -> do
@@ -466,12 +498,14 @@ inlineToHtml opts inline =
Right r -> return $ primHtml $
ppcElement conf r
Left _ -> inlineListToHtml opts
- (readTeXMath str) >>=
- return . (thespan !
- [theclass "math"])
- PlainMath ->
- inlineListToHtml opts (readTeXMath str) >>=
- return . (thespan ! [theclass "math"]) )
+ (readTeXMath str) >>= return .
+ (thespan ! [theclass "math"])
+ PlainMath -> do
+ x <- inlineListToHtml opts (readTeXMath str)
+ let m = thespan ! [theclass "math"] $ x
+ return $ case t of
+ InlineMath -> m
+ DisplayMath -> br +++ m +++ br )
(TeX str) -> case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ primHtml str
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 8aa028bd7..720c00ac8 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -338,7 +338,7 @@ inlineToLaTeX (Link txt (src, _)) =
char '}'
inlineToLaTeX (Image _ (source, _)) = do
modify $ \s -> s{ stGraphics = True }
- return $ text $ "\\includegraphics{" ++ source ++ "}"
+ return $ text $ "\\includegraphics{" ++ source ++ "}"
inlineToLaTeX (Note contents) = do
st <- get
put (st {stInNote = True})
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 77dead196..a46a18893 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
import Data.List ( isPrefixOf, intersperse, intercalate )
import Text.PrettyPrint.HughesPJ hiding ( Str )
@@ -62,7 +63,7 @@ pandocToMan opts (Pandoc (Meta title authors date) blocks) = do
body <- blockListToMan opts blocks
notes <- liftM stNotes get
notes' <- notesToMan opts (reverse notes)
- let main = render $ body $$ notes'
+ let main = render $ body $$ notes' $$ text ""
hasTables <- liftM stHasTables get
let context = writerVariables opts ++
[ ("body", main)
@@ -150,8 +151,12 @@ blockToMan opts (Header level inlines) = do
_ -> ".SS "
return $ text heading <> contents
blockToMan _ (CodeBlock _ str) = return $
- text ".PP" $$ text "\\f[CR]" $$
- text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]"
+ text ".IP" $$
+ text ".nf" $$
+ text "\\f[C]" $$
+ text (escapeCode str) $$
+ text "\\f[]" $$
+ text ".fi"
blockToMan opts (BlockQuote blocks) = do
contents <- blockListToMan opts blocks
return $ text ".RS" $$ contents $$ text ".RE"
@@ -299,11 +304,11 @@ inlineToMan _ EnDash = return $ text "\\[en]"
inlineToMan _ Apostrophe = return $ char '\''
inlineToMan _ Ellipses = return $ text "\\&..."
inlineToMan _ (Code str) =
- return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
+ return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
inlineToMan _ (Str str) = return $ text $ escapeString str
-inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str)
+inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str
inlineToMan opts (Math DisplayMath str) = do
- contents <- inlineToMan opts (Code str)
+ contents <- inlineListToMan opts $ readTeXMath str
return $ text ".RS" $$ contents $$ text ".RE"
inlineToMan _ (TeX _) = return empty
inlineToMan _ (HtmlInline _) = return empty
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index fe8e0c2de..1b612006b 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -32,15 +32,16 @@ Markdown: <http://daringfireball.net/projects/markdown/>
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates (renderTemplate)
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Blocks
-import Text.ParserCombinators.Parsec ( parse, GenParser )
+import Text.ParserCombinators.Parsec ( runParser, GenParser )
import Data.List ( group, isPrefixOf, find, intersperse, transpose )
import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
type Notes = [[Block]]
-type Refs = KeyTable
+type Refs = [([Inline], Target)]
data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
, stPlain :: Bool }
@@ -94,8 +95,8 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
st <- get
notes' <- notesToMarkdown opts (reverse $ stNotes st)
st' <- get -- note that the notes may contain refs
- refs' <- keyTableToMarkdown opts (reverse $ stRefs st')
- let main = render $ body $+$ text "" $+$ notes' $+$ text "" $+$ refs'
+ refs' <- refsToMarkdown opts (reverse $ stRefs st')
+ let main = render $ foldl ($+$) empty $ [body, notes', refs']
let context = writerVariables opts ++
[ ("toc", render toc)
, ("body", main)
@@ -109,8 +110,8 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
else return main
-- | Return markdown representation of reference key table.
-keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc
+refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-- | Return markdown representation of a reference key.
keyToMarkdown :: WriterOptions
@@ -158,7 +159,7 @@ elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++
else [BulletList $ map elementToListItem subsecs]
-- | Ordered list start parser for use in Para below.
-olMarker :: GenParser Char st Char
+olMarker :: GenParser Char ParserState Char
olMarker = do (start, style', delim) <- anyOrderedListMarker
if delim == Period &&
(style' == UpperAlpha || (style' == UpperRoman &&
@@ -169,7 +170,7 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker
-- | True if string begins with an ordered list marker
beginsWithOrderedListMarker :: String -> Bool
beginsWithOrderedListMarker str =
- case parse olMarker "para start" str of
+ case runParser olMarker defaultParserState "para start" str of
Left _ -> False
Right _ -> True
@@ -238,7 +239,7 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do
caption' <- inlineListToMarkdown opts caption
let caption'' = if null caption
then empty
- else text "" $+$ (text "Table: " <> caption')
+ else text "" $+$ (text ": " <> caption')
headers' <- mapM (blockListToMarkdown opts) headers
let alignHeader alignment = case alignment of
AlignLeft -> leftAlignBlock
@@ -372,14 +373,14 @@ inlineToMarkdown opts (Subscript lst) = do
inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '\'' <> contents <> char '\''
+ return $ char '‘' <> contents <> char '’'
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '"' <> contents <> char '"'
-inlineToMarkdown _ EmDash = return $ text "--"
-inlineToMarkdown _ EnDash = return $ char '-'
-inlineToMarkdown _ Apostrophe = return $ char '\''
-inlineToMarkdown _ Ellipses = return $ text "..."
+ return $ char '“' <> contents <> char '”'
+inlineToMarkdown _ EmDash = return $ char '\8212'
+inlineToMarkdown _ EnDash = return $ char '\8211'
+inlineToMarkdown _ Apostrophe = return $ char '\8217'
+inlineToMarkdown _ Ellipses = return $ char '\8230'
inlineToMarkdown _ (Code str) =
let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
new file mode 100644
index 000000000..3b5ea7481
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -0,0 +1,86 @@
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Native
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Writers.Native ( writeNative )
+where
+import Text.Pandoc.Shared ( WriterOptions )
+import Data.List ( intercalate )
+import Text.Pandoc.Definition
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList items) = "DefinitionList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate "\n, "
+ (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++
+ indentBy 3 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++
+ ")") items))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (intercalate ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+writeNative :: WriterOptions -> Pandoc -> String
+writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
new file mode 100644
index 000000000..5aa0fd310
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -0,0 +1,83 @@
+{-
+Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ODT
+ Copyright : Copyright (C) 2008-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to ODT.
+-}
+module Text.Pandoc.Writers.ODT ( writeODT ) where
+import Data.IORef
+import System.FilePath ( (</>), takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Paths_pandoc ( getDataFileName )
+import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.Definition
+import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
+import System.Directory
+import Control.Monad (liftM)
+
+-- | Produce an ODT file from a Pandoc document.
+writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> IO B.ByteString
+writeODT mbRefOdt opts doc = do
+ let datadir = writerUserDataDir opts
+ refArchive <- liftM toArchive $
+ case mbRefOdt of
+ Just f -> B.readFile f
+ Nothing -> do
+ let defaultODT = getDataFileName "reference.odt" >>= B.readFile
+ case datadir of
+ Nothing -> defaultODT
+ Just d -> do
+ exists <- doesFileExist (d </> "reference.odt")
+ if exists
+ then B.readFile (d </> "reference.odt")
+ else defaultODT
+ -- handle pictures
+ picEntriesRef <- newIORef ([] :: [Entry])
+ let sourceDir = writerSourceDirectory opts
+ doc' <- processWithM (transformPic sourceDir picEntriesRef) doc
+ let newContents = writeOpenDocument opts doc'
+ (TOD epochtime _) <- getClockTime
+ let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
+ picEntries <- readIORef picEntriesRef
+ let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+ return $ fromArchive archive
+
+transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
+transformPic sourceDir entriesRef (Image lab (src,tit)) = do
+ entries <- readIORef entriesRef
+ let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
+ catch (readEntry [] (sourceDir </> src) >>= \entry ->
+ modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
+ return (Image lab (newsrc, tit)))
+ (\_ -> return (Emph lab))
+transformPic _ _ x = return x
+
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index f4dfb2aa6..e79f97b33 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -39,10 +39,12 @@ import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
import Control.Applicative ( (<$>) )
+type Refs = [([Inline], Target)]
+
data WriterState =
WriterState { stNotes :: [[Block]]
- , stLinks :: KeyTable
- , stImages :: KeyTable
+ , stLinks :: Refs
+ , stImages :: Refs
, stHasMath :: Bool
, stOptions :: WriterOptions
}
@@ -65,10 +67,10 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
body <- blockListToRST blocks
notes <- liftM (reverse . stNotes) get >>= notesToRST
-- note that the notes may contain refs, so we do them first
- refs <- liftM (reverse . stLinks) get >>= keyTableToRST
- pics <- liftM (reverse . stImages) get >>= pictTableToRST
+ refs <- liftM (reverse . stLinks) get >>= refsToRST
+ pics <- liftM (reverse . stImages) get >>= pictRefsToRST
hasMath <- liftM stHasMath get
- let main = render $ body $+$ notes $+$ text "" $+$ refs $+$ pics
+ let main = render $ foldl ($+$) empty $ [body, notes, refs, pics]
let context = writerVariables opts ++
[ ("body", main)
, ("title", render title)
@@ -80,8 +82,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
else return main
-- | Return RST representation of reference key table.
-keyTableToRST :: KeyTable -> State WriterState Doc
-keyTableToRST refs = mapM keyToRST refs >>= return . vcat
+refsToRST :: Refs -> State WriterState Doc
+refsToRST refs = mapM keyToRST refs >>= return . vcat
-- | Return RST representation of a reference key.
keyToRST :: ([Inline], (String, String))
@@ -107,8 +109,8 @@ noteToRST num note = do
return $ marker $$ nest 3 contents
-- | Return RST representation of picture reference table.
-pictTableToRST :: KeyTable -> State WriterState Doc
-pictTableToRST refs = mapM pictToRST refs >>= return . vcat
+pictRefsToRST :: Refs -> State WriterState Doc
+pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
-- | Return RST representation of a picture substitution reference.
pictToRST :: ([Inline], (String, String))
@@ -280,16 +282,16 @@ inlineToRST (Subscript lst) = do
inlineToRST (SmallCaps lst) = inlineListToRST lst
inlineToRST (Quoted SingleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '\'' <> contents <> char '\''
+ return $ char '‘' <> contents <> char '’'
inlineToRST (Quoted DoubleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '"' <> contents <> char '"'
+ return $ char '“' <> contents <> char '”'
inlineToRST (Cite _ lst) =
inlineListToRST lst
-inlineToRST EmDash = return $ text "--"
-inlineToRST EnDash = return $ char '-'
-inlineToRST Apostrophe = return $ char '\''
-inlineToRST Ellipses = return $ text "..."
+inlineToRST EmDash = return $ char '\8212'
+inlineToRST EnDash = return $ char '\8211'
+inlineToRST Apostrophe = return $ char '\8217'
+inlineToRST Ellipses = return $ char '\8230'
inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``"
inlineToRST (Str str) = return $ text $ escapeString str
inlineToRST (Math t str) = do
diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs
deleted file mode 100644
index 1a2639a50..000000000
--- a/src/Text/Pandoc/Writers/S5.hs
+++ /dev/null
@@ -1,136 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Writers.S5
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Definitions for creation of S5 powerpoint-like HTML.
-(See <http://meyerweb.com/eric/tools/s5/>.)
--}
-module Text.Pandoc.Writers.S5 (
- -- * Header includes
- s5HeaderIncludes,
- s5Links,
- -- * Functions
- writeS5,
- writeS5String,
- insertS5Structure
- ) where
-import Text.Pandoc.Shared ( WriterOptions, readDataFile )
-import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString )
-import Text.Pandoc.Definition
-import Text.XHtml.Strict
-import System.FilePath ( (</>) )
-import Data.List ( intercalate )
-
-s5HeaderIncludes :: Maybe FilePath -> IO String
-s5HeaderIncludes datadir = do
- c <- s5CSS datadir
- j <- s5Javascript datadir
- return $ s5Meta ++ c ++ j
-
-s5Meta :: String
-s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n"
-
-s5Javascript :: Maybe FilePath -> IO String
-s5Javascript datadir = do
- jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment"
- jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed"
- return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++
- "</script>\n"
-
-s5CSS :: Maybe FilePath -> IO String
-s5CSS datadir = do
- s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css"
- s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css"
- s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css"
- s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css"
- s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css"
- s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css"
- return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
-
-s5Links :: String
-s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n"
-
--- | Converts Pandoc document to an S5 HTML presentation (Html structure).
-writeS5 :: WriterOptions -> Pandoc -> Html
-writeS5 options = (writeHtml options) . insertS5Structure
-
--- | Converts Pandoc document to an S5 HTML presentation (string).
-writeS5String :: WriterOptions -> Pandoc -> String
-writeS5String options = (writeHtmlString options) . insertS5Structure
-
--- | Inserts HTML needed for an S5 presentation (e.g. around slides).
-layoutDiv :: [Inline] -- ^ Title of document (for header or footer)
- -> [Inline] -- ^ Date of document (for header or footer)
- -> [Block] -- ^ List of block elements returned
-layoutDiv title' date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 date), (Header 2 title'), (RawHtml "</div>\n</div>\n")]
-
-presentationStart :: Block
-presentationStart = RawHtml "<div class=\"presentation\">\n\n"
-
-presentationEnd :: Block
-presentationEnd = RawHtml "</div>\n"
-
-slideStart :: Block
-slideStart = RawHtml "<div class=\"slide\">\n"
-
-slideEnd :: Block
-slideEnd = RawHtml "</div>\n"
-
--- | Returns 'True' if block is a Header 1.
-isH1 :: Block -> Bool
-isH1 (Header 1 _) = True
-isH1 _ = False
-
--- | Insert HTML around sections to make individual slides.
-insertSlides :: Bool -> [Block] -> [Block]
-insertSlides beginning blocks =
- let (beforeHead, rest) = break isH1 blocks in
- if (null rest) then
- if beginning then
- beforeHead
- else
- beforeHead ++ [slideEnd]
- else
- if beginning then
- beforeHead ++
- slideStart:(head rest):(insertSlides False (tail rest))
- else
- beforeHead ++
- slideEnd:slideStart:(head rest):(insertSlides False (tail rest))
-
--- | Insert blocks into 'Pandoc' for slide structure.
-insertS5Structure :: Pandoc -> Pandoc
-insertS5Structure (Pandoc meta' []) = Pandoc meta' []
-insertS5Structure (Pandoc (Meta title' authors date) blocks) =
- let slides = insertSlides True blocks
- firstSlide = if not (null title')
- then [slideStart, (Header 1 title'),
- (Header 3 (intercalate [LineBreak] authors)),
- (Header 4 date), slideEnd]
- else []
- newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++
- slides ++ [presentationEnd]
- in Pandoc (Meta title' authors date) newBlocks
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 503222754..65e053827 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -144,6 +144,7 @@ blockToTexinfo (OrderedList (start, numstyle, _) lst) = do
exemplar = case numstyle of
DefaultStyle -> decimal
Decimal -> decimal
+ Example -> decimal
UpperRoman -> decimal -- Roman numerals not supported
LowerRoman -> decimal
UpperAlpha -> upperAlpha
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
index d713ae263..cc6a034c0 100644
--- a/src/markdown2pdf.hs
+++ b/src/markdown2pdf.hs
@@ -9,14 +9,7 @@ import Control.Exception (tryJust, bracket)
import System.IO (stderr)
import System.IO.Error (isDoesNotExistError)
import System.Environment ( getArgs, getProgName )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-import System.IO (hPutStrLn)
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
+import qualified Text.Pandoc.UTF8 as UTF8
import System.Exit (ExitCode (..), exitWith)
import System.FilePath
import System.Directory
@@ -57,7 +50,7 @@ runLatexRaw latexProgram file = do
takeDirectory file, dropExtension file] >> return ()
let pdfFile = replaceExtension file "pdf"
let logFile = replaceExtension file "log"
- txt <- tryJust (guard . isDoesNotExistError) (readFile logFile)
+ txt <- tryJust (guard . isDoesNotExistError) (UTF8.readFile logFile)
let checks = checkLatex $ either (const "") id txt
case checks of
-- err , bib , ref , msg
@@ -122,13 +115,13 @@ runBibtex file = do
exit :: String -> IO a
exit x = do
progName <- getProgName
- hPutStrLn stderr $ progName ++ ": " ++ x
+ UTF8.hPutStrLn stderr $ progName ++ ": " ++ x
exitWith $ ExitFailure 1
saveStdin :: FilePath -> IO (Either String FilePath)
saveStdin file = do
- text <- getContents
- writeFile file text
+ text <- UTF8.getContents
+ UTF8.writeFile file text
fileExist <- doesFileExist file
case fileExist of
False -> return $ Left $! "Could not create " ++ file
@@ -137,7 +130,7 @@ saveStdin file = do
saveOutput :: FilePath -> FilePath -> IO ()
saveOutput input output = do
copyFile input output
- hPutStrLn stderr $! "Created " ++ output
+ UTF8.hPutStrLn stderr $! "Created " ++ output
main :: IO ()
main = bracket
@@ -161,7 +154,8 @@ main = bracket
"--number-sections","--include-in-header",
"--include-before-body","--include-after-body",
"--custom-header","--output",
- "--template", "--variable"]
+ "--template", "--variable",
+ "--csl", "--biblio", "--biblio-format"]
let isOpt ('-':_) = True
isOpt _ = False
let opts = filter isOpt args
@@ -170,8 +164,8 @@ main = bracket
any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong
unless (all isGoodopt opts) $ do
(code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] ""
- putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
- putStr $ unlines $
+ UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
+ UTF8.putStr $ unlines $
filter (\l -> any (`isInfixOf` l) goodoptslong) $ lines out
exitWith code
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 0560efc0a..ef38c0332 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -30,9 +30,9 @@ writers.
-}
module Main where
import Text.Pandoc
-import Text.Pandoc.ODT
-import Text.Pandoc.Writers.S5 (s5HeaderIncludes)
-import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile )
+import Text.Pandoc.S5 (s5HeaderIncludes)
+import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
+ headerShift )
#ifdef _HIGHLIGHTING
import Text.Pandoc.Highlighting ( languages )
#endif
@@ -40,19 +40,11 @@ import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
-import Data.Maybe ( fromMaybe )
import Data.Char ( toLower, isDigit )
import Data.List ( intercalate, isSuffixOf )
import System.Directory ( getAppUserDataDirectory )
import System.IO ( stdout, stderr )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-import System.IO ( hPutStr, hPutStrLn )
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
+import qualified Text.Pandoc.UTF8 as UTF8
#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
@@ -60,7 +52,9 @@ import Text.Pandoc.Biblio
import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI)
-import Data.ByteString.Lazy.UTF8 (toString)
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 (toString, fromString)
+import Codec.Binary.UTF8.String (decodeString)
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++
@@ -108,13 +102,15 @@ readPandoc _ = read
-- | Association list of formats and writers.
writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
-writers = [("native" , writeDoc)
+writers = [("native" , writeNative)
,("html" , writeHtmlString)
,("html+lhs" , writeHtmlString)
- ,("s5" , writeS5String)
+ ,("s5" , writeHtmlString)
+ ,("slidy" , writeHtmlString)
,("docbook" , writeDocbook)
,("opendocument" , writeOpenDocument)
- ,("odt" , writeOpenDocument)
+ ,("odt" , \_ _ -> "")
+ ,("epub" , \_ _ -> "")
,("latex" , writeLaTeX)
,("latex+lhs" , writeLaTeX)
,("context" , writeConTeXt)
@@ -130,17 +126,7 @@ writers = [("native" , writeDoc)
]
isNonTextOutput :: String -> Bool
-isNonTextOutput = (`elem` ["odt"])
-
--- | Writer for Pandoc native format.
-writeDoc :: WriterOptions -> Pandoc -> String
-writeDoc _ = prettyPandoc
-
-headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = processWith shift
- where shift :: Block -> Block
- shift (Header level inner) = Header (level + n) inner
- shift x = x
+isNonTextOutput = (`elem` ["odt","epub"])
-- | Data structure for command line options.
data Opt = Opt
@@ -154,15 +140,17 @@ data Opt = Opt
, optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply
, optTemplate :: String -- ^ Custom template
, optVariables :: [(String,String)] -- ^ Template variables to set
- , optBefore :: [String] -- ^ Texts to include before body
- , optAfter :: [String] -- ^ Texts to include after body
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ Number sections in LaTeX
- , optIncremental :: Bool -- ^ Use incremental lists in S5
+ , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
+ , optIncremental :: Bool -- ^ Use incremental lists in Slidy/S5
+ , optOffline :: Bool -- ^ Make slideshow accessible offline
, optXeTeX :: Bool -- ^ Format latex for xetex
, optSmart :: Bool -- ^ Use smart typography
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
+ , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet
+ , optEPUBMetadata :: String -- ^ EPUB metadata
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
@@ -194,15 +182,17 @@ defaultOpts = Opt
, optTransforms = []
, optTemplate = ""
, optVariables = []
- , optBefore = []
- , optAfter = []
, optOutputFile = "-" -- "-" means stdout
, optNumberSections = False
+ , optSectionDivs = False
, optIncremental = False
+ , optOffline = False
, optXeTeX = False
, optSmart = False
, optHTMLMathMethod = PlainMath
, optReferenceODT = Nothing
+ , optEPUBStylesheet = Nothing
+ , optEPUBMetadata = ""
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
@@ -295,11 +285,24 @@ options =
, Option "" ["mimetex"]
(OptArg
- (\arg opt -> return opt { optHTMLMathMethod = MimeTeX
- (fromMaybe "/cgi-bin/mimetex.cgi" arg)})
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u ++ "?"
+ Nothing -> "/cgi-bin/mimetex.cgi?"
+ return opt { optHTMLMathMethod = WebTeX url' })
"URL")
"" -- "Use mimetex for HTML math"
+ , Option "" ["webtex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u
+ Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl="
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use web service for HTML math"
+
, Option "" ["jsmath"]
(OptArg
(\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
@@ -314,7 +317,13 @@ options =
, Option "i" ["incremental"]
(NoArg
(\opt -> return opt { optIncremental = True }))
- "" -- "Make list items display incrementally in S5"
+ "" -- "Make list items display incrementally in Slidy/S5"
+
+ , Option "" ["offline"]
+ (NoArg
+ (\opt -> return opt { optOffline = True,
+ optStandalone = True }))
+ "" -- "Make slide shows include all the needed js and css"
, Option "" ["xetex"]
(NoArg
@@ -326,6 +335,11 @@ options =
(\opt -> return opt { optNumberSections = True }))
"" -- "Number sections in LaTeX"
+ , Option "" ["section-divs"]
+ (NoArg
+ (\opt -> return opt { optSectionDivs = True }))
+ "" -- "Put sections in div tags in HTML"
+
, Option "" ["no-wrap"]
(NoArg
(\opt -> return opt { optWrapText = False }))
@@ -343,7 +357,7 @@ options =
"references" -> return ReferenceObfuscation
"javascript" -> return JavascriptObfuscation
"none" -> return NoObfuscation
- _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
+ _ -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
exitWith (ExitFailure 6)
return opt { optEmailObfuscation = method })
"none|javascript|references")
@@ -377,7 +391,7 @@ options =
return opt{ optTransforms =
headerShift shift : oldTransforms }
else do
- hPutStrLn stderr $ "base-header-level must be a number >= 1"
+ UTF8.hPutStrLn stderr $ "base-header-level must be a number >= 1"
exitWith $ ExitFailure 19)
"LEVEL")
"" -- "Headers base level"
@@ -385,7 +399,7 @@ options =
, Option "" ["template"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
return opt{ optTemplate = text,
optStandalone = True })
"FILENAME")
@@ -399,7 +413,7 @@ options =
let newvars = optVariables opt ++ [(k,v)]
return opt{ optVariables = newvars }
_ -> do
- hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
+ UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
exitWith $ ExitFailure 17)
"FILENAME")
"" -- "Use custom template"
@@ -417,7 +431,7 @@ options =
, Option "H" ["include-in-header"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("header-includes",text)]
return opt { optVariables = newvars,
@@ -428,7 +442,7 @@ options =
, Option "B" ["include-before-body"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("include-before",text)]
return opt { optVariables = newvars,
@@ -439,7 +453,7 @@ options =
, Option "A" ["include-after-body"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("include-after",text)]
return opt { optVariables = newvars,
@@ -450,7 +464,7 @@ options =
, Option "C" ["custom-header"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
let newVars = ("legacy-header", text) : optVariables opt
return opt { optVariables = newVars
, optStandalone = True })
@@ -473,12 +487,28 @@ options =
"FILENAME")
"" -- "Path of custom reference.odt"
+ , Option "" ["epub-stylesheet"]
+ (ReqArg
+ (\arg opt -> do
+ text <- UTF8.readFile arg
+ return opt { optEPUBStylesheet = Just text })
+ "FILENAME")
+ "" -- "Path of epub.css"
+
+ , Option "" ["epub-metadata"]
+ (ReqArg
+ (\arg opt -> do
+ text <- UTF8.readFile arg
+ return opt { optEPUBMetadata = text })
+ "FILENAME")
+ "" -- "Path of epub metadata file"
+
, Option "D" ["print-default-template"]
(ReqArg
(\arg _ -> do
templ <- getDefaultTemplate Nothing arg
case templ of
- Right t -> hPutStr stdout t
+ Right t -> UTF8.hPutStr stdout t
Left e -> error $ show e
exitWith ExitSuccess)
"FORMAT")
@@ -520,7 +550,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
+ UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
copyrightMessage)
exitWith ExitSuccess ))
"" -- "Print version"
@@ -529,7 +559,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- hPutStr stdout (usageMessage prg options)
+ UTF8.hPutStr stdout (usageMessage prg options)
exitWith ExitSuccess ))
"" -- "Show help"
]
@@ -586,13 +616,14 @@ defaultWriterName x =
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
+ ".epub" -> "epub"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
main :: IO ()
main = do
- rawArgs <- getArgs
+ rawArgs <- liftM (map decodeString) getArgs
prg <- getProgName
let compatMode = (prg == "hsmarkdown")
@@ -602,8 +633,8 @@ main = do
unless (null errors) $
do name <- getProgName
- mapM_ (\e -> hPutStr stderr (name ++ ": ") >> hPutStr stderr e) errors
- hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
+ mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors
+ UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
exitWith $ ExitFailure 2
let defaultOpts' = if compatMode
@@ -622,18 +653,20 @@ main = do
, optWriter = writerName
, optParseRaw = parseRaw
, optVariables = variables
- , optBefore = befores
- , optAfter = afters
, optTableOfContents = toc
, optTransforms = transforms
, optTemplate = template
, optOutputFile = outputFile
, optNumberSections = numberSections
+ , optSectionDivs = sectionDivs
, optIncremental = incremental
+ , optOffline = offline
, optXeTeX = xetex
, optSmart = smart
, optHTMLMathMethod = mathMethod
, optReferenceODT = referenceODT
+ , optEPUBStylesheet = epubStylesheet
+ , optEPUBMetadata = epubMetadata
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
@@ -652,13 +685,13 @@ main = do
} = opts
when dumpArgs $
- do hPutStrLn stdout outputFile
- mapM_ (\arg -> hPutStrLn stdout arg) args
+ do UTF8.hPutStrLn stdout outputFile
+ mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
exitWith ExitSuccess
-- warn about deprecated options
case lookup "legacy-header" variables of
- Just _ -> hPutStrLn stderr $
+ Just _ -> UTF8.hPutStrLn stderr $
"Warning: The -C/--custom-header is deprecated.\n" ++
"Please transition to using --template instead."
Nothing -> return ()
@@ -687,9 +720,13 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
- writer <- case (lookup writerName' writers) of
- Just r -> return r
- Nothing -> error ("Unknown writer: " ++ writerName')
+ let writer = case lookup writerName' writers of
+ Just _ | writerName' == "epub" -> writeEPUB epubStylesheet
+ Just _ | writerName' == "odt" -> writeODT referenceODT
+ Just r -> \o ->
+ return . fromString . r o
+ Nothing -> error $ "Unknown writer: " ++
+ writerName'
templ <- getDefaultTemplate datadir writerName'
let defaultTemplate = case templ of
@@ -707,11 +744,18 @@ main = do
refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat
#endif
- variables' <- if writerName' == "s5" && standalone'
- then do
- inc <- s5HeaderIncludes datadir
- return $ ("header-includes", inc) : variables
- else return variables
+ variables' <- case (writerName', standalone', offline) of
+ ("s5", True, True) -> do
+ inc <- s5HeaderIncludes datadir
+ return $ ("s5includes", inc) : variables
+ ("slidy", True, True) -> do
+ slidyJs <- readDataFile datadir $
+ "slidy" </> "slidy.min.js"
+ slidyCss <- readDataFile datadir $
+ "slidy" </> "slidy.min.css"
+ return $ ("slidy-js", slidyJs) :
+ ("slidy-css", slidyCss) : variables
+ _ -> return variables
variables'' <- case mathMethod of
LaTeXMathML Nothing -> do
@@ -722,6 +766,15 @@ main = do
return $ ("mathml-script", s) : variables'
_ -> return variables'
+ let sourceDir = if null sources
+ then "."
+ else takeDirectory (head sources)
+
+ let slideVariant = case writerName' of
+ "s5" -> S5Slides
+ "slidy" -> SlidySlides
+ _ -> NoSlides
+
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
@@ -737,22 +790,23 @@ main = do
stateColumns = columns,
stateStrict = strict,
stateIndentedCodeClasses = codeBlockClasses }
+
let writerOptions = WriterOptions { writerStandalone = standalone',
writerTemplate = if null template
then defaultTemplate
else template,
writerVariables = variables'',
- writerIncludeBefore = concat befores,
- writerIncludeAfter = concat afters,
+ writerEPUBMetadata = epubMetadata,
writerTabStop = tabStop,
writerTableOfContents = toc &&
writerName' /= "s5",
writerHTMLMathMethod = mathMethod,
- writerS5 = (writerName' == "s5"),
+ writerSlideVariant = slideVariant,
+ writerIncremental = incremental,
writerXeTeX = xetex,
writerIgnoreNotes = False,
- writerIncremental = incremental,
writerNumberSections = numberSections,
+ writerSectionDivs = sectionDivs,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap,
@@ -761,23 +815,21 @@ main = do
writerEmailObfuscation = if strict
then ReferenceObfuscation
else obfuscationMethod,
- writerIdentifierPrefix = idPrefix }
+ writerIdentifierPrefix = idPrefix,
+ writerSourceDirectory = sourceDir,
+ writerUserDataDir = datadir }
when (isNonTextOutput writerName' && outputFile == "-") $
- do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
+ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
"Specify an output file using the -o option.")
exitWith $ ExitFailure 5
- let sourceDirRelative = if null sources
- then ""
- else takeDirectory (head sources)
-
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
- readSource "-" = getContents
+ readSource "-" = UTF8.getContents
readSource src = case parseURI src of
Just u -> readURI u
- Nothing -> readFile src
+ Nothing -> UTF8.readFile src
readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
return . toString -- treat all as UTF8
@@ -794,10 +846,8 @@ main = do
return doc'
#endif
- let writerOutput = writer writerOptions doc'' ++ "\n"
+ writerOutput <- writer writerOptions doc''
- case writerName' of
- "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput
- _ -> if outputFile == "-"
- then putStr writerOutput
- else writeFile outputFile writerOutput
+ if outputFile == "-"
+ then B.putStr writerOutput
+ else B.writeFile outputFile writerOutput