summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/LaTeX.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/LaTeX.hs')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs357
1 files changed, 277 insertions, 80 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 406809dfc..dca745b56 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,9 +38,9 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
-import Data.Char ( chr )
-import Data.List ( isPrefixOf, isSuffixOf )
-import Control.Monad ( when )
+import Data.Char ( chr, toUpper )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Control.Monad
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -50,7 +50,7 @@ readLaTeX = readWith parseLaTeX
-- characters with special meaning
specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
--
-- utility functions
@@ -64,7 +64,7 @@ bracketedText openB closeB = do
-- | Returns an option or argument of a LaTeX command.
optOrArg :: GenParser Char st [Char]
-optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
-- | True if the string begins with '{'.
isArg :: [Char] -> Bool
@@ -86,14 +86,22 @@ command = do
begin :: [Char] -> GenParser Char st [Char]
begin name = try $ do
- string $ "\\begin{" ++ name ++ "}"
+ string "\\begin"
+ spaces
+ char '{'
+ string name
+ char '}'
optional commandArgs
spaces
return name
end :: [Char] -> GenParser Char st [Char]
end name = try $ do
- string $ "\\end{" ++ name ++ "}"
+ string "\\end"
+ spaces
+ char '{'
+ string name
+ char '}'
return name
-- | Returns a list of block elements containing the contents of an
@@ -103,7 +111,9 @@ environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ s
anyEnvironment :: GenParser Char ParserState Block
anyEnvironment = try $ do
- string "\\begin{"
+ string "\\begin"
+ spaces
+ char '{'
name <- many letter
star <- option "" (string "*") -- some environments have starred variants
char '}'
@@ -119,22 +129,17 @@ anyEnvironment = try $ do
-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = try $ manyTill
- (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}")) >>
- spaces
+processLaTeXPreamble = do
+ try $ string "\\documentclass"
+ skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX :: GenParser Char ParserState Pandoc
parseLaTeX = do
- optional processLaTeXPreamble -- preamble might not be present (fragment)
- spaces
- blocks <- parseBlocks
spaces
- optional $ try (string "\\end{document}" >> many anyChar)
- -- might not be present (fragment)
- spaces
- eof
+ skipMany $ comment >> spaces
+ blocks <- try (processLaTeXPreamble >> environment "document")
+ <|> (many block >>~ (spaces >> eof))
state <- getState
let blocks' = filter (/= Null) blocks
let title' = stateTitle state
@@ -155,13 +160,16 @@ block = choice [ hrule
, header
, list
, blockQuote
- , comment
+ , simpleTable
+ , commentBlock
+ , macro
, bibliographic
, para
, itemBlock
, unknownEnvironment
, ignore
- , unknownCommand ] <?> "block"
+ , unknownCommand
+ ] <?> "block"
--
-- header blocks
@@ -208,20 +216,77 @@ hrule :: GenParser Char st Block
hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
"\\newpage" ] >> spaces >> return HorizontalRule
+-- tables
+
+simpleTable :: GenParser Char ParserState Block
+simpleTable = try $ do
+ string "\\begin"
+ spaces
+ string "{tabular}"
+ spaces
+ aligns <- parseAligns
+ let cols = length aligns
+ optional hline
+ header' <- option [] $ parseTableHeader cols
+ rows <- many (parseTableRow cols >>~ optional hline)
+ spaces
+ end "tabular"
+ spaces
+ let header'' = if null header'
+ then replicate cols []
+ else header'
+ return $ Table [] aligns (replicate cols 0) header'' rows
+
+hline :: GenParser Char st ()
+hline = try $ spaces >> string "\\hline" >> return ()
+
+parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns = try $ do
+ char '{'
+ optional $ char '|'
+ let cAlign = char 'c' >> return AlignCenter
+ let lAlign = char 'l' >> return AlignLeft
+ let rAlign = char 'r' >> return AlignRight
+ let alignChar = cAlign <|> lAlign <|> rAlign
+ aligns' <- sepEndBy alignChar (optional $ char '|')
+ char '}'
+ spaces
+ return aligns'
+
+parseTableHeader :: Int -- ^ number of columns
+ -> GenParser Char ParserState [TableCell]
+parseTableHeader cols = try $ do
+ cells' <- parseTableRow cols
+ hline
+ return cells'
+
+parseTableRow :: Int -- ^ number of columns
+ -> GenParser Char ParserState [TableCell]
+parseTableRow cols = try $ do
+ let tableCellInline = notFollowedBy (char '&' <|>
+ (try $ char '\\' >> char '\\')) >> inline
+ cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
+ (many tableCellInline)) (char '&')
+ guard $ length cells' == cols
+ spaces
+ (try $ string "\\\\" >> spaces) <|>
+ (lookAhead (end "tabular") >> return ())
+ return cells'
+
--
-- code blocks
--
codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock
+codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
-- Note: Verbatim is from fancyvrb.
codeBlockWith :: String -> GenParser Char st Block
codeBlockWith env = try $ do
- string ("\\begin{" ++ env ++ "}") -- don't use begin function because it
- -- gobbles whitespace
- optional blanklines -- we want to gobble blank lines, but not
- -- leading space
+ string "\\begin"
+ spaces -- don't use begin function because it
+ string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble
+ optional blanklines -- blank lines, but not leading space
contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
spaces
let classes = if env == "code" then ["haskell"] else []
@@ -265,7 +330,10 @@ listItem = try $ do
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
- string "\\begin{enumerate}"
+ string "\\begin"
+ spaces
+ string "{enumerate}"
+ spaces
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ do failIfStrict
char '['
@@ -293,7 +361,6 @@ orderedList = try $ do
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
begin "itemize"
- spaces
items <- many listItem
end "itemize"
spaces
@@ -302,7 +369,6 @@ bulletList = try $ do
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
begin "description"
- spaces
items <- many listItem
end "description"
spaces
@@ -342,7 +408,7 @@ authors :: GenParser Char ParserState Block
authors = try $ do
string "\\author{"
raw <- many1 (notFollowedBy (char '}') >> inline)
- let authors' = map normalizeSpaces $ splitBy LineBreak raw
+ let authors' = map normalizeSpaces $ splitBy (== LineBreak) raw
char '}'
spaces
updateState (\s -> s { stateAuthors = authors' })
@@ -382,13 +448,15 @@ rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = do
contents <- rawLaTeXEnvironment'
spaces
- return $ Para [TeX contents]
+ return $ RawBlock "latex" contents
-- | Parse any LaTeX environment and return a string containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment' :: GenParser Char st String
rawLaTeXEnvironment' = try $ do
- string "\\begin{"
+ string "\\begin"
+ spaces
+ char '{'
name <- many1 letter
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
@@ -418,31 +486,49 @@ ignore = try $ do
spaces
return Null
+demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
+demacro (n,st,args) = try $ do
+ let raw = "\\" ++ n ++ st ++ concat args
+ s' <- applyMacros' raw
+ if raw == s'
+ then return $ RawInline "latex" raw
+ else do
+ inp <- getInput
+ setInput $ s' ++ inp
+ return $ Str ""
+
unknownCommand :: GenParser Char ParserState Block
unknownCommand = try $ do
- notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
- "document"]
+ spaces
+ notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]
state <- getState
when (stateParserContext state == ListItemState) $
notFollowedBy' (string "\\item")
if stateParseRaw state
- then do
- (name, star, args) <- command
- spaces
- return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
+ then command >>= demacro >>= return . Plain . (:[])
else do
(name, _, args) <- command
spaces
- if name `elem` commandsToIgnore
- then return Null
- else return $ Plain [Str $ concat args]
+ unless (name `elem` commandsToIgnore) $ do
+ -- put arguments back in input to be parsed
+ inp <- getInput
+ setInput $ intercalate " " args ++ inp
+ return Null
commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef"]
+commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
+
+skipChar :: GenParser Char ParserState Block
+skipChar = do
+ satisfy (/='\\') <|>
+ (notFollowedBy' (try $
+ string "\\begin" >> spaces >> string "{document}") >>
+ anyChar)
+ spaces
+ return Null
--- latex comment
-comment :: GenParser Char st Block
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+commentBlock :: GenParser Char st Block
+commentBlock = many1 (comment >> spaces) >> return Null
--
-- inline
@@ -464,8 +550,6 @@ inline = choice [ str
, strikeout
, superscript
, subscript
- , ref
- , lab
, code
, url
, link
@@ -474,12 +558,20 @@ inline = choice [ str
, linebreak
, accentedChar
, nonbreakingSpace
+ , cite
, specialChar
+ , ensureMath
, rawLaTeXInline'
, escapedChar
, unescapedChar
+ , comment
] <?> "inline"
+
+-- latex comment
+comment :: GenParser Char st Inline
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
+
accentedChar :: GenParser Char st Inline
accentedChar = normalAccentedChar <|> specialAccentedChar
@@ -512,7 +604,7 @@ accentTable =
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
oslash, pound, euro, copyright, sect ]
ccedil :: GenParser Char st Inline
@@ -543,6 +635,13 @@ oslash = try $ do
let num = if letter' == 'o' then 248 else 216
return $ Str [chr num]
+lslash :: GenParser Char st Inline
+lslash = try $ do
+ cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "]
+ return $ if 'l' `elem` cmd
+ then Str "\x142"
+ else Str "\x141"
+
aelig :: GenParser Char st Inline
aelig = try $ do
char '\\'
@@ -569,7 +668,7 @@ escapedChar = do
-- nonescaped special characters
unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c])
+unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
specialChar :: GenParser Char st Inline
specialChar = choice [ spacer, interwordSpace,
@@ -604,27 +703,34 @@ doubleQuote :: GenParser Char st Inline
doubleQuote = char '"' >> return (Str "\"")
code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> lhsInlineCode
+code = code1 <|> code2 <|> code3 <|> lhsInlineCode
code1 :: GenParser Char st Inline
code1 = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
- return $ Code $ removeLeadingTrailingSpace result
+ return $ Code nullAttr $ removeLeadingTrailingSpace result
code2 :: GenParser Char st Inline
code2 = try $ do
string "\\texttt{"
result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code result
+ return $ Code nullAttr result
+
+code3 :: GenParser Char st Inline
+code3 = try $ do
+ string "\\lstinline"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ return $ Code nullAttr $ removeLeadingTrailingSpace result
lhsInlineCode :: GenParser Char ParserState Inline
lhsInlineCode = try $ do
failUnlessLHS
char '|'
result <- manyTill (noneOf "|\n") (char '|')
- return $ Code result
+ return $ Code ("",["haskell"],[]) result
emph :: GenParser Char ParserState Inline
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
@@ -683,15 +789,6 @@ emDash = try (string "---") >> return EmDash
hyphen :: GenParser Char st Inline
hyphen = char '-' >> return (Str "-")
-lab :: GenParser Char st Inline
-lab = try $ do
- string "\\label{"
- result <- manyTill anyChar (char '}')
- return $ Str $ "(" ++ result ++ ")"
-
-ref :: GenParser Char st Inline
-ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-
strong :: GenParser Char ParserState Inline
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
return . Strong
@@ -714,13 +811,13 @@ endline :: GenParser Char st Inline
endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
-math :: GenParser Char st Inline
-math = (math3 >>= return . Math DisplayMath)
- <|> (math1 >>= return . Math InlineMath)
- <|> (math2 >>= return . Math InlineMath)
- <|> (math4 >>= return . Math DisplayMath)
- <|> (math5 >>= return . Math DisplayMath)
- <|> (math6 >>= return . Math DisplayMath)
+math :: GenParser Char ParserState Inline
+math = (math3 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
+ <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
+ <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
<?> "math"
math1 :: GenParser Char st String
@@ -737,7 +834,6 @@ math4 = try $ do
name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
begin "multline" <|> begin "multline*"
- spaces
manyTill anyChar (end name)
math5 :: GenParser Char st String
@@ -748,10 +844,15 @@ math6 = try $ do
name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
begin "split" <|> begin "aligned" <|> begin "alignedat"
- spaces
res <- manyTill anyChar (end name)
return $ filter (/= '&') res -- remove alignment codes
+ensureMath :: GenParser Char st Inline
+ensureMath = try $ do
+ (n, _, args) <- command
+ guard $ n == "ensuremath" && not (null args)
+ return $ Math InlineMath $ tail $ init $ head args
+
--
-- links and images
--
@@ -760,7 +861,7 @@ url :: GenParser Char ParserState Inline
url = try $ do
string "\\url"
url' <- charsInBalanced '{' '}'
- return $ Link [Code url'] (escapeURI url', "")
+ return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
link :: GenParser Char ParserState Inline
link = try $ do
@@ -793,6 +894,103 @@ footnote = try $ do
setInput rest
return $ Note blocks
+-- | citations
+cite :: GenParser Char ParserState Inline
+cite = simpleCite <|> complexNatbibCites
+
+simpleCiteArgs :: GenParser Char ParserState [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ (char '[') >> manyTill inline (char ']')
+ second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
+ char '{'
+ keys <- many1Till citationLabel (char '}')
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> ([], s )
+ (Just s , Just t ) -> (s , t )
+ _ -> ([], [])
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+
+simpleCite :: GenParser Char ParserState Inline
+simpleCite = try $ do
+ char '\\'
+ let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
+ ++ ["footcitetext"]
+ normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
+ ++ biblatex
+ supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
+ intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
+ mintext = ["textcites"]
+ mnormal = map (++ "s") biblatex
+ cmdend = notFollowedBy (letter <|> char '*')
+ capit [] = []
+ capit (x:xs) = toUpper x : xs
+ addUpper xs = xs ++ map capit xs
+ toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
+ (mode, multi) <- toparser normal (NormalCitation, False)
+ <|> toparser supress (SuppressAuthor, False)
+ <|> toparser intext (AuthorInText , False)
+ <|> toparser mnormal (NormalCitation, True )
+ <|> toparser mintext (AuthorInText , True )
+ cits <- if multi then
+ many1 simpleCiteArgs
+ else
+ simpleCiteArgs >>= \c -> return [c]
+ let (c:cs) = concat cits
+ cits' = case mode of
+ AuthorInText -> c {citationMode = mode} : cs
+ _ -> map (\a -> a {citationMode = mode}) (c:cs)
+ return $ Cite cits' []
+
+complexNatbibCites :: GenParser Char ParserState Inline
+complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
+
+complexNatbibTextual :: GenParser Char ParserState Inline
+complexNatbibTextual = try $ do
+ string "\\citeauthor{"
+ manyTill (noneOf "}") (char '}')
+ skipSpaces
+ Cite (c:cs) _ <- complexNatbibParenthetical
+ return $ Cite (c {citationMode = AuthorInText} : cs) []
+
+
+complexNatbibParenthetical :: GenParser Char ParserState Inline
+complexNatbibParenthetical = try $ do
+ string "\\citetext{"
+ cits <- many1Till parseOne (char '}')
+ return $ Cite (concat cits) []
+ where
+ parseOne = do
+ skipSpaces
+ pref <- many (notFollowedBy (oneOf "\\}") >> inline)
+ (Cite cites _) <- simpleCite
+ suff <- many (notFollowedBy (oneOf "\\};") >> inline)
+ skipSpaces
+ optional $ char ';'
+ return $ addPrefix pref $ addSuffix suff $ cites
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) = let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+citationLabel :: GenParser Char ParserState String
+citationLabel = do
+ res <- many1 $ noneOf ",}"
+ optional $ char ','
+ return $ removeLeadingTrailingSpace res
+
-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = do
@@ -805,12 +1003,11 @@ rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
state <- getState
if stateParseRaw state
- then do
- (name, star, args) <- command
- return $ TeX ("\\" ++ name ++ star ++ concat args)
+ then command >>= demacro
else do
- (name, _, args) <- command
- spaces
- if name `elem` commandsToIgnore
- then return $ Str ""
- else return $ Str (concat args)
+ (name,st,args) <- command
+ x <- demacro (name,st,args)
+ unless (x == Str "" || name `elem` commandsToIgnore) $ do
+ inp <- getInput
+ setInput $ intercalate " " args ++ inp
+ return $ Str ""