summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Markdown.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Markdown.hs')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs481
1 files changed, 238 insertions, 243 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 33fb3d8e6..58d2158bf 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -27,26 +27,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown (
- readMarkdown
- ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import Data.List ( transpose, sortBy, findIndex, intercalate )
import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag, anyHtmlTag,
- anyHtmlEndTag, htmlEndTag, extractTagType,
- htmlBlockElement, htmlComment, unsanitaryURI )
+import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
+ isTextTag, isCommentTag )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, unless)
+import Control.Monad (when, liftM, guard)
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match (tagOpen)
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -58,18 +57,26 @@ readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
-- Constants and data structure definitions
--
-bulletListMarkers :: [Char]
-bulletListMarkers = "*+-"
+isBulletListMarker :: Char -> Bool
+isBulletListMarker '*' = True
+isBulletListMarker '+' = True
+isBulletListMarker '-' = True
+isBulletListMarker _ = False
-hruleChars :: [Char]
-hruleChars = "*-_"
+isHruleChar :: Char -> Bool
+isHruleChar '*' = True
+isHruleChar '-' = True
+isHruleChar '_' = True
+isHruleChar _ = False
setextHChars :: [Char]
setextHChars = "=-"
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&@'\";"
+isBlank :: Char -> Bool
+isBlank ' ' = True
+isBlank '\t' = True
+isBlank '\n' = True
+isBlank _ = False
--
-- auxiliary functions
@@ -106,12 +113,6 @@ failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = do
- state <- getState
- if stateSmart state then return () else pzero
-
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: GenParser Char ParserState Inline
@@ -119,7 +120,7 @@ inlinesInBalancedBrackets :: GenParser Char ParserState Inline
inlinesInBalancedBrackets parser = try $ do
char '['
result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- unless (res == "[") pzero
+ guard (res == "[")
bal <- inlinesInBalancedBrackets parser
return $ [Str "["] ++ bal ++ [Str "]"])
<|> (count 1 parser))
@@ -143,7 +144,8 @@ authorsLine :: GenParser Char ParserState [[Inline]]
authorsLine = try $ do
char '%'
skipSpaces
- authors <- sepEndBy (many (notFollowedBy (oneOf ";\n") >> inline))
+ authors <- sepEndBy (many (notFollowedBy (satisfy $ \c ->
+ c == ';' || c == '\n') >> inline))
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
@@ -196,7 +198,7 @@ parseMarkdown = do
handleExampleRef z = z
if M.null examples
then return doc
- else return $ processWith handleExampleRef doc
+ else return $ bottomUp handleExampleRef doc
--
-- initial pass for references and notes
@@ -209,16 +211,24 @@ referenceKey = try $ do
lab <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ let nl = char '\n' >> notFollowedBy blankline >> return ' '
+ let sourceURL = liftM unwords $ many $ try $ do
+ notFollowedBy' referenceTitle
+ skipMany spaceChar
+ optional nl
+ skipMany spaceChar
+ notFollowedBy' reference
+ many1 (satisfy $ not . isBlank)
+ let betweenAngles = try $ char '<' >>
+ manyTill (noneOf ">\n" <|> nl) (char '>')
+ src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys }
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
@@ -232,12 +242,12 @@ referenceTitle = try $ do
return $ decodeCharacterReferences tit
noteMarker :: GenParser Char ParserState [Char]
-noteMarker = skipNonindentSpaces >> string "[^" >> manyTill (noneOf " \t\n") (char ']')
+noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
rawLine :: GenParser Char ParserState [Char]
rawLine = do
notFollowedBy blankline
- notFollowedBy' noteMarker
+ notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
contents <- many1 nonEndline
end <- option "" (newline >> optional indentSpaces >> return "\n")
return $ contents ++ end
@@ -248,6 +258,7 @@ rawLines = many1 rawLine >>= return . concat
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
+ skipNonindentSpaces
ref <- noteMarker
char ':'
optional blankline
@@ -284,6 +295,7 @@ block = do
, plain
, nullBlock ]
else [ codeBlockDelimited
+ , macro
, header
, table
, codeBlockIndented
@@ -293,6 +305,7 @@ block = do
, bulletList
, orderedList
, definitionList
+ , rawTeXBlock
, para
, rawHtmlBlocks
, plain
@@ -318,6 +331,9 @@ atxClosing = try $ skipMany (char '#') >> blanklines
setextHeader :: GenParser Char ParserState Block
setextHeader = try $ do
+ -- This lookahead prevents us from wasting time parsing Inlines
+ -- unless necessary -- it gives a significant performance boost.
+ lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
text <- many1Till inline newline
underlineChar <- oneOf setextHChars
many (char underlineChar)
@@ -332,7 +348,7 @@ setextHeader = try $ do
hrule :: GenParser Char st Block
hrule = try $ do
skipSpaces
- start <- oneOf hruleChars
+ start <- satisfy isHruleChar
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
@@ -371,6 +387,7 @@ attributes = try $ do
attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
attribute = identifierAttr <|> classAttr <|> keyValAttr
+
identifier :: GenParser Char st [Char]
identifier = do
first <- letter
@@ -394,7 +411,7 @@ keyValAttr = try $ do
key <- identifier
char '='
char '"'
- val <- manyTill (noneOf "\n") (char '"')
+ val <- manyTill (satisfy (/='\n')) (char '"')
return ("",[],[(key,val)])
codeBlockDelimited :: GenParser Char st Block
@@ -489,7 +506,7 @@ bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
- oneOf bulletListMarkers
+ satisfy isBulletListMarker
spaceChar
skipSpaces
@@ -524,7 +541,7 @@ listLine = try $ do
notFollowedBy' (do indentSpaces
many (spaceChar)
listStart)
- chunks <- manyTill (htmlComment <|> count 1 anyChar) newline
+ chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
@@ -644,23 +661,21 @@ definitionList = do
--
isHtmlOrBlank :: Inline -> Bool
-isHtmlOrBlank (HtmlInline _) = True
-isHtmlOrBlank (Space) = True
-isHtmlOrBlank (LineBreak) = True
-isHtmlOrBlank _ = False
+isHtmlOrBlank (RawInline "html" _) = True
+isHtmlOrBlank (Space) = True
+isHtmlOrBlank (LineBreak) = True
+isHtmlOrBlank _ = False
para :: GenParser Char ParserState Block
para = try $ do
- result <- many1 inline
- if all isHtmlOrBlank result
- then fail "treat as raw HTML"
- else return ()
- newline
- blanklines <|> do st <- getState
- if stateStrict st
- then lookAhead (blockQuote <|> header) >> return ""
- else pzero
- return $ Para $ normalizeSpaces result
+ result <- liftM normalizeSpaces $ many1 inline
+ guard $ not . all isHtmlOrBlank $ result
+ option (Plain result) $ try $ do
+ newline
+ blanklines <|>
+ (getState >>= guard . stateStrict >>
+ lookAhead (blockQuote <|> header) >> return "")
+ return $ Para result
plain :: GenParser Char ParserState Block
plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
@@ -670,7 +685,7 @@ plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
--
htmlElement :: GenParser Char ParserState [Char]
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: GenParser Char ParserState Block
htmlBlock = try $ do
@@ -678,27 +693,33 @@ htmlBlock = try $ do
first <- htmlElement
finalSpace <- many spaceChar
finalNewlines <- many newline
- return $ RawHtml $ first ++ finalSpace ++ finalNewlines
-
--- True if tag is self-closing
-isSelfClosing :: [Char] -> Bool
-isSelfClosing tag =
- isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+ return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines
strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
- let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
- then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
- (htmlElement <|> (count 1 anyChar)))
- end <- htmlEndTag tag'
- return $ tag ++ concat contents ++ end
+strictHtmlBlock = do
+ failUnlessBeginningOfLine
+ htmlInBalanced (not . isInlineTag)
+
+rawVerbatimBlock :: GenParser Char ParserState String
+rawVerbatimBlock = try $ do
+ (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
+ t == "pre" || t == "style" || t == "script")
+ (const True))
+ contents <- manyTill anyChar (htmlTag (~== TagClose tag))
+ return $ open ++ contents ++ renderTags [TagClose tag]
+
+rawTeXBlock :: GenParser Char ParserState Block
+rawTeXBlock = do
+ failIfStrict
+ result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
+ <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+ spaces
+ return result
rawHtmlBlocks :: GenParser Char ParserState Block
rawHtmlBlocks = do
- htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
+ htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
+ liftM snd (htmlTag isBlockTag)
sps <- do sp1 <- many spaceChar
sp2 <- option "" (blankline >> return "\n")
sp3 <- many spaceChar
@@ -710,7 +731,7 @@ rawHtmlBlocks = do
return $ blk ++ sps
let combined = concat htmlBlocks
let combined' = if last combined == '\n' then init combined else combined
- return $ RawHtml combined'
+ return $ RawBlock "html" combined'
--
-- Tables
@@ -848,10 +869,11 @@ alignType :: [String]
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
- let s = head $ sortBy (comparing length) $
- map removeTrailingSpace strLst
- leftSpace = if null s then False else (s !! 0) `elem` " \t"
- rightSpace = length s < len || (s !! (len - 1)) `elem` " \t"
+ let nonempties = filter (not . null) $ map removeTrailingSpace strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
@@ -875,31 +897,29 @@ inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
inlineParsers :: [GenParser Char ParserState Inline]
-inlineParsers = [ str
- , smartPunctuation
- , whitespace
+inlineParsers = [ whitespace
+ , str
, endline
, code
- , charRef
, (fourOrMore '*' <|> fourOrMore '_')
, strong
, emph
, note
- , inlineNote
, link
-#ifdef _CITEPROC
- , inlineCitation
-#endif
+ , cite
, image
, math
, strikeout
, superscript
, subscript
+ , inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
- , rawHtmlInline'
+ , rawHtmlInline
, rawLaTeXInline'
, escapedChar
, exampleRef
+ , smartPunctuation inline
+ , charRef
, symbol
, ltSign ]
@@ -913,12 +933,12 @@ failIfLink (Link _ _) = pzero
failIfLink elt = return elt
escapedChar :: GenParser Char ParserState Inline
-escapedChar = do
+escapedChar = try $ do
char '\\'
state <- getState
- result <- option '\\' $ if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ result <- if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
return $ case result of
' ' -> Str "\160" -- "\ " is a nonbreaking space
'\n' -> LineBreak -- "\[newline]" is a linebreak
@@ -932,9 +952,6 @@ ltSign = do
else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
return $ Str ['<']
-specialCharsMinusLt :: [Char]
-specialCharsMinusLt = filter (/= '<') specialChars
-
exampleRef :: GenParser Char ParserState Inline
exampleRef = try $ do
char '@'
@@ -945,7 +962,11 @@ exampleRef = try $ do
symbol :: GenParser Char ParserState Inline
symbol = do
- result <- oneOf specialCharsMinusLt
+ result <- noneOf "<\\\n\t "
+ <|> try (do lookAhead $ char '\\'
+ notFollowedBy' $ rawLaTeXEnvironment'
+ <|> rawConTeXtEnvironment'
+ char '\\')
return $ Str [result]
-- parses inline code, between n `s and n `s
@@ -957,7 +978,8 @@ code = try $ do
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- return $ Code $ removeLeadingTrailingSpace $ concat result
+ attr <- option ([],[],[]) (try $ optional whitespace >> attributes)
+ return $ Code attr $ removeLeadingTrailingSpace $ concat result
mathWord :: GenParser Char st [Char]
mathWord = liftM concat $ many1 mathChunk
@@ -966,11 +988,11 @@ mathChunk :: GenParser Char st [Char]
mathChunk = do char '\\'
c <- anyChar
return ['\\',c]
- <|> many1 (noneOf " \t\n\\$")
+ <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$'))
math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= return . Math DisplayMath)
- <|> (mathInline >>= return . Math InlineMath)
+math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
mathDisplay :: GenParser Char ParserState String
mathDisplay = try $ do
@@ -1019,85 +1041,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
(notFollowedBy spaceChar >> inline) >>= -- may not contain Space
return . Subscript
-smartPunctuation :: GenParser Char ParserState Inline
-smartPunctuation = failUnlessSmart >>
- choice [ quoted, apostrophe, dash, ellipses ]
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
-failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
- then fail "already inside quotes"
- else return ()
-
-singleQuoteStart :: GenParser Char ParserState Char
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\''
-
-singleQuoteEnd :: GenParser Char st Char
-singleQuoteEnd = try $ do
- char '\''
- notFollowedBy alphaNum
- return '\''
-
-doubleQuoteStart :: GenParser Char ParserState Char
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"'
-
-doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '"'
-
-ellipses :: GenParser Char st Inline
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
-
-enDash :: GenParser Char st Inline
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = oneOfStrings ["---", "--"] >> return EmDash
-
whitespace :: GenParser Char ParserState Inline
whitespace = spaceChar >>
( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
@@ -1106,20 +1049,19 @@ whitespace = spaceChar >>
nonEndline :: GenParser Char st Char
nonEndline = satisfy (/='\n')
-strChar :: GenParser Char st Char
-strChar = noneOf (specialChars ++ " \t\n")
-
str :: GenParser Char ParserState Inline
str = do
- result <- many1 strChar
+ a <- alphaNum
+ as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum)
+ let result = a:as
state <- getState
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
if stateSmart state
then case likelyAbbrev result of
[] -> return $ Str result
xs -> choice (map (\x ->
- try (string x >> char ' ' >>
- notFollowedBy spaceChar >>
+ try (string x >> oneOf " \n" >>
+ lookAhead alphaNum >>
return (Str $ result ++ spacesToNbr x ++ "\160"))) xs)
<|> (return $ Str result)
else return $ Str result
@@ -1142,15 +1084,13 @@ endline = try $ do
newline
notFollowedBy blankline
st <- getState
- if stateStrict st
- then do notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
- else return ()
+ when (stateStrict st) $ do
+ notFollowedBy emailBlockQuoteStart
+ notFollowedBy (char '#') -- atx header
-- parse potential list-starts differently if in a list:
- if stateParserContext st == ListItemState
- then notFollowedBy' (bulletListStart <|>
- (anyOrderedListStart >> return ()))
- else return ()
+ when (stateParserContext st == ListItemState) $ do
+ notFollowedBy' bulletListStart
+ notFollowedBy' anyOrderedListStart
return Space
--
@@ -1175,9 +1115,16 @@ source =
source' :: GenParser Char st (String, [Char])
source' = do
skipSpaces
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ let nl = char '\n' >>~ notFollowedBy blankline
+ let sourceURL = liftM unwords $ many $ try $ do
+ notFollowedBy' linkTitle
+ skipMany spaceChar
+ optional nl
+ skipMany spaceChar
+ many1 (satisfy $ not . isBlank)
+ let betweenAngles = try $ char '<' >>
+ manyTill (noneOf ">\n" <|> nl) (char '>')
+ src <- try betweenAngles <|> sourceURL
tit <- option "" linkTitle
skipSpaces
eof
@@ -1196,10 +1143,7 @@ link :: GenParser Char ParserState Inline
link = try $ do
lab <- reference
(src, tit) <- source <|> referenceLink lab
- sanitize <- getState >>= return . stateSanitizeHTML
- if sanitize && unsanitaryURI src
- then fail "Unsanitary URI"
- else return $ Link lab (src, tit)
+ return $ Link lab (src, tit)
-- a link like [this][ref] or [this][] or [this]
referenceLink :: [Inline]
@@ -1209,7 +1153,7 @@ referenceLink lab = do
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then lab else ref
state <- getState
- case lookupKeySrc (stateKeys state) (Key ref') of
+ case lookupKeySrc (stateKeys state) (toKey ref') of
Nothing -> fail "no corresponding key"
Just target -> return target
@@ -1219,12 +1163,9 @@ autoLink = try $ do
(orig, src) <- uri <|> emailAddress
char '>'
st <- getState
- let sanitize = stateSanitizeHTML st
- if sanitize && unsanitaryURI src
- then fail "Unsanitary URI"
- else return $ if stateStrict st
- then Link [Str orig] (src, "")
- else Link [Code orig] (src, "")
+ return $ if stateStrict st
+ then Link [Str orig] (src, "")
+ else Link [Code ("",["url"],[]) orig] (src, "")
image :: GenParser Char ParserState Inline
image = try $ do
@@ -1250,11 +1191,13 @@ inlineNote = try $ do
return $ Note [Para contents]
rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
+rawLaTeXInline' = try $ do
failIfStrict
- (rawConTeXtEnvironment' >>= return . TeX)
- <|> (rawLaTeXEnvironment' >>= return . TeX)
- <|> rawLaTeXInline
+ lookAhead $ char '\\'
+ notFollowedBy' $ rawLaTeXEnvironment'
+ <|> rawConTeXtEnvironment'
+ RawInline _ s <- rawLaTeXInline
+ return $ RawInline "tex" s -- "tex" because it might be context or latex
rawConTeXtEnvironment' :: GenParser Char st String
rawConTeXtEnvironment' = try $ do
@@ -1272,46 +1215,98 @@ inBrackets parser = do
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline' :: GenParser Char ParserState Inline
-rawHtmlInline' = do
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = do
st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else anyHtmlInlineTag
- return $ HtmlInline result
-
-#ifdef _CITEPROC
-inlineCitation :: GenParser Char ParserState Inline
-inlineCitation = try $ do
+ (_,result) <- if stateStrict st
+ then htmlTag (not . isTextTag)
+ else htmlTag isInlineTag
+ return $ RawInline "html" result
+
+-- Citations
+
+cite :: GenParser Char ParserState Inline
+cite = do
failIfStrict
- cit <- citeMarker
- let citations = readWith parseCitation defaultParserState cit
- mr <- mapM chkCit citations
- if catMaybes mr /= []
- then return $ Cite citations []
- else fail "no citation found"
-
-chkCit :: Target -> GenParser Char ParserState (Maybe Target)
-chkCit t = do
+ citations <- textualCite <|> normalCite
+ return $ Cite citations []
+
+spnl :: GenParser Char st ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+textualCite :: GenParser Char ParserState [Citation]
+textualCite = try $ do
+ (_, key) <- citeKey
+ let first = Citation{ citationId = key
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = AuthorInText
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ rest <- option [] $ try $ spnl >> normalCite
+ if null rest
+ then option [first] $ bareloc first
+ else return $ first : rest
+
+bareloc :: Citation -> GenParser Char ParserState [Citation]
+bareloc c = try $ do
+ spnl
+ char '['
+ suff <- suffix
+ rest <- option [] $ try $ char ';' >> citeList
+ spnl
+ char ']'
+ return $ c{ citationSuffix = suff } : rest
+
+normalCite :: GenParser Char ParserState [Citation]
+normalCite = try $ do
+ char '['
+ spnl
+ citations <- citeList
+ spnl
+ char ']'
+ return citations
+
+citeKey :: GenParser Char ParserState (Bool, String)
+citeKey = try $ do
+ suppress_author <- option False (char '-' >> return True)
+ char '@'
+ first <- letter
+ rest <- many $ (noneOf ",;]@ \t\n")
+ let key = first:rest
st <- getState
- case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of
- Just _ -> fail "This is a link"
- Nothing -> if elem (fst t) $ stateCitations st
- then return $ Just t
- else return $ Nothing
-
-citeMarker :: GenParser Char ParserState String
-citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']')
-
-parseCitation :: GenParser Char ParserState [(String,String)]
-parseCitation = try $ sepBy (parseLabel) (oneOf ";")
-
-parseLabel :: GenParser Char ParserState (String,String)
-parseLabel = try $ do
- res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@")
- case res of
- [lab,loc] -> return (lab, loc)
- [lab] -> return (lab, "" )
- _ -> return ("" , "" )
-
-#endif
+ guard $ key `elem` stateCitations st
+ return (suppress_author, key)
+
+suffix :: GenParser Char ParserState [Inline]
+suffix = try $ do
+ spnl
+ liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+
+prefix :: GenParser Char ParserState [Inline]
+prefix = liftM normalizeSpaces $
+ manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
+
+citeList :: GenParser Char ParserState [Citation]
+citeList = sepBy1 citation (try $ char ';' >> spnl)
+
+citation :: GenParser Char ParserState Citation
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ Citation{ citationId = key
+ , citationPrefix = pref
+ , citationSuffix = suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+