summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-05-14 06:37:29 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-05-14 06:37:29 -0700
commitb5959b20073f04b7b5ecb19e2ab0ce56a5ecadb0 (patch)
tree4f05dcecdf9cae5fb6bf123bade09c71e77b316c /src/Text/Pandoc
parent222a51bf99bc6b0bfc6be6ebc03159a0ad875e4f (diff)
parentceeb701c254c6dc4c054e10dd151d9ef6f751ad7 (diff)
Merge pull request #1297 from tarleb/citations
Org reader: support Pandocs citation extension
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Parsing.hs45
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs23
-rw-r--r--src/Text/Pandoc/Readers/Org.hs66
3 files changed, 95 insertions, 39 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1e55cbc4..4cd6591c0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
withRaw,
escaped,
characterReference,
- updateLastStrPos,
anyOrderedListMarker,
orderedListMarker,
charRef,
@@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
testStringWith,
guardEnabled,
guardDisabled,
+ updateLastStrPos,
+ notAfterString,
ParserState (..),
HasReaderOptions (..),
HasHeaderMap (..),
HasIdentifierList (..),
HasMacros (..),
+ HasLastStrPosition (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
@@ -92,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
apostrophe,
dash,
nested,
+ citeKey,
macro,
applyMacros',
Parser,
@@ -904,6 +907,14 @@ instance HasMacros ParserState where
extractMacros = stateMacros
updateMacros f st = st{ stateMacros = f $ stateMacros st }
+class HasLastStrPosition st where
+ setLastStrPos :: SourcePos -> st -> st
+ getLastStrPos :: st -> Maybe SourcePos
+
+instance HasLastStrPosition ParserState where
+ setLastStrPos pos st = st{ stateLastStrPos = Just pos }
+ getLastStrPos st = stateLastStrPos st
+
defaultParserState :: ParserState
defaultParserState =
ParserState { stateOptions = def,
@@ -938,6 +949,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
+-- | Update the position on which the last string ended.
+updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos
+
+-- | Whether we are right after the end of a string.
+notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString = do
+ pos <- getPosition
+ st <- getState
+ return $ getLastStrPos st /= Just pos
+
data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
| DoubleHeader Char -- ^ Lines of characters above and below
@@ -1049,17 +1071,11 @@ charOrRef cs =
guard (c `elem` cs)
return c)
-updateLastStrPos :: Parser [Char] ParserState ()
-updateLastStrPos = getPosition >>= \p ->
- updateState $ \s -> s{ stateLastStrPos = Just p }
-
singleQuoteStart :: Parser [Char] ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- pos <- getPosition
- st <- getState
-- single quote start can't be right after str
- guard $ stateLastStrPos st /= Just pos
+ guard =<< notAfterString
() <$ charOrRef "'\8216\145"
singleQuoteEnd :: Parser [Char] st ()
@@ -1129,6 +1145,18 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
+citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
+citeKey = try $ do
+ guard =<< notAfterString
+ suppress_author <- option False (char '-' *> return True)
+ char '@'
+ firstChar <- letter <|> char '_'
+ let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+ let internal p = try $ p <* lookAhead regchar
+ rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
+ let key = firstChar:rest
+ return (suppress_author, key)
+
--
-- Macros
--
@@ -1156,4 +1184,3 @@ applyMacros' target = do
then do macros <- extractMacros `fmap` getState
return $ applyMacros macros target
else return target
-
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d1637b701..5129bc2e3 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1474,9 +1474,7 @@ strongOrEmph = enclosure '*' <|> (checkIntraword >> enclosure '_')
where checkIntraword = do
exts <- getOption readerExtensions
when (Ext_intraword_underscores `Set.member` exts) $ do
- pos <- getPosition
- lastStrPos <- stateLastStrPos <$> getState
- guard $ lastStrPos /= Just pos
+ guard =<< notAfterString
-- | Parses a list of inlines between start and end delimiters.
inlinesBetween :: (Show b)
@@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
str :: MarkdownParser (F Inlines)
str = do
result <- many1 alphaNum
- pos <- getPosition
- updateState $ \s -> s{ stateLastStrPos = Just pos }
+ updateLastStrPos
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
isSmart <- getOption readerSmart
if isSmart
@@ -1817,22 +1814,6 @@ normalCite = try $ do
char ']'
return citations
-citeKey :: MarkdownParser (Bool, String)
-citeKey = try $ do
- -- make sure we're not right after an alphanumeric,
- -- since foo@bar.baz is probably an email address
- lastStrPos <- stateLastStrPos <$> getState
- pos <- getPosition
- guard $ lastStrPos /= Just pos
- suppress_author <- option False (char '-' >> return True)
- char '@'
- first <- letter <|> char '_'
- let regchar = satisfy (\c -> isAlphaNum c || c == '_')
- let internal p = try $ p >>~ lookAhead regchar
- rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
- let key = first:rest
- return (suppress_author, key)
-
suffix :: MarkdownParser (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2e4a29beb..86dda2732 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
deleteMeta field st =
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
+instance HasLastStrPosition OrgParserState where
+ getLastStrPos = orgStateLastStrPos
+ setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -865,6 +869,7 @@ inline :: OrgParser (F Inlines)
inline =
choice [ whitespace
, linebreak
+ , cite
, footnote
, linkOrImage
, anchor
@@ -929,6 +934,51 @@ endline = try $ do
updateLastPreCharPos
return . return $ B.space
+cite :: OrgParser (F Inlines)
+cite = try $ do
+ guardEnabled Ext_citations
+ (cs, raw) <- withRaw normalCite
+ return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $ char '['
+ *> skipSpaces
+ *> citeList
+ <* skipSpaces
+ <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ do
+ x <- pref
+ y <- suff
+ return $ Citation{ citationId = key
+ , citationPrefix = B.toList x
+ , citationSuffix = B.toList y
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ where
+ prefix = trimInlinesF . mconcat <$>
+ manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+ suffix = try $ do
+ hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+ skipSpaces
+ rest <- trimInlinesF . mconcat <$>
+ many (notFollowedBy (oneOf ";]") *> inline)
+ return $ if hasSpace
+ then (B.space <>) <$> rest
+ else rest
+
footnote :: OrgParser (F Inlines)
footnote = try $ inlineNote <|> referencedNote
@@ -1003,7 +1053,7 @@ selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do
@@ -1079,7 +1129,12 @@ inlineCodeBlock = try $ do
let attrClasses = [translateLang lang, rundocBlockClass]
let attrKeyVal = map toRundocAttrib (("language", lang) : opts)
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
+enclosedByPair :: Char -- ^ opening char
+ -> Char -- ^ closing char
+ -> OrgParser a -- ^ parser
+ -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
emph :: OrgParser (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
@@ -1274,13 +1329,6 @@ afterEmphasisPreChar = do
lastPrePos <- orgStateLastPreCharPos <$> getState
return . fromMaybe True $ (== pos) <$> lastPrePos
--- | Whether we are right after the end of a string
-notAfterString :: OrgParser Bool
-notAfterString = do
- pos <- getPosition
- lastStrPos <- orgStateLastStrPos <$> getState
- return $ lastStrPos /= Just pos
-
-- | Whether the parser is right after a forbidden border char
notAfterForbiddenBorderChar :: OrgParser Bool
notAfterForbiddenBorderChar = do