summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-04-25 08:08:00 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-04-25 08:08:00 -0700
commit60297089f642818599b925c5e3bd7ecdfbf93c1d (patch)
tree5466dcff5cd5529eadca899577da4c918d85f228 /src/Text/Pandoc
parentcbeb3bb2132908b76e3a83e61ff99418ebdf83b4 (diff)
parentb09412d852880a0c8e18e1cab9b0ce33f0e0e8a2 (diff)
Merge pull request #1265 from tarleb/org-links
Improvements handling of internal links
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs93
-rw-r--r--src/Text/Pandoc/Readers/RST.hs2
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs18
3 files changed, 92 insertions, 21 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index c71cc24be..0e52bff90 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -45,7 +45,7 @@ import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>), (<**>) )
import Control.Monad (foldM, guard, liftM, liftM2, when)
import Control.Monad.Reader (Reader, runReader, ask, asks)
-import Data.Char (toLower)
+import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
@@ -79,6 +79,7 @@ type OrgBlockAttributes = M.Map String String
-- | Org-mode parser state
data OrgParserState = OrgParserState
{ orgStateOptions :: ReaderOptions
+ , orgStateAnchorIds :: [String]
, orgStateBlockAttributes :: OrgBlockAttributes
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisNewlines :: Maybe Int
@@ -105,6 +106,7 @@ instance Default OrgParserState where
defaultOrgParserState :: OrgParserState
defaultOrgParserState = OrgParserState
{ orgStateOptions = def
+ , orgStateAnchorIds = []
, orgStateBlockAttributes = M.empty
, orgStateEmphasisCharStack = []
, orgStateEmphasisNewlines = Nothing
@@ -116,6 +118,10 @@ defaultOrgParserState = OrgParserState
, orgStateNotes' = []
}
+recordAnchorId :: String -> OrgParser ()
+recordAnchorId i = updateState $ \s ->
+ s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
+
addBlockAttribute :: String -> String -> OrgParser ()
addBlockAttribute key val = updateState $ \s ->
let attrs = orgStateBlockAttributes s
@@ -209,6 +215,9 @@ instance Monoid a => Monoid (F a) where
trimInlinesF :: F Inlines -> F Inlines
trimInlinesF = liftM trimInlines
+returnF :: a -> OrgParser (F a)
+returnF = return . return
+
-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
@@ -291,9 +300,6 @@ orgBlock = try $ do
"src" -> codeBlockWithAttr classArgs content
_ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks
where
- returnF :: a -> OrgParser (F a)
- returnF = return . return
-
parseVerse :: String -> OrgParser (F Blocks)
parseVerse cs =
fmap B.para . mconcat . intersperse (pure B.linebreak)
@@ -740,6 +746,7 @@ inline =
, linebreak
, footnote
, linkOrImage
+ , anchor
, str
, endline
, emph
@@ -834,7 +841,11 @@ noteMarker = try $ do
]
linkOrImage :: OrgParser (F Inlines)
-linkOrImage = explicitOrImageLink <|> selflinkOrImage <?> "link or image"
+linkOrImage = explicitOrImageLink
+ <|> selflinkOrImage
+ <|> angleLink
+ <|> plainLink
+ <?> "link or image"
explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink = try $ do
@@ -843,23 +854,52 @@ explicitOrImageLink = try $ do
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
- return $ B.link src "" <$>
- if isImageFilename src && isImageFilename title
- then return $ B.image title mempty mempty
- else title'
+ return $ if isImageFilename src && isImageFilename title
+ then pure $ B.link src "" $ B.image title mempty mempty
+ else linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
src <- char '[' *> linkTarget <* char ']'
- return . return $ if isImageFilename src
- then B.image src "" ""
- else B.link src "" (B.str src)
+ return $ linkToInlinesF src (B.str src)
+
+plainLink :: OrgParser (F Inlines)
+plainLink = try $ do
+ (orig, src) <- uri
+ returnF $ B.link src "" (B.str orig)
+
+angleLink :: OrgParser (F Inlines)
+angleLink = try $ do
+ char '<'
+ link <- plainLink
+ char '>'
+ return link
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
+linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+
+linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF s@('#':_) = pure . B.link s ""
+linkToInlinesF s
+ | isImageFilename s = const . pure $ B.image s "" ""
+ | isUri s = pure . B.link s ""
+ | isRelativeUrl s = pure . B.link s ""
+linkToInlinesF s = \title -> do
+ anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
+ if anchorB
+ then pure $ B.link ('#':s) "" title
+ else pure $ B.emph title
+
+isRelativeUrl :: String -> Bool
+isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+
+isUri :: String -> Bool
+isUri s = let (scheme, path) = break (== ':') s
+ in all (\c -> isAlphaNum c || c `elem` ".-") scheme
+ && not (null path)
isImageFilename :: String -> Bool
isImageFilename filename =
@@ -870,6 +910,33 @@ isImageFilename filename =
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id. Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal. Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor = try $ do
+ anchorId <- parseAnchor
+ recordAnchorId anchorId
+ returnF $ B.spanWith (solidify anchorId, [], []) mempty
+ where
+ parseAnchor = string "<<"
+ *> many1 (noneOf "\t\n\r<>\"' ")
+ <* string ">>"
+ <* skipSpaces
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+ | isAlphaNum c = c
+ | c `elem` "_.-:" = c
+ | otherwise = '-'
+
emph :: OrgParser (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index a574f343a..7785861cc 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1005,7 +1005,7 @@ renderRole contents fmt role attr = case role of
where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
where padNo = replicate (4 - length pepNo) '0' ++ pepNo
- pepUrl = "http://http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
roleNameEndingIn :: RSTParser Char -> RSTParser String
roleNameEndingIn end = many1Till (letter <|> char '-') end
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index e12c9078f..e52220f01 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -655,16 +655,20 @@ isQuoted _ = False
-- | Convert inline element to LaTeX
inlineToLaTeX :: Inline -- ^ Inline to convert
-> State WriterState Doc
-inlineToLaTeX (Span (_,classes,_) ils) = do
+inlineToLaTeX (Span (id',classes,_) ils) = do
let noEmph = "csl-no-emph" `elem` classes
let noStrong = "csl-no-strong" `elem` classes
let noSmallCaps = "csl-no-smallcaps" `elem` classes
- ((if noEmph then inCmd "textup" else id) .
- (if noStrong then inCmd "textnormal" else id) .
- (if noSmallCaps then inCmd "textnormal" else id) .
- (if not (noEmph || noStrong || noSmallCaps)
- then braces
- else id)) `fmap` inlineListToLaTeX ils
+ let label' = if (null id')
+ then empty
+ else text "\\label" <> braces (text $ toLabel id')
+ fmap (label' <>)
+ ((if noEmph then inCmd "textup" else id) .
+ (if noStrong then inCmd "textnormal" else id) .
+ (if noSmallCaps then inCmd "textnormal" else id) .
+ (if not (noEmph || noStrong || noSmallCaps)
+ then braces
+ else id)) `fmap` inlineListToLaTeX ils
inlineToLaTeX (Emph lst) =
inlineListToLaTeX lst >>= return . inCmd "emph"
inlineToLaTeX (Strong lst) =