summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs22
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs45
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs33
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs25
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs5
5 files changed, 79 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d3cee08e2..dcf0c5f4a 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -253,7 +253,7 @@ parseAttr = try $ do
k <- many1 letter
char '='
v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
- <|> many1 nonspaceChar
+ <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
tableStart :: MWParser ()
@@ -376,11 +376,17 @@ preformatted = try $ do
spacesStr _ = False
if F.all spacesStr contents
then return mempty
- else return $ B.para $ walk strToCode contents
-
-strToCode :: Inline -> Inline
-strToCode (Str s) = Code ("",[],[]) s
-strToCode x = x
+ else return $ B.para $ encode contents
+
+encode :: Inlines -> Inlines
+encode = B.fromList . normalizeCode . B.toList . walk strToCode
+ where strToCode (Str s) = Code ("",[],[]) s
+ strToCode Space = Code ("",[],[]) " "
+ strToCode x = x
+ normalizeCode [] = []
+ normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
+ normalizeCode $ (Code a1 (x ++ y)) : zs
+ normalizeCode (x:xs) = x : normalizeCode xs
header :: MWParser Blocks
header = try $ do
@@ -545,8 +551,8 @@ inlineTag = do
TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
- TagOpen "code" _ -> walk strToCode <$> inlinesInTags "code"
- TagOpen "tt" _ -> walk strToCode <$> inlinesInTags "tt"
+ TagOpen "code" _ -> encode <$> inlinesInTags "code"
+ TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 0bd82ce2f..6a8bb8b28 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
- ( isImageFilename, rundocBlockClass, toRundocAttrib
- , translateLang )
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
@@ -422,7 +422,16 @@ verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content)
+ <$> mapM parseVerseLine (lines content)
+ where
+ -- Replace initial spaces with non-breaking spaces to preserve
+ -- indentation, then parse the rest of the line as normal inlines.
+ parseVerseLine :: String -> OrgParser (F Inlines)
+ parseVerseLine cs = do
+ let (initialSpaces, indentedLine) = span isSpace cs
+ let nbspIndent = B.str $ map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine ++ "\n")
+ return (pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- the two blocks is included in the output is determined using the "exports"
@@ -571,23 +580,33 @@ figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
- guard . not . isNothing . blockAttrCaption $ figAttrs
- guard (isImageFilename src)
- let figName = fromMaybe mempty $ blockAttrName figAttrs
- let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
- let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
- let figKeyVals = blockAttrKeyValues figAttrs
- let attr = (figLabel, mempty, figKeyVals)
- return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+ case cleanLinkString src of
+ Nothing -> mzero
+ Just imgSrc -> do
+ guard (not . isNothing . blockAttrCaption $ figAttrs)
+ guard (isImageFilename imgSrc)
+ return $ figureBlock figAttrs imgSrc
where
+ selfTarget :: OrgParser String
+ selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+ figureBlock :: BlockAttributes -> String -> (F Blocks)
+ figureBlock figAttrs imgSrc =
+ let
+ figName = fromMaybe mempty $ blockAttrName figAttrs
+ figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
+ figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+ figKeyVals = blockAttrKeyValues figAttrs
+ attr = (figLabel, mempty, figKeyVals)
+ in
+ B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
+
withFigPrefix :: String -> String
withFigPrefix cs =
if "fig:" `isPrefixOf` cs
then cs
else "fig:" ++ cs
- selfTarget :: OrgParser String
- selfTarget = try $ char '[' *> linkTarget <* char ']'
--
-- Examples
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index e1a66a8c7..31f098d27 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
- ( isImageFilename, rundocBlockClass, toRundocAttrib
- , translateLang )
+ ( cleanLinkString, isImageFilename, rundocBlockClass
+ , toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines )
@@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Prelude hiding (sequence)
import Control.Monad ( guard, mplus, mzero, when, void )
import Data.Char ( isAlphaNum, isSpace )
-import Data.List ( intersperse, isPrefixOf )
+import Data.List ( intersperse )
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
import Data.Traversable (sequence)
@@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
char ']'
return $ do
src <- srcF
- if isImageFilename title
- then pure $ B.link src "" $ B.image title mempty mempty
- else linkToInlinesF src =<< title'
+ case cleanLinkString title of
+ Just imgSrc | isImageFilename imgSrc ->
+ pure $ B.link src "" $ B.image imgSrc mempty mempty
+ _ ->
+ linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
@@ -482,25 +484,6 @@ linkToInlinesF linkStr =
else pure . B.link cleanedLink ""
Nothing -> internalLink linkStr -- other internal link
--- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
- _ | isUrl s -> Just s -- URL
- _ -> Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
-
internalLink :: String -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 3ba46b9e4..8c87cfa25 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( isImageFilename
+ ( cleanLinkString
+ , isImageFilename
, rundocBlockClass
, toRundocAttrib
, translateLang
) where
import Control.Arrow ( first )
+import Data.Char ( isAlphaNum )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf )
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
- (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+ (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
':' `notElem` filename)
where
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
+-- | Clean up and canonicalize a string describing a link. Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+ case s of
+ '/':_ -> Just $ "file://" ++ s -- absolute path
+ '.':'/':_ -> Just s -- relative path
+ '.':'.':'/':_ -> Just s -- relative path
+ -- Relative path or URL (file scheme)
+ 'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+ _ | isUrl s -> Just s -- URL
+ _ -> Nothing
+ where
+ isUrl :: String -> Bool
+ isUrl cs =
+ let (scheme, path) = break (== ':') cs
+ in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+ && not (null path)
+
-- | Prefix used for Rundoc classes and arguments.
rundocPrefix :: String
rundocPrefix = "rundoc-"
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4ab0243fe..a4de85dfb 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -60,7 +60,7 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
+import Text.HTML.TagSoup (fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
import Data.List ( intercalate, transpose, intersperse )
import Data.Char ( digitToInt, isUpper )
@@ -182,8 +182,7 @@ trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
codeBlockPre :: Parser [Char] ParserState Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
- result' <- (innerText . parseTags) `fmap` -- remove internal tags
- manyTill anyChar (htmlTag (tagClose (=="pre")))
+ result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
optional blanklines
-- drop leading newline if any
let result'' = case result' of