5 files changed, 79 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index d3cee08e2..dcf0c5f4a 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -253,7 +253,7 @@ parseAttr = try $ do
   k <- many1 letter
   char '='
   v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
-       <|> many1 nonspaceChar
+       <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
   return (k,v)
 
 tableStart :: MWParser ()
@@ -376,11 +376,17 @@ preformatted = try $ do
       spacesStr _        = False
   if F.all spacesStr contents
      then return mempty
-     else return $ B.para $ walk strToCode contents
-
-strToCode :: Inline -> Inline
-strToCode (Str s) = Code ("",[],[]) s
-strToCode  x      = x
+     else return $ B.para $ encode contents
+
+encode :: Inlines -> Inlines
+encode = B.fromList . normalizeCode . B.toList . walk strToCode
+  where strToCode (Str s) = Code ("",[],[]) s
+        strToCode Space   = Code ("",[],[]) " "
+        strToCode  x      = x
+        normalizeCode []  = []
+        normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
+          normalizeCode $ (Code a1 (x ++ y)) : zs
+        normalizeCode (x:xs) = x : normalizeCode xs
 
 header :: MWParser Blocks
 header = try $ do
@@ -545,8 +551,8 @@ inlineTag = do
        TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
        TagOpen "sub" _ -> B.subscript <$> inlinesInTags "sub"
        TagOpen "sup" _ -> B.superscript <$> inlinesInTags "sup"
-       TagOpen "code" _ -> walk strToCode <$> inlinesInTags "code"
-       TagOpen "tt" _ -> walk strToCode <$> inlinesInTags "tt"
+       TagOpen "code" _ -> encode <$> inlinesInTags "code"
+       TagOpen "tt" _ -> encode <$> inlinesInTags "tt"
        TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
        _ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
 
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index 0bd82ce2f..6a8bb8b28 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -39,8 +39,8 @@ import           Text.Pandoc.Readers.Org.Inlines
 import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Readers.Org.Parsing
 import           Text.Pandoc.Readers.Org.Shared
-                   ( isImageFilename, rundocBlockClass, toRundocAttrib
-                   , translateLang )
+                   ( cleanLinkString, isImageFilename, rundocBlockClass
+                   , toRundocAttrib, translateLang )
 
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines, Blocks )
@@ -422,7 +422,16 @@ verseBlock blockType = try $ do
   ignHeaders
   content <- rawBlockContent blockType
   fmap B.para . mconcat . intersperse (pure B.linebreak)
-    <$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content)
+    <$> mapM parseVerseLine (lines content)
+ where
+   -- replace initial spaces with nonbreaking spaces to preserve
+   -- indentation, parse the rest as normal inline
+   parseVerseLine :: String -> OrgParser (F Inlines)
+   parseVerseLine cs = do
+     let (initialSpaces, indentedLine) = span isSpace cs
+     let nbspIndent = B.str $ map (const '\160') initialSpaces
+     line <- parseFromString inlines (indentedLine ++ "\n")
+     return (pure nbspIndent <> line)
 
 -- | Read a code block and the associated results block if present.  Which of
 -- boths blocks is included in the output is determined using the "exports"
@@ -571,23 +580,33 @@ figure :: OrgParser (F Blocks)
 figure = try $ do
   figAttrs <- blockAttributes
   src <- skipSpaces *> selfTarget <* skipSpaces <* newline
-  guard . not . isNothing . blockAttrCaption $ figAttrs
-  guard (isImageFilename src)
-  let figName    = fromMaybe mempty $ blockAttrName figAttrs
-  let figLabel   = fromMaybe mempty $ blockAttrLabel figAttrs
-  let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
-  let figKeyVals = blockAttrKeyValues figAttrs
-  let attr       = (figLabel, mempty, figKeyVals)
-  return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
+  case cleanLinkString src of
+    Nothing     -> mzero
+    Just imgSrc -> do
+      guard (not . isNothing . blockAttrCaption $ figAttrs)
+      guard (isImageFilename imgSrc)
+      return $ figureBlock figAttrs imgSrc
  where
+   selfTarget :: OrgParser String
+   selfTarget = try $ char '[' *> linkTarget <* char ']'
+
+   figureBlock :: BlockAttributes -> String -> (F Blocks)
+   figureBlock figAttrs imgSrc =
+     let
+       figName    = fromMaybe mempty $ blockAttrName figAttrs
+       figLabel   = fromMaybe mempty $ blockAttrLabel figAttrs
+       figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
+       figKeyVals = blockAttrKeyValues figAttrs
+       attr       = (figLabel, mempty, figKeyVals)
+     in
+       B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
+
    withFigPrefix :: String -> String
    withFigPrefix cs =
      if "fig:" `isPrefixOf` cs
      then cs
      else "fig:" ++ cs
 
-   selfTarget :: OrgParser String
-   selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 --
 -- Examples
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index e1a66a8c7..31f098d27 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -37,8 +37,8 @@ import           Text.Pandoc.Readers.Org.BlockStarts
 import           Text.Pandoc.Readers.Org.ParserState
 import           Text.Pandoc.Readers.Org.Parsing
 import           Text.Pandoc.Readers.Org.Shared
-                   ( isImageFilename, rundocBlockClass, toRundocAttrib
-                   , translateLang )
+                   ( cleanLinkString, isImageFilename, rundocBlockClass
+                   , toRundocAttrib, translateLang )
 
 import qualified Text.Pandoc.Builder as B
 import           Text.Pandoc.Builder ( Inlines )
@@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 import           Prelude hiding (sequence)
 import           Control.Monad ( guard, mplus, mzero, when, void )
 import           Data.Char ( isAlphaNum, isSpace )
-import           Data.List ( intersperse, isPrefixOf )
+import           Data.List ( intersperse )
 import           Data.Maybe ( fromMaybe )
 import qualified Data.Map as M
 import           Data.Traversable (sequence)
@@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
   char ']'
   return $ do
     src <- srcF
-    if isImageFilename title
-      then pure $ B.link src "" $ B.image title mempty mempty
-      else linkToInlinesF src =<< title'
+    case cleanLinkString title of
+      Just imgSrc | isImageFilename imgSrc ->
+        pure $ B.link src "" $ B.image imgSrc mempty mempty
+      _ ->
+        linkToInlinesF src =<< title'
 
 selflinkOrImage :: OrgParser (F Inlines)
 selflinkOrImage = try $ do
@@ -482,25 +484,6 @@ linkToInlinesF linkStr =
                                        else pure . B.link cleanedLink ""
                  Nothing -> internalLink linkStr  -- other internal link
 
--- | Cleanup and canonicalize a string describing a link.  Return @Nothing@ if
--- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
-  case s of
-    '/':_                  -> Just $ "file://" ++ s  -- absolute path
-    '.':'/':_              -> Just s                 -- relative path
-    '.':'.':'/':_          -> Just s                 -- relative path
-    -- Relative path or URL (file schema)
-    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
-    _ | isUrl s            -> Just s                 -- URL
-    _                      -> Nothing
- where
-   isUrl :: String -> Bool
-   isUrl cs =
-     let (scheme, path) = break (== ':') cs
-     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
-          && not (null path)
-
 internalLink :: String -> Inlines -> F Inlines
 internalLink link title = do
   anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 3ba46b9e4..8c87cfa25 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 Utility functions used in other Pandoc Org modules.
 -}
 module Text.Pandoc.Readers.Org.Shared
-  ( isImageFilename
+  ( cleanLinkString
+  , isImageFilename
   , rundocBlockClass
   , toRundocAttrib
   , translateLang
   ) where
 
 import           Control.Arrow ( first )
+import           Data.Char ( isAlphaNum )
 import           Data.List ( isPrefixOf, isSuffixOf )
 
 
@@ -41,12 +43,31 @@ import           Data.List ( isPrefixOf, isSuffixOf )
 isImageFilename :: String -> Bool
 isImageFilename filename =
   any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
-  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
    ':' `notElem` filename)
  where
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
+-- | Clean up and canonicalize a string describing a link.  Return @Nothing@ if
+-- the string does not appear to be a link.
+cleanLinkString :: String -> Maybe String
+cleanLinkString s =
+  case s of
+    '/':_                  -> Just $ "file://" ++ s  -- absolute path
+    '.':'/':_              -> Just s                 -- relative path
+    '.':'.':'/':_          -> Just s                 -- relative path
+    -- Relative path or URL (file schema)
+    'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
+    _ | isUrl s            -> Just s                 -- URL
+    _                      -> Nothing
+ where
+   isUrl :: String -> Bool
+   isUrl cs =
+     let (scheme, path) = break (== ':') cs
+     in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
+          && not (null path)
+
 -- | Prefix used for Rundoc classes and arguments.
 rundocPrefix :: String
 rundocPrefix = "rundoc-"
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4ab0243fe..a4de85dfb 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -60,7 +60,7 @@ import Text.Pandoc.Parsing
 import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
 import Text.Pandoc.Shared (trim)
 import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
-import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
+import Text.HTML.TagSoup (fromAttrib, Tag(..))
 import Text.HTML.TagSoup.Match
 import Data.List ( intercalate, transpose, intersperse )
 import Data.Char ( digitToInt, isUpper )
@@ -182,8 +182,7 @@ trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
 codeBlockPre :: Parser [Char] ParserState Blocks
 codeBlockPre = try $ do
   (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
-  result' <- (innerText . parseTags) `fmap` -- remove internal tags
-               manyTill anyChar (htmlTag (tagClose (=="pre")))
+  result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
   optional blanklines
   -- drop leading newline if any
   let result'' = case result' of