12 files changed, 136 insertions, 61 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index a4d963221..d7311d978 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -320,14 +320,14 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = plainExtensions
-getDefaultExtensions "org"             = Set.fromList [Ext_citations]
+getDefaultExtensions "org"             = Set.fromList [Ext_citations,
+                                                       Ext_auto_identifiers]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers]
 getDefaultExtensions "html"            = Set.fromList [Ext_auto_identifiers,
                                                        Ext_native_divs,
                                                        Ext_native_spans]
 getDefaultExtensions "html5"           = getDefaultExtensions "html"
-getDefaultExtensions "epub"            = Set.fromList [Ext_auto_identifiers,
-                                                       Ext_raw_html,
+getDefaultExtensions "epub"            = Set.fromList [Ext_raw_html,
                                                        Ext_native_divs,
                                                        Ext_native_spans,
                                                        Ext_epub_html_exts]
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
new file mode 100644
index 000000000..32a5ea129
--- /dev/null
+++ b/src/Text/Pandoc/CSS.hs
@@ -0,0 +1,35 @@
+module Text.Pandoc.CSS ( foldOrElse,
+                         pickStyleAttrProps
+                       )
+where
+
+import Text.Pandoc.Shared (trim)
+import Text.Parsec
+import Text.Parsec.String
+import Control.Applicative ((<*))
+
+ruleParser :: Parser (String, String)
+ruleParser = do
+    p <- many1 (noneOf ":")  <* char ':'
+    v <- many1 (noneOf ":;") <* char ';' <* spaces
+    return (trim p, trim v)
+
+styleAttrParser :: Parser [(String, String)]
+styleAttrParser = do
+    p <- many1 ruleParser
+    return p
+
+orElse :: Eq a => a -> a -> a -> a
+orElse v x y = if v == x then y else x
+
+foldOrElse :: Eq a => a -> [a] -> a
+foldOrElse v xs = foldr (orElse v) v xs
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Right x) = Just x
+eitherToMaybe _ = Nothing
+
+pickStyleAttrProps :: [String] -> String -> Maybe String
+pickStyleAttrProps lookupProps styleAttr = do
+    styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
+    foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3cc2a4479..352b94496 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -194,7 +194,7 @@ List of all DocBook tags, with [x] indicating implemented,
 [x] indexterm - A wrapper for terms to be indexed
 [x] info - A wrapper for information about a component or other block. (DocBook v5)
 [x] informalequation - A displayed mathematical equation without a title
-[ ] informalexample - A displayed example without a title
+[x] informalexample - A displayed example without a title
 [ ] informalfigure - A untitled figure
 [ ] informaltable - A table without a title
 [ ] initializer - The initializer for a FieldSynopsis
@@ -611,6 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
            "important","caution","note","tip","warning","qandadiv",
            "question","answer","abstract","itemizedlist","orderedlist",
            "variablelist","article","book","table","informaltable",
+           "informalexample",
            "screen","programlisting","example","calloutlist"]
 isBlockElement _ = False
 
@@ -766,6 +767,8 @@ parseBlock (Elem e) =
         "book" -> modify (\st -> st{ dbBook = True }) >>  getBlocks e
         "table" -> parseTable
         "informaltable" -> parseTable
+        "informalexample" -> divWith ("", ["informalexample"], []) <$>
+                             getBlocks e
         "literallayout" -> codeBlockWithLang
         "screen" -> codeBlockWithLang
         "programlisting" -> codeBlockWithLang
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 338540533..aefc32e0e 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -181,7 +181,6 @@ getManifest archive = do
 fixInternalReferences :: FilePath -> Pandoc -> Pandoc
 fixInternalReferences pathToFile =
   (walk $ renameImages root)
-  . (walk normalisePath)
   . (walk $ fixBlockIRs filename)
   . (walk $ fixInlineIRs filename)
   where
@@ -196,12 +195,6 @@ fixInlineIRs s (Link t ('#':url, tit)) =
   Link t (addHash s url, tit)
 fixInlineIRs _ v = v
 
-normalisePath :: Inline -> Inline
-normalisePath (Link t (url, tit)) =
-  let (path, uid) = span (/= '#') url in
-  Link t (takeFileName path ++ uid, tit)
-normalisePath s = s
-
 prependHash :: [String] -> Inline -> Inline
 prependHash ps l@(Link is (url, tit))
   | or [s `isPrefixOf` url | s <- ps] =
@@ -223,7 +216,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP
 
 addHash :: String -> String -> String
 addHash _ "" = ""
-addHash s ident = s ++ "#" ++ ident
+addHash s ident = takeFileName s ++ "#" ++ ident
 
 removeEPUBAttrs :: [(String, String)] -> [(String, String)]
 removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fcba16e04..b32264d61 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,6 +50,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
                                Ext_native_divs, Ext_native_spans))
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Walk
+import qualified Data.Map as M
 import Data.Maybe ( fromMaybe, isJust)
 import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf )
 import Data.Char ( isDigit )
@@ -64,6 +65,7 @@ import Data.Default (Default (..), def)
 import Control.Monad.Reader (Reader,ask, asks, local, runReader)
 import Network.URI (isURI)
 import Text.Pandoc.Error
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
 
 import Text.Parsec.Error
 
@@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options
          -> Either PandocError Pandoc
 readHtml opts inp =
     mapLeft (ParseFailure . getError) . flip runReader def $
-      runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
-             "source" tags
+      runParserT parseDoc
+        (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+        "source" tags
     where tags = stripPrefixes . canonicalizeTags $
                    parseTagsOptions parseOptions{ optTagPosition = True } inp
           parseDoc = do
@@ -100,7 +103,9 @@ data HTMLState =
   HTMLState
   {  parserState :: ParserState,
      noteTable   :: [(String, Blocks)],
-     baseHref    :: Maybe String
+     baseHref    :: Maybe String,
+     identifiers :: [String],
+     headerMap   :: M.Map Inlines String
   }
 
 data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -252,6 +257,22 @@ pListItem nonItem = do
   let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
   (liDiv <>) <$> pInTags "li" block <* skipMany nonItem
 
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal"     = Decimal
+parseListStyleType _             = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _   = DefaultStyle
+
 pOrderedList :: TagParser Blocks
 pOrderedList = try $ do
   TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +282,19 @@ pOrderedList = try $ do
                              sta' = if all isDigit sta
                                        then read sta
                                        else 1
-                             sty = fromMaybe (fromMaybe "" $
-                                   lookup "style" attribs) $
-                                   lookup "class" attribs
-                             sty' = case sty of
-                                     "lower-roman"  -> LowerRoman
-                                     "upper-roman"  -> UpperRoman
-                                     "lower-alpha"  -> LowerAlpha
-                                     "upper-alpha"  -> UpperAlpha
-                                     "decimal"      -> Decimal
-                                     _              ->
-                                       case lookup "type" attribs of
-                                               Just "1" -> Decimal
-                                               Just "I" -> UpperRoman
-                                               Just "i" -> LowerRoman
-                                               Just "A" -> UpperAlpha
-                                               Just "a" -> LowerAlpha
-                                               _        -> DefaultStyle
+
+                             pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+                             typeAttr  = fromMaybe "" $ lookup "type"  attribs
+                             classAttr = fromMaybe "" $ lookup "class" attribs
+                             styleAttr = fromMaybe "" $ lookup "style" attribs
+                             listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+                             sty' = foldOrElse DefaultStyle
+                                      [ parseTypeAttr      typeAttr
+                                      , parseListStyleType classAttr
+                                      , parseListStyleType listStyle
+                                      ]
   let nonItem = pSatisfy (\t ->
                   not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
                   not (t ~== TagClose "ol"))
@@ -385,9 +402,10 @@ pHeader = try $ do
   let ident = fromMaybe "" $ lookup "id" attr
   let classes = maybe [] words $ lookup "class" attr
   let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
+  attr' <- registerHeader (ident, classes, keyvals) contents
   return $ if bodyTitle
               then mempty  -- skip a representation of the title in the body
-              else B.headerWith (ident, classes, keyvals) level contents
+              else B.headerWith attr' level contents
 
 pHrule :: TagParser Blocks
 pHrule = do
@@ -622,12 +640,11 @@ pSpan = try $ do
   guardEnabled Ext_native_spans
   TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
   contents <- pInTags "span" inline
-  let attr' = mkAttr attr
-  return $ case attr' of
-                ("",[],[("style",s)])
-                  | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
-                     B.smallcaps contents
-                _ -> B.spanWith (mkAttr attr) contents
+  let isSmallCaps = fontVariant == "small-caps"
+                    where styleAttr   = fromMaybe "" $ lookup "style" attr
+                          fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+  let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+  return $ tag contents
 
 pRawHtmlInline :: TagParser Inlines
 pRawHtmlInline = do
@@ -971,6 +988,14 @@ isSpace _    = False
 
 -- Instances
 
+instance HasIdentifierList HTMLState where
+  extractIdentifierList = identifiers
+  updateIdentifierList f s = s{ identifiers = f (identifiers s) }
+
+instance HasHeaderMap HTMLState where
+  extractHeaderMap = headerMap
+  updateHeaderMap  f s = s{ headerMap = f (headerMap s) }
+
 -- This signature should be more general
 -- MonadReader HTMLLocal m => HasQuoteContext st m
 instance HasQuoteContext st (Reader HTMLLocal) where
@@ -980,9 +1005,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where
 instance HasReaderOptions HTMLState where
     extractReaderOptions = extractReaderOptions . parserState
 
-instance Default HTMLState where
-  def = HTMLState def [] Nothing
-
 instance HasMeta HTMLState where
   setMeta s b st = st {parserState = setMeta s b $ parserState st}
   deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0da912ea6..16d387dc4 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -524,6 +524,7 @@ inlineCommands = M.fromList $
   , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage)
   , ("enquote", enquote)
   , ("cite", citation "cite" AuthorInText False)
+  , ("Cite", citation "cite" AuthorInText False)
   , ("citep", citation "citep" NormalCitation False)
   , ("citep*", citation "citep*" NormalCitation False)
   , ("citeal", citation "citeal" NormalCitation False)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2a5adab22..b23b44544 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -252,8 +252,8 @@ parseAttr = try $ do
   skipMany spaceChar
   k <- many1 letter
   char '='
-  char '"'
-  v <- many1Till (satisfy (/='\n')) (char '"')
+  v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
+       <|> many1 nonspaceChar
   return (k,v)
 
 tableStart :: MWParser ()
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..55ac92bcb 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -70,6 +70,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
 
 type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
 
+instance HasIdentifierList OrgParserState where
+  extractIdentifierList = orgStateIdentifiers
+  updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+  extractHeaderMap = orgStateHeaderMap
+  updateHeaderMap  f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
 parseOrg :: OrgParser Pandoc
 parseOrg = do
   blocks' <- parseBlocks
@@ -135,6 +143,8 @@ data OrgParserState = OrgParserState
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
                       , orgStateNotes'               :: OrgNoteTable
+                      , orgStateIdentifiers          :: [String]
+                      , orgStateHeaderMap            :: M.Map Inlines String
                       }
 
 instance Default OrgParserLocal where
@@ -174,6 +184,8 @@ defaultOrgParserState = OrgParserState
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
                         , orgStateNotes' = []
+                        , orgStateIdentifiers = []
+                        , orgStateHeaderMap = M.empty
                         }
 
 recordAnchorId :: String -> OrgParser ()
@@ -668,7 +680,10 @@ header = try $ do
   title <- manyTill inline (lookAhead headerEnd)
   tags <- headerEnd
   let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
-  return $ B.header level <$> inlns
+  st <- getState
+  let inlines = runF inlns st
+  attr <- registerHeader nullAttr inlines
+  return $ pure (B.headerWith attr level inlines)
  where
    tagToInlineF :: String -> F Inlines
    tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 678eecc52..f9663b19a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -614,20 +614,22 @@ directive' = do
             return mempty
 
 -- TODO:
---  - Silently ignores illegal fields
 --  - Only supports :format: fields with a single format for :raw: roles,
 --    change Text.Pandoc.Definition.Format to fix
 addNewRole :: String -> [(String, String)] -> RSTParser Blocks
 addNewRole roleString fields = do
     (role, parentRole) <- parseFromString inheritedRole roleString
     customRoles <- stateRstCustomRoles <$> getState
-    let (baseRole, baseFmt, baseAttr) =
-            maybe (parentRole, Nothing, nullAttr) id $
-              M.lookup parentRole customRoles
+    let getBaseRole (r, f, a) roles =
+            case M.lookup r roles of
+                 Just (r', f', a') -> getBaseRole (r', f', a') roles
+                 Nothing -> (r, f, a)
+        (baseRole, baseFmt, baseAttr) =
+               getBaseRole (parentRole, Nothing, nullAttr) customRoles
         fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
         annotate :: [String] -> [String]
         annotate = maybe id (:) $
-            if parentRole == "code"
+            if baseRole == "code"
                then lookup "language" fields
                else Nothing
         attr = let (ident, classes, keyValues) = baseAttr
@@ -636,12 +638,12 @@ addNewRole roleString fields = do
 
     -- warn about syntax we ignore
     flip mapM_ fields $ \(key, _) -> case key of
-        "language" -> when (parentRole /= "code") $ addWarning Nothing $
+        "language" -> when (baseRole /= "code") $ addWarning Nothing $
             "ignoring :language: field because the parent of role :" ++
-            role ++ ": is :" ++ parentRole ++ ": not :code:"
-        "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+            role ++ ": is :" ++ baseRole ++ ": not :code:"
+        "format" -> when (baseRole /= "raw") $ addWarning Nothing $
             "ignoring :format: field because the parent of role :" ++
-            role ++ ": is :" ++ parentRole ++ ": not :raw:"
+            role ++ ": is :" ++ baseRole ++ ": not :raw:"
         _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
              ": in definition of role :" ++ role ++ ": in"
     when (parentRole == "raw" && countKeys "format" > 1) $
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 07a7e962c..c44133e12 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -540,6 +540,7 @@ stringify = query go . walk deNote
         go (Str x) = x
         go (Code _ x) = x
         go (Math _ x) = x
+        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
         go LineBreak = " "
         go _ = ""
         deNote (Note _) = Str ""
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index da4c78cef..0cb313e7b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -181,8 +181,8 @@ renumIds f renumMap = map (renumId f renumMap)
 
 -- | Certain characters are invalid in XML even if escaped.
 -- See #1992
-stripInvalidChars :: Pandoc -> Pandoc
-stripInvalidChars = bottomUp (filter isValidChar)
+stripInvalidChars :: String -> String
+stripInvalidChars = filter isValidChar
 
 -- | See XML reference
 isValidChar :: Char -> Bool
@@ -208,7 +208,7 @@ writeDocx :: WriterOptions  -- ^ Writer options
           -> IO BL.ByteString
 writeDocx opts doc@(Pandoc meta _) = do
   let datadir = writerUserDataDir opts
-  let doc' = stripInvalidChars . walk fixDisplayMath $ doc
+  let doc' = walk fixDisplayMath $ doc
   username <- lookup "USERNAME" <$> getEnvironment
   utctime <- getCurrentTime
   distArchive <- getDefaultReferenceDocx Nothing
@@ -974,7 +974,7 @@ formattedString str = do
   return [ mknode "w:r" [] $
              props ++
              [ mknode (if inDel then "w:delText" else "w:t")
-               [("xml:space","preserve")] str ] ]
+               [("xml:space","preserve")] (stripInvalidChars str) ] ]
 
 setFirstPara :: WS ()
 setFirstPara =  modify $ \s -> s { stFirstPara = True }
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 151d3c2ae..fae908f30 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -333,7 +333,8 @@ blockListToRST = blockListToRST' False
 -- | Convert list of Pandoc inline elements to RST.
 inlineListToRST :: [Inline] -> State WriterState Doc
 inlineListToRST lst =
-  mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat
+  mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
+    return . hcat
   where -- remove spaces after displaymath, as they screw up indentation:
         removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
               Math DisplayMath x : dropWhile (==Space) zs
@@ -341,8 +342,8 @@ inlineListToRST lst =
         removeSpaceAfterDisplayMath [] = []
         insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
         insertBS (x:y:z:zs)
-          | isComplex y && surroundComplex x z =
-             x : y : RawInline "rst" "\\ " : insertBS (z:zs)
+          | isComplex y && (surroundComplex x z) =
+              x : y : insertBS (z : zs)
         insertBS (x:y:zs)
           | isComplex x && not (okAfterComplex y) =
               x : RawInline "rst" "\\ " : insertBS (y : zs)
@@ -383,6 +384,8 @@ inlineListToRST lst =
         isComplex (Image _ _) = True
         isComplex (Code _ _) = True
         isComplex (Math _ _) = True
+        isComplex (Cite _ (x:_)) = isComplex x
+        isComplex (Span _ (x:_)) = isComplex x
         isComplex _ = False
 
 -- | Convert Pandoc inline element to RST.