summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-08-16 22:57:00 -0700
committer John MacFarlane <jgm@berkeley.edu> 2014-08-16 22:57:00 -0700
commit 9d52ecdd422a6821c9f37f53e3d30d4be4e41e8f (patch)
tree 93d7539fb19d4cbcbbc526fec704a5067c6c9c87
parent cb4ae6112e4bf9b12b3131fa96fc8d29b1e77dec (diff)
HTML reader: Parse appropriately styled span as SmallCaps.
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 7
-rw-r--r-- tests/html-reader.html 1
-rw-r--r-- tests/html-reader.native 1
3 files changed, 8 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index cee7ea300..bd60a74fa 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -570,7 +570,12 @@ pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- return $ B.spanWith (mkAttr attr) contents
+ let attr' = mkAttr attr
+ return $ case attr' of
+ ("",[],[("style",s)])
+ | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
+ B.smallcaps contents
+ _ -> B.spanWith (mkAttr attr) contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
diff --git a/tests/html-reader.html b/tests/html-reader.html
index d059d7b4b..14ad3ed54 100644
--- a/tests/html-reader.html
+++ b/tests/html-reader.html
@@ -309,6 +309,7 @@ These should not be escaped: \$ \\ \> \[ \{
<p><strong><em>This is strong and em.</em></strong></p>
<p>So is <strong><em>this</em></strong> word.</p>
<p>This is code: <code>&gt;</code>, <code>$</code>, <code>\</code>, <code>\$</code>, <code>&lt;html&gt;</code>.</p>
+<p>This is <span style="font-variant: small-caps;">small caps</span>.</p>
<hr />
<h1>Smart quotes, ellipses, dashes</h1>
<p>"Hello," said the spider. "'Shelob' is my name."</p>
diff --git a/tests/html-reader.native b/tests/html-reader.native
index c6ed36910..aef6e40fc 100644
--- a/tests/html-reader.native
+++ b/tests/html-reader.native
@@ -193,6 +193,7 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("titl
,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]]
,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."]
,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",Space,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",Space,Code ("",[],[]) "<html>",Str "."]
+,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "small",Space,Str "caps"],Str "."]
,HorizontalRule
,Header 1 ("",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"]
,Para [Str "\"Hello,\"",Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Str "\"'Shelob'",Space,Str "is",Space,Str "my",Space,Str "name.\""]