summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2016-01-08 17:08:01 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2016-01-08 17:08:01 -0800
commit: 12a5bd3c8d34eddbabee0dc54fd7ce6d9539c9d4 (patch)
tree: 417b9b88ebf3a96fd8ac5a754c0aefe160e75adf /src/Text/Pandoc
parent: 52d95ddde15e92d74c2d8d0d6aa27c922c54c5e0 (diff)
Entity handling fixes:
- Text.Pandoc.XML.fromEntities: handle entities without a semicolon. Always look up character references with the trailing ';', even if it wasn't present. And never add it when looking up numerical entities. (This is what tagsoup seems to require.)
- Text.Pandoc.Parsing.characterReference: Always look up character references with the trailing ';', and leave off the ';' when looking up numerical entities. This fixes a regression for e.g. `&lang;`.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/Parsing.hs 5
-rw-r--r-- src/Text/Pandoc/XML.hs 8
2 files changed, 10 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 85786eb3e..db891d5d4 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -573,7 +573,10 @@ characterReference :: Stream s m Char => ParserT s st m Char
characterReference = try $ do
char '&'
ent <- many1Till nonspaceChar (char ';')
- case lookupEntity ent of
+ let ent' = case ent of
+ '#':_ -> ent
+ _ -> ent ++ ";"
+ case lookupEntity ent' of
Just c -> return c
Nothing -> fail "entity not found"
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index caa13f177..1e01b62f2 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -100,11 +100,15 @@ toEntities (c:cs)
-- Unescapes XML entities
fromEntities :: String -> String
fromEntities ('&':xs) =
- case lookupEntity ent of
+ case lookupEntity ent' of
Just c -> c : fromEntities rest
Nothing -> '&' : fromEntities xs
where (ent, rest) = case break (\c -> isSpace c || c == ';') xs of
(zs,';':ys) -> (zs,ys)
- _ -> ("",xs)
+ (zs, ys) -> (zs,ys)
+ ent' = case ent of
+ '#':_ -> ent
+ _ -> ent ++ ";"
+
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []