summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/XML.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/XML.hs')
-rw-r--r-- src/Text/Pandoc/XML.hs 33
1 files changed, 17 insertions, 16 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index e105aee91..62874f0b9 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.XML
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2018 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -36,18 +36,20 @@ module Text.Pandoc.XML ( escapeCharForXML,
toEntities,
fromEntities ) where
-import Text.Pandoc.Pretty
-import Data.Char (ord, isAscii, isSpace)
+import Data.Char (isAscii, isSpace, ord)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Pretty
-- | Escape one character as needed for XML.
escapeCharForXML :: Char -> String
escapeCharForXML x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- c -> [c]
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ c -> [c]
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
@@ -91,11 +93,10 @@ inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
-- | Escape all non-ascii characters using numerical entities.
-toEntities :: String -> String
-toEntities [] = ""
-toEntities (c:cs)
- | isAscii c = c : toEntities cs
- | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+toEntities :: Text -> Text
+toEntities = T.concatMap go
+ where go c | isAscii c = T.singleton c
+ | otherwise = T.pack ("&#" ++ show (ord c) ++ ";")
-- Unescapes XML entities
fromEntities :: String -> String
@@ -108,8 +109,8 @@ fromEntities ('&':xs) =
(zs, ys) -> (zs,ys)
ent' = case ent of
'#':'X':ys -> '#':'x':ys -- workaround tagsoup bug
- '#':_ -> ent
- _ -> ent ++ ";"
+ '#':_ -> ent
+ _ -> ent ++ ";"
fromEntities (x:xs) = x : fromEntities xs
fromEntities [] = []