diff options
Diffstat (limited to 'src/Text/Pandoc/XML.hs')
-rw-r--r-- | src/Text/Pandoc/XML.hs | 33 |
1 files changed, 17 insertions, 16 deletions
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index e105aee91..62874f0b9 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2016 John MacFarlane + Copyright : Copyright (C) 2006-2018 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -36,18 +36,20 @@ module Text.Pandoc.XML ( escapeCharForXML, toEntities, fromEntities ) where -import Text.Pandoc.Pretty -import Data.Char (ord, isAscii, isSpace) +import Data.Char (isAscii, isSpace, ord) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Pretty -- | Escape one character as needed for XML. escapeCharForXML :: Char -> String escapeCharForXML x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - c -> [c] + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + c -> [c] -- | Escape string as needed for XML. Entity references are not preserved. escapeStringForXML :: String -> String @@ -91,11 +93,10 @@ inTagsIndented :: String -> Doc -> Doc inTagsIndented tagType = inTags True tagType [] -- | Escape all non-ascii characters using numerical entities. -toEntities :: String -> String -toEntities [] = "" -toEntities (c:cs) - | isAscii c = c : toEntities cs - | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs +toEntities :: Text -> Text +toEntities = T.concatMap go + where go c | isAscii c = T.singleton c + | otherwise = T.pack ("&#" ++ show (ord c) ++ ";") -- Unescapes XML entities fromEntities :: String -> String @@ -108,8 +109,8 @@ fromEntities ('&':xs) = (zs, ys) -> (zs,ys) ent' = case ent of '#':'X':ys -> '#':'x':ys -- workaround tagsoup bug - '#':_ -> ent - _ -> ent ++ ";" + '#':_ -> ent + _ -> ent ++ ";" fromEntities (x:xs) = x : fromEntities xs fromEntities [] = [] |