summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-02 00:29:22 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-02 00:29:22 +0000
commit4e5745134a8867d68a4678285272869d1c0ebce3 (patch)
tree367c6ad81b30db706f69e139f4a074d0352ebb45 /src
parent27169438551c1edfb8a2251a70ae607a7db36b29 (diff)
Use entities for all characters above 127 in docbook output.
Though XML tools should support unicode, some people will be using SGML tools, and these do not. Using entities makes the docbook files more portable. Also refactored encodeEntities and charToHtmlEntity in HtmlEntities.hs git-svn-id: https://pandoc.googlecode.com/svn/trunk@394 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/HtmlEntities.hs17
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs12
2 files changed, 17 insertions, 12 deletions
diff --git a/src/Text/Pandoc/HtmlEntities.hs b/src/Text/Pandoc/HtmlEntities.hs
index 07eb3fb53..157588262 100644
--- a/src/Text/Pandoc/HtmlEntities.hs
+++ b/src/Text/Pandoc/HtmlEntities.hs
@@ -57,7 +57,10 @@ decodeEntities str =
-- | Returns a string with characters replaced with entity references where
-- possible.
encodeEntities :: String -> String
-encodeEntities = concatMap (\c -> fromMaybe [c] (charToHtmlEntity c))
+encodeEntities [] = []
+encodeEntities (c:cs) = if ord c < 127
+ then c:(encodeEntities cs)
+ else (charToHtmlEntity c) ++ (encodeEntities cs)
-- | If the string is a valid entity reference, returns @Just@ the character,
-- otherwise @Nothing@.
@@ -69,14 +72,14 @@ htmlEntityToChar entity =
Just (_, _, _, [sub]) -> Just (chr (read sub))
Nothing -> Nothing
--- | If there is an entity reference corresponding to the character, returns
--- @Just@ the entity reference, otherwise @Nothing@.
-charToHtmlEntity :: Char -> Maybe String
+-- | Returns a string containing an entity reference for the character.
+charToHtmlEntity :: Char -> String
charToHtmlEntity char =
- let matches = filter (\(entity, character) -> (character == char)) htmlEntityTable in
+ let matches = filter (\(entity, character) -> (character == char))
+ htmlEntityTable in
if (length matches) == 0
- then Nothing
- else Just (fst (head matches))
+ then "&#" ++ show (ord char) ++ ";"
+ else fst (head matches)
htmlEntityTable :: [(String, Char)]
htmlEntityTable = [
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 3916aa214..61f24807b 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -33,9 +33,10 @@ module Text.Pandoc.Writers.Docbook (
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Writers.HTML ( stringToSmartHtml, stringToHtml )
+import Text.Pandoc.HtmlEntities ( encodeEntities )
import Text.Html ( stringToHtmlString )
import Text.Regex ( mkRegex, matchRegex )
-import Data.Char ( toLower )
+import Data.Char ( toLower, ord )
import Data.List ( isPrefixOf, partition )
import Text.PrettyPrint.HughesPJ hiding ( Str )
@@ -179,9 +180,10 @@ wrap options lst = fsep $ map (hcat . (map (inlineToDocbook options))) (splitByS
-- | Escape a string for XML (with "smart" option if specified).
stringToXML :: WriterOptions -> String -> String
-stringToXML options = if writerSmart options
- then stringToSmartHtml
- else stringToHtml
+stringToXML options = encodeEntities .
+ (if writerSmart options
+ then stringToSmartHtml
+ else stringToHtml)
-- | Escape string to XML appropriate for attributes
attributeStringToXML :: String -> String
@@ -189,7 +191,7 @@ attributeStringToXML = gsub "\"" "&quot;" . codeStringToXML
-- | Escape a literal string for XML.
codeStringToXML :: String -> String
-codeStringToXML = gsub "<" "&lt;" . gsub "&" "&amp;"
+codeStringToXML = encodeEntities . gsub "<" "&lt;" . gsub "&" "&amp;"
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc