summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers/Docbook.hs
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-27 03:04:40 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-01-27 03:04:40 +0000
commitd06417125dd4d8cb177abd2d472c0c1cad4c49be (patch)
treea3a6d7991f01e4184b678a5fcce023947d015451 /src/Text/Pandoc/Writers/Docbook.hs
parentf2de08864ee9870147839389554b9353631d21f8 (diff)
Changes in entity handling:
+ Entities are parsed (and unicode characters returned) in both Markdown and HTML readers. + Parsers characterEntity, namedEntity, decimalEntity, hexEntity added to Entities.hs; these parse a string and return a unicode character. + Changed 'entity' parser in HTML reader to use the 'characterEntity' parser from Entities.hs. + Added new 'entity' parser to Markdown reader, and added '&' as a special character. Adjusted test suite accordingly since now we get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T".. + stringToSGML moved to Entities.hs. escapeSGML removed as redundant, given encodeEntities. + stringToSGML, encodeEntities, and specialCharToEntity are given a boolean parameter that causes only numerical entities to be used. This is used in the docbook writer. The HTML writer uses named entities where possible, but not all docbook-consumers know about the named entities without special instructions, so it seems safer to use numerical entities there. + decodeEntities is rewritten in a way that avoids Text.Regex, using the new parsers. + charToEntity and charToNumericalEntity added to Entities.hs. + Moved specialCharToEntity from Shared.hs to Entities.hs. + Removed unneeded 'decodeEntities' from 'str' parser in HTML and Markdown readers. + Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and sgmlCharacterEntity from Shared.hs. + Modified Docbook writer so that it doesn't rely on Text.Regex for detecting "mailto" links. git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Writers/Docbook.hs')
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs30
1 files changed, 14 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index ec3801a9a..0c83d0ea0 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -32,10 +32,9 @@ module Text.Pandoc.Writers.Docbook (
) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Entities ( encodeEntities )
-import Text.Regex ( mkRegex, matchRegex )
+import Text.Pandoc.Entities ( encodeEntities, stringToSGML )
import Data.Char ( toLower, ord )
-import Data.List ( isPrefixOf, partition )
+import Data.List ( isPrefixOf, partition, drop )
import Text.PrettyPrint.HughesPJ hiding ( Str )
-- | Data structure for defining hierarchical Pandoc documents
@@ -65,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $
then -- last name first
let (lastname, rest) = break (==',') name
firstname = removeLeadingSpace rest in
- inTagsSimple "firstname" (text $ stringToSGML firstname) <>
- inTagsSimple "surname" (text $ stringToSGML lastname)
+ inTagsSimple "firstname" (text $ stringToSGML True firstname) <>
+ inTagsSimple "surname" (text $ stringToSGML True lastname)
else -- last name last
let namewords = words name
lengthname = length namewords
@@ -74,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $
0 -> ("","")
1 -> ("", name)
n -> (joinWithSep " " (take (n-1) namewords), last namewords) in
- inTagsSimple "firstname" (text $ stringToSGML firstname) $$
- inTagsSimple "surname" (text $ stringToSGML lastname)
+ inTagsSimple "firstname" (text $ stringToSGML True firstname) $$
+ inTagsSimple "surname" (text $ stringToSGML True lastname)
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
@@ -87,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
then inTagsIndented "articleinfo" $
(inTagsSimple "title" (wrap opts title)) $$
(vcat (map authorToDocbook authors)) $$
- (inTagsSimple "date" (text $ stringToSGML date))
+ (inTagsSimple "date" (text $ stringToSGML True date))
else empty
blocks' = replaceReferenceLinks blocks
(noteBlocks, blocks'') = partition isNoteBlock blocks'
@@ -142,7 +141,7 @@ blockToDocbook opts (Para lst) =
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" (blocksToDocbook opts blocks)
blockToDocbook opts (CodeBlock str) =
- text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>"
+ text "<screen>\n" <> text (encodeEntities True str) <> text "\n</screen>"
blockToDocbook opts (BulletList lst) =
inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst
blockToDocbook opts (OrderedList lst) =
@@ -199,7 +198,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst)
-- | Convert an inline element to Docbook.
inlineToDocbook :: WriterOptions -> Inline -> Doc
-inlineToDocbook opts (Str str) = text $ stringToSGML str
+inlineToDocbook opts (Str str) = text $ stringToSGML True str
inlineToDocbook opts (Emph lst) =
inTagsSimple "emphasis" (inlinesToDocbook opts lst)
inlineToDocbook opts (Strong lst) =
@@ -212,24 +211,23 @@ inlineToDocbook opts Ellipses = text "&#8230;"
inlineToDocbook opts EmDash = text "&#8212;"
inlineToDocbook opts EnDash = text "&#8211;"
inlineToDocbook opts (Code str) =
- inTagsSimple "literal" $ text (escapeSGML str)
+ inTagsSimple "literal" $ text (encodeEntities True str)
inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str)
inlineToDocbook opts (HtmlInline str) = empty
inlineToDocbook opts LineBreak =
text $ "<literallayout></literallayout>"
inlineToDocbook opts Space = char ' '
inlineToDocbook opts (Link txt (Src src tit)) =
- case (matchRegex (mkRegex "mailto:(.*)") src) of
- Just [addr] -> inTagsSimple "email" $ text (escapeSGML addr)
- Nothing -> inTags False "ulink" [("url", src)] $
- inlinesToDocbook opts txt
+ if isPrefixOf "mailto:" src
+ then inTagsSimple "email" $ text (encodeEntities True $ drop 7 src)
+ else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt
inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur
inlineToDocbook opts (Image alt (Src src tit)) =
let titleDoc = if null tit
then empty
else inTagsIndented "objectinfo" $
inTagsIndented "title"
- (text $ stringToSGML tit) in
+ (text $ stringToSGML True tit) in
inTagsIndented "inlinemediaobject" $
inTagsIndented "imageobject" $
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]