diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-27 03:04:40 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2007-01-27 03:04:40 +0000 |
commit | d06417125dd4d8cb177abd2d472c0c1cad4c49be (patch) | |
tree | a3a6d7991f01e4184b678a5fcce023947d015451 /src/Text/Pandoc/Writers/Docbook.hs | |
parent | f2de08864ee9870147839389554b9353631d21f8 (diff) |
Changes in entity handling:
+ Entities are parsed (and unicode characters returned) in both
Markdown and HTML readers.
+ Parsers characterEntity, namedEntity, decimalEntity, hexEntity added
to Entities.hs; these parse a string and return a unicode character.
+ Changed 'entity' parser in HTML reader to use the 'characterEntity'
parser from Entities.hs.
+ Added new 'entity' parser to Markdown reader, and added '&' as a
special character. Adjusted test suite accordingly since now we
get 'Str "AT",Str "&",Str "T"' instead of 'Str "AT&T"..
+ stringToSGML moved to Entities.hs. escapeSGML removed as redundant,
given encodeEntities.
+ stringToSGML, encodeEntities, and specialCharToEntity are given a
boolean parameter that causes only numerical entities to be used.
This is used in the docbook writer. The HTML writer uses named
entities where possible, but not all docbook-consumers know about
the named entities without special instructions, so it seems safer
to use numerical entities there.
+ decodeEntities is rewritten in a way that avoids Text.Regex, using
the new parsers.
+ charToEntity and charToNumericalEntity added to Entities.hs.
+ Moved specialCharToEntity from Shared.hs to Entities.hs.
+ Removed unneeded 'decodeEntities' from 'str' parser in HTML and
Markdown readers.
+ Removed sgmlHexEntity, sgmlDecimalEntity, sgmlNamedEntity, and
sgmlCharacterEntity from Shared.hs.
+ Modified Docbook writer so that it doesn't rely on Text.Regex for
detecting "mailto" links.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@515 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Writers/Docbook.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 30 |
1 files changed, 14 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index ec3801a9a..0c83d0ea0 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -32,10 +32,9 @@ module Text.Pandoc.Writers.Docbook ( ) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Entities ( encodeEntities ) -import Text.Regex ( mkRegex, matchRegex ) +import Text.Pandoc.Entities ( encodeEntities, stringToSGML ) import Data.Char ( toLower, ord ) -import Data.List ( isPrefixOf, partition ) +import Data.List ( isPrefixOf, partition, drop ) import Text.PrettyPrint.HughesPJ hiding ( Str ) -- | Data structure for defining hierarchical Pandoc documents @@ -65,8 +64,8 @@ authorToDocbook name = inTagsIndented "author" $ then -- last name first let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ stringToSGML firstname) <> - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ stringToSGML True firstname) <> + inTagsSimple "surname" (text $ stringToSGML True lastname) else -- last name last let namewords = words name lengthname = length namewords @@ -74,8 +73,8 @@ authorToDocbook name = inTagsIndented "author" $ 0 -> ("","") 1 -> ("", name) n -> (joinWithSep " " (take (n-1) namewords), last namewords) in - inTagsSimple "firstname" (text $ stringToSGML firstname) $$ - inTagsSimple "surname" (text $ stringToSGML lastname) + inTagsSimple "firstname" (text $ stringToSGML True firstname) $$ + inTagsSimple "surname" (text $ stringToSGML True lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String @@ -87,7 +86,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) = then inTagsIndented "articleinfo" $ (inTagsSimple "title" (wrap opts title)) $$ (vcat (map authorToDocbook authors)) $$ - (inTagsSimple "date" (text $ stringToSGML date)) + (inTagsSimple "date" (text $ stringToSGML True date)) else empty blocks' = replaceReferenceLinks blocks (noteBlocks, blocks'') = partition isNoteBlock blocks' @@ -142,7 +141,7 @@ blockToDocbook opts (Para lst) = blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" (blocksToDocbook opts blocks) blockToDocbook opts (CodeBlock str) = - text "<screen>\n" <> text (escapeSGML str) <> text "\n</screen>" + text "<screen>\n" <> text (encodeEntities True str) <> text "\n</screen>" blockToDocbook opts (BulletList lst) = inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst blockToDocbook opts (OrderedList lst) = @@ -199,7 +198,7 @@ inlinesToDocbook opts lst = hcat (map (inlineToDocbook opts) lst) -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook opts (Str str) = text $ stringToSGML str +inlineToDocbook opts (Str str) = text $ stringToSGML True str inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" (inlinesToDocbook opts lst) inlineToDocbook opts (Strong lst) = @@ -212,24 +211,23 @@ inlineToDocbook opts Ellipses = text "…" inlineToDocbook opts EmDash = text "—" inlineToDocbook opts EnDash = text "–" inlineToDocbook opts (Code str) = - inTagsSimple "literal" $ text (escapeSGML str) + inTagsSimple "literal" $ text (encodeEntities True str) inlineToDocbook opts (TeX str) = inlineToDocbook opts (Code str) inlineToDocbook opts (HtmlInline str) = empty inlineToDocbook opts LineBreak = text $ "<literallayout></literallayout>" inlineToDocbook opts Space = char ' ' inlineToDocbook opts (Link txt (Src src tit)) = - case (matchRegex (mkRegex "mailto:(.*)") src) of - Just [addr] -> inTagsSimple "email" $ text (escapeSGML addr) - Nothing -> inTags False "ulink" [("url", src)] $ - inlinesToDocbook opts txt + if isPrefixOf "mailto:" src + then inTagsSimple "email" $ text (encodeEntities True $ drop 7 src) + else inTags False "ulink" [("url", src)] $ inlinesToDocbook opts txt inlineToDocbook opts (Link text (Ref ref)) = empty -- shouldn't occur inlineToDocbook opts (Image alt (Src src tit)) = let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" - (text $ stringToSGML tit) in + (text $ stringToSGML True tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] |