summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/DocBook.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/DocBook.hs')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs117
1 files changed, 62 insertions, 55 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 68552ccb3..728f77a05 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,22 +1,22 @@
+{-# LANGUAGE ExplicitForAll #-}
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
-import Data.Char (toUpper)
-import Text.Pandoc.Shared (safeRead)
-import Text.Pandoc.Options
-import Text.Pandoc.Definition
-import Text.Pandoc.Builder
-import Text.XML.Light
-import Text.HTML.TagSoup.Entity (lookupEntity)
+import Control.Monad.State.Strict
+import Data.Char (isSpace, toUpper)
+import Data.Default
import Data.Either (rights)
+import Data.Foldable (asum)
import Data.Generics
-import Data.Char (isSpace)
-import Control.Monad.State
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Text.Pandoc.Builder
+import Text.Pandoc.Class (PandocMonad)
+import Text.Pandoc.Options
+import Text.Pandoc.Shared (crFilter, safeRead)
import Text.TeXMath (readMathML, writeTeX)
-import Text.Pandoc.Error (PandocError)
-import Control.Monad.Except
-import Data.Default
-import Data.Foldable (asum)
+import Text.XML.Light
{-
@@ -50,7 +50,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] author - The name of an individual author
[ ] authorblurb - A short description or note about an author
[x] authorgroup - Wrapper for author information when a document has
- multiple authors or collabarators
+ multiple authors or collaborators
[x] authorinitials - The initials or other short identifier for an author
[o] beginpage - The location of a page break in a print version of the document
[ ] bibliocoverage - The spatial or temporal coverage of a document
@@ -502,7 +502,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
-type DB = ExceptT PandocError (State DBState)
+type DB m = StateT DBState m
data DBState = DBState{ dbSectionLevel :: Int
, dbQuoteType :: QuoteType
@@ -523,10 +523,12 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc
-readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs
- where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree
- tree = normalizeTree . parseXML . handleInstructions $ inp
+readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
+readDocBook _ inp = do
+ let tree = normalizeTree . parseXML . handleInstructions
+ $ T.unpack $ crFilter inp
+ (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
+ return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
@@ -536,12 +538,12 @@ handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':
handleInstructions xs = case break (=='<') xs of
(ys, []) -> ys
([], '<':zs) -> '<' : handleInstructions zs
- (ys, zs) -> ys ++ handleInstructions zs
+ (ys, zs) -> ys ++ handleInstructions zs
-getFigure :: Element -> DB Blocks
+getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
tit <- case filterChild (named "title") e of
- Just t -> getInlines t
+ Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbFigureTitle = tit }
res <- getBlocks e
@@ -564,14 +566,12 @@ normalizeTree = everywhere (mkT go)
go xs = xs
convertEntity :: String -> String
-convertEntity e = maybe (map toUpper e) id (lookupEntity e)
+convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr elt =
- case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of
- Just z -> z
- Nothing -> ""
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
named :: String -> Element -> Bool
@@ -579,20 +579,20 @@ named s e = qName (elName e) == s
--
-acceptingMetadata :: DB a -> DB a
+acceptingMetadata :: PandocMonad m => DB m a -> DB m a
acceptingMetadata p = do
modify (\s -> s { dbAcceptsMeta = True } )
res <- p
modify (\s -> s { dbAcceptsMeta = False })
return res
-checkInMeta :: Monoid a => DB () -> DB a
+checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a
checkInMeta p = do
accepts <- dbAcceptsMeta <$> get
when accepts p
return mempty
-addMeta :: ToMetaValue a => String -> a -> DB ()
+addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
addMeta field val = modify (setMeta field val)
instance HasMeta DBState where
@@ -631,7 +631,7 @@ addToStart toadd bs =
-- function that is used by both mediaobject (in parseBlock)
-- and inlinemediaobject (in parseInline)
-- A DocBook mediaobject is a wrapper around a set of alternative presentations
-getMediaobject :: Element -> DB Inlines
+getMediaobject :: PandocMonad m => Element -> DB m Inlines
getMediaobject e = do
(imageUrl, attr) <-
case filterChild (named "imageobject") e of
@@ -651,18 +651,20 @@ getMediaobject e = do
|| named "textobject" x
|| named "alt" x) el of
Nothing -> return mempty
- Just z -> mconcat <$> (mapM parseInline $ elContent z)
+ Just z -> mconcat <$>
+ mapM parseInline (elContent z)
figTitle <- gets dbFigureTitle
let (caption, title) = if isNull figTitle
then (getCaption e, "")
else (return figTitle, "fig:")
- liftM (imageWith attr imageUrl title) caption
+ fmap (imageWith attr imageUrl title) caption
-getBlocks :: Element -> DB Blocks
-getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
+getBlocks :: PandocMonad m => Element -> DB m Blocks
+getBlocks e = mconcat <$>
+ mapM parseBlock (elContent e)
-parseBlock :: Content -> DB Blocks
+parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
@@ -795,15 +797,16 @@ parseBlock (Elem e) =
return $ p <> b <> x
codeBlockWithLang = do
let classes' = case attrValue "language" e of
- "" -> []
- x -> [x]
+ "" -> []
+ x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
$ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
Just z -> (para . (str "— " <>) . mconcat)
- <$> (mapM parseInline $ elContent z)
+ <$>
+ mapM parseInline (elContent z)
contents <- getBlocks e
return $ blockQuote (contents <> attrib)
listitems = mapM getBlocks $ filterChildren (named "listitem") e
@@ -868,11 +871,11 @@ parseBlock (Elem e) =
|| x == '.') w
Nothing -> 0 :: Double
let numrows = case bodyrows of
- [] -> 0
- xs -> maximum $ map length xs
+ [] -> 0
+ xs -> maximum $ map length xs
let aligns = case colspecs of
- [] -> replicate numrows AlignDefault
- cs -> map toAlignment cs
+ [] -> replicate numrows AlignDefault
+ cs -> map toAlignment cs
let widths = case colspecs of
[] -> replicate numrows 0
cs -> let ws = map toWidth cs
@@ -892,7 +895,7 @@ parseBlock (Elem e) =
headerText <- case filterChild (named "title") e `mplus`
(filterChild (named "info") e >>=
filterChild (named "title")) of
- Just t -> getInlines t
+ Just t -> getInlines t
Nothing -> return mempty
modify $ \st -> st{ dbSectionLevel = n }
b <- getBlocks e
@@ -902,8 +905,9 @@ parseBlock (Elem e) =
lineItems = mapM getInlines $ filterChildren (named "line") e
metaBlock = acceptingMetadata (getBlocks e) >> return mempty
-getInlines :: Element -> DB Inlines
-getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e')
+getInlines :: PandocMonad m => Element -> DB m Inlines
+getInlines e' = (trimInlines . mconcat) <$>
+ mapM parseInline (elContent e')
strContentRecursive :: Element -> String
strContentRecursive = strContent .
@@ -913,10 +917,10 @@ elementToStr :: Content -> Content
elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
-parseInline :: Content -> DB Inlines
+parseInline :: PandocMonad m => Content -> DB m Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref
+ return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"equation" -> equation displayMath
@@ -957,8 +961,10 @@ parseInline (Elem e) =
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
"keycap" -> return (str $ strContent e)
- "keycombo" -> keycombo <$> (mapM parseInline $ elContent e)
- "menuchoice" -> menuchoice <$> (mapM parseInline $
+ "keycombo" -> keycombo <$>
+ mapM parseInline (elContent e)
+ "menuchoice" -> menuchoice <$>
+ mapM parseInline (
filter isGuiMenu $ elContent e)
"xref" -> do
content <- dbContent <$> get
@@ -977,17 +983,18 @@ parseInline (Elem e) =
ils <- innerInlines
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just h -> h
- _ -> ('#' : attrValue "linkend" e)
+ _ -> '#' : attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, words $ attrValue "role" e, [])
return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
- "bold" -> strong <$> innerInlines
- "strong" -> strong <$> innerInlines
+ "bold" -> strong <$> innerInlines
+ "strong" -> strong <$> innerInlines
"strikethrough" -> strikeout <$> innerInlines
- _ -> emph <$> innerInlines
- "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
+ _ -> emph <$> innerInlines
+ "footnote" -> (note . mconcat) <$>
+ mapM parseBlock (elContent e)
"title" -> return mempty
"affiliation" -> return mempty
-- Note: this isn't a real docbook tag; it's what we convert
@@ -996,7 +1003,7 @@ parseInline (Elem e) =
"br" -> return linebreak
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
- (mapM parseInline $ elContent e)
+ mapM parseInline (elContent e)
equation constructor = return $ mconcat $
map (constructor . writeTeX)
$ rights