diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/DocBook.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 117 |
1 files changed, 62 insertions, 55 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 68552ccb3..728f77a05 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,22 +1,22 @@ +{-# LANGUAGE ExplicitForAll #-} module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) +import Control.Monad.State.Strict +import Data.Char (isSpace, toUpper) +import Data.Default import Data.Either (rights) +import Data.Foldable (asum) import Data.Generics -import Data.Char (isSpace) -import Control.Monad.State import Data.List (intersperse) import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter, safeRead) import Text.TeXMath (readMathML, writeTeX) -import Text.Pandoc.Error (PandocError) -import Control.Monad.Except -import Data.Default -import Data.Foldable (asum) +import Text.XML.Light {- @@ -50,7 +50,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] author - The name of an individual author [ ] authorblurb - A short description or note about an author [x] authorgroup - Wrapper for author information when a document has - multiple authors or collabarators + multiple authors or collaborators [x] authorinitials - The initials or other short identifier for an author [o] beginpage - The location of a page break in a print version of the document [ ] bibliocoverage - The spatial or temporal coverage of a document @@ -502,7 +502,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] ?asciidoc-br? - line break from asciidoc docbook output -} -type DB = ExceptT PandocError (State DBState) +type DB m = StateT DBState m data DBState = DBState{ dbSectionLevel :: Int , dbQuoteType :: QuoteType @@ -523,10 +523,12 @@ instance Default DBState where , dbContent = [] } -readDocBook :: ReaderOptions -> String -> Either PandocError Pandoc -readDocBook _ inp = (\blocks -> Pandoc (dbMeta st') (toList . mconcat $ blocks)) <$> bs - where (bs , st') = flip runState (def{ dbContent = tree }) . runExceptT . mapM parseBlock $ tree - tree = normalizeTree . parseXML . handleInstructions $ inp +readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readDocBook _ inp = do + let tree = normalizeTree . parseXML . handleInstructions + $ T.unpack $ crFilter inp + (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree + return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. @@ -536,12 +538,12 @@ handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>': handleInstructions xs = case break (=='<') xs of (ys, []) -> ys ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + (ys, zs) -> ys ++ handleInstructions zs -getFigure :: Element -> DB Blocks +getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do tit <- case filterChild (named "title") e of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbFigureTitle = tit } res <- getBlocks e @@ -564,14 +566,12 @@ normalizeTree = everywhere (mkT go) go xs = xs convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" attrValue :: String -> Element -> String attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function named :: String -> Element -> Bool @@ -579,20 +579,20 @@ named s e = qName (elName e) == s -- -acceptingMetadata :: DB a -> DB a +acceptingMetadata :: PandocMonad m => DB m a -> DB m a acceptingMetadata p = do modify (\s -> s { dbAcceptsMeta = True } ) res <- p modify (\s -> s { dbAcceptsMeta = False }) return res -checkInMeta :: Monoid a => DB () -> DB a +checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a checkInMeta p = do accepts <- dbAcceptsMeta <$> get when accepts p return mempty -addMeta :: ToMetaValue a => String -> a -> DB () +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -631,7 +631,7 @@ addToStart toadd bs = -- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) -- A DocBook mediaobject is a wrapper around a set of alternative presentations -getMediaobject :: Element -> DB Inlines +getMediaobject :: PandocMonad m => Element -> DB m Inlines getMediaobject e = do (imageUrl, attr) <- case filterChild (named "imageobject") e of @@ -651,18 +651,20 @@ getMediaobject e = do || named "textobject" x || named "alt" x) el of Nothing -> return mempty - Just z -> mconcat <$> (mapM parseInline $ elContent z) + Just z -> mconcat <$> + mapM parseInline (elContent z) figTitle <- gets dbFigureTitle let (caption, title) = if isNull figTitle then (getCaption e, "") else (return figTitle, "fig:") - liftM (imageWith attr imageUrl title) caption + fmap (imageWith attr imageUrl title) caption -getBlocks :: Element -> DB Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) +getBlocks :: PandocMonad m => Element -> DB m Blocks +getBlocks e = mconcat <$> + mapM parseBlock (elContent e) -parseBlock :: Content -> DB Blocks +parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty @@ -795,15 +797,16 @@ parseBlock (Elem e) = return $ p <> b <> x codeBlockWithLang = do let classes' = case attrValue "language" e of - "" -> [] - x -> [x] + "" -> [] + x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty Just z -> (para . (str "— " <>) . mconcat) - <$> (mapM parseInline $ elContent z) + <$> + mapM parseInline (elContent z) contents <- getBlocks e return $ blockQuote (contents <> attrib) listitems = mapM getBlocks $ filterChildren (named "listitem") e @@ -868,11 +871,11 @@ parseBlock (Elem e) = || x == '.') w Nothing -> 0 :: Double let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs + [] -> 0 + xs -> maximum $ map length xs let aligns = case colspecs of - [] -> replicate numrows AlignDefault - cs -> map toAlignment cs + [] -> replicate numrows AlignDefault + cs -> map toAlignment cs let widths = case colspecs of [] -> replicate numrows 0 cs -> let ws = map toWidth cs @@ -892,7 +895,7 @@ parseBlock (Elem e) = headerText <- case filterChild (named "title") e `mplus` (filterChild (named "info") e >>= filterChild (named "title")) of - Just t -> getInlines t + Just t -> getInlines t Nothing -> return mempty modify $ \st -> st{ dbSectionLevel = n } b <- getBlocks e @@ -902,8 +905,9 @@ parseBlock (Elem e) = lineItems = mapM getInlines $ filterChildren (named "line") e metaBlock = acceptingMetadata (getBlocks e) >> return mempty -getInlines :: Element -> DB Inlines -getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') +getInlines :: PandocMonad m => Element -> DB m Inlines +getInlines e' = (trimInlines . mconcat) <$> + mapM parseInline (elContent e') strContentRecursive :: Element -> String strContentRecursive = strContent . @@ -913,10 +917,10 @@ elementToStr :: Content -> Content elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x -parseInline :: Content -> DB Inlines +parseInline :: PandocMonad m => Content -> DB m Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref + return $ maybe (text $ map toUpper ref) text $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation displayMath @@ -957,8 +961,10 @@ parseInline (Elem e) = "userinput" -> codeWithLang "varargs" -> return $ code "(...)" "keycap" -> return (str $ strContent e) - "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) - "menuchoice" -> menuchoice <$> (mapM parseInline $ + "keycombo" -> keycombo <$> + mapM parseInline (elContent e) + "menuchoice" -> menuchoice <$> + mapM parseInline ( filter isGuiMenu $ elContent e) "xref" -> do content <- dbContent <$> get @@ -977,17 +983,18 @@ parseInline (Elem e) = ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just h -> h - _ -> ('#' : attrValue "linkend" e) + _ -> '#' : attrValue "linkend" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of - "bold" -> strong <$> innerInlines - "strong" -> strong <$> innerInlines + "bold" -> strong <$> innerInlines + "strong" -> strong <$> innerInlines "strikethrough" -> strikeout <$> innerInlines - _ -> emph <$> innerInlines - "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) + _ -> emph <$> innerInlines + "footnote" -> (note . mconcat) <$> + mapM parseBlock (elContent e) "title" -> return mempty "affiliation" -> return mempty -- Note: this isn't a real docbook tag; it's what we convert @@ -996,7 +1003,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> - (mapM parseInline $ elContent e) + mapM parseInline (elContent e) equation constructor = return $ mconcat $ map (constructor . writeTeX) $ rights |