summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-06-17 12:14:02 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-06-17 12:14:02 -0700
commit59272e4d99668ddc48f07eb761979c2f49cf76d5 (patch)
tree06558a4280af9ecf8863e02b11ad34b1ed09d338 /src/Text/Pandoc/Readers
parentfc291efad3430be0645e979e0279c93195012075 (diff)
DocBook reader: Support <?asciidoc-br?>.
Closes #1236. Note, this is a bit of a kludge, to work around the fact that xml-light doesn't parse `<?asciidoc-br?>` correctly. We preprocess the input, replacing that instruction with `<br/>`, and then parse that as a line break. Other XML instructions are simply removed from the input stream.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs19
1 files changed, 17 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index d58f8b3c5..cf1d5132e 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -492,7 +492,7 @@ List of all DocBook tags, with [x] indicating implemented,
anything else
[ ] xref - A cross reference to another part of the document
[ ] year - The year of publication of a document
-
+[x] ?asciidoc-br? - line break from asciidoc docbook output
-}
type DB = State DBState
@@ -507,7 +507,7 @@ data DBState = DBState{ dbSectionLevel :: Int
readDocBook :: ReaderOptions -> String -> Pandoc
readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs)
- where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
+ where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp')
DBState{ dbSectionLevel = 0
, dbQuoteType = DoubleQuote
, dbMeta = mempty
@@ -515,6 +515,17 @@ readDocBook _ inp = Pandoc (dbMeta st') (toList $ mconcat bs)
, dbBook = False
, dbFigureTitle = mempty
}
+ inp' = handleInstructions inp
+
+-- We treat <?asciidoc-br?> specially (issue #1236), converting it
+-- to <br/>, since xml-light doesn't parse the instruction correctly.
+-- Other xml instructions are simply removed from the input stream.
+handleInstructions :: String -> String
+handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
+handleInstructions xs = case break (=='<') xs of
+ (ys, []) -> ys
+ ([], '<':zs) -> '<' : handleInstructions zs
+ (ys, zs) -> ys ++ handleInstructions zs
getFigure :: Element -> DB Blocks
getFigure e = do
@@ -920,6 +931,10 @@ parseInline (Elem e) =
"footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
"title" -> return mempty
"affiliation" -> return mempty
+ -- Note: this isn't a real docbook tag; it's what we convert
+ -- <?asciidor-br?> to in handleInstructions, above. A kludge to
+ -- work around xml-light's inability to parse an instruction.
+ "br" -> return linebreak
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
(mapM parseInline $ elContent e)