summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2018-02-22 12:56:19 -0500
committerJesse Rosenthal <jrosenthal@jhu.edu>2018-02-22 13:05:25 -0500
commit5262c0853acbef6dd25d9d6cbda26120859b45ff (patch)
tree7dc8328b5f381a251d47bf1b5d9d1bed207ae494 /src/Text/Pandoc/Readers
parent88d17c6c66954be47bfa0f625323aca2ba48a9e9 (diff)
Docx reader: read custom styles
This will read all paragraph and character classes as divs and spans, respectively. Dependent styles will still be resolved, but will be wrapped with appropriate style tags. It is controlled by the `+styles` extension (`-f docx+styles`). This can be used in conjunction with the `custom-style` feature in the docx writer for a pandoc-docx editing workflow. Users can convert from an input docx, reading the custom-styles, and then use that same input docx file as a reference-doc for producing an output docx file. Styles will be maintained across the conversion, even if pandoc doesn't understand them. Without the extension: $ pandoc test/docx/custom-style-reference.docx -f docx -t markdown This is some text. This is text with an *emphasized* text style. And this is text with a **strengthened** text style. > Here is a styled paragraph that inherits from Block Text. With the extension: $ pandoc test/docx/custom-style-reference.docx -f docx+styles -t markdown ::: {custom-style="FirstParagraph"} This is some text. ::: ::: {custom-style="BodyText"} This is text with an *[[emphasized]{custom-style="Emphatic"}]{custom-style="Emphatic"}* text style. And this is text with a **[[strengthened]{custom-style="Strengthened"}]{custom-style="Strengthened"}** text style. ::: ::: {custom-style="MyBlockStyle"} Closes: #1843
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs156
1 files changed, 96 insertions, 60 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 098759a61..491eea753 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -278,28 +278,51 @@ resolveDependentRunStyle rPr
, rStyle = rStyle rPr }
| otherwise = rPr
-runStyleToTransform :: RunStyle -> (Inlines -> Inlines)
+extraRunStyleInfo :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
+extraRunStyleInfo rPr
+ | Just (s, _) <- rStyle rPr = do
+ opts <- asks docxOptions
+ return $ if isEnabled Ext_styles opts
+ then spanWith ("", [], [("custom-style", s)])
+ else id
+ | otherwise = return id
+
+runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
| Just (s, _) <- rStyle rPr
- , s `elem` spansToKeep =
- let rPr' = rPr{rStyle = Nothing}
- in
- spanWith ("", [s], []) . runStyleToTransform rPr'
- | Just True <- isItalic rPr =
- emph . runStyleToTransform rPr {isItalic = Nothing}
- | Just True <- isBold rPr =
- strong . runStyleToTransform rPr {isBold = Nothing}
- | Just True <- isSmallCaps rPr =
- smallcaps . runStyleToTransform rPr {isSmallCaps = Nothing}
- | Just True <- isStrike rPr =
- strikeout . runStyleToTransform rPr {isStrike = Nothing}
- | Just SupScrpt <- rVertAlign rPr =
- superscript . runStyleToTransform rPr {rVertAlign = Nothing}
- | Just SubScrpt <- rVertAlign rPr =
- subscript . runStyleToTransform rPr {rVertAlign = Nothing}
- | Just "single" <- rUnderline rPr =
- underlineSpan . runStyleToTransform rPr {rUnderline = Nothing}
- | otherwise = id
+ , s `elem` spansToKeep = do
+ let rPr' = rPr{rStyle = Nothing}
+ transform <- runStyleToTransform rPr'
+ return $ spanWith ("", [s], []) . transform
+ | Just True <- isItalic rPr = do
+ transform <- runStyleToTransform rPr {isItalic = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ emph . extraInfo . transform
+ | Just True <- isBold rPr = do
+ transform <- runStyleToTransform rPr {isBold = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ strong . extraInfo . transform
+ | Just True <- isSmallCaps rPr = do
+ transform <- runStyleToTransform rPr {isSmallCaps = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ smallcaps . extraInfo .transform
+ | Just True <- isStrike rPr = do
+ transform <- runStyleToTransform rPr {isStrike = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ strikeout . extraInfo . transform
+ | Just SupScrpt <- rVertAlign rPr = do
+ transform <- runStyleToTransform rPr {rVertAlign = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ superscript . extraInfo . transform
+ | Just SubScrpt <- rVertAlign rPr = do
+ transform <- runStyleToTransform rPr {rVertAlign = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ subscript . extraInfo . transform
+ | Just "single" <- rUnderline rPr = do
+ transform <- runStyleToTransform rPr {rUnderline = Nothing}
+ extraInfo <- extraRunStyleInfo rPr
+ return $ underlineSpan . extraInfo . transform
+ | otherwise = extraRunStyleInfo rPr
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
@@ -314,7 +337,8 @@ runToInlines (Run rs runElems)
_ -> codeString
| otherwise = do
let ils = smushInlines (map runElemToInlines runElems)
- return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
+ transform <- runStyleToTransform $ resolveDependentRunStyle rs
+ return $ transform ils
runToInlines (Footnote bps) = do
blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ note blksList
@@ -516,51 +540,60 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
isSp LineBreak = True
isSp _ = False
-parStyleToTransform :: ParagraphStyle -> (Blocks -> Blocks)
+parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
- , c `elem` divsToKeep =
- let pPr' = pPr { pStyle = cs }
- in
- divWith ("", [c], []) . parStyleToTransform pPr'
+ , c `elem` divsToKeep = do
+ let pPr' = pPr { pStyle = cs }
+ transform <- parStyleToTransform pPr'
+ return $ divWith ("", [c], []) . transform
| (c:cs) <- pStyle pPr,
- c `elem` listParagraphDivs =
+ c `elem` listParagraphDivs = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
- in
- divWith ("", [c], []) . parStyleToTransform pPr'
- | (_:cs) <- pStyle pPr
- , Just True <- pBlockQuote pPr =
- let pPr' = pPr { pStyle = cs }
- in
- blockQuote . parStyleToTransform pPr'
- | (_:cs) <- pStyle pPr =
+ transform <- parStyleToTransform pPr'
+ return $ divWith ("", [c], []) . transform
+ | (c:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr = do
+ opts <- asks docxOptions
+ let pPr' = pPr { pStyle = cs }
+ transform <- parStyleToTransform pPr'
+ let extraInfo = if isEnabled Ext_styles opts
+ then divWith ("", [], [("custom-style", c)])
+ else id
+ return $ extraInfo . blockQuote . transform
+ | (c:cs) <- pStyle pPr = do
+ opts <- asks docxOptions
let pPr' = pPr { pStyle = cs}
- in
- parStyleToTransform pPr'
+ transform <- parStyleToTransform pPr'
+ let extraInfo = if isEnabled Ext_styles opts
+ then divWith ("", [], [("custom-style", c)])
+ else id
+ return $ extraInfo . transform
| null (pStyle pPr)
, Just left <- indentation pPr >>= leftParIndent
- , Just hang <- indentation pPr >>= hangingParIndent =
+ , Just hang <- indentation pPr >>= hangingParIndent = do
let pPr' = pPr { indentation = Nothing }
- in
- case (left - hang) > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
+ transform <- parStyleToTransform pPr'
+ return $ case (left - hang) > 0 of
+ True -> blockQuote . transform
+ False -> transform
| null (pStyle pPr),
- Just left <- indentation pPr >>= leftParIndent =
+ Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
- in
- case left > 0 of
- True -> blockQuote . (parStyleToTransform pPr')
- False -> parStyleToTransform pPr'
-parStyleToTransform _ = id
+ transform <- parStyleToTransform pPr'
+ return $ case left > 0 of
+ True -> blockQuote . transform
+ False -> transform
+parStyleToTransform _ = return id
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
- | not $ null $ codeDivs `intersect` (pStyle pPr) =
- return
- $ parStyleToTransform pPr
- $ codeBlock
- $ concatMap parPartToString parparts
+ | not $ null $ codeDivs `intersect` (pStyle pPr) = do
+ transform <- parStyleToTransform pPr
+ return $
+ transform $
+ codeBlock $
+ concatMap parPartToString parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
@@ -586,7 +619,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
_ | Just (TrackedChange Insertion _) <- pChange pPr
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = mempty}
- return $ parStyleToTransform pPr $ para ils''
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
_ | Just (TrackedChange Insertion _) <- pChange pPr
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
@@ -596,8 +630,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- return $
- parStyleToTransform pPr $
+ transform <- parStyleToTransform pPr
+ return $ transform $
para $ ils'' <> insertMark
_ | Just (TrackedChange Deletion _) <- pChange pPr
, AcceptChanges <- readerTrackChanges opts -> do
@@ -606,18 +640,20 @@ bodyPartToBlocks (Paragraph pPr parparts)
_ | Just (TrackedChange Deletion _) <- pChange pPr
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = mempty}
- return $ parStyleToTransform pPr $ para ils''
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
_ | Just (TrackedChange Deletion cInfo) <- pChange pPr
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
- return $
- parStyleToTransform pPr $
+ transform <- parStyleToTransform pPr
+ return $ transform $
para $ ils'' <> insertMark
_ | otherwise -> do
modify $ \s -> s {docxPrevPara = mempty}
- return $ parStyleToTransform pPr $ para ils''
+ transform <- parStyleToTransform pPr
+ return $ transform $ para ils''
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
-- We check whether this current numId has previously been used,
-- since Docx expects us to pick up where we left off.