summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2017-12-31 09:09:22 -0500
committerJesse Rosenthal <jrosenthal@jhu.edu>2017-12-31 09:29:51 -0500
commit836153de43933dca3205e4459f55979d467f927e (patch)
treedaeb7a8301f6a63d3fc0c458013c305ae1ac4e2c
parenta274e15f0d236140fec7e4554117fcb55a219566 (diff)
Docx Reader: Combine adjacent anchors.
There isn't any reason to have numberous anchors in the same place, since we can't maintain docx's non-nesting overlapping. So we reduce to a single anchor, and have all links pointing to one of the overlapping anchors point to that one. This changes the behavior from commit e90c714c7 slightly (use the first anchor instead of the last) so we change the expected test result. Note that because this produces a state that has to be set after every invocation of `parPartToInlines`, we make the main function into a primed subfunction `parPartToInlines'`, and make `parPartToInlines` a wrapper around that.
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs67
-rw-r--r--test/docx/unused_anchors.native4
2 files changed, 49 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 248cb0b84..6ca1590a4 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -82,6 +82,7 @@ import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
import qualified Data.Map as M
+import Data.Maybe (isJust)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
@@ -119,6 +120,7 @@ readDocx _ _ =
data DState = DState { docxAnchorMap :: M.Map String String
, docxAnchorSet :: Set.Set String
+ , docxImmedPrevAnchor :: Maybe String
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
, docxWarnings :: [String]
@@ -130,6 +132,7 @@ data DState = DState { docxAnchorMap :: M.Map String String
instance Default DState where
def = DState { docxAnchorMap = M.empty
, docxAnchorSet = mempty
+ , docxImmedPrevAnchor = Nothing
, docxMediaBag = mempty
, docxDropCap = mempty
, docxWarnings = []
@@ -341,9 +344,26 @@ blocksToInlinesWarn cmtId blks = do
"Docx comment " ++ cmtId ++ " will not retain formatting"
return $ blocksToInlines' blkList
+-- The majority of work in this function is done in the primted
+-- subfunction `partPartToInlines'`. We make this wrapper so that we
+-- don't have to modify `docxImmedPrevAnchor` state after every function.
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
-parPartToInlines (PlainRun r) = runToInlines r
-parPartToInlines (Insertion _ author date runs) = do
+parPartToInlines parPart =
+ case parPart of
+ (BookMark _ anchor) | not $ anchor `elem` dummyAnchors -> do
+ inHdrBool <- asks docxInHeaderBlock
+ ils <- parPartToInlines' parPart
+ immedPrevAnchor <- gets docxImmedPrevAnchor
+ unless (isJust immedPrevAnchor || inHdrBool)
+ (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
+ return ils
+ _ -> do
+ ils <- parPartToInlines' parPart
+ modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
+ return ils
+parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
+parPartToInlines' (PlainRun r) = runToInlines r
+parPartToInlines' (Insertion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> smushInlines <$> mapM runToInlines runs
@@ -352,7 +372,7 @@ parPartToInlines (Insertion _ author date runs) = do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
-parPartToInlines (Deletion _ author date runs) = do
+parPartToInlines' (Deletion _ author date runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
@@ -361,7 +381,7 @@ parPartToInlines (Deletion _ author date runs) = do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
-parPartToInlines (CommentStart cmtId author date bodyParts) = do
+parPartToInlines' (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
@@ -370,16 +390,16 @@ parPartToInlines (CommentStart cmtId author date bodyParts) = do
let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
return $ spanWith attr ils
_ -> return mempty
-parPartToInlines (CommentEnd cmtId) = do
+parPartToInlines' (CommentEnd cmtId) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
let attr = ("", ["comment-end"], [("id", cmtId)])
return $ spanWith attr mempty
_ -> return mempty
-parPartToInlines (BookMark _ anchor) | anchor `elem` dummyAnchors =
+parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
return mempty
-parPartToInlines (BookMark _ anchor) =
+parPartToInlines' (BookMark _ anchor) =
-- We record these, so we can make sure not to overwrite
-- user-defined anchor links with header auto ids.
do
@@ -395,27 +415,34 @@ parPartToInlines (BookMark _ anchor) =
-- of rewriting user-defined anchor links. However, since these
-- are not defined in pandoc, it seems like a necessary evil to
-- avoid an extra pass.
- let newAnchor =
- if not inHdrBool && anchor `elem` M.elems anchorMap
- then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
- else anchor
- unless inHdrBool
- (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
- return $ spanWith (newAnchor, ["anchor"], []) mempty
-parPartToInlines (Drawing fp title alt bs ext) = do
+ immedPrevAnchor <- gets docxImmedPrevAnchor
+ case immedPrevAnchor of
+ Just prevAnchor -> do
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
+ return mempty
+ Nothing -> do
+ let newAnchor =
+ if not inHdrBool && anchor `elem` M.elems anchorMap
+ then uniqueIdent [Str anchor] (Set.fromList $ M.elems anchorMap)
+ else anchor
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
+ return $ spanWith (newAnchor, ["anchor"], []) mempty
+parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
-parPartToInlines Chart =
+parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
-parPartToInlines (InternalHyperLink anchor runs) = do
+parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils
-parPartToInlines (ExternalHyperLink target runs) = do
+parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
-parPartToInlines (PlainOMath exps) =
+parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
-parPartToInlines (SmartTag runs) = do
+parPartToInlines' (SmartTag runs) = do
smushInlines <$> mapM runToInlines runs
isAnchorSpan :: Inline -> Bool
diff --git a/test/docx/unused_anchors.native b/test/docx/unused_anchors.native
index 334269793..051dfe424 100644
--- a/test/docx/unused_anchors.native
+++ b/test/docx/unused_anchors.native
@@ -1,3 +1,3 @@
[Header 1 ("my-section",[],[]) [Str "My",Space,Str "Section"]
-,Para [Link ("",[],[]) [Str "Here",Space,Str "is",Space,Str "a",Space,Str "link."] ("#Bar","")]
-,Para [Span ("Bar",["anchor"],[]) [],Str "Here",Space,Str "is",Space,Str "the",Space,Str "target."]]
+,Para [Link ("",[],[]) [Str "Here",Space,Str "is",Space,Str "a",Space,Str "link."] ("#Foo","")]
+,Para [Span ("Foo",["anchor"],[]) [],Str "Here",Space,Str "is",Space,Str "the",Space,Str "target."]]