summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2016-06-22 13:49:19 -0400
committerJesse Rosenthal <jrosenthal@jhu.edu>2016-06-23 10:50:46 -0400
commit8bb739f7ff353722981fe442ae0c137910604850 (patch)
treefa239e4f13e0174456fac3f96e84120d1b2f3cac /src/Text/Pandoc/Readers/Docx
parentcbc2c15f0ffb1737a0f6540fb282adab7094423b (diff)
Docx reader: add simple comment functionality.
This adds simple track-changes comment parsing to the docx reader. It is turned on with `--track-changes=all`. All comments are converted to inlines, which can list some information. In the future a warning will be added for comments with formatting that seems like it will be excessively denatured. Note that comments can extend across blocks. For that reason there are two spans: `comment-start` and `comment-end`. `comment-start` will contain the comment. `comment-end` will always be empty. The two will be associated by a numeric id.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs58
1 files changed, 57 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 7265ef8dd..055a67288 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -73,6 +73,7 @@ import Text.Pandoc.Readers.Docx.Util
import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
+ , envComments :: Comments
, envNumbering :: Numbering
, envRelationships :: [Relationship]
, envMedia :: Media
@@ -160,6 +161,9 @@ data Notes = Notes NameSpaces
(Maybe (M.Map String Element))
deriving Show
+data Comments = Comments NameSpaces (M.Map String Element)
+ deriving Show
+
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
, rightParIndent :: Maybe Integer
, hangingParIndent :: Maybe Integer}
@@ -210,6 +214,8 @@ type Extent = Maybe (Double, Double)
data ParPart = PlainRun Run
| Insertion ChangeId Author ChangeDate [Run]
| Deletion ChangeId Author ChangeDate [Run]
+ | CommentStart CommentId Author CommentDate [BodyPart]
+ | CommentEnd CommentId
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
@@ -259,8 +265,10 @@ type URL = String
type BookMarkId = String
type RelId = String
type ChangeId = String
+type CommentId = String
type Author = String
type ChangeDate = String
+type CommentDate = String
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
@@ -268,12 +276,13 @@ archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
archiveToDocxWithWarnings archive = do
let notes = archiveToNotes archive
+ comments = archiveToComments archive
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
(styles, parstyles) = archiveToStyles archive
rEnv =
- ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
+ ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
rState = ReaderState { stateWarnings = [] }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
@@ -384,6 +393,20 @@ archiveToNotes zf =
in
Notes ns fn en
+archiveToComments :: Archive -> Comments
+archiveToComments zf =
+ let cmtsElem = findEntryByPath "word/comments.xml" zf
+ >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ cmts_namespaces = case cmtsElem of
+ Just e -> elemToNameSpaces e
+ Nothing -> []
+ cmts = (elemToComments cmts_namespaces) <$> cmtsElem
+ in
+ case cmts of
+ Just c -> Comments cmts_namespaces c
+ Nothing -> Comments cmts_namespaces M.empty
+
+
filePathToRelType :: FilePath -> Maybe DocumentLocation
filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
@@ -504,6 +527,18 @@ elemToNotes ns notetype element
Just $ M.fromList $ pairs
elemToNotes _ _ _ = Nothing
+elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments ns element
+ | isElem ns "w" "comments" element =
+ let pairs = mapMaybe
+ (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\a -> Just (a, e)))
+ (findChildren (elemName ns "w" "comment") element)
+ in
+ M.fromList $ pairs
+elemToComments _ _ = M.empty
+
+
---------------------------------------------
---------------------------------------------
@@ -697,10 +732,31 @@ elemToParPart ns element
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
+ | isElem ns "w" "commentRangeStart" element
+ , Just cmtId <- findAttr (elemName ns "w" "id") element = do
+ (Comments _ commentMap) <- asks envComments
+ case M.lookup cmtId commentMap of
+ Just cmtElem -> elemToCommentStart ns cmtElem
+ Nothing -> throwError WrongElem
+elemToParPart ns element
+ | isElem ns "w" "commentRangeEnd" element
+ , Just cmtId <- findAttr (elemName ns "w" "id") element =
+ return $ CommentEnd cmtId
+elemToParPart ns element
| isElem ns "m" "oMath" element =
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
elemToParPart _ _ = throwError WrongElem
+elemToCommentStart :: NameSpaces -> Element -> D ParPart
+elemToCommentStart ns element
+ | isElem ns "w" "comment" element
+ , Just cmtId <- findAttr (elemName ns "w" "id") element
+ , Just cmtAuthor <- findAttr (elemName ns "w" "author") element
+ , Just cmtDate <- findAttr (elemName ns "w" "date") element = do
+ bps <- mapD (elemToBodyPart ns) (elChildren element)
+ return $ CommentStart cmtId cmtAuthor cmtDate bps
+elemToCommentStart _ _ = throwError WrongElem
+
lookupFootnote :: String -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)