summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Parse.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs526
1 files changed, 333 insertions, 193 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index deb2caccf..1f7f07e36 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE ViewPatterns #-}
{-
-Copyright (C) 2014-2016 Jesse Rosenthal <jrosenthal@jhu.edu>
+Copyright (C) 2014-2018 Jesse Rosenthal <jrosenthal@jhu.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Docx.Parse
- Copyright : Copyright (C) 2014-2016 Jesse Rosenthal
+ Copyright : Copyright (C) 2014-2018 Jesse Rosenthal
License : GNU GPL, version 2 or above
Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -49,28 +51,34 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, ParagraphStyle(..)
, Row(..)
, Cell(..)
+ , TrackedChange(..)
+ , ChangeType(..)
+ , ChangeInfo(..)
+ , FieldInfo(..)
, archiveToDocx
, archiveToDocxWithWarnings
) where
import Codec.Archive.Zip
-import Text.XML.Light
-import Data.Maybe
-import Data.List
-import System.FilePath
+import Control.Applicative ((<|>))
+import Control.Monad.Except
+import Control.Monad.Reader
+import Control.Monad.State.Strict
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
-import qualified Text.Pandoc.UTF8 as UTF8
-import Control.Monad.Reader
-import Control.Monad.State
-import Control.Applicative ((<|>))
+import Data.Char (chr, ord, readLitChar)
+import Data.List
import qualified Data.Map as M
-import Control.Monad.Except
-import Text.Pandoc.Shared (safeRead, filteredFilesFromArchive)
-import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (getUnicode, stringToFont, Font(..))
-import Text.TeXMath (Exp)
+import Data.Maybe
+import System.FilePath
import Text.Pandoc.Readers.Docx.Util
-import Data.Char (readLitChar, ord, chr, isDigit)
+import Text.Pandoc.Readers.Docx.Fields
+import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.TeXMath (Exp)
+import Text.TeXMath.Readers.OMML (readOMML)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.XML.Light
+import qualified Text.XML.Light.Cursor as XMLC
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
@@ -84,10 +92,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String] }
+data ReaderState = ReaderState { stateWarnings :: [String]
+ , stateFldCharState :: FldCharState
+ }
deriving Show
-data DocxError = DocxError | WrongElem
+data FldCharState = FldCharOpen
+ | FldCharFieldInfo FieldInfo
+ | FldCharContent FieldInfo [Run]
+ | FldCharClosed
+ deriving (Show)
+
+data DocxError = DocxError
+ | WrongElem
deriving Show
type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
@@ -97,7 +114,7 @@ runD dx re rs = runState (runReaderT (runExceptT dx) re) rs
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
-maybeToD Nothing = throwError DocxError
+maybeToD Nothing = throwError DocxError
eitherToD :: Either a b -> D b
eitherToD (Right b) = return b
@@ -115,6 +132,36 @@ mapD f xs =
in
concatMapM handler xs
+unwrapSDT :: NameSpaces -> Content -> [Content]
+unwrapSDT ns (Elem element)
+ | isElem ns "w" "sdt" element
+ , Just sdtContent <- findChildByName ns "w" "sdtContent" element
+ = concatMap (unwrapSDT ns) $ map Elem $ elChildren sdtContent
+unwrapSDT _ content = [content]
+
+unwrapSDTchild :: NameSpaces -> Content -> Content
+unwrapSDTchild ns (Elem element) =
+ Elem $ element { elContent = concatMap (unwrapSDT ns) (elContent element) }
+unwrapSDTchild _ content = content
+
+walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
+walkDocument' ns cur =
+ let modifiedCur = XMLC.modifyContent (unwrapSDTchild ns) cur
+ in
+ case XMLC.nextDF modifiedCur of
+ Just cur' -> walkDocument' ns cur'
+ Nothing -> XMLC.root modifiedCur
+
+walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument ns element =
+ let cur = XMLC.fromContent (Elem element)
+ cur' = walkDocument' ns cur
+ in
+ case XMLC.toTree cur' of
+ Elem element' -> Just element'
+ _ -> Nothing
+
+
data Docx = Docx Document
deriving Show
@@ -160,17 +207,27 @@ data Notes = Notes NameSpaces
data Comments = Comments NameSpaces (M.Map String Element)
deriving Show
-data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
- , rightParIndent :: Maybe Integer
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+ , rightParIndent :: Maybe Integer
, hangingParIndent :: Maybe Integer}
deriving Show
-data ParagraphStyle = ParagraphStyle { pStyle :: [String]
+data ChangeType = Insertion | Deletion
+ deriving Show
+
+data ChangeInfo = ChangeInfo ChangeId Author ChangeDate
+ deriving Show
+
+data TrackedChange = TrackedChange ChangeType ChangeInfo
+ deriving Show
+
+data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
, pHeading :: Maybe (String, Int)
, pNumInfo :: Maybe (String, String)
, pBlockQuote :: Maybe Bool
+ , pChange :: Maybe TrackedChange
}
deriving Show
@@ -181,6 +238,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
, pHeading = Nothing
, pNumInfo = Nothing
, pBlockQuote = Nothing
+ , pChange = Nothing
}
@@ -208,8 +266,7 @@ data Cell = Cell [BodyPart]
type Extent = Maybe (Double, Double)
data ParPart = PlainRun Run
- | Insertion ChangeId Author ChangeDate [Run]
- | Deletion ChangeId Author ChangeDate [Run]
+ | ChangedRuns TrackedChange [Run]
| CommentStart CommentId Author CommentDate [BodyPart]
| CommentEnd CommentId
| BookMark BookMarkId Anchor
@@ -218,6 +275,10 @@ data ParPart = PlainRun Run
| Drawing FilePath String String B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
+ | SmartTag [Run]
+ | Field FieldInfo [Run]
+ | NullParPart -- when we need to return nothing, but
+ -- not because of an error.
deriving Show
data Run = Run RunStyle [RunElem]
@@ -233,19 +294,19 @@ data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
-data RunStyle = RunStyle { isBold :: Maybe Bool
- , isItalic :: Maybe Bool
+data RunStyle = RunStyle { isBold :: Maybe Bool
+ , isItalic :: Maybe Bool
, isSmallCaps :: Maybe Bool
- , isStrike :: Maybe Bool
- , rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
- , rStyle :: Maybe CharStyle}
+ , isStrike :: Maybe Bool
+ , rVertAlign :: Maybe VertAlign
+ , rUnderline :: Maybe String
+ , rStyle :: Maybe CharStyle}
deriving Show
-data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
, isBlockQuote :: Maybe Bool
- , numInfo :: Maybe (String, String)
- , psStyle :: Maybe ParStyle}
+ , numInfo :: Maybe (String, String)
+ , psStyle :: Maybe ParStyle}
deriving Show
defaultRunStyle :: RunStyle
@@ -281,7 +342,9 @@ archiveToDocxWithWarnings archive = do
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
+ rState = ReaderState { stateWarnings = []
+ , stateFldCharState = FldCharClosed
+ }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
Right doc -> Right (Docx doc, stateWarnings st)
@@ -294,14 +357,14 @@ archiveToDocument zf = do
entry <- maybeToD $ findEntryByPath "word/document.xml" zf
docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces docElem
- bodyElem <- maybeToD $ findChild (elemName namespaces "w" "body") docElem
- body <- elemToBody namespaces bodyElem
+ bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
+ let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+ body <- elemToBody namespaces bodyElem'
return $ Document namespaces body
elemToBody :: NameSpaces -> Element -> D Body
elemToBody ns element | isElem ns "w" "body" element =
- mapD (elemToBodyPart ns) (elChildren element) >>=
- (\bps -> return $ Body bps)
+ fmap Body (mapD (elemToBodyPart ns) (elChildren element))
elemToBody _ _ = throwError WrongElem
archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
@@ -322,15 +385,15 @@ archiveToStyles zf =
isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just styleType <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
- , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
- findAttr (elemName ns "w" "val")
- , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
+ , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
+ findAttrByName ns "w" "val"
+ , Just ps <- parentStyle = basedOnVal == getStyleId ps
| isElem ns "w" "style" element
- , Just styleType <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
- , Nothing <- findChild (elemName ns "w" "basedOn") element
+ , Nothing <- findChildByName ns "w" "basedOn" element
, Nothing <- parentStyle = True
| otherwise = False
@@ -343,8 +406,8 @@ instance ElemToStyle CharStyle where
cStyleType _ = "character"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ , Just "character" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
Just (styleId, elemToRunStyle ns element parentStyle)
| otherwise = Nothing
getStyleId s = fst s
@@ -353,8 +416,8 @@ instance ElemToStyle ParStyle where
cStyleType _ = "paragraph"
elemToStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "paragraph" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ , Just "paragraph" <- findAttrByName ns "w" "type" element
+ , Just styleId <- findAttrByName ns "w" "styleId" element =
Just (styleId, elemToParStyleData ns element parentStyle)
| otherwise = Nothing
getStyleId s = fst s
@@ -368,10 +431,10 @@ getStyleChildren ns element parentStyle
buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
- case (getStyleChildren ns element rootStyle) of
+ case getStyleChildren ns element rootStyle of
[] -> []
stys -> stys ++
- (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
+ concatMap (buildBasedOnList ns element . Just) stys
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
@@ -380,14 +443,14 @@ archiveToNotes zf =
enElem = findEntryByPath "word/endnotes.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
fn_namespaces = case fnElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
en_namespaces = case enElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= (elemToNotes ns "footnote")
- en = enElem >>= (elemToNotes ns "endnote")
+ fn = fnElem >>= elemToNotes ns "footnote"
+ en = enElem >>= elemToNotes ns "endnote"
in
Notes ns fn en
@@ -396,19 +459,19 @@ archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
>>= (parseXMLDoc . UTF8.toStringLazy . fromEntry)
cmts_namespaces = case cmtsElem of
- Just e -> elemToNameSpaces e
+ Just e -> elemToNameSpaces e
Nothing -> []
- cmts = (elemToComments cmts_namespaces) <$> cmtsElem
+ cmts = elemToComments cmts_namespaces <$> cmtsElem
in
case cmts of
- Just c -> Comments cmts_namespaces c
+ Just c -> Comments cmts_namespaces c
Nothing -> Comments cmts_namespaces M.empty
filePathToRelType :: FilePath -> Maybe DocumentLocation
-filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
-filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
-filePathToRelType _ = Nothing
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
@@ -439,24 +502,23 @@ lookupLevel :: String -> String -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
absNumId <- lookup numId $ map (\(Numb nid absnumid) -> (nid, absnumid)) numbs
lvls <- lookup absNumId $ map (\(AbstractNumb aid ls) -> (aid, ls)) absNumbs
- lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
- return lvl
+ lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttr (elemName ns "w" "numId") element
- absNumId <- findChild (elemName ns "w" "abstractNumId") element
- >>= findAttr (elemName ns "w" "val")
+ numId <- findAttrByName ns "w" "numId" element
+ absNumId <- findChildByName ns "w" "abstractNumId" element
+ >>= findAttrByName ns "w" "val"
return $ Numb numId absNumId
numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttr (elemName ns "w" "abstractNumId") element
- let levelElems = findChildren (elemName ns "w" "lvl") element
+ absNumId <- findAttrByName ns "w" "abstractNumId" element
+ let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
absNumElemToAbsNum _ _ = Nothing
@@ -464,26 +526,26 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttr (elemName ns "w" "ilvl") element
- fmt <- findChild (elemName ns "w" "numFmt") element
- >>= findAttr (elemName ns "w" "val")
- txt <- findChild (elemName ns "w" "lvlText") element
- >>= findAttr (elemName ns "w" "val")
- let start = findChild (elemName ns "w" "start") element
- >>= findAttr (elemName ns "w" "val")
+ ilvl <- findAttrByName ns "w" "ilvl" element
+ fmt <- findChildByName ns "w" "numFmt" element
+ >>= findAttrByName ns "w" "val"
+ txt <- findChildByName ns "w" "lvlText" element
+ >>= findAttrByName ns "w" "val"
+ let start = findChildByName ns "w" "start" element
+ >>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
return (ilvl, fmt, txt, start)
levelElemToLevel _ _ = Nothing
archiveToNumbering' :: Archive -> Maybe Numbering
-archiveToNumbering' zf = do
+archiveToNumbering' zf =
case findEntryByPath "word/numbering.xml" zf of
Nothing -> Just $ Numbering [] [] []
Just entry -> do
numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
let namespaces = elemToNameSpaces numberingElem
- numElems = findChildren (elemName namespaces "w" "num") numberingElem
- absNumElems = findChildren (elemName namespaces "w" "abstractNum") numberingElem
+ numElems = findChildrenByName namespaces "w" "num" numberingElem
+ absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem
nums = mapMaybe (numElemToNum namespaces) numElems
absNums = mapMaybe (absNumElemToAbsNum namespaces) absNumElems
return $ Numbering namespaces nums absNums
@@ -496,22 +558,23 @@ elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
elemToNotes ns notetype element
| isElem ns "w" (notetype ++ "s") element =
let pairs = mapMaybe
- (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
- (findChildren (elemName ns "w" notetype) element)
+ (findChildrenByName ns "w" notetype element)
in
- Just $ M.fromList $ pairs
+ Just $
+ M.fromList pairs
elemToNotes _ _ _ = Nothing
elemToComments :: NameSpaces -> Element -> M.Map String Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttr (elemName ns "w" "id") e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
- (findChildren (elemName ns "w" "comment") element)
+ (findChildrenByName ns "w" "comment" element)
in
- M.fromList $ pairs
+ M.fromList pairs
elemToComments _ _ = M.empty
@@ -520,16 +583,16 @@ elemToComments _ _ = M.empty
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
- let cols = findChildren (elemName ns "w" "gridCol") element
+ let cols = findChildrenByName ns "w" "gridCol" element
in
- mapD (\e -> maybeToD (findAttr (elemName ns "w" "val") e >>= stringToInteger))
+ mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger))
cols
elemToTblGrid _ _ = throwError WrongElem
elemToTblLook :: NameSpaces -> Element -> D TblLook
elemToTblLook ns element | isElem ns "w" "tblLook" element =
- let firstRow = findAttr (elemName ns "w" "firstRow") element
- val = findAttr (elemName ns "w" "val") element
+ let firstRow = findAttrByName ns "w" "firstRow" element
+ val = findAttrByName ns "w" "val" element
firstRowFmt =
case firstRow of
Just "1" -> True
@@ -538,13 +601,13 @@ elemToTblLook ns element | isElem ns "w" "tblLook" element =
Just bitMask -> testBitMask bitMask 0x020
Nothing -> False
in
- return $ TblLook{firstRowFormatting = firstRowFmt}
+ return TblLook{firstRowFormatting = firstRowFmt}
elemToTblLook _ _ = throwError WrongElem
elemToRow :: NameSpaces -> Element -> D Row
elemToRow ns element | isElem ns "w" "tr" element =
do
- let cellElems = findChildren (elemName ns "w" "tc") element
+ let cellElems = findChildrenByName ns "w" "tc" element
cells <- mapD (elemToCell ns) cellElems
return $ Row cells
elemToRow _ _ = throwError WrongElem
@@ -558,15 +621,15 @@ elemToCell _ _ = throwError WrongElem
elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
elemToParIndentation ns element | isElem ns "w" "ind" element =
- Just $ ParIndentation {
+ Just ParIndentation {
leftParIndent =
- findAttr (elemName ns "w" "left") element >>=
+ findAttrByName ns "w" "left" element >>=
stringToInteger
, rightParIndent =
- findAttr (elemName ns "w" "right") element >>=
+ findAttrByName ns "w" "right" element >>=
stringToInteger
, hangingParIndent =
- findAttr (elemName ns "w" "hanging") element >>=
+ findAttrByName ns "w" "hanging" element >>=
stringToInteger}
elemToParIndentation _ _ = Nothing
@@ -574,7 +637,7 @@ testBitMask :: String -> Int -> Bool
testBitMask bitMaskS n =
case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
[] -> False
- ((n', _) : _) -> ((n' .|. n) /= 0)
+ ((n', _) : _) -> (n' .|. n) /= 0
stringToInteger :: String -> Maybe Integer
stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
@@ -582,7 +645,7 @@ stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
elemToBodyPart ns element
| isElem ns "w" "p" element
- , (c:_) <- findChildren (elemName ns "m" "oMathPara") element =
+ , (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
expsLst <- eitherToD $ readOMML $ showElement c
return $ OMathPara expsLst
@@ -610,17 +673,17 @@ elemToBodyPart ns element
_ -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
- let caption' = findChild (elemName ns "w" "tblPr") element
- >>= findChild (elemName ns "w" "tblCaption")
- >>= findAttr (elemName ns "w" "val")
+ let caption' = findChildByName ns "w" "tblPr" element
+ >>= findChildByName ns "w" "tblCaption"
+ >>= findAttrByName ns "w" "val"
caption = (fromMaybe "" caption')
- grid' = case findChild (elemName ns "w" "tblGrid") element of
- Just g -> elemToTblGrid ns g
+ grid' = case findChildByName ns "w" "tblGrid" element of
+ Just g -> elemToTblGrid ns g
Nothing -> return []
- tblLook' = case findChild (elemName ns "w" "tblPr") element >>=
- findChild (elemName ns "w" "tblLook")
+ tblLook' = case findChildByName ns "w" "tblPr" element >>=
+ findChildByName ns "w" "tblLook"
of
- Just l -> elemToTblLook ns l
+ Just l -> elemToTblLook ns l
Nothing -> return defaultTblLook
grid <- grid'
@@ -649,26 +712,22 @@ expandDrawingId s = do
getTitleAndAlt :: NameSpaces -> Element -> (String, String)
getTitleAndAlt ns element =
- let mbDocPr = findChild (elemName ns "wp" "inline") element >>=
- findChild (elemName ns "wp" "docPr")
- title = case mbDocPr >>= findAttr (elemName ns "" "title") of
- Just title' -> title'
- Nothing -> ""
- alt = case mbDocPr >>= findAttr (elemName ns "" "descr") of
- Just alt' -> alt'
- Nothing -> ""
+ let mbDocPr = findChildByName ns "wp" "inline" element >>=
+ findChildByName ns "wp" "docPr"
+ title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
elemToParPart ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
, pic_ns <- "http://schemas.openxmlformats.org/drawingml/2006/picture"
, Just picElem <- findElement (QName "pic" (Just pic_ns) (Just "pic")) drawingElem
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (elemName ns "r" "embed")
+ >>= findAttrByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -676,9 +735,9 @@ elemToParPart ns element
-- The below is an attempt to deal with images in deprecated vml format.
elemToParPart ns element
| isElem ns "w" "r" element
- , Just _ <- findChild (elemName ns "w" "pict") element =
+ , Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttr (elemName ns "r" "id")
+ >>= findAttrByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -687,86 +746,148 @@ elemToParPart ns element
-- Chart
elemToParPart ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element
+ , Just drawingElem <- findChildByName ns "w" "drawing" element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
+ = return Chart
+{-
+The next one is a bit complicated. fldChar fields work by first
+having a <w:fldChar fldCharType="begin"> in a run, then a run with
+<w:instrText>, then a <w:fldChar fldCharType="separate"> run, then the
+content runs, and finally a <w:fldChar fldCharType="end"> run. For
+example (omissions and my comments in brackets):
+
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="begin"/>
+ </w:r>
+ <w:r>
+ [...]
+ <w:instrText xml:space="preserve"> HYPERLINK [hyperlink url] </w:instrText>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="separate"/>
+ </w:r>
+ <w:r w:rsidRPr=[...]>
+ [...]
+ <w:t>Foundations of Analysis, 2nd Edition</w:t>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="end"/>
+ </w:r>
+
+So we do this in a number of steps. If we encounter the fldchar begin
+tag, we start open a fldchar state variable (see state above). We add
+the instrtext to it as FieldInfo. Then we close that and start adding
+the runs when we get to separate. Then when we get to end, we produce
+the Field type with approriate FieldInfo and Runs.
+-}
elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
+ | isElem ns "w" "r" element
+ , Just fldChar <- findChildByName ns "w" "fldChar" element
+ , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharClosed | fldCharType == "begin" -> do
+ modify $ \st -> st {stateFldCharState = FldCharOpen}
+ return NullParPart
+ FldCharFieldInfo info | fldCharType == "separate" -> do
+ modify $ \st -> st {stateFldCharState = FldCharContent info []}
+ return NullParPart
+ FldCharContent info runs | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = FldCharClosed}
+ return $ Field info $ reverse runs
+ _ -> throwError WrongElem
elemToParPart ns element
- | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttr (elemName ns "w" "id") element
- , Just cAuthor <- findAttr (elemName ns "w" "author") element
- , Just cDate <- findAttr (elemName ns "w" "date") element = do
- runs <- mapD (elemToRun ns) (elChildren element)
- return $ Insertion cId cAuthor cDate runs
+ | isElem ns "w" "r" element
+ , Just instrText <- findChildByName ns "w" "instrText" element = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharOpen -> do
+ info <- eitherToD $ parseFieldInfo $ strContent instrText
+ modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
+ return NullParPart
+ _ -> return NullParPart
elemToParPart ns element
- | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttr (elemName ns "w" "id") element
- , Just cAuthor <- findAttr (elemName ns "w" "author") element
- , Just cDate <- findAttr (elemName ns "w" "date") element = do
+ | isElem ns "w" "r" element = do
+ run <- elemToRun ns element
+ -- we check to see if we have an open FldChar in state that we're
+ -- recording.
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharContent info runs -> do
+ modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
+ return NullParPart
+ _ -> return $ PlainRun run
+elemToParPart ns element
+ | Just change <- getTrackedChange ns element = do
+ runs <- mapD (elemToRun ns) (elChildren element)
+ return $ ChangedRuns change runs
+elemToParPart ns element
+ | isElem ns "w" "smartTag" element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ Deletion cId cAuthor cDate runs
+ return $ SmartTag runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttr (elemName ns "w" "id") element
- , Just bmName <- findAttr (elemName ns "w" "name") element =
+ , Just bmId <- findAttrByName ns "w" "id" element
+ , Just bmName <- findAttrByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttr (elemName ns "r" "id") element = do
+ , Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
- Just target -> do
- case findAttr (elemName ns "w" "anchor") element of
+ Just target ->
+ case findAttrByName ns "w" "anchor" element of
Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ , Just anchor <- findAttrByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
- Nothing -> throwError WrongElem
+ Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element =
+ , Just cmtId <- findAttrByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- (eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
+ fmap PlainOMath (eitherToD $ readOMML $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttr (elemName ns "w" "id") element
- , Just cmtAuthor <- findAttr (elemName ns "w" "author") element
- , Just cmtDate <- findAttr (elemName ns "w" "date") element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrByName ns "w" "author" element
+ , Just cmtDate <- findAttrByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
lookupFootnote :: String -> Notes -> Maybe Element
-lookupFootnote s (Notes _ fns _) = fns >>= (M.lookup s)
+lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
lookupEndnote :: String -> Notes -> Maybe Element
-lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s)
+lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
elemToExtent drawingElem =
case (getDim "cx", getDim "cy") of
(Just w, Just h) -> Just (w, h)
- _ -> Nothing
+ _ -> Nothing
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
@@ -794,7 +915,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttr (elemName ns "w" "id") element = do
+ , Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -802,7 +923,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttr (elemName ns "w" "id") element = do
+ , Just enId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -813,8 +934,8 @@ childElemToRun _ _ = throwError WrongElem
elemToRun :: NameSpaces -> Element -> D Run
elemToRun ns element
| isElem ns "w" "r" element
- , Just altCont <- findChild (elemName ns "mc" "AlternateContent") element =
- do let choices = findChildren (elemName ns "mc" "Choice") altCont
+ , Just altCont <- findChildByName ns "mc" "AlternateContent" element =
+ do let choices = findChildrenByName ns "mc" "Choice" altCont
choiceChildren = map head $ filter (not . null) $ map elChildren choices
outputs <- mapD (childElemToRun ns) choiceChildren
case outputs of
@@ -822,15 +943,15 @@ elemToRun ns element
[] -> throwError WrongElem
elemToRun ns element
| isElem ns "w" "r" element
- , Just drawingElem <- findChild (elemName ns "w" "drawing") element =
+ , Just drawingElem <- findChildByName ns "w" "drawing" element =
childElemToRun ns drawingElem
elemToRun ns element
| isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "footnoteReference") element =
+ , Just ref <- findChildByName ns "w" "footnoteReference" element =
childElemToRun ns ref
elemToRun ns element
| isElem ns "w" "r" element
- , Just ref <- findChild (elemName ns "w" "endnoteReference") element =
+ , Just ref <- findChildByName ns "w" "endnoteReference" element =
childElemToRun ns ref
elemToRun ns element
| isElem ns "w" "r" element = do
@@ -854,22 +975,37 @@ getParStyleField field stylemap styles
= Just y
getParStyleField _ _ _ = Nothing
+getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
+getTrackedChange ns element
+ | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element =
+ Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
+getTrackedChange ns element
+ | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , Just cDate <- findAttrByName ns "w" "date" element =
+ Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
+getTrackedChange _ _ = Nothing
+
elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
elemToParagraphStyle ns element sty
- | Just pPr <- findChild (elemName ns "w" "pPr") element =
+ | Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (findAttr (elemName ns "w" "val"))
- (findChildren (elemName ns "w" "pStyle") pPr)
+ (findAttrByName ns "w" "val")
+ (findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = style
, indentation =
- findChild (elemName ns "w" "ind") pPr >>=
+ findChildByName ns "w" "ind" pPr >>=
elemToParIndentation ns
, dropCap =
case
- findChild (elemName ns "w" "framePr") pPr >>=
- findAttr (elemName ns "w" "dropCap")
+ findChildByName ns "w" "framePr" pPr >>=
+ findAttrByName ns "w" "dropCap"
of
Just "none" -> False
Just _ -> True
@@ -877,13 +1013,20 @@ elemToParagraphStyle ns element sty
, pHeading = getParStyleField headingLev sty style
, pNumInfo = getParStyleField numInfo sty style
, pBlockQuote = getParStyleField isBlockQuote sty style
+ , pChange = findChildByName ns "w" "rPr" pPr >>=
+ filterChild (\e -> isElem ns "w" "ins" e ||
+ isElem ns "w" "moveTo" e ||
+ isElem ns "w" "del" e ||
+ isElem ns "w" "moveFrom" e
+ ) >>=
+ getTrackedChange ns
}
elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
| Just t <- findChild tag rPr
- , Just val <- findAttr (elemName ns "w" "val") t =
+ , Just val <- findAttrByName ns "w" "val" t =
Just $ case val of
"true" -> True
"false" -> False
@@ -897,11 +1040,11 @@ checkOnOff _ _ _ = Nothing
elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element
- | Just rPr <- findChild (elemName ns "w" "rPr") element = do
+ | Just rPr <- findChildByName ns "w" "rPr" element = do
charStyles <- asks envCharStyles
let parentSty = case
- findChild (elemName ns "w" "rStyle") rPr >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "rStyle" rPr >>=
+ findAttrByName ns "w" "val"
of
Just styName | Just style <- M.lookup styName charStyles ->
Just (styName, style)
@@ -911,7 +1054,7 @@ elemToRunStyleD _ _ = return defaultRunStyle
elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
elemToRunStyle ns element parentStyle
- | Just rPr <- findChild (elemName ns "w" "rPr") element =
+ | Just rPr <- findChildByName ns "w" "rPr" element =
RunStyle
{
isBold = checkOnOff ns rPr (elemName ns "w" "b")
@@ -919,32 +1062,31 @@ elemToRunStyle ns element parentStyle
, isSmallCaps = checkOnOff ns rPr (elemName ns "w" "smallCaps")
, isStrike = checkOnOff ns rPr (elemName ns "w" "strike")
, rVertAlign =
- findChild (elemName ns "w" "vertAlign") rPr >>=
- findAttr (elemName ns "w" "val") >>=
+ findChildByName ns "w" "vertAlign" rPr >>=
+ findAttrByName ns "w" "val" >>=
\v -> Just $ case v of
"superscript" -> SupScrpt
"subscript" -> SubScrpt
_ -> BaseLn
, rUnderline =
- findChild (elemName ns "w" "u") rPr >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "u" rPr >>=
+ findAttrByName ns "w" "val"
, rStyle = parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
-isNumericNotNull :: String -> Bool
-isNumericNotNull str = (str /= []) && (all isDigit str)
-
getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
getHeaderLevel ns element
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, Just index <- stripPrefix "Heading" styleId
- , isNumericNotNull index = Just (styleId, read index)
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
- , Just index <- findChild (elemName ns "w" "name") element >>=
- findAttr (elemName ns "w" "val") >>=
+ , Just n <- stringToInteger index
+ , n > 0 = Just (styleId, fromInteger n)
+ | Just styleId <- findAttrByName ns "w" "styleId" element
+ , Just index <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val" >>=
stripPrefix "heading "
- , isNumericNotNull index = Just (styleId, read index)
+ , Just n <- stringToInteger index
+ , n > 0 = Just (styleId, fromInteger n)
getHeaderLevel _ _ = Nothing
blockQuoteStyleIds :: [String]
@@ -955,23 +1097,23 @@ blockQuoteStyleNames = ["Quote", "Block Text"]
getBlockQuote :: NameSpaces -> Element -> Maybe Bool
getBlockQuote ns element
- | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, styleId `elem` blockQuoteStyleIds = Just True
- | Just styleName <- findChild (elemName ns "w" "name") element >>=
- findAttr (elemName ns "w" "val")
+ | Just styleName <- findChildByName ns "w" "name" element >>=
+ findAttrByName ns "w" "val"
, styleName `elem` blockQuoteStyleNames = Just True
getBlockQuote _ _ = Nothing
getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
getNumInfo ns element = do
- let numPr = findChild (elemName ns "w" "pPr") element >>=
- findChild (elemName ns "w" "numPr")
+ let numPr = findChildByName ns "w" "pPr" element >>=
+ findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
- findChild (elemName ns "w" "ilvl") >>=
- findAttr (elemName ns "w" "val"))
+ findChildByName ns "w" "ilvl" >>=
+ findAttrByName ns "w" "val")
numId <- numPr >>=
- findChild (elemName ns "w" "numId") >>=
- findAttr (elemName ns "w" "val")
+ findChildByName ns "w" "numId" >>=
+ findAttrByName ns "w" "val"
return (numId, lvl)
@@ -1015,10 +1157,10 @@ getSymChar ns element
let [(char, _)] = readLitChar ("\\x" ++ s) in
TextRun . maybe "" (:[]) $ getUnicode font char
where
- getCodepoint = findAttr (elemName ns "w" "char") element
- getFont = stringToFont =<< findAttr (elemName ns "w" "font") element
+ getCodepoint = findAttrByName ns "w" "char" element
+ getFont = stringToFont =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
+ lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1029,11 +1171,9 @@ elemToRunElems ns element
let font = do
fontElem <- findElement (qualName "rFonts") element
stringToFont =<<
- (foldr (<|>) Nothing $
- map (flip findAttr fontElem . qualName) ["ascii", "hAnsi"])
+ foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
setFont :: Maybe Font -> ReaderEnv -> ReaderEnv
setFont f s = s{envFont = f}
-