diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 77 |
1 files changed, 45 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cce80fb48..5910a476b 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -59,7 +59,7 @@ import Data.Bits ((.|.)) import qualified Data.ByteString.Lazy as B import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader -import Control.Applicative ((<$>), (<|>)) +import Control.Applicative ((<|>)) import qualified Data.Map as M import Text.Pandoc.Compat.Except import Text.TeXMath.Readers.OMML (readOMML) @@ -75,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes , envFont :: Maybe Font , envCharStyles :: CharStyleMap , envParStyles :: ParStyleMap + , envLocation :: DocumentLocation } deriving Show @@ -87,7 +88,7 @@ instance Error DocxError where type D = ExceptT DocxError (Reader ReaderEnv) runD :: D a -> ReaderEnv -> Either DocxError a -runD dx re = runReader (runExceptT dx ) re +runD dx re = runReader (runExceptT dx) re maybeToD :: Maybe a -> D a maybeToD (Just a) = return a @@ -140,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level] -- (ilvl, format, string, start) type Level = (String, String, String, Maybe Integer) -data Relationship = Relationship (RelId, Target) +data DocumentLocation = InDocument | InFootnote | InEndnote + deriving (Eq,Show) + +data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces @@ -174,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] + | DummyListItem ParagraphStyle String [ParPart] | Tbl String TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -208,7 +213,7 @@ data Run = Run RunStyle [RunElem] | InlineDrawing FilePath B.ByteString deriving Show -data RunElem = TextRun String | LnBrk | Tab +data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show data VertAlign = BaseLn | SupScrpt | SubScrpt @@ -238,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing , rUnderline = Nothing , rStyle = Nothing} - type Target = String type Anchor = String type URL = String @@ -255,7 +259,8 @@ archiveToDocx archive = do rels = archiveToRelationships archive media = archiveToMedia archive (styles, parstyles) = archiveToStyles archive - rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles + rEnv = + ReaderEnv notes numbering rels media Nothing styles parstyles InDocument doc <- runD (archiveToDocument archive) rEnv return $ Docx doc @@ -362,29 +367,30 @@ archiveToNotes zf = in Notes ns fn en -filePathIsRel :: FilePath -> Bool -filePathIsRel fp = - let (dir, name) = splitFileName fp - in - (dir == "word/_rels/") && ((takeExtension name) == ".rels") +filePathToRelType :: FilePath -> Maybe DocumentLocation +filePathToRelType "word/_rels/document.xml.rels" = Just InDocument +filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote +filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote +filePathToRelType _ = Nothing -relElemToRelationship :: Element -> Maybe Relationship -relElemToRelationship element | qName (elName element) == "Relationship" = +relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship +relElemToRelationship relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship (relId, target) -relElemToRelationship _ = Nothing - - + return $ Relationship relType relId target +relElemToRelationship _ _ = Nothing + +filePathToRelationships :: Archive -> FilePath -> [Relationship] +filePathToRelationships ar fp | Just relType <- filePathToRelType fp + , Just entry <- findEntryByPath fp ar + , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + mapMaybe (relElemToRelationship relType) $ elChildren relElems +filePathToRelationships _ _ = [] + archiveToRelationships :: Archive -> [Relationship] archiveToRelationships archive = - let relPaths = filter filePathIsRel (filesInArchive archive) - entries = mapMaybe (\f -> findEntryByPath f archive) relPaths - relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries - rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems - in - rels + concatMap (filePathToRelationships archive) $ filesInArchive archive filePathIsMedia :: FilePath -> Bool filePathIsMedia fp = @@ -409,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls return lvl + numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | qName (elName element) == "num" && @@ -560,7 +567,7 @@ elemToBodyPart ns element num <- asks envNumbering case lookupLevel numId lvl num of Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts - Nothing -> throwError WrongElem + Nothing -> return $ DummyListItem parstyle lvl parparts elemToBodyPart ns element | isElem ns "w" "p" element = do sty <- asks envParStyles @@ -573,7 +580,7 @@ elemToBodyPart ns element Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts Nothing -> - throwError WrongElem + return $ DummyListItem parstyle lvl parparts Nothing -> return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -596,13 +603,16 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -lookupRelationship :: RelId -> [Relationship] -> Maybe Target -lookupRelationship relid rels = - lookup relid (map (\(Relationship pair) -> pair) rels) +lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target +lookupRelationship docLocation relid rels = + lookup (docLocation, relid) pairs + where + pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do - target <- asks (lookupRelationship s . envRelationships) + location <- asks envLocation + target <- asks (lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -657,9 +667,10 @@ elemToParPart ns element elemToParPart ns element | isElem ns "w" "hyperlink" element , Just relId <- findAttr (elemName ns "r" "id") element = do + location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships - case lookupRelationship relId rels of + case lookupRelationship location relId rels of Just target -> do case findAttr (elemName ns "w" "anchor") element of Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs @@ -699,7 +710,7 @@ elemToRun ns element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupFootnote fnId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Footnote bps Nothing -> return $ Footnote [] elemToRun ns element @@ -708,7 +719,7 @@ elemToRun ns element , Just enId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes case lookupEndnote enId notes of - Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e) + Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) return $ Endnote bps Nothing -> return $ Endnote [] elemToRun ns element @@ -877,6 +888,8 @@ elemToRunElem ns element map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab + | isElem ns "w" "softHyphen" element = return SoftHyphen + | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem where |