summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Parse.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs77
1 files changed, 45 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index cce80fb48..5910a476b 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -59,7 +59,7 @@ import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import qualified Text.Pandoc.UTF8 as UTF8
import Control.Monad.Reader
-import Control.Applicative ((<$>), (<|>))
+import Control.Applicative ((<|>))
import qualified Data.Map as M
import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
@@ -75,6 +75,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
, envParStyles :: ParStyleMap
+ , envLocation :: DocumentLocation
}
deriving Show
@@ -87,7 +88,7 @@ instance Error DocxError where
type D = ExceptT DocxError (Reader ReaderEnv)
runD :: D a -> ReaderEnv -> Either DocxError a
-runD dx re = runReader (runExceptT dx ) re
+runD dx re = runReader (runExceptT dx) re
maybeToD :: Maybe a -> D a
maybeToD (Just a) = return a
@@ -140,7 +141,10 @@ data AbstractNumb = AbstractNumb String [Level]
-- (ilvl, format, string, start)
type Level = (String, String, String, Maybe Integer)
-data Relationship = Relationship (RelId, Target)
+data DocumentLocation = InDocument | InFootnote | InEndnote
+ deriving (Eq,Show)
+
+data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
@@ -174,6 +178,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
| ListItem ParagraphStyle String String Level [ParPart]
+ | DummyListItem ParagraphStyle String [ParPart]
| Tbl String TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -208,7 +213,7 @@ data Run = Run RunStyle [RunElem]
| InlineDrawing FilePath B.ByteString
deriving Show
-data RunElem = TextRun String | LnBrk | Tab
+data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
data VertAlign = BaseLn | SupScrpt | SubScrpt
@@ -238,7 +243,6 @@ defaultRunStyle = RunStyle { isBold = Nothing
, rUnderline = Nothing
, rStyle = Nothing}
-
type Target = String
type Anchor = String
type URL = String
@@ -255,7 +259,8 @@ archiveToDocx archive = do
rels = archiveToRelationships archive
media = archiveToMedia archive
(styles, parstyles) = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
+ rEnv =
+ ReaderEnv notes numbering rels media Nothing styles parstyles InDocument
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -362,29 +367,30 @@ archiveToNotes zf =
in
Notes ns fn en
-filePathIsRel :: FilePath -> Bool
-filePathIsRel fp =
- let (dir, name) = splitFileName fp
- in
- (dir == "word/_rels/") && ((takeExtension name) == ".rels")
+filePathToRelType :: FilePath -> Maybe DocumentLocation
+filePathToRelType "word/_rels/document.xml.rels" = Just InDocument
+filePathToRelType "word/_rels/footnotes.xml.rels" = Just InFootnote
+filePathToRelType "word/_rels/endnotes.xml.rels" = Just InEndnote
+filePathToRelType _ = Nothing
-relElemToRelationship :: Element -> Maybe Relationship
-relElemToRelationship element | qName (elName element) == "Relationship" =
+relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
+relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
relId <- findAttr (QName "Id" Nothing Nothing) element
target <- findAttr (QName "Target" Nothing Nothing) element
- return $ Relationship (relId, target)
-relElemToRelationship _ = Nothing
-
-
+ return $ Relationship relType relId target
+relElemToRelationship _ _ = Nothing
+
+filePathToRelationships :: Archive -> FilePath -> [Relationship]
+filePathToRelationships ar fp | Just relType <- filePathToRelType fp
+ , Just entry <- findEntryByPath fp ar
+ , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry =
+ mapMaybe (relElemToRelationship relType) $ elChildren relElems
+filePathToRelationships _ _ = []
+
archiveToRelationships :: Archive -> [Relationship]
archiveToRelationships archive =
- let relPaths = filter filePathIsRel (filesInArchive archive)
- entries = mapMaybe (\f -> findEntryByPath f archive) relPaths
- relElems = mapMaybe (parseXMLDoc . UTF8.toStringLazy . fromEntry) entries
- rels = mapMaybe relElemToRelationship $ concatMap elChildren relElems
- in
- rels
+ concatMap (filePathToRelationships archive) $ filesInArchive archive
filePathIsMedia :: FilePath -> Bool
filePathIsMedia fp =
@@ -409,6 +415,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
lvl <- lookup ilvl $ map (\l@(i, _, _, _) -> (i, l)) lvls
return lvl
+
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element |
qName (elName element) == "num" &&
@@ -560,7 +567,7 @@ elemToBodyPart ns element
num <- asks envNumbering
case lookupLevel numId lvl num of
Just levelInfo -> return $ ListItem parstyle numId lvl levelInfo parparts
- Nothing -> throwError WrongElem
+ Nothing -> return $ DummyListItem parstyle lvl parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
sty <- asks envParStyles
@@ -573,7 +580,7 @@ elemToBodyPart ns element
Just levelInfo ->
return $ ListItem parstyle numId lvl levelInfo parparts
Nothing ->
- throwError WrongElem
+ return $ DummyListItem parstyle lvl parparts
Nothing -> return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
@@ -596,13 +603,16 @@ elemToBodyPart ns element
return $ Tbl caption grid tblLook rows
elemToBodyPart _ _ = throwError WrongElem
-lookupRelationship :: RelId -> [Relationship] -> Maybe Target
-lookupRelationship relid rels =
- lookup relid (map (\(Relationship pair) -> pair) rels)
+lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
+lookupRelationship docLocation relid rels =
+ lookup (docLocation, relid) pairs
+ where
+ pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
expandDrawingId :: String -> D (FilePath, B.ByteString)
expandDrawingId s = do
- target <- asks (lookupRelationship s . envRelationships)
+ location <- asks envLocation
+ target <- asks (lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -657,9 +667,10 @@ elemToParPart ns element
elemToParPart ns element
| isElem ns "w" "hyperlink" element
, Just relId <- findAttr (elemName ns "r" "id") element = do
+ location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
- case lookupRelationship relId rels of
+ case lookupRelationship location relId rels of
Just target -> do
case findAttr (elemName ns "w" "anchor") element of
Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
@@ -699,7 +710,7 @@ elemToRun ns element
, Just fnId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupFootnote fnId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Footnote bps
Nothing -> return $ Footnote []
elemToRun ns element
@@ -708,7 +719,7 @@ elemToRun ns element
, Just enId <- findAttr (elemName ns "w" "id") ref = do
notes <- asks envNotes
case lookupEndnote enId notes of
- Just e -> do bps <- mapD (elemToBodyPart ns) (elChildren e)
+ Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
return $ Endnote bps
Nothing -> return $ Endnote []
elemToRun ns element
@@ -877,6 +888,8 @@ elemToRunElem ns element
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
+ | isElem ns "w" "softHyphen" element = return SoftHyphen
+ | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
| isElem ns "w" "sym" element = return (getSymChar ns element)
| otherwise = throwError WrongElem
where