summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-08-18 14:14:01 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-08-18 14:14:01 -0700
commit716ad5fd8aa3be2cfe46ab3d06fc1c9aee1f27f0 (patch)
tree7e6dcb7b693b15fb53427e8c2a381aefea6ea15b
parent6dce8c67601dcb89d8ff69adf83fac7d27353db2 (diff)
parent4b38e9f1f0d12e46c27fd3782d8f3e32d8ee90a0 (diff)
Merge pull request #1547 from jkr/styleparse
Docx reader: parsing styles
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs57
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs89
2 files changed, 110 insertions, 36 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 3e4ac9647..188fa4a42 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -196,12 +196,6 @@ fixAuthors mv = mv
codeStyles :: [String]
codeStyles = ["VerbatimChar"]
-strongStyles :: [String]
-strongStyles = ["Strong", "Bold"]
-
-emphStyles :: [String]
-emphStyles = ["Emphasis", "Italic"]
-
blockQuoteDivs :: [String]
blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"]
@@ -228,27 +222,44 @@ parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _ = ""
+blacklistedCharStyles :: [String]
+blacklistedCharStyles = ["Hyperlink"]
+
+resolveDependentRunStyle :: RunStyle -> RunStyle
+resolveDependentRunStyle rPr
+ | Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
+ rPr
+ | Just (_, cs) <- rStyle rPr =
+ let rPr' = resolveDependentRunStyle cs
+ in
+ RunStyle { isBold = case isBold rPr of
+ Just bool -> Just bool
+ Nothing -> isBold rPr'
+ , isItalic = case isItalic rPr of
+ Just bool -> Just bool
+ Nothing -> isItalic rPr'
+ , isSmallCaps = case isSmallCaps rPr of
+ Just bool -> Just bool
+ Nothing -> isSmallCaps rPr'
+ , isStrike = case isStrike rPr of
+ Just bool -> Just bool
+ Nothing -> isStrike rPr'
+ , rVertAlign = case rVertAlign rPr of
+ Just valign -> Just valign
+ Nothing -> rVertAlign rPr'
+ , rUnderline = case rUnderline rPr of
+ Just ulstyle -> Just ulstyle
+ Nothing -> rUnderline rPr'
+ , rStyle = rStyle rPr }
+ | otherwise = rPr
+
runStyleToTransform :: RunStyle -> (Inlines -> Inlines)
runStyleToTransform rPr
- | Just s <- rStyle rPr
+ | Just (s, _) <- rStyle rPr
, s `elem` spansToKeep =
let rPr' = rPr{rStyle = Nothing}
in
(spanWith ("", [s], [])) . (runStyleToTransform rPr')
- | Just s <- rStyle rPr
- , s `elem` emphStyles =
- let rPr' = rPr{rStyle = Nothing, isItalic = Nothing}
- in
- case isItalic rPr of
- Just False -> runStyleToTransform rPr'
- _ -> emph . (runStyleToTransform rPr')
- | Just s <- rStyle rPr
- , s `elem` strongStyles =
- let rPr' = rPr{rStyle = Nothing, isBold = Nothing}
- in
- case isBold rPr of
- Just False -> runStyleToTransform rPr'
- _ -> strong . (runStyleToTransform rPr')
| Just True <- isItalic rPr =
emph . (runStyleToTransform rPr {isItalic = Nothing})
| Just True <- isBold rPr =
@@ -267,12 +278,12 @@ runStyleToTransform rPr
runToInlines :: Run -> DocxContext Inlines
runToInlines (Run rs runElems)
- | Just s <- rStyle rs
+ | Just (s, _) <- rStyle rs
, s `elem` codeStyles =
return $ code $ concatMap runElemToString runElems
| otherwise = do
let ils = concatReduce (map runElemToInlines runElems)
- return $ (runStyleToTransform rs) ils
+ return $ (runStyleToTransform $ resolveDependentRunStyle rs) ils
runToInlines (Footnote bps) = do
blksList <- concatReduce <$> (mapM bodyPartToBlocks bps)
return $ note blksList
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 1775a19c3..e7a6c3ffb 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -50,7 +50,6 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..)
, Cell(..)
, archiveToDocx
) where
-
import Codec.Archive.Zip
import Text.XML.Light
import Data.Maybe
@@ -73,6 +72,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envRelationships :: [Relationship]
, envMedia :: Media
, envFont :: Maybe Font
+ , envCharStyles :: CharStyleMap
}
deriving Show
@@ -120,6 +120,10 @@ data Body = Body [BodyPart]
type Media = [(FilePath, B.ByteString)]
+type CharStyle = (String, RunStyle)
+
+type CharStyleMap = M.Map String RunStyle
+
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -206,7 +210,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, isStrike :: Maybe Bool
, rVertAlign :: Maybe VertAlign
, rUnderline :: Maybe String
- , rStyle :: Maybe String }
+ , rStyle :: Maybe CharStyle}
deriving Show
defaultRunStyle :: RunStyle
@@ -216,8 +220,7 @@ defaultRunStyle = RunStyle { isBold = Nothing
, isStrike = Nothing
, rVertAlign = Nothing
, rUnderline = Nothing
- , rStyle = Nothing
- }
+ , rStyle = Nothing}
type Target = String
@@ -239,7 +242,8 @@ archiveToDocx archive = do
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
- rEnv = ReaderEnv notes numbering rels media Nothing
+ styles = archiveToStyles archive
+ rEnv = ReaderEnv notes numbering rels media Nothing styles
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -259,6 +263,53 @@ elemToBody ns element | isElem ns "w" "body" element =
(\bps -> return $ Body bps)
elemToBody _ _ = throwError WrongElem
+archiveToStyles :: Archive -> CharStyleMap
+archiveToStyles zf =
+ let stylesElem = findEntryByPath "word/styles.xml" zf >>=
+ (parseXMLDoc . UTF8.toStringLazy . fromEntry)
+ in
+ case stylesElem of
+ Nothing -> M.empty
+ Just styElem ->
+ let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
+ in
+ M.fromList $ buildBasedOnList namespaces styElem Nothing
+
+isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool
+isBasedOnStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
+ findAttr (elemName ns "w" "val")
+ , Just (parentId, _) <- parentStyle = (basedOnVal == parentId)
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Nothing <- findChild (elemName ns "w" "basedOn") element
+ , Nothing <- parentStyle = True
+ | otherwise = False
+
+elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle
+elemToCharStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToRunStyle ns element parentStyle)
+ | otherwise = Nothing
+
+getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+getStyleChildren ns element parentStyle
+ | isElem ns "w" "styles" element =
+ mapMaybe (\e -> elemToCharStyle ns e parentStyle) $
+ filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
+ | otherwise = []
+
+buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+buildBasedOnList ns element rootStyle =
+ case (getStyleChildren ns element rootStyle) of
+ [] -> []
+ stys -> stys ++
+ (concatMap (\s -> buildBasedOnList ns element (Just s)) stys)
+
archiveToNotes :: Archive -> Notes
archiveToNotes zf =
let fnElem = findEntryByPath "word/footnotes.xml" zf
@@ -629,7 +680,8 @@ elemToRun ns element
elemToRun ns element
| isElem ns "w" "r" element = do
runElems <- elemToRunElems ns element
- return $ Run (elemToRunStyle ns element) runElems
+ runStyle <- elemToRunStyleD ns element
+ return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
@@ -669,9 +721,22 @@ checkOnOff ns rPr tag
| Just _ <- findChild tag rPr = Just True
checkOnOff _ _ _ = Nothing
-
-elemToRunStyle :: NameSpaces -> Element -> RunStyle
-elemToRunStyle ns element
+elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
+elemToRunStyleD ns element
+ | Just rPr <- findChild (elemName ns "w" "rPr") element = do
+ charStyles <- asks envCharStyles
+ let parentSty = case
+ findChild (elemName ns "w" "rStyle") rPr >>=
+ findAttr (elemName ns "w" "val")
+ of
+ Just styName | Just style <- M.lookup styName charStyles ->
+ Just (styName, style)
+ _ -> Nothing
+ return $ elemToRunStyle ns element parentSty
+elemToRunStyleD _ _ = return defaultRunStyle
+
+elemToRunStyle :: NameSpaces -> Element -> Maybe CharStyle -> RunStyle
+elemToRunStyle ns element parentStyle
| Just rPr <- findChild (elemName ns "w" "rPr") element =
RunStyle
{
@@ -689,11 +754,9 @@ elemToRunStyle ns element
, rUnderline =
findChild (elemName ns "w" "u") rPr >>=
findAttr (elemName ns "w" "val")
- , rStyle =
- findChild (elemName ns "w" "rStyle") rPr >>=
- findAttr (elemName ns "w" "val")
+ , rStyle = parentStyle
}
-elemToRunStyle _ _ = defaultRunStyle
+elemToRunStyle _ _ _ = defaultRunStyle
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element