summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx/Parse.hs
diff options
context:
space:
mode:
authorJesse Rosenthal <jrosenthal@jhu.edu>2018-01-16 10:45:45 -0500
committerJesse Rosenthal <jrosenthal@jhu.edu>2018-01-16 13:22:02 -0500
commit404706d29a8f45b43ef2ef13e93d1786dde863a0 (patch)
treeebd508e9cb8dcf3e36e437d36307575940058228 /src/Text/Pandoc/Readers/Docx/Parse.hs
parent551d8e8541cd056a2c8c81095e98e01bca4436d6 (diff)
Docx reader: Parse fldChar tags
This will allow us to parse instrTxt inside fldChar tags.
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx/Parse.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs86
1 files changed, 81 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index b79b39369..b3a0fee8e 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -90,10 +90,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String] }
+data ReaderState = ReaderState { stateWarnings :: [String]
+ , stateFldCharState :: FldCharState
+ }
deriving Show
-data DocxError = DocxError | WrongElem
+data FldCharState = FldCharOpen
+ | FldCharFieldInfo FieldInfo
+ | FldCharContent FieldInfo [Run]
+ | FldCharClosed
+ deriving (Show)
+
+data DocxError = DocxError
+ | WrongElem
deriving Show
type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState))
@@ -265,6 +274,9 @@ data ParPart = PlainRun Run
| Chart -- placeholder for now
| PlainOMath [Exp]
| SmartTag [Run]
+ | Field FieldInfo [Run]
+ | NullParPart -- when we need to return nothing, but
+ -- not because of an error.
deriving Show
data Run = Run RunStyle [RunElem]
@@ -274,6 +286,10 @@ data Run = Run RunStyle [RunElem]
| InlineChart -- placeholder
deriving Show
+data FieldInfo = HyperlinkField URL
+ | UnknownField
+ deriving (Show)
+
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
@@ -328,7 +344,9 @@ archiveToDocxWithWarnings archive = do
(styles, parstyles) = archiveToStyles archive
rEnv =
ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument
- rState = ReaderState { stateWarnings = [] }
+ rState = ReaderState { stateWarnings = []
+ , stateFldCharState = FldCharClosed
+ }
(eitherDoc, st) = runD (archiveToDocument archive) rEnv rState
case eitherDoc of
Right doc -> Right (Docx doc, stateWarnings st)
@@ -736,9 +754,67 @@ elemToParPart ns element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
= return Chart
+{-
+The next one is a bit complicated. fldChar fields work by first
+having a <w:fldChar fldCharType="begin"> in a run, then a run with
+<w:instrText>, then a <w:fldChar fldCharType="separate"> run, then the
+content runs, and finally a <w:fldChar fldCharType="end"> run. For
+example (omissions and my comments in brackets):
+
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="begin"/>
+ </w:r>
+ <w:r>
+ [...]
+ <w:instrText xml:space="preserve"> HYPERLINK [hyperlink url] </w:instrText>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="separate"/>
+ </w:r>
+ <w:r w:rsidRPr=[...]>
+ [...]
+ <w:t>Foundations of Analysis, 2nd Edition</w:t>
+ </w:r>
+ <w:r>
+ [...]
+ <w:fldChar w:fldCharType="end"/>
+ </w:r>
+
+So we do this in a number of steps. If we encounter the fldchar begin
+tag, we start open a fldchar state variable (see state above). We add
+the instrtext to it as FieldInfo. Then we close that and start adding
+the runs when we get to separate. Then when we get to end, we produce
+the Field type with approriate FieldInfo and Runs.
+-}
+elemToParPart ns element
+ | isElem ns "w" "r" element
+ , Just fldChar <- findChildByName ns "w" "fldChar" element
+ , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharClosed | fldCharType == "begin" -> do
+ modify $ \st -> st {stateFldCharState = FldCharOpen}
+ return NullParPart
+ FldCharFieldInfo info | fldCharType == "separate" -> do
+ modify $ \st -> st {stateFldCharState = FldCharContent info []}
+ return NullParPart
+ FldCharContent info runs | fldCharType == "end" -> do
+ modify $ \st -> st {stateFldCharState = FldCharClosed}
+ return $ Field info runs
+ _ -> throwError WrongElem
elemToParPart ns element
- | isElem ns "w" "r" element =
- elemToRun ns element >>= (\r -> return $ PlainRun r)
+ | isElem ns "w" "r" element = do
+ run <- elemToRun ns element
+ -- we check to see if we have an open FldChar in state that we're
+ -- recording.
+ fldCharState <- gets stateFldCharState
+ case fldCharState of
+ FldCharContent info runs -> do
+ modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)}
+ return NullParPart
+ _ -> return $ PlainRun run
elemToParPart ns element
| Just change <- getTrackedChange ns element = do
runs <- mapD (elemToRun ns) (elChildren element)