summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-06-21 23:14:03 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-06-21 23:14:03 -0700
commitcab16024fcba447f64b0254f54f3e0b4dd93bb99 (patch)
tree8318fa059ee7adb1103d0de5792a116b30b23502 /src/Text
parent08fe16e9720a3a191caf095d48e0a6c454039bf9 (diff)
parented43513087b514a5240fde04784dbf8709182513 (diff)
Merge pull request #1363 from jkr/newNormalize
Improve normalization
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs70
1 files changed, 58 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 479a88161..09c2330fb 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -148,25 +148,71 @@ runElemsToString = concatMap runElemToString
--- Text.Pandoc.Shared.normalize for reasons of efficiency. For
--- whatever reason, `normalize` makes a run take almost twice as
--- long. (It does more, but this does what we need)
-strNormalize :: [Inline] -> [Inline]
-strNormalize [] = []
-strNormalize (Str "" : ils) = strNormalize ils
-strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l)
-strNormalize (il:ils) = il : (strNormalize ils)
+inlineNormalize :: [Inline] -> [Inline]
+inlineNormalize [] = []
+inlineNormalize (Str "" : ils) = inlineNormalize ils
+inlineNormalize ((Str s) : (Str s') : l) =
+ inlineNormalize (Str (s++s') : l)
+inlineNormalize ((Emph ils) : (Emph ils') : l) =
+ inlineNormalize $ (Emph $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Emph ils) : l) =
+ Emph (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Strong ils) : (Strong ils') : l) =
+ inlineNormalize $ (Strong $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Strong ils) : l) =
+ Strong (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Strikeout ils) : (Strikeout ils') : l) =
+ inlineNormalize $ (Strikeout $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Strikeout ils) : l) =
+ Strikeout (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Superscript ils) : (Superscript ils') : l) =
+ inlineNormalize $ (Superscript $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Superscript ils) : l) =
+ Superscript (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Subscript ils) : (Subscript ils') : l) =
+ inlineNormalize $ (Subscript $ inlineNormalize (ils ++ ils')) : l
+inlineNormalize ((Subscript ils) : l) =
+ Subscript (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Space : Space : l)) =
+ inlineNormalize $ (Space : l)
+inlineNormalize ((Quoted qt ils) : l) =
+ Quoted qt (inlineNormalize ils) : inlineNormalize l
+inlineNormalize ((Cite cits ils) : l) =
+ let
+ f :: Citation -> Citation
+ f (Citation s pref suff mode num hash) =
+ Citation s (inlineNormalize pref) (inlineNormalize suff) mode num hash
+ in
+ Cite (map f cits) (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize ((Link ils s) : l) =
+ Link (inlineNormalize ils) s : (inlineNormalize l)
+inlineNormalize ((Image ils s) : l) =
+ Image (inlineNormalize ils) s : (inlineNormalize l)
+inlineNormalize ((Note blks) : l) =
+ Note (map blockNormalize blks) : (inlineNormalize l)
+inlineNormalize ((Span attr ils) : l) =
+ Span attr (inlineNormalize ils) : (inlineNormalize l)
+inlineNormalize (il : l) = il : (inlineNormalize l)
stripSpaces :: [Inline] -> [Inline]
stripSpaces ils =
reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils
blockNormalize :: Block -> Block
-blockNormalize (Plain ils) = Plain $ bottomUp strNormalize $ stripSpaces ils
-blockNormalize (Para ils) = Para $ bottomUp strNormalize $ stripSpaces ils
+blockNormalize (Plain ils) = Plain $ stripSpaces $ inlineNormalize ils
+blockNormalize (Para ils) = Para $ stripSpaces $ inlineNormalize ils
blockNormalize (Header n attr ils) =
- Header n attr $ bottomUp strNormalize $ stripSpaces ils
+ Header n attr $ stripSpaces $ inlineNormalize ils
blockNormalize (Table ils align width hdr cells) =
- Table (bottomUp strNormalize $ stripSpaces ils) align width hdr cells
+ Table (stripSpaces $ inlineNormalize ils) align width hdr cells
blockNormalize (DefinitionList pairs) =
- DefinitionList $ map (\(ils, blklsts) -> (bottomUp strNormalize (stripSpaces ils), blklsts)) pairs
+ DefinitionList $ map (\(ils, blklsts) -> (stripSpaces (inlineNormalize ils), (map (map blockNormalize) blklsts))) pairs
+blockNormalize (BlockQuote blks) = BlockQuote (map blockNormalize blks)
+blockNormalize (OrderedList attr blkslst) =
+ OrderedList attr $ map (\blks -> map blockNormalize blks) blkslst
+blockNormalize (BulletList blkslst) =
+ BulletList $ map (\blks -> map blockNormalize blks) blkslst
+blockNormalize (Div attr blks) = Div attr (map blockNormalize blks)
blockNormalize blk = blk
runToInlines :: ReaderOptions -> Docx -> Run -> [Inline]
@@ -240,8 +286,8 @@ parPartsToInlines opts docx parparts =
(if False -- TODO depend on option
then bottomUp (makeImagesSelfContained docx)
else id) $
- bottomUp spanCorrect $
bottomUp spanTrim $
+ bottomUp spanCorrect $
bottomUp spanReduce $
concatMap (parPartToInlines opts docx) parparts
@@ -315,7 +361,7 @@ makeImagesSelfContained _ inline = inline
bodyToBlocks :: ReaderOptions -> Docx -> Body -> [Block]
bodyToBlocks opts docx (Body bps) =
bottomUp removeEmptyPars $
- bottomUp blockNormalize $
+ map blockNormalize $
bottomUp spanRemove $
bottomUp divRemove $
map (makeHeaderAnchors) $