diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-10-04 21:56:29 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-10-04 21:56:29 -0700 |
commit | 869e800bbbe4008beeb3e3420e689aafd6e67aa7 (patch) | |
tree | fcca43e20faa967041fdf725c45088295a8462f2 | |
parent | e32ab441bee323e3a2c6fced7c89bf0c6351d6c8 (diff) | |
parent | dfd06467eadace7b37cb3ebc53c943755d0436eb (diff) |
Merge pull request #2432 from hftf/hyphens
Docx Reader: Parse soft and non-breaking hyphen elements
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 4 | ||||
-rw-r--r-- | tests/Tests/Readers/Docx.hs | 4 | ||||
-rw-r--r-- | tests/docx/special_punctuation.docx | bin | 0 -> 8408 bytes | |||
-rw-r--r-- | tests/docx/special_punctuation.native | 2 |
5 files changed, 13 insertions, 1 deletion
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 67a97ae85..8b8d1ede1 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -206,11 +206,15 @@ runElemToInlines :: RunElem -> Inlines runElemToInlines (TextRun s) = text s runElemToInlines (LnBrk) = linebreak runElemToInlines (Tab) = space +runElemToInlines (SoftHyphen) = text "\xad" +runElemToInlines (NoBreakHyphen) = text "\x2011" runElemToString :: RunElem -> String runElemToString (TextRun s) = s runElemToString (LnBrk) = ['\n'] runElemToString (Tab) = ['\t'] +runElemToString (SoftHyphen) = ['\xad'] +runElemToString (NoBreakHyphen) = ['\x2011'] runToString :: Run -> String runToString (Run _ runElems) = concatMap runElemToString runElems diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index cce80fb48..53af19dfd 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -208,7 +208,7 @@ data Run = Run RunStyle [RunElem] | InlineDrawing FilePath B.ByteString deriving Show -data RunElem = TextRun String | LnBrk | Tab +data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show data VertAlign = BaseLn | SupScrpt | SubScrpt @@ -877,6 +877,8 @@ elemToRunElem ns element map (\x -> fromMaybe x . getUnicode f . 
lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab + | isElem ns "w" "softHyphen" element = return SoftHyphen + | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen | isElem ns "w" "sym" element = return (getSymChar ns element) | otherwise = throwError WrongElem where diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs index 47292bc99..7e3f1979e 100644 --- a/tests/Tests/Readers/Docx.hs +++ b/tests/Tests/Readers/Docx.hs @@ -131,6 +131,10 @@ tests = [ testGroup "inlines" "docx/tabs.docx" "docx/tabs.native" , testCompare + "special punctuation" + "docx/special_punctuation.docx" + "docx/special_punctuation.native" + , testCompare "normalizing inlines" "docx/normalize.docx" "docx/normalize.native" diff --git a/tests/docx/special_punctuation.docx b/tests/docx/special_punctuation.docx Binary files differ new file mode 100644 index 000000000..8e0bb55c9 --- /dev/null +++ b/tests/docx/special_punctuation.docx diff --git a/tests/docx/special_punctuation.native b/tests/docx/special_punctuation.native new file mode 100644 index 000000000..304289f44 --- /dev/null +++ b/tests/docx/special_punctuation.native @@ -0,0 +1,2 @@ +[Para [Str "Soft",Space,Str "hyphen:",Space,Str "[\173]"] +,Para [Str "Non-breaking",Space,Str "hyphen:",Space,Str "[\8209]"]] |