summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-10-04 21:56:29 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-10-04 21:56:29 -0700
commit 869e800bbbe4008beeb3e3420e689aafd6e67aa7 (patch)
tree fcca43e20faa967041fdf725c45088295a8462f2
parent e32ab441bee323e3a2c6fced7c89bf0c6351d6c8 (diff)
parent dfd06467eadace7b37cb3ebc53c943755d0436eb (diff)
Merge pull request #2432 from hftf/hyphens
Docx Reader: Parse soft and non-breaking hyphen elements
-rw-r--r-- src/Text/Pandoc/Readers/Docx.hs 4
-rw-r--r-- src/Text/Pandoc/Readers/Docx/Parse.hs 4
-rw-r--r-- tests/Tests/Readers/Docx.hs 4
-rw-r--r-- tests/docx/special_punctuation.docx bin 0 -> 8408 bytes
-rw-r--r-- tests/docx/special_punctuation.native 2
5 files changed, 13 insertions, 1 deletion
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 67a97ae85..8b8d1ede1 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -206,11 +206,15 @@ runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak
runElemToInlines (Tab) = space
+runElemToInlines (SoftHyphen) = text "\xad"
+runElemToInlines (NoBreakHyphen) = text "\x2011"
runElemToString :: RunElem -> String
runElemToString (TextRun s) = s
runElemToString (LnBrk) = ['\n']
runElemToString (Tab) = ['\t']
+runElemToString (SoftHyphen) = ['\xad']
+runElemToString (NoBreakHyphen) = ['\x2011']
runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index cce80fb48..53af19dfd 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -208,7 +208,7 @@ data Run = Run RunStyle [RunElem]
| InlineDrawing FilePath B.ByteString
deriving Show
-data RunElem = TextRun String | LnBrk | Tab
+data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
data VertAlign = BaseLn | SupScrpt | SubScrpt
@@ -877,6 +877,8 @@ elemToRunElem ns element
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
+ | isElem ns "w" "softHyphen" element = return SoftHyphen
+ | isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
| isElem ns "w" "sym" element = return (getSymChar ns element)
| otherwise = throwError WrongElem
where
diff --git a/tests/Tests/Readers/Docx.hs b/tests/Tests/Readers/Docx.hs
index 47292bc99..7e3f1979e 100644
--- a/tests/Tests/Readers/Docx.hs
+++ b/tests/Tests/Readers/Docx.hs
@@ -131,6 +131,10 @@ tests = [ testGroup "inlines"
"docx/tabs.docx"
"docx/tabs.native"
, testCompare
+ "special punctuation"
+ "docx/special_punctuation.docx"
+ "docx/special_punctuation.native"
+ , testCompare
"normalizing inlines"
"docx/normalize.docx"
"docx/normalize.native"
diff --git a/tests/docx/special_punctuation.docx b/tests/docx/special_punctuation.docx
new file mode 100644
index 000000000..8e0bb55c9
--- /dev/null
+++ b/tests/docx/special_punctuation.docx
Binary files differ
diff --git a/tests/docx/special_punctuation.native b/tests/docx/special_punctuation.native
new file mode 100644
index 000000000..304289f44
--- /dev/null
+++ b/tests/docx/special_punctuation.native
@@ -0,0 +1,2 @@
+[Para [Str "Soft",Space,Str "hyphen:",Space,Str "[\173]"]
+,Para [Str "Non-breaking",Space,Str "hyphen:",Space,Str "[\8209]"]]