summaryrefslogtreecommitdiff
path: root/test/Tests/Readers/Docx.hs
diff options
context:
space:
mode:
Diffstat (limited to 'test/Tests/Readers/Docx.hs')
-rw-r--r--test/Tests/Readers/Docx.hs401
1 files changed, 401 insertions, 0 deletions
diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs
new file mode 100644
index 000000000..0ba765c93
--- /dev/null
+++ b/test/Tests/Readers/Docx.hs
@@ -0,0 +1,401 @@
+module Tests.Readers.Docx (tests) where
+
+import Codec.Archive.Zip
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Map as M
+import qualified Data.Text as T
+import Data.Maybe
+import System.IO.Unsafe
+import Test.Tasty
+import Test.Tasty.HUnit
+import Tests.Helpers
+import Text.Pandoc
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory)
+import Text.Pandoc.UTF8 as UTF8
+
+-- We define a wrapper around pandoc that doesn't normalize in the
+-- tests. Since we do our own normalization, we want to make sure
+-- we're doing it right.
+
+data NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc}
+ deriving Show
+
+noNorm :: Pandoc -> NoNormPandoc
+noNorm = NoNormPandoc
+
+defopts :: ReaderOptions
+defopts = def{ readerExtensions = getDefaultExtensions "docx" }
+
+instance ToString NoNormPandoc where
+ toString d = T.unpack $ purely (writeNative def{ writerTemplate = s }) $ toPandoc d
+ where s = case d of
+ NoNormPandoc (Pandoc (Meta m) _)
+ | M.null m -> Nothing
+ | otherwise -> Just "" -- need this to get meta output
+
+instance ToPandoc NoNormPandoc where
+ toPandoc = unNoNorm
+
+compareOutput :: ReaderOptions
+ -> FilePath
+ -> FilePath
+ -> IO (NoNormPandoc, NoNormPandoc)
+compareOutput opts docxFile nativeFile = do
+ df <- B.readFile docxFile
+ nf <- UTF8.toText <$> BS.readFile nativeFile
+ p <- runIOorExplode $ readDocx opts df
+ df' <- runIOorExplode $ readNative def nf
+ return (noNorm p, noNorm df')
+
+testCompareWithOptsIO :: ReaderOptions -> String -> FilePath -> FilePath -> IO TestTree
+testCompareWithOptsIO opts name docxFile nativeFile = do
+ (dp, np) <- compareOutput opts docxFile nativeFile
+ return $ test id name (dp, np)
+
+testCompareWithOpts :: ReaderOptions -> String -> FilePath -> FilePath -> TestTree
+testCompareWithOpts opts name docxFile nativeFile =
+ unsafePerformIO $ testCompareWithOptsIO opts name docxFile nativeFile
+
+testCompare :: String -> FilePath -> FilePath -> TestTree
+testCompare = testCompareWithOpts defopts
+
+testForWarningsWithOptsIO :: ReaderOptions -> String -> FilePath -> [String] -> IO TestTree
+testForWarningsWithOptsIO opts name docxFile expected = do
+ df <- B.readFile docxFile
+ logs <- runIOorExplode $ setVerbosity ERROR >> readDocx opts df >> P.getLog
+ let warns = [m | DocxParserWarning m <- logs]
+ return $ test id name (unlines warns, unlines expected)
+
+testForWarningsWithOpts :: ReaderOptions -> String -> FilePath -> [String] -> TestTree
+testForWarningsWithOpts opts name docxFile expected =
+ unsafePerformIO $ testForWarningsWithOptsIO opts name docxFile expected
+
+-- testForWarnings :: String -> FilePath -> [String] -> TestTree
+-- testForWarnings = testForWarningsWithOpts defopts
+
+getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString)
+getMedia archivePath mediaPath = do
+ zf <- B.readFile archivePath >>= return . toArchive
+ return $ findEntryByPath ("word/" ++ mediaPath) zf >>= (Just . fromEntry)
+
+compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool
+compareMediaPathIO mediaPath mediaBag docxPath = do
+ docxMedia <- getMedia docxPath mediaPath
+ let mbBS = case lookupMedia mediaPath mediaBag of
+ Just (_, bs) -> bs
+ Nothing -> error ("couldn't find " ++
+ mediaPath ++
+ " in media bag")
+ docxBS = fromMaybe (error ("couldn't find " ++
+ mediaPath ++
+ " in media bag")) docxMedia
+ return $ mbBS == docxBS
+
+compareMediaBagIO :: FilePath -> IO Bool
+compareMediaBagIO docxFile = do
+ df <- B.readFile docxFile
+ mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag
+ bools <- mapM
+ (\(fp, _, _) -> compareMediaPathIO fp mb docxFile)
+ (mediaDirectory mb)
+ return $ and bools
+
+testMediaBagIO :: String -> FilePath -> IO TestTree
+testMediaBagIO name docxFile = do
+ outcome <- compareMediaBagIO docxFile
+ return $ testCase name (assertBool
+ ("Media didn't match media bag in file " ++ docxFile)
+ outcome)
+
+testMediaBag :: String -> FilePath -> TestTree
+testMediaBag name docxFile = unsafePerformIO $ testMediaBagIO name docxFile
+
+tests :: [TestTree]
+tests = [ testGroup "inlines"
+ [ testCompare
+ "font formatting"
+ "docx/inline_formatting.docx"
+ "docx/inline_formatting.native"
+ , testCompare
+ "font formatting with character styles"
+ "docx/char_styles.docx"
+ "docx/char_styles.native"
+ , testCompare
+ "hyperlinks"
+ "docx/links.docx"
+ "docx/links.native"
+ , testCompare
+ "hyperlinks in <w:instrText> tag"
+ "docx/instrText_hyperlink.docx"
+ "docx/instrText_hyperlink.native"
+ , testCompare
+ "inline image"
+ "docx/image.docx"
+ "docx/image_no_embed.native"
+ , testCompare
+ "VML image"
+ "docx/image_vml.docx"
+ "docx/image_vml.native"
+ , testCompare
+ "inline image in links"
+ "docx/inline_images.docx"
+ "docx/inline_images.native"
+ , testCompare
+ "handling unicode input"
+ "docx/unicode.docx"
+ "docx/unicode.native"
+ , testCompare
+ "literal tabs"
+ "docx/tabs.docx"
+ "docx/tabs.native"
+ , testCompare
+ "special punctuation"
+ "docx/special_punctuation.docx"
+ "docx/special_punctuation.native"
+ , testCompare
+ "normalizing inlines"
+ "docx/normalize.docx"
+ "docx/normalize.native"
+ , testCompare
+ "normalizing inlines deep inside blocks"
+ "docx/deep_normalize.docx"
+ "docx/deep_normalize.native"
+ , testCompare
+ "move trailing spaces outside of formatting"
+ "docx/trailing_spaces_in_formatting.docx"
+ "docx/trailing_spaces_in_formatting.native"
+ , testCompare
+ "inline code (with VerbatimChar style)"
+ "docx/inline_code.docx"
+ "docx/inline_code.native"
+ , testCompare
+ "inline code in subscript and superscript"
+ "docx/verbatim_subsuper.docx"
+ "docx/verbatim_subsuper.native"
+ , testCompare
+ "inlines inside of Structured Document Tags"
+ "docx/sdt_elements.docx"
+ "docx/sdt_elements.native"
+ , testCompare
+ "nested Structured Document Tags"
+ "docx/nested_sdt.docx"
+ "docx/nested_sdt.native"
+ , testCompare
+ "remove anchor spans with nothing pointing to them"
+ "docx/unused_anchors.docx"
+ "docx/unused_anchors.native"
+ , testCompare
+ "collapse overlapping targets (anchor spans)"
+ "docx/overlapping_targets.docx"
+ "docx/overlapping_targets.native"
+ ]
+ , testGroup "blocks"
+ [ testCompare
+ "headers"
+ "docx/headers.docx"
+ "docx/headers.native"
+ , testCompare
+ "headers already having auto identifiers"
+ "docx/already_auto_ident.docx"
+ "docx/already_auto_ident.native"
+ , testCompare
+ "avoid zero-level headers"
+ "docx/0_level_headers.docx"
+ "docx/0_level_headers.native"
+ , testCompare
+ "nested anchor spans in header"
+ "docx/nested_anchors_in_header.docx"
+ "docx/nested_anchors_in_header.native"
+ , testCompare
+ "single numbered item not made into list"
+ "docx/numbered_header.docx"
+ "docx/numbered_header.native"
+ , testCompare
+ "enumerated headers not made into numbered list"
+ "docx/enumerated_headings.docx"
+ "docx/enumerated_headings.native"
+ , testCompare
+ "i18n blocks (headers and blockquotes)"
+ "docx/i18n_blocks.docx"
+ "docx/i18n_blocks.native"
+ , testCompare
+ "lists"
+ "docx/lists.docx"
+ "docx/lists.native"
+ , testCompare
+ "lists continuing after interruption"
+ "docx/lists_continuing.docx"
+ "docx/lists_continuing.native"
+ , testCompare
+ "lists restarting after interruption"
+ "docx/lists_restarting.docx"
+ "docx/lists_restarting.native"
+ , testCompare
+ "definition lists"
+ "docx/definition_list.docx"
+ "docx/definition_list.native"
+ , testCompare
+ "custom defined lists in styles"
+ "docx/german_styled_lists.docx"
+ "docx/german_styled_lists.native"
+ , testCompare
+ "user deletes bullet after list item (=> part of item par)"
+ "docx/dummy_item_after_list_item.docx"
+ "docx/dummy_item_after_list_item.native"
+ , testCompare
+ "user deletes bullet after par (=> new par)"
+ "docx/dummy_item_after_paragraph.docx"
+ "docx/dummy_item_after_paragraph.native"
+ , testCompare
+ "footnotes and endnotes"
+ "docx/notes.docx"
+ "docx/notes.native"
+ , testCompare
+ "links in footnotes and endnotes"
+ "docx/link_in_notes.docx"
+ "docx/link_in_notes.native"
+ , testCompare
+ "blockquotes (parsing indent as blockquote)"
+ "docx/block_quotes.docx"
+ "docx/block_quotes_parse_indent.native"
+ , testCompare
+ "hanging indents"
+ "docx/hanging_indent.docx"
+ "docx/hanging_indent.native"
+ , testCompare
+ "tables"
+ "docx/tables.docx"
+ "docx/tables.native"
+ , testCompare
+ "tables with lists in cells"
+ "docx/table_with_list_cell.docx"
+ "docx/table_with_list_cell.native"
+ , testCompare
+ "tables with one row"
+ "docx/table_one_row.docx"
+ "docx/table_one_row.native"
+ , testCompare
+ "tables with variable width"
+ "docx/table_variable_width.docx"
+ "docx/table_variable_width.native"
+ , testCompare
+ "code block"
+ "docx/codeblock.docx"
+ "docx/codeblock.native"
+ , testCompare
+ "dropcap paragraphs"
+ "docx/drop_cap.docx"
+ "docx/drop_cap.native"
+ ]
+ , testGroup "track changes"
+ [ testCompare
+ "insertion (default)"
+ "docx/track_changes_insertion.docx"
+ "docx/track_changes_insertion_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
+ "insert insertion (accept)"
+ "docx/track_changes_insertion.docx"
+ "docx/track_changes_insertion_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=RejectChanges}
+ "remove insertion (reject)"
+ "docx/track_changes_insertion.docx"
+ "docx/track_changes_insertion_reject.native"
+ , testCompare
+ "deletion (default)"
+ "docx/track_changes_deletion.docx"
+ "docx/track_changes_deletion_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
+ "remove deletion (accept)"
+ "docx/track_changes_deletion.docx"
+ "docx/track_changes_deletion_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=RejectChanges}
+ "insert deletion (reject)"
+ "docx/track_changes_deletion.docx"
+ "docx/track_changes_deletion_reject.native"
+ , testCompareWithOpts def{readerTrackChanges=AllChanges}
+ "keep insertion (all)"
+ "docx/track_changes_deletion.docx"
+ "docx/track_changes_deletion_all.native"
+ , testCompareWithOpts def{readerTrackChanges=AllChanges}
+ "keep deletion (all)"
+ "docx/track_changes_deletion.docx"
+ "docx/track_changes_deletion_all.native"
+ , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
+ "move text (accept)"
+ "docx/track_changes_move.docx"
+ "docx/track_changes_move_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=RejectChanges}
+ "move text (reject)"
+ "docx/track_changes_move.docx"
+ "docx/track_changes_move_reject.native"
+ , testCompareWithOpts def{readerTrackChanges=AllChanges}
+ "move text (all)"
+ "docx/track_changes_move.docx"
+ "docx/track_changes_move_all.native"
+ , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
+ "comments (accept -- no comments)"
+ "docx/comments.docx"
+ "docx/comments_no_comments.native"
+ , testCompareWithOpts def{readerTrackChanges=RejectChanges}
+ "comments (reject -- comments)"
+ "docx/comments.docx"
+ "docx/comments_no_comments.native"
+ , testCompareWithOpts def{readerTrackChanges=AllChanges}
+ "comments (all comments)"
+ "docx/comments.docx"
+ "docx/comments.native"
+ , testCompareWithOpts def{readerTrackChanges=AcceptChanges}
+ "paragraph insertion/deletion (accept)"
+ "docx/paragraph_insertion_deletion.docx"
+ "docx/paragraph_insertion_deletion_accept.native"
+ , testCompareWithOpts def{readerTrackChanges=RejectChanges}
+ "paragraph insertion/deletion (reject)"
+ "docx/paragraph_insertion_deletion.docx"
+ "docx/paragraph_insertion_deletion_reject.native"
+ , testCompareWithOpts def{readerTrackChanges=AllChanges}
+ "paragraph insertion/deletion (all)"
+ "docx/paragraph_insertion_deletion.docx"
+ "docx/paragraph_insertion_deletion_all.native"
+ , testForWarningsWithOpts def{readerTrackChanges=AcceptChanges}
+ "comment warnings (accept -- no warnings)"
+ "docx/comments_warning.docx"
+ []
+ , testForWarningsWithOpts def{readerTrackChanges=RejectChanges}
+ "comment warnings (reject -- no warnings)"
+ "docx/comments_warning.docx"
+ []
+ , testForWarningsWithOpts def{readerTrackChanges=AllChanges}
+ "comment warnings (all)"
+ "docx/comments_warning.docx"
+ ["Docx comment 1 will not retain formatting"]
+ ]
+ , testGroup "media"
+ [ testMediaBag
+ "image extraction"
+ "docx/image.docx"
+ ]
+ , testGroup "custom styles"
+ [ testCompare
+ "custom styles (`+styles`) not enabled (default)"
+ "docx/custom-style-reference.docx"
+ "docx/custom-style-no-styles.native"
+ , testCompareWithOpts
+ def{readerExtensions=extensionsFromList [Ext_styles]}
+ "custom styles (`+styles`) enabled"
+ "docx/custom-style-reference.docx"
+ "docx/custom-style-with-styles.native"
+ ]
+ , testGroup "metadata"
+ [ testCompareWithOpts def{readerStandalone=True}
+ "metadata fields"
+ "docx/metadata.docx"
+ "docx/metadata.native"
+ , testCompareWithOpts def{readerStandalone=True}
+ "stop recording metadata with normal text"
+ "docx/metadata_after_normal.docx"
+ "docx/metadata_after_normal.native"
+ ]
+
+ ]