From 47a16065c44eb5028ca9b9e86993fe880ef2a37d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 6 Feb 2017 23:33:23 +0100 Subject: Removed --parse-raw and readerParseRaw. These were confusing. Now we rely on the +raw_tex or +raw_html extension with latex or html input. Thus, instead of --parse-raw -f latex we use -f latex+raw_tex and instead of --parse-raw -f html we use -f html+raw_html --- MANUAL.txt | 11 ----------- src/Text/Pandoc.hs | 1 + src/Text/Pandoc/App.hs | 33 ++++++++++++++------------------- src/Text/Pandoc/Options.hs | 2 -- src/Text/Pandoc/Readers/EPUB.hs | 3 ++- src/Text/Pandoc/Readers/HTML.hs | 14 +++++++------- src/Text/Pandoc/Readers/LaTeX.hs | 19 ++++++++++++------- src/Text/Pandoc/Readers/Markdown.hs | 4 ---- src/Text/Pandoc/Readers/Org/Inlines.hs | 3 ++- src/Text/Pandoc/Readers/Textile.hs | 6 ------ test/Tests/Old.hs | 2 +- test/command/parse-raw.md | 27 +++++++++++++++++++++++++++ 12 files changed, 66 insertions(+), 59 deletions(-) create mode 100644 test/command/parse-raw.md diff --git a/MANUAL.txt b/MANUAL.txt index 33aa51abe..36bc8e5ac 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -391,17 +391,6 @@ General options Reader options -------------- -`-R`, `--parse-raw` - -: Parse untranslatable HTML codes and LaTeX environments as raw HTML - or LaTeX, instead of ignoring them. Affects only HTML and LaTeX - input. Raw HTML can be printed in Markdown, reStructuredText, Emacs Org - mode, HTML, Slidy, Slideous, DZSlides, reveal.js, and S5 output; raw LaTeX - can be printed in Markdown, reStructuredText, Emacs Org mode, LaTeX, and - ConTeXt output. The default is for the readers to omit untranslatable - HTML codes and LaTeX environments. (The LaTeX reader does pass through - untranslatable LaTeX *commands*, even if `-R` is not specified.) - `--base-header-level=`*NUMBER* : Specify the base level for headers (defaults to 1). diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 920bc9655..1a0bbc4ab 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -336,6 +336,7 @@ getDefaultExtensions "context" = extensionsFromList getDefaultExtensions "textile" = extensionsFromList [Ext_old_dashes, Ext_smart, + Ext_raw_html, Ext_auto_identifiers] getDefaultExtensions _ = extensionsFromList [Ext_auto_identifiers] diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 637d3fe49..e99767f0b 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -90,7 +90,7 @@ parseOptions options' defaults = do let (actions, args, unrecognizedOpts, errors) = getOpt' Permute options' rawArgs - let unknownOptionErrors = foldr handelUnrecognizedOption [] unrecognizedOpts + let unknownOptionErrors = foldr handleUnrecognizedOption [] unrecognizedOpts unless (null errors && null unknownOptionErrors) $ err 2 $ concat errors ++ unlines unknownOptionErrors ++ @@ -273,7 +273,6 @@ convertWithOpts opts = do _ -> Nothing let readerOpts = def{ readerStandalone = standalone - , readerParseRaw = optParseRaw opts , readerColumns = optColumns opts , readerTabStop = optTabStop opts , readerIndentedCodeClasses = optIndentedCodeClasses opts @@ -463,7 +462,6 @@ data Opt = Opt , optStandalone :: Bool -- ^ Include header, footer , optReader :: Maybe String -- ^ Reader format , optWriter :: Maybe String -- ^ Writer format - , optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX , optTableOfContents :: Bool -- ^ Include table of contents , optBaseHeaderLevel :: Int -- ^ Base header level , optTemplate :: Maybe FilePath -- ^ Custom template @@ -529,7 +527,6 @@ defaultOpts = Opt , optStandalone = False , optReader = Nothing , optWriter = Nothing - , optParseRaw = False , optTableOfContents = False , optBaseHeaderLevel = 1 , optTemplate = Nothing @@ -777,11 +774,6 @@ options = "DIRECTORY") -- "Directory containing pandoc data files." "" - , Option "R" ["parse-raw"] - (NoArg - (\opt -> return opt { optParseRaw = True })) - "" -- "Parse untranslatable HTML codes and LaTeX environments as raw" - , Option "" ["base-header-level"] (ReqArg (\arg opt -> @@ -1403,22 +1395,25 @@ compileInfo = "\nCompiled with pandoc-types " ++ VERSION_pandoc_types ++ ", texmath " ++ VERSION_texmath ++ ", skylighting " ++ VERSION_skylighting -handelUnrecognizedOption :: String -> [String] -> [String] -handelUnrecognizedOption "--smart" = - (("--smart has been removed. Use +smart or -smart extension instead.\n" ++ +handleUnrecognizedOption :: String -> [String] -> [String] +handleUnrecognizedOption "--smart" = + (("--smart/-S has been removed. Use +smart or -smart extension instead.\n" ++ "For example: pandoc -f markdown+smart -t markdown-smart.") :) -handelUnrecognizedOption "-S" = handelUnrecognizedOption "--smart" -handelUnrecognizedOption "--old-dashes" = +handleUnrecognizedOption "-S" = handleUnrecognizedOption "--smart" +handleUnrecognizedOption "--old-dashes" = ("--old-dashes has been removed. Use +old_dashes extension instead." :) -handelUnrecognizedOption "--no-wrap" = +handleUnrecognizedOption "--no-wrap" = ("--no-wrap has been removed. Use --wrap=none instead." :) -handelUnrecognizedOption "--chapters" = +handleUnrecognizedOption "--chapters" = ("--chapters has been removed. Use --top-level-division=chapter instead." :) -handelUnrecognizedOption "--reference-docx" = +handleUnrecognizedOption "--reference-docx" = ("--reference-docx has been removed. Use --reference-doc instead." :) -handelUnrecognizedOption "--reference-odt" = +handleUnrecognizedOption "--reference-odt" = ("--reference-odt has been removed. Use --reference-doc instead." :) -handelUnrecognizedOption x = +handleUnrecognizedOption "--parse-raw" = + (("--parse-raw/-R has been removed. Use +raw_html or +raw_tex extension.\n") :) +handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw" +handleUnrecognizedOption x = (("Unknown option " ++ x ++ ".") :) uppercaseFirstLetter :: String -> String diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 38c083dfd..2e11a64d0 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -55,7 +55,6 @@ import GHC.Generics (Generic) data ReaderOptions = ReaderOptions{ readerExtensions :: Extensions -- ^ Syntax extensions , readerStandalone :: Bool -- ^ Standalone document with header - , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX , readerColumns :: Int -- ^ Number of columns in terminal , readerTabStop :: Int -- ^ Tab stop , readerApplyMacros :: Bool -- ^ Apply macros to TeX math @@ -69,7 +68,6 @@ instance Default ReaderOptions where def = ReaderOptions{ readerExtensions = emptyExtensions , readerStandalone = False - , readerParseRaw = False , readerColumns = 80 , readerTabStop = 4 , readerApplyMacros = True diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index f24adb5b1..851d4771f 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -13,6 +13,7 @@ import Text.Pandoc.Definition hiding (Attr) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Walk (walk, query) import Text.Pandoc.Options ( ReaderOptions(..), Verbosity(..)) +import Text.Pandoc.Extensions (enableExtension, Extension(Ext_raw_html)) import Text.Pandoc.Shared (escapeURI, collapseFilePath, addMetaField) import Network.URI (unEscapeString) import Text.Pandoc.MediaBag (MediaBag, insertMedia) @@ -66,7 +67,7 @@ archiveToEPUB os archive = do P.setMediaBag $ fetchImages (M.elems items) root archive ast return ast where - os' = os {readerParseRaw = True} + os' = os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)} parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do report DEBUG ("parseSpineElem called with path " ++ show path) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0bb837ba9..219ee81b6 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,9 +45,9 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, renderTags', addMetaField , escapeURI, safeRead ) -import Text.Pandoc.Options (ReaderOptions(readerParseRaw), - Verbosity(..), Extension (Ext_epub_html_exts, - Ext_native_divs, Ext_native_spans)) +import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, + Verbosity(..), Extension (Ext_epub_html_exts, + Ext_raw_html, Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk import qualified Data.Map as M @@ -367,8 +367,8 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - parseRaw <- getOption readerParseRaw - if parseRaw && not (null raw) + exts <- getOption readerExtensions + if extensionEnabled Ext_raw_html exts && not (null raw) then return $ B.rawBlock "html" raw else return mempty @@ -690,8 +690,8 @@ pRawHtmlInline = do <|> if inplain then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag - parseRaw <- getOption readerParseRaw - if parseRaw + exts <- getOption readerExtensions + if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" $ renderTags' [result] else return mempty diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 86ff2b83a..ea284efef 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -303,14 +303,18 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> withRaw optargs) + (do exts <- getOption readerExtensions + guard $ extensionEnabled Ext_raw_tex exts + withRaw optargs) ignoreBlocks :: PandocMonad m => String -> (String, LP m Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getOption readerParseRaw >>= guard >> withRaw optargs) + (do exts <- getOption readerExtensions + guard $ extensionEnabled Ext_raw_tex exts + withRaw optargs) blockCommands :: PandocMonad m => M.Map String (LP m Blocks) blockCommands = M.fromList $ @@ -423,7 +427,7 @@ inlineCommand = try $ do name <- anyControlSeq guard $ name /= "begin" && name /= "end" guard $ not $ isBlockCommand name - parseRaw <- getOption readerParseRaw + exts <- getOption readerExtensions star <- option "" (string "*") let name' = name ++ star let raw = do @@ -433,7 +437,7 @@ inlineCommand = try $ do transformed <- applyMacros' rawcommand if transformed /= rawcommand then parseFromString inlines transformed - else if parseRaw + else if extensionEnabled Ext_raw_tex exts then return $ rawInline "latex" rawcommand else return mempty (lookupListDefault mzero [name',name] inlineCommands <* @@ -441,7 +445,8 @@ inlineCommand = try $ do <|> raw unlessParseRaw :: PandocMonad m => LP m () -unlessParseRaw = getOption readerParseRaw >>= guard . not +unlessParseRaw = getOption readerExtensions >>= + guard . not . extensionEnabled Ext_raw_tex isBlockCommand :: String -> Bool isBlockCommand s = s `M.member` (blockCommands :: M.Map String (LP PandocPure Blocks)) @@ -923,10 +928,10 @@ inlineEnvironment = try $ do rawEnv :: PandocMonad m => String -> LP m Blocks rawEnv name = do - parseRaw <- getOption readerParseRaw + exts <- getOption readerExtensions rawOptions <- mconcat <$> many rawopt let addBegin x = "\\begin{" ++ name ++ "}" ++ rawOptions ++ x - if parseRaw + if extensionEnabled Ext_raw_tex exts then (rawBlock "latex" . addBegin) <$> (withRaw (env name blocks) >>= applyMacros' . snd) else env name blocks diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1d8f7c78e..0c10889d4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -359,10 +359,6 @@ kvPair allowEmpty = try $ do parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc parseMarkdown = do - -- markdown allows raw HTML - updateState $ \state -> state { stateOptions = - let oldOpts = stateOptions state in - oldOpts{ readerParseRaw = True } } optional titleBlock blocks <- parseBlocks st <- getState diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index bcf8f6df9..f3671641a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -806,7 +806,8 @@ inlineLaTeX = try $ do where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 state :: ParserState - state = def{ stateOptions = def{ readerParseRaw = True }} + state = def{ stateOptions = def{ readerExtensions = + enableExtension Ext_raw_tex (readerExtensions def) } } texMathToPandoc :: String -> Maybe [Inline] texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 804ee39aa..07fb65b20 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -85,12 +85,6 @@ readTextile opts s = do -- | Generate a Pandoc ADT from a textile document parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc parseTextile = do - -- textile allows raw HTML and does smart punctuation by default, - -- but we do not enable smart punctuation unless it is explicitly - -- asked for, for better conversion to other light markup formats - oldOpts <- stateOptions `fmap` getState - updateState $ \state -> state{ stateOptions = - oldOpts{ readerParseRaw = True } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index ba8ea8240..d9877c8a0 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -46,7 +46,7 @@ tests = [ testGroup "markdown" , testGroup "latex" [ testGroup "writer" (writerTests "latex" ++ lhsWriterTests "latex") , testGroup "reader" - [ test "basic" ["-r", "latex", "-w", "native", "-s", "-R"] + [ test "basic" ["-r", "latex+raw_tex", "-w", "native", "-s"] "latex-reader.latex" "latex-reader.native" , lhsReaderTest "latex+lhs" ] diff --git a/test/command/parse-raw.md b/test/command/parse-raw.md new file mode 100644 index 000000000..f4e493c69 --- /dev/null +++ b/test/command/parse-raw.md @@ -0,0 +1,27 @@ +``` +% pandoc -f latex+raw_tex -t markdown +\emph{Hi \foo{there}} +^D +*Hi \foo{there}* +``` + +``` +% pandoc -f latex -t markdown +\emph{Hi \foo{there}} +^D +*Hi* +``` + +``` +% pandoc -f html+raw_html -t markdown +Hi there +^D +*Hi there* +``` + +``` +% pandoc -f html -t markdown +Hi there +^D +*Hi there* +``` -- cgit v1.2.3