diff options
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 95 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 91 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 15 | ||||
-rw-r--r-- | tests/textile-reader.native | 16 |
4 files changed, 107 insertions, 110 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index ecb3dd262..d63fcd0a7 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -67,17 +67,18 @@ module Text.Pandoc.Parsing ( (>>~), Key, toKey, fromKey, - lookupKeySrc ) + lookupKeySrc, + smartPunctuation ) where import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM ) +import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (Macro) @@ -678,3 +679,91 @@ lookupKeySrc table key = case M.lookup key table of Nothing -> Nothing Just src -> Just src +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart = getState >>= guard . stateSmart + +smartPunctuation :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +smartPunctuation inlineParser = do + failUnlessSmart + choice [ quoted inlineParser, apostrophe, dash, ellipses ] + +apostrophe :: GenParser Char ParserState Inline +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser + +withQuoteContext :: QuoteContext + -> (GenParser Char ParserState Inline) + -> GenParser Char ParserState Inline +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +singleQuoted inlineParser = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +doubleQuoted inlineParser = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ do + contents <- manyTill inlineParser doubleQuoteEnd + return . Quoted DoubleQuote . normalizeSpaces $ contents + +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext context = do + st <- getState + if stateQuoteContext st == context + then fail "already inside quotes" + else return () + +singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart = do + failIfInQuoteContext InSingleQuote + try $ do oneOf "'\8216" + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) + -- possess/contraction + return () + +singleQuoteEnd :: GenParser Char st () +singleQuoteEnd = try $ do + oneOf "'\8217" + notFollowedBy alphaNum + +doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart = do + failIfInQuoteContext InDoubleQuote + try $ do oneOf "\"\8220" + notFollowedBy (oneOf " \t\n") + +doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd = oneOf "\"\8221" >> return () + +ellipses :: GenParser Char st Inline +ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses + +dash :: GenParser Char st Inline +dash = enDash <|> emDash + +enDash :: GenParser Char st Inline +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash + +emDash :: GenParser Char st Inline +emDash = oneOfStrings ["---", "--"] >> return EmDash + + diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ad7d2a0cc..accb4cdc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -27,10 +27,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( - readMarkdown, - smartPunctuation - ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import qualified Data.Map as M @@ -108,12 +105,6 @@ failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = do - state <- getState - if stateSmart state then return () else pzero - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: GenParser Char ParserState Inline @@ -906,7 +897,7 @@ inline = choice inlineParsers <?> "inline" inlineParsers :: [GenParser Char ParserState Inline] inlineParsers = [ str - , smartPunctuation + , smartPunctuation inline , whitespace , endline , code @@ -1047,84 +1038,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy spaceChar >> inline) >>= -- may not contain Space return . Subscript -smartPunctuation :: GenParser Char ParserState Inline -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ do - contents <- manyTill inline doubleQuoteEnd - return . Quoted DoubleQuote . normalizeSpaces $ contents - -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart :: GenParser Char ParserState () -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - try $ do oneOf "'\8216" - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return () - -singleQuoteEnd :: GenParser Char st () -singleQuoteEnd = try $ do - oneOf "'\8217" - notFollowedBy alphaNum - -doubleQuoteStart :: GenParser Char ParserState () -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - try $ do oneOf "\"\8220" - notFollowedBy (oneOf " \t\n") - -doubleQuoteEnd :: GenParser Char st () -doubleQuoteEnd = oneOf "\"\8221" >> return () - -ellipses :: GenParser Char st Inline -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash - -enDash :: GenParser Char st Inline -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = oneOfStrings ["---", "--"] >> return EmDash - whitespace :: GenParser Char ParserState Inline whitespace = spaceChar >> ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index e1d608eed..4c655691a 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -34,21 +34,17 @@ Implemented and parsed: - Lists - blockquote - Inlines : strong, emph, cite, code, deleted, superscript, - subscript, links, smart punctuation + subscript, links Implemented but discarded: - HTML-specific and CSS-specific attributes Left to be implemented: - - Pandoc Meta Information (title, author, date) - footnotes - dimension sign - - uppercase + - all caps - definition lists - continued blocks (ex bq..) - - - - TODO : refactor common patterns across readers : - autolink @@ -58,9 +54,8 @@ TODO : refactor common patterns across readers : -} -module Text.Pandoc.Readers.Textile ( - readTextile - ) where +module Text.Pandoc.Readers.Textile ( readTextile) where + import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing @@ -313,7 +308,6 @@ inlineParsers = [ autoLink , mark , str , htmlSpan --- , smartPunctuation -- from markdown reader , whitespace , endline , rawHtmlInline @@ -328,6 +322,7 @@ inlineParsers = [ autoLink , simpleInline (char '~') Subscript , link , image + , smartPunctuation inline , symbol ] diff --git a/tests/textile-reader.native b/tests/textile-reader.native index 613c6ac6b..f2024c4d8 100644 --- a/tests/textile-reader.native +++ b/tests/textile-reader.native @@ -1,5 +1,5 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) -[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] +[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc",Space,Str "Textile",Space,Str "Reader",Str ".",Space,Str "Part",Space,Str "of",Space,Str "it",Space,Str "comes",LineBreak,Str "from",Space,Str "John",Space,Str "Gruber",Apostrophe,Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite",Str "."] , HorizontalRule , Header 1 [Str "Headers"] , Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")] @@ -8,9 +8,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) , Header 5 [Str "Level",Space,Str "5"] , Header 6 [Str "Level",Space,Str "6"] , Header 1 [Str "Paragraphs"] -, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] +, Para [Str "Here",Apostrophe,Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph",Str "."] , Para [Str "Line",Space,Str "breaks",Space,Str "are",Space,Str "preserved",Space,Str "in",Space,Str "textile",Str ",",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "not",Space,Str "wrap",Space,Str "your",Space,Str "very",LineBreak,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor",Space,Str "and",Space,Str "have",Space,Str "it",Space,Str "rendered",LineBreak,Str "with",Space,Str "no",Space,Str "break",Str "."] -, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] +, Para [Str "Here",Apostrophe,Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet",Str "."] , BulletList [ [ Plain [Str "criminey",Str "."] ] ] @@ -56,9 +56,9 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) , Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str ".",LineBreak,Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str ".",LineBreak,Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."] , Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em",Str "."]],LineBreak,Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str ".",LineBreak,Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]] , Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str ".",LineBreak,Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O",Str ",",Space,Str "H",Subscript [Str "23"],Str "O",Str ",",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O",Str "."] -, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,Str "-",Str "-",Space,Str "automatic",Space,Str "dashes",Str "."] -, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Str ".",Str ".",Str ".",Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] -, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Str "\"",Str "I",Str "'",Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you",Str "\"",Space,Str "for",Space,Str "example",Str "."] +, Para [Str "Dashes",Space,Str ":",Space,Str "How",Space,Str "cool",Space,EmDash,Space,Str "automatic",Space,Str "dashes",Str "."] +, Para [Str "Elipses",Space,Str ":",Space,Str "He",Space,Str "thought",Space,Str "and",Space,Str "thought",Space,Ellipses,Space,Str "and",Space,Str "then",Space,Str "thought",Space,Str "some",Space,Str "more",Str "."] +, Para [Str "Quotes",Space,Str "and",Space,Str "apostrophes",Space,Str ":",Space,Quoted DoubleQuote [Str "I",Apostrophe,Str "d",Space,Str "like",Space,Str "to",Space,Str "thank",Space,Str "you"],Space,Str "for",Space,Str "example",Str "."] , Header 1 [Str "Links"] , Header 2 [Str "Explicit"] , Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")] @@ -82,7 +82,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) [ [ Plain [Str "bella"] ] , [ Plain [Str "45"] ] , [ Plain [Str "f"] ] ] ] -, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Str ".",Str ".",Str "."] +, Para [Str "and",Space,Str "some",Space,Str "text",Space,Str "following",Space,Ellipses] , Header 2 [Str "With",Space,Str "headers"] , Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0] [ [ Plain [Str "name"] ] @@ -123,7 +123,7 @@ Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]}) , RawHtml "</div>" , Para [Str "as",Space,Str "well",Str "."] , BulletList - [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Str "'",Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ] + [ [ Plain [Str "this",Space,Str "<",Str "div",Str ">",Space,Str "won",Apostrophe,Str "t",Space,Str "produce",Space,Str "raw",Space,Str "html",Space,Str "blocks",Space,Str "<",Str "/div",Str ">"] ] , [ Plain [Str "but",Space,Str "this",Space,HtmlInline "<strong>",Space,Str "will",Space,Str "produce",Space,Str "inline",Space,Str "html",Space,HtmlInline "</strong>"] ] ] , Para [Str "Can",Space,Str "you",Space,Str "prove",Space,Str "that",Space,Str "2",Space,Str "<",Space,Str "3",Space,Str "?"] , Header 1 [Str "Acronyms",Space,Str "and",Space,Str "marks"] |