From a8e2199034679c07411c76c42ab1ffb52b170029 Mon Sep 17 00:00:00 2001 From: fiddlosopher Date: Wed, 15 Aug 2007 06:00:58 +0000 Subject: Major code cleanup in all modules. (Removed unneeded imports, reformatted, etc.) More major changes are documented below: + Removed Text.Pandoc.ParserCombinators and moved all its definitions to Text.Pandoc.Shared. + In Text.Pandoc.Shared: - Removed unneeded 'try' in blanklines. - Removed endsWith function and rewrote functions to use isSuffixOf instead. - Added >>~ combinator. - Rewrote stripTrailingNewlines, removeLeadingSpaces. + Moved Text.Pandoc.Entities -> Text.Pandoc.CharacterReferences. - Removed unneeded functions charToEntity, charToNumericalEntity. - Renamed functions using proper terminology (character references, not entities). decodeEntities -> decodeCharacterReferences, characterEntity -> characterReference. - Moved escapeStringToXML to Docbook writer, which is the only thing that uses it. - Removed old entity parser in HTML and Markdown readers; replaced with new charRef parser in Text.Pandoc.Shared. + Fixed accent bug in Text.Pandoc.Readers.LaTeX: \^{} now correctly parses as a '^' character. + Text.Pandoc.ASCIIMathML is no longer an exported module. git-svn-id: https://pandoc.googlecode.com/svn/trunk@835 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Readers/Markdown.hs | 662 +++++++++++++++--------------------- 1 file changed, 273 insertions(+), 389 deletions(-) (limited to 'src/Text/Pandoc/Readers/Markdown.hs') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3ccb74ba7..80a8507b4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -31,28 +31,24 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( findIndex, sortBy, transpose, isSuffixOf, intersect, lookup ) +import Data.List ( transpose, isSuffixOf, lookup, sortBy ) +import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) -import Text.Pandoc.ParserCombinators import Text.Pandoc.Definition -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Shared +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, anyHtmlEndTag, htmlEndTag, extractTagType, htmlBlockElement ) -import Text.Pandoc.Entities ( characterEntity, decodeEntities ) +import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -> String -> Pandoc readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n") --- | Parse markdown string with default options and print result (for testing). -testString :: String -> IO () -testString = testStringWith parseMarkdown - -- -- Constants and data structure definitions -- @@ -70,19 +66,16 @@ specialChars = "\\[]*_~`<>$!^-.&'\"" -- auxiliary functions -- --- | Skip a single endline if there is one. -skipEndline = option Space endline - indentSpaces = try $ do state <- getState let tabStop = stateTabStop state try (count tabStop (char ' ')) <|> - (do{many (char ' '); string "\t"}) "indentation" + (many (char ' ') >> string "\t") "indentation" nonindentSpaces = do state <- getState let tabStop = stateTabStop state - choice (map (\n -> (try (count n (char ' ')))) (reverse [0..(tabStop - 1)])) + choice $ map (\n -> (try (count n (char ' ')))) $ reverse [0..(tabStop - 1)] -- | Fail unless we're at beginning of a line. failUnlessBeginningOfLine = do @@ -94,20 +87,21 @@ failUnlessSmart = do state <- getState if stateSmart state then return () else fail "Smart typography feature" +-- | Parse an inline Str element with a given content. +inlineString str = try $ do + (Str res) <- inline + if res == str then return res else fail $ "unexpected Str content" + -- | Parse a sequence of inline elements between a string -- @opener@ and a string @closer@, including inlines -- between balanced pairs of @opener@ and a @closer@. inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline] inlinesInBalanced opener closer = try $ do - let openerSymbol = try $ do - res <- inline - if res == Str opener - then return res - else pzero - try (string opener) - result <- manyTill ( (do lookAhead openerSymbol - bal <- inlinesInBalanced opener closer - return $ [Str opener] ++ bal ++ [Str closer]) + string opener + result <- manyTill ( (do lookAhead (inlineString opener) + -- because it might be a link... + bal <- inlinesInBalanced opener closer + return $ [Str opener] ++ bal ++ [Str closer]) <|> (count 1 inline)) (try (string closer)) return $ concat result @@ -116,59 +110,55 @@ inlinesInBalanced opener closer = try $ do -- document structure -- -titleLine = try (do - char '%' - skipSpaces - line <- manyTill inline newline - return line) +titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline -authorsLine = try (do +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;") newline - return (map (decodeEntities . removeLeadingTrailingSpace) authors)) + return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors -dateLine = try (do +dateLine = try $ do char '%' skipSpaces date <- many (noneOf "\n") newline - return (decodeEntities $ removeTrailingSpace date)) + return $ decodeCharacterReferences $ removeTrailingSpace date -titleBlock = try (do +titleBlock = try $ do failIfStrict title <- option [] titleLine author <- option [] authorsLine date <- option "" dateLine - option "" blanklines - return (title, author, date)) + optional blanklines + return (title, author, date) parseMarkdown = do - updateState (\state -> state { stateParseRaw = True }) -- markdown allows raw HTML + -- markdown allows raw HTML + updateState (\state -> state { stateParseRaw = True }) (title, author, date) <- option ([],[],"") titleBlock -- go through once just to get list of reference keys - refs <- manyTill (referenceKey <|> (do l <- lineClump - return (LineClump l))) eof + refs <- manyTill (referenceKey <|> (lineClump >>= return . LineClump)) eof let keys = map (\(KeyBlock label target) -> (label, target)) $ filter isKeyBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs setInput $ concat rawlines -- with keys stripped out updateState (\state -> state { stateKeys = keys }) - -- now go through for notes - refs <- manyTill (noteBlock <|> (do l <- lineClump - return (LineClump l))) eof + -- now go through for notes (which may contain references - hence 2nd pass) + refs <- manyTill (noteBlock <|> (lineClump >>= return . LineClump)) eof let notes = map (\(NoteBlock label blocks) -> (label, blocks)) $ filter isNoteBlock refs let rawlines = map (\(LineClump ln) -> ln) $ filter isLineClump refs - setInput $ concat rawlines -- with note blocks and keys stripped out + -- go through a 3rd time, with note blocks and keys stripped out + setInput $ concat rawlines updateState (\state -> state { stateNotes = notes }) - blocks <- parseBlocks -- go through again, for real + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - return (Pandoc (Meta title author date) blocks') + return $ Pandoc (Meta title author date) blocks' -- --- initial pass for references +-- initial pass for references and notes -- referenceKey = try $ do @@ -176,9 +166,9 @@ referenceKey = try $ do label <- reference char ':' skipSpaces - option ' ' (char '<') + optional (char '<') src <- many (noneOf "> \n\t") - option ' ' (char '>') + optional (char '>') tit <- option "" title blanklines return $ KeyBlock label (removeTrailingSpace src, tit) @@ -189,33 +179,28 @@ noteMarker = try $ do manyTill (noneOf " \t\n") (char ']') rawLine = try $ do - notFollowedBy' blankline + notFollowedBy blankline notFollowedBy' noteMarker contents <- many1 nonEndline - end <- option "" (do - newline - option "" indentSpaces - return "\n") - return (contents ++ end) + end <- option "" (newline >> optional indentSpaces >> return "\n") + return $ contents ++ end -rawLines = do - lines <- many1 rawLine - return (concat lines) +rawLines = many1 rawLine >>= return . concat noteBlock = try $ do failIfStrict ref <- noteMarker char ':' - option ' ' blankline - option "" indentSpaces - raw <- sepBy rawLines (try (do {blankline; indentSpaces})) - option "" blanklines + optional blankline + optional indentSpaces + raw <- sepBy rawLines (try (blankline >> indentSpaces)) + optional blanklines -- parse the extracted text, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (NoteBlock ref contents) + return $ NoteBlock ref contents -- -- parsing blocks @@ -239,48 +224,39 @@ block = choice [ header -- header blocks -- -header = choice [ setextHeader, atxHeader ] "header" +header = setextHeader <|> atxHeader "header" -atxHeader = try (do +atxHeader = try $ do lead <- many1 (char '#') - notFollowedBy (char '.') -- this would be a list - notFollowedBy (char ')') + notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces txt <- manyTill inline atxClosing - return (Header (length lead) (normalizeSpaces txt))) + return $ Header (length lead) (normalizeSpaces txt) -atxClosing = try (do - skipMany (char '#') - skipSpaces - newline - option "" blanklines) +atxClosing = try $ skipMany (char '#') >> skipSpaces >> newline >> + option "" blanklines setextHeader = choice $ - map (\x -> setextH x) (enumFromTo 1 (length setextHChars)) + map (\x -> setextH x) $ enumFromTo 1 (length setextHChars) -setextH n = try (do +setextH n = try $ do txt <- many1Till inline newline many1 (char (setextHChars !! (n-1))) skipSpaces newline - option "" blanklines - return (Header n (normalizeSpaces txt))) + optional blanklines + return $ Header n (normalizeSpaces txt) -- -- hrule block -- -hruleWith chr = try (do - skipSpaces - char chr - skipSpaces - char chr - skipSpaces - char chr - skipMany (oneOf (chr:spaceChars)) +hruleWith chr = try $ do + count 3 (skipSpaces >> char chr) + skipMany (skipSpaces >> char chr) newline - option "" blanklines - return HorizontalRule) + optional blanklines + return HorizontalRule hrule = choice (map hruleWith hruleChars) "hrule" @@ -288,67 +264,55 @@ hrule = choice (map hruleWith hruleChars) "hrule" -- code blocks -- -indentedLine = try (do +indentedLine = try $ do indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -- two or more indented lines, possibly separated by blank lines -indentedBlock = try (do +indentedBlock = try $ do res1 <- indentedLine blanks <- many blankline - res2 <- choice [indentedBlock, indentedLine] - return (res1 ++ blanks ++ res2)) + res2 <- indentedBlock <|> indentedLine + return $ res1 ++ blanks ++ res2 -codeBlock = do - result <- choice [indentedBlock, indentedLine] - option "" blanklines - return (CodeBlock (stripTrailingNewlines result)) +codeBlock = (indentedBlock <|> indentedLine) >>~ optional blanklines >>= + return . CodeBlock . stripTrailingNewlines -- -- block quotes -- -emacsBoxQuote = try (do +emacsBoxQuote = try $ do failIfStrict string ",----" manyTill anyChar newline - raw <- manyTill (try (do - char '|' - option ' ' (char ' ') - result <- manyTill anyChar newline - return result)) - (string "`----") - manyTill anyChar newline - option "" blanklines - return raw) + raw <- manyTill + (try (char '|' >> optional (char ' ') >> manyTill anyChar newline)) + (try (string "`----")) + blanklines + return raw -emailBlockQuoteStart = try (do - nonindentSpaces - char '>' - option ' ' (char ' ') - return "> ") +emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote = try (do +emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (choice [nonEndline, - (try (do - endline - notFollowedBy' emailBlockQuoteStart - return '\n'))])) - (try (do {newline; emailBlockQuoteStart})) - newline <|> (do{ eof; return '\n' }) - option "" blanklines - return raw) + raw <- sepBy (many (nonEndline <|> + (try (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n')))) + (try (newline >> emailBlockQuoteStart)) + newline <|> (eof >> return '\n') + optional blanklines + return raw blockQuote = do - raw <- choice [ emailBlockQuote, emacsBoxQuote ] + raw <- emailBlockQuote <|> emacsBoxQuote -- parse the extracted block, which may contain various block elements: rest <- getInput setInput $ (joinWithSep "\n" raw) ++ "\n\n" contents <- parseBlocks setInput rest - return (BlockQuote contents) + return $ BlockQuote contents -- -- list blocks @@ -357,7 +321,7 @@ blockQuote = do list = choice [ bulletList, orderedList, definitionList ] "list" bulletListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces notFollowedBy' hrule -- because hrules start out just like lists oneOf bulletListMarkers @@ -365,7 +329,7 @@ bulletListStart = try $ do skipSpaces anyOrderedListStart = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -375,7 +339,7 @@ anyOrderedListStart = try $ do else anyOrderedListMarker orderedListStart style delim = try $ do - option ' ' newline -- if preceded by a Plain block in a list context + optional newline -- if preceded by a Plain block in a list context nonindentSpaces state <- getState if stateStrict state @@ -387,40 +351,39 @@ orderedListStart style delim = try $ do skipSpaces -- parse a line of a list item (start = parser for beginning of list item) -listLine start = try (do +listLine start = try $ do notFollowedBy' start notFollowedBy blankline - notFollowedBy' (do - indentSpaces - many (spaceChar) - choice [bulletListStart, anyOrderedListStart >> return ()]) + notFollowedBy' (do indentSpaces + many (spaceChar) + bulletListStart <|> (anyOrderedListStart >> return ())) line <- manyTill anyChar newline - return (line ++ "\n")) + return $ line ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem start = try (do +rawListItem start = try $ do start result <- many1 (listLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation start = try (do +listContinuation start = try $ do lookAhead indentSpaces result <- many1 (listContinuationLine start) blanks <- many blankline - return ((concat result) ++ blanks)) + return $ concat result ++ blanks -listContinuationLine start = try (do - notFollowedBy' blankline +listContinuationLine start = try $ do + notFollowedBy blankline notFollowedBy' start - option "" indentSpaces + optional indentSpaces result <- manyTill anyChar newline - return (result ++ "\n")) + return $ result ++ "\n" -listItem start = try (do +listItem start = try $ do first <- rawListItem start continuations <- many (listContinuation start) -- parsing with ListItemState forces markers at beginning of lines to @@ -436,18 +399,15 @@ listItem start = try (do contents <- parseBlocks setInput rest updateState (\st -> st {stateParserContext = oldContext}) - return contents) + return contents -orderedList = try (do +orderedList = do (start, style, delim) <- lookAhead anyOrderedListStart items <- many1 (listItem (orderedListStart style delim)) - let items' = compactify items - return (OrderedList (start, style, delim) items')) + return $ OrderedList (start, style, delim) $ compactify items -bulletList = try (do - items <- many1 (listItem bulletListStart) - let items' = compactify items - return (BulletList items')) +bulletList = many1 (listItem bulletListStart) >>= + return . BulletList . compactify -- definition lists @@ -470,9 +430,9 @@ defRawBlock = try $ do char ':' state <- getState let tabStop = stateTabStop state - try (count (tabStop - 1) (char ' ')) <|> (do{many (char ' '); string "\t"}) + try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t") firstline <- anyLine - rawlines <- many (do {notFollowedBy' blankline; indentSpaces; anyLine}) + rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine) trailing <- option "" blanklines return $ firstline ++ "\n" ++ unlines rawlines ++ trailing @@ -488,71 +448,62 @@ definitionList = do -- paragraph block -- -para = try (do +para = try $ do result <- many1 inline newline st <- getState if stateStrict st - then choice [lookAhead blockQuote, lookAhead header, - (do{blanklines; return Null})] - else choice [(do{lookAhead emacsBoxQuote; return Null}), - (do{blanklines; return Null})] - let result' = normalizeSpaces result - return (Para result')) - -plain = do - result <- many1 inline - let result' = normalizeSpaces result - return (Plain result') + then choice [ lookAhead blockQuote, lookAhead header, + (blanklines >> return Null) ] + else choice [ lookAhead emacsBoxQuote >> return Null, + (blanklines >> return Null) ] + return $ Para $ normalizeSpaces result + +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- raw html -- -htmlElement = choice [strictHtmlBlock, - htmlBlockElement] "html element" +htmlElement = strictHtmlBlock <|> htmlBlockElement "html element" htmlBlock = do st <- getState if stateStrict st - then do - failUnlessBeginningOfLine - first <- htmlElement - finalSpace <- many (oneOf spaceChars) - finalNewlines <- many newline - return (RawHtml (first ++ finalSpace ++ finalNewlines)) + then try $ do failUnlessBeginningOfLine + first <- htmlElement + finalSpace <- many (oneOf spaceChars) + finalNewlines <- many newline + return $ RawHtml $ first ++ finalSpace ++ finalNewlines else rawHtmlBlocks -- True if tag is self-closing isSelfClosing tag = isSuffixOf "/>" $ filter (\c -> (not (c `elem` " \n\t"))) tag -strictHtmlBlock = try (do +strictHtmlBlock = try $ do tag <- anyHtmlBlockTag let tag' = extractTagType tag if isSelfClosing tag || tag' == "hr" then return tag - else do - contents <- many (do{notFollowedBy' (htmlEndTag tag'); - htmlElement <|> (count 1 anyChar)}) - end <- htmlEndTag tag' - return $ tag ++ (concat contents) ++ end) + else do contents <- many (notFollowedBy' (htmlEndTag tag') >> + (htmlElement <|> (count 1 anyChar))) + end <- htmlEndTag tag' + return $ tag ++ concat contents ++ end -rawHtmlBlocks = try (do +rawHtmlBlocks = try $ do htmlBlocks <- many1 rawHtmlBlock let combined = concatMap (\(RawHtml str) -> str) htmlBlocks - let combined' = if (last combined == '\n') + let combined' = if not (null combined) && last combined == '\n' then init combined -- strip extra newline else combined - return (RawHtml combined')) + return $ RawHtml combined' -- -- LaTeX -- -rawLaTeXEnvironment' = do - failIfStrict - rawLaTeXEnvironment +rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment -- -- Tables @@ -560,54 +511,46 @@ rawLaTeXEnvironment' = do -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine ch = do - dashes <- many1 (char ch) - sp <- many spaceChar - return $ (length dashes, length $ dashes ++ sp) +dashedLine ch = try $ do + dashes <- many1 (char ch) + sp <- many spaceChar + return $ (length dashes, length $ dashes ++ sp) -- Parse a table header with dashed lines of '-' preceded by -- one line of text. -simpleTableHeader = do - rawContent <- anyLine - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeads = tail $ splitByIndices (init indices) rawContent - let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths - return $ (rawHeads, aligns, indices) +simpleTableHeader = try $ do + rawContent <- anyLine + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeads = tail $ splitByIndices (init indices) rawContent + let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths + return (rawHeads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. -tableFooter = try $ do - nonindentSpaces - many1 (dashedLine '-') - blanklines +tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep = try $ do - nonindentSpaces - many1 (dashedLine '-') - string "\n" +tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n" -- Parse a raw line and split it into chunks by indices. rawTableLine indices = do - notFollowedBy' (blanklines <|> tableFooter) - line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line + notFollowedBy' (blanklines <|> tableFooter) + line <- many1Till anyChar newline + return $ map removeLeadingTrailingSpace $ tail $ + splitByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). -tableLine indices = try $ do - rawline <- rawTableLine indices - mapM (parseFromString (many plain)) rawline +tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow indices = try $ do - colLines <- many1 (rawTableLine indices) - option "" blanklines - let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + colLines <- many1 (rawTableLine indices) + optional blanklines + let cols = map unlines $ transpose colLines + mapM (parseFromString (many plain)) cols -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -615,22 +558,22 @@ widthsFromIndices :: Int -- Number of columns on terminal -> [Float] -- Fractional relative sizes of columns widthsFromIndices _ [] = [] widthsFromIndices numColumns indices = - let lengths = zipWith (-) indices (0:indices) - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs + let lengths = zipWith (-) indices (0:indices) + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption = try $ do - nonindentSpaces - string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + nonindentSpaces + string "Table:" + result <- many1 inline + blanklines + return $ normalizeSpaces result -- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. tableWith headerParser lineParser footerParser = try $ do @@ -653,30 +596,19 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines multilineTable = tableWith multilineTableHeader multilineRow tableFooter multilineTableHeader = try $ do - tableSep - rawContent <- many1 (do{notFollowedBy' tableSep; - many1Till anyChar newline}) - initSp <- nonindentSpaces - dashes <- many1 (dashedLine '-') - newline - let (lengths, lines) = unzip dashes - let indices = scanl (+) (length initSp) lines - let rawHeadsList = transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) - rawContent - let rawHeads = map (joinWithSep " ") rawHeadsList - let aligns = zipWith alignType rawHeadsList lengths - return $ ((map removeLeadingTrailingSpace rawHeads), - aligns, indices) - --- Returns the longest of a list of strings. -longest :: [String] -> String -longest [] = "" -longest [x] = x -longest (x:xs) = - if (length x) >= (maximum $ map length xs) - then x - else longest xs + tableSep + rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline) + initSp <- nonindentSpaces + dashes <- many1 (dashedLine '-') + newline + let (lengths, lines) = unzip dashes + let indices = scanl (+) (length initSp) lines + let rawHeadsList = transpose $ map + (\ln -> tail $ splitByIndices (init indices) ln) + rawContent + let rawHeads = map (joinWithSep " ") rawHeadsList + let aligns = zipWith alignType rawHeadsList lengths + return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -684,19 +616,17 @@ longest (x:xs) = alignType :: [String] -> Int -> Alignment alignType [] len = AlignDefault alignType strLst len = - let str = longest $ map removeTrailingSpace strLst - leftSpace = if null str then False else ((str !! 0) `elem` " \t") - rightSpace = (length str < len || (str !! (len - 1)) `elem` " \t") in - case (leftSpace, rightSpace) of + let str = head $ sortBy (comparing length) $ + map removeTrailingSpace strLst + leftSpace = if null str then False else (str !! 0) `elem` " \t" + rightSpace = length str < len || (str !! (len - 1)) `elem` " \t" + in case (leftSpace, rightSpace) of (True, False) -> AlignRight (False, True) -> AlignLeft - (True, True) -> AlignCenter + (True, True) -> AlignCenter (False, False) -> AlignDefault -table = do - failIfStrict - result <- simpleTable <|> multilineTable "table" - return result +table = failIfStrict >> (simpleTable <|> multilineTable) "table" -- -- inline @@ -704,7 +634,7 @@ table = do inline = choice [ rawLaTeXInline' , escapedChar - , entity + , charRef , note , inlineNote , link @@ -734,80 +664,64 @@ escapedChar = try $ do result <- if stateStrict state then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) - return (Str [result]) + return $ Str [result] -ltSign = try (do +ltSign = try $ do notFollowedBy (noneOf "<") -- continue only if it's a < notFollowedBy' rawHtmlBlocks -- don't return < if it starts html char '<' - return (Str ['<'])) + return $ Str ['<'] specialCharsMinusLt = filter (/= '<') specialChars symbol = do result <- oneOf specialCharsMinusLt - return (Str [result]) + return $ Str [result] -- parses inline code, between n `s and n `s -code = try (do +code = try $ do starts <- many1 (char '`') let num = length starts result <- many1Till anyChar (try (count num (char '`'))) -- get rid of any internal newlines - let result' = removeLeadingTrailingSpace $ joinWithSep " " $ lines result - return (Code result')) + return $ Code $ removeLeadingTrailingSpace $ joinWithSep " " $ lines result -mathWord = many1 (choice [ (noneOf " \t\n\\$"), - (try (do - c <- char '\\' - notFollowedBy (char '$') - return c))]) +mathWord = many1 ((noneOf " \t\n\\$") <|> + (try (char '\\') >>~ notFollowedBy (char '$'))) -math = try (do +math = try $ do failIfStrict char '$' notFollowedBy space words <- sepBy1 mathWord (many1 space) char '$' - return (TeX ("$" ++ (joinWithSep " " words) ++ "$"))) + return $ TeX ("$" ++ (joinWithSep " " words) ++ "$") -emph = do - result <- choice [ (enclosed (char '*') (char '*') inline), - (enclosed (char '_') (char '_') inline) ] - return $ Emph (normalizeSpaces result) +emph = ((enclosed (char '*') (char '*') inline) <|> + (enclosed (char '_') (char '_') inline)) >>= + return . Emph . normalizeSpaces -strong = do - result <- (enclosed (string "**") (string "**") inline) <|> - (enclosed (string "__") (string "__") inline) - return $ Strong (normalizeSpaces result) +strong = ((enclosed (string "**") (string "**") inline) <|> + (enclosed (string "__") (string "__") inline)) >>= + return . Strong . normalizeSpaces -strikeout = do - failIfStrict - result <- enclosed (string "~~") (string "~~") inline - return $ Strikeout (normalizeSpaces result) +strikeout = failIfStrict >> enclosed (string "~~") (string "~~") inline >>= + return . Strikeout . normalizeSpaces -superscript = do - failIfStrict - result <- enclosed (char '^') (char '^') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Superscript result +superscript = failIfStrict >> enclosed (char '^') (char '^') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Superscript -subscript = do - failIfStrict - result <- enclosed (char '~') (char '~') - (notFollowedBy' whitespace >> inline) -- may not contain Space - return $ Subscript result +subscript = failIfStrict >> enclosed (char '~') (char '~') + (notFollowedBy' whitespace >> inline) >>= -- may not contain Space + return . Subscript -smartPunctuation = do - failUnlessSmart - choice [ quoted, apostrophe, dash, ellipses ] +smartPunctuation = failUnlessSmart >> + choice [ quoted, apostrophe, dash, ellipses ] -apostrophe = do - char '\'' <|> char '\8217' - return Apostrophe +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe -quoted = do - doubleQuoted <|> singleQuoted +quoted = doubleQuoted <|> singleQuoted withQuoteContext context parser = do oldState <- getState @@ -820,15 +734,13 @@ withQuoteContext context parser = do singleQuoted = try $ do singleQuoteStart - withQuoteContext InSingleQuote $ do - result <- many1Till inline singleQuoteEnd - return $ Quoted SingleQuote $ normalizeSpaces result + withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces doubleQuoted = try $ do doubleQuoteStart - withQuoteContext InDoubleQuote $ do - result <- many1Till inline doubleQuoteEnd - return $ Quoted DoubleQuote $ normalizeSpaces result + withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= + return . Quoted DoubleQuote . normalizeSpaces failIfInQuoteContext context = do st <- getState @@ -836,88 +748,65 @@ failIfInQuoteContext context = do then fail "already inside quotes" else return () -singleQuoteStart = try $ do +singleQuoteStart = do failIfInQuoteContext InSingleQuote - char '\8216' <|> do - char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (do -- possessive or contraction - oneOfStrings ["s","t","m","ve","ll","re"] - satisfy (not . isAlphaNum))) - return '\'' - -singleQuoteEnd = try $ do - char '\'' <|> char '\8217' - notFollowedBy alphaNum - -doubleQuoteStart = try $ do - failIfInQuoteContext InDoubleQuote - char '"' <|> char '\8220' - notFollowedBy (oneOf " \t\n") + char '\8216' <|> + do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) -- possess/contraction + return '\'' + +singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum + +doubleQuoteStart = failIfInQuoteContext InDoubleQuote >> + (char '"' <|> char '\8220') >> + notFollowedBy (oneOf " \t\n") doubleQuoteEnd = char '"' <|> char '\8221' -ellipses = try (do - oneOfStrings ["...", " . . . ", ". . .", " . . ."] - return Ellipses) +ellipses = try $ oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> + return Ellipses dash = enDash <|> emDash -enDash = try (do - char '-' - notFollowedBy (noneOf "0123456789") - return EnDash) +enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash -emDash = try (do - skipSpaces - oneOfStrings ["---", "--"] - skipSpaces - return EmDash) +emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >> + skipSpaces >> return EmDash -whitespace = do - many1 (oneOf spaceChars) "whitespace" - return Space +whitespace = (many1 (oneOf spaceChars) >> return Space) "whitespace" -tabchar = do - tab - return (Str "\t") +tabchar = tab >> return (Str "\t") -- hard line break -linebreak = try (do - oneOf spaceChars - many1 (oneOf spaceChars) - endline - return LineBreak ) +linebreak = try $ oneOf spaceChars >> many1 (oneOf spaceChars) >> + endline >> return LineBreak nonEndline = satisfy (/='\n') -entity = do - ent <- characterEntity - return $ Str [ent] - strChar = noneOf (specialChars ++ spaceChars ++ "\n") -str = do - result <- many1 strChar - return (Str result) +str = many1 strChar >>= return . Str -- an endline character that can be treated as a space, not a structural break -endline = try (do +endline = try $ do newline notFollowedBy blankline st <- getState if stateStrict st then do - notFollowedBy' emailBlockQuoteStart + notFollowedBy emailBlockQuoteStart notFollowedBy (char '#') -- atx header - notFollowedBy (try (do{manyTill anyChar newline; - oneOf setextHChars})) -- setext header + notFollowedBy (manyTill anyChar newline >> oneOf setextHChars) + -- setext header else return () -- parse potential list-starts differently if in a list: - if (stateParserContext st) == ListItemState - then notFollowedBy' $ choice [bulletListStart, anyOrderedListStart >> return ()] + if stateParserContext st == ListItemState + then notFollowedBy' (bulletListStart <|> + (anyOrderedListStart >> return ())) else return () - return Space) + return Space -- -- links @@ -930,24 +819,23 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference -- source for a link, with optional title source = try $ do char '(' - option ' ' (char '<') + optional (char '<') src <- many (noneOf ")> \t\n") - option ' ' (char '>') + optional (char '>') tit <- option "" title skipSpaces char ')' return (removeTrailingSpace src, tit) -titleWith startChar endChar = try (do +titleWith startChar endChar = try $ do leadingSpace <- many1 (oneOf " \t\n") if length (filter (=='\n') leadingSpace) > 1 then fail "title must be separated by space and on same or next line" else return () char startChar - tit <- manyTill anyChar (try (do char endChar - skipSpaces - notFollowedBy (noneOf ")\n"))) - return $ decodeEntities tit) + tit <- manyTill anyChar (try (char endChar >> skipSpaces >> + notFollowedBy (noneOf ")\n"))) + return $ decodeCharacterReferences tit title = choice [ titleWith '(' ')', titleWith '"' '"', @@ -955,22 +843,20 @@ title = choice [ titleWith '(' ')', link = choice [explicitLink, referenceLink] "link" -explicitLink = try (do +explicitLink = try $ do label <- reference src <- source - return (Link label src)) + return $ Link label src -- a link like [this][ref] or [this][] or [this] referenceLink = try $ do label <- reference - ref <- option [] (try (do skipSpaces - option ' ' newline - skipSpaces - reference)) + ref <- option [] (try (skipSpaces >> optional newline >> + skipSpaces >> reference)) let ref' = if null ref then label else ref state <- getState case lookupKeySrc (stateKeys state) ref' of - Nothing -> fail "no corresponding key" + Nothing -> fail "no corresponding key" Just target -> return (Link label target) autoLink = autoLinkEmail <|> autoLinkRegular @@ -992,10 +878,10 @@ autoLinkRegular = try $ do let src = prot ++ rest return $ Link [Code src] (src, "") -image = try (do +image = try $ do char '!' (Link label src) <- link - return (Image label src)) + return $ Image label src note = try $ do failIfStrict @@ -1003,23 +889,21 @@ note = try $ do state <- getState let notes = stateNotes state case lookup ref notes of - Nothing -> fail "note not found" - Just contents -> return (Note contents) + Nothing -> fail "note not found" + Just contents -> return $ Note contents inlineNote = try $ do failIfStrict char '^' contents <- inlinesInBalanced "[" "]" - return (Note [Para contents]) + return $ Note [Para contents] -rawLaTeXInline' = do - failIfStrict - rawLaTeXInline +rawLaTeXInline' = failIfStrict >> rawLaTeXInline rawHtmlInline' = do st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else choice [htmlBlockElement, anyHtmlInlineTag] - return (HtmlInline result) + result <- choice $ if stateStrict st + then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] + else [htmlBlockElement, anyHtmlInlineTag] + return $ HtmlInline result -- cgit v1.2.3