From 5a71632d11f86cb6ed700fe3a4d717a56bad1b9e Mon Sep 17 00:00:00 2001 From: Herwig Stuetz Date: Tue, 23 May 2017 23:21:51 +0200 Subject: Parsing: `many1Till`: Check for the end condition before parsing By not checking for the end condition before the first parse, the parser was applied too often, consuming too much of the input. This fixes the behaviour of `testStringWith (many1Till (oneOf "ab") (string "aa")) "aaa"` which before incorrectly returned `Right "a"`. With this change, it instead correctly fails with `Left (PandocParsecError ...)` because it is not able to parse at least one occurence of `oneOf "ab"` that is not `"aa"`. Note that this only affects `many1Till p end` where `p` matches on a prefix of `end`. --- src/Text/Pandoc/Parsing.hs | 5 +++-- src/Text/Pandoc/Readers/Org/Inlines.hs | 4 ++-- src/Text/Pandoc/Readers/TWiki.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 225796272..cd51bff69 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -274,11 +274,12 @@ indentWith num = do , try (char '\t' >> indentWith (num - tabStop)) ] -- | Like @manyTill@, but reads at least one item. -many1Till :: Stream s m t +many1Till :: (Show end, Stream s m t) => ParserT s st m a -> ParserT s st m end -> ParserT s st m [a] many1Till p end = do + notFollowedBy' end first <- p rest <- manyTill p end return (first:rest) @@ -343,7 +344,7 @@ blanklines :: Stream s m Char => ParserT s st m [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser +enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser -> ParserT s st m end -- ^ end parser -> ParserT s st m a -- ^ content parser (to be used repeatedly) -> ParserT s st m [a] diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index aa376fe25..6946e8379 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -687,13 +687,13 @@ mathEnd c = try $ do return res -enclosedInlines :: PandocMonad m => OrgParser m a +enclosedInlines :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m (F Inlines) enclosedInlines start end = try $ trimInlinesF . mconcat <$> enclosed start end inline -enclosedRaw :: PandocMonad m => OrgParser m a +enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b -> OrgParser m String enclosedRaw start end = try $ diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index aea55b7a9..fcb95fc35 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -349,13 +349,13 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) where lastNewline = eof >> return mempty innerNewline = return B.space -between :: (Monoid c, PandocMonad m) +between :: (Monoid c, PandocMonad m, Show b) => TWParser m a -> TWParser m b -> (TWParser m b -> TWParser m c) -> TWParser m c between start end p = mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end) -enclosed :: (Monoid b, PandocMonad m) +enclosed :: (Monoid b, PandocMonad m, Show a) => TWParser m a -> (TWParser m a -> TWParser m b) -> TWParser m b enclosed sep p = between sep (try $ sep <* endMarker) p where diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 52f4f2493..0b964dd63 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -692,7 +692,7 @@ langAttr = do return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) -- | Parses material surrounded by a parser. -surrounded :: PandocMonad m +surrounded :: (PandocMonad m, Show t) => ParserT [Char] st m t -- ^ surrounding parser -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) -> ParserT [Char] st m [a] -- cgit v1.2.3 From bfd5c6b172b7b4cc471b1ed80673bac545604f62 Mon Sep 17 00:00:00 2001 From: Herwig Stuetz Date: Tue, 23 May 2017 21:30:31 +0200 Subject: Org reader: Fix cite parsing behaviour Until now, org-ref cite keys included special characters also at the end. This caused problems when citations occur right before colons or at the end of a sentence. With this change, all non alphanumeric characters at the end of a cite key are ignored. This also adds `,` to the list of special characters that are legal in cite keys to better mirror the behaviour of org-export. --- src/Text/Pandoc/Readers/Org/Inlines.hs | 12 ++++++++++-- test/Tests/Readers/Org.hs | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6946e8379..dcea61222 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -339,8 +339,16 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) +orgRefCiteKey = + let citeKeySpecialChars = "-_:\\./," :: String + isCiteKeySpecialChar c = c `elem` citeKeySpecialChars + isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c + + in try $ many1Till (satisfy $ isCiteKeyChar) + $ try . lookAhead $ do + many . satisfy $ isCiteKeySpecialChar + satisfy $ not . isCiteKeyChar + -- | Supported citation types. Only a small subset of org-ref types is -- supported for now. TODO: rewrite this, use LaTeX reader as template. diff --git a/test/Tests/Readers/Org.hs b/test/Tests/Readers/Org.hs index 37ad2462b..3302e0c3e 100644 --- a/test/Tests/Readers/Org.hs +++ b/test/Tests/Readers/Org.hs @@ -334,6 +334,18 @@ tests = } in (para $ cite [citation] "cite:pandoc") + , "Org-ref simple citation with underscores" =: + "cite:pandoc_org_ref" =?> + let citation = Citation + { citationId = "pandoc_org_ref" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + in (para $ cite [citation] "cite:pandoc_org_ref") + , "Org-ref simple citation succeeded by comma" =: "cite:pandoc," =?> let citation = Citation @@ -346,6 +358,30 @@ tests = } in (para $ cite [citation] "cite:pandoc" <> str ",") + , "Org-ref simple citation succeeded by dot" =: + "cite:pandoc." =?> + let citation = Citation + { citationId = "pandoc" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + in (para $ cite [citation] "cite:pandoc" <> str ".") + + , "Org-ref simple citation succeeded by colon" =: + "cite:pandoc:" =?> + let citation = Citation + { citationId = "pandoc" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + in (para $ cite [citation] "cite:pandoc" <> str ":") + , "Org-ref simple citep citation" =: "citep:pandoc" =?> let citation = Citation -- cgit v1.2.3