From a6c167125fb423b365940401b45e5a95791b2fcf Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sat, 2 Feb 2013 18:46:10 -0800
Subject: Optimized oneOfStringsCI.

The call to toLower in ciMatch was very expensive (and very often used),
because toLower from Data.Char calls a fully unicode aware function.
This optimization avoids the call to toLower for the most common,
ASCII cases.

This dramatically reduces the speed penalty that comes from enabling the
`autolink_bare_uris` extension. The penalty is still substantial
(in one test, from 0.33s to 0.44s), but nowhere near what it used to be.
---
 src/Text/Pandoc/Parsing.hs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 002bc18de..ebff8ec77 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -152,8 +152,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, rawBlock)
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Parsec
 import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isHexDigit,
-                   isSpace )
+import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
+                   isHexDigit, isSpace )
 import Data.List ( intercalate, transpose )
 import Text.Pandoc.Shared
 import qualified Data.Map as M
@@ -244,7 +244,13 @@ oneOfStrings = oneOfStrings' (==)
 -- | Parses one of a list of strings (tried in order), case insensitive.
 oneOfStringsCI :: [String] -> Parser [Char] st String
 oneOfStringsCI = oneOfStrings' ciMatch
-  where ciMatch x y = toLower x == toLower y
+  where ciMatch x y = toLower' x == toLower' y
+        -- this optimizes toLower by checking common ASCII case
+        -- first, before calling the expensive unicode-aware
+        -- function:
+        toLower' c | c >= 'A' && c <= 'Z' = chr (ord c + 32)
+                   | isAscii c = c
+                   | otherwise = toLower c
 
 -- | Parses a space or tab.
 spaceChar :: Parser [Char] st Char
-- 
cgit v1.2.3