summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-24 17:43:39 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2007-01-24 17:43:39 +0000
commit 0646eef97691794eeac5549db18ac82cb6b84576 (patch)
tree 5e952c367036685a68cdcceeff8f81f337d79b8b /src/Text/Pandoc/Readers/HTML.hs
parent 96919a6ac5c21a8b46fbc347a4d815f0c9c89b98 (diff)
Rewrote 'extractTagType' in HTML reader so that it doesn't use
regexes. git-svn-id: https://pandoc.googlecode.com/svn/trunk@507 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 12
1 files changed, 7 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index b3ddc8985..66b55c448 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -40,7 +40,6 @@ module Text.Pandoc.Readers.HTML (
htmlBlockElement
) where
-import Text.Regex ( matchRegex, mkRegex )
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Pandoc
import Text.Pandoc.Definition
@@ -84,10 +83,13 @@ inlinesTilEnd tag = try (do
return inlines)
-- | Extract type from a tag: e.g. 'br' from '<br>'
-extractTagType tag =
- case (matchRegex (mkRegex "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of
- Just [match] -> (map toLower match)
- Nothing -> ""
+extractTagType :: String -> String
+extractTagType ('<':rest@(_:_)) | last rest == '>' =
+  -- skip whitespace and '/', then keep only the leading run of [A-Za-z0-9]
+  map toLower $ takeWhile isTagChar $ dropWhile isSpaceOrSlash rest
+  where isSpaceOrSlash c = c `elem` "/ \t\r\n"
+        isTagChar c = c `elem` (['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'])
+extractTagType _ = ""
-- | Parse any HTML tag (closing or opening) and return text of tag
anyHtmlTag = try (do