1 files changed, 909 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
new file mode 100644
index 000000000..df84c0ac7
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -0,0 +1,909 @@
+{-
+Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Markdown
+   Copyright   : Copyright (C) 2006-7 John MacFarlane
+   License     : GNU GPL, version 2 or above 
+
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of markdown-formatted plain text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.Markdown ( 
+                                     readMarkdown 
+                                    ) where
+
+import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy )
+import Data.Ord ( comparing )
+import Data.Char ( isAlphaNum )
+import Network.URI ( isURI )
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared 
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, 
+                                  anyHtmlInlineTag, anyHtmlTag,
+                                  anyHtmlEndTag, htmlEndTag, extractTagType,
+                                  htmlBlockElement )
+import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.ParserCombinators.Parsec
+
+-- | Read markdown from an input string and return a Pandoc document.
+readMarkdown :: ParserState -> String -> Pandoc
+readMarkdown state str = (readWith parseMarkdown) state (str ++ "\n\n")
+
+--
+-- Constants and data structure definitions
+--
+
+spaceChars = " \t"
+bulletListMarkers = "*+-"
+hruleChars = "*-_"
+setextHChars = "=-"
+
+-- treat these as potentially non-text when parsing inline:
+specialChars = "\\[]*_~`<>$!^-.&'\""
+
+--
+-- auxiliary functions
+--
+
+indentSpaces = try $ do
+  state <- getState
+  let tabStop = stateTabStop state
+  try (count tabStop (char ' ')) <|> 
+    (many (char ' ') >> string "\t") <?> "indentation"
+
+nonindentSpaces = do
+  state <- getState
+  let tabStop = stateTabStop state
+  sps <- many (char ' ')
+  if length sps < tabStop 
+     then return sps
+     else unexpected "indented line"
+
+-- | Fail unless we're at beginning of a line.
+failUnlessBeginningOfLine = do
+  pos <- getPosition
+  if sourceColumn pos == 1 then return () else fail "not beginning of line"
+
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart = do
+  state <- getState
+  if stateSmart state then return () else fail "Smart typography feature"
+
+-- | Parse an inline Str element with a given content.
+inlineString str = try $ do 
+  (Str res) <- inline 
+  if res == str then return res else fail $ "unexpected Str content"
+
+-- | Parse a sequence of inline elements between a string
+--  @opener@ and a string @closer@, including inlines
+--  between balanced pairs of @opener@ and a @closer@.
+inlinesInBalanced :: String -> String -> GenParser Char ParserState [Inline]
+inlinesInBalanced opener closer = try $ do
+  string opener
+  result <- manyTill ( (do lookAhead (inlineString opener)
+                           -- because it might be a link...
+                           bal <- inlinesInBalanced opener closer 
+                           return $ [Str opener] ++ bal ++ [Str closer])
+                       <|> (count 1 inline)) 
+                     (try (string closer))
+  return $ concat result
+
+--
+-- document structure
+--
+
+titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
+
+authorsLine = try $ do 
+  char '%'
+  skipSpaces
+  authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
+  newline
+  return $ map (decodeCharacterReferences . removeLeadingTrailingSpace) authors
+
+dateLine = try $ do
+  char '%'
+  skipSpaces
+  date <- many (noneOf "\n")
+  newline
+  return $ decodeCharacterReferences $ removeTrailingSpace date
+
+titleBlock = try $ do
+  failIfStrict
+  title <- option [] titleLine
+  author <- option [] authorsLine
+  date <- option "" dateLine
+  optional blanklines
+  return (title, author, date)
+
+parseMarkdown = do
+  -- markdown allows raw HTML
+  updateState (\state -> state { stateParseRaw = True })
+  startPos <- getPosition
+  -- go through once just to get list of reference keys
+  -- docMinusKeys is the raw document with blanks where the keys were...
+  docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= 
+                  return . concat
+  setInput docMinusKeys
+  setPosition startPos
+  st <- getState
+  -- go through again for notes unless strict...
+  if stateStrict st
+     then return ()
+     else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>= 
+                              return . concat
+             st <- getState
+             let reversedNotes = stateNotes st
+             updateState $ \st -> st { stateNotes = reverse reversedNotes }
+             setInput docMinusNotes
+             setPosition startPos
+  -- now parse it for real...
+  (title, author, date) <- option ([],[],"") titleBlock
+  blocks <- parseBlocks 
+  return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
+
+-- 
+-- initial pass for references and notes
+--
+
+referenceKey = try $ do
+  startPos <- getPosition
+  nonindentSpaces
+  label <- reference
+  char ':'
+  skipSpaces
+  optional (char '<')
+  src <- many (noneOf "> \n\t")
+  optional (char '>')
+  tit <- option "" referenceTitle
+  blanklines
+  endPos <- getPosition
+  let newkey = (label, (removeTrailingSpace src,  tit))
+  st <- getState
+  let oldkeys = stateKeys st
+  updateState $ \st -> st { stateKeys = newkey : oldkeys }
+  -- return blanks so line count isn't affected
+  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+referenceTitle = try $ do 
+  (many1 spaceChar >> option '\n' newline) <|> newline
+  skipSpaces
+  tit <-    (charsInBalanced '(' ')' >>= return . unwords . words)
+        <|> do delim <- char '\'' <|> char '"'
+               manyTill anyChar (try (char delim >> skipSpaces >>
+                                      notFollowedBy (noneOf ")\n")))
+  return $ decodeCharacterReferences tit
+
+noteMarker = string "[^" >> manyTill (noneOf " \t\n") (char ']')
+
+rawLine = do
+  notFollowedBy blankline
+  notFollowedBy' noteMarker
+  contents <- many1 nonEndline
+  end <- option "" (newline >> optional indentSpaces >> return "\n") 
+  return $ contents ++ end
+
+rawLines = many1 rawLine >>= return . concat
+
+noteBlock = try $ do
+  startPos <- getPosition
+  ref <- noteMarker
+  char ':'
+  optional blankline
+  optional indentSpaces
+  raw <- sepBy rawLines (try (blankline >> indentSpaces))
+  optional blanklines
+  endPos <- getPosition
+  -- parse the extracted text, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+  let newnote = (ref, contents)
+  st <- getState
+  let oldnotes = stateNotes st
+  updateState $ \st -> st { stateNotes = newnote : oldnotes }
+  -- return blanks so line count isn't affected
+  return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+--
+-- parsing blocks
+--
+
+parseBlocks = manyTill block eof
+
+block = choice [ header 
+               , table
+               , codeBlock
+               , hrule
+               , list
+               , blockQuote
+               , htmlBlock
+               , rawLaTeXEnvironment'
+               , para
+               , plain
+               , nullBlock ] <?> "block"
+
+--
+-- header blocks
+--
+
+header = atxHeader <|> setextHeader <?> "header"
+
+atxHeader = try $ do
+  level <- many1 (char '#') >>= return . length
+  notFollowedBy (char '.' <|> char ')') -- this would be a list
+  skipSpaces
+  text <- manyTill inline atxClosing >>= return . normalizeSpaces
+  return $ Header level text
+
+atxClosing = try $ skipMany (char '#') >> blanklines
+
+setextHeader = try $ do
+  -- first, see if this block has any chance of being a setextHeader:
+  lookAhead (anyLine >> oneOf setextHChars)
+  text <- many1Till inline newline >>= return . normalizeSpaces
+  level <- choice $ zipWith 
+           (\ch lev -> try (many1 $ char ch) >> blanklines >> return lev)
+           setextHChars [1..(length setextHChars)]
+  return $ Header level text
+
+--
+-- hrule block
+--
+
+hrule = try $ do
+  skipSpaces
+  start <- oneOf hruleChars
+  count 2 (skipSpaces >> char start)
+  skipMany (skipSpaces >> char start)
+  newline
+  optional blanklines
+  return HorizontalRule
+
+--
+-- code blocks
+--
+
+indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
+
+codeBlock = do
+  contents <- many1 (indentedLine <|> 
+                     try (do b <- blanklines
+                             l <- indentedLine
+                             return $ b ++ l))
+  optional blanklines
+  return $ CodeBlock $ stripTrailingNewlines $ concat contents
+
+--
+-- block quotes
+--
+
+emacsBoxQuote = try $ do
+  failIfStrict
+  string ",----"
+  manyTill anyChar newline
+  raw <- manyTill 
+         (try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
+         (try (string "`----"))
+  blanklines
+  return raw
+
+emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
+
+emailBlockQuote = try $ do
+  emailBlockQuoteStart
+  raw <- sepBy (many (nonEndline <|> 
+                      (try (endline >> notFollowedBy emailBlockQuoteStart >>
+                       return '\n'))))
+               (try (newline >> emailBlockQuoteStart))
+  newline <|> (eof >> return '\n')
+  optional blanklines
+  return raw
+
+blockQuote = do 
+  raw <- emailBlockQuote <|> emacsBoxQuote
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
+  return $ BlockQuote contents
+ 
+--
+-- list blocks
+--
+
+list = choice [ bulletList, orderedList, definitionList ] <?> "list"
+
+bulletListStart = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  notFollowedBy' hrule     -- because hrules start out just like lists
+  oneOf bulletListMarkers
+  spaceChar
+  skipSpaces
+
+anyOrderedListStart = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  notFollowedBy $ string "p." >> spaceChar >> digit  -- page number
+  state <- getState
+  if stateStrict state
+     then do many1 digit
+             char '.'
+             spaceChar
+             return (1, DefaultStyle, DefaultDelim)
+     else anyOrderedListMarker >>~ spaceChar
+
+orderedListStart style delim = try $ do
+  optional newline -- if preceded by a Plain block in a list context
+  nonindentSpaces
+  state <- getState
+  num <- if stateStrict state
+            then do many1 digit
+                    char '.'
+                    return 1
+            else orderedListMarker style delim 
+  if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
+     num `elem` [1, 5, 10, 50, 100, 500, 1000]))
+     then char '\t' <|> (spaceChar >> spaceChar)
+     else spaceChar
+  skipSpaces
+
+-- parse a line of a list item (start = parser for beginning of list item)
+listLine start = try $ do
+  notFollowedBy' start
+  notFollowedBy blankline
+  notFollowedBy' (do indentSpaces
+                     many (spaceChar)
+                     bulletListStart <|> (anyOrderedListStart >> return ()))
+  line <- manyTill anyChar newline
+  return $ line ++ "\n"
+
+-- parse raw text for one list item, excluding start marker and continuations
+rawListItem start = try $ do
+  start
+  result <- many1 (listLine start)
+  blanks <- many blankline
+  return $ concat result ++ blanks
+
+-- continuation of a list item - indented and separated by blankline 
+-- or (in compact lists) endline.
+-- note: nested lists are parsed as continuations
+listContinuation start = try $ do
+  lookAhead indentSpaces
+  result <- many1 (listContinuationLine start)
+  blanks <- many blankline
+  return $ concat result ++ blanks
+
+listContinuationLine start = try $ do
+  notFollowedBy blankline
+  notFollowedBy' start
+  optional indentSpaces
+  result <- manyTill anyChar newline
+  return $ result ++ "\n"
+
+listItem start = try $ do 
+  first <- rawListItem start
+  continuations <- many (listContinuation start)
+  -- parsing with ListItemState forces markers at beginning of lines to
+  -- count as list item markers, even if not separated by blank space.
+  -- see definition of "endline"
+  state <- getState
+  let oldContext = stateParserContext state
+  setState $ state {stateParserContext = ListItemState}
+  -- parse the extracted block, which may contain various block elements:
+  let raw = concat (first:continuations)
+  contents <- parseFromString parseBlocks raw
+  updateState (\st -> st {stateParserContext = oldContext})
+  return contents
+
+orderedList = try $ do
+  (start, style, delim) <- lookAhead anyOrderedListStart
+  items <- many1 (listItem (orderedListStart style delim))
+  return $ OrderedList (start, style, delim) $ compactify items
+
+bulletList = many1 (listItem bulletListStart) >>= 
+             return . BulletList . compactify
+
+-- definition lists
+
+definitionListItem = try $ do
+  notFollowedBy blankline
+  notFollowedBy' indentSpaces
+  -- first, see if this has any chance of being a definition list:
+  lookAhead (anyLine >> char ':')
+  term <- manyTill inline newline
+  raw <- many1 defRawBlock
+  state <- getState
+  let oldContext = stateParserContext state
+  -- parse the extracted block, which may contain various block elements:
+  contents <- parseFromString parseBlocks $ concat raw
+  updateState (\st -> st {stateParserContext = oldContext})
+  return ((normalizeSpaces term), contents)
+
+defRawBlock = try $ do
+  char ':'
+  state <- getState
+  let tabStop = stateTabStop state
+  try (count (tabStop - 1) (char ' ')) <|> (many (char ' ') >> string "\t")
+  firstline <- anyLine
+  rawlines <- many (notFollowedBy blankline >> indentSpaces >> anyLine)
+  trailing <- option "" blanklines
+  return $ firstline ++ "\n" ++ unlines rawlines ++ trailing
+
+definitionList = do
+  failIfStrict
+  items <- many1 definitionListItem
+  let (terms, defs) = unzip items
+  let defs' = compactify defs
+  let items' = zip terms defs'
+  return $ DefinitionList items'
+
+--
+-- paragraph block
+--
+
+para = try $ do 
+  result <- many1 inline
+  newline
+  blanklines <|> do st <- getState
+                    if stateStrict st
+                       then lookAhead (blockQuote <|> header) >> return ""
+                       else lookAhead emacsBoxQuote >> return ""
+  return $ Para $ normalizeSpaces result
+
+plain = many1 inline >>= return . Plain . normalizeSpaces 
+
+-- 
+-- raw html
+--
+
+htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+
+htmlBlock = do
+  st <- getState
+  if stateStrict st
+    then try $ do failUnlessBeginningOfLine
+                  first <- htmlElement
+                  finalSpace <- many (oneOf spaceChars)
+                  finalNewlines <- many newline
+                  return $ RawHtml $ first ++ finalSpace ++ finalNewlines
+    else rawHtmlBlocks
+
+-- True if tag is self-closing
+isSelfClosing tag = 
+  isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+
+strictHtmlBlock = try $ do
+  tag <- anyHtmlBlockTag 
+  let tag' = extractTagType tag
+  if isSelfClosing tag || tag' == "hr" 
+     then return tag
+     else do contents <- many (notFollowedBy' (htmlEndTag tag') >> 
+                               (htmlElement <|> (count 1 anyChar)))
+             end <- htmlEndTag tag'
+             return $ tag ++ concat contents ++ end
+
+rawHtmlBlocks = do
+  htmlBlocks <- many1 rawHtmlBlock    
+  let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
+  let combined' = if not (null combined) && last combined == '\n'
+                     then init combined  -- strip extra newline 
+                     else combined 
+  return $ RawHtml combined'
+
+--
+-- LaTeX
+--
+
+rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
+
+--
+-- Tables
+-- 
+
+-- Parse a dashed line with optional trailing spaces; return its length
+-- and the length including trailing space.
+dashedLine ch = do
+  dashes <- many1 (char ch)
+  sp     <- many spaceChar
+  return $ (length dashes, length $ dashes ++ sp)
+
+-- Parse a table header with dashed lines of '-' preceded by 
+-- one line of text.
+simpleTableHeader = try $ do
+  rawContent  <- anyLine
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeads = tail $ splitByIndices (init indices) rawContent
+  let aligns   = zipWith alignType (map (\a -> [a]) rawHeads) lengths
+  return (rawHeads, aligns, indices)
+
+-- Parse a table footer - dashed lines followed by blank line.
+tableFooter = try $ nonindentSpaces >> many1 (dashedLine '-') >> blanklines
+
+-- Parse a table separator - dashed line.
+tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
+
+-- Parse a raw line and split it into chunks by indices.
+rawTableLine indices = do
+  notFollowedBy' (blanklines <|> tableFooter)
+  line <- many1Till anyChar newline
+  return $ map removeLeadingTrailingSpace $ tail $ 
+           splitByIndices (init indices) line
+
+-- Parse a table line and return a list of lists of blocks (columns).
+tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain))
+
+-- Parse a multiline table row and return a list of blocks (columns).
+multilineRow indices = do
+  colLines <- many1 (rawTableLine indices)
+  optional blanklines
+  let cols = map unlines $ transpose colLines
+  mapM (parseFromString (many plain)) cols
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int     -- Number of columns on terminal
+                  -> [Int]   -- Indices
+                  -> [Float] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []  
+widthsFromIndices numColumns indices = 
+  let lengths = zipWith (-) indices (0:indices)
+      totLength = sum lengths
+      quotient = if totLength > numColumns
+                   then fromIntegral totLength
+                   else fromIntegral numColumns
+      fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+  tail fracs
+
+-- Parses a table caption:  inlines beginning with 'Table:'
+-- and followed by blank lines.
+tableCaption = try $ do
+  nonindentSpaces
+  string "Table:"
+  result <- many1 inline
+  blanklines
+  return $ normalizeSpaces result
+
+-- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
+tableWith headerParser lineParser footerParser = try $ do
+    (rawHeads, aligns, indices) <- headerParser
+    lines <- many1Till (lineParser indices) footerParser
+    caption <- option [] tableCaption
+    heads <- mapM (parseFromString (many plain)) rawHeads
+    state <- getState
+    let numColumns = stateColumns state
+    let widths = widthsFromIndices numColumns indices
+    return $ Table caption aligns widths heads lines
+
+-- Parse a simple table with '---' header and one line per row.
+simpleTable = tableWith simpleTableHeader tableLine blanklines
+
+-- Parse a multiline table:  starts with row of '-' on top, then header
+-- (which may be multiline), then the rows,
+-- which may be multiline, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+multilineTable = tableWith multilineTableHeader multilineRow tableFooter
+
+multilineTableHeader = try $ do
+  tableSep 
+  rawContent  <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
+  initSp      <- nonindentSpaces
+  dashes      <- many1 (dashedLine '-')
+  newline
+  let (lengths, lines) = unzip dashes
+  let indices  = scanl (+) (length initSp) lines
+  let rawHeadsList = transpose $ map 
+                     (\ln -> tail $ splitByIndices (init indices) ln)
+                     rawContent
+  let rawHeads = map (joinWithSep " ") rawHeadsList
+  let aligns   = zipWith alignType rawHeadsList lengths
+  return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+
+-- Returns an alignment type for a table, based on a list of strings
+-- (the rows of the column header) and a number (the length of the
+-- dashed line under the rows.
+alignType :: [String] -> Int -> Alignment
+alignType []  len = AlignDefault
+alignType strLst len =
+  let str        = head $ sortBy (comparing length) $ 
+                          map removeTrailingSpace strLst
+      leftSpace  = if null str then False else (str !! 0) `elem` " \t"
+      rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
+  in  case (leftSpace, rightSpace) of
+        (True,  False)   -> AlignRight
+        (False, True)    -> AlignLeft
+        (True,  True)    -> AlignCenter
+        (False, False)   -> AlignDefault
+
+table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
+
+-- 
+-- inline
+--
+
+inline = choice [ str
+                , smartPunctuation
+                , whitespace
+                , endline
+                , code
+                , charRef
+                , strong
+                , emph
+                , note
+                , inlineNote
+                , link
+                , image
+                , math
+                , strikeout
+                , superscript
+                , subscript
+                , autoLink
+                , rawHtmlInline'
+                , rawLaTeXInline'
+                , escapedChar
+                , symbol
+                , ltSign ] <?> "inline"
+
+escapedChar = do
+  char '\\'
+  state <- getState
+  result <- option '\\' $ if stateStrict state 
+                             then oneOf "\\`*_{}[]()>#+-.!~"
+                             else satisfy (not . isAlphaNum)
+  return $ Str [result]
+
+ltSign = do
+  st <- getState
+  if stateStrict st
+     then char '<'
+     else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+  return $ Str ['<']
+
+specialCharsMinusLt = filter (/= '<') specialChars
+
+symbol = do 
+  result <- oneOf specialCharsMinusLt
+  return $ Str [result]
+
+-- parses inline code, between n `s and n `s
+code = try $ do 
+  starts <- many1 (char '`')
+  skipSpaces
+  result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+                       (char '\n' >> return " ")) 
+                      (try (skipSpaces >> count (length starts) (char '`') >> 
+                      notFollowedBy (char '`')))
+  return $ Code $ removeLeadingTrailingSpace $ concat result
+
+mathWord = many1 ((noneOf " \t\n\\$") <|>
+                  (try (char '\\') >>~ notFollowedBy (char '$')))
+
+math = try $ do
+  failIfStrict
+  char '$'
+  notFollowedBy space
+  words <- sepBy1 mathWord (many1 space)
+  char '$'
+  return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
+
+emph = ((enclosed (char '*') (char '*') inline) <|>
+        (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>= 
+        return . Emph . normalizeSpaces
+
+strong = ((enclosed (string "**") (try $ string "**") inline) <|> 
+          (enclosed (string "__") (try $ string "__") inline)) >>=
+         return . Strong . normalizeSpaces
+
+strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
+            return . Strikeout . normalizeSpaces
+
+superscript = failIfStrict >> enclosed (char '^') (char '^') 
+              (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
+              return . Superscript
+
+subscript = failIfStrict >> enclosed (char '~') (char '~')
+            (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
+            return . Subscript 
+
+smartPunctuation = failUnlessSmart >> 
+                   choice [ quoted, apostrophe, dash, ellipses ]
+
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted = doubleQuoted <|> singleQuoted 
+
+withQuoteContext context parser = do
+  oldState <- getState
+  let oldQuoteContext = stateQuoteContext oldState
+  setState oldState { stateQuoteContext = context }
+  result <- parser
+  newState <- getState
+  setState newState { stateQuoteContext = oldQuoteContext }
+  return result
+
+singleQuoted = try $ do
+  singleQuoteStart
+  withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
+    return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted = try $ do 
+  doubleQuoteStart
+  withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
+    return . Quoted DoubleQuote . normalizeSpaces
+
+failIfInQuoteContext context = do
+  st <- getState
+  if stateQuoteContext st == context
+     then fail "already inside quotes"
+     else return ()
+
+singleQuoteStart = do 
+  failIfInQuoteContext InSingleQuote
+  char '\8216' <|> 
+     do char '\''  
+        notFollowedBy (oneOf ")!],.;:-? \t\n")
+        notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+                            satisfy (not . isAlphaNum))) -- possess/contraction
+        return '\''
+
+singleQuoteEnd = (char '\'' <|> char '\8217') >> notFollowedBy alphaNum
+
+doubleQuoteStart = failIfInQuoteContext InDoubleQuote >>
+                   (char '"' <|> char '\8220') >>
+                   notFollowedBy (oneOf " \t\n")
+
+doubleQuoteEnd = char '"' <|> char '\8221'
+
+ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
+
+dash = enDash <|> emDash
+
+enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
+
+emDash = try $ skipSpaces >> oneOfStrings ["---", "--"] >>
+               skipSpaces >> return EmDash
+
+whitespace = do
+  sps <- many1 (oneOf spaceChars)
+  if length sps >= 2
+     then option Space (endline >> return LineBreak)
+     else return Space <?> "whitespace"
+
+nonEndline = satisfy (/='\n')
+
+strChar = noneOf (specialChars ++ spaceChars ++ "\n")
+
+str = many1 strChar >>= return . Str
+
+-- an endline character that can be treated as a space, not a structural break
+endline = try $ do
+  newline
+  notFollowedBy blankline
+  st <- getState
+  if stateStrict st 
+    then do notFollowedBy emailBlockQuoteStart
+            notFollowedBy (char '#')  -- atx header
+    else return () 
+  -- parse potential list-starts differently if in a list:
+  if stateParserContext st == ListItemState
+     then notFollowedBy' (bulletListStart <|> 
+                          (anyOrderedListStart >> return ()))
+     else return ()
+  return Space
+
+--
+-- links
+--
+
+-- a reference label for a link
+reference = notFollowedBy' (string "[^") >>  -- footnote reference
+            inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
+
+-- source for a link, with optional title
+source = try $ do 
+  char '('
+  optional (char '<')
+  src <- many (noneOf ")> \t\n")
+  optional (char '>')
+  tit <- option "" linkTitle
+  skipSpaces
+  char ')'
+  return (removeTrailingSpace src, tit)
+
+linkTitle = try $ do 
+  (many1 spaceChar >> option '\n' newline) <|> newline
+  skipSpaces
+  delim <- char '\'' <|> char '"'
+  tit <-   manyTill anyChar (try (char delim >> skipSpaces >>
+                                  notFollowedBy (noneOf ")\n")))
+  return $ decodeCharacterReferences tit
+
+link = try $ do
+  label <- reference
+  src <- source <|> referenceLink label
+  return $ Link label src
+
+-- a link like [this][ref] or [this][] or [this]
+referenceLink label = do
+  ref <- option [] (try (optional (char ' ') >> 
+                         optional (newline >> skipSpaces) >> reference))
+  let ref' = if null ref then label else ref
+  state <- getState
+  case lookupKeySrc (stateKeys state) ref' of
+     Nothing     -> fail "no corresponding key" 
+     Just target -> return target 
+
+emailAddress = try $ do
+  name <- many1 (alphaNum <|> char '+')
+  char '@'
+  first <- many1 alphaNum
+  rest <- many1 (char '.' >> many1 alphaNum)
+  return $ "mailto:" ++ name ++ "@" ++ joinWithSep "." (first:rest)
+
+uri = try $ do
+  str <- many1 (noneOf "\n\t >")
+  if isURI str
+     then return str
+     else fail "not a URI"
+
+autoLink = try $ do
+  char '<'
+  src <- uri <|> emailAddress
+  char '>'
+  let src' = if "mailto:" `isPrefixOf` src
+                then drop 7 src
+                else src 
+  st <- getState
+  return $ if stateStrict st
+              then Link [Str src'] (src, "")
+              else Link [Code src'] (src, "")
+
+image = try $ do
+  char '!'
+  (Link label src) <- link
+  return $ Image label src
+
+note = try $ do
+  failIfStrict
+  ref <- noteMarker
+  state <- getState
+  let notes = stateNotes state
+  case lookup ref notes of
+    Nothing       -> fail "note not found"
+    Just contents -> return $ Note contents
+
+inlineNote = try $ do
+  failIfStrict
+  char '^'
+  contents <- inlinesInBalanced "[" "]"
+  return $ Note [Para contents]
+
+rawLaTeXInline' = failIfStrict >> rawLaTeXInline
+
+rawHtmlInline' = do
+  st <- getState
+  result <- choice $ if stateStrict st
+                        then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] 
+                        else [htmlBlockElement, anyHtmlInlineTag]
+  return $ HtmlInline result
+