{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Readers.Org
   Copyright   : Copyright (C) 2014 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>

Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where

import qualified Text.Pandoc.Builder as B
import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
                                     , trimInlines )
import           Text.Pandoc.Definition
import           Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                            , newline, orderedListMarker
                                            , parseFromString
                                            , updateLastStrPos )
import           Text.Pandoc.Shared (compactify', compactify'DL)

import           Control.Applicative ( Applicative, pure
                                     , (<$>), (<$), (<*>), (<*), (*>), (<**>) )
import           Control.Monad (foldM, guard, liftM, liftM2, when)
import           Control.Monad.Reader (Reader, runReader, ask, asks)
import           Data.Char (isAlphaNum, toLower)
import           Data.Default
import           Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import           Data.Maybe (listToMaybe, fromMaybe, isJust)
import           Data.Monoid (Monoid, mconcat, mempty, mappend)

-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
        -> String        -- ^ String to parse (assuming @'\n'@ line endings)
        -> Pandoc
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")

type OrgParser = Parser [Char] OrgParserState

parseOrg :: OrgParser Pandoc
parseOrg = do
  blocks' <- parseBlocks
  st <- getState
  let meta = runF (orgStateMeta' st) st
  return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)

--
-- Parser State for Org
--

type OrgNoteRecord = (String, F Blocks)
type OrgNoteTable = [OrgNoteRecord]

type OrgBlockAttributes = M.Map String String

-- | Org-mode parser state
data OrgParserState = OrgParserState
                      { orgStateOptions              :: ReaderOptions
                      , orgStateAnchorIds            :: [String]
                      , orgStateBlockAttributes      :: OrgBlockAttributes
                      , orgStateEmphasisCharStack    :: [Char]
                      , orgStateEmphasisNewlines     :: Maybe Int
                      , orgStateLastForbiddenCharPos :: Maybe SourcePos
                      , orgStateLastPreCharPos       :: Maybe SourcePos
                      , orgStateLastStrPos           :: Maybe SourcePos
                      , orgStateMeta                 :: Meta
                      , orgStateMeta'                :: F Meta
                      , orgStateNotes'               :: OrgNoteTable
                      }

instance HasReaderOptions OrgParserState where
  extractReaderOptions = orgStateOptions

instance HasMeta OrgParserState where
  setMeta field val st =
    st{ orgStateMeta = setMeta field val $ orgStateMeta st }
  deleteMeta field st =
    st{ orgStateMeta = deleteMeta field $ orgStateMeta st }

instance Default OrgParserState where
  def = defaultOrgParserState

defaultOrgParserState :: OrgParserState
defaultOrgParserState = OrgParserState
                        { orgStateOptions = def
                        , orgStateAnchorIds = []
                        , orgStateBlockAttributes = M.empty
                        , orgStateEmphasisCharStack = []
                        , orgStateEmphasisNewlines = Nothing
                        , orgStateLastForbiddenCharPos = Nothing
                        , orgStateLastPreCharPos = Nothing
                        , orgStateLastStrPos = Nothing
                        , orgStateMeta = nullMeta
                        , orgStateMeta' = return nullMeta
                        , orgStateNotes' = []
                        }

recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
  s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }

addBlockAttribute :: String -> String -> OrgParser ()
addBlockAttribute key val = updateState $ \s ->
  let attrs = orgStateBlockAttributes s
  in s{ orgStateBlockAttributes = M.insert key val attrs }

lookupBlockAttribute :: String -> OrgParser (Maybe String)
lookupBlockAttribute key =
  M.lookup key . orgStateBlockAttributes <$> getState

resetBlockAttributes :: OrgParser ()
resetBlockAttributes = updateState $ \s ->
  s{ orgStateBlockAttributes = orgStateBlockAttributes def }

updateLastStrPos :: OrgParser ()
updateLastStrPos = getPosition >>= \p ->
  updateState $ \s -> s{ orgStateLastStrPos = Just p }

updateLastForbiddenCharPos :: OrgParser ()
updateLastForbiddenCharPos = getPosition >>= \p ->
  updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}

updateLastPreCharPos :: OrgParser ()
updateLastPreCharPos = getPosition >>= \p ->
  updateState $ \s -> s{ orgStateLastPreCharPos = Just p}

pushToInlineCharStack :: Char -> OrgParser ()
pushToInlineCharStack c = updateState $ \s ->
  s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }

popInlineCharStack :: OrgParser ()
popInlineCharStack = updateState $ \s ->
  s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }

surroundingEmphasisChar :: OrgParser [Char]
surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState

startEmphasisNewlinesCounting :: Int -> OrgParser ()
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
  s{ orgStateEmphasisNewlines = Just maxNewlines }

decEmphasisNewlinesCount :: OrgParser ()
decEmphasisNewlinesCount = updateState $ \s ->
      s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }

newlinesCountWithinLimits :: OrgParser Bool
newlinesCountWithinLimits = do
  st <- getState
  return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True

resetEmphasisNewlines :: OrgParser ()
resetEmphasisNewlines = updateState $ \s ->
  s{ orgStateEmphasisNewlines = Nothing }

addToNotesTable :: OrgNoteRecord -> OrgParser ()
addToNotesTable note = do
  oldnotes <- orgStateNotes' <$> getState
  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }

-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString parser str' = do
  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
  result <- P.parseFromString parser str'
  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
  return result


--
-- Adaptions and specializations of parsing utilities
--

newtype F a = F { unF :: Reader OrgParserState a
                } deriving (Monad, Applicative, Functor)

runF :: F a -> OrgParserState -> a
runF = runReader . unF

askF :: F OrgParserState
askF = F ask

asksF :: (OrgParserState -> a) -> F a
asksF f = F $ asks f

instance Monoid a => Monoid (F a) where
  mempty = return mempty
  mappend = liftM2 mappend
  mconcat = fmap mconcat . sequence

trimInlinesF :: F Inlines -> F Inlines
trimInlinesF = liftM trimInlines

returnF :: a -> OrgParser (F a)
returnF = return . return


-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
newline :: OrgParser Char
newline =
  P.newline
       <* updateLastPreCharPos
       <* updateLastForbiddenCharPos

--
-- parsing blocks
--

parseBlocks :: OrgParser (F Blocks)
parseBlocks = mconcat <$> manyTill block eof

block :: OrgParser (F Blocks)
block = choice [ mempty <$ blanklines
               , optionalAttributes $ choice
                 [ orgBlock
                 , figure
                 , table
                 ]
               , example
               , drawer
               , specialLine
               , header
               , return <$> hline
               , list
               , latexFragment
               , noteBlock
               , paraOrPlain
               ] <?> "block"

optionalAttributes :: OrgParser (F Blocks) -> OrgParser (F Blocks)
optionalAttributes parser = try $
  resetBlockAttributes *> parseBlockAttributes *> parser

parseBlockAttributes :: OrgParser ()
parseBlockAttributes = do
  attrs <- many attribute
  () <$ mapM (uncurry parseAndAddAttribute) attrs
 where
   attribute :: OrgParser (String, String)
   attribute = try $ do
         key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
         val <- skipSpaces *> anyLine
         return (map toLower key, val)

parseAndAddAttribute :: String -> String -> OrgParser ()
parseAndAddAttribute key value = do
  let key' = map toLower key
  () <$ addBlockAttribute key' value

lookupInlinesAttr :: String -> OrgParser (Maybe (F Inlines))
lookupInlinesAttr attr = try $ do
  val <- lookupBlockAttribute attr
  maybe (return Nothing)
        (fmap Just . parseFromString parseInlines)
        val


--
-- Org Blocks (#+BEGIN_... / #+END_...)
--

orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
  (indent, blockType, args) <- blockHeader
  content <- rawBlockContent indent blockType
  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
  case blockType of
    "comment" -> return mempty
    "html"    -> returnF $ B.rawBlock "html" content
    "latex"   -> returnF $ B.rawBlock "latex" content
    "ascii"   -> returnF $ B.rawBlock "ascii" content
    "example" -> returnF $ exampleCode content
    "quote"   -> return  $ B.blockQuote <$> contentBlocks
    "verse"   -> parseVerse content
    "src"     -> codeBlockWithAttr classArgs content
    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
 where
   parseVerse :: String -> OrgParser (F Blocks)
   parseVerse cs =
       fmap B.para . mconcat . intersperse (pure B.linebreak)
       <$> mapM (parseFromString parseInlines) (lines cs)

blockHeader :: OrgParser (Int, String, [String])
blockHeader = (,,) <$> blockIndent
                   <*> blockType
                   <*> (skipSpaces *> blockArgs)
 where blockIndent = length <$> many spaceChar
       blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
       blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline

codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
codeBlockWithAttr classArgs content = do
  identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
  caption <- lookupInlinesAttr "caption"
  let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
  return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
 where
   labelDiv blk value =
       B.divWith nullAttr <$> (mappend <$> labelledBlock value
                                       <*> pure blk)
   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))

rawBlockContent :: Int -> String -> OrgParser String
rawBlockContent indent blockType =
  unlines . map commaEscaped <$> manyTill indentedLine blockEnder
 where
   indentedLine = try $ choice [ blankline         *> pure "\n"
                               , indentWith indent *> anyLine
                               ]
   blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)

-- indent by specified number of spaces (or equiv. tabs)
indentWith :: Int -> OrgParser String
indentWith num = do
  tabStop <- getOption readerTabStop
  if num < tabStop
     then count num (char ' ')
     else choice [ try (count num (char ' '))
                 , try (char '\t' >> count (num - tabStop) (char ' ')) ]

translateLang :: String -> String
translateLang "C"          = "c"
translateLang "C++"        = "cpp"
translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
translateLang "js"         = "javascript"
translateLang "lisp"       = "commonlisp"
translateLang "R"          = "r"
translateLang "sh"         = "bash"
translateLang "sqlite"     = "sql"
translateLang cs = cs

commaEscaped :: String -> String
commaEscaped (',':cs@('*':_))     = cs
commaEscaped (',':cs@('#':'+':_)) = cs
commaEscaped cs                   = cs

example :: OrgParser (F Blocks)
example = try $ do
  return . return . exampleCode =<< unlines <$> many1 exampleLine

exampleCode :: String -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])

exampleLine :: OrgParser String
exampleLine = try $ string ": " *> anyLine

-- Drawers for properties or a logbook
drawer :: OrgParser (F Blocks)
drawer = try $ do
  drawerStart
  manyTill drawerLine (try drawerEnd)
  return mempty

drawerStart :: OrgParser String
drawerStart = try $
  skipSpaces *> drawerName <* skipSpaces <* newline
 where drawerName = try $  char ':' *> validDrawerName <* char ':'
       validDrawerName =  stringAnyCase "PROPERTIES"
                          <|> stringAnyCase "LOGBOOK"

drawerLine :: OrgParser String
drawerLine = try anyLine

drawerEnd :: OrgParser String
drawerEnd = try $
  skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline


--
-- Figures
--

-- Figures (Image on a line by itself, preceded by name and/or caption)
figure :: OrgParser (F Blocks)
figure = try $ do
  (cap, nam) <- nameAndCaption
  src <- skipSpaces *> selfTarget <* skipSpaces <* newline
  guard (isImageFilename src)
  return $ do
    cap' <- cap
    return $ B.para $ B.image src nam cap'
 where
   nameAndCaption =
       do
         maybeCap <- lookupInlinesAttr "caption"
         maybeNam <- lookupBlockAttribute "name"
         guard $ isJust maybeCap || isJust maybeNam
         return ( fromMaybe mempty maybeCap
                , maybe mempty withFigPrefix maybeNam )
   withFigPrefix cs =
       if "fig:" `isPrefixOf` cs
       then cs
       else "fig:" ++ cs

--
-- Comments, Options and Metadata
specialLine :: OrgParser (F Blocks)
specialLine = fmap return . try $ metaLine <|> commentLine

metaLine :: OrgParser Blocks
metaLine = try $ metaLineStart *> declarationLine

commentLine :: OrgParser Blocks
commentLine = try $ commentLineStart *> anyLine *> pure mempty

-- The order, in which blocks are tried, makes sure that we're not looking at
-- the beginning of a block, so we don't need to check for it
metaLineStart :: OrgParser String
metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"

commentLineStart :: OrgParser String
commentLineStart = try $ mappend <$> many spaceChar <*> string "# "

declarationLine :: OrgParser Blocks
declarationLine = try $ do
  key <- metaKey
  inlinesF <- metaInlines
  updateState $ \st ->
    let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
    in st { orgStateMeta' = orgStateMeta' st <> meta' }
  return mempty

metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline

metaKey :: OrgParser String
metaKey = map toLower <$> many1 (noneOf ": \n\r")
                      <*  char ':'
                      <*  skipSpaces

--
-- Headers
--

-- | Headers
header :: OrgParser (F Blocks)
header = try $ do
  level <- headerStart
  title <- inlinesTillNewline
  return $ B.header level <$> title

headerStart :: OrgParser Int
headerStart = try $
  (length <$> many1 (char '*')) <* many1 (char ' ')


-- Don't use (or need) the reader wrapper here, we want hline to be
-- @show@able.  Otherwise we can't use it with @notFollowedBy'@.

-- | Horizontal Line (five -- dashes or more)
hline :: OrgParser Blocks
hline = try $ do
  skipSpaces
  string "-----"
  many (char '-')
  skipSpaces
  newline
  return B.horizontalRule

--
-- Tables
--

data OrgTableRow = OrgContentRow (F [Blocks])
                 | OrgAlignRow [Alignment]
                 | OrgHlineRow

data OrgTable = OrgTable
  { orgTableColumns    :: Int
  , orgTableAlignments :: [Alignment]
  , orgTableHeader     :: [Blocks]
  , orgTableRows       :: [[Blocks]]
  }

table :: OrgParser (F Blocks)
table = try $ do
  lookAhead tableStart
  do
    rows <- tableRows
    cptn <- fromMaybe (pure "") <$> lookupInlinesAttr "caption"
    return $ (<$> cptn) . orgToPandocTable . normalizeTable =<< rowsToTable rows

orgToPandocTable :: OrgTable
                 -> Inlines
                 -> Blocks
orgToPandocTable (OrgTable _ aligns heads lns) caption =
  B.table caption (zip aligns $ repeat 0) heads lns

tableStart :: OrgParser Char
tableStart = try $ skipSpaces *> char '|'

tableRows :: OrgParser [OrgTableRow]
tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)

tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
  OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)

tableContentCell :: OrgParser (F Blocks)
tableContentCell = try $
  fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell

endOfCell :: OrgParser Char
endOfCell = try $ char '|' <|> lookAhead newline

tableAlignRow :: OrgParser OrgTableRow
tableAlignRow = try $
  OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)

tableAlignCell :: OrgParser Alignment
tableAlignCell =
  choice [ try $ emptyCell *> return AlignDefault
         , try $ skipSpaces
                   *> char '<'
                   *> tableAlignFromChar
                   <* many digit
                   <* char '>'
                   <* emptyCell
         ] <?> "alignment info"
    where emptyCell = try $ skipSpaces *> endOfCell

tableAlignFromChar :: OrgParser Alignment
tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
                                  , char 'c' *> return AlignCenter
                                  , char 'r' *> return AlignRight
                                  ]

tableHline :: OrgParser OrgTableRow
tableHline = try $
  OrgHlineRow <$ (tableStart *> char '-' *> anyLine)

rowsToTable :: [OrgTableRow]
            -> F OrgTable
rowsToTable = foldM (flip rowToContent) zeroTable
  where zeroTable = OrgTable 0 mempty mempty mempty

normalizeTable :: OrgTable
               -> OrgTable
normalizeTable (OrgTable cols aligns heads lns) =
  let aligns' = fillColumns aligns AlignDefault
      heads'  = if heads == mempty
                then mempty
                else fillColumns heads (B.plain mempty)
      lns'    = map (`fillColumns` B.plain mempty) lns
      fillColumns base padding = take cols $ base ++ repeat padding
  in OrgTable cols aligns' heads' lns'


-- One or more horizontal rules after the first content line mark the previous
-- line as a header.  All other horizontal lines are discarded.
rowToContent :: OrgTableRow
             -> OrgTable
             -> F OrgTable
rowToContent OrgHlineRow        t = maybeBodyToHeader t
rowToContent (OrgAlignRow as)   t = setLongestRow as =<< setAligns as t
rowToContent (OrgContentRow rf) t = do
  rs <- rf
  setLongestRow rs =<< appendToBody rs t

setLongestRow :: [a]
              -> OrgTable
              -> F OrgTable
setLongestRow rs t =
  return t{ orgTableColumns = max (length rs) (orgTableColumns t) }

maybeBodyToHeader :: OrgTable
                  -> F OrgTable
maybeBodyToHeader t = case t of
  OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
         return t{ orgTableHeader = b , orgTableRows = [] }
  _   -> return t

appendToBody :: [Blocks]
             -> OrgTable
             -> F OrgTable
appendToBody r t = return t{ orgTableRows = orgTableRows t ++ [r] }

setAligns :: [Alignment]
          -> OrgTable
          -> F OrgTable
setAligns aligns t = return $ t{ orgTableAlignments = aligns }


--
-- LaTeX fragments
--
latexFragment :: OrgParser (F Blocks)
latexFragment = try $ do
  envName <- latexEnvStart
  content <- mconcat <$> manyTill anyLineNewline (latexEnd envName)
  return . return $ B.rawBlock "latex" (content `inLatexEnv` envName)
 where
   c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n"
                              , c
                              , "\\end{", e, "}\n"
                              ]

latexEnvStart :: OrgParser String
latexEnvStart = try $ do
  skipSpaces *> string "\\begin{"
             *> latexEnvName
             <* string "}"
             <* blankline

latexEnd :: String -> OrgParser ()
latexEnd envName = try $
  () <$ skipSpaces
     <* string ("\\end{" ++ envName ++ "}")
     <* blankline

-- | Parses a LaTeX environment name.
latexEnvName :: OrgParser String
latexEnvName = try $ do
  mappend <$> many1 alphaNum
          <*> option "" (string "*")


--
-- Footnote defintions
--
noteBlock :: OrgParser (F Blocks)
noteBlock = try $ do
  ref <- noteMarker <* skipSpaces
  content <- mconcat <$> blocksTillHeaderOrNote
  addToNotesTable (ref, content)
  return mempty
 where
   blocksTillHeaderOrNote =
     many1Till block (eof <|> () <$ lookAhead noteMarker
                          <|> () <$ lookAhead headerStart)

-- Paragraphs or Plain text
paraOrPlain :: OrgParser (F Blocks)
paraOrPlain = try $
  parseInlines <**> (fmap <$> option B.plain (try $ newline *> pure B.para))

inlinesTillNewline :: OrgParser (F Inlines)
inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline


--
-- list blocks
--

list :: OrgParser (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"

definitionList :: OrgParser (F Blocks)
definitionList = fmap B.definitionList . fmap compactify'DL . sequence
                 <$> many1 (definitionListItem bulletListStart)

bulletList :: OrgParser (F Blocks)
bulletList = fmap B.bulletList . fmap compactify' . sequence
             <$> many1 (listItem bulletListStart)

orderedList :: OrgParser (F Blocks)
-- orderedList = B.orderedList . compactify' <$> many1 (listItem orderedListStart)
orderedList = fmap B.orderedList . fmap compactify' . sequence
              <$> many1 (listItem orderedListStart)

genericListStart :: OrgParser String
                 -> OrgParser Int
genericListStart listMarker = try $
  (+) <$> (length <$> many spaceChar)
      <*> (length <$> listMarker <* many1 spaceChar)

-- parses bullet list start and returns its length (excl. following whitespace)
bulletListStart :: OrgParser Int
bulletListStart = genericListStart bulletListMarker
  where bulletListMarker = pure <$> oneOf "*-+"

orderedListStart :: OrgParser Int
orderedListStart = genericListStart orderedListMarker
  -- Ordered list markers allowed in org-mode
  where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")

definitionListItem :: OrgParser Int
                   -> OrgParser (F (Inlines, [Blocks]))
definitionListItem parseMarkerGetLength = try $ do
  markerLength <- parseMarkerGetLength
  term <- manyTill (noneOf "\n\r") (try $ string "::")
  first <- anyLineNewline
  blank <- option "" ("\n" <$ blankline)
  cont <- concat <$> many (listContinuation markerLength)
  term' <- parseFromString inline term
  contents' <- parseFromString parseBlocks $ first ++ blank ++ cont
  return $ (,) <$> term' <*> fmap (:[]) contents'


-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
         -> OrgParser (F Blocks)
listItem start = try $ do
  markerLength <- try start
  firstLine <- anyLineNewline
  blank <- option "" ("\n" <$ blankline)
  rest <- concat <$> many (listContinuation markerLength)
  parseFromString parseBlocks $ firstLine ++ blank ++ rest

-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Int
                 -> OrgParser String
listContinuation markerLength = try $
  notFollowedBy' blankline
  *> (mappend <$> (concat <$> many1 listLine)
              <*> many blankline)
 where listLine = try $ indentWith markerLength *> anyLineNewline

anyLineNewline :: OrgParser String
anyLineNewline = (++ "\n") <$> anyLine


--
-- inline
--

inline :: OrgParser (F Inlines)
inline =
  choice [ whitespace
         , linebreak
         , footnote
         , linkOrImage
         , anchor
         , str
         , endline
         , emph
         , strong
         , strikeout
         , underline
         , code
         , math
         , displayMath
         , verbatim
         , subscript
         , superscript
         , symbol
         ] <* (guard =<< newlinesCountWithinLimits)
  <?> "inline"

parseInlines :: OrgParser (F Inlines)
parseInlines = trimInlinesF . mconcat <$> many1 inline

-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"


whitespace :: OrgParser (F Inlines)
whitespace = pure B.space <$ skipMany1 spaceChar
                          <* updateLastPreCharPos
                          <* updateLastForbiddenCharPos
             <?> "whitespace"

linebreak :: OrgParser (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline

str :: OrgParser (F Inlines)
str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
      <* updateLastStrPos

-- | An endline character that can be treated as a space, not a structural
-- break.  This should reflect the values of the Emacs variable
-- @org-element-pagaraph-separate@.
endline :: OrgParser (F Inlines)
endline = try $ do
  newline
  notFollowedBy blankline
  notFollowedBy' exampleLine
  notFollowedBy' hline
  notFollowedBy' noteMarker
  notFollowedBy' tableStart
  notFollowedBy' drawerStart
  notFollowedBy' headerStart
  notFollowedBy' metaLineStart
  notFollowedBy' latexEnvStart
  notFollowedBy' commentLineStart
  notFollowedBy' bulletListStart
  notFollowedBy' orderedListStart
  decEmphasisNewlinesCount
  guard =<< newlinesCountWithinLimits
  updateLastPreCharPos
  return . return $ B.space

footnote :: OrgParser (F Inlines)
footnote = try $ inlineNote <|> referencedNote

inlineNote :: OrgParser (F Inlines)
inlineNote = try $ do
  string "[fn:"
  ref <- many alphaNum
  char ':'
  note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
  when (not $ null ref) $
       addToNotesTable ("fn:" ++ ref, note)
  return $ B.note <$> note

referencedNote :: OrgParser (F Inlines)
referencedNote = try $ do
  ref <- noteMarker
  return $ do
    notes <- asksF orgStateNotes'
    case lookup ref notes of
      Nothing   -> return $ B.str $ "[" ++ ref ++ "]"
      Just contents  -> do
        st <- askF
        let contents' = runF contents st{ orgStateNotes' = [] }
        return $ B.note contents'

noteMarker :: OrgParser String
noteMarker = try $ do
  char '['
  choice [ many1Till digit (char ']')
         , (++) <$> string "fn:"
                <*> many1Till (noneOf "\n\r\t ") (char ']')
         ]

linkOrImage :: OrgParser (F Inlines)
linkOrImage = explicitOrImageLink
              <|> selflinkOrImage
              <|> angleLink
              <|> plainLink
              <?> "link or image"

explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink = try $ do
  char '['
  src    <- linkTarget
  title  <- enclosedRaw (char '[') (char ']')
  title' <- parseFromString (mconcat <$> many inline) title
  char ']'
  return $ if isImageFilename src && isImageFilename title
           then pure $ B.link src "" $ B.image title mempty mempty
           else linkToInlinesF src =<< title'

selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
  src <- char '[' *> linkTarget <* char ']'
  return $ linkToInlinesF src (B.str src)

plainLink :: OrgParser (F Inlines)
plainLink = try $ do
  (orig, src) <- uri
  returnF $ B.link src "" (B.str orig)

angleLink :: OrgParser (F Inlines)
angleLink = try $ do
  char '<'
  link <- plainLink
  char '>'
  return link

selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'

linkTarget :: OrgParser String
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")

linkToInlinesF :: String -> Inlines -> F Inlines
linkToInlinesF s@('#':_) = pure . B.link s ""
linkToInlinesF s
    | isImageFilename s = const . pure $ B.image s "" ""
    | isUri s           = pure . B.link s ""
    | isRelativeUrl s   = pure . B.link s ""
linkToInlinesF s = \title -> do
  anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
  if anchorB
    then pure $ B.link ('#':s) "" title
    else pure $ B.emph title

isRelativeUrl :: String -> Bool
isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)

isUri :: String -> Bool
isUri s = let (scheme, path) = break (== ':') s
          in all (\c -> isAlphaNum c || c `elem` ".-") scheme
             && not (null path)

isImageFilename :: String -> Bool
isImageFilename filename =
  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
  (any (\x -> (x++":") `isPrefixOf` filename) protocols ||
   ':' `notElem` filename)
 where
   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
   protocols = [ "file", "http", "https" ]

-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
-- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
-- @org-target-regexp@, which is fairly liberal.  Since no link is created if
-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
-- an anchor.

anchor :: OrgParser (F Inlines)
anchor =  try $ do
  anchorId <- parseAnchor
  recordAnchorId anchorId
  returnF $ B.spanWith (solidify anchorId, [], []) mempty
 where
       parseAnchor = string "<<"
                     *> many1 (noneOf "\t\n\r<>\"' ")
                     <* string ">>"
                     <* skipSpaces

-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
-- the org function @org-export-solidify-link-text@.

solidify :: String -> String
solidify = map replaceSpecialChar
 where replaceSpecialChar c
           | isAlphaNum c    = c
           | c `elem` "_.-:" = c
           | otherwise       = '-'

emph      :: OrgParser (F Inlines)
emph      = fmap B.emph         <$> emphasisBetween '/'

strong    :: OrgParser (F Inlines)
strong    = fmap B.strong       <$> emphasisBetween '*'

strikeout :: OrgParser (F Inlines)
strikeout = fmap B.strikeout    <$> emphasisBetween '+'

-- There is no underline, so we use strong instead.
underline :: OrgParser (F Inlines)
underline = fmap B.strong       <$> emphasisBetween '_'

code      :: OrgParser (F Inlines)
code      = return . B.code         <$> verbatimBetween '='

verbatim  :: OrgParser (F Inlines)
verbatim  = return . B.rawInline "" <$> verbatimBetween '~'

subscript   :: OrgParser (F Inlines)
subscript   = fmap B.subscript   <$> try (char '_' *> subOrSuperExpr)

superscript :: OrgParser (F Inlines)
superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)

math      :: OrgParser (F Inlines)
math      = return . B.math      <$> choice [ math1CharBetween '$'
                                            , mathStringBetween '$'
                                            , rawMathBetween "\\(" "\\)"
                                            ]

displayMath :: OrgParser (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
                                                , rawMathBetween "$$"  "$$"
                                                ]
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
 where updatePositions c
           | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
           | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
           | otherwise = return c

emphasisBetween :: Char
                -> OrgParser (F Inlines)
emphasisBetween c = try $ do
  startEmphasisNewlinesCounting emphasisAllowedNewlines
  res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
  isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
  when isTopLevelEmphasis
       resetEmphasisNewlines
  return res

verbatimBetween :: Char
                -> OrgParser String
verbatimBetween c = try $
  emphasisStart c *>
  many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)

-- | Parses a raw string delimited by @c@ using Org's math rules
mathStringBetween :: Char
                  -> OrgParser String
mathStringBetween c = try $ do
  mathStart c
  body <- many1TillNOrLessNewlines mathAllowedNewlines
                                   (noneOf (c:"\n\r"))
                                   (lookAhead $ mathEnd c)
  final <- mathEnd c
  return $ body ++ [final]

-- | Parse a single character between @c@ using math rules
math1CharBetween :: Char
                -> OrgParser String
math1CharBetween c = try $ do
  char c
  res <- noneOf $ c:mathForbiddenBorderChars
  char c
  eof <|> () <$ lookAhead (oneOf mathPostChars)
  return [res]

rawMathBetween :: String
               -> String
               -> OrgParser String
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)

-- | Parses the start (opening character) of emphasis
emphasisStart :: Char -> OrgParser Char
emphasisStart c = try $ do
  guard =<< afterEmphasisPreChar
  guard =<< notAfterString
  char c
  lookAhead (noneOf emphasisForbiddenBorderChars)
  pushToInlineCharStack c
  return c

-- | Parses the closing character of emphasis
emphasisEnd :: Char -> OrgParser Char
emphasisEnd c = try $ do
  guard =<< notAfterForbiddenBorderChar
  char c
  eof <|> () <$ lookAhead acceptablePostChars
  updateLastStrPos
  popInlineCharStack
  return c
 where acceptablePostChars =
           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)

mathStart :: Char -> OrgParser Char
mathStart c = try $
  char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))

mathEnd :: Char -> OrgParser Char
mathEnd c = try $ do
  res <- noneOf (c:mathForbiddenBorderChars)
  char c
  eof <|> () <$ lookAhead (oneOf mathPostChars)
  return res


enclosedInlines :: OrgParser a
                -> OrgParser b
                -> OrgParser (F Inlines)
enclosedInlines start end = try $
  trimInlinesF . mconcat <$> enclosed start end inline

enclosedRaw :: OrgParser a
            -> OrgParser b
            -> OrgParser String
enclosedRaw start end = try $
  start *> (onSingleLine <|> spanningTwoLines)
 where onSingleLine = try $ many1Till (noneOf "\n\r") end
       spanningTwoLines = try $
         anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine

-- | Like many1Till, but parses at most @n+1@ lines.  @p@ must not consume
--   newlines.
many1TillNOrLessNewlines :: Int
                         -> OrgParser Char
                         -> OrgParser a
                         -> OrgParser String
many1TillNOrLessNewlines n p end = try $
  nMoreLines (Just n) mempty >>= oneOrMore
 where
   nMoreLines Nothing  cs = return cs
   nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
   nMoreLines k        cs = try $ (final k cs <|> rest k cs)
                                  >>= uncurry nMoreLines
   final _ cs = (\x -> (Nothing,      cs ++ x)) <$> try finalLine
   rest  m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
   finalLine = try $ manyTill p end
   minus1 k = k - 1
   oneOrMore cs = guard (not $ null cs) *> return cs

-- Org allows customization of the way it reads emphasis.  We use the defaults
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
-- for details).

-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
emphasisPreChars :: [Char]
emphasisPreChars = "\t \"'({"

-- | Chars allowed at after emphasis
emphasisPostChars :: [Char]
emphasisPostChars = "\t\n !\"'),-.:;?\\}"

-- | Chars not allowed at the (inner) border of emphasis
emphasisForbiddenBorderChars :: [Char]
emphasisForbiddenBorderChars = "\t\n\r \"',"

-- | The maximum number of newlines within
emphasisAllowedNewlines :: Int
emphasisAllowedNewlines = 1

-- LaTeX-style math: see `org-latex-regexps` for details

-- | Chars allowed after an inline ($...$) math statement
mathPostChars :: [Char]
mathPostChars = "\t\n \"'),-.:;?"

-- | Chars not allowed at the (inner) border of math
mathForbiddenBorderChars :: [Char]
mathForbiddenBorderChars = "\t\n\r ,;.$"

-- | Maximum number of newlines in an inline math statement
mathAllowedNewlines :: Int
mathAllowedNewlines = 2

-- | Whether we are right behind a char allowed before emphasis
afterEmphasisPreChar :: OrgParser Bool
afterEmphasisPreChar = do
  pos <- getPosition
  lastPrePos <- orgStateLastPreCharPos <$> getState
  return . fromMaybe True $ (== pos) <$> lastPrePos

-- | Whether we are right after the end of a string
notAfterString :: OrgParser Bool
notAfterString = do
  pos <- getPosition
  lastStrPos <- orgStateLastStrPos <$> getState
  return $ lastStrPos /= Just pos

-- | Whether the parser is right after a forbidden border char
notAfterForbiddenBorderChar :: OrgParser Bool
notAfterForbiddenBorderChar = do
  pos <- getPosition
  lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
  return $ lastFBCPos /= Just pos

-- | Read a sub- or superscript expression
subOrSuperExpr :: OrgParser (F Inlines)
subOrSuperExpr = try $
  choice [ id                   <$> charsInBalanced '{' '}' (noneOf "\n\r")
         , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
         , simpleSubOrSuperString
         ] >>= parseFromString (mconcat <$> many inline)
 where enclosing (left, right) s = left : s ++ [right]

simpleSubOrSuperString :: OrgParser String
simpleSubOrSuperString = try $
  choice [ string "*"
         , mappend <$> option [] ((:[]) <$> oneOf "+-")
                   <*> many1 alphaNum
         ]