1 files changed, 79 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..3be47cfd4 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
 {-
-Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de>
+Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -21,19 +21,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 {- |
    Module      : Text.Pandoc.Readers.Org
-   Copyright   : Copyright (C) 2014 Albert Krewinkel
+   Copyright   : Copyright (C) 2014-2015 Albert Krewinkel
    License     : GNU GPL, version 2 or above
 
-   Maintainer  : Albert Krewinkel <tarleb@moltkeplatz.de>
+   Maintainer  : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
 
 Conversion of org-mode formatted plain text to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
-                                     , trimInlines )
+import           Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
+                                       trimInlines )
 import           Text.Pandoc.Definition
+import           Text.Pandoc.Compat.Monoid ((<>))
 import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -45,8 +46,6 @@ import           Text.Pandoc.Shared (compactify', compactify'DL)
 import           Text.TeXMath (readTeX, writePandoc, DisplayType(..))
 import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
 
-import           Control.Applicative ( Applicative, pure
-                                     , (<$>), (<$), (<*>), (<*), (*>) )
 import           Control.Arrow (first)
 import           Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks, local)
@@ -55,7 +54,6 @@ import           Data.Default
 import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import qualified Data.Map as M
 import           Data.Maybe (fromMaybe, isJust)
-import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 import           Network.HTTP (urlEncode)
 
 import           Text.Pandoc.Error
@@ -70,6 +68,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
 
 type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
 
+instance HasIdentifierList OrgParserState where
+  extractIdentifierList = orgStateIdentifiers
+  updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+  extractHeaderMap = orgStateHeaderMap
+  updateHeaderMap  f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
 parseOrg :: OrgParser Pandoc
 parseOrg = do
   blocks' <- parseBlocks
@@ -135,6 +141,9 @@ data OrgParserState = OrgParserState
                       , orgStateMeta                 :: Meta
                       , orgStateMeta'                :: F Meta
                       , orgStateNotes'               :: OrgNoteTable
+                      , orgStateParserContext        :: ParserContext
+                      , orgStateIdentifiers          :: [String]
+                      , orgStateHeaderMap            :: M.Map Inlines String
                       }
 
 instance Default OrgParserLocal where
@@ -174,6 +183,9 @@ defaultOrgParserState = OrgParserState
                         , orgStateMeta = nullMeta
                         , orgStateMeta' = return nullMeta
                         , orgStateNotes' = []
+                        , orgStateParserContext = NullState
+                        , orgStateIdentifiers = []
+                        , orgStateHeaderMap = M.empty
                         }
 
 recordAnchorId :: String -> OrgParser ()
@@ -282,6 +294,23 @@ blanklines =
        <* updateLastPreCharPos
        <* updateLastForbiddenCharPos
 
+-- | Succeeds only if the current parser context is 'ListItemState'.
+inList :: OrgParser ()
+inList = do
+  ctx <- orgStateParserContext <$> getState
+  guard (ctx == ListItemState)
+
+-- | Run a parser with the given parser context, then restore the old context.
+withContext :: ParserContext -- ^ New parser context
+            -> OrgParser a   -- ^ Parser to run in that context
+            -> OrgParser a
+withContext context parser = do
+  oldContext <- orgStateParserContext <$> getState
+  updateState $ \s -> s{ orgStateParserContext = context }
+  result <- parser
+  updateState $ \s -> s{ orgStateParserContext = oldContext }
+  return result
+
 --
 -- parsing blocks
 --
@@ -397,7 +426,7 @@ verseBlock blkProp = try $ do
   ignHeaders
   content <- rawBlockContent blkProp
   fmap B.para . mconcat . intersperse (pure B.linebreak)
-    <$> mapM (parseFromString parseInlines) (lines content)
+    <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
 
 exportsCode :: [(String, String)] -> Bool
 exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
@@ -504,10 +533,16 @@ rundocBlockClass :: String
 rundocBlockClass = rundocPrefix ++ "block"
 
 blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
+blockOption = try $ do
+  argKey <- orgArgKey
+  paramValue <- option "yes" orgParamValue
+  return (argKey, paramValue)
 
 inlineBlockOption :: OrgParser (String, String)
-inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
+inlineBlockOption = try $ do
+  argKey <- orgArgKey
+  paramValue <- option "yes" orgInlineParamValue
+  return (argKey, paramValue)
 
 orgArgKey :: OrgParser String
 orgArgKey = try $
@@ -516,11 +551,17 @@ orgArgKey = try $
 
 orgParamValue :: OrgParser String
 orgParamValue = try $
-  skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
+  skipSpaces
+    *> notFollowedBy (char ':' )
+    *> many1 (noneOf "\t\n\r ")
+    <* skipSpaces
 
 orgInlineParamValue :: OrgParser String
 orgInlineParamValue = try $
-  skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
+  skipSpaces
+    *> notFollowedBy (char ':')
+    *> many1 (noneOf "\t\n\r ]")
+    <* skipSpaces
 
 orgArgWordChar :: OrgParser Char
 orgArgWordChar = alphaNum <|> oneOf "-_"
@@ -668,7 +709,10 @@ header = try $ do
   title <- manyTill inline (lookAhead headerEnd)
   tags <- headerEnd
   let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
-  return $ B.header level <$> inlns
+  st <- getState
+  let inlines = runF inlns st
+  attr <- registerHeader nullAttr inlines
+  return $ pure (B.headerWith attr level inlines)
  where
    tagToInlineF :: String -> F Inlines
    tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
@@ -687,7 +731,7 @@ headerTags = try $
 
 headerStart :: OrgParser Int
 headerStart = try $
-  (length <$> many1 (char '*')) <* many1 (char ' ')
+  (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
 
 
 -- Don't use (or need) the reader wrapper here, we want hline to be
@@ -879,9 +923,13 @@ noteBlock = try $ do
 paraOrPlain :: OrgParser (F Blocks)
 paraOrPlain = try $ do
   ils <- parseInlines
-  nl <- option False (newline >> return True)
-  try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
-           return (B.para <$> ils))
+  nl <- option False (newline *> return True)
+  -- Read block as paragraph, unless no newline ends it or we are in a list
+  -- context and the block is directly followed by a list item; in those
+  -- cases the block is read as plain text.
+  try (guard nl
+       *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
+       *> return (B.para <$> ils))
     <|>  (return (B.plain <$> ils))
 
 inlinesTillNewline :: OrgParser (F Inlines)
@@ -946,19 +994,22 @@ definitionListItem :: OrgParser Int
                    -> OrgParser (F (Inlines, [Blocks]))
 definitionListItem parseMarkerGetLength = try $ do
   markerLength <- parseMarkerGetLength
-  term <- manyTill (noneOf "\n\r") (try $ string "::")
+  term <- manyTill (noneOf "\n\r") (try definitionMarker)
   line1 <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
   cont <- concat <$> many (listContinuation markerLength)
   term' <- parseFromString parseInlines term
   contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
   return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+   definitionMarker =
+     spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
 
 
 -- parse raw text for one list item, excluding start marker and continuations
 listItem :: OrgParser Int
          -> OrgParser (F Blocks)
-listItem start = try $ do
+listItem start = try . withContext ListItemState $ do
   markerLength <- try start
   firstLine <- anyLineNewline
   blank <- option "" ("\n" <$ blankline)
@@ -1537,8 +1588,11 @@ smart :: OrgParser (F Inlines)
 smart = do
   getOption readerSmart >>= guard
   doubleQuoted <|> singleQuoted <|>
-    choice (map (return <$>) [orgApostrophe, dash, ellipses])
-  where orgApostrophe =
+    choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+  where
+    orgDash = dash <* updatePositions '-'
+    orgEllipses = ellipses <* updatePositions '.'
+    orgApostrophe =
           (char '\'' <|> char '\8217') <* updateLastPreCharPos
                                        <* updateLastForbiddenCharPos
                                        *> return (B.str "\x2019")
@@ -1546,9 +1600,10 @@ smart = do
 singleQuoted :: OrgParser (F Inlines)
 singleQuoted = try $ do
   singleQuoteStart
+  updatePositions '\''
   withQuoteContext InSingleQuote $
     fmap B.singleQuoted . trimInlinesF . mconcat <$>
-      many1Till inline singleQuoteEnd
+      many1Till inline (singleQuoteEnd <* updatePositions '\'')
 
 -- doubleQuoted will handle regular double-quoted sections, as well
 -- as dialogues with an open double-quote without a close double-quote
@@ -1556,6 +1611,7 @@ singleQuoted = try $ do
 doubleQuoted :: OrgParser (F Inlines)
 doubleQuoted = try $ do
   doubleQuoteStart
+  updatePositions '"'
   contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
   (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
        (fmap B.doubleQuoted . trimInlinesF $ contents))