1 files changed, 577 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
new file mode 100644
index 000000000..bc9da26cb
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -0,0 +1,577 @@
+{-
+  Copyright (C) 2017 Alexander Krotov <ilabdsf@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.Readers.Muse
+   Copyright   : Copyright (C) 2017 Alexander Krotov
+   License     : GNU GPL, version 2 or above
+
+   Maintainer  : Alexander Krotov <ilabdsf@gmail.com>
+   Stability   : alpha
+   Portability : portable
+
+Conversion of Muse text to 'Pandoc' document.
+-}
+{-
+TODO:
+- {{{ }}} syntax for <example>
+- Page breaks (five "*")
+- Headings with anchors (make it round trip with Muse writer)
+- <verse> and ">"
+- Definition lists
+- Org tables
+- table.el tables
+- Images with attributes (floating and width)
+- Anchors
+- Citations and <biblio>
+- <play> environment
+- <verbatim> tag
+-}
+module Text.Pandoc.Readers.Muse (readMuse) where
+
+import Control.Monad
+import Control.Monad.Except (throwError)
+import qualified Data.Map as M
+import Data.Text (Text, unpack)
+import Data.List (stripPrefix)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Text.Pandoc.Builder (Blocks, Inlines)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (macro, nested)
+import Text.Pandoc.Readers.HTML (htmlTag)
+import Text.Pandoc.XML (fromEntities)
+import System.FilePath (takeExtension)
+
+-- | Read Muse from an input string and return a Pandoc document.
+readMuse :: PandocMonad m
+         => ReaderOptions
+         -> Text
+         -> m Pandoc
+readMuse opts s = do
+  res <- readWithM parseMuse def{ stateOptions = opts } (unpack s)
+  case res of
+       Left e  -> throwError e
+       Right d -> return d
+
+type MuseParser = ParserT String ParserState
+
+--
+-- main parser
+--
+
+parseMuse :: PandocMonad m => MuseParser m Pandoc
+parseMuse = do
+  many directive
+  blocks <- parseBlocks
+  st <- getState
+  let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
+                     meta <- stateMeta' st
+                     return $ Pandoc meta bs) st
+  reportLogMessages
+  return doc
+
+parseBlocks :: PandocMonad m => MuseParser m (F Blocks)
+parseBlocks = do
+  res <- mconcat <$> many block
+  spaces
+  eof
+  return res
+
+--
+-- utility functions
+--
+
+nested :: PandocMonad m => MuseParser m a -> MuseParser m a
+nested p = do
+  nestlevel <- stateMaxNestingLevel <$>  getState
+  guard $ nestlevel > 0
+  updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+  res <- p
+  updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+  return res
+
+htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String)
+htmlElement tag = try $ do
+  (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+  content <- manyTill anyChar (endtag <|> endofinput)
+  return (htmlAttrToPandoc attr, trim content)
+  where
+    endtag     = void $ htmlTag (~== TagClose tag)
+    endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+    trim       = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+  where
+    ident   = fromMaybe "" $ lookup "id" attrs
+    classes = maybe [] words $ lookup "class" attrs
+    keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: PandocMonad m
+                          => String -> MuseParser m a -> MuseParser m (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+  (attr, content) <- htmlElement tag
+  parsedContent <- try $ parseContent content
+  return (attr, parsedContent)
+  where
+    parseContent = parseFromString $ nested $ manyTill parser endOfContent
+    endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: PandocMonad m => String -> MuseParser m a -> MuseParser m [a]
+parseHtmlContent tag p = liftM snd (parseHtmlContentWithAttrs tag p)
+
+--
+-- directive parsers
+--
+
+parseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseDirective = do
+  char '#'
+  key <- many letter
+  space
+  spaces
+  raw <- many $ noneOf "\n"
+  newline
+  value <- parseFromString (trimInlinesF . mconcat <$> many inline) raw
+  return (key, value)
+
+directive :: PandocMonad m => MuseParser m ()
+directive = do
+  (key, value) <- parseDirective
+  updateState $ \st -> st { stateMeta' = B.setMeta key <$> value <*> stateMeta' st }
+
+--
+-- block parsers
+--
+
+block :: PandocMonad m => MuseParser m (F Blocks)
+block = do
+  pos <- getPosition
+  res <- mempty <$ skipMany1 blankline
+         <|> blockElements
+         <|> para
+  skipMany blankline
+  report $ ParsingTrace (take 60 $ show $ B.toList $ runF res defaultParserState) pos
+  return res
+
+blockElements :: PandocMonad m => MuseParser m (F Blocks)
+blockElements = choice [ comment
+                       , separator
+                       , header
+                       , exampleTag
+                       , literal
+                       , centerTag
+                       , rightTag
+                       , quoteTag
+                       , bulletList
+                       , orderedList
+                       , table
+                       , commentTag
+                       , noteBlock
+                       ]
+
+comment :: PandocMonad m => MuseParser m (F Blocks)
+comment = try $ do
+  char ';'
+  space
+  many $ noneOf "\n"
+  void newline <|> eof
+  return mempty
+
+separator :: PandocMonad m => MuseParser m (F Blocks)
+separator = try $ do
+  string "---"
+  newline
+  return $ return B.horizontalRule
+
+header :: PandocMonad m => MuseParser m (F Blocks)
+header = try $ do
+  level <- liftM length $ many1 $ char '*'
+  guard $ level <= 5
+  skipSpaces
+  content <- trimInlinesF . mconcat <$> manyTill inline newline
+  attr <- registerHeader ("", [], []) (runF content defaultParserState)
+  return $ B.headerWith attr level <$> content
+
+exampleTag :: PandocMonad m => MuseParser m (F Blocks)
+exampleTag = liftM (return . uncurry B.codeBlockWith) $ htmlElement "example"
+
+literal :: PandocMonad m => MuseParser m (F Blocks)
+literal = liftM (return . rawBlock) $ htmlElement "literal"
+  where
+    format (_, _, kvs)        = fromMaybe "html" $ lookup "format" kvs
+    rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+blockTag :: PandocMonad m
+          => (Blocks -> Blocks)
+          -> String
+          -> MuseParser m (F Blocks)
+blockTag f s = do
+  res <- parseHtmlContent s block
+  return $ f <$> mconcat res
+
+-- <center> tag is ignored
+centerTag :: PandocMonad m => MuseParser m (F Blocks)
+centerTag = blockTag id "center"
+
+-- <right> tag is ignored
+rightTag :: PandocMonad m => MuseParser m (F Blocks)
+rightTag = blockTag id "right"
+
+quoteTag :: PandocMonad m => MuseParser m (F Blocks)
+quoteTag = blockTag B.blockQuote "quote"
+
+commentTag :: PandocMonad m => MuseParser m (F Blocks)
+commentTag = parseHtmlContent "comment" block >> return mempty
+
+para :: PandocMonad m => MuseParser m (F Blocks)
+para = do
+ res <- trimInlinesF . mconcat <$> many1Till inline endOfParaElement
+ return $ B.para <$> res
+ where
+   endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+   endOfInput       = try $ skipMany blankline >> skipSpaces >> eof
+   endOfPara        = try $ blankline >> skipMany1 blankline
+   newBlockElement  = try $ blankline >> void blockElements
+
+noteMarker :: PandocMonad m => MuseParser m String
+noteMarker = try $ do
+  char '['
+  many1Till digit $ char ']'
+
+noteBlock :: PandocMonad m => MuseParser m (F Blocks)
+noteBlock = try $ do
+  pos <- getPosition
+  ref <- noteMarker <* skipSpaces
+  content <- mconcat <$> blocksTillNote
+  oldnotes <- stateNotes' <$> getState
+  case M.lookup ref oldnotes of
+    Just _ -> logMessage $ DuplicateNoteReference ref pos
+    Nothing -> return ()
+  updateState $ \s -> s{ stateNotes' = M.insert ref (pos, content) oldnotes }
+  return mempty
+  where
+    blocksTillNote =
+      many1Till block (eof <|> () <$ lookAhead noteMarker)
+
+--
+-- lists
+--
+
+listLine :: PandocMonad m => Int -> MuseParser m String
+listLine markerLength = try $ do
+  notFollowedBy blankline
+  indentWith markerLength
+  anyLineNewline
+
+withListContext :: PandocMonad m => MuseParser m a -> MuseParser m a
+withListContext p = do
+  state <- getState
+  let oldContext = stateParserContext state
+  setState $ state { stateParserContext = ListItemState }
+  parsed <- p
+  updateState (\st -> st {stateParserContext = oldContext})
+  return parsed
+
+listContinuation :: PandocMonad m => Int -> MuseParser m String
+listContinuation markerLength = try $ do
+  result <- many1 $ listLine markerLength
+  blanks <- many1 blankline
+  return $ concat result ++ blanks
+
+listStart :: PandocMonad m => MuseParser m Int -> MuseParser m Int
+listStart marker = try $ do
+  preWhitespace <- length <$> many spaceChar
+  st <- stateParserContext <$> getState
+  getPosition >>= \pos -> guard (st == ListItemState || sourceColumn pos /= 1)
+  markerLength <- marker
+  postWhitespace <- length <$> many1 spaceChar
+  return $ preWhitespace + markerLength + postWhitespace
+
+listItem :: PandocMonad m => MuseParser m Int -> MuseParser m (F Blocks)
+listItem start = try $ do
+  markerLength <- start
+  firstLine <- anyLineNewline
+  blank <- option "" ("\n" <$ blankline)
+  restLines <- many $ listLine markerLength
+  let first = firstLine ++ blank ++ concat restLines
+  rest <- many $ listContinuation markerLength
+  parseFromString (withListContext parseBlocks) $ concat (first:rest) ++ "\n"
+
+bulletListItems :: PandocMonad m => MuseParser m (F [Blocks])
+bulletListItems = sequence <$> many1 (listItem bulletListStart)
+
+bulletListStart :: PandocMonad m => MuseParser m Int
+bulletListStart = listStart (char '-' >> return 1)
+
+bulletList :: PandocMonad m => MuseParser m (F Blocks)
+bulletList = do
+  listItems <- bulletListItems
+  return $ B.bulletList <$> listItems
+
+orderedListStart :: PandocMonad m
+                 => ListNumberStyle
+                 -> ListNumberDelim
+                 -> MuseParser m Int
+orderedListStart style delim = listStart (snd <$> withHorizDisplacement (orderedListMarker style delim))
+
+orderedList :: PandocMonad m => MuseParser m (F Blocks)
+orderedList = try $ do
+  p@(_, style, delim) <- lookAhead (many spaceChar *> anyOrderedListMarker <* spaceChar)
+  guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman]
+  guard $ delim == Period
+  items <- sequence <$> many1 (listItem $ orderedListStart style delim)
+  return $ B.orderedListWith p <$> items
+
+--
+-- tables
+--
+
+data MuseTable = MuseTable
+  { museTableCaption :: Inlines
+  , museTableHeaders :: [[Blocks]]
+  , museTableRows :: [[Blocks]]
+  , museTableFooters :: [[Blocks]]
+  }
+
+data MuseTableElement = MuseHeaderRow (F [Blocks])
+                      | MuseBodyRow   (F [Blocks])
+                      | MuseFooterRow (F [Blocks])
+                      | MuseCaption (F Inlines)
+
+museToPandocTable :: MuseTable -> Blocks
+museToPandocTable (MuseTable caption headers body footers) =
+  B.table caption attrs headRow rows
+  where ncol = maximum (0 : map length (headers ++ body ++ footers))
+        attrs = replicate ncol (AlignDefault, 0.0)
+        headRow = if null headers then [] else head headers
+        rows = (if null headers then [] else tail headers) ++ body ++ footers
+
+museAppendElement :: MuseTable
+                  -> MuseTableElement
+                  -> F MuseTable
+museAppendElement tbl element =
+  case element of
+    MuseHeaderRow row -> do
+      row' <- row
+      return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] }
+    MuseBodyRow row -> do
+      row' <- row
+      return tbl{ museTableRows = museTableRows tbl ++ [row'] }
+    MuseFooterRow row-> do
+      row' <- row
+      return tbl{ museTableFooters = museTableFooters tbl ++ [row'] }
+    MuseCaption inlines -> do
+      inlines' <- inlines
+      return tbl{ museTableCaption = inlines' }
+
+tableCell :: PandocMonad m => MuseParser m (F Blocks)
+tableCell = try $ do
+  content <- trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd)
+  return $ B.plain <$> content
+  where cellEnd = try $ void (many1 spaceChar >> char '|') <|> void newline <|> eof
+
+tableElements :: PandocMonad m => MuseParser m [MuseTableElement]
+tableElements = tableParseElement `sepEndBy1` (void newline <|> eof)
+
+elementsToTable :: [MuseTableElement] -> F MuseTable
+elementsToTable = foldM museAppendElement emptyTable
+  where emptyTable = MuseTable mempty mempty mempty mempty
+
+table :: PandocMonad m => MuseParser m (F Blocks)
+table = try $ do
+  rows <- tableElements
+  let tbl = elementsToTable rows
+  let pandocTbl = museToPandocTable <$> tbl :: F Blocks
+  return pandocTbl
+
+tableParseElement :: PandocMonad m => MuseParser m MuseTableElement
+tableParseElement = tableParseHeader
+                <|> tableParseBody
+                <|> tableParseFooter
+                <|> tableParseCaption
+
+tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks])
+tableParseRow n = try $ do
+  fields <- tableCell `sepBy2` fieldSep
+  return $ sequence fields
+    where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p)
+          fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline))
+
+tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement
+tableParseHeader = MuseHeaderRow <$> tableParseRow 2
+
+tableParseBody :: PandocMonad m => MuseParser m MuseTableElement
+tableParseBody = MuseBodyRow <$> tableParseRow 1
+
+tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement
+tableParseFooter = MuseFooterRow <$> tableParseRow 3
+
+tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement
+tableParseCaption = try $ do
+  many spaceChar
+  string "|+"
+  contents <- trimInlinesF . mconcat <$> many1Till inline (lookAhead $ string "+|")
+  string "+|"
+  return $ MuseCaption contents
+
+--
+-- inline parsers
+--
+
+inline :: PandocMonad m => MuseParser m (F Inlines)
+inline = choice [ whitespace
+                , br
+                , footnote
+                , strong
+                , strongTag
+                , emph
+                , emphTag
+                , superscriptTag
+                , subscriptTag
+                , strikeoutTag
+                , link
+                , code
+                , codeTag
+                , str
+                , symbol
+                ] <?> "inline"
+
+footnote :: PandocMonad m => MuseParser m (F Inlines)
+footnote = try $ do
+  ref <- noteMarker
+  return $ do
+    notes <- asksF stateNotes'
+    case M.lookup ref notes of
+      Nothing -> return $ B.str $ "[" ++ ref ++ "]"
+      Just (_pos, contents) -> do
+        st <- askF
+        let contents' = runF contents st { stateNotes' = M.empty }
+        return $ B.note contents'
+
+whitespace :: PandocMonad m => MuseParser m (F Inlines)
+whitespace = liftM return (lb <|> regsp)
+  where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+        regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: PandocMonad m => MuseParser m (F Inlines)
+br = try $ do
+  string "<br>"
+  return $ return B.linebreak
+
+linebreak :: PandocMonad m => MuseParser m (F Inlines)
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+  where lastNewline  = do
+                         eof
+                         return $ return mempty
+        innerNewline = return $ return B.space
+
+emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines)
+emphasisBetween c = try $ enclosedInlines c c
+
+enclosedInlines :: (PandocMonad m, Show a, Show b)
+                => MuseParser m a
+                -> MuseParser m b
+                -> MuseParser m (F Inlines)
+enclosedInlines start end = try $
+  trimInlinesF . mconcat <$> enclosed start end inline
+
+verbatimBetween :: PandocMonad m
+                => Char
+                -> MuseParser m String
+verbatimBetween c = try $ do
+  char c
+  many1Till anyChar $ char c
+
+inlineTag :: PandocMonad m
+          => (Inlines -> Inlines)
+          -> String
+          -> MuseParser m (F Inlines)
+inlineTag f s = do
+  res <- parseHtmlContent s inline
+  return $ f <$> mconcat res
+
+strongTag :: PandocMonad m => MuseParser m (F Inlines)
+strongTag = inlineTag B.strong "strong"
+
+strong :: PandocMonad m => MuseParser m (F Inlines)
+strong = fmap B.strong <$> emphasisBetween (string "**")
+
+emph :: PandocMonad m => MuseParser m (F Inlines)
+emph = fmap B.emph <$> emphasisBetween (char '*')
+
+emphTag :: PandocMonad m => MuseParser m (F Inlines)
+emphTag = inlineTag B.emph "em"
+
+superscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+superscriptTag = inlineTag B.superscript "sup"
+
+subscriptTag :: PandocMonad m => MuseParser m (F Inlines)
+subscriptTag = inlineTag B.subscript "sub"
+
+strikeoutTag :: PandocMonad m => MuseParser m (F Inlines)
+strikeoutTag = inlineTag B.strikeout "del"
+
+code :: PandocMonad m => MuseParser m (F Inlines)
+code = return . B.code <$> verbatimBetween '='
+
+codeTag :: PandocMonad m => MuseParser m (F Inlines)
+codeTag = do
+  (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+  return $ return $ B.codeWith attrs $ fromEntities content
+
+str :: PandocMonad m => MuseParser m (F Inlines)
+str = liftM (return . B.str) (many1 alphaNum <|> count 1 characterReference)
+
+symbol :: PandocMonad m => MuseParser m (F Inlines)
+symbol = liftM (return . B.str) $ count 1 nonspaceChar
+
+link :: PandocMonad m => MuseParser m (F Inlines)
+link = try $ do
+  st <- getState
+  guard $ stateAllowLinks st
+  setState $ st{ stateAllowLinks = False }
+  (url, title, content) <- linkText
+  setState $ st{ stateAllowLinks = True }
+  return $ case stripPrefix "URL:" url of
+             Nothing -> if isImageUrl url
+                          then B.image url title <$> fromMaybe (return mempty) content
+                          else B.link url title <$> fromMaybe (return $ B.str url) content
+             Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content
+    where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
+          imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
+          isImageUrl = (`elem` imageExtensions) . takeExtension
+
+linkContent :: PandocMonad m => MuseParser m (F Inlines)
+linkContent = do
+  char '['
+  res <- many1Till anyChar $ char ']'
+  parseFromString (mconcat <$> many1 inline) res
+
+linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines))
+linkText = do
+  string "[["
+  url <- many1Till anyChar $ char ']'
+  content <- optionMaybe linkContent
+  char ']'
+  return (url, "", content)