diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Muse.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Muse.hs | 924 |
1 files changed, 924 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs new file mode 100644 index 000000000..4a9523e84 --- /dev/null +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -0,0 +1,924 @@ +{-# LANGUAGE FlexibleContexts #-} +{- + Copyright (C) 2017-2018 Alexander Krotov <ilabdsf@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Muse + Copyright : Copyright (C) 2017-2018 Alexander Krotov + License : GNU GPL, version 2 or above + + Maintainer : Alexander Krotov <ilabdsf@gmail.com> + Stability : alpha + Portability : portable + +Conversion of Muse text to 'Pandoc' document. +-} +{- +TODO: +- Page breaks (five "*") +- Org tables +- table.el tables +- Images with attributes (floating and width) +- Citations and <biblio> +- <play> environment +-} +module Text.Pandoc.Readers.Muse (readMuse) where + +import Control.Monad +import Control.Monad.Except (throwError) +import Data.Char (isLetter) +import Data.Default +import Data.List (stripPrefix, intercalate) +import Data.List.Split (splitOn) +import qualified Data.Map as M +import qualified Data.Set as Set +import Data.Maybe (fromMaybe, isNothing) +import Data.Text (Text, unpack) +import System.FilePath (takeExtension) +import Text.HTML.TagSoup +import Text.Pandoc.Builder (Blocks, Inlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (F) +import Text.Pandoc.Readers.HTML (htmlTag) +import Text.Pandoc.Shared (crFilter, underlineSpan) + +-- | Read Muse from an input string and return a Pandoc document. +readMuse :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readMuse opts s = do + res <- readWithM parseMuse def{ museOptions = opts } (unpack (crFilter s)) + case res of + Left e -> throwError e + Right d -> return d + +type F = Future MuseState + +data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata + , museOptions :: ReaderOptions + , museHeaders :: M.Map Inlines String -- ^ List of headers and ids (used for implicit ref links) + , museIdentifierList :: Set.Set String + , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed + , museLogMessages :: [LogMessage] + , museNotes :: M.Map String (SourcePos, F Blocks) + , museInLink :: Bool + , museInPara :: Bool + } + +instance Default MuseState where + def = defaultMuseState + +defaultMuseState :: MuseState +defaultMuseState = MuseState { museMeta = return nullMeta + , museOptions = def + , museHeaders = M.empty + , museIdentifierList = Set.empty + , museLastStrPos = Nothing + , museLogMessages = [] + , museNotes = M.empty + , museInLink = False + , museInPara = False + } + +type MuseParser = ParserT String MuseState + +instance HasReaderOptions MuseState where + extractReaderOptions = museOptions + +instance HasHeaderMap MuseState where + extractHeaderMap = museHeaders + updateHeaderMap f st = st{ museHeaders = f $ museHeaders st } + +instance HasIdentifierList MuseState where + extractIdentifierList = museIdentifierList + updateIdentifierList f st = st{ museIdentifierList = f $ museIdentifierList st } + +instance HasLastStrPosition MuseState where + setLastStrPos pos st = st{ museLastStrPos = Just pos } + getLastStrPos st = museLastStrPos st + +instance HasLogMessages MuseState where + addLogMessage m s = s{ museLogMessages = m : museLogMessages s } + getLogMessages = reverse . museLogMessages + +-- +-- main parser +-- + +parseMuse :: PandocMonad m => MuseParser m Pandoc +parseMuse = do + many directive + blocks <- parseBlocks + st <- getState + let doc = runF (do Pandoc _ bs <- B.doc <$> blocks + meta <- museMeta st + return $ Pandoc meta bs) st + reportLogMessages + return doc + +-- +-- utility functions +-- + +eol :: Stream s m Char => ParserT s st m () +eol = void newline <|> eof + +htmlElement :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlElement tag = try $ do + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + content <- manyTill anyChar endtag + return (htmlAttrToPandoc attr, content) + where + endtag = void $ htmlTag (~== TagClose tag) + +htmlBlock :: PandocMonad m => String -> MuseParser m (Attr, String) +htmlBlock tag = try $ do + many spaceChar + res <- htmlElement tag + manyTill spaceChar eol + return res + +htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc attrs = (ident, classes, keyvals) + where + ident = fromMaybe "" $ lookup "id" attrs + classes = maybe [] words $ lookup "class" attrs + keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + +parseHtmlContent :: PandocMonad m + => String -> MuseParser m (Attr, F Blocks) +parseHtmlContent tag = try $ do + many spaceChar + (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) + manyTill spaceChar eol + content <- parseBlocksTill (try $ manyTill spaceChar endtag) + manyTill spaceChar eol -- closing tag must be followed by optional whitespace and newline + return (htmlAttrToPandoc attr, content) + where + endtag = void $ htmlTag (~== TagClose tag) + +commonPrefix :: String -> String -> String +commonPrefix _ [] = [] +commonPrefix [] _ = [] +commonPrefix (x:xs) (y:ys) + | x == y = x : commonPrefix xs ys + | otherwise = [] + +atStart :: PandocMonad m => MuseParser m a -> MuseParser m a +atStart p = do + pos <- getPosition + st <- getState + guard $ museLastStrPos st /= Just pos + p + +someUntil :: (Stream s m t) + => ParserT s u m a + -> ParserT s u m b + -> ParserT s u m ([a], b) +someUntil p end = do + first <- p + (rest, e) <- manyUntil p end + return (first:rest, e) + +-- +-- directive parsers +-- + +-- While not documented, Emacs Muse allows "-" in directive name +parseDirectiveKey :: PandocMonad m => MuseParser m String +parseDirectiveKey = do + char '#' + many (letter <|> char '-') + +parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective = do + key <- parseDirectiveKey + spaceChar + value <- trimInlinesF . mconcat <$> manyTill (choice inlineList) eol + return (key, value) + +parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective = do + key <- parseDirectiveKey + many1 spaceChar + value <- trimInlinesF . mconcat <$> many1Till inline endOfDirective + many blankline + return (key, value) + where + endOfDirective = lookAhead $ eof <|> try (newline >> (void blankline <|> void parseDirectiveKey)) + +directive :: PandocMonad m => MuseParser m () +directive = do + ext <- getOption readerExtensions + (key, value) <- if extensionEnabled Ext_amuse ext then parseAmuseDirective else parseEmacsDirective + updateState $ \st -> st { museMeta = B.setMeta (translateKey key) <$> value <*> museMeta st } + where translateKey "cover" = "cover-image" + translateKey x = x + +-- +-- block parsers +-- + +parseBlocks :: PandocMonad m + => MuseParser m (F Blocks) +parseBlocks = + try parseEnd <|> + try blockStart <|> + try listStart <|> + try paraStart + where + parseEnd = mempty <$ eof + blockStart = do first <- header <|> blockElements <|> emacsNoteBlock + rest <- parseBlocks + return $ first B.<> rest + listStart = do + updateState (\st -> st { museInPara = False }) + (first, rest) <- anyListUntil parseBlocks <|> amuseNoteBlockUntil parseBlocks + return $ first B.<> rest + paraStart = do + indent <- length <$> many spaceChar + (first, rest) <- paraUntil parseBlocks + let first' = if indent >= 2 && indent < 6 then B.blockQuote <$> first else first + return $ first' B.<> rest + +parseBlocksTill :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks) +parseBlocksTill end = + try parseEnd <|> + try blockStart <|> + try listStart <|> + try paraStart + where + parseEnd = mempty <$ end + blockStart = do first <- blockElements + rest <- continuation + return $ first B.<> rest + listStart = do + updateState (\st -> st { museInPara = False }) + (first, e) <- anyListUntil ((Left <$> end) <|> (Right <$> continuation)) + case e of + Left _ -> return first + Right rest -> return $ first B.<> rest + paraStart = do (first, e) <- paraUntil ((Left <$> end) <|> (Right <$> continuation)) + case e of + Left _ -> return first + Right rest -> return $ first B.<> rest + continuation = parseBlocksTill end + +listItemContentsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m a + -> MuseParser m (F Blocks, a) +listItemContentsUntil col pre end = + try blockStart <|> + try listStart <|> + try paraStart + where + parsePre = do e <- pre + return (mempty, e) + parseEnd = do e <- end + return (mempty, e) + paraStart = do + (first, e) <- paraUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end)) + case e of + Left ee -> return (first, ee) + Right (rest, ee) -> return (first B.<> rest, ee) + blockStart = do first <- blockElements + (rest, e) <- parsePre <|> continuation <|> parseEnd + return (first B.<> rest, e) + listStart = do + updateState (\st -> st { museInPara = False }) + (first, e) <- anyListUntil ((Left <$> pre) <|> (Right <$> continuation) <|> (Left <$> end)) + case e of + Left ee -> return (first, ee) + Right (rest, ee) -> return (first B.<> rest, ee) + continuation = try $ do blank <- optionMaybe blankline + skipMany blankline + indentWith col + updateState (\st -> st { museInPara = museInPara st && isNothing blank }) + listItemContentsUntil col pre end + +parseBlock :: PandocMonad m => MuseParser m (F Blocks) +parseBlock = do + res <- blockElements <|> para + trace (take 60 $ show $ B.toList $ runF res def) + return res + where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) + +blockElements :: PandocMonad m => MuseParser m (F Blocks) +blockElements = do + updateState (\st -> st { museInPara = False }) + choice [ mempty <$ blankline + , comment + , separator + , example + , exampleTag + , literalTag + , centerTag + , rightTag + , quoteTag + , divTag + , verseTag + , lineBlock + , table + , commentTag + ] + +comment :: PandocMonad m => MuseParser m (F Blocks) +comment = try $ do + char ';' + optional (spaceChar >> many (noneOf "\n")) + eol + return mempty + +separator :: PandocMonad m => MuseParser m (F Blocks) +separator = try $ do + string "----" + many $ char '-' + many spaceChar + eol + return $ return B.horizontalRule + +header :: PandocMonad m => MuseParser m (F Blocks) +header = try $ do + getPosition >>= \pos -> guard (sourceColumn pos == 1) + level <- fmap length $ many1 $ char '*' + guard $ level <= 5 + spaceChar + content <- trimInlinesF . mconcat <$> manyTill inline eol + anchorId <- option "" parseAnchor + attr <- registerHeader (anchorId, [], []) (runF content def) + return $ B.headerWith attr level <$> content + +example :: PandocMonad m => MuseParser m (F Blocks) +example = try $ do + string "{{{" + optional blankline + contents <- manyTill anyChar $ try (optional blankline >> string "}}}") + return $ return $ B.codeBlock contents + +-- Trim up to one newline from the beginning of the string. +lchop :: String -> String +lchop s = case s of + '\n':ss -> ss + _ -> s + +-- Trim up to one newline from the end of the string. +rchop :: String -> String +rchop = reverse . lchop . reverse + +dropSpacePrefix :: [String] -> [String] +dropSpacePrefix lns = + map (drop maxIndent) lns + where flns = filter (not . all (== ' ')) lns + maxIndent = if null flns then maximum (map length lns) else length $ takeWhile (== ' ') $ foldl1 commonPrefix flns + +exampleTag :: PandocMonad m => MuseParser m (F Blocks) +exampleTag = try $ do + (attr, contents) <- htmlBlock "example" + return $ return $ B.codeBlockWith attr $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop contents + +literalTag :: PandocMonad m => MuseParser m (F Blocks) +literalTag = + (return . rawBlock) <$> htmlBlock "literal" + where + -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs + rawBlock (attrs, content) = B.rawBlock (format attrs) $ rchop $ intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content + +-- <center> tag is ignored +centerTag :: PandocMonad m => MuseParser m (F Blocks) +centerTag = snd <$> parseHtmlContent "center" + +-- <right> tag is ignored +rightTag :: PandocMonad m => MuseParser m (F Blocks) +rightTag = snd <$> parseHtmlContent "right" + +quoteTag :: PandocMonad m => MuseParser m (F Blocks) +quoteTag = fmap B.blockQuote . snd <$> parseHtmlContent "quote" + +-- <div> tag is supported by Emacs Muse, but not Amusewiki 2.025 +divTag :: PandocMonad m => MuseParser m (F Blocks) +divTag = do + (attrs, content) <- parseHtmlContent "div" + return $ B.divWith attrs <$> content + +verseLine :: PandocMonad m => MuseParser m (F Inlines) +verseLine = do + indent <- (B.str <$> many1 (char ' ' >> pure '\160')) <|> pure mempty + rest <- manyTill (choice inlineList) newline + return $ trimInlinesF $ mconcat (pure indent : rest) + +verseLines :: PandocMonad m => MuseParser m (F Blocks) +verseLines = do + lns <- many verseLine + return $ B.lineBlock <$> sequence lns + +verseTag :: PandocMonad m => MuseParser m (F Blocks) +verseTag = do + (_, content) <- htmlBlock "verse" + parseFromString verseLines (intercalate "\n" $ dropSpacePrefix $ splitOn "\n" $ lchop content) + +commentTag :: PandocMonad m => MuseParser m (F Blocks) +commentTag = htmlBlock "comment" >> return mempty + +-- Indented paragraph is either center, right or quote +paraUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +paraUntil end = do + state <- getState + guard $ not $ museInPara state + setState $ state{ museInPara = True } + (l, e) <- someUntil inline $ try (manyTill spaceChar eol >> end) + updateState (\st -> st { museInPara = False }) + return (fmap B.para $ trimInlinesF $ mconcat l, e) + +noteMarker :: PandocMonad m => MuseParser m String +noteMarker = try $ do + char '[' + first <- oneOf "123456789" + rest <- manyTill digit (char ']') + return $ first:rest + +-- Amusewiki version of note +-- Parsing is similar to list item, except that note marker is used instead of list marker +amuseNoteBlockUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +amuseNoteBlockUntil end = try $ do + guardEnabled Ext_amuse + ref <- noteMarker <* spaceChar + pos <- getPosition + updateState (\st -> st { museInPara = False }) + (content, e) <- listItemContentsUntil (sourceColumn pos - 1) (fail "x") end + oldnotes <- museNotes <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + return (mempty, e) + +-- Emacs version of note +-- Notes are allowed only at the end of text, no indentation is required. +emacsNoteBlock :: PandocMonad m => MuseParser m (F Blocks) +emacsNoteBlock = try $ do + guardDisabled Ext_amuse + pos <- getPosition + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillNote + oldnotes <- museNotes <$> getState + case M.lookup ref oldnotes of + Just _ -> logMessage $ DuplicateNoteReference ref pos + Nothing -> return () + updateState $ \s -> s{ museNotes = M.insert ref (pos, content) oldnotes } + return mempty + where + blocksTillNote = + many1Till parseBlock (eof <|> () <$ lookAhead noteMarker) + +-- +-- Verse markup +-- + +lineVerseLine :: PandocMonad m => MuseParser m (F Inlines) +lineVerseLine = try $ do + string "> " + indent <- B.str <$> many (char ' ' >> pure '\160') + rest <- manyTill (choice inlineList) eol + return $ trimInlinesF $ mconcat (pure indent : rest) + +blanklineVerseLine :: PandocMonad m => MuseParser m (F Inlines) +blanklineVerseLine = try $ do + char '>' + blankline + pure mempty + +lineBlock :: PandocMonad m => MuseParser m (F Blocks) +lineBlock = try $ do + col <- sourceColumn <$> getPosition + lns <- (blanklineVerseLine <|> lineVerseLine) `sepBy1'` try (indentWith (col - 1)) + return $ B.lineBlock <$> sequence lns + +-- +-- lists +-- + +bulletListItemsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +bulletListItemsUntil indent end = try $ do + char '-' + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil (indent + 2) (Right <$> try (optional blankline >> indentWith indent >> bulletListItemsUntil indent end)) (Left <$> end) + case e of + Left ee -> return ([x], ee) + Right (xs, ee) -> return (x:xs, ee) + +bulletListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +bulletListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guard $ indent /= 0 + (items, e) <- bulletListItemsUntil indent end + return (B.bulletList <$> sequence items, e) + +-- | Parses an ordered list marker and returns list attributes. +anyMuseOrderedListMarker :: PandocMonad m => MuseParser m ListAttributes +anyMuseOrderedListMarker = do + (style, start) <- decimal <|> lowerRoman <|> upperRoman <|> lowerAlpha <|> upperAlpha + char '.' + return (start, style, Period) + +museOrderedListMarker :: PandocMonad m + => ListNumberStyle + -> MuseParser m Int +museOrderedListMarker style = do + (_, start) <- case style of + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + _ -> fail "Unhandled case" + char '.' + return start + +orderedListItemsUntil :: PandocMonad m + => Int + -> ListNumberStyle + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +orderedListItemsUntil indent style end = + continuation + where + continuation = try $ do + pos <- getPosition + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil (sourceColumn pos) (Right <$> try (optionMaybe blankline >> indentWith indent >> museOrderedListMarker style >> continuation)) (Left <$> end) + case e of + Left ee -> return ([x], ee) + Right (xs, ee) -> return (x:xs, ee) + +orderedListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +orderedListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guard $ indent /= 0 + p@(_, style, _) <- anyMuseOrderedListMarker + guard $ style `elem` [Decimal, LowerAlpha, UpperAlpha, LowerRoman, UpperRoman] + (items, e) <- orderedListItemsUntil indent style end + return (B.orderedListWith p <$> sequence items, e) + +descriptionsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F Blocks], a) +descriptionsUntil indent end = do + void spaceChar <|> lookAhead eol + updateState (\st -> st { museInPara = False }) + (x, e) <- listItemContentsUntil indent (Right <$> try (optional blankline >> indentWith indent >> manyTill spaceChar (string "::") >> descriptionsUntil indent end)) (Left <$> end) + case e of + Right (xs, ee) -> return (x:xs, ee) + Left ee -> return ([x], ee) + +definitionListItemsUntil :: PandocMonad m + => Int + -> MuseParser m a + -> MuseParser m ([F (Inlines, [Blocks])], a) +definitionListItemsUntil indent end = + continuation + where + continuation = try $ do + pos <- getPosition + term <- trimInlinesF . mconcat <$> manyTill (choice inlineList) (try $ string "::") + (x, e) <- descriptionsUntil (sourceColumn pos) ((Right <$> try (optional blankline >> indentWith indent >> continuation)) <|> (Left <$> end)) + let xx = do + term' <- term + x' <- sequence x + return (term', x') + case e of + Left ee -> return ([xx], ee) + Right (xs, ee) -> return (xx:xs, ee) + +definitionListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +definitionListUntil end = try $ do + many spaceChar + pos <- getPosition + let indent = sourceColumn pos - 1 + guardDisabled Ext_amuse <|> guard (indent /= 0) -- Initial space is required by Amusewiki, but not Emacs Muse + (items, e) <- definitionListItemsUntil indent end + return (B.definitionList <$> sequence items, e) + +anyListUntil :: PandocMonad m + => MuseParser m a + -> MuseParser m (F Blocks, a) +anyListUntil end = + bulletListUntil end <|> orderedListUntil end <|> definitionListUntil end + +-- +-- tables +-- + +data MuseTable = MuseTable + { museTableCaption :: Inlines + , museTableHeaders :: [[Blocks]] + , museTableRows :: [[Blocks]] + , museTableFooters :: [[Blocks]] + } + +data MuseTableElement = MuseHeaderRow (F [Blocks]) + | MuseBodyRow (F [Blocks]) + | MuseFooterRow (F [Blocks]) + | MuseCaption (F Inlines) + +museToPandocTable :: MuseTable -> Blocks +museToPandocTable (MuseTable caption headers body footers) = + B.table caption attrs headRow rows + where ncol = maximum (0 : map length (headers ++ body ++ footers)) + attrs = replicate ncol (AlignDefault, 0.0) + headRow = if null headers then [] else head headers + rows = (if null headers then [] else tail headers) ++ body ++ footers + +museAppendElement :: MuseTable + -> MuseTableElement + -> F MuseTable +museAppendElement tbl element = + case element of + MuseHeaderRow row -> do + row' <- row + return tbl{ museTableHeaders = museTableHeaders tbl ++ [row'] } + MuseBodyRow row -> do + row' <- row + return tbl{ museTableRows = museTableRows tbl ++ [row'] } + MuseFooterRow row-> do + row' <- row + return tbl{ museTableFooters = museTableFooters tbl ++ [row'] } + MuseCaption inlines -> do + inlines' <- inlines + return tbl{ museTableCaption = inlines' } + +tableCell :: PandocMonad m => MuseParser m (F Blocks) +tableCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline (lookAhead cellEnd) + where cellEnd = try $ void (many1 spaceChar >> char '|') <|> eol + +tableElements :: PandocMonad m => MuseParser m [MuseTableElement] +tableElements = tableParseElement `sepEndBy1` eol + +elementsToTable :: [MuseTableElement] -> F MuseTable +elementsToTable = foldM museAppendElement emptyTable + where emptyTable = MuseTable mempty mempty mempty mempty + +table :: PandocMonad m => MuseParser m (F Blocks) +table = try $ do + rows <- tableElements + let tbl = elementsToTable rows + let pandocTbl = museToPandocTable <$> tbl :: F Blocks + return pandocTbl + +tableParseElement :: PandocMonad m => MuseParser m MuseTableElement +tableParseElement = tableParseHeader + <|> tableParseBody + <|> tableParseFooter + <|> tableParseCaption + +tableParseRow :: PandocMonad m => Int -> MuseParser m (F [Blocks]) +tableParseRow n = try $ do + fields <- tableCell `sepBy2` fieldSep + return $ sequence fields + where p `sepBy2` sep = (:) <$> p <*> many1 (sep >> p) + fieldSep = many1 spaceChar >> count n (char '|') >> (void (many1 spaceChar) <|> void (lookAhead newline)) + +tableParseHeader :: PandocMonad m => MuseParser m MuseTableElement +tableParseHeader = MuseHeaderRow <$> tableParseRow 2 + +tableParseBody :: PandocMonad m => MuseParser m MuseTableElement +tableParseBody = MuseBodyRow <$> tableParseRow 1 + +tableParseFooter :: PandocMonad m => MuseParser m MuseTableElement +tableParseFooter = MuseFooterRow <$> tableParseRow 3 + +tableParseCaption :: PandocMonad m => MuseParser m MuseTableElement +tableParseCaption = try $ do + many spaceChar + string "|+" + MuseCaption <$> (trimInlinesF . mconcat <$> many1Till inline (string "+|")) + +-- +-- inline parsers +-- + +inlineList :: PandocMonad m => [MuseParser m (F Inlines)] +inlineList = [ whitespace + , br + , anchor + , footnote + , strong + , strongTag + , emph + , emphTag + , underlined + , superscriptTag + , subscriptTag + , strikeoutTag + , verbatimTag + , nbsp + , link + , code + , codeTag + , inlineLiteralTag + , str + , symbol + ] + +inline :: PandocMonad m => MuseParser m (F Inlines) +inline = endline <|> choice inlineList <?> "inline" + +endline :: PandocMonad m => MuseParser m (F Inlines) +endline = try $ do + newline + notFollowedBy blankline + returnF B.softbreak + +parseAnchor :: PandocMonad m => MuseParser m String +parseAnchor = try $ do + getPosition >>= \pos -> guard (sourceColumn pos == 1) + char '#' + first <- letter + rest <- many (letter <|> digit) + skipMany spaceChar <|> void newline + return $ first:rest + +anchor :: PandocMonad m => MuseParser m (F Inlines) +anchor = try $ do + anchorId <- parseAnchor + return $ return $ B.spanWith (anchorId, [], []) mempty + +footnote :: PandocMonad m => MuseParser m (F Inlines) +footnote = try $ do + ref <- noteMarker + return $ do + notes <- asksF museNotes + case M.lookup ref notes of + Nothing -> return $ B.str $ "[" ++ ref ++ "]" + Just (_pos, contents) -> do + st <- askF + let contents' = runF contents st { museNotes = M.empty } + return $ B.note contents' + +whitespace :: PandocMonad m => MuseParser m (F Inlines) +whitespace = try $ do + skipMany1 spaceChar + return $ return B.space + +br :: PandocMonad m => MuseParser m (F Inlines) +br = try $ do + string "<br>" + return $ return B.linebreak + +emphasisBetween :: (PandocMonad m, Show a) => MuseParser m a -> MuseParser m (F Inlines) +emphasisBetween c = try $ enclosedInlines c c + +enclosedInlines :: (PandocMonad m, Show a, Show b) + => MuseParser m a + -> MuseParser m b + -> MuseParser m (F Inlines) +enclosedInlines start end = try $ + trimInlinesF . mconcat <$> (enclosed (atStart start) end inline <* notFollowedBy (satisfy isLetter)) + +inlineTag :: PandocMonad m + => (Inlines -> Inlines) + -> String + -> MuseParser m (F Inlines) +inlineTag f tag = try $ do + htmlTag (~== TagOpen tag []) + res <- manyTill inline (void $ htmlTag (~== TagClose tag)) + return $ f <$> mconcat res + +strongTag :: PandocMonad m => MuseParser m (F Inlines) +strongTag = inlineTag B.strong "strong" + +strong :: PandocMonad m => MuseParser m (F Inlines) +strong = fmap B.strong <$> emphasisBetween (string "**") + +emph :: PandocMonad m => MuseParser m (F Inlines) +emph = fmap B.emph <$> emphasisBetween (char '*') + +underlined :: PandocMonad m => MuseParser m (F Inlines) +underlined = do + guardDisabled Ext_amuse -- Supported only by Emacs Muse + fmap underlineSpan <$> emphasisBetween (char '_') + +emphTag :: PandocMonad m => MuseParser m (F Inlines) +emphTag = inlineTag B.emph "em" + +superscriptTag :: PandocMonad m => MuseParser m (F Inlines) +superscriptTag = inlineTag B.superscript "sup" + +subscriptTag :: PandocMonad m => MuseParser m (F Inlines) +subscriptTag = inlineTag B.subscript "sub" + +strikeoutTag :: PandocMonad m => MuseParser m (F Inlines) +strikeoutTag = inlineTag B.strikeout "del" + +verbatimTag :: PandocMonad m => MuseParser m (F Inlines) +verbatimTag = return . B.text . snd <$> htmlElement "verbatim" + +nbsp :: PandocMonad m => MuseParser m (F Inlines) +nbsp = try $ do + string "~~" + return $ return $ B.str "\160" + +code :: PandocMonad m => MuseParser m (F Inlines) +code = try $ do + atStart $ char '=' + contents <- many1Till (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) $ char '=' + guard $ not $ null contents + guard $ head contents `notElem` " \t\n" + guard $ last contents `notElem` " \t\n" + notFollowedBy $ satisfy isLetter + return $ return $ B.code contents + +codeTag :: PandocMonad m => MuseParser m (F Inlines) +codeTag = do + (attrs, content) <- htmlElement "code" + return $ return $ B.codeWith attrs content + +inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) +inlineLiteralTag = + (return . rawInline) <$> htmlElement "literal" + where + -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML + format (_, _, kvs) = fromMaybe "html" $ lookup "style" kvs + rawInline (attrs, content) = B.rawInline (format attrs) content + +str :: PandocMonad m => MuseParser m (F Inlines) +str = do + result <- many1 alphaNum + updateLastStrPos + return $ return $ B.str result + +symbol :: PandocMonad m => MuseParser m (F Inlines) +symbol = return . B.str <$> count 1 nonspaceChar + +link :: PandocMonad m => MuseParser m (F Inlines) +link = try $ do + st <- getState + guard $ not $ museInLink st + setState $ st{ museInLink = True } + (url, title, content) <- linkText + updateState (\state -> state { museInLink = False }) + return $ case stripPrefix "URL:" url of + Nothing -> if isImageUrl url + then B.image url title <$> fromMaybe (return mempty) content + else B.link url title <$> fromMaybe (return $ B.str url) content + Just url' -> B.link url' title <$> fromMaybe (return $ B.str url') content + where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el + imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] + isImageUrl = (`elem` imageExtensions) . takeExtension + +linkContent :: PandocMonad m => MuseParser m (F Inlines) +linkContent = do + char '[' + trimInlinesF . mconcat <$> many1Till inline (string "]") + +linkText :: PandocMonad m => MuseParser m (String, String, Maybe (F Inlines)) +linkText = do + string "[[" + url <- many1Till anyChar $ char ']' + content <- optionMaybe linkContent + char ']' + return (url, "", content) |