diff options
authorJohn MacFarlane <>2013-03-19 20:23:48 -0700
committerJohn MacFarlane <>2013-03-19 20:23:48 -0700
commit8aa617238042ce8605863e4526e8f5002647fd97 (patch)
parentd596b0db8321cdb9c018ac8037d301291d0cc63c (diff)
parent74d53f4347623631c17be557d3682dd807214263 (diff)
Merge branch 'opml'
8 files changed, 268 insertions, 4 deletions
diff --git a/README b/README
index 39c69d08f..a6602718a 100644
--- a/README
+++ b/README
@@ -13,8 +13,8 @@ Description
Pandoc is a [Haskell] library for converting from one markup format to
another, and a command-line tool that uses this library. It can read
[markdown] and (subsets of) [Textile], [reStructuredText], [HTML],
-[LaTeX], [MediaWiki markup], and [DocBook XML]; and it can write plain
-text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
+[LaTeX], [MediaWiki markup], [OPML], and [DocBook XML]; and it can write
+plain text, [markdown], [reStructuredText], [XHTML], [HTML 5], [LaTeX]
(including [beamer] slide shows), [ConTeXt], [RTF], [DocBook XML],
[OpenDocument XML], [ODT], [Word docx], [GNU Texinfo], [MediaWiki
markup], [EPUB] (v2 or v3), [FictionBook2], [Textile], [groff man] pages, [Emacs
@@ -143,7 +143,7 @@ General options
`markdown_phpextra` (PHP Markdown Extra extended markdown),
`markdown_github` (github extended markdown),
`textile` (Textile), `rst` (reStructuredText), `html` (HTML),
- `docbook` (DocBook XML), `mediawiki` (MediaWiki markup),
+ `docbook` (DocBook XML), `opml` (OPML), `mediawiki` (MediaWiki markup),
or `latex` (LaTeX). If `+lhs` is appended to `markdown`, `rst`,
`latex`, the input will be treated as literate Haskell source:
see [Literate Haskell support](#literate-haskell-support), below.
@@ -2624,6 +2624,7 @@ Sergey Astanin, Arlo O'Keeffe, Denis Laxalde, Brent Yorgey.
[DocBook XML]:
[OpenDocument XML]:
diff --git a/pandoc.cabal b/pandoc.cabal
index 7f82d11a1..c53e29838 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats
Description: Pandoc is a Haskell library for converting from one markup
format to another, and a command-line tool that uses
this library. It can read markdown and (subsets of) HTML,
- reStructuredText, LaTeX, DocBook, MediaWiki markup,
+ reStructuredText, LaTeX, DocBook, MediaWiki markup, OPML,
and Textile, and it can write markdown, reStructuredText,
HTML, LaTeX, ConTeXt, Docbook, OpenDocument, ODT,
Word docx, RTF, MediaWiki, Textile, groff man pages,
@@ -105,6 +105,8 @@ Extra-Source-Files:
+ tests/opml-reader.html,
+ tests/opml-reader.native,
@@ -278,6 +280,7 @@ Library
+ Text.Pandoc.Readers.OPML,
diff --git a/pandoc.hs b/pandoc.hs
index de132b0c8..8433460b3 100644
--- a/pandoc.hs
+++ b/pandoc.hs
@@ -758,6 +758,7 @@ defaultReaderName fallback (x:xs) =
".rst" -> "rst"
".lhs" -> "markdown+lhs"
".db" -> "docbook"
+ ".opml" -> "opml"
".wiki" -> "mediawiki"
".textile" -> "textile"
".native" -> "native"
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 8201bc881..80ddb72d7 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -72,6 +72,7 @@ module Text.Pandoc
, readHtml
, readTextile
, readDocBook
+ , readOPML
, readNative
-- * Writers: converting /from/ Pandoc format
, Writer (..)
@@ -113,6 +114,7 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.DocBook
+import Text.Pandoc.Readers.OPML
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
@@ -192,6 +194,7 @@ readers = [("native" , \_ s -> return $ readNative s)
,("rst" , \o s -> return $ readRST o s)
,("mediawiki" , \o s -> return $ readMediaWiki o s)
,("docbook" , \o s -> return $ readDocBook o s)
+ ,("opml" , \o s -> return $ readOPML o s)
,("textile" , \o s -> return $ readTextile o s) -- TODO : textile+lhs
,("html" , \o s -> return $ readHtml o s)
,("latex" , \o s -> return $ readLaTeX o s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
new file mode 100644
index 000000000..53b599349
--- /dev/null
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -0,0 +1,95 @@
+module Text.Pandoc.Readers.OPML ( readOPML ) where
+import Data.Char (toUpper)
+import Text.Pandoc.Options
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.Pandoc.Readers.HTML (readHtml)
+import Text.Pandoc.Readers.Markdown (readMarkdown)
+import Text.XML.Light
+import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Generics
+import Data.Monoid
+import Control.Monad.State
+import Control.Applicative ((<$>), (<$))
+-- | The parser monad: a plain 'State' computation over 'OPMLState'.
+type OPML = State OPMLState
+-- | State threaded through the OPML parser. 'readOPML' starts from level 0
+-- and empty metadata; 'parseBlock' updates the fields as elements are seen.
+data OPMLState = OPMLState{
+ -- Nesting depth of the outline currently being parsed; used as the
+ -- header level for @outline@ elements.
+ opmlSectionLevel :: Int
+ -- Document title, taken from the @title@ element.
+ , opmlDocTitle :: Inlines
+ -- Document authors, taken from the @ownerName@ element.
+ , opmlDocAuthors :: [Inlines]
+ -- Document date, taken from the @dateModified@ element.
+ , opmlDocDate :: Inlines
+ } deriving Show
+-- | Convert an OPML document, given as a 'String', into a 'Pandoc' value.
+--
+-- The input is parsed as XML, normalized with 'normalizeTree', and every
+-- top-level node is handed to 'parseBlock'. The title, authors and date
+-- collected in the parser state become the document metadata. The
+-- 'ReaderOptions' argument is currently ignored.
+readOPML :: ReaderOptions -> String -> Pandoc
+readOPML _ inp =
+    setTitle (opmlDocTitle finalState)
+  . setAuthors (opmlDocAuthors finalState)
+  . setDate (opmlDocDate finalState)
+  . doc
+  $ mconcat blockGroups
+  where
+    -- Parsing starts outside any outline, with no metadata recorded yet.
+    initialState = OPMLState{ opmlSectionLevel = 0
+                            , opmlDocTitle = mempty
+                            , opmlDocAuthors = []
+                            , opmlDocDate = mempty
+                            }
+    contents = normalizeTree (parseXML inp)
+    (blockGroups, finalState) =
+      runState (mapM parseBlock contents) initialState
+-- | Normalize parsed XML, consolidating adjacent text nodes and entity
+-- references.
+--
+-- The local rewrite is applied (via 'everywhere') to every @[Content]@
+-- value in the tree. At the head of a list it drops a raw CDATA node,
+-- merges two neighbouring text nodes, folds an entity reference into a
+-- neighbouring text node, and turns two neighbouring entity references
+-- into a single text node. Anything else is left untouched.
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT consolidate)
+  where
+    -- Build an ordinary (non-raw) text node.
+    plain str ln = Text (CData CDataText str ln)
+
+    consolidate :: [Content] -> [Content]
+    consolidate nodes = case nodes of
+      Text (CData CDataRaw _ _) : rest ->
+        rest
+      Text (CData CDataText a ln) : Text (CData CDataText b _) : rest ->
+        plain (a ++ b) ln : rest
+      Text (CData CDataText a ln) : CRef ref : rest ->
+        plain (a ++ convertEntity ref) ln : rest
+      CRef ref : Text (CData CDataText a ln) : rest ->
+        plain (convertEntity ref ++ a) ln : rest
+      CRef ref1 : CRef ref2 : rest ->
+        plain (convertEntity ref1 ++ convertEntity ref2) Nothing : rest
+      _ ->
+        nodes
+-- | Resolve an entity name to text. A name known to 'lookupEntity' becomes
+-- its character; an unknown name is returned upper-cased.
+convertEntity :: String -> String
+convertEntity name =
+  case lookupEntity name of
+    Just c  -> [c]
+    Nothing -> map toUpper name
+-- | Look up an attribute on an element by its unqualified name, returning
+-- the empty string when the element has no such attribute.
+attrValue :: String -> Element -> String
+attrValue attr elt = maybe "" id (lookupAttrBy hasName (elAttribs elt))
+  where
+    hasName key = qName key == attr
+-- | Interpret a string (an outline's @text@ attribute) as HTML and return
+-- its inline content.
+--
+-- The string is read with 'readHtml' using default options. When the
+-- result is exactly one 'Plain' or exactly one 'Para' block, that block's
+-- inlines are returned. Accepting 'Para' keeps text wrapped in a paragraph
+-- element from being silently discarded. Any other result (no blocks,
+-- several blocks, or another block type) yields 'mempty'.
+asHtml :: String -> Inlines
+asHtml s =
+  case readHtml def s of
+    Pandoc _ [Plain ils] -> fromList ils
+    Pandoc _ [Para ils]  -> fromList ils
+    _                    -> mempty
+-- | Interpret a string (an outline's @_note@ attribute) as markdown, read
+-- with default options, and return the resulting blocks.
+asMarkdown :: String -> Blocks
+asMarkdown s =
+  case readMarkdown def s of
+    Pandoc _ blocks -> fromList blocks
+-- | Parse every child node of an element with 'parseBlock' and concatenate
+-- the resulting blocks in document order.
+getBlocks :: Element -> OPML Blocks
+getBlocks e = fmap mconcat (mapM parseBlock (elContent e))
+-- | Translate one XML node into blocks, updating the parser state.
+--
+-- * @ownerName@, @dateModified@ and @title@ record document metadata and
+--   produce no blocks.
+-- * @outline@ becomes a header one level deeper than the current section
+--   level, followed by its note and its nested outlines.
+-- * @?xml@ produces nothing; any other element is transparent and only its
+--   children are parsed.
+-- * Non-element nodes produce nothing.
+parseBlock :: Content -> OPML Blocks
+parseBlock (Elem e) = dispatch (qName (elName e))
+  where
+    dispatch :: String -> OPML Blocks
+    dispatch "ownerName" =
+      record $ \st -> st{opmlDocAuthors = [text $ strContent e]}
+    dispatch "dateModified" =
+      record $ \st -> st{opmlDocDate = text $ strContent e}
+    dispatch "title" =
+      record $ \st -> st{opmlDocTitle = text $ strContent e}
+    dispatch "outline" = do
+      level <- gets opmlSectionLevel
+      section (level + 1)
+    dispatch "?xml" = return mempty
+    dispatch _ = getBlocks e
+
+    -- Apply a state update and contribute no blocks to the document.
+    record :: (OPMLState -> OPMLState) -> OPML Blocks
+    record update = mempty <$ modify update
+
+    -- Build the header for this outline at level @n@, then its note, then
+    -- its children. The section level is raised to @n@ only while the
+    -- children are parsed and is set back to @n - 1@ afterwards.
+    section :: Int -> OPML Blocks
+    section n = do
+      let headerText = asHtml (attrValue "text" e)
+          noteBlocks = asMarkdown (attrValue "_note" e)
+      modify $ \st -> st{ opmlSectionLevel = n }
+      children <- getBlocks e
+      modify $ \st -> st{ opmlSectionLevel = n - 1 }
+      -- Outlines of type "link" have their header text wrapped in a link
+      -- to the @url@ attribute.
+      let heading
+            | attrValue "type" e == "link" =
+                link (attrValue "url" e) "" headerText
+            | otherwise = headerText
+      return $ header n heading <> noteBlocks <> children
+parseBlock _ = return mempty
diff --git a/tests/Tests/Old.hs b/tests/Tests/Old.hs
index 9e7493504..c22fa9830 100644
--- a/tests/Tests/Old.hs
+++ b/tests/Tests/Old.hs
@@ -124,6 +124,10 @@ tests = [ testGroup "markdown"
, test "reader" ["-r", "mediawiki", "-w", "native", "-s"]
"" "mediawiki-reader.native"
+ , testGroup "opml"
+ [ test "reader" ["-r", "opml", "-w", "native", "-s"]
+ "opml-reader.opml" "opml-reader.native"
+ ]
, testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
[ "opendocument" , "context" , "texinfo"
, "man" , "plain" , "rtf", "org", "asciidoc"
diff --git a/tests/opml-reader.native b/tests/opml-reader.native
new file mode 100644
index 000000000..8a627c025
--- /dev/null
+++ b/tests/opml-reader.native
@@ -0,0 +1,66 @@
+Pandoc (Meta {docTitle = [Str "states.opml"], docAuthors = [[Str "Dave",Space,Str "Winer"]], docDate = [Str "Thu,",Space,Str "14",Space,Str "Jul",Space,Str "2005",Space,Str "23:41:05",Space,Str "GMT"]})
+[Header 1 ("",[],[]) [Str "United",Space,Str "States"]
+,Header 2 ("",[],[]) [Str "Far",Space,Str "West"]
+,Header 3 ("",[],[]) [Str "Alaska"]
+,Header 3 ("",[],[]) [Str "California"]
+,Header 3 ("",[],[]) [Str "Hawaii"]
+,Header 3 ("",[],[]) [Strong [Str "Nevada"]]
+,Para [Str "I",Space,Str "lived",Space,Str "here",Space,Emph [Str "once"],Str "."]
+,Para [Str "Loved",Space,Str "it."]
+,Header 4 ("",[],[]) [Link [Str "Reno"] ("","")]
+,Header 4 ("",[],[]) [Str "Las",Space,Str "Vegas"]
+,Header 4 ("",[],[]) [Str "Ely"]
+,Header 4 ("",[],[]) [Str "Gerlach"]
+,Header 3 ("",[],[]) [Str "Oregon"]
+,Header 3 ("",[],[]) [Str "Washington"]
+,Header 2 ("",[],[]) [Str "Great",Space,Str "Plains"]
+,Header 3 ("",[],[]) [Str "Kansas"]
+,Header 3 ("",[],[]) [Str "Nebraska"]
+,Header 3 ("",[],[]) [Str "North",Space,Str "Dakota"]
+,Header 3 ("",[],[]) [Str "Oklahoma"]
+,Header 3 ("",[],[]) [Str "South",Space,Str "Dakota"]
+,Header 2 ("",[],[]) [Str "Mid",Str "-",Str "Atlantic"]
+,Header 3 ("",[],[]) [Str "Delaware"]
+,Header 3 ("",[],[]) [Str "Maryland"]
+,Header 3 ("",[],[]) [Str "New",Space,Str "Jersey"]
+,Header 3 ("",[],[]) [Str "New",Space,Str "York"]
+,Header 3 ("",[],[]) [Str "Pennsylvania"]
+,Header 2 ("",[],[]) [Str "Midwest"]
+,Header 3 ("",[],[]) [Str "Illinois"]
+,Header 3 ("",[],[]) [Str "Indiana"]
+,Header 3 ("",[],[]) [Str "Iowa"]
+,Header 3 ("",[],[]) [Str "Kentucky"]
+,Header 3 ("",[],[]) [Str "Michigan"]
+,Header 3 ("",[],[]) [Str "Minnesota"]
+,Header 3 ("",[],[]) [Str "Missouri"]
+,Header 3 ("",[],[]) [Str "Ohio"]
+,Header 3 ("",[],[]) [Str "West",Space,Str "Virginia"]
+,Header 3 ("",[],[]) [Str "Wisconsin"]
+,Header 2 ("",[],[]) [Str "Mountains"]
+,Header 3 ("",[],[]) [Str "Colorado"]
+,Header 3 ("",[],[]) [Str "Idaho"]
+,Header 3 ("",[],[]) [Str "Montana"]
+,Header 3 ("",[],[]) [Str "Utah"]
+,Header 3 ("",[],[]) [Str "Wyoming"]
+,Header 2 ("",[],[]) [Str "New",Space,Str "England"]
+,Header 3 ("",[],[]) [Str "Connecticut"]
+,Header 3 ("",[],[]) [Str "Maine"]
+,Header 3 ("",[],[]) [Str "Massachusetts"]
+,Header 3 ("",[],[]) [Str "New",Space,Str "Hampshire"]
+,Header 3 ("",[],[]) [Str "Rhode",Space,Str "Island"]
+,Header 3 ("",[],[]) [Str "Vermont"]
+,Header 2 ("",[],[]) [Str "South"]
+,Header 3 ("",[],[]) [Str "Alabama"]
+,Header 3 ("",[],[]) [Str "Arkansas"]
+,Header 3 ("",[],[]) [Str "Florida"]
+,Header 3 ("",[],[]) [Str "Georgia"]
+,Header 3 ("",[],[]) [Str "Louisiana"]
+,Header 3 ("",[],[]) [Str "Mississippi"]
+,Header 3 ("",[],[]) [Str "North",Space,Str "Carolina"]
+,Header 3 ("",[],[]) [Str "South",Space,Str "Carolina"]
+,Header 3 ("",[],[]) [Str "Tennessee"]
+,Header 3 ("",[],[]) [Str "Virginia"]
+,Header 2 ("",[],[]) [Str "Southwest"]
+,Header 3 ("",[],[]) [Str "Arizona"]
+,Header 3 ("",[],[]) [Str "New",Space,Str "Mexico"]
+,Header 3 ("",[],[]) [Str "Texas"]]
diff --git a/tests/opml-reader.opml b/tests/opml-reader.opml
new file mode 100644
index 000000000..54dd592ea
--- /dev/null
+++ b/tests/opml-reader.opml
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<opml version="2.0">
+ <head>
+ <title>states.opml</title>
+ <dateCreated>Tue, 15 Mar 2005 16:35:45 GMT</dateCreated>
+ <dateModified>Thu, 14 Jul 2005 23:41:05 GMT</dateModified>
+ <ownerName>Dave Winer</ownerName>
+ <ownerEmail></ownerEmail>
+ <expansionState>1, 6, 13, 16, 18, 20</expansionState>
+ <vertScrollState>1</vertScrollState>
+ <windowTop>106</windowTop>
+ <windowLeft>106</windowLeft>
+ <windowBottom>558</windowBottom>
+ <windowRight>479</windowRight>
+ </head>
+ <body>
+ <outline text="United States">
+ <outline text="Far West">
+ <outline text="Alaska"/>
+ <outline text="California"/>
+ <outline text="Hawaii"/>
+ <outline text="&lt;strong&gt;Nevada&lt;/strong&gt;" _note="I lived here *once*.&#10;&#10;Loved it.">
+ <outline text="Reno" created="Tue, 12 Jul 2005 23:56:35 GMT" type="link" url=""/>
+ <outline text="Las Vegas" created="Tue, 12 Jul 2005 23:56:37 GMT"/>
+ <outline text="Ely" created="Tue, 12 Jul 2005 23:56:39 GMT"/>
+ <outline text="Gerlach" created="Tue, 12 Jul 2005 23:56:47 GMT"/>
+ </outline>
+ <outline text="Oregon"/>
+ <outline text="Washington"/>
+ </outline>
+ <outline text="Great Plains">
+ <outline text="Kansas"/>
+ <outline text="Nebraska"/>
+ <outline text="North Dakota"/>
+ <outline text="Oklahoma"/>
+ <outline text="South Dakota"/>
+ </outline>
+ <outline text="Mid-Atlantic">
+ <outline text="Delaware"/>
+ <outline text="Maryland"/>
+ <outline text="New Jersey"/>
+ <outline text="New York"/>
+ <outline text="Pennsylvania"/>
+ </outline>
+ <outline text="Midwest">
+ <outline text="Illinois"/>
+ <outline text="Indiana"/>
+ <outline text="Iowa"/>
+ <outline text="Kentucky"/>
+ <outline text="Michigan"/>
+ <outline text="Minnesota"/>
+ <outline text="Missouri"/>
+ <outline text="Ohio"/>
+ <outline text="West Virginia"/>
+ <outline text="Wisconsin"/>
+ </outline>
+ <outline text="Mountains">
+ <outline text="Colorado"/>
+ <outline text="Idaho"/>
+ <outline text="Montana"/>
+ <outline text="Utah"/>
+ <outline text="Wyoming"/>
+ </outline>
+ <outline text="New England">
+ <outline text="Connecticut"/>
+ <outline text="Maine"/>
+ <outline text="Massachusetts"/>
+ <outline text="New Hampshire"/>
+ <outline text="Rhode Island"/>
+ <outline text="Vermont"/>
+ </outline>
+ <outline text="South">
+ <outline text="Alabama"/>
+ <outline text="Arkansas"/>
+ <outline text="Florida"/>
+ <outline text="Georgia"/>
+ <outline text="Louisiana"/>
+ <outline text="Mississippi"/>
+ <outline text="North Carolina"/>
+ <outline text="South Carolina"/>
+ <outline text="Tennessee"/>
+ <outline text="Virginia"/>
+ </outline>
+ <outline text="Southwest">
+ <outline text="Arizona"/>
+ <outline text="New Mexico"/>
+ <outline text="Texas"/>
+ </outline>
+ </outline>
+ </body>
+ </opml>