summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- pandoc.cabal 4
-rw-r--r-- src/Text/Pandoc/Readers/Textile.hs 61
-rw-r--r-- tests/RunTests.hs 3
-rw-r--r-- tests/textile-reader.native 120
-rw-r--r-- tests/textile-reader.textile 156
5 files changed, 326 insertions, 18 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index ddcb94ee0..c49a42ca8 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -16,7 +16,7 @@ Synopsis: Conversion between markup formats
Description: Pandoc is a Haskell library for converting from one markup
format to another, and a command-line tool that uses
this library. It can read markdown and (subsets of)
- reStructuredText, HTML, and LaTeX, and it can write
+ reStructuredText, HTML, LaTeX, and Textile, and it can write
markdown, reStructuredText, HTML, LaTeX, ConTeXt, Docbook,
OpenDocument, ODT, RTF, MediaWiki, Textile, groff man pages,
EPUB, and S5 and Slidy HTML slide shows.
@@ -79,6 +79,7 @@ Extra-Source-Files:
tests/latex-reader.native,
tests/markdown-reader-more.txt,
tests/markdown-reader-more.native,
+ tests/textile-reader.textile,
tests/rst-reader.native,
tests/rst-reader.rst,
tests/s5.basic.html,
@@ -193,6 +194,7 @@ Library
Text.Pandoc.Readers.Markdown,
Text.Pandoc.Readers.RST,
Text.Pandoc.Readers.TeXMath,
+ Text.Pandoc.Readers.Textile,
Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook,
Text.Pandoc.Writers.HTML,
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 3d759a944..5e4609c01 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -25,15 +25,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-Conversion from Textile to 'Pandoc' document.
+Conversion from Textile to 'Pandoc' document, based on the spec
+available at http://redcloth.org/hobix.com/textile/
Implemented :
- Paragraphs
- Code blocks
- Lists
- blockquote
- - Inlines : strong, emph, cite, code, deleted, inserted, superscript, subscript
-
+ - Inlines : strong, emph, cite, code, deleted, inserted, superscript,
+ subscript, links
Not implemented :
- HTML-specific and CSS-specific inlines
@@ -189,6 +190,7 @@ tableRow :: GenParser Char ParserState [TableCell]
tableRow = try $ do
char '|'
cells <- endBy1 tableCell (char '|')
+ -- TODO : don't eat the last newline
newline
return cells
@@ -206,25 +208,27 @@ tableHeaders = try $ do
newline
return headers
--- | A table with an optional header
+-- | A table with an optional header row. The current implementation
+-- handles tables with or without headers, but parses cell alignment
+-- attributes as ordinary cell content.
table :: GenParser Char ParserState Block
table = try $ do
headers <- option [] tableHeaders
rows <- tableRows
+ let nbOfCols = max (length headers) (length . concat $ take 1 rows) -- width of header or first row; total, unlike 'head'
return $ Table []
- (replicate (length headers) AlignDefault)
- (replicate (length headers) 0.0)
+ (replicate nbOfCols AlignDefault)
+ (replicate nbOfCols 0.0)
headers
rows
+
----------
-- Inlines
----------
-
-
-- | Any inline element
inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
@@ -248,10 +252,10 @@ inlineParsers = [ str
, simpleInline (char '+') Inserted
, simpleInline (char '^') Superscript
, simpleInline (char '~') Subscript
- -- , link
- -- , image
- -- , math
- -- , autoLink
+ , link
+ , autoLink
+ , image
+ , image
, symbol
]
@@ -270,6 +274,29 @@ endline = try $ do
newline >> notFollowedBy blankline
return Space
+-- | Explicit link of the form @"label":url@. The label is parsed as
+-- inlines between double quotes; the URL is everything after the colon
+-- up to the next whitespace, or up to a '.', ';' or ',' that is itself
+-- followed by whitespace, so trailing punctuation stays out of the URL.
+link :: GenParser Char ParserState Inline
+link = try $ do
+ name <- surrounded (char '"') inline
+ char ':'
+ -- 'lookAhead' only peeks at the terminator, leaving it for later parsers.
+ url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline))))
+ return $ Link name (url, "")
+
+-- | Detect a plain URI written directly in the text (only 'uri' is tried; the e-mail alternative is still commented out).
+autoLink :: GenParser Char ParserState Inline
+autoLink = do
+ (orig, src) <- uri -- (try uri <|> try emailAddress)
+ return $ Link [Str orig] (src, "")
+
+-- | Inline image, written @!src!@ or @!src(alt)!@; the optional parenthesised text fills both the 'Image' inline list and the second component of its target, and both are empty when it is absent.
+image :: GenParser Char ParserState Inline
+image = try $ do
+ char '!' >> notFollowedBy space
+ src <- manyTill anyChar (lookAhead $ oneOf "!(")
+ alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
+ char '!'
+ return $ Image [Str alt] (src, alt)
+
+
-- | Any special symbol defined in specialChars
symbol :: GenParser Char ParserState Inline
symbol = do
@@ -297,9 +324,9 @@ simpleInline border construct = surrounded border inline >>=
-- TODO
--
--- - Pandoc Meta Information
+-- - Pandoc Meta Information (title, author, date)
-- - footnotes
--- - hyperlink "label":target
--- - tables alignments
--- - tests
--- - Inserted inline handling in writers \ No newline at end of file
+-- - autolink is not called
+-- - should autolink be shared through Parsing.hs ?
+-- - Inserted inline handling in writers
+-- - table parser is a bit too greedy and requires a double newline after tables \ No newline at end of file
diff --git a/tests/RunTests.hs b/tests/RunTests.hs
index cf2997a06..bd19c10bd 100644
--- a/tests/RunTests.hs
+++ b/tests/RunTests.hs
@@ -105,6 +105,8 @@ main = do
"html-reader.html" "html-reader.native"
r10 <- runTest "latex reader" ["-r", "latex", "-w", "native", "-s", "-R"]
"latex-reader.latex" "latex-reader.native"
+ rTextile1 <- runTest "textile reader" ["-r", "textile", "-w", "native", "-s", "-R"]
+ "textile-reader.textile" "textile-reader.native"
r11 <- runTest "native reader" ["-r", "native", "-w", "native", "-s"]
"testsuite.native" "testsuite.native"
r12s <- if runLhsTests
@@ -119,6 +121,7 @@ main = do
, r8, r8a -- rst
, r9 -- html
, r10 -- latex
+ , rTextile1 -- textile
, r11 -- native
] ++ r12s ++ r13s
if all id results
diff --git a/tests/textile-reader.native b/tests/textile-reader.native
new file mode 100644
index 000000000..788a79e51
--- /dev/null
+++ b/tests/textile-reader.native
@@ -0,0 +1,120 @@
+Pandoc (Meta {docTitle = [Str ""], docAuthors = [[Str ""]], docDate = [Str ""]})
+[ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber",Str "'",Str "s",Space,Str "markdown",Space,Str "test",Space,Str "suite."]
+, Para [Strikeout [Str "-"],Str "-",Str "-"]
+, Header 1 [Str "Headers"]
+, Header 2 [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link [Str "embeded",Space,Str "link"] ("http://www.example.com","")]
+, Header 3 [Str "Level",Space,Str "3",Space,Str "with",Space,Strong [Str "emphasis"]]
+, Header 4 [Str "Level",Space,Str "4"]
+, Header 5 [Str "Level",Space,Str "5"]
+, Header 6 [Str "Level",Space,Str "6"]
+, Header 1 [Str "Paragraphs"]
+, Para [Str "Here",Str "'",Str "s",Space,Str "a",Space,Str "regular",Space,Str "paragraph."]
+, Para [Str "Line",Space,Str "break",Space,Str "are",Space,Str "not",Space,Str "paragraph",Space,Str "break",Space,Str "in",Space,Str "textile,",Space,Str "so",Space,Str "you",Space,Str "can",Space,Str "wrap",Space,Str "your",Space,Str "very",Space,Str "long",Space,Str "paragraph",Space,Str "with",Space,Str "your",Space,Str "favourite",Space,Str "text",Space,Str "editor,",Space,Str "it",Space,Str "will",Space,Str "be",Space,Str "rendered",Space,Str "as",Space,Str "a",Space,Str "single",Space,Str "one."]
+, Para [Str "Here",Str "'",Str "s",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet."]
+, BulletList
+ [ [ Plain [Str "criminey."] ]
+ ]
+, Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break"]
+, Para [Str "here."]
+, Header 1 [Str "Block",Space,Str "Quotes"]
+, BlockQuote
+ [ Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "famous",Space,Str "quote",Space,Str "from",Space,Str "somebody.",Space,Str "He",Space,Str "had",Space,Str "a",Space,Str "lot",Space,Str "of",Space,Str "things",Space,Str "to",Space,Str "say,",Space,Str "so",Space,Str "the",Space,Str "text",Space,Str "is",Space,Str "really",Space,Str "really",Space,Str "long",Space,Str "and",Space,Str "spans",Space,Str "on",Space,Str "multiple",Space,Str "lines."] ]
+
+, Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."]
+, Header 1 [Str "Code",Space,Str "Blocks"]
+, Para [Str "Code",Str ":"]
+, CodeBlock ("",[],[]) "\n ---- (should be four hyphens)\n\n sub status {\n print \"working\";\n }\n\n this code block is indented by one tab\n"
+, Para [Str "And",Str ":"]
+, CodeBlock ("",[],[]) "\n this code block is indented by two tabs\n\n These should not be escaped: \\$ \\\\ \\> \\[ \\{\n"
+, Header 1 [Str "Lists"]
+, Header 2 [Str "Unordered"]
+, Para [Str "Asterisks",Space,Str "tight",Str ":"]
+, BulletList
+ [ [ Plain [Str "asterisk",Space,Str "1"] ]
+ , [ Plain [Str "asterisk",Space,Str "2"] ]
+ , [ Plain [Str "asterisk",Space,Str "3"] ] ]
+, Header 2 [Str "Ordered"]
+, Para [Str "Tight",Str ":"]
+, OrderedList (1,DefaultStyle,DefaultDelim)
+ [ [ Plain [Str "First"] ]
+ , [ Plain [Str "Second"] ]
+ , [ Plain [Str "Third"] ] ]
+, Header 2 [Str "Nested"]
+, BulletList
+ [ [ Plain [Str "ui",Space,Str "1"]
+ , BulletList
+ [ [ Plain [Str "ui",Space,Str "1.1"]
+ , OrderedList (1,DefaultStyle,DefaultDelim)
+ [ [ Plain [Str "oi",Space,Str "1.1.1"] ]
+ , [ Plain [Str "oi",Space,Str "1.1.2"] ] ] ], [ Plain [Str "ui",Space,Str "1.2"] ] ] ], [ Plain [Str "ui",Space,Str "2"]
+ , OrderedList (1,DefaultStyle,DefaultDelim)
+ [ [ Plain [Str "oi",Space,Str "2.1"]
+ , BulletList
+ [ [ Plain [Str "ui",Space,Str "2.1.1"] ]
+ , [ Plain [Str "ui",Space,Str "2.1.2"] ] ] ] ] ] ]
+, Header 1 [Str "Inline",Space,Str "Markup"]
+, Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",Space,Str "this"],Str "."]
+, Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",Space,Strong [Str "is",Space,Str "this"],Str "."]
+, Para [Str "A",Space,Link [Strong [Str "strong",Space,Str "link"]] ("http://www.foobar.com",""),Str "."]
+, Para [Emph [Strong [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]]
+, Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word",Space,Str "and",Space,Emph [Strong [Str "that",Space,Str "one"]],Str "."]
+, Para [Strikeout [Str "This",Space,Str "is",Space,Str "strikeout",Space,Str "and",Space,Strong [Str "strong"]]]
+, Para [Str "Superscripts",Str ":",Space,Str "a",Superscript [Str "bc"],Str "d",Space,Str "a",Superscript [Strong [Str "hello"]],Space,Str "a",Superscript [Str "hello",Space,Str "there"],Str "."]
+, Para [Str "Subscripts",Str ":",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",Space,Str "H",Subscript [Str "many",Space,Str "of",Space,Str "them"],Str "O."]
+, Header 1 [Str "Links"]
+, Header 2 [Str "Explicit"]
+, Para [Str "Just",Space,Str "a",Space,Link [Str "url"] ("http://www.url.com","")]
+, Para [Link [Str "Email",Space,Str "link"] ("mailto:nobody@nowhere.net","")]
+, Para [Str "Automatic",Space,Str "linking",Space,Str "to",Space,Str "http",Str ":",Str "//www.example.com",Space,Str "and",Space,Str "foobar",Str "@",Str "example.com."]
+, Header 1 [Str "Tables"]
+, Para [Str "Textile",Space,Str "allows",Space,Str "tables",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "headers",Space,Str ":"]
+, Header 2 [Str "Without",Space,Str "headers"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [
+ ] [
+ [ [ Plain [Str "name"] ]
+ , [ Plain [Str "age"] ]
+ , [ Plain [Str "sex"] ] ],
+ [ [ Plain [Str "joan"] ]
+ , [ Plain [Str "24"] ]
+ , [ Plain [Str "f"] ] ],
+ [ [ Plain [Str "archie"] ]
+ , [ Plain [Str "29"] ]
+ , [ Plain [Str "m"] ] ],
+ [ [ Plain [Str "bella"] ]
+ , [ Plain [Str "45"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Para [Str "And",Space,Str "some",Space,Str "text",Space,Str "..."]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [
+ ] [
+ [ [ Plain [Str "name"] ]
+ , [ Plain [Str "age"] ]
+ , [ Plain [Str "sex"] ] ],
+ [ [ Plain [Str "joan"] ]
+ , [ Plain [Str "24"] ]
+ , [ Plain [Str "f"] ] ],
+ [ [ Plain [Str "archie"] ]
+ , [ Plain [Str "29"] ]
+ , [ Plain [Str "m"] ] ],
+ [ [ Plain [Str "bella"] ]
+ , [ Plain [Str "45"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Para []
+, Header 2 [Str "With",Space,Str "headers"]
+, Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
+ [ [ Plain [Str "name"] ]
+ , [ Plain [Str "age"] ]
+ , [ Plain [Str "sex"] ] ] [
+ [ [ Plain [Str "joan"] ]
+ , [ Plain [Str "24"] ]
+ , [ Plain [Str "f"] ] ],
+ [ [ Plain [Str "archie"] ]
+ , [ Plain [Str "29"] ]
+ , [ Plain [Str "m"] ] ],
+ [ [ Plain [Str "bella"] ]
+ , [ Plain [Str "45"] ]
+ , [ Plain [Str "f"] ] ] ]
+, Para []
+, Header 1 [Str "Images"]
+, Para [Str "Textile",Space,Str "inline",Space,Str "image",Space,Str "syntax,",Space,Str "like",Space,Str "here",Space,Image [Str "this is the alt text"] ("this_is_an_image.png","this is the alt text"),Space,Str "and",Space,Str "here",Space,Image [Str ""] ("this_is_an_image.png",""),Str "."] ]
diff --git a/tests/textile-reader.textile b/tests/textile-reader.textile
new file mode 100644
index 000000000..b4c10b1b1
--- /dev/null
+++ b/tests/textile-reader.textile
@@ -0,0 +1,156 @@
+This is a set of tests for pandoc. Most of them are adapted from John
+Gruber's markdown test suite.
+
+-----
+
+h1. Headers
+
+h2. Level 2 with an "embeded link":http://www.example.com
+
+h3. Level 3 with *emphasis*
+
+h4. Level 4
+
+h5. Level 5
+
+h6. Level 6
+
+
+h1. Paragraphs
+
+Here's a regular paragraph.
+
+Line break are not paragraph break in textile, so you can wrap your
+very long paragraph with your favourite text editor, it will be
+rendered as a single one.
+
+Here's one with a bullet.
+
+* criminey.
+
+There should be a hard line break
+
+here.
+
+h1. Block Quotes
+
+bq. This is a famous quote from somebody. He had a lot of things to
+say, so the text is really really long and spans on multiple lines.
+
+And a following paragraph.
+
+h1. Code Blocks
+
+Code:
+
+<pre>
+ ---- (should be four hyphens)
+
+ sub status {
+ print "working";
+ }
+
+ this code block is indented by one tab
+</pre>
+
+And:
+
+<pre>
+ this code block is indented by two tabs
+
+ These should not be escaped: \$ \\ \> \[ \{
+</pre>
+
+
+h1. Lists
+
+h2. Unordered
+
+Asterisks tight:
+
+* asterisk 1
+* asterisk 2
+* asterisk 3
+
+h2. Ordered
+
+Tight:
+
+# First
+# Second
+# Third
+
+h2. Nested
+
+* ui 1
+** ui 1.1
+### oi 1.1.1
+### oi 1.1.2
+** ui 1.2
+* ui 2
+## oi 2.1
+*** ui 2.1.1
+*** ui 2.1.2
+
+
+h1. Inline Markup
+
+This is _emphasized_, and so __is this__.
+
+This is *strong*, and so **is this**.
+
+A "*strong link*":http://www.foobar.com.
+
+_*This is strong and em.*_
+
+So is *_this_* word and __**that one**__.
+
+-This is strikeout and *strong*-
+
+Superscripts: a^bc^d a^*hello*^ a^hello there^.
+
+Subscripts: H~2~O, H~23~O, H~many of them~O.
+
+
+h1. Links
+
+h2. Explicit
+
+Just a "url":http://www.url.com
+
+"Email link":mailto:nobody@nowhere.net
+
+Automatic linking to http://www.example.com and foobar@example.com.
+
+h1. Tables
+
+Textile allows tables with and without headers :
+
+h2. Without headers
+
+| name | age | sex |
+| joan | 24 | f |
+| archie | 29 | m |
+| bella | 45 | f |
+
+And some text ...
+
+| name | age | sex |
+| joan | 24 | f |
+| archie| 29 | m |
+| bella | 45 | f |
+
+
+h2. With headers
+
+|_. name |_. age |_. sex |
+| joan | 24 | f |
+| archie | 29 | m |
+| bella | 45 | f |
+
+
+h1. Images
+
+Textile inline image syntax, like
+here !this_is_an_image.png(this is the alt text)!
+and here !this_is_an_image.png!.