diff options
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/MIME.hs | 483 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 44 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 94 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 63 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 65 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/S5.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ConTeXt.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 42 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 68 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 191 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 187 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 36 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OpenDocument.hs | 53 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RTF.hs | 31 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Texinfo.hs | 12 |
20 files changed, 1126 insertions, 330 deletions
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs new file mode 100644 index 000000000..c52a4c475 --- /dev/null +++ b/src/Text/Pandoc/MIME.hs @@ -0,0 +1,483 @@ +{- +Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.MIME + Copyright : Copyright (C) 2011 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Mime type lookup for ODT writer. +-} +module Text.Pandoc.MIME ( getMimeType ) +where +import System.FilePath +import Data.Char ( toLower ) +import qualified Data.Map as M + +-- | Determine mime type appropriate for file path. +getMimeType :: FilePath -> Maybe String +getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes + where mimeTypes = M.fromList -- List borrowed from happstack-server. + [("gz","application/x-gzip") + ,("cabal","application/x-cabal") + ,("%","application/x-trash") + ,("323","text/h323") + ,("3gp","video/3gpp") + ,("7z","application/x-7z-compressed") + ,("abw","application/x-abiword") + ,("ai","application/postscript") + ,("aif","audio/x-aiff") + ,("aifc","audio/x-aiff") + ,("aiff","audio/x-aiff") + ,("alc","chemical/x-alchemy") + ,("art","image/x-jg") + ,("asc","text/plain") + ,("asf","video/x-ms-asf") + ,("asn","chemical/x-ncbi-asn1") + ,("aso","chemical/x-ncbi-asn1-binary") + ,("asx","video/x-ms-asf") + ,("atom","application/atom") + ,("atomcat","application/atomcat+xml") + ,("atomsrv","application/atomserv+xml") + ,("au","audio/basic") + ,("avi","video/x-msvideo") + ,("b","chemical/x-molconn-Z") + ,("bak","application/x-trash") + ,("bat","application/x-msdos-program") + ,("bcpio","application/x-bcpio") + ,("bib","text/x-bibtex") + ,("bin","application/octet-stream") + ,("bmp","image/x-ms-bmp") + ,("boo","text/x-boo") + ,("book","application/x-maker") + ,("bsd","chemical/x-crossfire") + ,("c","text/x-csrc") + ,("c++","text/x-c++src") + ,("c3d","chemical/x-chem3d") + ,("cab","application/x-cab") + ,("cac","chemical/x-cache") + ,("cache","chemical/x-cache") + ,("cap","application/cap") + ,("cascii","chemical/x-cactvs-binary") + ,("cat","application/vnd.ms-pki.seccat") + ,("cbin","chemical/x-cactvs-binary") + ,("cbr","application/x-cbr") + ,("cbz","application/x-cbz") + ,("cc","text/x-c++src") + ,("cdf","application/x-cdf") + ,("cdr","image/x-coreldraw") + ,("cdt","image/x-coreldrawtemplate") + ,("cdx","chemical/x-cdx") + ,("cdy","application/vnd.cinderella") + ,("cef","chemical/x-cxf") + ,("cer","chemical/x-cerius") + ,("chm","chemical/x-chemdraw") + ,("chrt","application/x-kchart") + ,("cif","chemical/x-cif") + ,("class","application/java-vm") + ,("cls","text/x-tex") + ,("cmdf","chemical/x-cmdf") + ,("cml","chemical/x-cml") + ,("cod","application/vnd.rim.cod") + ,("com","application/x-msdos-program") + ,("cpa","chemical/x-compass") + ,("cpio","application/x-cpio") + ,("cpp","text/x-c++src") + ,("cpt","application/mac-compactpro") + ,("crl","application/x-pkcs7-crl") + ,("crt","application/x-x509-ca-cert") + ,("csf","chemical/x-cache-csf") + ,("csh","application/x-csh") + ,("csm","chemical/x-csml") + ,("csml","chemical/x-csml") + ,("css","text/css") + ,("csv","text/csv") + ,("ctab","chemical/x-cactvs-binary") + ,("ctx","chemical/x-ctx") + ,("cu","application/cu-seeme") + ,("cub","chemical/x-gaussian-cube") + ,("cxf","chemical/x-cxf") + ,("cxx","text/x-c++src") + ,("d","text/x-dsrc") + ,("dat","chemical/x-mopac-input") + ,("dcr","application/x-director") + ,("deb","application/x-debian-package") + ,("dif","video/dv") + ,("diff","text/x-diff") + ,("dir","application/x-director") + ,("djv","image/vnd.djvu") + ,("djvu","image/vnd.djvu") + ,("dl","video/dl") + ,("dll","application/x-msdos-program") + ,("dmg","application/x-apple-diskimage") + ,("dms","application/x-dms") + ,("doc","application/msword") + ,("dot","application/msword") + ,("dv","video/dv") + ,("dvi","application/x-dvi") + ,("dx","chemical/x-jcamp-dx") + ,("dxr","application/x-director") + ,("emb","chemical/x-embl-dl-nucleotide") + ,("embl","chemical/x-embl-dl-nucleotide") + ,("eml","message/rfc822") + ,("ent","chemical/x-ncbi-asn1-ascii") + ,("eps","application/postscript") + ,("etx","text/x-setext") + ,("exe","application/x-msdos-program") + ,("ez","application/andrew-inset") + ,("fb","application/x-maker") + ,("fbdoc","application/x-maker") + ,("fch","chemical/x-gaussian-checkpoint") + ,("fchk","chemical/x-gaussian-checkpoint") + ,("fig","application/x-xfig") + ,("flac","application/x-flac") + ,("fli","video/fli") + ,("fm","application/x-maker") + ,("frame","application/x-maker") + ,("frm","application/x-maker") + ,("gal","chemical/x-gaussian-log") + ,("gam","chemical/x-gamess-input") + ,("gamin","chemical/x-gamess-input") + ,("gau","chemical/x-gaussian-input") + ,("gcd","text/x-pcs-gcd") + ,("gcf","application/x-graphing-calculator") + ,("gcg","chemical/x-gcg8-sequence") + ,("gen","chemical/x-genbank") + ,("gf","application/x-tex-gf") + ,("gif","image/gif") + ,("gjc","chemical/x-gaussian-input") + ,("gjf","chemical/x-gaussian-input") + ,("gl","video/gl") + ,("gnumeric","application/x-gnumeric") + ,("gpt","chemical/x-mopac-graph") + ,("gsf","application/x-font") + ,("gsm","audio/x-gsm") + ,("gtar","application/x-gtar") + ,("h","text/x-chdr") + ,("h++","text/x-c++hdr") + ,("hdf","application/x-hdf") + ,("hh","text/x-c++hdr") + ,("hin","chemical/x-hin") + ,("hpp","text/x-c++hdr") + ,("hqx","application/mac-binhex40") + ,("hs","text/x-haskell") + ,("hta","application/hta") + ,("htc","text/x-component") + ,("htm","text/html") + ,("html","text/html") + ,("hxx","text/x-c++hdr") + ,("ica","application/x-ica") + ,("ice","x-conference/x-cooltalk") + ,("ico","image/x-icon") + ,("ics","text/calendar") + ,("icz","text/calendar") + ,("ief","image/ief") + ,("iges","model/iges") + ,("igs","model/iges") + ,("iii","application/x-iphone") + ,("inp","chemical/x-gamess-input") + ,("ins","application/x-internet-signup") + ,("iso","application/x-iso9660-image") + ,("isp","application/x-internet-signup") + ,("ist","chemical/x-isostar") + ,("istr","chemical/x-isostar") + ,("jad","text/vnd.sun.j2me.app-descriptor") + ,("jar","application/java-archive") + ,("java","text/x-java") + ,("jdx","chemical/x-jcamp-dx") + ,("jmz","application/x-jmol") + ,("jng","image/x-jng") + ,("jnlp","application/x-java-jnlp-file") + ,("jpe","image/jpeg") + ,("jpeg","image/jpeg") + ,("jpg","image/jpeg") + ,("js","application/x-javascript") + ,("kar","audio/midi") + ,("key","application/pgp-keys") + ,("kil","application/x-killustrator") + ,("kin","chemical/x-kinemage") + ,("kml","application/vnd.google-earth.kml+xml") + ,("kmz","application/vnd.google-earth.kmz") + ,("kpr","application/x-kpresenter") + ,("kpt","application/x-kpresenter") + ,("ksp","application/x-kspread") + ,("kwd","application/x-kword") + ,("kwt","application/x-kword") + ,("latex","application/x-latex") + ,("lha","application/x-lha") + ,("lhs","text/x-literate-haskell") + ,("lsf","video/x-la-asf") + ,("lsx","video/x-la-asf") + ,("ltx","text/x-tex") + ,("lyx","application/x-lyx") + ,("lzh","application/x-lzh") + ,("lzx","application/x-lzx") + ,("m3u","audio/mpegurl") + ,("m4a","audio/mpeg") + ,("maker","application/x-maker") + ,("man","application/x-troff-man") + ,("mcif","chemical/x-mmcif") + ,("mcm","chemical/x-macmolecule") + ,("mdb","application/msaccess") + ,("me","application/x-troff-me") + ,("mesh","model/mesh") + ,("mid","audio/midi") + ,("midi","audio/midi") + ,("mif","application/x-mif") + ,("mm","application/x-freemind") + ,("mmd","chemical/x-macromodel-input") + ,("mmf","application/vnd.smaf") + ,("mml","text/mathml") + ,("mmod","chemical/x-macromodel-input") + ,("mng","video/x-mng") + ,("moc","text/x-moc") + ,("mol","chemical/x-mdl-molfile") + ,("mol2","chemical/x-mol2") + ,("moo","chemical/x-mopac-out") + ,("mop","chemical/x-mopac-input") + ,("mopcrt","chemical/x-mopac-input") + ,("mov","video/quicktime") + ,("movie","video/x-sgi-movie") + ,("mp2","audio/mpeg") + ,("mp3","audio/mpeg") + ,("mp4","video/mp4") + ,("mpc","chemical/x-mopac-input") + ,("mpe","video/mpeg") + ,("mpeg","video/mpeg") + ,("mpega","audio/mpeg") + ,("mpg","video/mpeg") + ,("mpga","audio/mpeg") + ,("ms","application/x-troff-ms") + ,("msh","model/mesh") + ,("msi","application/x-msi") + ,("mvb","chemical/x-mopac-vib") + ,("mxu","video/vnd.mpegurl") + ,("nb","application/mathematica") + ,("nc","application/x-netcdf") + ,("nwc","application/x-nwc") + ,("o","application/x-object") + ,("oda","application/oda") + ,("odb","application/vnd.oasis.opendocument.database") + ,("odc","application/vnd.oasis.opendocument.chart") + ,("odf","application/vnd.oasis.opendocument.formula") + ,("odg","application/vnd.oasis.opendocument.graphics") + ,("odi","application/vnd.oasis.opendocument.image") + ,("odm","application/vnd.oasis.opendocument.text-master") + ,("odp","application/vnd.oasis.opendocument.presentation") + ,("ods","application/vnd.oasis.opendocument.spreadsheet") + ,("odt","application/vnd.oasis.opendocument.text") + ,("oga","audio/ogg") + ,("ogg","application/ogg") + ,("ogv","video/ogg") + ,("ogx","application/ogg") + ,("old","application/x-trash") + ,("otg","application/vnd.oasis.opendocument.graphics-template") + ,("oth","application/vnd.oasis.opendocument.text-web") + ,("otp","application/vnd.oasis.opendocument.presentation-template") + ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") + ,("ott","application/vnd.oasis.opendocument.text-template") + ,("oza","application/x-oz-application") + ,("p","text/x-pascal") + ,("p7r","application/x-pkcs7-certreqresp") + ,("pac","application/x-ns-proxy-autoconfig") + ,("pas","text/x-pascal") + ,("pat","image/x-coreldrawpattern") + ,("patch","text/x-diff") + ,("pbm","image/x-portable-bitmap") + ,("pcap","application/cap") + ,("pcf","application/x-font") + ,("pcf.Z","application/x-font") + ,("pcx","image/pcx") + ,("pdb","chemical/x-pdb") + ,("pdf","application/pdf") + ,("pfa","application/x-font") + ,("pfb","application/x-font") + ,("pgm","image/x-portable-graymap") + ,("pgn","application/x-chess-pgn") + ,("pgp","application/pgp-signature") + ,("php","application/x-httpd-php") + ,("php3","application/x-httpd-php3") + ,("php3p","application/x-httpd-php3-preprocessed") + ,("php4","application/x-httpd-php4") + ,("phps","application/x-httpd-php-source") + ,("pht","application/x-httpd-php") + ,("phtml","application/x-httpd-php") + ,("pk","application/x-tex-pk") + ,("pl","text/x-perl") + ,("pls","audio/x-scpls") + ,("pm","text/x-perl") + ,("png","image/png") + ,("pnm","image/x-portable-anymap") + ,("pot","text/plain") + ,("ppm","image/x-portable-pixmap") + ,("pps","application/vnd.ms-powerpoint") + ,("ppt","application/vnd.ms-powerpoint") + ,("prf","application/pics-rules") + ,("prt","chemical/x-ncbi-asn1-ascii") + ,("ps","application/postscript") + ,("psd","image/x-photoshop") + ,("py","text/x-python") + ,("pyc","application/x-python-code") + ,("pyo","application/x-python-code") + ,("qt","video/quicktime") + ,("qtl","application/x-quicktimeplayer") + ,("ra","audio/x-pn-realaudio") + ,("ram","audio/x-pn-realaudio") + ,("rar","application/rar") + ,("ras","image/x-cmu-raster") + ,("rd","chemical/x-mdl-rdfile") + ,("rdf","application/rdf+xml") + ,("rgb","image/x-rgb") + ,("rhtml","application/x-httpd-eruby") + ,("rm","audio/x-pn-realaudio") + ,("roff","application/x-troff") + ,("ros","chemical/x-rosdal") + ,("rpm","application/x-redhat-package-manager") + ,("rss","application/rss+xml") + ,("rtf","application/rtf") + ,("rtx","text/richtext") + ,("rxn","chemical/x-mdl-rxnfile") + ,("sct","text/scriptlet") + ,("sd","chemical/x-mdl-sdfile") + ,("sd2","audio/x-sd2") + ,("sda","application/vnd.stardivision.draw") + ,("sdc","application/vnd.stardivision.calc") + ,("sdd","application/vnd.stardivision.impress") + ,("sdf","application/vnd.stardivision.math") + ,("sds","application/vnd.stardivision.chart") + ,("sdw","application/vnd.stardivision.writer") + ,("ser","application/java-serialized-object") + ,("sgf","application/x-go-sgf") + ,("sgl","application/vnd.stardivision.writer-global") + ,("sh","application/x-sh") + ,("shar","application/x-shar") + ,("shtml","text/html") + ,("sid","audio/prs.sid") + ,("sik","application/x-trash") + ,("silo","model/mesh") + ,("sis","application/vnd.symbian.install") + ,("sisx","x-epoc/x-sisx-app") + ,("sit","application/x-stuffit") + ,("sitx","application/x-stuffit") + ,("skd","application/x-koan") + ,("skm","application/x-koan") + ,("skp","application/x-koan") + ,("skt","application/x-koan") + ,("smi","application/smil") + ,("smil","application/smil") + ,("snd","audio/basic") + ,("spc","chemical/x-galactic-spc") + ,("spl","application/futuresplash") + ,("spx","audio/ogg") + ,("src","application/x-wais-source") + ,("stc","application/vnd.sun.xml.calc.template") + ,("std","application/vnd.sun.xml.draw.template") + ,("sti","application/vnd.sun.xml.impress.template") + ,("stl","application/vnd.ms-pki.stl") + ,("stw","application/vnd.sun.xml.writer.template") + ,("sty","text/x-tex") + ,("sv4cpio","application/x-sv4cpio") + ,("sv4crc","application/x-sv4crc") + ,("svg","image/svg+xml") + ,("svgz","image/svg+xml") + ,("sw","chemical/x-swissprot") + ,("swf","application/x-shockwave-flash") + ,("swfl","application/x-shockwave-flash") + ,("sxc","application/vnd.sun.xml.calc") + ,("sxd","application/vnd.sun.xml.draw") + ,("sxg","application/vnd.sun.xml.writer.global") + ,("sxi","application/vnd.sun.xml.impress") + ,("sxm","application/vnd.sun.xml.math") + ,("sxw","application/vnd.sun.xml.writer") + ,("t","application/x-troff") + ,("tar","application/x-tar") + ,("taz","application/x-gtar") + ,("tcl","application/x-tcl") + ,("tex","text/x-tex") + ,("texi","application/x-texinfo") + ,("texinfo","application/x-texinfo") + ,("text","text/plain") + ,("tgf","chemical/x-mdl-tgf") + ,("tgz","application/x-gtar") + ,("tif","image/tiff") + ,("tiff","image/tiff") + ,("tk","text/x-tcl") + ,("tm","text/texmacs") + ,("torrent","application/x-bittorrent") + ,("tr","application/x-troff") + ,("ts","text/texmacs") + ,("tsp","application/dsptype") + ,("tsv","text/tab-separated-values") + ,("txt","text/plain") + ,("udeb","application/x-debian-package") + ,("uls","text/iuls") + ,("ustar","application/x-ustar") + ,("val","chemical/x-ncbi-asn1-binary") + ,("vcd","application/x-cdlink") + ,("vcf","text/x-vcard") + ,("vcs","text/x-vcalendar") + ,("vmd","chemical/x-vmd") + ,("vms","chemical/x-vamas-iso14976") + ,("vrm","x-world/x-vrml") + ,("vrml","model/vrml") + ,("vsd","application/vnd.visio") + ,("wad","application/x-doom") + ,("wav","audio/x-wav") + ,("wax","audio/x-ms-wax") + ,("wbmp","image/vnd.wap.wbmp") + ,("wbxml","application/vnd.wap.wbxml") + ,("wk","application/x-123") + ,("wm","video/x-ms-wm") + ,("wma","audio/x-ms-wma") + ,("wmd","application/x-ms-wmd") + ,("wml","text/vnd.wap.wml") + ,("wmlc","application/vnd.wap.wmlc") + ,("wmls","text/vnd.wap.wmlscript") + ,("wmlsc","application/vnd.wap.wmlscriptc") + ,("wmv","video/x-ms-wmv") + ,("wmx","video/x-ms-wmx") + ,("wmz","application/x-ms-wmz") + ,("wp5","application/wordperfect5.1") + ,("wpd","application/wordperfect") + ,("wrl","model/vrml") + ,("wsc","text/scriptlet") + ,("wvx","video/x-ms-wvx") + ,("wz","application/x-wingz") + ,("xbm","image/x-xbitmap") + ,("xcf","application/x-xcf") + ,("xht","application/xhtml+xml") + ,("xhtml","application/xhtml+xml") + ,("xlb","application/vnd.ms-excel") + ,("xls","application/vnd.ms-excel") + ,("xlt","application/vnd.ms-excel") + ,("xml","application/xml") + ,("xpi","application/x-xpinstall") + ,("xpm","image/x-xpixmap") + ,("xsl","application/xml") + ,("xtel","chemical/x-xtel") + ,("xul","application/vnd.mozilla.xul+xml") + ,("xwd","image/x-xwindowdump") + ,("xyz","chemical/x-xyz") + ,("zip","application/zip") + ,("zmt","chemical/x-mopac-input") + ] + diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9ce064f91..eaf0c0f67 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -33,6 +33,7 @@ module Text.Pandoc.Parsing ( (>>~), notFollowedBy', oneOfStrings, spaceChar, + nonspaceChar, skipSpaces, blankline, blanklines, @@ -78,7 +79,7 @@ import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) import Control.Monad ( join, liftM, guard ) @@ -122,6 +123,10 @@ oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings spaceChar :: CharParser st Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' +-- | Parses a nonspace, nonnewline character. +nonspaceChar :: CharParser st Char +nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' + -- | Skips zero or more spaces or tabs. skipSpaces :: GenParser Char st () skipSpaces = skipMany spaceChar @@ -264,8 +269,24 @@ uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] lookAhead $ oneOfStrings protocols - -- scan non-ascii characters and ascii characters allowed in a URI - str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- Scan non-ascii characters and ascii characters allowed in a URI. + -- We allow punctuation except when followed by a space, since + -- we don't want the trailing '.' in 'http://google.com.' + let innerPunct = try $ satisfy isPunctuation >>~ + notFollowedBy (newline <|> spaceChar) + let uriChar = innerPunct <|> + satisfy (\c -> not (isPunctuation c) && + (not (isAscii c) || isAllowedInURI c)) + -- We want to allow + -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) + -- as a URL, while NOT picking up the closing paren in + -- (http://wikipedia.org) + -- So we include balanced parens in the URL. + let inParens = try $ do char '(' + res <- many uriChar + char ')' + return $ '(' : res ++ ")" + str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar) -- now see if they amount to an absolute URI case parseURI (escapeURI str) of Just uri' -> if uriScheme uri' `elem` protocols @@ -742,8 +763,9 @@ charOrRef cs = singleQuoteStart :: GenParser Char ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote - try $ do charOrRef "'\8216" - notFollowedBy (oneOf ")!],.;:-? \t\n") + try $ do charOrRef "'\8216\145" + notFollowedBy (oneOf ")!],;:-? \t\n") + notFollowedBy (char '.') <|> lookAhead (string "..." >> return ()) notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> satisfy (not . isAlphaNum))) -- possess/contraction @@ -751,23 +773,23 @@ singleQuoteStart = do singleQuoteEnd :: GenParser Char st () singleQuoteEnd = try $ do - charOrRef "'\8217" + charOrRef "'\8217\146" notFollowedBy alphaNum doubleQuoteStart :: GenParser Char ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote - try $ do charOrRef "\"\8220" + try $ do charOrRef "\"\8220\147" notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) doubleQuoteEnd :: GenParser Char st () doubleQuoteEnd = do - charOrRef "\"\8221" + charOrRef "\"\8221\148" return () ellipses :: GenParser Char st Inline ellipses = do - try (charOrRef "…") <|> try (string "..." >> return '…') + try (charOrRef "…\133") <|> try (string "..." >> return '…') return Ellipses dash :: GenParser Char st Inline @@ -775,13 +797,13 @@ dash = enDash <|> emDash enDash :: GenParser Char st Inline enDash = do - try (charOrRef "–") <|> + try (charOrRef "–\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return EnDash emDash :: GenParser Char st Inline emDash = do - try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—') + try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—') return EmDash -- diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 18e3113d3..7c882f680 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) import Data.Char ( isSpace, isDigit ) -import Control.Monad ( liftM, guard ) +import Control.Monad ( liftM, guard, when ) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ParserState -- ^ Parser state @@ -75,7 +75,7 @@ parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest) t ~== TagOpen "body" []) tags parseBody :: TagParser [Block] -parseBody = liftM concat $ manyTill block eof +parseBody = liftM (fixPlains False . concat) $ manyTill block eof block :: TagParser [Block] block = choice @@ -107,7 +107,7 @@ pBulletList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") - return [BulletList items] + return [BulletList $ map (fixPlains True) items] pOrderedList :: TagParser [Block] pOrderedList = try $ do @@ -138,7 +138,7 @@ pOrderedList = try $ do -- treat it as a list item, though it's not valid xhtml... skipMany nonItem items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") - return [OrderedList (start, style, DefaultDelim) items] + return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items] pDefinitionList :: TagParser [Block] pDefinitionList = try $ do @@ -154,7 +154,22 @@ pDefListItem = try $ do defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem let term = intercalate [LineBreak] terms - return (term, defs) + return (term, map (fixPlains True) defs) + +fixPlains :: Bool -> [Block] -> [Block] +fixPlains inList bs = if any isParaish bs + then map plainToPara bs + else bs + where isParaish (Para _) = True + isParaish (CodeBlock _ _) = True + isParaish (Header _ _) = True + isParaish (BlockQuote _) = True + isParaish (BulletList _) = not inList + isParaish (OrderedList _ _) = not inList + isParaish (DefinitionList _) = not inList + isParaish _ = False + plainToPara (Plain xs) = Para xs + plainToPara x = x pRawTag :: TagParser String pRawTag = do @@ -199,9 +214,9 @@ pSimpleTable :: TagParser [Block] pSimpleTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank - head' <- option [] $ pInTags "th" pTd - rows <- many1 $ try $ - skipMany pBlank >> pInTags "tr" pTd + head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + rows <- pOptInTag "tbody" + $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank TagClose _ <- pSatisfy (~== TagClose "table") let cols = maximum $ map length rows @@ -209,17 +224,17 @@ pSimpleTable = try $ do let widths = replicate cols 0 return [Table [] aligns widths head' rows] -pTd :: TagParser [TableCell] -pTd = try $ do +pCell :: String -> TagParser [TableCell] +pCell celltype = try $ do skipMany pBlank - res <- pInTags "td" pPlain + res <- pInTags celltype pPlain skipMany pBlank return [res] pBlockQuote :: TagParser [Block] pBlockQuote = do contents <- pInTags "blockquote" block - return [BlockQuote contents] + return [BlockQuote $ fixPlains False contents] pPlain :: TagParser [Block] pPlain = do @@ -358,7 +373,7 @@ pInlinesInTags :: String -> ([Inline] -> Inline) -> TagParser [Inline] pInlinesInTags tagtype f = do contents <- pInTags tagtype inline - return [f contents] + return [f $ normalizeSpaces contents] pInTags :: String -> TagParser [a] -> TagParser [a] @@ -366,6 +381,16 @@ pInTags tagtype parser = try $ do pSatisfy (~== TagOpen tagtype []) liftM concat $ manyTill parser (pCloses tagtype <|> eof) +pOptInTag :: String -> TagParser a + -> TagParser a +pOptInTag tagtype parser = try $ do + open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True) + skipMany pBlank + x <- parser + skipMany pBlank + when open $ pCloses tagtype + return x + pCloses :: String -> TagParser () pCloses tagtype = try $ do t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag @@ -391,10 +416,12 @@ pBlank = try $ do guard $ all isSpace str pTagContents :: GenParser Char ParserState Inline -pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol +pTagContents = + pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad pStr :: GenParser Char ParserState Inline -pStr = liftM Str $ many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) +pStr = liftM Str $ many1 $ satisfy $ \c -> + not (isSpace c) && not (isSpecial c) && not (isBad c) isSpecial :: Char -> Bool isSpecial '"' = True @@ -410,6 +437,43 @@ isSpecial _ = False pSymbol :: GenParser Char ParserState Inline pSymbol = satisfy isSpecial >>= return . Str . (:[]) +isBad :: Char -> Bool +isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML + +pBad :: GenParser Char ParserState Inline +pBad = do + c <- satisfy isBad + let c' = case c of + '\128' -> '\8364' + '\130' -> '\8218' + '\131' -> '\402' + '\132' -> '\8222' + '\133' -> '\8230' + '\134' -> '\8224' + '\135' -> '\8225' + '\136' -> '\710' + '\137' -> '\8240' + '\138' -> '\352' + '\139' -> '\8249' + '\140' -> '\338' + '\142' -> '\381' + '\145' -> '\8216' + '\146' -> '\8217' + '\147' -> '\8220' + '\148' -> '\8221' + '\149' -> '\8226' + '\150' -> '\8211' + '\151' -> '\8212' + '\152' -> '\732' + '\153' -> '\8482' + '\154' -> '\353' + '\155' -> '\8250' + '\156' -> '\339' + '\158' -> '\382' + '\159' -> '\376' + _ -> '?' + return $ Str [c'] + pSpace :: GenParser Char ParserState Inline pSpace = many1 (satisfy isSpace) >> return Space diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index dca745b56..02c7361d7 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -391,7 +391,7 @@ para = do -- bibliographic :: GenParser Char ParserState Block -bibliographic = choice [ maketitle, title, authors, date ] +bibliographic = choice [ maketitle, title, subtitle, authors, date ] maketitle :: GenParser Char st Block maketitle = try (string "\\maketitle") >> spaces >> return Null @@ -404,11 +404,22 @@ title = try $ do updateState (\state -> state { stateTitle = tit }) return Null +subtitle :: GenParser Char ParserState Block +subtitle = try $ do + string "\\subtitle{" + tit <- manyTill inline (char '}') + spaces + updateState (\state -> state { stateTitle = stateTitle state ++ + Str ":" : LineBreak : tit }) + return Null + authors :: GenParser Char ParserState Block authors = try $ do string "\\author{" - raw <- many1 (notFollowedBy (char '}') >> inline) - let authors' = map normalizeSpaces $ splitBy (== LineBreak) raw + let andsep = try $ string "\\and" >> notFollowedBy letter >> + spaces >> return '&' + raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep + let authors' = map normalizeSpaces raw char '}' spaces updateState (\s -> s { stateAuthors = authors' }) @@ -777,8 +788,12 @@ doubleQuoteEnd :: CharParser st String doubleQuoteEnd = try $ string "''" ellipses :: GenParser Char st Inline -ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >> - return Ellipses +ellipses = try $ do + char '\\' + optional $ char 'l' + string "dots" + optional $ try $ string "{}" + return Ellipses enDash :: GenParser Char st Inline enDash = try (string "--") >> return EnDash @@ -801,7 +816,11 @@ nonbreakingSpace = char '~' >> return (Str "\160") -- hard line break linebreak :: GenParser Char st Inline -linebreak = try (string "\\\\") >> return LineBreak +linebreak = try $ do + string "\\\\" + optional $ bracketedText '[' ']' -- e.g. \\[10pt] + spaces + return LineBreak str :: GenParser Char st Inline str = many1 (noneOf specialChars) >>= return . Str diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 01cc5e2e8..53531dc1a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -471,8 +472,10 @@ lhsCodeBlockBirdWith c = try $ do return $ intercalate "\n" lns' birdTrackLine :: Char -> GenParser Char st [Char] -birdTrackLine c = do +birdTrackLine c = try $ do char c + -- allow html tags on left margin: + when (c == '<') $ notFollowedBy letter manyTill anyChar newline @@ -905,7 +908,7 @@ inlineParsers = [ whitespace , str , endline , code - , (fourOrMore '*' <|> fourOrMore '_') + , fours , strong , emph , note @@ -1016,24 +1019,45 @@ mathInline = try $ do -- to avoid performance problems, treat 4 or more _ or * in a row as a literal -- rather than attempting to parse for emph/strong -fourOrMore :: Char -> GenParser Char st Inline -fourOrMore c = try $ count 4 (char c) >> many (char c) >>= \s -> - return (Str $ replicate 4 c ++ s) +fours :: GenParser Char st Inline +fours = try $ do + x <- char '*' <|> char '_' + count 2 $ satisfy (==x) + rest <- many1 (satisfy (==x)) + return $ Str (x:x:x:rest) + +-- | Parses a list of inlines between start and end delimiters. +inlinesBetween :: (Show b) + => GenParser Char ParserState a + -> GenParser Char ParserState b + -> GenParser Char ParserState [Inline] +inlinesBetween start end = + normalizeSpaces `liftM` try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) + innerSpace = try $ whitespace >>~ notFollowedBy' end emph :: GenParser Char ParserState Inline -emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|> - (enclosed (char '_') (notFollowedBy' strong >> char '_' >> - notFollowedBy alphaNum) inline)) >>= - return . Emph . normalizeSpaces +emph = Emph `liftM` + (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) + where starStart = char '*' >> lookAhead nonspaceChar + starEnd = notFollowedBy' strong >> char '*' + ulStart = char '_' >> lookAhead nonspaceChar + ulEnd = notFollowedBy' strong >> char '_' strong :: GenParser Char ParserState Inline -strong = ((enclosed (string "**") (try $ string "**") inline) <|> - (enclosed (string "__") (try $ string "__") inline)) >>= - return . Strong . normalizeSpaces +strong = Strong `liftM` + (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) + where starStart = string "**" >> lookAhead nonspaceChar + starEnd = try $ string "**" + ulStart = string "__" >> lookAhead nonspaceChar + ulEnd = try $ string "__" strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>= - return . Strikeout . normalizeSpaces +strikeout = Strikeout `liftM` + (failIfStrict >> inlinesBetween strikeStart strikeEnd) + where strikeStart = string "~~" >> lookAhead nonspaceChar + >> notFollowedBy (char '~') + strikeEnd = try $ string "~~" superscript :: GenParser Char ParserState Inline superscript = failIfStrict >> enclosed (char '^') (char '^') @@ -1185,7 +1209,14 @@ note = try $ do let notes = stateNotes state case lookup ref notes of Nothing -> fail "note not found" - Just raw -> liftM Note $ parseFromString parseBlocks raw + Just raw -> do + -- We temporarily empty the note list while parsing the note, + -- so that we don't get infinite loops with notes inside notes... + -- Note references inside other notes do not work. + updateState $ \st -> st{ stateNotes = [] } + contents <- parseFromString parseBlocks raw + updateState $ \st -> st{ stateNotes = notes } + return $ Note contents inlineNote :: GenParser Char ParserState Inline inlineNote = try $ do @@ -1281,7 +1312,7 @@ citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' first <- letter - rest <- many $ (noneOf ",;]@ \t\n") + rest <- many $ (noneOf ",;!?[]()@ \t\n") let key = first:rest st <- getState guard $ key `elem` stateCitations st diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 32fae5ee7..7fda0da19 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -34,7 +34,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when ) +import Control.Monad ( when, liftM ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -162,6 +162,7 @@ fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = [Str name] contents <- parseFromString (many block) raw + optional blanklines case (name, contents) of ("Author", x) -> do updateState $ \st -> @@ -187,7 +188,6 @@ fieldList :: GenParser Char ParserState Block fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent - blanklines if null items then return Null else return $ DefinitionList $ catMaybes items @@ -198,11 +198,14 @@ fieldList = try $ do lineBlockLine :: GenParser Char ParserState [Inline] lineBlockLine = try $ do - string "| " + char '|' + char ' ' <|> lookAhead (char '\n') white <- many spaceChar line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ') optional endline - return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line + return $ if null white + then normalizeSpaces line + else Str white : normalizeSpaces line lineBlock :: GenParser Char ParserState Block lineBlock = try $ do @@ -330,15 +333,14 @@ indentedLine indents = try $ do string indents manyTill anyChar newline --- two or more indented lines, possibly separated by blank lines. +-- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: GenParser Char st [Char] -indentedBlock = try $ do +indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar - lns <- many $ choice $ [ indentedLine indents, - try $ do b <- blanklines - l <- indentedLine indents - return (b ++ l) ] + lns <- many1 $ try $ do b <- option "" blanklines + l <- indentedLine indents + return (b ++ l) optional blanklines return $ unlines lns @@ -538,9 +540,15 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' noteMarker :: GenParser Char ParserState [Char] -noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']' +noteMarker = do + char '[' + res <- many1 digit + <|> (try $ char '#' >> liftM ('#':) simpleReferenceName') + <|> count 1 (oneOf "#*") + char ']' + return res --- +-- -- reference key -- @@ -555,13 +563,20 @@ unquotedReferenceName = try $ do label' <- many1Till inline (lookAhead $ char ':') return label' -isolated :: Char -> GenParser Char st Char -isolated ch = try $ char ch >>~ notFollowedBy (char ch) +-- Simple reference names are single words consisting of alphanumerics +-- plus isolated (no two adjacent) internal hyphens, underscores, +-- periods, colons and plus signs; no whitespace or other characters +-- are allowed. +simpleReferenceName' :: GenParser Char st String +simpleReferenceName' = do + x <- alphaNum + xs <- many $ alphaNum + <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) + return (x:xs) simpleReferenceName :: GenParser Char st [Inline] simpleReferenceName = do - raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|> - (try $ char '_' >>~ lookAhead alphaNum)) + raw <- simpleReferenceName' return [Str raw] referenceName :: GenParser Char ParserState [Inline] @@ -864,10 +879,16 @@ note = try $ do case lookup ref notes of Nothing -> fail "note not found" Just raw -> do + -- We temporarily empty the note list while parsing the note, + -- so that we don't get infinite loops with notes inside notes... + -- Note references inside other notes are allowed in reST, but + -- not yet in this implementation. + updateState $ \st -> st{ stateNotes = [] } contents <- parseFromString parseBlocks raw - when (ref == "*" || ref == "#") $ do -- auto-numbered - -- delete the note so the next auto-numbered note - -- doesn't get the same contents: - let newnotes = deleteFirstsBy (==) notes [(ref,raw)] - updateState $ \st -> st{ stateNotes = newnotes } + let newnotes = if (ref == "*" || ref == "#") -- auto-numbered + -- delete the note so the next auto-numbered note + -- doesn't get the same contents: + then deleteFirstsBy (==) notes [(ref,raw)] + else notes + updateState $ \st -> st{ stateNotes = newnotes } return $ Note contents diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 19357b343..12d299aa4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -465,7 +465,7 @@ link :: GenParser Char ParserState Inline link = try $ do name <- surrounded (char '"') inline char ':' - url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline)))) + url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline)))) return $ Link name (url, "") -- | Detect plain links to http or email. diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs index 1567a3ede..b17b052c5 100644 --- a/src/Text/Pandoc/S5.hs +++ b/src/Text/Pandoc/S5.hs @@ -31,6 +31,8 @@ Definitions for creation of S5 powerpoint-like HTML. module Text.Pandoc.S5 ( s5HeaderIncludes) where import Text.Pandoc.Shared ( readDataFile ) import System.FilePath ( (</>) ) +import Data.ByteString.UTF8 ( toString, fromString ) +import Data.ByteString.Base64 ( encode ) s5HeaderIncludes :: Maybe FilePath -> IO String s5HeaderIncludes datadir = do @@ -40,10 +42,14 @@ s5HeaderIncludes datadir = do s5Javascript :: Maybe FilePath -> IO String s5Javascript datadir = do - jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment" - jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed" - return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++ - "</script>\n" + js <- readDataFile datadir $ "s5" </> "default" </> "slides.min.js" + return $ "<script type=\"text/javascript\">\n" ++ inCDATA js ++ "</script>\n" + +inCDATA :: String -> String +inCDATA s = "/*<![CDATA[*/\n" ++ s ++ "\n/*]]>*/\n" + +base64 :: String -> String +base64 = toString . encode . fromString s5CSS :: Maybe FilePath -> IO String s5CSS datadir = do @@ -53,5 +59,11 @@ s5CSS datadir = do s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css" s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css" s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css" - return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" - + return $ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"slideProj\" href=\"data:text/css;charset=utf-8;base64," ++ + base64 (s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS) ++ "\" />\n" ++ + "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" href=\"data:text/css;charset=utf-8;base64," ++ + base64 s5OutlineCSS ++ "\" />\n" ++ + "<link rel=\"stylesheet\" type=\"text/css\" media=\"print\" id=\"slidePrint\" href=\"data:text/css;charset=utf-8;base64," ++ + base64 s5PrintCSS ++ "\" />\n" ++ + "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"operaFix\" href=\"data:text/css;charset=utf-8;base64," ++ + base64 s5OperaCSS ++ "\" />\n" diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index c4bc66830..9717e1bc8 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -67,7 +67,7 @@ module Text.Pandoc.Shared ( -- * File handling inDirectory, findDataFile, - readDataFile + readDataFile, ) where import Text.Pandoc.Definition @@ -493,6 +493,7 @@ data WriterOptions = WriterOptions , writerAscii :: Bool -- ^ Avoid non-ascii characters } deriving Show +{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-} -- | Default writer options. defaultWriterOptions :: WriterOptions defaultWriterOptions = diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index b03e8c73f..19c9a808a 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} {- Copyright (C) 2009-2010 John MacFarlane <jgm@berkeley.edu> @@ -83,9 +83,10 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> IO (Either E.IOException String) getDefaultTemplate _ "native" = return $ Right "" getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" +getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user writer = do let format = takeWhile (/='+') writer -- strip off "+lhs" if present - let fname = "templates" </> format <.> "template" + let fname = "templates" </> "default" <.> format E.try $ readDataFile user fname data TemplateState = TemplateState Int [(String,String)] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0f6e00a3b..763f77d7c 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -36,6 +36,7 @@ import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate ) +import Network.URI ( isAbsoluteURI, unEscapeString ) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -122,7 +123,7 @@ blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline blockToConTeXt (CodeBlock _ str) = - return $ "\\starttyping" <> cr <> flush (text str) <> cr <> "\\stoptyping" $$ blankline + return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline -- blankline because \stoptyping can't have anything after it, inc. '}' blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline blockToConTeXt (RawBlock _ _ ) = return empty @@ -282,7 +283,10 @@ inlineToConTeXt (Link txt (src, _)) = do brackets empty <> brackets label <> "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do - return $ braces $ "\\externalfigure" <> brackets (text src) + let src' = if isAbsoluteURI src + then src + else unEscapeString src + return $ braces $ "\\externalfigure" <> brackets (text src') inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents return $ text "\\footnote{" <> diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 9d09d46e3..29c042cf9 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -147,9 +147,9 @@ blockToDocbook opts (Para lst) = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = - text ("<screen" ++ lang ++ ">") <> cr <> - flush (text (escapeStringForXML str) <> cr <> text "</screen>") +blockToDocbook _ (CodeBlock (_,classes,_) str) = + text ("<programlisting" ++ lang ++ ">") <> cr <> + flush (text (escapeStringForXML str) <> cr <> text "</programlisting>") where lang = if null langs then "" else " language=\"" ++ escapeStringForXML (head langs) ++ @@ -185,24 +185,24 @@ blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block blockToDocbook _ (RawBlock _ _) = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = - let alignStrings = map alignmentToString aligns - captionDoc = if null caption + let captionDoc = if null caption then empty - else inTagsIndented "caption" + else inTagsIndented "title" (inlinesToDocbook opts caption) tableType = if isEmpty captionDoc then "informaltable" else "table" - percent w = show (truncate (100*w) :: Integer) ++ "%" - coltags = if all (== 0.0) widths - then empty - else vcat $ map (\w -> - selfClosingTag "col" [("width", percent w)]) widths + percent w = show (truncate (100*w) :: Integer) ++ "*" + coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec" + ([("colwidth", percent w) | w > 0] ++ + [("align", alignmentToString al)])) widths aligns head' = if all null headers then empty else inTagsIndented "thead" $ - tableRowToDocbook opts alignStrings "th" headers + tableRowToDocbook opts headers body' = inTagsIndented "tbody" $ - vcat $ map (tableRowToDocbook opts alignStrings "td") rows - in inTagsIndented tableType $ captionDoc $$ coltags $$ head' $$ body' + vcat $ map (tableRowToDocbook opts) rows + in inTagsIndented tableType $ captionDoc $$ + (inTags True "tgroup" [("cols", show (length headers))] $ + coltags $$ head' $$ body') alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -212,22 +212,16 @@ alignmentToString alignment = case alignment of AlignDefault -> "left" tableRowToDocbook :: WriterOptions - -> [String] - -> String -> [[Block]] -> Doc -tableRowToDocbook opts aligns celltype cols = - inTagsIndented "tr" $ vcat $ - zipWith (tableItemToDocbook opts celltype) aligns cols +tableRowToDocbook opts cols = + inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols tableItemToDocbook :: WriterOptions - -> [Char] - -> [Char] -> [Block] -> Doc -tableItemToDocbook opts tag align item = - let attrib = [("align", align)] - in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item +tableItemToDocbook opts item = + inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index c0cc815d4..9fc393fed 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -46,6 +46,8 @@ import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) +import System.Directory ( copyFile ) +import Network.URI ( unEscapeString ) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line @@ -59,9 +61,26 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do , writerStandalone = True , writerWrapText = False } let sourceDir = writerSourceDirectory opts' + let vars = writerVariables opts' + let mbCoverImage = lookup "epub-cover-image" vars + + -- cover page + (cpgEntry, cpicEntry) <- + case mbCoverImage of + Nothing -> return ([],[]) + Just img -> do + let coverImage = "cover-image" ++ takeExtension img + copyFile img coverImage + let cpContent = fromString $ writeHtmlString + opts'{writerTemplate = pageTemplate + ,writerVariables = + ("coverimage",coverImage):vars} + (Pandoc meta []) + imgContent <- B.readFile img + return ( [mkEntry "cover.xhtml" cpContent] + , [mkEntry coverImage imgContent] ) -- title page - let vars = writerVariables opts' let tpContent = fromString $ writeHtmlString opts'{writerTemplate = pageTemplate ,writerVariables = ("titlepage","yes"):vars} @@ -84,8 +103,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do let chunks = splitByIndices h1Indices blocks let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys titleize xs = Pandoc meta xs - let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate - , writerHTMLMathMethod = PlainMath } + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapters = map titleize chunks let chapterToEntry :: Int -> Pandoc -> Entry chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ @@ -117,17 +135,21 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ [ metadataElement (writerEPUBMetadata opts') - uuid lang plainTitle plainAuthors + uuid lang plainTitle plainAuthors mbCoverImage , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () , unode "item" ! [("id","style"), ("href","stylesheet.css") ,("media-type","text/css")] $ () ] ++ - map chapterNode (tpEntry : chapterEntries) ++ - map pictureNode picEntries + map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ + map pictureNode (cpicEntry ++ picEntries) , unode "spine" ! [("toc","ncx")] $ - map chapterRefNode (tpEntry : chapterEntries) + case mbCoverImage of + Nothing -> [] + Just _ -> [ unode "itemref" ! + [("idref", "cover"),("linear","no")] $ () ] + ++ map chapterRefNode (tpEntry : chapterEntries) ] let contentsEntry = mkEntry "content.opf" contentsData @@ -142,7 +164,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do let tocData = fromString $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ - [ unode "head" + [ unode "head" $ [ unode "meta" ! [("name","dtb:uid") ,("content", show uuid)] $ () , unode "meta" ! [("name","dtb:depth") @@ -151,7 +173,10 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () - ] + ] ++ case mbCoverImage of + Nothing -> [] + Just _ -> [unode "meta" ! [("name","cover"), + ("content","cover-image")] $ ()] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) [1..(length chapterEntries + 1)] @@ -181,11 +206,12 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- construct archive let archive = foldr addEntryToArchive emptyArchive (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : - contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) + contentsEntry : tocEntry : + (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) ) return $ fromArchive archive -metadataElement :: String -> UUID -> String -> String -> [String] -> Element -metadataElement metadataXML uuid lang title authors = +metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element +metadataElement metadataXML uuid lang title authors mbCoverImage = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ @@ -200,7 +226,9 @@ metadataElement metadataXML uuid lang title authors = [ unode "dc:language" lang | not (elt `contains` "language") ] ++ [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | not (elt `contains` "identifier") ] ++ - [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] + [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++ + [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | + not (isNothing mbCoverImage) ] in elt{ elContent = elContent elt ++ map Elem newNodes } transformInlines :: HTMLMathMethod @@ -211,9 +239,10 @@ transformInlines :: HTMLMathMethod transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) = return $ Emph lab : xs transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do + let src' = unEscapeString src pics <- readIORef picsRef - let oldsrc = sourceDir </> src - let ext = takeExtension src + let oldsrc = sourceDir </> src' + let ext = takeExtension src' newsrc <- case lookup oldsrc pics of Just n -> return n Nothing -> do @@ -266,9 +295,17 @@ pageTemplate = unlines , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" , "<head>" , "<title>$title$</title>" + , "$if(coverimage)$" + , "<style type=\"text/css\">img{ max-width: 100%; }</style>" + , "$endif$" , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" , "</head>" , "<body>" + , "$if(coverimage)$" + , "<div id=\"cover-image\">" + , "<img src=\"$coverimage$\" alt=\"$title$\" />" + , "</div>" + , "$else$" , "$if(titlepage)$" , "<h1 class=\"title\">$title$</h1>" , "$for(author)$" @@ -279,6 +316,7 @@ pageTemplate = unlines , "$if(toc)$" , "$toc$" , "$endif$" + , "$endif$" , "$body$" , "$endif$" , "</body>" diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index b3320fa06..573adbf4a 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.HTML + Module : Text.Pandoc.Writers.HTML Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -46,6 +46,7 @@ import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList ) import qualified Text.XHtml.Transitional as XHtml import Text.TeXMath import Text.XML.Light.Output +import System.FilePath (takeExtension) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes @@ -101,42 +102,37 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do else return noHtml auths <- if standalone then mapM (inlineListToHtml opts) authors' - else return [] + else return [] date <- if standalone then inlineListToHtml opts date' - else return noHtml - let sects = hierarchicalize blocks - toc <- if writerTableOfContents opts + else return noHtml + let sects = hierarchicalize $ + if writerSlideVariant opts == NoSlides + then blocks + else case blocks of + (Header 1 _ : _) -> blocks + _ -> + let isL1 (Header 1 _) = True + isL1 _ = False + (preBlocks, rest) = break isL1 blocks + in (RawBlock "html" "<div class=\"slide\">" : + preBlocks) ++ (RawBlock "html" "</div>" : + rest) + toc <- if writerTableOfContents opts then tableOfContents opts sects else return Nothing - let startSlide = RawBlock "html" "<div class=\"slide\">\n" - endSlide = RawBlock "html" "</div>\n" - let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs) - cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs - cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++ - (Header 1 ys : cutUp xs) - cutUp (x:xs) = x : cutUp xs - cutUp [] = [] - let slides = case blocks of - (HorizontalRule : xs) -> [startSlide] ++ cutUp xs ++ [endSlide] - (Header 1 ys : xs) -> [startSlide, Header 1 ys] ++ - cutUp xs ++ [endSlide] - _ -> [startSlide] ++ cutUp blocks ++ - [endSlide] blocks' <- liftM (toHtmlFromList . intersperse (nl opts)) $ - if writerSlideVariant opts `elem` [SlidySlides, S5Slides] - then mapM (blockToHtml opts) slides - else mapM (elementToHtml opts) sects + mapM (elementToHtml opts) sects st <- get let notes = reverse (stNotes st) let thebody = blocks' +++ footnoteSection opts notes let math = if stMath st then case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> - script ! + script ! [src url, thetype "text/javascript"] $ noHtml MathML (Just url) -> - script ! + script ! [src url, thetype "text/javascript"] $ noHtml MathJax url -> script ! [src url, thetype "text/javascript"] $ noHtml @@ -144,9 +140,10 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do script ! [src url, thetype "text/javascript"] $ noHtml _ -> case lookup "mathml-script" (writerVariables opts) of - Just s -> + Just s -> script ! [thetype "text/javascript"] << - primHtml s + primHtml ("/*<![CDATA[*/\n" ++ s ++ + "/*]]>*/\n") Nothing -> noHtml else noHtml let newvars = [("highlighting-css", defaultHighlightingCss) | @@ -173,7 +170,10 @@ inTemplate opts tit auths date toc body' newvars = [ ("body", dropWhile (=='\n') $ showHtmlFragment body') , ("pagetitle", topTitle') , ("title", dropWhile (=='\n') $ showHtmlFragment tit) - , ("date", date') ] ++ + , ("date", date') + , ("idprefix", writerIdentifierPrefix opts) + , ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2") + , ("s5-url", "ui/default") ] ++ [ ("html5","true") | writerHtml5 opts ] ++ (case toc of Just t -> [ ("toc", showHtmlFragment t)] @@ -202,14 +202,7 @@ tableOfContents opts sects = do let tocList = catMaybes contents return $ if null tocList then Nothing - else Just $ - if writerHtml5 opts - then (tag "nav" ! [prefixedId opts' "TOC"] $ - nl opts +++ unordList opts tocList +++ nl opts) - +++ nl opts - else (thediv ! [prefixedId opts' "TOC"] $ - nl opts +++ unordList opts tocList +++ nl opts) - +++ nl opts + else Just $ unordList opts tocList -- | Convert section number to string showSecNum :: [Int] -> String @@ -233,33 +226,35 @@ elementToListItem opts (Sec _ num id' headerText subsecs) = do -- | Convert an Element to Html. elementToHtml :: WriterOptions -> Element -> State WriterState Html -elementToHtml opts (Blk block) = blockToHtml opts block +elementToHtml opts (Blk HorizontalRule) | writerSlideVariant opts /= NoSlides = + return $ primHtml "</div>" +++ nl opts +++ primHtml "<div class=\"slide\">" +elementToHtml opts (Blk block) = blockToHtml opts block elementToHtml opts (Sec level num id' title' elements) = do - innerContents <- mapM (elementToHtml opts) elements modify $ \st -> st{stSecNum = num} -- update section number header' <- blockToHtml opts (Header level title') - let slides = writerSlideVariant opts `elem` [SlidySlides, S5Slides] + innerContents <- mapM (elementToHtml opts) elements let header'' = header' ! [prefixedId opts id' | not (writerStrictMarkdown opts || - writerSectionDivs opts || slides)] + writerSectionDivs opts || + writerSlideVariant opts == S5Slides)] let stuff = header'' : innerContents - return $ if slides -- S5 gets confused by the extra divs around sections - then toHtmlFromList $ intersperse (nl opts) stuff - else if writerSectionDivs opts - then if writerHtml5 opts - then tag "section" ! [prefixedId opts id'] - << (nl opts : (intersperse (nl opts) stuff - ++ [nl opts])) - else thediv ! [prefixedId opts id'] << - (nl opts : (intersperse (nl opts) stuff - ++ [nl opts])) - else toHtmlFromList $ intersperse (nl opts) stuff + let slide = writerSlideVariant opts /= NoSlides && level == 1 + let stuff' = if slide + then [thediv ! [theclass "slide"] << + (nl opts : intersperse (nl opts) stuff ++ [nl opts])] + else intersperse (nl opts) stuff + let inNl x = nl opts : x ++ [nl opts] + return $ if writerSectionDivs opts + then if writerHtml5 opts + then tag "section" ! [prefixedId opts id'] << inNl stuff' + else thediv ! [prefixedId opts id'] << inNl stuff' + else toHtmlFromList stuff' -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. footnoteSection :: WriterOptions -> [Html] -> Html footnoteSection opts notes = - if null notes + if null notes then noHtml else nl opts +++ (thediv ! [theclass "footnotes"] $ nl opts +++ hr +++ nl opts +++ @@ -272,7 +267,7 @@ parseMailto ('m':'a':'i':'l':'t':'o':':':addr) = let (name', rest) = span (/='@') addr domain = drop 1 rest in Just (name', domain) -parseMailto _ = Nothing +parseMailto _ = Nothing -- | Obfuscate a "mailto:" link. obfuscateLink :: WriterOptions -> String -> String -> Html @@ -280,15 +275,15 @@ obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = anchor ! [href s] << txt obfuscateLink opts txt s = let meth = writerEmailObfuscation opts - s' = map toLower s + s' = map toLower s in case parseMailto s' of (Just (name', domain)) -> let domain' = substitute "." " dot " domain at' = obfuscateChar '@' - (linkText, altText) = + (linkText, altText) = if txt == drop 7 s' -- autolink then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain') - else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ + else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") in case meth of ReferenceObfuscation -> @@ -297,18 +292,18 @@ obfuscateLink opts txt s = ++ "\">" ++ (obfuscateString txt) ++ "</a>" JavascriptObfuscation -> (script ! [thetype "text/javascript"] $ - primHtml ("\n<!--\nh='" ++ - obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ + primHtml ("\n<!--\nh='" ++ + obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ obfuscateString name' ++ "';e=n+a+h;\n" ++ - "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ + "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ + linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth _ -> anchor ! [href s] $ stringToHtml opts txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String -obfuscateChar char = +obfuscateChar char = let num = ord char numstr = if even num then show num else "x" ++ showHex num "" in "&#" ++ numstr ++ ";" @@ -323,6 +318,17 @@ attrsToHtml opts (id',classes',keyvals) = [prefixedId opts id' | not (null id')] ++ map (\(x,y) -> strAttr x y) keyvals +imageExts :: [String] +imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", + "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm", + "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff", + "wbmp", "xbm", "xpm", "xwd" ] + +treatAsImage :: FilePath -> Bool +treatAsImage fp = + let ext = map toLower $ drop 1 $ takeExtension fp + in null ext || ext `elem` imageExts + -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return noHtml @@ -361,14 +367,14 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially - -- if default is incremental, make it nonincremental; + -- if default is incremental, make it nonincremental; -- otherwise incremental if writerSlideVariant opts /= NoSlides then let inc = not (writerIncremental opts) in - case blocks of + case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) (BulletList lst) - [OrderedList attribs lst] -> + [OrderedList attribs lst] -> blockToHtml (opts {writerIncremental = inc}) (OrderedList attribs lst) _ -> do contents <- blockListToHtml opts blocks @@ -377,7 +383,7 @@ blockToHtml opts (BlockQuote blocks) = else do contents <- blockListToHtml opts blocks return $ blockquote (nl opts +++ contents +++ nl opts) -blockToHtml opts (Header level lst) = do +blockToHtml opts (Header level lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts @@ -470,8 +476,8 @@ tableRowToHtml opts aligns rownum cols' = do 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToHtml opts mkcell alignment item) + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToHtml opts mkcell alignment item) aligns cols' return $ (tr ! [theclass rowclass] $ nl opts +++ toHtmlFromList cols'') +++ nl opts @@ -508,13 +514,13 @@ blockListToHtml opts lst = -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html -inlineListToHtml opts lst = +inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . toHtmlFromList -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = - case inline of + case inline of (Str str) -> return $ stringToHtml opts str (Space) -> return $ stringToHtml opts " " (LineBreak) -> return br @@ -543,9 +549,9 @@ inlineToHtml opts inline = stringToHtml opts "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote - (Math t str) -> modify (\st -> st {stMath = True}) >> + (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of - LaTeXMathML _ -> + LaTeXMathML _ -> -- putting LaTeXMathML in container with class "LaTeX" prevents -- non-math elements on the page from being treated as math by -- the javascript @@ -594,37 +600,44 @@ inlineToHtml opts inline = LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ primHtml str _ -> return noHtml - (RawInline "html" str) -> return $ primHtml str + (RawInline "html" str) -> return $ primHtml str (RawInline _ _) -> return noHtml (Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s -> return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do - linkText <- inlineListToHtml opts txt + linkText <- inlineListToHtml opts txt return $ obfuscateLink opts (show linkText) s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt - return $ anchor ! ([href s] ++ - if null tit then [] else [title tit]) $ + return $ anchor ! ([href s] ++ + if null tit then [] else [title tit]) $ linkText - (Image txt (s,tit)) -> do + (Image txt (s,tit)) | treatAsImage s -> do let alternate' = stringify txt let attributes = [src s] ++ - (if null tit - then [] - else [title tit]) ++ - if null txt - then [] + (if null tit + then [] + else [title tit]) ++ + if null txt + then [] else [alt alternate'] - return $ image ! attributes - -- note: null title included, as in Markdown.pl - (Note contents) -> do + return $ image ! attributes + -- note: null title included, as in Markdown.pl + (Image _ (s,tit)) -> do + let attributes = [src s] ++ + (if null tit + then [] + else [title tit]) + return $ itag "embed" ! attributes + -- note: null title included, as in Markdown.pl + (Note contents) -> do st <- get let notes = stNotes st let number = (length notes) + 1 let ref = show number - htmlContents <- blockListToNote opts ref contents + htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes - put $ st {stNotes = (htmlContents:notes)} + put $ st {stNotes = (htmlContents:notes)} return $ sup << anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), theclass "footnoteRef", @@ -635,9 +648,8 @@ blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ - "\" class=\"footnoteBackLink\"" ++ - " title=\"Jump back to footnote " ++ ref ++ "\">" ++ + let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ + "\" class=\"footnoteBackLink\">" ++ (if writerAscii opts then "↩" else "↩") ++ "</a>"] blocks' = if null blocks then [] @@ -652,4 +664,3 @@ blockListToNote opts ref blocks = Plain backlink] in do contents <- blockListToHtml opts blocks' return $ nl opts +++ (li ! [prefixedId opts ("fn" ++ ref)]) contents - diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7dd736da4..d925b2897 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -34,7 +34,9 @@ import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Printf ( printf ) -import Data.List ( (\\), isSuffixOf, isPrefixOf, intercalate, intersperse ) +import Network.URI ( isAbsoluteURI, unEscapeString ) +import Data.List ( (\\), isSuffixOf, isInfixOf, + isPrefixOf, intercalate, intersperse ) import Data.Char ( toLower, isPunctuation ) import Control.Monad.State import Text.Pandoc.Pretty @@ -42,6 +44,9 @@ import System.FilePath (dropExtension) data WriterState = WriterState { stInNote :: Bool -- @True@ if we're in a note + , stInTable :: Bool -- @True@ if we're in a table + , stTableNotes :: [(Char, Doc)] -- List of markers, notes + -- in current table , stOLLevel :: Int -- level of ordered list nesting , stOptions :: WriterOptions -- writer options, so they don't have to be parameter , stVerbInNote :: Bool -- true if document has verbatim text in note @@ -53,17 +58,20 @@ data WriterState = , stGraphics :: Bool -- true if document contains images , stLHS :: Bool -- true if document has literate haskell code , stBook :: Bool -- true if document uses book or memoir class + , stCsquotes :: Bool -- true if document uses csquotes } -- | Convert Pandoc to LaTeX. writeLaTeX :: WriterOptions -> Pandoc -> String writeLaTeX options document = evalState (pandocToLaTeX options document) $ - WriterState { stInNote = False, stOLLevel = 1, stOptions = options, + WriterState { stInNote = False, stInTable = False, + stTableNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stEnumerate = False, stTable = False, stStrikeout = False, stSubscript = False, stUrl = False, stGraphics = False, - stLHS = False, stBook = writerChapters options } + stLHS = False, stBook = writerChapters options, + stCsquotes = False } pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do @@ -73,6 +81,10 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do "{report}" `isSuffixOf` x) when (any usesBookClass (lines template)) $ modify $ \s -> s{stBook = True} + -- check for \usepackage...{csquotes}; if present, we'll use + -- \enquote{...} for smart quotes: + when ("{csquotes}" `isInfixOf` template) $ + modify $ \s -> s{stCsquotes = True} opts <- liftM stOptions get let colwidth = if writerWrapText opts then Just $ writerColumns opts @@ -106,7 +118,6 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do , ("title", titletext) , ("date", dateText) ] ++ [ ("author", a) | a <- authorsText ] ++ - [ ("xetex", "yes") | writerXeTeX options ] ++ [ ("verbatim-in-note", "yes") | stVerbInNote st ] ++ [ ("fancy-enums", "yes") | stEnumerate st ] ++ [ ("tables", "yes") | stTable st ] ++ @@ -147,14 +158,6 @@ stringToLaTeX = escapeStringUsing latexEscapes inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents --- | Remove all code elements from list of inline elements --- (because it's illegal to have verbatim inside some command arguments) -deVerb :: [Inline] -> [Inline] -deVerb [] = [] -deVerb ((Code _ str):rest) = - (RawInline "latex" $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) -deVerb (other:rest) = other:(deVerb rest) - -- | Convert Pandoc block element to LaTeX. blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc @@ -211,8 +214,9 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do | null params = empty | otherwise = "[" <> hsep (intersperse "," (map text params)) <> "]" - return $ "\\begin{" <> text env <> "}" <> printParams $$ flush (text str) $$ - "\\end{" <> text env <> "}" $$ cr -- final cr needed because of footnotes + return $ flush ("\\begin{" <> text env <> "}" <> printParams $$ text str $$ + "\\end{" <> text env <> "}") $$ cr + -- final cr needed because of footnotes blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline blockToLaTeX (RawBlock _ _) = return empty blockToLaTeX (BulletList lst) = do @@ -244,14 +248,13 @@ blockToLaTeX (DefinitionList lst) = do blockToLaTeX HorizontalRule = return $ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline blockToLaTeX (Header level lst) = do - let lst' = deVerb lst - txt <- inlineListToLaTeX lst' + txt <- inlineListToLaTeX lst let noNote (Note _) = Str "" noNote x = x - let lstNoNotes = bottomUp noNote lst' + let lstNoNotes = bottomUp noNote lst -- footnotes in sections don't work unless you specify an optional -- argument: \section[mysec]{mysec\footnote{blah}} - optional <- if lstNoNotes == lst' + optional <- if lstNoNotes == lst then return empty else do res <- inlineListToLaTeX lstNoNotes @@ -269,47 +272,61 @@ blockToLaTeX (Header level lst) = do 5 -> headerWith "\\subparagraph" stuffing _ -> txt $$ blankline blockToLaTeX (Table caption aligns widths heads rows) = do + modify $ \s -> s{ stInTable = True, stTableNotes = [] } headers <- if all null heads then return empty - else liftM ($$ "\\hline") $ (tableRowToLaTeX widths) heads + else liftM ($$ "\\ML") + $ (tableRowToLaTeX True aligns widths) heads captionText <- inlineListToLaTeX caption - rows' <- mapM (tableRowToLaTeX widths) rows - let colDescriptors = concat $ zipWith toColDescriptor widths aligns - let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ - headers $$ vcat rows' $$ "\\end{tabular}" - let centered txt = "\\begin{center}" $$ txt $$ "\\end{center}" - modify $ \s -> s{ stTable = True } - return $ if isEmpty captionText - then centered tableBody $$ blankline - else "\\begin{table}[h]" $$ centered tableBody $$ - inCmd "caption" captionText $$ "\\end{table}" $$ blankline - -toColDescriptor :: Double -> Alignment -> String -toColDescriptor 0 align = + let capt = if isEmpty captionText + then empty + else text "caption = " <> captionText <> "," <> space + rows' <- mapM (tableRowToLaTeX False aligns widths) rows + let rows'' = intersperse ("\\\\\\noalign{\\medskip}") rows' + tableNotes <- liftM (reverse . stTableNotes) get + let toNote (marker, x) = "\\tnote" <> brackets (char marker) <> + braces (nest 2 x) + let notes = vcat $ map toNote tableNotes + let colDescriptors = text $ concat $ map toColDescriptor aligns + let tableBody = + ("\\ctable" <> brackets (capt <> text "pos = H, center, botcap")) + <> braces colDescriptors + $$ braces ("% notes" <> cr <> notes <> cr) + $$ braces (text "% rows" $$ "\\FL" $$ + vcat (headers : rows'') $$ "\\LL" <> cr) + modify $ \s -> s{ stTable = True, stInTable = False, stTableNotes = [] } + return $ tableBody $$ blankline + +toColDescriptor :: Alignment -> String +toColDescriptor align = case align of AlignLeft -> "l" AlignRight -> "r" AlignCenter -> "c" AlignDefault -> "l" -toColDescriptor width align = ">{\\PBS" ++ - (case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright") ++ - "\\hspace{0pt}}p{" ++ printf "%.2f" width ++ "\\columnwidth}" blockListToLaTeX :: [Block] -> State WriterState Doc blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat -tableRowToLaTeX :: [Double] -> [[Block]] -> State WriterState Doc -tableRowToLaTeX widths cols = do +tableRowToLaTeX :: Bool + -> [Alignment] + -> [Double] + -> [[Block]] + -> State WriterState Doc +tableRowToLaTeX header aligns widths cols = do renderedCells <- mapM blockListToLaTeX cols - let toCell 0 c = c - toCell w c = "\\parbox{" <> text (printf "%.2f" w) <> - "\\columnwidth}{" <> c <> cr <> "}" - let cells = zipWith toCell widths renderedCells - return $ (hcat $ intersperse (" & ") cells) <> "\\\\" + let valign = text $ if header then "[b]" else "[t]" + let halign x = case x of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + let toCell 0 _ c = c + toCell w a c = "\\parbox" <> valign <> + braces (text (printf "%.2f\\columnwidth" w)) <> + braces (halign a <> cr <> c <> cr) + let cells = zipWith3 toCell widths aligns renderedCells + return $ hcat $ intersperse (" & ") cells listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -317,7 +334,7 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToLaTeX (term, defs) = do - term' <- inlineListToLaTeX $ deVerb term + term' <- inlineListToLaTeX term def' <- liftM vsep $ mapM blockListToLaTeX defs return $ "\\item" <> brackets term' $$ def' @@ -335,23 +352,23 @@ isQuoted _ = False inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc inlineToLaTeX (Emph lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph" + inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf" + inlineListToLaTeX lst >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do - contents <- inlineListToLaTeX $ deVerb lst + contents <- inlineListToLaTeX lst modify $ \s -> s{ stStrikeout = True } return $ inCmd "sout" contents inlineToLaTeX (Superscript lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript" + inlineListToLaTeX lst >>= return . inCmd "textsuperscript" inlineToLaTeX (Subscript lst) = do modify $ \s -> s{ stSubscript = True } - contents <- inlineListToLaTeX $ deVerb lst + contents <- inlineListToLaTeX lst -- oddly, latex includes \textsuperscript but not \textsubscript -- so we have to define it (using a different name so as not to conflict with memoir class): return $ inCmd "textsubscr" contents inlineToLaTeX (SmallCaps lst) = - inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc" + inlineListToLaTeX lst >>= return . inCmd "textsc" inlineToLaTeX (Cite cits lst) = do st <- get let opts = stOptions st @@ -362,29 +379,38 @@ inlineToLaTeX (Cite cits lst) = do inlineToLaTeX (Code _ str) = do st <- get - when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True } - let chr = ((enumFromTo '!' '~') \\ str) !! 0 if writerListings (stOptions st) - then return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] - else return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] + then do + when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True } + let chr = ((enumFromTo '!' '~') \\ str) !! 0 + return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] + else return $ text $ "\\texttt{" ++ stringToLaTeX str ++ "}" inlineToLaTeX (Quoted SingleQuote lst) = do contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then "\\," - else empty - return $ char '`' <> s1 <> contents <> s2 <> char '\'' + csquotes <- liftM stCsquotes get + if csquotes + then return $ "\\enquote" <> braces contents + else do + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then "\\," + else empty + return $ char '`' <> s1 <> contents <> s2 <> char '\'' inlineToLaTeX (Quoted DoubleQuote lst) = do contents <- inlineListToLaTeX lst - let s1 = if (not (null lst)) && (isQuoted (head lst)) - then "\\," - else empty - let s2 = if (not (null lst)) && (isQuoted (last lst)) - then "\\," - else empty - return $ "``" <> s1 <> contents <> s2 <> "''" + csquotes <- liftM stCsquotes get + if csquotes + then return $ "\\enquote" <> braces contents + else do + let s1 = if (not (null lst)) && (isQuoted (head lst)) + then "\\," + else empty + let s2 = if (not (null lst)) && (isQuoted (last lst)) + then "\\," + else empty + return $ "``" <> s1 <> contents <> s2 <> "''" inlineToLaTeX Apostrophe = return $ char '\'' inlineToLaTeX EmDash = return "---" inlineToLaTeX EnDash = return "--" @@ -402,19 +428,28 @@ inlineToLaTeX (Link txt (src, _)) = [Code _ x] | x == src -> -- autolink do modify $ \s -> s{ stUrl = True } return $ text $ "\\url{" ++ x ++ "}" - _ -> do contents <- inlineListToLaTeX $ deVerb txt + _ -> do contents <- inlineListToLaTeX txt return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - return $ "\\includegraphics" <> braces (text source) + let source' = if isAbsoluteURI source + then source + else unEscapeString source + return $ "\\includegraphics" <> braces (text source') inlineToLaTeX (Note contents) = do modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) - -- note: a \n before } is needed when note ends with a Verbatim environment - return $ "\\footnote" <> braces (nest 2 contents') - + inTable <- liftM stInTable get + if inTable + then do + curnotes <- liftM stTableNotes get + let marker = cycle ['a'..'z'] !! length curnotes + modify $ \s -> s{ stTableNotes = (marker, contents') : curnotes } + return $ "\\tmark" <> brackets (char marker) <> space + else return $ "\\footnote" <> braces (nest 2 contents') + -- note: a \n before } needed when note ends with a Verbatim environment citationsToNatbib :: [Citation] -> State WriterState Doc citationsToNatbib (one:[]) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 5e12c4aca..48e9578b4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -366,7 +366,19 @@ blockListToMarkdown :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements -> State WriterState Doc blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) blocks >>= return . cat + mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat + -- insert comment between list and indented code block, or the + -- code block will be treated as a list continuation paragraph + where fixBlocks (b : CodeBlock attr x : rest) + | (writerStrictMarkdown opts || attr == nullAttr) && isListBlock b = + b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : + fixBlocks rest + fixBlocks (x : xs) = x : fixBlocks xs + fixBlocks [] = [] + isListBlock (BulletList _) = True + isListBlock (OrderedList _ _) = True + isListBlock (DefinitionList _) = True + isListBlock _ = False -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index cf1be8755..f8030965c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -29,6 +29,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Data.IORef +import Data.List ( isPrefixOf ) import System.FilePath ( (</>), takeExtension ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) @@ -36,11 +37,15 @@ import Codec.Archive.Zip import System.Time import Paths_pandoc ( getDataFileName ) import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import System.Directory import Control.Monad (liftM) +import Network.URI ( unEscapeString ) +import Text.Pandoc.XML +import Text.Pandoc.Pretty -- | Produce an ODT file from a Pandoc document. writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt @@ -69,14 +74,37 @@ writeODT mbRefOdt opts doc = do (TOD epochtime _) <- getClockTime let contentEntry = toEntry "content.xml" epochtime $ fromString newContents picEntries <- readIORef picEntriesRef - let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) - return $ fromArchive archive + let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries + -- construct META-INF/manifest.xml based on archive + let toFileEntry fp = case getMimeType fp of + Nothing -> empty + Just m -> selfClosingTag "manifest:file-entry" + [("manifest:media-type", m) + ,("manifest:full-path", fp) + ] + let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ] + let manifestEntry = toEntry "META-INF/manifest.xml" epochtime + $ fromString $ show + $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + $$ + ( inTags True "manifest:manifest" + [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")] + $ ( selfClosingTag "manifest:file-entry" + [("manifest:media-type","application/vnd.oasis.opendocument.text") + ,("manifest:version","1.2") + ,("manifest:full-path","/")] + $$ vcat ( map toFileEntry $ files ) + ) + ) + let archive' = addEntryToArchive manifestEntry archive + return $ fromArchive archive' transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline transformPic sourceDir entriesRef (Image lab (src,tit)) = do + let src' = unEscapeString src entries <- readIORef entriesRef - let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src - catch (readEntry [] (sourceDir </> src) >>= \entry -> + let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src' + catch (readEntry [] (sourceDir </> src') >>= \entry -> modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> return (Image lab (newsrc, tit))) (\_ -> return (Emph lab)) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index b9444aac7..e675f4e65 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -62,6 +62,7 @@ data WriterState = , stIndentPara :: Int , stInDefinition :: Bool , stTight :: Bool + , stFirstPara :: Bool } defaultWriterState :: WriterState @@ -75,6 +76,7 @@ defaultWriterState = , stIndentPara = 0 , stInDefinition = False , stTight = False + , stFirstPara = False } when :: Bool -> Doc -> Doc @@ -111,10 +113,18 @@ inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r -> setInDefinitionList :: Bool -> State WriterState () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } -inParagraphTags :: Doc -> Doc -inParagraphTags d | isEmpty d = empty -inParagraphTags d = - inTags False "text:p" [("text:style-name", "Text_20_body")] d +setFirstPara :: State WriterState () +setFirstPara = modify $ \s -> s { stFirstPara = True } + +inParagraphTags :: Doc -> State WriterState Doc +inParagraphTags d | isEmpty d = return empty +inParagraphTags d = do + b <- gets stFirstPara + a <- if b + then do modify $ \st -> st { stFirstPara = False } + return $ [("text:style-name", "First_20_paragraph")] + else return [("text:style-name", "Text_20_body")] + return $ inTags False "text:p" a d inParagraphTagsWithStyle :: String -> Doc -> Doc inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] @@ -138,9 +148,10 @@ inTextStyle d = do $ selfClosingTag "style:text-properties" (concatMap snd $ Map.toList at) return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d -inHeaderTags :: Int -> Doc -> Doc -inHeaderTags i = inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) - , ("text:outline-level", show i)] +inHeaderTags :: Int -> Doc -> State WriterState Doc +inHeaderTags i d = + return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i) + , ("text:outline-level", show i)] d inQuotes :: QuoteType -> Doc -> Doc inQuotes SingleQuote s = text "‘" <> s <> text "’" @@ -164,7 +175,7 @@ writeOpenDocument :: WriterOptions -> Pandoc -> String writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = let ((doc, title', authors', date'),s) = flip runState defaultWriterState $ do - title'' <- inlinesToOpenDocument opts title + title'' <- inlinesToOpenDocument opts title authors'' <- mapM (inlinesToOpenDocument opts) authors date'' <- inlinesToOpenDocument opts date doc'' <- blocksToOpenDocument opts blocks @@ -274,18 +285,20 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc blockToOpenDocument o bs - | Plain b <- bs = inParagraphTags <$> inlinesToOpenDocument o b - | Para b <- bs = inParagraphTags <$> inlinesToOpenDocument o b - | Header i b <- bs = inHeaderTags i <$> inlinesToOpenDocument o b - | BlockQuote b <- bs = mkBlockQuote b - | CodeBlock _ s <- bs = preformatted s + | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b + | Header i b <- bs = setFirstPara >> + (inHeaderTags i =<< inlinesToOpenDocument o b) + | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b + | DefinitionList b <- bs = setFirstPara >> defList b + | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b + | OrderedList a b <- bs = setFirstPara >> orderedList a b + | CodeBlock _ s <- bs = setFirstPara >> preformatted s + | Table c a w h r <- bs = setFirstPara >> table c a w h r + | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" + [ ("text:style-name", "Horizontal_20_Line") ]) | RawBlock _ _ <- bs = return empty - | DefinitionList b <- bs = defList b - | BulletList b <- bs = bulletListToOpenDocument o b - | OrderedList a b <- bs = orderedList a b - | Table c a w h r <- bs = table c a w h r | Null <- bs = return empty - | HorizontalRule <- bs = return $ selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ] | otherwise = return empty where defList b = do setInDefinitionList True @@ -381,14 +394,14 @@ inlineToOpenDocument o ils mkImg s = inTags False "draw:frame" [] $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") - , (" xlink:show" , "embed" ) + , ("xlink:show" , "embed" ) , ("xlink:actuate", "onLoad")] mkNote l = do n <- length <$> gets stNotes let footNote t = inTags False "text:note" [ ("text:id" , "ftn" ++ show n) , ("text:note-class", "footnote" )] $ - inTagsSimple "text:note-citation" (text . show $ n + 1) <> + inTagsSimple "text:note-citation" (text . show $ n + 1) <> inTagsSimple "text:note-body" t nn <- footNote <$> withParagraphStyle o "Footnote" l addNote nn diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 605e4162b..eb36c1ca6 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -37,23 +37,28 @@ import Data.Char ( ord, isDigit, toLower ) import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) +import Network.URI ( isAbsoluteURI, unEscapeString ) -- | Convert Image inlines into a raw RTF embedded image, read from a file. -- If file not found or filetype not jpeg or png, leave the inline unchanged. rtfEmbedImage :: Inline -> IO Inline -rtfEmbedImage x@(Image _ (src,_)) - | map toLower (takeExtension src) `elem` [".jpg",".jpeg",".png"] = do - imgdata <- catch (B.readFile src) (\_ -> return B.empty) - let bytes = map (printf "%02x") $ B.unpack imgdata - let filetype = case map toLower (takeExtension src) of - ".jpg" -> "\\jpegblip" - ".jpeg" -> "\\jpegblip" - ".png" -> "\\pngblip" - _ -> error "Unknown file type" - let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" - return $ if B.null imgdata - then x - else RawInline "rtf" raw +rtfEmbedImage x@(Image _ (src,_)) = do + let ext = map toLower (takeExtension src) + if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) + then do + let src' = unEscapeString src + imgdata <- catch (B.readFile src') (\_ -> return B.empty) + let bytes = map (printf "%02x") $ B.unpack imgdata + let filetype = case ext of + ".jpg" -> "\\jpegblip" + ".jpeg" -> "\\jpegblip" + ".png" -> "\\pngblip" + _ -> error "Unknown file type" + let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" + return $ if B.null imgdata + then x + else RawInline "rtf" raw + else return x rtfEmbedImage x = return x -- | Convert Pandoc to a string in rich text format. diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index c8638cdd7..4f6645cd5 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -37,6 +37,8 @@ import Data.Ord ( comparing ) import Data.Char ( chr, ord ) import Control.Monad.State import Text.Pandoc.Pretty +import Network.URI ( isAbsoluteURI, unEscapeString ) +import System.FilePath data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout @@ -412,11 +414,11 @@ inlineToTexinfo (Image alternate (source, _)) = do return $ text ("@image{" ++ base ++ ",,,") <> content <> text "," <> text (ext ++ "}") where - (revext, revbase) = break (=='.') (reverse source) - ext = reverse revext - base = case revbase of - ('.' : rest) -> reverse rest - _ -> reverse revbase + ext = drop 1 $ takeExtension source' + base = takeBaseName source' + source' = if isAbsoluteURI source + then source + else unEscapeString source inlineToTexinfo (Note contents) = do contents' <- blockListToTexinfo contents |