diff options
Diffstat (limited to 'src')
25 files changed, 174 insertions, 156 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ec2dc19f5..9cad5fb34 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs index ac517ddf0..8ac55fc61 100644 --- a/src/Text/Pandoc/CharacterReferences.hs +++ b/src/Text/Pandoc/CharacterReferences.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.CharacterReferences - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 02bf5efbb..169c4d1a6 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -1,6 +1,6 @@ {-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs index 6d602fb2a..d978c0cb4 100644 --- a/src/Text/Pandoc/ODT.hs +++ b/src/Text/Pandoc/ODT.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.ODT - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2008-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 2e5473992..5c188e3d9 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -182,7 +182,7 @@ unsanitaryURI u = "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", "snews:", "webcal:", "ymsgr:"] - in case parseURIReference u of + in case parseURIReference (escapeURI u) of Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes Nothing -> True @@ -746,7 +746,7 @@ link = try $ do Nothing -> fail "no href" let title = fromMaybe "" $ extractAttribute "title" attributes lab <- inlinesTilEnd "a" - return $ Link (normalizeSpaces lab) (url, title) + return $ Link (normalizeSpaces lab) (escapeURI url, title) image :: GenParser Char ParserState Inline image = try $ do @@ -756,5 +756,5 @@ image = try $ do Nothing -> fail "no src" let title = fromMaybe "" $ extractAttribute "title" attributes let alt = fromMaybe "" (extractAttribute "alt" attributes) - return $ Image [Str alt] (url, title) + return $ Image [Str alt] (escapeURI url, title) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f10f0e219..36940fab0 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -738,7 +738,7 @@ url :: GenParser Char ParserState Inline url = try $ do string "\\url" url' <- charsInBalanced '{' '}' - return $ Link [Code url'] (url', "") + return $ Link [Code url'] (escapeURI url', "") link :: GenParser Char ParserState Inline link = try $ do @@ -746,17 +746,16 @@ link = try $ do url' <- manyTill anyChar (char '}') char '{' label' <- manyTill inline (char '}') - return $ Link (normalizeSpaces label') (url', "") + return $ Link (normalizeSpaces label') (escapeURI url', "") image :: GenParser Char ParserState Inline image = try $ do ("includegraphics", _, args) <- command let args' = filter isArg args -- filter out options - let src = if null args' then - ("", "") - else - (stripFirstAndLast (head args'), "") - return $ Image [Str "image"] src + let (src,tit) = case args' of + [] -> ("", "") + (x:_) -> (stripFirstAndLast x, "") + return $ Image [Str "image"] (escapeURI src, tit) footnote :: GenParser Char ParserState Inline footnote = try $ do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 82c761685..13edd0586 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -31,7 +31,7 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isPrefixOf, isSuffixOf, sortBy, findIndex, intercalate ) +import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe @@ -73,10 +73,6 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221;" -- auxiliary functions -- --- | Replace spaces with %20 -uriEscapeSpaces :: String -> String -uriEscapeSpaces = substitute " " "%20" - indentSpaces :: GenParser Char ParserState [Char] indentSpaces = try $ do state <- getState @@ -206,7 +202,7 @@ referenceKey = try $ do tit <- option "" referenceTitle blanklines endPos <- getPosition - let newkey = (lab, (uriEscapeSpaces $ removeTrailingSpace src, tit)) + let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit)) st <- getState let oldkeys = stateKeys st updateState $ \s -> s { stateKeys = newkey : oldkeys } @@ -1194,7 +1190,7 @@ source' = do tit <- option "" linkTitle skipSpaces eof - return (uriEscapeSpaces $ removeTrailingSpace src, tit) + return (escapeURI $ removeTrailingSpace src, tit) linkTitle :: GenParser Char st String linkTitle = try $ do @@ -1208,11 +1204,11 @@ linkTitle = try $ do link :: GenParser Char ParserState Inline link = try $ do lab <- reference - src <- source <|> referenceLink lab + (src, tit) <- source <|> referenceLink lab sanitize <- getState >>= return . stateSanitizeHTML - if sanitize && unsanitaryURI (fst src) + if sanitize && unsanitaryURI src then fail "Unsanitary URI" - else return $ Link lab src + else return $ Link lab (src, tit) -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] @@ -1229,18 +1225,15 @@ referenceLink lab = do autoLink :: GenParser Char ParserState Inline autoLink = try $ do char '<' - src <- uri <|> (emailAddress >>= (return . ("mailto:" ++))) + (orig, src) <- uri <|> emailAddress char '>' - let src' = if "mailto:" `isPrefixOf` src - then drop 7 src - else src st <- getState let sanitize = stateSanitizeHTML st if sanitize && unsanitaryURI src then fail "Unsanitary URI" else return $ if stateStrict st - then Link [Str src'] (src, "") - else Link [Code src'] (src, "") + then Link [Str orig] (src, "") + else Link [Code orig] (src, "") image :: GenParser Char ParserState Inline image = try $ do diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 2496d1823..5e7ea512e 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -556,7 +556,7 @@ targetURI = do contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines - return contents + return $ escapeURI $ removeLeadingTrailingSpace $ contents imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) imageKey = try $ do @@ -565,13 +565,13 @@ imageKey = try $ do skipSpaces string "image::" src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + return (normalizeSpaces ref, (src, "")) anonymousKey :: GenParser Char st ([Inline], (String, [Char])) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - return ([Str "_"], (removeLeadingTrailingSpace src, "")) + return ([Str "_"], (src, "")) regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) regularKey = try $ do @@ -579,7 +579,7 @@ regularKey = try $ do ref <- referenceName char ':' src <- targetURI - return (normalizeSpaces ref, (removeLeadingTrailingSpace src, "")) + return (normalizeSpaces ref, (src, "")) -- -- tables @@ -883,7 +883,8 @@ explicitLink = try $ do src <- manyTill (noneOf ">\n") (char '>') skipSpaces string "`_" - return $ Link (normalizeSpaces label') (removeLeadingTrailingSpace src, "") + return $ Link (normalizeSpaces label') + (escapeURI $ removeLeadingTrailingSpace src, "") referenceLink :: GenParser Char ParserState Inline referenceLink = try $ do @@ -891,25 +892,25 @@ referenceLink = try $ do key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link state <- getState let keyTable = stateKeys state - src <- case lookupKeySrc keyTable key of - Nothing -> fail "no corresponding key" - Just target -> return target + (src,tit) <- case lookupKeySrc keyTable key of + Nothing -> fail "no corresponding key" + Just target -> return target -- if anonymous link, remove first anon key so it won't be used again let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], src) keyTable -- remove first anon key + then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key else keyTable setState $ state { stateKeys = keyTable' } - return $ Link (normalizeSpaces label') src + return $ Link (normalizeSpaces label') (src, tit) autoURI :: GenParser Char ParserState Inline autoURI = do - src <- uri - return $ Link [Str src] (src, "") + (orig, src) <- uri + return $ Link [Str orig] (src, "") autoEmail :: GenParser Char ParserState Inline autoEmail = do - src <- emailAddress - return $ Link [Str src] ("mailto:" ++ src, "") + (orig, src) <- emailAddress + return $ Link [Str orig] (src, "") autoLink :: GenParser Char ParserState Inline autoLink = autoURI <|> autoEmail @@ -921,7 +922,7 @@ image = try $ do ref <- manyTill inline (char '|') state <- getState let keyTable = stateKeys state - src <- case lookupKeySrc keyTable ref of - Nothing -> fail "no corresponding key" - Just target -> return target - return $ Image (normalizeSpaces ref) src + (src,tit) <- case lookupKeySrc keyTable ref of + Nothing -> fail "no corresponding key" + Just target -> return target + return $ Image (normalizeSpaces ref) (src, tit) diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 18790d03a..080354be1 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath - Copyright : Copyright (C) 2007 John MacFarlane + Copyright : Copyright (C) 2007-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f093ddbee..26aff4250 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,6 +1,6 @@ {-# LANGUAGE DeriveDataTypeable #-} {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Utility functions and definitions used by the various Pandoc modules. -} -module Text.Pandoc.Shared ( +module Text.Pandoc.Shared ( -- * List processing splitBy, splitByIndices, @@ -43,6 +43,8 @@ module Text.Pandoc.Shared ( stripFirstAndLast, camelCaseToHyphenated, toRomanNumeral, + escapeURI, + unescapeURI, wrapped, wrapIfNeeded, wrappedTeX, @@ -114,10 +116,11 @@ import Text.ParserCombinators.Parsec import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) import qualified Text.PrettyPrint.HughesPJ as PP import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, - isPunctuation ) +import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii, + isLetter, isDigit ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( parseURI, URI (..), isAllowedInURI ) +import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString ) +import Codec.Binary.UTF8.String ( encodeString, decodeString ) import System.Directory import System.FilePath ( (</>) ) -- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv @@ -131,6 +134,7 @@ import Data.Generics import qualified Control.Monad.State as S import Control.Monad (join) import Paths_pandoc (getDataFileName) + -- -- List processing -- @@ -228,6 +232,17 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" +-- | Escape unicode characters in a URI. Characters that are +-- already valid in a URI, including % and ?, are left alone. +escapeURI :: String -> String +escapeURI = escapeURIString isAllowedInURI . encodeString + +-- | Unescape unicode and some special characters in a URI, but +-- without introducing spaces. +unescapeURI :: String -> String +unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . + decodeString . unEscapeString + -- | Wrap inlines to line length. wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= @@ -479,25 +494,30 @@ domain = do dom <- many1 $ try (char '.' >> many1 domainChar ) return $ intercalate "." (first:dom) --- | Parses an email address; returns string. -emailAddress :: GenParser Char st [Char] +-- | Parses an email address; returns original and corresponding +-- escaped mailto: URI. +emailAddress :: GenParser Char st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar let addr = firstLetter:restAddr char '@' dom <- domain - return $ addr ++ '@':dom + let full = addr ++ '@':dom + return (full, escapeURI $ "mailto:" ++ full) --- | Parses a URI. -uri :: GenParser Char st String +-- | Parses a URI. Returns pair of original and URI-escaped version. +uri :: GenParser Char st (String, String) uri = try $ do - str <- many1 $ satisfy isAllowedInURI - case parseURI str of - Just uri' -> if uriScheme uri' `elem` [ "http:", "https:", "ftp:", - "file:", "mailto:", - "news:", "telnet:" ] - then return $ show uri' + let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ] + lookAhead $ oneOfStrings protocols + -- scan non-ascii characters and ascii characters allowed in a URI + str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- now see if they amount to an absolute URI + case parseURI (escapeURI str) of + Just uri' -> if uriScheme uri' `elem` protocols + then return (str, show uri') else fail "not a URI" Nothing -> fail "not a URI" @@ -894,38 +914,37 @@ data Element = Blk Block -- lvl num ident label contents deriving (Eq, Read, Show, Typeable, Data) --- | Convert Pandoc inline list to plain text identifier. +-- | Convert Pandoc inline list to plain text identifier. HTML +-- identifiers must start with a letter, and may contain only +-- letters, digits, and the characters _-:. inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' - -inlineListToIdentifier' :: [Inline] -> [Char] -inlineListToIdentifier' [] = "" -inlineListToIdentifier' (x:xs) = - xAsText ++ inlineListToIdentifier' xs - where xAsText = case x of - Str s -> filter (\c -> c `elem` "_-." || not (isPunctuation c)) $ - intercalate "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier' lst - Strikeout lst -> inlineListToIdentifier' lst - Superscript lst -> inlineListToIdentifier' lst - SmallCaps lst -> inlineListToIdentifier' lst - Subscript lst -> inlineListToIdentifier' lst - Strong lst -> inlineListToIdentifier' lst - Quoted _ lst -> inlineListToIdentifier' lst - Cite _ lst -> inlineListToIdentifier' lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - Math _ _ -> "" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier' lst - Image lst _ -> inlineListToIdentifier' lst - Note _ -> "" +inlineListToIdentifier = + dropWhile (not . isAlpha) . intercalate "-" . words . map toLower . + filter (\c -> isLetter c || isDigit c || c `elem` "_-:. ") . + concatMap extractText + where extractText x = case x of + Str s -> s + Emph lst -> concatMap extractText lst + Strikeout lst -> concatMap extractText lst + Superscript lst -> concatMap extractText lst + SmallCaps lst -> concatMap extractText lst + Subscript lst -> concatMap extractText lst + Strong lst -> concatMap extractText lst + Quoted _ lst -> concatMap extractText lst + Cite _ lst -> concatMap extractText lst + Code s -> s + Space -> " " + EmDash -> "---" + EnDash -> "--" + Apostrophe -> "" + Ellipses -> "..." + LineBreak -> " " + Math _ s -> s + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> concatMap extractText lst + Image lst _ -> concatMap extractText lst + Note _ -> "" -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] @@ -957,7 +976,9 @@ headerLtEq _ _ = False -- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> [String] -> String uniqueIdent title' usedIdents = - let baseIdent = inlineListToIdentifier title' + let baseIdent = case inlineListToIdentifier title' of + "" -> "section" + x -> x numIdent n = baseIdent ++ "-" ++ show n in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index c30af0bfc..2238f4da8 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -1,6 +1,6 @@ {-# LANGUAGE TypeSynonymInstances #-} {- -Copyright (C) 2009 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2009-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009 John MacFarlane + Copyright : Copyright (C) 2009-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 545acded5..32948e292 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-8 John MacFarlane + Copyright : Copyright (C) 2007-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index fc97ed3ac..3abed1610 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index d33dcff27..299471328 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,6 +1,6 @@ {-# OPTIONS_GHC -fno-warn-deprecations #-} {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 02fbf4add..8aa028bd7 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 62bb90f8e..77dead196 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007 John MacFarlane + Copyright : Copyright (C) 2007-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 777784704..fe8e0c2de 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -400,9 +400,10 @@ inlineToMarkdown _ (HtmlInline str) = return $ text str inlineToMarkdown _ (LineBreak) = return $ text " \n" inlineToMarkdown _ Space = return $ char ' ' inlineToMarkdown opts (Cite _ cits) = inlineListToMarkdown opts cits -inlineToMarkdown opts (Link txt (src, tit)) = do +inlineToMarkdown opts (Link txt (src', tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" + let src = unescapeURI src' let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let useRefLinks = writerReferenceLinks opts let useAuto = null tit && txt == [Code srcSuffix] @@ -423,7 +424,7 @@ inlineToMarkdown opts (Image alternate (source, tit)) = do (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate - linkPart <- inlineToMarkdown opts (Link txt (source, tit)) + linkPart <- inlineToMarkdown opts (Link txt (unescapeURI source, tit)) return $ char '!' <> linkPart inlineToMarkdown _ (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index f1e985bb7..e8cb33caf 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2008-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 347072cf1..4e3979c07 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,6 +1,7 @@ {-# LANGUAGE PatternGuards #-} {- -Copyright (C) 2008 Andrea Rossato <andrea.rossato@ing.unitn.it> +Copyright (C) 2008-2010 Andrea Rossato <andrea.rossato@ing.unitn.it> +and John MacFarlane. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.OpenDocument - Copyright : Copyright (C) 2008 Andrea Rossato + Copyright : Copyright (C) 2008-2010 Andrea Rossato and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> @@ -40,6 +41,7 @@ import Control.Applicative ( (<$>) ) import Control.Arrow ( (***), (>>>) ) import Control.Monad.State hiding ( when ) import Data.Char (chr) +import qualified Data.Map as Map -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -56,7 +58,7 @@ data WriterState = , stParaStyles :: [Doc] , stListStyles :: [(Int, [Doc])] , stTextStyles :: [Doc] - , stTextStyleAttr :: [(TextStyle,[(String,String)])] + , stTextStyleAttr :: Map.Map TextStyle [(String,String)] , stIndentPara :: Int , stInDefinition :: Bool , stTight :: Bool @@ -69,7 +71,7 @@ defaultWriterState = , stParaStyles = [] , stListStyles = [] , stTextStyles = [] - , stTextStyleAttr = [] + , stTextStyleAttr = Map.empty , stIndentPara = 0 , stInDefinition = False , stTight = False @@ -91,11 +93,10 @@ addTextStyle :: Doc -> State WriterState () addTextStyle i = modify $ \s -> s { stTextStyles = i : stTextStyles s } addTextStyleAttr :: (TextStyle, [(String,String)]) -> State WriterState () -addTextStyleAttr i = modify $ \s -> s { stTextStyleAttr = i : stTextStyleAttr s } +addTextStyleAttr (ts, xs) = modify $ \s -> s { stTextStyleAttr = Map.insert ts xs (stTextStyleAttr s) } -rmTextStyleAttr :: State WriterState () -rmTextStyleAttr = modify $ \s -> s { stTextStyleAttr = rmHead (stTextStyleAttr s) } - where rmHead l = if l /= [] then tail l else [] +rmTextStyleAttr :: TextStyle -> State WriterState () +rmTextStyleAttr ts = modify $ \s -> s { stTextStyleAttr = Map.delete ts (stTextStyleAttr s) } increaseIndent :: State WriterState () increaseIndent = modify $ \s -> s { stIndentPara = 1 + stIndentPara s } @@ -121,18 +122,18 @@ inSpanTags s = inTags False "text:span" [("text:style-name",s)] withTextStyle :: TextStyle -> State WriterState a -> State WriterState a withTextStyle s f = addTextStyleAttr (s,textStyleAttr s) >> - f >>= \r -> rmTextStyleAttr >> return r + f >>= \r -> rmTextStyleAttr s >> return r inTextStyle :: Doc -> State WriterState Doc inTextStyle d = do at <- gets stTextStyleAttr - if at == [] + if Map.null at then return d else do tn <- (+) 1 . length <$> gets stTextStyles addTextStyle $ inTags False "style:style" [("style:name" , "T" ++ show tn) ,("style:family", "text" )] - $ selfClosingTag "style:text-properties" (concatMap snd at) + $ selfClosingTag "style:text-properties" (concatMap snd $ Map.toList at) return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d inHeaderTags :: Int -> Doc -> Doc @@ -491,7 +492,7 @@ paraTableStyles t s (a:xs) [ ("fo:text-align", x) , ("style:justify-single-word", "false")] -data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq ) +data TextStyle = Italic | Bold | Strike | Sub | Sup | SmallC deriving ( Eq,Ord ) textStyleAttr :: TextStyle -> [(String,String)] textStyleAttr s diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 534c34c09..f4dfb2aa6 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -305,8 +305,9 @@ inlineToRST Space = return $ char ' ' inlineToRST (Link [Code str] (src, _)) | src == str || src == "mailto:" ++ str = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text srcSuffix -inlineToRST (Link txt (src, tit)) = do + return $ text $ unescapeURI srcSuffix +inlineToRST (Link txt (src', tit)) = do + let src = unescapeURI src' useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions) linktext <- inlineListToRST $ normalizeSpaces txt if useReferenceLinks @@ -317,7 +318,8 @@ inlineToRST (Link txt (src, tit)) = do modify $ \st -> st { stLinks = refs' } return $ char '`' <> linktext <> text "`_" else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" -inlineToRST (Image alternate (source, tit)) = do +inlineToRST (Image alternate (source', tit)) = do + let source = unescapeURI source' pics <- get >>= (return . stImages) let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index c0c3d0536..ae71e1307 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs index 1dff06e62..1a2639a50 100644 --- a/src/Text/Pandoc/Writers/S5.hs +++ b/src/Text/Pandoc/Writers/S5.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.S5 - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 47a318631..503222754 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2008 John MacFarlane and Peter Wang +Copyright (C) 2008-2010 John MacFarlane and Peter Wang This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008 John MacFarlane and Peter Wang + Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 68c5c3c5c..0c48b48df 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-7 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-7 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/pandoc.hs b/src/pandoc.hs index 7e1bfc284..0560efc0a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Main - Copyright : Copyright (C) 2006-8 John MacFarlane + Copyright : Copyright (C) 2006-2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> |