{-# LANGUAGE DeriveDataTypeable, CPP, MultiParamTypeClasses,
    FlexibleContexts, ScopedTypeVariables, PatternGuards,
    ViewPatterns #-}
{-
Copyright (C) 2006-2016 John MacFarlane

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Shared
   Copyright   : Copyright (C) 2006-2016 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane
   Stability   : alpha
   Portability : portable

Utility functions and definitions used by the various Pandoc modules.
-}
module Text.Pandoc.Shared (
                     -- * List processing
                     splitBy,
                     splitByIndices,
                     splitStringByIndices,
                     substitute,
                     ordNub,
                     -- * Text processing
                     backslashEscapes,
                     escapeStringUsing,
                     stripTrailingNewlines,
                     trim,
                     triml,
                     trimr,
                     stripFirstAndLast,
                     camelCaseToHyphenated,
                     toRomanNumeral,
                     escapeURI,
                     tabFilter,
                     -- * Date/time
                     normalizeDate,
                     -- * Pandoc block and inline list processing
                     orderedListMarkers,
                     normalizeSpaces,
                     extractSpaces,
                     normalize,
                     normalizeInlines,
                     normalizeBlocks,
                     removeFormatting,
                     stringify,
                     capitalize,
                     compactify,
                     compactify',
                     compactify'DL,
                     linesToPara,
                     Element (..),
                     hierarchicalize,
                     uniqueIdent,
                     inlineListToIdentifier,
                     isHeaderBlock,
                     headerShift,
                     isTightList,
                     addMetaField,
                     makeMeta,
                     -- * TagSoup HTML handling
                     renderTags',
                     -- * File handling
                     inDirectory,
                     getDefaultReferenceDocx,
                     getDefaultReferenceODT,
                     readDataFile,
                     readDataFileUTF8,
                     fetchItem,
                     fetchItem',
                     openURL,
                     collapseFilePath,
                     filteredFilesFromArchive,
                     -- * Error handling
                     err,
                     warn,
                     mapLeft,
                     hush,
                     -- * for squashing blocks
                     blocksToInlines,
                     -- * Safe read
                     safeRead,
                     -- * Temp directory
                     withTempDir,
                     -- * Version
                     pandocVersion
                    ) where

import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
                   isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Maybe (mapMaybe)
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( escapeURIString, nonStrictRelativeTo,
                     unEscapeString, parseURIReference, isAllowedInURI,
                     parseURI, URI(..)
                   )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Text.Pandoc.MIME (MimeType, getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import Control.Monad.Trans (MonadIO (..))
import qualified Control.Exception as E
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Compat.Time
import Data.Time.Clock.POSIX
import System.IO (stderr)
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
                          renderOptions)
import Data.Monoid ((<>))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T (toUpper, pack, unpack)
import Data.ByteString.Lazy (toChunks, fromChunks)
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)

import Codec.Archive.Zip

#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CLIENT
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
                            Request(port,host))
import Network.HTTP.Client (parseRequest)
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
                     RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif

-- | Version number of pandoc library.
pandocVersion :: String
pandocVersion = showVersion version

--
-- List processing
--

-- | Split list by groups of one or more sep.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
  let (first, rest) = break isSep lst
      rest'         = dropWhile isSep rest
  in  first:(splitBy isSep rest')

splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
  where (first, rest) = splitAt x lst

-- | Split string into chunks divided at specified indices.
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
  let (first, rest) = splitAt' x lst in
  first : (splitStringByIndices (map (\y -> y - x) xs) rest)

splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ []          = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs)      = (x:ys,zs)
  where (ys,zs) = splitAt' (n - charWidth x) xs

-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
    case stripPrefix target lst of
      Just lst' -> replacement ++ substitute target replacement lst'
      Nothing   -> x : substitute target replacement xs

ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
  where
    go _ [] = []
    go s (x:xs) = if x `Set.member` s then go s xs
                                      else x : go (Set.insert x s) xs
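
-- Illustrative examples for the list helpers above (added commentary, not
-- part of the original module; these equations only assume the definitions
-- given here):
--
-- > splitBy (== ',') "a,b,,c"   == ["a","b","c"]
-- > substitute "ab" "X" "abcab" == "XcX"
-- > ordNub [3,1,3,2,1]          == [3,1,2]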

--
-- Text processing
--

-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char]    -- ^ list of special characters to escape
                 -> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))

-- | Escape a string of characters, using an association list of
-- characters and strings.
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
  case (lookup x escapeTable) of
       Just str  -> str ++ rest
       Nothing   -> x:rest
  where rest = escapeStringUsing escapeTable xs

-- | Strip trailing newlines from string.
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse

-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr

-- | Remove leading space (including newlines) from string.
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")

-- | Remove trailing space (including newlines) from string.
trimr :: String -> String
trimr = reverse . triml . reverse

-- | Strip leading and trailing characters from string
stripFirstAndLast :: String -> String
stripFirstAndLast str =
  drop 1 $ take ((length str) - 1) str

-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case).
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
  a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)

-- | Convert number < 4000 to uppercase roman numeral.
toRomanNumeral :: Int -> String
toRomanNumeral x =
  if x >= 4000 || x < 0
     then "?"
     else case x of
              _ | x >= 1000 -> "M"  ++ toRomanNumeral (x - 1000)
              _ | x >= 900  -> "CM" ++ toRomanNumeral (x - 900)
              _ | x >= 500  -> "D"  ++ toRomanNumeral (x - 500)
              _ | x >= 400  -> "CD" ++ toRomanNumeral (x - 400)
              _ | x >= 100  -> "C"  ++ toRomanNumeral (x - 100)
              _ | x >= 90   -> "XC" ++ toRomanNumeral (x - 90)
              _ | x >= 50   -> "L"  ++ toRomanNumeral (x - 50)
              _ | x >= 40   -> "XL" ++ toRomanNumeral (x - 40)
              _ | x >= 10   -> "X"  ++ toRomanNumeral (x - 10)
              _ | x == 9    -> "IX"
              _ | x >= 5    -> "V"  ++ toRomanNumeral (x - 5)
              _ | x == 4    -> "IV"
              _ | x >= 1    -> "I"  ++ toRomanNumeral (x - 1)
              _             -> ""

-- | Escape whitespace and some punctuation characters in URI.
escapeURI :: String -> String
escapeURI = escapeURIString (not . needsEscaping)
  where needsEscaping c = isSpace c || c `elem`
                           ['<','>','|','"','{','}','[',']','^', '`']

-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
tabFilter :: Int       -- ^ Tab stop
          -> String    -- ^ Input
          -> String
tabFilter tabStop =
  let go _ [] = ""
      go _ ('\n':xs) = '\n' : go tabStop xs
      go _ ('\r':'\n':xs) = '\n' : go tabStop xs
      go _ ('\r':xs) = '\n' : go tabStop xs
      go spsToNextStop ('\t':xs) =
        if tabStop == 0
           then '\t' : go tabStop xs
           else replicate spsToNextStop ' ' ++ go tabStop xs
      go 1 (x:xs) =
        x : go tabStop xs
      go spsToNextStop (x:xs) =
        x : go (spsToNextStop - 1) xs
  in  go tabStop
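
-- Expected behaviour of the text helpers above (added commentary, not part
-- of the original module):
--
-- > trim "  two words \n"              == "two words"
-- > camelCaseToHyphenated "camelCase"  == "camel-case"
-- > toRomanNumeral 1999                == "MCMXCIX"
-- > tabFilter 4 "a\tb\r\n"             == "a   b\n"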

--
-- Date/time
--

-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. We
-- limit years to the range 1601-9999 (ISO 8601 accepts greater than
-- or equal to 1583, but MS Word only accepts dates starting 1601).
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
  (msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
  where rejectBadYear day = case toGregorian day of
          (y, _, _) | y >= 1601 && y <= 9999 -> Just day
          _                                  -> Nothing
        parsetimeWith =
#if MIN_VERSION_time(1,5,0)
             parseTimeM True defaultTimeLocale
#else
             parseTime defaultTimeLocale
#endif
        formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
                   "%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
                   "%Y%m%d", "%Y%m", "%Y"]

--
-- Pandoc block and inline list processing
--

-- | Generate infinite lazy list of markers for an ordered list,
-- depending on list attributes.
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
  let singleton c = [c]
      nums = case numstyle of
                  DefaultStyle -> map show [start..]
                  Example      -> map show [start..]
                  Decimal      -> map show [start..]
                  UpperAlpha   -> drop (start - 1) $ cycle $
                                  map singleton ['A'..'Z']
                  LowerAlpha   -> drop (start - 1) $ cycle $
                                  map singleton ['a'..'z']
                  UpperRoman   -> map toRomanNumeral [start..]
                  LowerRoman   -> map (map toLower . toRomanNumeral) [start..]
      inDelim str = case numdelim of
                         DefaultDelim -> str ++ "."
                         Period       -> str ++ "."
                         OneParen     -> str ++ ")"
                         TwoParens    -> "(" ++ str ++ ")"
  in  map inDelim nums

-- | Normalize a list of inline elements: remove leading and trailing
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
  where cleanup []              = []
        cleanup (Space:rest)    = case dropWhile isSpaceOrEmpty rest of
                                       []     -> []
                                       (x:xs) -> Space : x : cleanup xs
        cleanup ((Str ""):rest) = cleanup rest
        cleanup (x:rest)        = x : cleanup rest

isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space    = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _        = False

-- | Extract the leading and trailing spaces from inside an inline element
-- and place them outside the element.  SoftBreaks count as Spaces for
-- these purposes.
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is =
  let contents = B.unMany is
      left  = case viewl contents of
                   (Space :< _)     -> B.space
                   (SoftBreak :< _) -> B.softbreak
                   _                -> mempty
      right = case viewr contents of
                   (_ :> Space)     -> B.space
                   (_ :> SoftBreak) -> B.softbreak
                   _                -> mempty
  in (left <> f (B.trimInlines . B.Many $ contents) <> right)
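
-- Added commentary (not part of the original module) illustrating the
-- date and list helpers above:
--
-- > normalizeDate "12 May 2016"                   == Just "2016-05-12"
-- > take 3 (orderedListMarkers (3, LowerAlpha, OneParen)) == ["c)","d)","e)"]
-- > normalizeSpaces [Space, Str "hi", Space, Space, Str "there", Space]
-- >                                               == [Str "hi", Space, Str "there"]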

-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, removing 'Null's and
-- empty elements, etc.
normalize :: Pandoc -> Pandoc
normalize (Pandoc (Meta meta) blocks) =
  Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
  where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
        go (MetaBlocks xs)  = MetaBlocks  $ normalizeBlocks xs
        go (MetaList ms)    = MetaList    $ map go ms
        go (MetaMap m)      = MetaMap     $ M.map go m
        go x                = x

normalizeBlocks :: [Block] -> [Block]
normalizeBlocks (Null : xs) = normalizeBlocks xs
normalizeBlocks (Div attr bs : xs) =
  Div attr (normalizeBlocks bs) : normalizeBlocks xs
normalizeBlocks (BlockQuote bs : xs) =
  case normalizeBlocks bs of
       []  -> normalizeBlocks xs
       bs' -> BlockQuote bs' : normalizeBlocks xs
normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
normalizeBlocks (BulletList items : xs) =
  BulletList (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
normalizeBlocks (OrderedList attr items : xs) =
  OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
normalizeBlocks (DefinitionList items : xs) =
  DefinitionList (map go items) : normalizeBlocks xs
  where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
normalizeBlocks (RawBlock f x : xs) =
  case normalizeBlocks xs of
       (RawBlock f' x' : rest) | f' == f ->
         RawBlock f (x ++ ('\n':x')) : rest
       rest -> RawBlock f x : rest
normalizeBlocks (Para ils : xs) =
  case normalizeInlines ils of
       []   -> normalizeBlocks xs
       ils' -> Para ils' : normalizeBlocks xs
normalizeBlocks (Plain ils : xs) =
  case normalizeInlines ils of
       []   -> normalizeBlocks xs
       ils' -> Plain ils' : normalizeBlocks xs
normalizeBlocks (Header lev attr ils : xs) =
  Header lev attr (normalizeInlines ils) : normalizeBlocks xs
normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
  Table (normalizeInlines capt) aligns widths
    (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
    : normalizeBlocks xs
normalizeBlocks (x:xs) = x : normalizeBlocks xs
normalizeBlocks [] = []

normalizeInlines :: [Inline] -> [Inline]
normalizeInlines (Str x : ys) =
  case concat (x : map fromStr strs) of
       "" -> rest
       n  -> Str n : rest
   where
     (strs, rest)  = span isStr $ normalizeInlines ys
     isStr (Str _) = True
     isStr _       = False
     fromStr (Str z) = z
     fromStr _       = error "normalizeInlines - fromStr - not a Str"
normalizeInlines (Space : SoftBreak : ys) =
  SoftBreak : normalizeInlines ys
normalizeInlines (Space : ys) =
  if null rest
     then []
     else Space : rest
   where isSp Space = True
         isSp _     = False
         rest       = dropWhile isSp $ normalizeInlines ys
normalizeInlines (Emph xs : zs) =
  case normalizeInlines zs of
       (Emph ys : rest) -> normalizeInlines $
         Emph (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Emph xs' : rest
normalizeInlines (Strong xs : zs) =
  case normalizeInlines zs of
       (Strong ys : rest) -> normalizeInlines $
         Strong (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Strong xs' : rest
normalizeInlines (Subscript xs : zs) =
  case normalizeInlines zs of
       (Subscript ys : rest) -> normalizeInlines $
         Subscript (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Subscript xs' : rest
normalizeInlines (Superscript xs : zs) =
  case normalizeInlines zs of
       (Superscript ys : rest) -> normalizeInlines $
         Superscript (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Superscript xs' : rest
normalizeInlines (SmallCaps xs : zs) =
  case normalizeInlines zs of
       (SmallCaps ys : rest) -> normalizeInlines $
         SmallCaps (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> SmallCaps xs' : rest
normalizeInlines (Strikeout xs : zs) =
  case normalizeInlines zs of
       (Strikeout ys : rest) -> normalizeInlines $
         Strikeout (normalizeInlines $ xs ++ ys) : rest
       rest -> case normalizeInlines xs of
                    []  -> rest
                    xs' -> Strikeout xs' : rest
normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
normalizeInlines (RawInline f xs : zs) =
  case normalizeInlines zs of
       (RawInline f' ys : rest) | f == f' -> normalizeInlines $
         RawInline f (xs ++ ys) : rest
       rest -> RawInline f xs : rest
normalizeInlines (Code _ "" : ys) = normalizeInlines ys
normalizeInlines (Code attr xs : zs) =
  case normalizeInlines zs of
       (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
         Code attr (xs ++ ys) : rest
       rest -> Code attr xs : rest
-- allow empty spans, they may carry identifiers etc.
-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
normalizeInlines (Span attr xs : zs) =
  case normalizeInlines zs of
       (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
         Span attr (normalizeInlines $ xs ++ ys) : rest
       rest -> Span attr (normalizeInlines xs) : rest
normalizeInlines (Note bs : ys) =
  Note (normalizeBlocks bs) : normalizeInlines ys
normalizeInlines (Quoted qt ils : ys) =
  Quoted qt (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (Link attr ils t : ys) =
  Link attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Image attr ils t : ys) =
  Image attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Cite cs ils : ys) =
  Cite cs (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (x : xs) = x : normalizeInlines xs
normalizeInlines [] = []

-- | Extract inlines, removing formatting.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query go . walk deNote
  where go :: Inline -> [Inline]
        go (Str xs)   = [Str xs]
        go Space      = [Space]
        go SoftBreak  = [SoftBreak]
        go (Code _ x) = [Str x]
        go (Math _ x) = [Str x]
        go LineBreak  = [Space]
        go _          = []
        deNote (Note _) = Str ""
        deNote x        = x

-- | Convert pandoc structure to a string with formatting removed.
-- Footnotes are skipped (since we don't want their contents in link
-- labels).
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
  where go :: Inline -> [Char]
        go Space      = " "
        go SoftBreak  = " "
        go (Str x)    = x
        go (Code _ x) = x
        go (Math _ x) = x
        go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
        go LineBreak  = " "
        go _          = ""
        deNote (Note _) = Str ""
        deNote x        = x

-- | Bring all regular text in a pandoc structure to uppercase.
--
-- This function correctly handles cases where a lowercase character doesn't
-- map to a single uppercase character – e.g. “Straße” would be converted
-- to “STRASSE”, not “STRAßE”.
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
  where go :: Inline -> Inline
        go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
        go x       = x
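
-- Added commentary (not part of the original module) showing what the
-- flattening helpers above produce on small inputs:
--
-- > stringify [Str "hi", Space, Emph [Str "there"]]             == "hi there"
-- > removeFormatting [Emph [Str "hi"], Space, Code ("",[],[]) "x + 1"]
-- >                                       == [Str "hi", Space, Str "x + 1"]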

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]]  -- ^ List of list items (each a list of blocks)
           -> [[Block]]
compactify [] = []
compactify items =
  case (init items, last items) of
       (_,[])          -> items
       (others, final) ->
            case last final of
                 Para a -> case (filter isPara $ concat items) of
                                -- if this is only Para, change to Plain
                                [_] -> others ++ [init final ++ [Plain a]]
                                _   -> items
                 _      -> items

-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.  Like compactify, but operates on @Blocks@ rather
-- than @[Block]@.
compactify' :: [Blocks]  -- ^ List of list items (each a list of blocks)
            -> [Blocks]
compactify' [] = []
compactify' items =
  let (others, final) = (init items, last items)
  in  case reverse (B.toList final) of
           (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
                            -- if this is only Para, change to Plain
                            [_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
                            _   -> items
           _           -> items

-- | Like @compactify'@, but acts on items of definition lists.
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
  let defs = concatMap snd items
  in  case reverse (concatMap B.toList defs) of
           (Para x:xs)
             | not (any isPara xs) ->
                   let (t,ds) = last items
                       lastDef = B.toList $ last ds
                       ds' = init ds ++
                             if null lastDef
                                then [B.fromList lastDef]
                                else [B.fromList $ init lastDef ++ [Plain x]]
                    in init items ++ [(t, ds')]
             | otherwise           -> items
           _                       -> items

-- | Combine a list of lines by adding hard linebreaks.
combineLines :: [[Inline]] -> [Inline]
combineLines = intercalate [LineBreak]

-- | Convert a list of lines into a paragraph with hard line breaks. This is
-- useful e.g. for rudimentary support of LineBlock elements in writers.
linesToPara :: [[Inline]] -> Block
linesToPara = Para . combineLines

isPara :: Block -> Bool
isPara (Para _) = True
isPara _        = False

-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
             | Sec Int [Int] Attr [Inline] [Element]
             --    lvl  num attributes label    contents
             deriving (Eq, Read, Show, Typeable, Data)

instance Walkable Inline Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts

instance Walkable Block Element where
  walk f (Blk x) = Blk (walk f x)
  walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
  walkM f (Blk x) = Blk `fmap` walkM f x
  walkM f (Sec lev nums attr ils elts) = do
    ils' <- walkM f ils
    elts' <- walkM f elts
    return $ Sec lev nums attr ils' elts'
  query f (Blk x) = query f x
  query f (Sec _ _ _ ils elts) = query f ils <> query f elts
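
-- Added commentary (not part of the original module): when the final Para is
-- the only Para in the list, compactify demotes it to Plain, making the list
-- "tight"; linesToPara joins lines with hard breaks.
--
-- > compactify [[Plain [Str "a"]], [Para [Str "b"]]]
-- >                              == [[Plain [Str "a"]], [Plain [Str "b"]]]
-- > linesToPara [[Str "a"], [Str "b"]] == Para [Str "a", LineBreak, Str "b"]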

-- | Convert Pandoc inline list to plain text identifier.  HTML
-- identifiers must start with a letter, and may contain only
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
  dropWhile (not . isAlpha) . intercalate "-" . words .
    map (nbspToSp . toLower) .
    filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
    stringify
 where nbspToSp '\160' = ' '
       nbspToSp x      = x

-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []

hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
  lastnum <- S.get
  let lastnum' = take level lastnum
  let newnum = case length lastnum' of
                    x | "unnumbered" `elem` classes -> []
                      | x >= level -> init lastnum' ++ [last lastnum' + 1]
                      | otherwise -> lastnum ++
                           replicate (level - length lastnum - 1) 0 ++ [1]
  unless (null newnum) $ S.put newnum
  let (sectionContents, rest) = break (headerLtEq level) xs
  sectionContents' <- hierarchicalizeWithIds sectionContents
  rest' <- hierarchicalizeWithIds rest
  return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds ((Div ("",["references"],[])
                         (Header level (ident,classes,kvs) title' : xs)):ys) =
  hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
                           title') : (xs ++ ys))
hierarchicalizeWithIds (x:rest) = do
  rest' <- hierarchicalizeWithIds rest
  return $ (Blk x) : rest'

headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
headerLtEq _ _ = False

-- | Generate a unique identifier from a list of inlines.
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents =
  let baseIdent = case inlineListToIdentifier title' of
                       "" -> "section"
                       x  -> x
      numIdent n = baseIdent ++ "-" ++ show n
  in  if baseIdent `Set.member` usedIdents
        then case find (\x -> not $ numIdent x `Set.member` usedIdents)
                  ([1..60000] :: [Int]) of
                  Just x  -> numIdent x
                  Nothing -> baseIdent  -- if we have more than 60,000, allow repeats
        else baseIdent

-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _              = False

-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
  where shift :: Block -> Block
        shift (Header level attr inner) = Header (level + n) attr inner
        shift x                         = x

-- | Detect if a list is tight.
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain
  where firstIsPlain (Plain _ : _) = True
        firstIsPlain _             = False

-- | Set a field of a 'Meta' object.  If the field already has a value,
-- convert it into a list with the new value appended to the old value(s).
addMetaField :: ToMetaValue a => String -> a -> Meta -> Meta
addMetaField key val (Meta meta) =
  Meta $ M.insertWith combine key (toMetaValue val) meta
  where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
        combine newval x             = MetaList [x, newval]
        tolist (MetaList ys) = ys
        tolist y             = [y]

-- | Create 'Meta' from old-style title, authors, date.  This is
-- provided to ease the transition from the old API.
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
      addMetaField "title" (B.fromList title)
    $ addMetaField "author" (map B.fromList authors)
    $ addMetaField "date" (B.fromList date)
    $ nullMeta
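
-- Added commentary (not part of the original module) on identifier
-- generation:
--
-- > inlineListToIdentifier [Str "Hello,", Space, Str "World!"] == "hello-world"
-- > uniqueIdent [Str "Intro"] (Set.fromList ["intro"])          == "intro-1"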

--
-- TagSoup HTML handling
--

-- | Render HTML tags.
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
               renderOptions{ optMinimize = matchTags ["hr", "br", "img",
                                                       "meta", "link"]
                            , optRawTag   = matchTags ["script", "style"] }
              where matchTags = \tags -> flip elem tags . map toLower

--
-- File handling
--

-- | Perform an IO action in a directory, returning to starting directory.
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = E.bracket
                             getCurrentDirectory
                             setCurrentDirectory
                             (const $ setCurrentDirectory path >> action)

getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
getDefaultReferenceDocx datadir = do
  let paths = ["[Content_Types].xml",
               "_rels/.rels",
               "docProps/app.xml",
               "docProps/core.xml",
               "word/document.xml",
               "word/fontTable.xml",
               "word/footnotes.xml",
               "word/numbering.xml",
               "word/settings.xml",
               "word/webSettings.xml",
               "word/styles.xml",
               "word/_rels/document.xml.rels",
               "word/_rels/footnotes.xml.rels",
               "word/theme/theme1.xml"]
  let toLazy = fromChunks . (:[])
  let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
                                          getCurrentTime
                            contents <- toLazy <$> readDataFile datadir
                                                     ("docx/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing -> return Nothing
                    Just d  -> do
                       exists <- doesFileExist (d </> "reference.docx")
                       if exists
                          then return (Just (d </> "reference.docx"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths

getDefaultReferenceODT :: Maybe FilePath -> IO Archive
getDefaultReferenceODT datadir = do
  let paths = ["mimetype",
               "manifest.rdf",
               "styles.xml",
               "content.xml",
               "meta.xml",
               "settings.xml",
               "Configurations2/accelerator/current.xml",
               "Thumbnails/thumbnail.png",
               "META-INF/manifest.xml"]
  let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
                            contents <- (fromChunks . (:[])) `fmap`
                                          readDataFile datadir ("odt/" ++ path)
                            return $ toEntry path epochtime contents
  mbArchive <- case datadir of
                    Nothing -> return Nothing
                    Just d  -> do
                       exists <- doesFileExist (d </> "reference.odt")
                       if exists
                          then return (Just (d </> "reference.odt"))
                          else return Nothing
  case mbArchive of
     Just arch -> toArchive <$> BL.readFile arch
     Nothing   -> foldr addEntryToArchive emptyArchive <$>
                     mapM pathToEntry paths

readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile "reference.docx" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
readDefaultDataFile "reference.odt" =
  (BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
  case lookup (makeCanonical fname) dataFiles of
    Nothing       -> err 97 $ "Could not find data file " ++ fname
    Just contents -> return contents
  where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
        transformPathParts = reverse . foldl go []
        go as     "."  = as
        go (_:as) ".." = as
        go as     x    = x : as
#else
  getDataFileName fname' >>= checkExistence >>= BS.readFile
    where fname' = if fname == "MANUAL.txt" then fname else "data" </> fname

checkExistence :: FilePath -> IO FilePath
checkExistence fn = do
  exists <- doesFileExist fn
  if exists
     then return fn
     else err 97 ("Could not find data file " ++ fn)
#endif

-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
  exists <- doesFileExist (userDir </> fname)
  if exists
     then BS.readFile (userDir </> fname)
     else readDefaultDataFile fname
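
-- Illustrative use (added comment; the directory and file names here are
-- hypothetical): look in the user data directory first, then fall back to
-- the data files bundled with pandoc:
--
-- > readDataFile (Just "/home/user/.pandoc") "reference.docx"
-- > readDataFile Nothing "templates/some-template.html"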

-- | Same as 'readDataFile' but returns a String instead of a ByteString.
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
  UTF8.toString `fmap` readDataFile userDir fname

-- | Specialized version of parseURIReference that disallows
-- single-letter schemes.  Reason:  these are usually windows absolute
-- paths.
parseURIReference' :: String -> Maybe URI
parseURIReference' s =
  case parseURIReference s of
       Just u
         | length (uriScheme u) > 2 -> Just u
         | null (uriScheme u)       -> Just u  -- protocol-relative
       _                            -> Nothing

-- | Fetch an image or other item from the local filesystem or the net.
-- Returns raw content and maybe mime type.
fetchItem :: Maybe String -> String
          -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem sourceURL s =
  case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of
    (Just u, s') -> -- try fetching from relative path at source
       case parseURIReference' s' of
            Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
            Nothing -> openURL s' -- will throw error
    (Nothing, s') ->
       case parseURI s' of  -- requires absolute URI
            -- We don't want to treat C:/ as a scheme:
            Just u' | length (uriScheme u') > 2 -> openURL (show u')
            Just u' | uriScheme u' == "file:" ->
                 E.try $ readLocalFile $ dropWhile (=='/') (uriPath u')
            _ -> E.try $ readLocalFile fp -- get from local file system
   where readLocalFile f = do
             cont <- BS.readFile f
             return (cont, mime)
         dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
         fp = unEscapeString $ dropFragmentAndQuery s
         mime = case takeExtension fp of
                     ".gz"   -> getMimeType $ dropExtension fp
                     ".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
                     x       -> getMimeType x
         ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
         convertSlash '\\' = '/'
         convertSlash x    = x

-- | Like 'fetchItem', but also looks for items in a 'MediaBag'.
fetchItem' :: MediaBag -> Maybe String -> String
           -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem' media sourceURL s = do
  case lookupMedia s media of
       Nothing -> fetchItem sourceURL s
       Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
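
-- Illustrative use (added comment; the URL and paths are hypothetical):
-- fetchItem resolves its second argument against a source URL when one is
-- given, and otherwise reads from the local file system; fetchItem' checks
-- the MediaBag first:
--
-- > fetchItem (Just "http://example.com/post/") "images/logo.png"
-- > fetchItem Nothing "figure.png"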

-- | Read from a URL and return raw data and maybe mime type.
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
openURL u
  | Just u'' <- stripPrefix "data:" u =
    let mime     = takeWhile (/=',') u''
        contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
    in  return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
  | otherwise = withSocketsDo $ E.try $ do
     let parseReq = parseRequest
     (proxy :: Either E.SomeException String) <-
        E.try $ getEnv "http_proxy"
     req <- parseReq u
     req' <- case proxy of
                  Left _   -> return req
                  Right pr -> (parseReq pr >>= \r ->
                                return $ addProxy (host r) (port r) req)
                              `mplus` return req
     resp <- newManager tlsManagerSettings >>= httpLbs req'
     return (BS.concat $ toChunks $ responseBody resp,
             UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
  | otherwise = E.try $ getBodyAndMimeType `fmap` browse
              (do liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
                  setOutHandler $ const (return ())
                  setAllowRedirects True
                  request (getRequest' u'))
  where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
        getRequest' uriString = case parseURI uriString of
                                   Nothing -> error ("Not a valid URL: " ++
                                                        uriString)
                                   Just v  -> mkRequest GET v
        u' = escapeURIString (/= '|') u  -- pipes are rejected by Network.URI
#endif

--
-- Error reporting
--

err :: Int -> String -> IO a
err exitCode msg = do
  name <- getProgName
  UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
  exitWith $ ExitFailure exitCode
  return undefined

warn :: MonadIO m => String -> m ()
warn msg = liftIO $ do
  name <- getProgName
  UTF8.hPutStrLn stderr $ "[" ++ name ++ " warning] " ++ msg

mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f (Left x)  = Left (f x)
mapLeft _ (Right x) = Right x

hush :: Either a b -> Maybe b
hush (Left _)  = Nothing
hush (Right x) = Just x

-- | Remove intermediate "." and ".." directories from a path.
--
-- > collapseFilePath "./foo" == "foo"
-- > collapseFilePath "/bar/../baz" == "/baz"
-- > collapseFilePath "/../baz" == "/../baz"
-- > collapseFilePath "parent/foo/baz/../bar" == "parent/foo/bar"
-- > collapseFilePath "parent/foo/baz/../../bar" == "parent/bar"
-- > collapseFilePath "parent/foo/.." == "parent"
-- > collapseFilePath "/parent/foo/../../bar" == "/bar"
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
  where
    go rs "." = rs
    go r@(p:rs) ".." = case p of
                            ".." -> ("..":r)
                            (checkPathSeperator -> Just True) -> ("..":r)
                            _ -> rs
    go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
    go rs x = x:rs
    isSingleton [] = Nothing
    isSingleton [x] = Just x
    isSingleton _ = Nothing
    checkPathSeperator = fmap isPathSeparator . isSingleton

--
-- File selection from the archive
--
filteredFilesFromArchive :: Archive -> (FilePath -> Bool) -> [(FilePath, BL.ByteString)]
filteredFilesFromArchive zf f =
  mapMaybe (fileAndBinary zf) (filter f (filesInArchive zf))
  where
    fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString)
    fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e)

---
--- Squash blocks into inlines
---

blockToInlines :: Block -> [Inline]
blockToInlines (Plain ils) = ils
blockToInlines (Para ils) = ils
blockToInlines (LineBlock lns) = combineLines lns
blockToInlines (CodeBlock attr str) = [Code attr str]
blockToInlines (RawBlock fmt str) = [RawInline fmt str]
blockToInlines (BlockQuote blks) = blocksToInlines blks
blockToInlines (OrderedList _ blkslst) =
  concatMap blocksToInlines blkslst
blockToInlines (BulletList blkslst) =
  concatMap blocksToInlines blkslst
blockToInlines (DefinitionList pairslst) =
  concatMap f pairslst
  where
    f (ils, blkslst) = ils ++
      [Str ":", Space] ++
      (concatMap blocksToInlines blkslst)
blockToInlines (Header _ _ ils) = ils
blockToInlines (HorizontalRule) = []
blockToInlines (Table _ _ _ headers rows) =
  intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
  where
    tbl = headers : rows
blockToInlines (Div _ blks) = blocksToInlines blks
blockToInlines Null = []

blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks

blocksToInlines :: [Block] -> [Inline]
blocksToInlines = blocksToInlinesWithSep [Space, Str "¶", Space]

--
-- Safe read
--

safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
                  (d,x):_
                    | all isSpace x -> return d
                  _                 -> mzero
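
-- Added commentary (not part of the original module): safeRead succeeds only
-- when the whole string, apart from trailing whitespace, parses:
--
-- > safeRead "42 " == Just (42 :: Int)
-- > safeRead "42x" == (Nothing :: Maybe Int)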

--
-- Temp directory
--

withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
#ifdef _WINDOWS
  withTempDirectory "."
#else
  withSystemTempDirectory
#endif
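
-- Illustrative use (added comment; the template name and file are
-- hypothetical): run an action inside a temporary directory that is removed
-- afterwards; the String argument is a name template for the directory.
--
-- > withTempDir "pandoc-work" $ \dir -> writeFile (dir </> "notes.txt") "hi"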