diff options
Diffstat (limited to 'src/Text/Pandoc/Shared.hs')
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 87 |
1 files changed, 71 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 13eab9bdb..82ae08601 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP, DeriveDataTypeable #-} {- Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu> @@ -112,13 +112,16 @@ import Text.ParserCombinators.Parsec import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) import qualified Text.PrettyPrint.HughesPJ as PP import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) +import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, + isPunctuation ) import Data.List ( find, isPrefixOf, intercalate ) -import Control.Monad ( join ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) import System.Directory import Prelude hiding ( putStrLn, writeFile, readFile, getContents ) import System.IO.UTF8 +import Data.Generics +import qualified Control.Monad.State as S +import Control.Monad (join) -- -- List processing @@ -878,22 +881,74 @@ endsWithPlain blocks = -- | Data structure for defining hierarchical Pandoc documents data Element = Blk Block - | Sec [Inline] [Element] deriving (Eq, Read, Show) - --- | Returns @True@ on Header block with at least the specified level -headerAtLeast :: Int -> Block -> Bool -headerAtLeast level (Header x _) = x <= level -headerAtLeast _ _ = False + | Sec Int String [Inline] [Element] + -- lvl ident label contents + deriving (Eq, Read, Show, Typeable, Data) + +-- | Convert Pandoc inline list to plain text identifier. +inlineListToIdentifier :: [Inline] -> String +inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' + +inlineListToIdentifier' :: [Inline] -> [Char] +inlineListToIdentifier' [] = "" +inlineListToIdentifier' (x:xs) = + xAsText ++ inlineListToIdentifier' xs + where xAsText = case x of + Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ + intercalate "-" $ words $ map toLower s + Emph lst -> inlineListToIdentifier' lst + Strikeout lst -> inlineListToIdentifier' lst + Superscript lst -> inlineListToIdentifier' lst + SmallCaps lst -> inlineListToIdentifier' lst + Subscript lst -> inlineListToIdentifier' lst + Strong lst -> inlineListToIdentifier' lst + Quoted _ lst -> inlineListToIdentifier' lst + Cite _ lst -> inlineListToIdentifier' lst + Code s -> s + Space -> "-" + EmDash -> "-" + EnDash -> "-" + Apostrophe -> "" + Ellipses -> "" + LineBreak -> "-" + Math _ _ -> "" + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> inlineListToIdentifier' lst + Image lst _ -> inlineListToIdentifier' lst + Note _ -> "" -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] -hierarchicalize [] = [] -hierarchicalize (block:rest) = - case block of - (Header level title) -> - let (thisSection, rest') = break (headerAtLeast level) rest - in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest') - x -> (Blk x):(hierarchicalize rest) +hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) [] + +hierarchicalizeWithIds :: [Block] -> S.State [String] [Element] +hierarchicalizeWithIds [] = return [] +hierarchicalizeWithIds ((Header level title'):xs) = do + usedIdents <- S.get + let ident = uniqueIdent title' usedIdents + S.modify (ident :) + let (sectionContents, rest) = break (headerLtEq level) xs + sectionContents' <- hierarchicalizeWithIds sectionContents + rest' <- hierarchicalizeWithIds rest + return $ Sec level ident title' sectionContents' : rest' +hierarchicalizeWithIds (x:rest) = do + rest' <- hierarchicalizeWithIds rest + return $ (Blk x) : rest' + +headerLtEq :: Int -> Block -> Bool +headerLtEq level (Header l _) = l <= level +headerLtEq _ _ = False + +uniqueIdent :: [Inline] -> [String] -> String +uniqueIdent title' usedIdents = + let baseIdent = inlineListToIdentifier title' + numIdent n = baseIdent ++ "-" ++ show n + in if baseIdent `elem` usedIdents + then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + Just x -> numIdent x + Nothing -> baseIdent -- if we have more than 60,000, allow repeats + else baseIdent -- | True if block is a Header block. isHeaderBlock :: Block -> Bool |