summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs167
-rw-r--r--src/Text/Pandoc/Shared.hs56
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs46
3 files changed, 123 insertions, 146 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7168cee4d..ded9f2136 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-7 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -27,8 +27,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown (
- readMarkdown
+module Text.Pandoc.Readers.Markdown (
+ readMarkdown
) where
import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
@@ -37,9 +37,9 @@ import Data.Char ( isAlphaNum )
import Data.Maybe ( fromMaybe )
import Network.URI ( isURI )
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
+import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag, anyHtmlTag,
anyHtmlEndTag, htmlEndTag, extractTagType,
htmlBlockElement )
@@ -69,14 +69,14 @@ specialChars = "\\[]*_~`<>$!^-.&'\"\8216\8217\8220\8221"
indentSpaces = try $ do
state <- getState
let tabStop = stateTabStop state
- try (count tabStop (char ' ')) <|>
+ try (count tabStop (char ' ')) <|>
(many (char ' ') >> string "\t") <?> "indentation"
nonindentSpaces = do
state <- getState
let tabStop = stateTabStop state
sps <- many (char ' ')
- if length sps < tabStop
+ if length sps < tabStop
then return sps
else unexpected "indented line"
@@ -91,8 +91,8 @@ failUnlessSmart = do
if stateSmart state then return () else fail "Smart typography feature"
-- | Parse an inline Str element with a given content.
-inlineString str = try $ do
- (Str res) <- inline
+inlineString str = try $ do
+ (Str res) <- inline
if res == str then return res else fail $ "unexpected Str content"
-- | Parse a sequence of inline elements between a string
@@ -103,9 +103,9 @@ inlinesInBalanced opener closer = try $ do
string opener
result <- manyTill ( (do lookAhead (inlineString opener)
-- because it might be a link...
- bal <- inlinesInBalanced opener closer
+ bal <- inlinesInBalanced opener closer
return $ [Str opener] ++ bal ++ [Str closer])
- <|> (count 1 inline))
+ <|> (count 1 inline))
(try (string closer))
return $ concat result
@@ -115,7 +115,7 @@ inlinesInBalanced opener closer = try $ do
titleLine = try $ char '%' >> skipSpaces >> manyTill inline newline
-authorsLine = try $ do
+authorsLine = try $ do
char '%'
skipSpaces
authors <- sepEndBy (many1 (noneOf ",;\n")) (oneOf ",;")
@@ -143,24 +143,15 @@ parseMarkdown = do
startPos <- getPosition
-- go through once just to get list of reference keys
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> headerReference <|>
- lineClump) eof >>= return . concat
+ docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>=
+ return . concat
setInput docMinusKeys
setPosition startPos
st <- getState
- -- get headers and construct implicit references unless strict
- if stateStrict st
- then return ()
- else do let oldkeys = stateKeys st
- let headers = reverse $ stateHeaders st
- let idents = uniqueIdentifiers headers
- let implicitRefs = zipWith (\hd ident -> (hd, ("#" ++ ident, "")))
- headers idents
- updateState $ \st -> st { stateKeys = oldkeys ++ implicitRefs }
-- go through again for notes unless strict...
if stateStrict st
then return ()
- else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
+ else do docMinusNotes <- manyTill (noteBlock <|> lineClump) eof >>=
return . concat
st <- getState
let reversedNotes = stateNotes st
@@ -169,10 +160,10 @@ parseMarkdown = do
setPosition startPos
-- now parse it for real...
(title, author, date) <- option ([],[],"") titleBlock
- blocks <- parseBlocks
+ blocks <- parseBlocks
return $ Pandoc (Meta title author date) $ filter (/= Null) blocks
---
+--
-- initial pass for references and notes
--
@@ -195,17 +186,7 @@ referenceKey = try $ do
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-headerReference = try $ do
- failIfStrict
- startPos <- getPosition
- (Header level text) <- lookAhead $ atxHeader <|> setextHeader
- st <- getState
- let headers = stateHeaders st
- updateState $ \st -> st { stateHeaders = text:headers }
- endPos <- getPosition
- lineClump -- return the raw header, because we need to parse it later
-
-referenceTitle = try $ do
+referenceTitle = try $ do
(many1 spaceChar >> option '\n' newline) <|> newline
skipSpaces
tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
@@ -220,7 +201,7 @@ rawLine = do
notFollowedBy blankline
notFollowedBy' noteMarker
contents <- many1 nonEndline
- end <- option "" (newline >> optional indentSpaces >> return "\n")
+ end <- option "" (newline >> optional indentSpaces >> return "\n")
return $ contents ++ end
rawLines = many1 rawLine >>= return . concat
@@ -249,7 +230,7 @@ noteBlock = try $ do
parseBlocks = manyTill block eof
-block = choice [ header
+block = choice [ header
, table
, codeBlock
, hrule
@@ -304,7 +285,7 @@ hrule = try $ do
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
codeBlock = do
- contents <- many1 (indentedLine <|>
+ contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
return $ b ++ l))
@@ -319,7 +300,7 @@ emacsBoxQuote = try $ do
failIfStrict
string ",----"
manyTill anyChar newline
- raw <- manyTill
+ raw <- manyTill
(try (char '|' >> optional (char ' ') >> manyTill anyChar newline))
(try (string "`----"))
blanklines
@@ -329,7 +310,7 @@ emailBlockQuoteStart = try $ nonindentSpaces >> char '>' >>~ optional (char ' ')
emailBlockQuote = try $ do
emailBlockQuoteStart
- raw <- sepBy (many (nonEndline <|>
+ raw <- sepBy (many (nonEndline <|>
(try (endline >> notFollowedBy emailBlockQuoteStart >>
return '\n'))))
(try (newline >> emailBlockQuoteStart))
@@ -337,12 +318,12 @@ emailBlockQuote = try $ do
optional blanklines
return raw
-blockQuote = do
+blockQuote = do
raw <- emailBlockQuote <|> emacsBoxQuote
-- parse the extracted block, which may contain various block elements:
contents <- parseFromString parseBlocks $ (joinWithSep "\n" raw) ++ "\n\n"
return $ BlockQuote contents
-
+
--
-- list blocks
--
@@ -377,7 +358,7 @@ orderedListStart style delim = try $ do
then do many1 digit
char '.'
return 1
- else orderedListMarker style delim
+ else orderedListMarker style delim
if delim == Period && (style == UpperAlpha || (style == UpperRoman &&
num `elem` [1, 5, 10, 50, 100, 500, 1000]))
then char '\t' <|> (spaceChar >> spaceChar)
@@ -401,7 +382,7 @@ rawListItem start = try $ do
blanks <- many blankline
return $ concat result ++ blanks
--- continuation of a list item - indented and separated by blankline
+-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
listContinuation start = try $ do
@@ -417,7 +398,7 @@ listContinuationLine start = try $ do
result <- manyTill anyChar newline
return $ result ++ "\n"
-listItem start = try $ do
+listItem start = try $ do
first <- rawListItem start
continuations <- many (listContinuation start)
-- parsing with ListItemState forces markers at beginning of lines to
@@ -437,7 +418,7 @@ orderedList = try $ do
items <- many1 (listItem (orderedListStart style delim))
return $ OrderedList (start, style, delim) $ compactify items
-bulletList = many1 (listItem bulletListStart) >>=
+bulletList = many1 (listItem bulletListStart) >>=
return . BulletList . compactify
-- definition lists
@@ -478,7 +459,7 @@ definitionList = do
-- paragraph block
--
-para = try $ do
+para = try $ do
result <- many1 inline
newline
blanklines <|> do st <- getState
@@ -487,9 +468,9 @@ para = try $ do
else lookAhead emacsBoxQuote >> return ""
return $ Para $ normalizeSpaces result
-plain = many1 inline >>= return . Plain . normalizeSpaces
+plain = many1 inline >>= return . Plain . normalizeSpaces
---
+--
-- raw html
--
@@ -506,25 +487,25 @@ htmlBlock = do
else rawHtmlBlocks
-- True if tag is self-closing
-isSelfClosing tag =
+isSelfClosing tag =
isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
+ tag <- anyHtmlBlockTag
let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
+ if isSelfClosing tag || tag' == "hr"
then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
+ else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
(htmlElement <|> (count 1 anyChar)))
end <- htmlEndTag tag'
return $ tag ++ concat contents ++ end
rawHtmlBlocks = do
- htmlBlocks <- many1 rawHtmlBlock
+ htmlBlocks <- many1 rawHtmlBlock
let combined = concatMap (\(RawHtml str) -> str) htmlBlocks
let combined' = if not (null combined) && last combined == '\n'
- then init combined -- strip extra newline
- else combined
+ then init combined -- strip extra newline
+ else combined
return $ RawHtml combined'
--
@@ -535,7 +516,7 @@ rawLaTeXEnvironment' = failIfStrict >> rawLaTeXEnvironment
--
-- Tables
---
+--
-- Parse a dashed line with optional trailing spaces; return its length
-- and the length including trailing space.
@@ -544,7 +525,7 @@ dashedLine ch = do
sp <- many spaceChar
return $ (length dashes, length $ dashes ++ sp)
--- Parse a table header with dashed lines of '-' preceded by
+-- Parse a table header with dashed lines of '-' preceded by
-- one line of text.
simpleTableHeader = try $ do
rawContent <- anyLine
@@ -567,7 +548,7 @@ tableSep = try $ nonindentSpaces >> many1 (dashedLine '-') >> string "\n"
rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
- return $ map removeLeadingTrailingSpace $ tail $
+ return $ map removeLeadingTrailingSpace $ tail $
splitByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
@@ -584,8 +565,8 @@ multilineRow indices = do
widthsFromIndices :: Int -- Number of columns on terminal
-> [Int] -- Indices
-> [Float] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
let lengths = zipWith (-) indices (0:indices)
totLength = sum lengths
quotient = if totLength > numColumns
@@ -624,14 +605,14 @@ simpleTable = tableWith simpleTableHeader tableLine blanklines
multilineTable = tableWith multilineTableHeader multilineRow tableFooter
multilineTableHeader = try $ do
- tableSep
+ tableSep
rawContent <- many1 (notFollowedBy' tableSep >> many1Till anyChar newline)
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines) = unzip dashes
let indices = scanl (+) (length initSp) lines
- let rawHeadsList = transpose $ map
+ let rawHeadsList = transpose $ map
(\ln -> tail $ splitByIndices (init indices) ln)
rawContent
let rawHeads = map (joinWithSep " ") rawHeadsList
@@ -644,7 +625,7 @@ multilineTableHeader = try $ do
alignType :: [String] -> Int -> Alignment
alignType [] len = AlignDefault
alignType strLst len =
- let str = head $ sortBy (comparing length) $
+ let str = head $ sortBy (comparing length) $
map removeTrailingSpace strLst
leftSpace = if null str then False else (str !! 0) `elem` " \t"
rightSpace = length str < len || (str !! (len - 1)) `elem` " \t"
@@ -656,7 +637,7 @@ alignType strLst len =
table = failIfStrict >> (simpleTable <|> multilineTable) <?> "table"
---
+--
-- inline
--
@@ -686,7 +667,7 @@ inline = choice [ str
escapedChar = do
char '\\'
state <- getState
- result <- option '\\' $ if stateStrict state
+ result <- option '\\' $ if stateStrict state
then oneOf "\\`*_{}[]()>#+-.!~"
else satisfy (not . isAlphaNum)
return $ Str [result]
@@ -700,17 +681,17 @@ ltSign = do
specialCharsMinusLt = filter (/= '<') specialChars
-symbol = do
+symbol = do
result <- oneOf specialCharsMinusLt
return $ Str [result]
-- parses inline code, between n `s and n `s
-code = try $ do
+code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|>
- (char '\n' >> return " "))
- (try (skipSpaces >> count (length starts) (char '`') >>
+ (char '\n' >> return " "))
+ (try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
return $ Code $ removeLeadingTrailingSpace $ concat result
@@ -726,30 +707,30 @@ math = try $ do
return $ TeX ("$" ++ (joinWithSep " " words) ++ "$")
emph = ((enclosed (char '*') (char '*') inline) <|>
- (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
+ (enclosed (char '_') (char '_' >> notFollowedBy alphaNum) inline)) >>=
return . Emph . normalizeSpaces
-strong = ((enclosed (string "**") (try $ string "**") inline) <|>
+strong = ((enclosed (string "**") (try $ string "**") inline) <|>
(enclosed (string "__") (try $ string "__") inline)) >>=
return . Strong . normalizeSpaces
strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
return . Strikeout . normalizeSpaces
-superscript = failIfStrict >> enclosed (char '^') (char '^')
+superscript = failIfStrict >> enclosed (char '^') (char '^')
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
return . Superscript
subscript = failIfStrict >> enclosed (char '~') (char '~')
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
- return . Subscript
+ return . Subscript
-smartPunctuation = failUnlessSmart >>
+smartPunctuation = failUnlessSmart >>
choice [ quoted, apostrophe, dash, ellipses ]
apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-quoted = doubleQuoted <|> singleQuoted
+quoted = doubleQuoted <|> singleQuoted
withQuoteContext context parser = do
oldState <- getState
@@ -765,7 +746,7 @@ singleQuoted = try $ do
withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
return . Quoted SingleQuote . normalizeSpaces
-doubleQuoted = try $ do
+doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
return . Quoted DoubleQuote . normalizeSpaces
@@ -776,13 +757,13 @@ failIfInQuoteContext context = do
then fail "already inside quotes"
else return ()
-singleQuoteStart = do
+singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- (try $ do char '\''
+ char '\8216' <|>
+ (try $ do char '\''
notFollowedBy (oneOf ")!],.;:-? \t\n")
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
+ satisfy (not . isAlphaNum)))
-- possess/contraction
return '\'')
@@ -826,13 +807,13 @@ endline = try $ do
newline
notFollowedBy blankline
st <- getState
- if stateStrict st
+ if stateStrict st
then do notFollowedBy emailBlockQuoteStart
notFollowedBy (char '#') -- atx header
- else return ()
+ else return ()
-- parse potential list-starts differently if in a list:
if stateParserContext st == ListItemState
- then notFollowedBy' (bulletListStart <|>
+ then notFollowedBy' (bulletListStart <|>
(anyOrderedListStart >> return ()))
else return ()
return Space
@@ -846,7 +827,7 @@ reference = notFollowedBy' (string "[^") >> -- footnote reference
inlinesInBalanced "[" "]" >>= (return . normalizeSpaces)
-- source for a link, with optional title
-source = try $ do
+source = try $ do
char '('
optional (char '<')
src <- many (noneOf ")> \t\n")
@@ -856,7 +837,7 @@ source = try $ do
char ')'
return (removeTrailingSpace src, tit)
-linkTitle = try $ do
+linkTitle = try $ do
(many1 spaceChar >> option '\n' newline) <|> newline
skipSpaces
delim <- char '\'' <|> char '"'
@@ -871,13 +852,13 @@ link = try $ do
-- a link like [this][ref] or [this][] or [this]
referenceLink label = do
- ref <- option [] (try (optional (char ' ') >>
+ ref <- option [] (try (optional (char ' ') >>
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then label else ref
state <- getState
case lookupKeySrc (stateKeys state) ref' of
- Nothing -> fail "no corresponding key"
- Just target -> return target
+ Nothing -> fail "no corresponding key"
+ Just target -> return target
emailAddress = try $ do
name <- many1 (alphaNum <|> char '+')
@@ -898,7 +879,7 @@ autoLink = try $ do
char '>'
let src' = if "mailto:" `isPrefixOf` src
then drop 7 src
- else src
+ else src
st <- getState
return $ if stateStrict st
then Link [Str src'] (src, "")
@@ -929,7 +910,7 @@ rawLaTeXInline' = failIfStrict >> rawLaTeXInline
rawHtmlInline' = do
st <- getState
result <- choice $ if stateStrict st
- then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
+ then [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
else [htmlBlockElement, anyHtmlInlineTag]
return $ HtmlInline result
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5fb1306af..626679025 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -34,7 +34,6 @@ module Text.Pandoc.Shared (
substitute,
joinWithSep,
-- * Text processing
- isPunctuation,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
@@ -92,7 +91,6 @@ module Text.Pandoc.Shared (
Element (..),
hierarchicalize,
isHeaderBlock,
- uniqueIdentifiers,
-- * Writer options
WriterOptions (..),
defaultWriterOptions
@@ -104,7 +102,7 @@ import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty )
import qualified Text.PrettyPrint.HughesPJ as PP
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
-import Data.List ( find, isPrefixOf, intersperse )
+import Data.List ( find, isPrefixOf )
import Control.Monad ( join )
--
@@ -146,15 +144,6 @@ joinWithSep sep lst = foldr1 (\a b -> a ++ sep ++ b) lst
-- Text processing
--
--- | True if character is a punctuation character (unicode).
-isPunctuation :: Char -> Bool
-isPunctuation c =
- let c' = ord c
- in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
- c' >= 0xE000 && c' <= 0xE0FF
- then True
- else False
-
-- | Returns an association list of backslash escapes for the
-- designated characters.
backslashEscapes :: [Char] -- ^ list of special characters to escape
@@ -580,8 +569,7 @@ data ParserState = ParserState
stateStrict :: Bool, -- ^ Use strict markdown syntax?
stateSmart :: Bool, -- ^ Use smart typography?
stateColumns :: Int, -- ^ Number of columns in terminal
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateHeaders :: [[Inline]] -- ^ List of header texts used
+ stateHeaderTable :: [HeaderType] -- ^ Ordered list of header types used
}
deriving Show
@@ -600,8 +588,7 @@ defaultParserState =
stateStrict = False,
stateSmart = False,
stateColumns = 80,
- stateHeaderTable = [],
- stateHeaders = [] }
+ stateHeaderTable = [] }
data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
@@ -803,43 +790,6 @@ isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
--- | Convert Pandoc inline list to plain text identifier.
-inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier [] = ""
-inlineListToIdentifier (x:xs) =
- xAsText ++ inlineListToIdentifier xs
- where xAsText = case x of
- Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
- concat $ intersperse "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier lst
- Strikeout lst -> inlineListToIdentifier lst
- Superscript lst -> inlineListToIdentifier lst
- Subscript lst -> inlineListToIdentifier lst
- Strong lst -> inlineListToIdentifier lst
- Quoted _ lst -> inlineListToIdentifier lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier lst
- Image lst _ -> inlineListToIdentifier lst
- Note _ -> ""
-
--- | Return unique identifiers for list of inline lists.
-uniqueIdentifiers :: [[Inline]] -> [String]
-uniqueIdentifiers ls =
- let addIdentifier (nonuniqueIds, uniqueIds) l =
- let new = inlineListToIdentifier l
- matches = length $ filter (== new) nonuniqueIds
- new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
- in (new:nonuniqueIds, new':uniqueIds)
- in reverse $ snd $ foldl addIdentifier ([],[]) ls
-
--
-- Writer options
--
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 56ca5ca48..7ec95d8ef 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -193,6 +193,15 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+-- | True if character is a punctuation character (unicode).
+isPunctuation :: Char -> Bool
+isPunctuation c =
+ let c' = ord c
+ in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
+ c' >= 0xE000 && c' <= 0xE0FF
+ then True
+ else False
+
-- | Add CSS for document header.
addToCSS :: String -> State WriterState ()
addToCSS item = do
@@ -200,6 +209,43 @@ addToCSS item = do
let current = stCSS st
put $ st {stCSS = S.insert item current}
+-- | Convert Pandoc inline list to plain text identifier.
+inlineListToIdentifier :: [Inline] -> String
+inlineListToIdentifier [] = ""
+inlineListToIdentifier (x:xs) =
+ xAsText ++ inlineListToIdentifier xs
+ where xAsText = case x of
+ Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
+ concat $ intersperse "-" $ words $ map toLower s
+ Emph lst -> inlineListToIdentifier lst
+ Strikeout lst -> inlineListToIdentifier lst
+ Superscript lst -> inlineListToIdentifier lst
+ Subscript lst -> inlineListToIdentifier lst
+ Strong lst -> inlineListToIdentifier lst
+ Quoted _ lst -> inlineListToIdentifier lst
+ Code s -> s
+ Space -> "-"
+ EmDash -> "-"
+ EnDash -> "-"
+ Apostrophe -> ""
+ Ellipses -> ""
+ LineBreak -> "-"
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> inlineListToIdentifier lst
+ Image lst _ -> inlineListToIdentifier lst
+ Note _ -> ""
+
+-- | Return unique identifiers for list of inline lists.
+uniqueIdentifiers :: [[Inline]] -> [String]
+uniqueIdentifiers ls =
+ let addIdentifier (nonuniqueIds, uniqueIds) l =
+ let new = inlineListToIdentifier l
+ matches = length $ filter (== new) nonuniqueIds
+ new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+ in (new:nonuniqueIds, new':uniqueIds)
+ in reverse $ snd $ foldl addIdentifier ([],[]) ls
+
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml opts Null = return $ noHtml