diff options
Diffstat (limited to 'src/Text/Pandoc/Writers/LaTeX.hs')
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 345 |
1 files changed, 294 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 5857723a6..9e15e0be7 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -38,10 +38,11 @@ import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isURI, unEscapeString ) -import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse ) +import Data.Aeson (object, (.=)) +import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy ) import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord ) -import Data.Maybe ( fromMaybe ) -import Data.Aeson.Types ( (.:), parseMaybe, withObject ) +import Data.Maybe ( fromMaybe, isJust ) +import qualified Data.Text as T import Control.Applicative ((<|>)) import Control.Monad.State import qualified Text.Parsec as P @@ -121,7 +122,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do Right r -> r Left _ -> "" case lookup "documentclass" (writerVariables options) `mplus` - parseMaybe (withObject "object" (.: "documentclass")) metadata of + fmap stringify (lookupMeta "documentclass" meta) of Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} | otherwise -> return () Nothing | documentClass `elem` bookClasses @@ -145,11 +146,8 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta - let (mainlang, otherlang) = - case (reverse . splitBy (==',') . filter (/=' ')) `fmap` - getField "lang" metadata of - Just (m:os) -> (m, reverse os) - _ -> ("", []) + let docLangs = nub $ query (extract "lang") blocks + let hasStringValue x = isJust (getField x metadata :: Maybe String) let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (show (writerTOCDepth options - if stBook st @@ -174,8 +172,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "euro" (stUsesEuro st) $ defField "listings" (writerListings options || stLHS st) $ defField "beamer" (writerBeamer options) $ - defField "mainlang" mainlang $ - defField "otherlang" otherlang $ (if stHighlighting st then defField "highlighting-macros" (styleToLaTeX $ writerHighlightStyle options ) @@ -186,9 +182,56 @@ pandocToLaTeX options (Pandoc meta blocks) = do Biblatex -> defField "biblio-title" biblioTitle . defField "biblatex" True _ -> id) $ + -- set lang to something so polyglossia/babel is included + defField "lang" (if null docLangs then ""::String else "en") $ + defField "otherlangs" docLangs $ + defField "colorlinks" (any hasStringValue + ["citecolor", "urlcolor", "linkcolor", "toccolor"]) $ + defField "dir" (if (null $ query (extract "dir") blocks) + then ""::String + else "ltr") $ metadata + let toPolyObj lang = object [ "name" .= T.pack name + , "options" .= T.pack opts ] + where + (name, opts) = toPolyglossia lang + let lang = maybe [] (splitBy (=='-')) $ getField "lang" context + otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context + let context' = + defField "babel-lang" (toBabel lang) + $ defField "babel-otherlangs" (map toBabel otherlangs) + $ defField "babel-newcommands" (concatMap (\(poly, babel) -> + -- \textspanish and \textgalician are already used by babel + -- save them as \oritext... and let babel use that + if poly `elem` ["spanish", "galician"] + then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext" + ++ poly ++ "}}\n" ++ + "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++ + "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ poly ++ "}{##2}}}\n" + else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{" + ++ babel ++ "}{#2}}\n" ++ + "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{" + ++ babel ++ "}}{\\end{otherlanguage}}\n" + ) + -- eliminate duplicates that have same polyglossia name + $ nubBy (\a b -> fst a == fst b) + -- find polyglossia and babel names of languages used in the document + $ map (\l -> + let lng = splitBy (=='-') l + in (fst $ toPolyglossia lng, toBabel lng) + ) + docLangs ) + $ defField "polyglossia-lang" (toPolyObj lang) + $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs) + $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of + Just "rtl" -> True + _ -> False) + $ context return $ if writerStandalone options - then renderTemplate' template context + then renderTemplate' template context' else main -- | Convert Elements to LaTeX @@ -234,7 +277,7 @@ stringToLaTeX ctx (x:xs) = do '^' -> "\\^{}" ++ rest '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows | otherwise -> "\\textbackslash{}" ++ rest - '|' -> "\\textbar{}" ++ rest + '|' | not isUrl -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as @@ -292,9 +335,12 @@ elementToBeamer slideLevel (Sec lvl _num (ident,classes,kvs) tit elts) if writerListings opts then query hasCode elts else []) - let allowframebreaks = "allowframebreaks" `elem` classes + let frameoptions = ["allowdisplaybreaks", "allowframebreaks", + "b", "c", "t", "environment", + "label", "plain", "shrink"] let optionslist = ["fragile" | fragile] ++ - ["allowframebreaks" | allowframebreaks] + [k | k <- classes, k `elem` frameoptions] ++ + [k ++ "=" ++ v | (k,v) <- kvs, k `elem` frameoptions] let options = if null optionslist then "" else "[" ++ intercalate "," optionslist ++ "]" @@ -322,34 +368,53 @@ isLineBreakOrSpace _ = False blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty -blockToLaTeX (Div (identifier,classes,_) bs) = do +blockToLaTeX (Div (identifier,classes,kvs) bs) = do beamer <- writerBeamer `fmap` gets stOptions ref <- toLabel identifier let linkAnchor = if null identifier then empty - else "\\hyperdef{}" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) - contents <- blockListToLaTeX bs - if beamer && "notes" `elem` classes -- speaker notes - then return $ "\\note" <> braces contents - else return (linkAnchor $$ contents) + else "\\hypertarget" <> braces (text ref) <> + braces empty + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + let wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if null o + then "" + else brackets $ text o + in inCmd "begin" (text l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (text l) + Nothing -> txt + wrapNotes txt = if beamer && "notes" `elem` classes + then "\\note" <> braces txt -- speaker notes + else linkAnchor $$ txt + fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs blockToLaTeX (Plain lst) = inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst -- title beginning with fig: indicates that the image is a figure blockToLaTeX (Para [Image attr txt (src,'f':'i':'g':':':tit)]) = do inNote <- gets stInNote + modify $ \st -> st{ stInMinipage = True, stNotes = [] } capt <- inlineListToLaTeX txt + notes <- gets stNotes + modify $ \st -> st{ stInMinipage = False, stNotes = [] } + -- We can't have footnotes in the list of figures, so remove them: + captForLof <- if null notes + then return empty + else brackets <$> inlineListToLaTeX (walk deNote txt) img <- inlineToLaTeX (Image attr txt (src,tit)) - let (ident, _, _) = attr - idn <- toLabel ident - let label = if null ident - then empty - else "\\label" <> braces (text idn) + let footnotes = notesToLaTeX notes return $ if inNote -- can't have figures in notes then "\\begin{center}" $$ img $+$ capt $$ "\\end{center}" else "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption" <> braces capt) $$ label $$ "\\end{figure}" + ("\\caption" <> captForLof <> braces capt) $$ + "\\end{figure}" $$ + footnotes -- . . . indicates pause in beamer slides blockToLaTeX (Para [Str ".",Space,Str ".",Space,Str "."]) = do beamer <- writerBeamer `fmap` gets stOptions @@ -378,7 +443,7 @@ blockToLaTeX (CodeBlock (identifier,classes,keyvalAttr) str) = do ref <- toLabel identifier let linkAnchor = if null identifier then empty - else "\\hyperdef{}" <> braces (text ref) <> + else "\\hypertarget" <> braces (text ref) <> braces ("\\label" <> braces (text ref)) let lhsCodeBlock = do modify $ \s -> s{ stLHS = True } @@ -591,19 +656,21 @@ tableCellToLaTeX header (width, align, blocks) = do return $ ("\\begin{minipage}" <> valign <> braces (text (printf "%.2f\\columnwidth" width)) <> (halign <> "\\strut" <> cr <> cellContents <> cr) <> - "\\strut\\end{minipage}") - $$ case notes of - [] -> empty - ns -> (case length ns of + "\\strut\\end{minipage}") $$ + notesToLaTeX notes + +notesToLaTeX :: [Doc] -> Doc +notesToLaTeX [] = empty +notesToLaTeX ns = (case length ns of n | n > 1 -> "\\addtocounter" <> braces "footnote" <> braces (text $ show $ 1 - n) | otherwise -> empty) - $$ - vcat (intersperse - ("\\addtocounter" <> braces "footnote" <> braces "1") - $ map (\x -> "\\footnotetext" <> braces x) - $ reverse ns) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst @@ -665,8 +732,7 @@ sectionHeader unnumbered ref level lst = do let level' = if book || writerChapters opts then level - 1 else level internalLinks <- gets stInternalLinks let refLabel x = (if ref `elem` internalLinks - then text "\\hyperdef" - <> braces empty + then text "\\hypertarget" <> braces lab <> braces x else x) @@ -731,22 +797,29 @@ isQuoted _ = False -- | Convert inline element to LaTeX inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToLaTeX (Span (id',classes,_) ils) = do +inlineToLaTeX (Span (id',classes,kvs) ils) = do let noEmph = "csl-no-emph" `elem` classes let noStrong = "csl-no-strong" `elem` classes let noSmallCaps = "csl-no-smallcaps" `elem` classes + let rtl = ("dir","rtl") `elem` kvs + let ltr = ("dir","ltr") `elem` kvs ref <- toLabel id' let linkAnchor = if null id' then empty - else "\\hyperdef{}" <> braces (text ref) <> - braces ("\\label" <> braces (text ref)) + else "\\protect\\hypertarget" <> braces (text ref) <> + braces empty fmap (linkAnchor <>) ((if noEmph then inCmd "textup" else id) . (if noStrong then inCmd "textnormal" else id) . (if noSmallCaps then inCmd "textnormal" else id) . - (if not (noEmph || noStrong || noSmallCaps) - then braces - else id)) `fmap` inlineListToLaTeX ils + (if rtl then inCmd "RL" else id) . + (if ltr then inCmd "LR" else id) . + (case lookup "lang" kvs of + Just lng -> let (l, o) = toPolyglossia $ splitBy (=='-') lng + ops = if null o then "" else brackets (text o) + in \c -> char '\\' <> "text" <> text l <> ops <> braces c + Nothing -> id) + ) `fmap` inlineListToLaTeX ils inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" inlineToLaTeX (Strong lst) = @@ -831,22 +904,22 @@ inlineToLaTeX Space = return space inlineToLaTeX (Link _ txt ('#':ident, _)) = do contents <- inlineListToLaTeX txt lab <- toLabel ident - return $ text "\\hyperref" <> brackets (text lab) <> braces contents + return $ text "\\protect\\hyperlink" <> braces (text lab) <> braces contents inlineToLaTeX (Link _ txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text $ "\\url{" ++ src' ++ "}" [Str x] | Just rest <- stripPrefix "mailto:" src, escapeURI x == rest -> -- email autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt return $ "\\href" <> braces (text src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' inlineToLaTeX (Image attr _ (source, _)) = do @@ -869,7 +942,7 @@ inlineToLaTeX (Image attr _ (source, _)) = do source' = if isURI source then source else unEscapeString source - source'' <- stringToLaTeX URLString source' + source'' <- stringToLaTeX URLString (escapeURI source') inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") <> @@ -1001,3 +1074,173 @@ citationsToBiblatex _ = return empty getListingsLanguage :: [String] -> Maybe String getListingsLanguage [] = Nothing getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs + +-- Extract a key from divs and spans +extract :: String -> Block -> [String] +extract key (Div attr _) = lookKey key attr +extract key (Plain ils) = concatMap (extractInline key) ils +extract key (Para ils) = concatMap (extractInline key) ils +extract key (Header _ _ ils) = concatMap (extractInline key) ils +extract _ _ = [] + +-- Extract a key from spans +extractInline :: String -> Inline -> [String] +extractInline key (Span attr _) = lookKey key attr +extractInline _ _ = [] + +-- Look up a key in an attribute and give a list of its values +lookKey :: String -> Attr -> [String] +lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: String -> (String, String) +toPolyglossiaEnv l = + case toPolyglossia $ (splitBy (=='-')) l of + ("arabic", o) -> ("Arabic", o) + x -> x + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Polyglossia (language, options) tuple +-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf +toPolyglossia :: [String] -> (String, String) +toPolyglossia ("ar":"DZ":_) = ("arabic", "locale=algeria") +toPolyglossia ("ar":"IQ":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"JO":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"LB":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"LY":_) = ("arabic", "locale=libya") +toPolyglossia ("ar":"MA":_) = ("arabic", "locale=morocco") +toPolyglossia ("ar":"MR":_) = ("arabic", "locale=mauritania") +toPolyglossia ("ar":"PS":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"SY":_) = ("arabic", "locale=mashriq") +toPolyglossia ("ar":"TN":_) = ("arabic", "locale=tunisia") +toPolyglossia ("de":"1901":_) = ("german", "spelling=old") +toPolyglossia ("de":"AT":"1901":_) = ("german", "variant=austrian, spelling=old") +toPolyglossia ("de":"AT":_) = ("german", "variant=austrian") +toPolyglossia ("de":"CH":"1901":_) = ("german", "variant=swiss, spelling=old") +toPolyglossia ("de":"CH":_) = ("german", "variant=swiss") +toPolyglossia ("de":_) = ("german", "") +toPolyglossia ("dsb":_) = ("lsorbian", "") +toPolyglossia ("el":"polyton":_) = ("greek", "variant=poly") +toPolyglossia ("en":"AU":_) = ("english", "variant=australian") +toPolyglossia ("en":"CA":_) = ("english", "variant=canadian") +toPolyglossia ("en":"GB":_) = ("english", "variant=british") +toPolyglossia ("en":"NZ":_) = ("english", "variant=newzealand") +toPolyglossia ("en":"UK":_) = ("english", "variant=british") +toPolyglossia ("en":"US":_) = ("english", "variant=american") +toPolyglossia ("grc":_) = ("greek", "variant=ancient") +toPolyglossia ("hsb":_) = ("usorbian", "") +toPolyglossia ("sl":_) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Babel language string. +-- http://mirrors.concertpass.com/tex-archive/macros/latex/required/babel/base/babel.pdf +-- Note that the PDF unfortunately does not contain a complete list of supported languages. +toBabel :: [String] -> String +toBabel ("de":"1901":_) = "german" +toBabel ("de":"AT":"1901":_) = "austrian" +toBabel ("de":"AT":_) = "naustrian" +toBabel ("de":_) = "ngerman" +toBabel ("dsb":_) = "lowersorbian" +toBabel ("el":"polyton":_) = "polutonikogreek" +toBabel ("en":"AU":_) = "australian" +toBabel ("en":"CA":_) = "canadian" +toBabel ("en":"GB":_) = "british" +toBabel ("en":"NZ":_) = "newzealand" +toBabel ("en":"UK":_) = "british" +toBabel ("en":"US":_) = "american" +toBabel ("fr":"CA":_) = "canadien" +toBabel ("fra":"aca":_) = "acadian" +toBabel ("grc":_) = "polutonikogreek" +toBabel ("hsb":_) = "uppersorbian" +toBabel ("sl":_) = "slovene" +toBabel x = commonFromBcp47 x + +-- Takes a list of the constituents of a BCP 47 language code +-- and converts it to a string shared by Babel and Polyglossia. +-- https://tools.ietf.org/html/bcp47#section-2.1 +commonFromBcp47 :: [String] -> String +commonFromBcp47 [] = "" +commonFromBcp47 ("pt":"BR":_) = "brazilian" +commonFromBcp47 x = fromIso $ head x + where + fromIso "af" = "afrikaans" + fromIso "am" = "amharic" + fromIso "ar" = "arabic" + fromIso "ast" = "asturian" + fromIso "bg" = "bulgarian" + fromIso "bn" = "bengali" + fromIso "bo" = "tibetan" + fromIso "br" = "breton" + fromIso "ca" = "catalan" + fromIso "cy" = "welsh" + fromIso "cz" = "czech" + fromIso "cop" = "coptic" + fromIso "da" = "danish" + fromIso "dv" = "divehi" + fromIso "el" = "greek" + fromIso "en" = "english" + fromIso "eo" = "esperanto" + fromIso "es" = "spanish" + fromIso "et" = "estonian" + fromIso "eu" = "basque" + fromIso "fa" = "farsi" + fromIso "fi" = "finnish" + fromIso "fr" = "french" + fromIso "fur" = "friulan" + fromIso "ga" = "irish" + fromIso "gd" = "scottish" + fromIso "gl" = "galician" + fromIso "he" = "hebrew" + fromIso "hi" = "hindi" + fromIso "hr" = "croatian" + fromIso "hy" = "armenian" + fromIso "hu" = "magyar" + fromIso "ia" = "interlingua" + fromIso "id" = "indonesian" + fromIso "ie" = "interlingua" + fromIso "is" = "icelandic" + fromIso "it" = "italian" + fromIso "jp" = "japanese" + fromIso "km" = "khmer" + fromIso "kn" = "kannada" + fromIso "ko" = "korean" + fromIso "la" = "latin" + fromIso "lo" = "lao" + fromIso "lt" = "lithuanian" + fromIso "lv" = "latvian" + fromIso "ml" = "malayalam" + fromIso "mn" = "mongolian" + fromIso "mr" = "marathi" + fromIso "nb" = "norsk" + fromIso "nl" = "dutch" + fromIso "nn" = "nynorsk" + fromIso "no" = "norsk" + fromIso "nqo" = "nko" + fromIso "oc" = "occitan" + fromIso "pl" = "polish" + fromIso "pms" = "piedmontese" + fromIso "pt" = "portuguese" + fromIso "rm" = "romansh" + fromIso "ro" = "romanian" + fromIso "ru" = "russian" + fromIso "sa" = "sanskrit" + fromIso "se" = "samin" + fromIso "sk" = "slovak" + fromIso "sq" = "albanian" + fromIso "sr" = "serbian" + fromIso "sv" = "swedish" + fromIso "syr" = "syriac" + fromIso "ta" = "tamil" + fromIso "te" = "telugu" + fromIso "th" = "thai" + fromIso "tk" = "turkmen" + fromIso "tr" = "turkish" + fromIso "uk" = "ukrainian" + fromIso "ur" = "urdu" + fromIso "vi" = "vietnamese" + fromIso _ = "" + +deNote :: Inline -> Inline +deNote (Note _) = RawInline (Format "latex") "" +deNote x = x |