summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormb21 <mb21@users.noreply.github.com>2015-10-17 14:48:31 +0200
committermb21 <mb21@users.noreply.github.com>2015-10-18 17:01:37 +0200
commit9328f4cd3d5d5b96e7783b419214bd8599c17ebc (patch)
treec8f3f7ac6b146978fcb6c954698d80a98ac7e929
parent7f5a677bbff1a99d36a81312140dcd928d05262d (diff)
LaTeX and ConTeXt writers: support lang attribute on divs and spans
For LaTeX, also collect lang and dir attributes on spans and divs to set the lang, otherlangs and dir variables if they aren’t set already. See #895.
-rw-r--r--README17
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs43
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs99
3 files changed, 121 insertions, 38 deletions
diff --git a/README b/README
index f10de123c..7bf28873e 100644
--- a/README
+++ b/README
@@ -1047,12 +1047,21 @@ Language variables
format stored in the additional variables `babel-lang`,
`polyglossia-lang` (LaTeX) and `context-lang` (ConTeXt).
+ Native pandoc `span`s and `div`s with the lang attribute
+ (value in BCP 47) can be used to switch the language in
+ that range.
+
`otherlangs`
: a list of other languages used in the document
in the YAML metadata, according to [BCP 47]. For example:
`otherlangs: [en-GB, fr]`.
- Currently only used by `xelatex` through the generated
- `polyglossia-otherlangs` variable.
+ This is automatically generated from the `lang` attributes
+ in all `span`s and `div`s but can be overriden.
+ Currently only used by LaTeX through the generated
+ `babel-otherlangs` and `polyglossia-otherlangs` variables.
+ The LaTeX writer outputs polyglossia commands in the text but
+ the `babel-newcommands` variable contains mappings for them
+ to the corresponding babel.
`dir`
: the base direction of the document, either `rtl` (right-to-left)
@@ -1065,10 +1074,6 @@ Language variables
(e.g. the browser, when generating HTML) supports the
[Unicode Bidirectional Algorithm].
- LaTeX and ConTeXt assume by default that all text is left-to-right.
- Setting `dir: ltr` enables bidirectional text handling in a document
- whose base direction is left-to-right but contains some right-to-left script.
-
When using LaTeX for bidirectional documents, only the `xelatex` engine
is fully supported (use `--latex-engine=xelatex`).
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 7d3830a60..61e62aa17 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -157,17 +157,21 @@ blockToConTeXt (CodeBlock _ str) =
blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
blockToConTeXt (RawBlock _ _ ) = return empty
blockToConTeXt (Div (ident,_,kvs) bs) = do
- contents <- blockListToConTeXt bs
- let contents' = if null ident
- then contents
- else ("\\reference" <> brackets (text $ toLabel ident) <>
- braces empty <> "%") $$ contents
- let align dir = blankline <> "\\startalignment[" <> dir <> "]"
- $$ contents' $$ "\\stopalignment" <> blankline
- return $ case lookup "dir" kvs of
- Just "rtl" -> align "righttoleft"
- Just "ltr" -> align "lefttoright"
- _ -> contents'
+ let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
+ let wrapRef txt = if null ident
+ then txt
+ else ("\\reference" <> brackets (text $ toLabel ident) <>
+ braces empty <> "%") $$ txt
+ wrapDir = case lookup "dir" kvs of
+ Just "rtl" -> align "righttoleft"
+ Just "ltr" -> align "lefttoright"
+ _ -> id
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> "\\start\\language["
+ <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
+ Nothing -> txt
+ wrapBlank txt = blankline <> txt <> blankline
+ fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
blockToConTeXt (BulletList lst) = do
contents <- mapM listItemToConTeXt lst
return $ ("\\startitemize" <> if isTightList lst
@@ -346,11 +350,15 @@ inlineToConTeXt (Note contents) = do
else text "\\startbuffer " <> nest 2 contents' <>
text "\\stopbuffer\\footnote{\\getbuffer}"
inlineToConTeXt (Span (_,_,kvs) ils) = do
- contents <- inlineListToConTeXt ils
- return $ case lookup "dir" kvs of
- Just "rtl" -> braces $ "\\righttoleft " <> contents
- Just "ltr" -> braces $ "\\lefttoright " <> contents
- _ -> contents
+ let wrapDir txt = case lookup "dir" kvs of
+ Just "rtl" -> braces $ "\\righttoleft " <> txt
+ Just "ltr" -> braces $ "\\lefttoright " <> txt
+ _ -> txt
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
+ <> "]" <> txt <> "\\stop "
+ Nothing -> txt
+ fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
-- | Craft the section header, inserting the secton reference, if supplied.
sectionHeader :: Attr
@@ -377,6 +385,9 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
then char '\\' <> chapter <> braces contents
else contents <> blankline
+fromBcp47' :: String -> String
+fromBcp47' = fromBcp47 . splitBy (=='-')
+
-- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 770a674b7..b31497a22 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Templates
import Text.Printf ( printf )
import Network.URI ( isURI, unEscapeString )
import Data.Aeson (object, (.=))
-import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse )
+import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
import Data.Maybe ( fromMaybe )
import qualified Data.Text as T
@@ -145,6 +145,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
+ let docLangs = nub $ query (extract "lang") blocks
let context = defField "toc" (writerTableOfContents options) $
defField "toc-depth" (show (writerTOCDepth options -
if stBook st
@@ -179,18 +180,48 @@ pandocToLaTeX options (Pandoc meta blocks) = do
Biblatex -> defField "biblio-title" biblioTitle .
defField "biblatex" True
_ -> id) $
+ -- set lang to something so polyglossia/babel is included
+ defField "lang" (if null docLangs then ""::String else "en") $
+ defField "otherlangs" docLangs $
+ defField "dir" (if (null $ query (extract "dir") blocks)
+ then ""::String
+ else "ltr") $
metadata
let toPolyObj lang = object [ "name" .= T.pack name
, "options" .= T.pack opts ]
where
(name, opts) = toPolyglossia lang
let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
+ otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
let context' =
defField "babel-lang" (toBabel lang)
+ $ defField "babel-otherlangs" (map toBabel otherlangs)
+ $ defField "babel-newcommands" (concatMap (\(poly, babel) ->
+ -- \textspanish and \textgalician are already used by babel
+ -- save them as \oritext... and let babel use that
+ if poly `elem` ["spanish", "galician"]
+ then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
+ "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
+ "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
+ ++ poly ++ "}}\n" ++
+ "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
+ "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+ ++ poly ++ "}{##2}}}\n"
+ else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+ ++ babel ++ "}{#2}}\n" ++
+ "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{"
+ ++ babel ++ "}}{\\end{otherlanguage}}\n"
+ )
+ -- eliminate duplicates that have same polyglossia name
+ $ nubBy (\a b -> fst a == fst b)
+ -- find polyglossia and babel names of languages used in the document
+ $ map (\l ->
+ let lng = splitBy (=='-') l
+ in (fst $ toPolyglossia lng, toBabel lng)
+ )
+ docLangs )
$ defField "polyglossia-lang" (toPolyObj lang)
- $ defField "polyglossia-otherlangs"
- (maybe [] (map $ toPolyObj . splitBy (=='-')) $
- getField "otherlangs" context)
+ $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
$ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
Just "rtl" -> True
_ -> False)
@@ -340,15 +371,24 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
then empty
else "\\hyperdef{}" <> braces (text ref) <>
braces ("\\label" <> braces (text ref))
- contents' <- blockListToLaTeX bs
- let align dir = inCmd "begin" dir $$ contents' $$ inCmd "end" dir
- let contents = case lookup "dir" kvs of
- Just "rtl" -> align "RTL"
- Just "ltr" -> align "LTR"
- _ -> contents'
- if beamer && "notes" `elem` classes -- speaker notes
- then return $ "\\note" <> braces contents
- else return (linkAnchor $$ contents)
+ let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+ let wrapDir = case lookup "dir" kvs of
+ Just "rtl" -> align "RTL"
+ Just "ltr" -> align "LTR"
+ _ -> id
+ wrapLang txt = case lookup "lang" kvs of
+ Just lng -> let (l, o) = toPolyglossiaEnv lng
+ ops = if null o
+ then ""
+ else brackets $ text o
+ in inCmd "begin" (text l) <> ops
+ $$ blankline <> txt <> blankline
+ $$ inCmd "end" (text l)
+ Nothing -> txt
+ wrapNotes txt = if beamer && "notes" `elem` classes
+ then "\\note" <> braces txt -- speaker notes
+ else linkAnchor $$ txt
+ fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
blockToLaTeX (Plain lst) =
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-- title beginning with fig: indicates that the image is a figure
@@ -759,9 +799,12 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do
(if noSmallCaps then inCmd "textnormal" else id) .
(if rtl then inCmd "RL" else id) .
(if ltr then inCmd "LR" else id) .
- (if not (noEmph || noStrong || noSmallCaps || rtl || ltr)
- then braces
- else id)) `fmap` inlineListToLaTeX ils
+ (case lookup "lang" kvs of
+ Just lng -> let (l, o) = toPolyglossiaEnv lng
+ ops = if null o then "" else brackets (text o)
+ in \c -> char '\\' <> "text" <> text l <> ops <> braces c
+ Nothing -> id)
+ ) `fmap` inlineListToLaTeX ils
inlineToLaTeX (Emph lst) =
inlineListToLaTeX lst >>= return . inCmd "emph"
inlineToLaTeX (Strong lst) =
@@ -1002,6 +1045,30 @@ getListingsLanguage :: [String] -> Maybe String
getListingsLanguage [] = Nothing
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
+-- Extract a key from divs and spans
+extract :: String -> Block -> [String]
+extract key (Div attr _) = lookKey key attr
+extract key (Plain ils) = concatMap (extractInline key) ils
+extract key (Para ils) = concatMap (extractInline key) ils
+extract key (Header _ _ ils) = concatMap (extractInline key) ils
+extract _ _ = []
+
+-- Extract a key from spans
+extractInline :: String -> Inline -> [String]
+extractInline key (Span attr _) = lookKey key attr
+extractInline _ _ = []
+
+-- Look up a key in an attribute and give a list of its values
+lookKey :: String -> Attr -> [String]
+lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
+
+-- In environments \Arabic instead of \arabic is used
+toPolyglossiaEnv :: String -> (String, String)
+toPolyglossiaEnv l =
+ case toPolyglossia $ (splitBy (=='-')) l of
+ ("arabic", o) -> ("Arabic", o)
+ x -> x
+
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Polyglossia (language, options) tuple
-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf