From c243e5b67b0a3be6a713c6120da24e5e65cb0357 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Apr 2010 12:33:50 -0700 Subject: Use texmath's parser in TexMath module. * This replaces a lot of custom parser code, and expands the tex -> unicode conversion. * The behavior has also changed: if the whole formula can't be converted, the whole formula is left in raw TeX. Previously, pandoc converted parts of the formula to unicode and left other parts in raw TeX. * Added (but not yet exported) readTeXMath', which returns a Maybe. * Updated tests --- src/Text/Pandoc/Readers/TeXMath.hs | 250 ++++++++----------------------------- tests/s5.basic.html | 26 +--- tests/s5.fragment.html | 26 +--- tests/s5.inserts.html | 26 +--- tests/writer.docbook | 10 +- tests/writer.html | 34 +---- tests/writer.opendocument | 37 ++---- tests/writer.rtf | 10 +- 8 files changed, 83 insertions(+), 336 deletions(-) diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 080354be1..40cf39987 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -28,208 +28,64 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of TeX math to a list of 'Pandoc' inline elements. -} module Text.Pandoc.Readers.TeXMath ( - readTeXMath + readTeXMath ) where import Text.ParserCombinators.Parsec import Text.Pandoc.Definition +import Text.TeXMath.Parser --- | Converts a string of raw TeX math to a list of 'Pandoc' inlines. +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ characters if entire formula +-- can't be converted. readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of - Left _ -> [Str inp] -- if unparseable, just include original - Right res -> res - -teXMath :: GenParser Char st [Inline] -teXMath = manyTill mathPart eof >>= return . concat - -mathPart :: GenParser Char st [Inline] -mathPart = whitespace <|> superscript <|> subscript <|> symbol <|> - argument <|> digits <|> letters <|> misc - -whitespace :: GenParser Char st [Inline] -whitespace = many1 space >> return [] - -symbol :: GenParser Char st [Inline] -symbol = try $ do - char '\\' - res <- many1 letter - case lookup res teXsymbols of - Just m -> return [Str m] - Nothing -> return [Str $ "\\" ++ res] - -argument :: GenParser Char st [Inline] -argument = try $ do - char '{' - res <- many mathPart - char '}' - return $ if null res - then [Str " "] - else [Str "{"] ++ concat res ++ [Str "}"] - -digits :: GenParser Char st [Inline] -digits = do - res <- many1 digit - return [Str res] - -letters :: GenParser Char st [Inline] -letters = do - res <- many1 letter - return [Emph [Str res]] - -misc :: GenParser Char st [Inline] -misc = do - res <- noneOf "}" - return [Str [res]] - -scriptArg :: GenParser Char st [Inline] -scriptArg = try $ do - (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r})) - <|> symbol - <|> (do{c <- (letter <|> digit); return [Str [c]]}) - -superscript :: GenParser Char st [Inline] -superscript = try $ do - char '^' - arg <- scriptArg - return [Superscript arg] - -subscript :: GenParser Char st [Inline] -subscript = try $ do - char '_' - arg <- scriptArg - return [Subscript arg] - -withThinSpace :: String -> String -withThinSpace str = "\x2009" ++ str ++ "\x2009" - -teXsymbols :: [(String, String)] -teXsymbols = - [("alpha","\x3B1") - ,("beta", "\x3B2") - ,("chi", "\x3C7") - ,("delta", "\x3B4") - ,("Delta", "\x394") - ,("epsilon", "\x3B5") - ,("varepsilon", "\x25B") - ,("eta", "\x3B7") - ,("gamma", "\x3B3") - ,("Gamma", "\x393") - ,("iota", "\x3B9") - ,("kappa", "\x3BA") - ,("lambda", "\x3BB") - ,("Lambda", "\x39B") - ,("mu", "\x3BC") - ,("nu", "\x3BD") - ,("omega", "\x3C9") - ,("Omega", "\x3A9") - ,("phi", "\x3C6") - ,("varphi", "\x3D5") - ,("Phi", "\x3A6") - ,("pi", "\x3C0") - ,("Pi", "\x3A0") - ,("psi", "\x3C8") - ,("Psi", "\x3A8") - ,("rho", "\x3C1") - ,("sigma", "\x3C3") - ,("Sigma", "\x3A3") - ,("tau", "\x3C4") - ,("theta", "\x3B8") - ,("vartheta", "\x3D1") - ,("Theta", "\x398") - ,("upsilon", "\x3C5") - ,("xi", "\x3BE") - ,("Xi", "\x39E") - ,("zeta", "\x3B6") - ,("ne", "\x2260") - ,("lt", withThinSpace "<") - ,("le", withThinSpace "\x2264") - ,("leq", withThinSpace "\x2264") - ,("ge", withThinSpace "\x2265") - ,("geq", withThinSpace "\x2265") - ,("prec", withThinSpace "\x227A") - ,("succ", withThinSpace "\x227B") - ,("preceq", withThinSpace "\x2AAF") - ,("succeq", withThinSpace "\x2AB0") - ,("in", withThinSpace "\x2208") - ,("notin", withThinSpace "\x2209") - ,("subset", withThinSpace "\x2282") - ,("supset", withThinSpace "\x2283") - ,("subseteq", withThinSpace "\x2286") - ,("supseteq", withThinSpace "\x2287") - ,("equiv", withThinSpace "\x2261") - ,("cong", withThinSpace "\x2245") - ,("approx", withThinSpace "\x2248") - ,("propto", withThinSpace "\x221D") - ,("cdot", withThinSpace "\x22C5") - ,("star", withThinSpace "\x22C6") - ,("backslash", "\\") - ,("times", withThinSpace "\x00D7") - ,("divide", withThinSpace "\x00F7") - ,("circ", withThinSpace "\x2218") - ,("oplus", withThinSpace "\x2295") - ,("otimes", withThinSpace "\x2297") - ,("odot", withThinSpace "\x2299") - ,("sum", "\x2211") - ,("prod", "\x220F") - ,("wedge", withThinSpace "\x2227") - ,("bigwedge", withThinSpace "\x22C0") - ,("vee", withThinSpace "\x2228") - ,("bigvee", withThinSpace "\x22C1") - ,("cap", withThinSpace "\x2229") - ,("bigcap", withThinSpace "\x22C2") - ,("cup", withThinSpace "\x222A") - ,("bigcup", withThinSpace "\x22C3") - ,("neg", "\x00AC") - ,("implies", withThinSpace "\x21D2") - ,("iff", withThinSpace "\x21D4") - ,("forall", "\x2200") - ,("exists", "\x2203") - ,("bot", "\x22A5") - ,("top", "\x22A4") - ,("vdash", "\x22A2") - ,("models", withThinSpace "\x22A8") - ,("uparrow", "\x2191") - ,("downarrow", "\x2193") - ,("rightarrow", withThinSpace "\x2192") - ,("to", withThinSpace "\x2192") - ,("rightarrowtail", "\x21A3") - ,("twoheadrightarrow", withThinSpace "\x21A0") - ,("twoheadrightarrowtail", withThinSpace "\x2916") - ,("mapsto", withThinSpace "\x21A6") - ,("leftarrow", withThinSpace "\x2190") - ,("leftrightarrow", withThinSpace "\x2194") - ,("Rightarrow", withThinSpace "\x21D2") - ,("Leftarrow", withThinSpace "\x21D0") - ,("Leftrightarrow", withThinSpace "\x21D4") - ,("partial", "\x2202") - ,("nabla", "\x2207") - ,("pm", "\x00B1") - ,("emptyset", "\x2205") - ,("infty", "\x221E") - ,("aleph", "\x2135") - ,("ldots", "...") - ,("therefore", "\x2234") - ,("angle", "\x2220") - ,("quad", "\x00A0\x00A0") - ,("cdots", "\x22EF") - ,("vdots", "\x22EE") - ,("ddots", "\x22F1") - ,("diamond", "\x22C4") - ,("Box", "\x25A1") - ,("lfloor", "\x230A") - ,("rfloor", "\x230B") - ,("lceiling", "\x2308") - ,("rceiling", "\x2309") - ,("langle", "\x2329") - ,("rangle", "\x232A") - ,("int", "\8747") - ,("{", "{") - ,("}", "}") - ,("[", "[") - ,("]", "]") - ,("|", "|") - ,("||", "||") - ] +readTeXMath inp = case readTeXMath' inp of + Nothing -> [Str ("$" ++ inp ++ "$")] + Just res -> res + +-- | Like 'readTeXMath', but without the default. +readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings) + -> Maybe [Inline] +readTeXMath' inp = case parse formula "formula" inp of + Left _ -> Just [Str inp] + Right exps -> expsToInlines exps + +expsToInlines :: [Exp] -> Maybe [Inline] +expsToInlines xs = do + res <- mapM expToInlines xs + return (concat res) + +expToInlines :: Exp -> Maybe [Inline] +expToInlines (ENumber s) = Just [Str s] +expToInlines (EIdentifier s) = Just [Emph [Str s]] +expToInlines (EMathOperator s) = Just [Str s] +expToInlines (ESymbol t s) = Just $ addSpace t (Str s) + where addSpace Op x = [x, thinspace] + addSpace Bin x = [medspace, x, medspace] + addSpace Rel x = [widespace, x, widespace] + addSpace Pun x = [x, thinspace] + addSpace _ x = [x] + thinspace = Str "\x2006" + medspace = Str "\x2005" + widespace = Str "\x2004" +expToInlines (EStretchy x) = expToInlines x +expToInlines (EGrouped xs) = expsToInlines xs +expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported +expToInlines (EBinary _ _ _) = Nothing +expToInlines (ESub x y) = do + x' <- expToInlines x + y' <- expToInlines y + return $ x' ++ [Subscript y'] +expToInlines (ESuper x y) = do + x' <- expToInlines x + y' <- expToInlines y + return $ x' ++ [Superscript y'] +expToInlines (ESubsup x y z) = do + x' <- expToInlines x + y' <- expToInlines y + z' <- expToInlines z + return $ x' ++ [Subscript y'] ++ [Superscript z'] +expToInlines (EText _ x) = Just [Emph [Str x]] +expToInlines _ = Nothing diff --git a/tests/s5.basic.html b/tests/s5.basic.html index 253bcab2f..659fc065a 100644 --- a/tests/s5.basic.html +++ b/tests/s5.basic.html @@ -309,31 +309,7 @@ > diff --git a/tests/s5.fragment.html b/tests/s5.fragment.html index 6ab704d41..32346a331 100644 --- a/tests/s5.fragment.html +++ b/tests/s5.fragment.html @@ -14,31 +14,7 @@ > - 2+2=4 + 2 + 2 = 4 - x ∈ y + x ∈ y - α ∧ ω + α ∧ ω @@ -1103,13 +1103,13 @@ Blah Here's some display math: - \frac{d}{dx}f(x)=\limh → 0\frac{f(x+h)-f(x)}{h} + $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ Here's one that has a line break in it: - α+ω × x2. + α + ω × x2. diff --git a/tests/writer.html b/tests/writer.html index f41a8e60a..b8296a536 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -830,19 +830,19 @@ Blah >
  • 2+2=42 + 2 = 4
  • x ∈  ∈ y
  • α ∧ ωα ∧ ω
  • -Tree
  • Here’s some display math: \frac{d}{dx}f(x)=\limh → 0\frac{f(x+h)-f(x)}{h}$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$
  • Here’s one that has a line break in it: α+ω × α + ω × x2 - + - - - + + + - - - - - - - - - - - - - @@ -1317,13 +1304,13 @@ \cite[22-23]{smith.1899} - 2+2=4 + 2 + 2 = 4 - x ∈ y + x ∈ y - α ∧ ω + α ∧ ω 223 @@ -1332,10 +1319,10 @@ p-Tree - Here’s some display math: \frac{d}{dx}f(x)=\limh → 0\frac{f(x+h)-f(x)}{h} + Here’s some display math: $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ - Here’s one that has a line break in it: α+ω × x2. + Here’s one that has a line break in it: α + ω × x2. These shouldn’t be math: @@ -1344,13 +1331,13 @@ To get the famous equation, write $e = mc^2$. - $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.) + $22,000 is a lot of money. So is $34,000. (It worked if “lot” is emphasized.) Shoes ($20) and socks ($5). - Escaped $: $73 this should be emphasized 23$. + Escaped $: $73 this should be emphasized 23$. Here’s a LaTeX table: @@ -1454,7 +1441,7 @@ Cat & 1 \\ \hline Here is a movie icon. Footnotes -Here is a footnote reference,1Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. and another.2Here’s the long note. This one contains multiple blocks.Subsequent blocks are indented to show that they belong to the footnote (as with list items).{ <code> }If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3This is easier to type. Inline notes may contain links and ] verbatim characters, as well as [bracketed text]. +Here is a footnote reference,1Here is the footnote. It can go anywhere after the footnote reference. It need not be placed at the end of the document. and another.2Here’s the long note. This one contains multiple blocks.Subsequent blocks are indented to show that they belong to the footnote (as with list items).{ <code> }If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. This should not be a footnote reference, because it contains a space.[^my note] Here is an inline note.3This is easier to type. Inline notes may contain links and ] verbatim characters, as well as [bracketed text]. Notes can go in quotes.4In quote. diff --git a/tests/writer.rtf b/tests/writer.rtf index ae3776261..9bb6547cb 100644 --- a/tests/writer.rtf +++ b/tests/writer.rtf @@ -264,13 +264,13 @@ quoted link {\pard \qc \f0 \sa180 \li0 \fi0 \emdash\emdash\emdash\emdash\emdash\par} {\pard \ql \f0 \sa180 \li0 \fi0 \b \fs36 LaTeX\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab \par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab 2+2=4\par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab {\i x}\u8201?\u8712?\u8201?{\i y}\par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab \u945?\u8201?\u8743?\u8201?\u969?\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab 2\u8197?+\u8197?2\u8196?=\u8196?4\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab {\i x}\u8196?\u8712?\u8196?{\i y}\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab \u945?\u8197?\u8743?\u8197?\u969?\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab 223\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab {\i p}-Tree\par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's some display math: \\frac\{{\i d}\}\{{\i dx}\}{\i f}({\i x})=\\lim{\sub {\i h}\u8201?\u8594?\u8201?0}\\frac\{{\i f}({\i x}+{\i h})-{\i f}({\i x})\}\{{\i h}\}\par} -{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's one that has a line break in it: \u945?+\u969?\u8201?\u215?\u8201?{\i x}{\super 2}.\sa180\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's some display math: $\\frac\{d\}\{dx\}f(x)=\\lim_\{h\\to 0\}\\frac\{f(x+h)-f(x)\}\{h\}$\par} +{\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab Here\u8217's one that has a line break in it: \u945?\u8197?+\u8197?\u969?\u8197?\u215?\u8197?{\i x}{\super 2}.\sa180\par} {\pard \ql \f0 \sa180 \li0 \fi0 These shouldn\u8217't be math:\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab To get the famous equation, write {\f1 $e = mc^2$}.\par} {\pard \ql \f0 \sa0 \li360 \fi-360 \bullet \tx360\tab $22,000 is a {\i lot} of money. So is $34,000. (It worked if \u8220"lot\u8221" is emphasized.)\par} -- cgit v1.2.3