summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-04-11 07:37:57 -0700
committer John MacFarlane <jgm@berkeley.edu> 2014-04-11 07:37:57 -0700
commit 2304e9cb940e382afd5bce6722e9c52f482ce2f6 (patch)
tree 04363183f1d9f3e91d65465c52ea4a9f9532bfff
parent ca40acea5b022d6309a36000d54844a482c14555 (diff)
parent 6f19be7d40f583ee4e10fa2b0f20bd4f1fa80c43 (diff)
Merge pull request #1231 from tarleb/org-fix-subexpr
Org reader: Fix parsing of sub-/superscript expressions
-rw-r--r-- src/Text/Pandoc/Readers/Org.hs 47
-rw-r--r-- tests/Tests/Readers/Org.hs 39
2 files changed, 67 insertions, 19 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 29611e8cc..ceac69367 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -622,17 +622,11 @@ displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
-subscript :: OrgParser Inlines
-subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces)
+subscript :: OrgParser Inlines
+subscript = B.subscript <$> try (char '_' *> subOrSuperExpr)
-superscript :: OrgParser Inlines
-superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
-
-maybeGroupedByBraces :: OrgParser Inlines
-maybeGroupedByBraces = try $
- choice [ try $ enclosedInlines (char '{') (char '}')
- , B.str . (:"") <$> anyChar
- ]
+superscript :: OrgParser Inlines
+superscript = B.superscript <$> try (char '^' *> subOrSuperExpr)
symbol :: OrgParser Inlines
symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
@@ -805,3 +799,36 @@ notAfterForbiddenBorderChar = do
pos <- getPosition
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
return $ lastFBCPos /= Just pos
+
+-- | Read a sub- or superscript expression
+subOrSuperExpr :: OrgParser Inlines
+subOrSuperExpr = try $ do
+ choice [ balancedSexp '{' '}'
+ , balancedSexp '(' ')' >>= return . enclosing ('(', ')')
+ , simpleSubOrSuperString
+ ] >>= parseFromString (mconcat <$> many inline)
+
+-- | Read a balanced sexp
+balancedSexp :: Char
+ -> Char
+ -> OrgParser String
+balancedSexp l r = try $ do
+ char l
+ res <- concat <$> many ( many1 (noneOf ([l, r] ++ "\n\r"))
+ <|> try (string [l, r])
+ <|> enclosing (l, r) <$> balancedSexp l r
+ )
+ char r
+ return res
+
+simpleSubOrSuperString :: OrgParser String
+simpleSubOrSuperString = try $
+ choice [ string "*"
+ , mappend <$> option [] ((:[]) <$> oneOf "+-")
+ <*> many1 alphaNum
+ ]
+
+enclosing :: (a, a)
+ -> [a]
+ -> [a]
+enclosing (left, right) s = left : s ++ [right]
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 9e9482e45..49130f0ab 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -78,15 +78,15 @@ tests =
"A * symbol" =?>
para (str "A" <> space <> str "*" <> space <> "symbol")
- , "Superscript single char" =:
- "2^n" =?>
- para (str "2" <> superscript "n")
+ , "Superscript simple expression" =:
+ "2^-λ" =?>
+ para (str "2" <> superscript "-λ")
, "Superscript multi char" =:
"2^{n-1}" =?>
para (str "2" <> superscript "n-1")
- , "Subscript single char" =:
+ , "Subscript simple expression" =:
"a_n" =?>
para (str "a" <> subscript "n")
@@ -105,11 +105,8 @@ tests =
])
, "No empty markup" =:
- -- FIXME: __ is erroneously parsed as subscript "_"
- -- "// ** __ ++ == ~~ $$" =?>
- -- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
- "// ** ++ == ~~ $$" =?>
- para (spcSep [ "//", "**", "++", "==", "~~", "$$" ])
+ "// ** __ ++ == ~~ $$" =?>
+ para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
, "Adherence to Org's rules for markup borders" =:
"/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
@@ -143,6 +140,30 @@ tests =
]))
, "emph/" ])
+ , "Sub- and superscript expressions" =:
+ unlines [ "a_(a(b)(c)d)"
+ , "e^(f(g)h)"
+ , "i_(jk)l)"
+ , "m^()n"
+ , "o_{p{q{}r}}"
+ , "s^{t{u}v}"
+ , "w_{xy}z}"
+ , "1^{}2"
+ , "3_{{}}"
+ , "4^(a(*b(c*)d))"
+ ] =?>
+ para (spcSep [ "a" <> subscript "(a(b)(c)d)"
+ , "e" <> superscript "(f(g)h)"
+ , "i" <> (subscript "(jk)") <> "l)"
+ , "m" <> (superscript "()") <> "n"
+ , "o" <> subscript "p{q{}r}"
+ , "s" <> superscript "t{u}v"
+ , "w" <> (subscript "xy") <> "z}"
+ , "1" <> (superscript "") <> "2"
+ , "3" <> subscript "{}"
+ , "4" <> superscript ("(a(" <> strong "b(c" <> ")d))")
+ ])
+
, "Image" =:
"[[./sunset.jpg]]" =?>
(para $ image "./sunset.jpg" "" "")