summaryrefslogtreecommitdiff
path: root/Text
diff options
context:
space:
mode:
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-07-11 02:14:57 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-07-11 02:14:57 +0000
commit: 824bb2d22e40e035703ccf6ec7fd6bcde51950ce (patch)
tree: b47ff78e86ddcdea299932fc2e4be39a387aa435 /Text
parent: 8ed710bc9d771a25e73be2582b379485a3e240dc (diff)
In smart mode, use nonbreaking spaces after abbreviations in markdown parser.
Thus, for example, "Mr. Brown" comes out as "Mr.~Brown" in LaTeX, and does not produce a sentence-separating space. Resolves Issue #75. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1298 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text')
-rw-r--r--Text/Pandoc/Readers/Markdown.hs25
1 files changed, 23 insertions, 2 deletions
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
index c9fbbe2d9..d0d99b607 100644
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -33,7 +33,7 @@ module Text.Pandoc.Readers.Markdown (
import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
+import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit )
import Data.Maybe ( fromMaybe )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
@@ -697,7 +697,8 @@ table = simpleTable <|> multilineTable <?> "table"
inline = choice inlineParsers <?> "inline"
-inlineParsers = [ str
+inlineParsers = [ abbrev
+ , str
, smartPunctuation
, whitespace
, endline
@@ -792,6 +793,26 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
(notFollowedBy' whitespace >> inline) >>= -- may not contain Space
return . Subscript
+-- parses an abbreviation (guarded by failUnlessSmart) and returns it as a
+-- Str with its period restored and a nonbreaking space ('\160') appended.
+abbrev = failUnlessSmart >> (assumedAbbrev <|> knownAbbrev) >>= \abbr ->
+ return $ Str (abbr ++ ".\160")
+
+-- a string of letters followed by a period that does not end a sentence
+-- is assumed to be an abbreviation. It is assumed that sentences don't
+-- start with lowercase letters or numerals.
+assumedAbbrev = try $ do
+ letters <- many1 (satisfy isAlpha)
+ string ". "
+ -- peek at the next character without consuming it
+ lookAhead (satisfy $ \c -> isLower c || isDigit c)
+ return letters
+
+-- these strings are treated as abbreviations even if they are followed
+-- by a capital letter (such as a name). The result excludes the ". ".
+-- NOTE(review): "Mrs" is listed before its prefix "Mr" on the assumption
+-- that oneOfStrings commits to the first alternative that matches; with
+-- "Mr" first, "Mrs. " would match "Mr" and then fail on ". " -- confirm.
+knownAbbrev = try $ do
+ result <- oneOfStrings [ "Mrs", "Mr", "Ms", "Capt", "Dr", "Prof", "Gen",
+ "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs" ]
+ string ". "
+ return result
+
smartPunctuation = failUnlessSmart >>
choice [ quoted, apostrophe, dash, ellipses ]