summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-06-25 18:31:59 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-06-25 18:31:59 +0200
commit643cbdf1044623475cb6ade9c35de85148d0dff6 (patch)
treec7c151e976b9e86c1041e964a79b650bfd5d8fe2
parenta85d8335767b8acad7de36a16be1c6ae4bca9aff (diff)
Writers.Shared: improve type of Lang and bcp47 parser.
Use a real parsec parser for BCP47, include variants.
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs6
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs18
-rw-r--r--src/Text/Pandoc/Writers/Shared.hs96
3 files changed, 79 insertions, 41 deletions
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 8573f5719..54873efb2 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -171,11 +171,11 @@ updateStyleWithLang (Just lang) arch = do
| e <- zEntries arch] }
addLang :: Lang -> Element -> Element
-addLang (Lang lang country) = everywhere' (mkT updateLangAttr)
+addLang lang = everywhere' (mkT updateLangAttr)
where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _)
- = Attr n lang
+ = Attr n (langLanguage lang)
updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _)
- = Attr n country
+ = Attr n (langRegion lang)
updateLangAttr x = x
-- | transform both Image and Math elements
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 57f3c1194..763cea5ad 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -614,7 +614,7 @@ data TextStyle = Italic
| Sup
| SmallC
| Pre
- | Language String String
+ | Language Lang
deriving ( Eq,Ord )
textStyleAttr :: TextStyle -> [(String,String)]
@@ -632,9 +632,9 @@ textStyleAttr s
| Pre <- s = [("style:font-name" ,"Courier New")
,("style:font-name-asian" ,"Courier New")
,("style:font-name-complex" ,"Courier New")]
- | Language lang country <- s
- = [("fo:language" ,lang)
- ,("fo:country" ,country)]
+ | Language lang <- s
+ = [("fo:language" ,langLanguage lang)
+ ,("fo:country" ,langRegion lang)]
| otherwise = []
withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a
@@ -642,8 +642,8 @@ withLangFromAttr (_,_,kvs) action =
case lookup "lang" kvs of
Nothing -> action
Just l -> do
- mblang <- parseBCP47 l
- case mblang of
- Just (Lang lang country) -> withTextStyle
- (Language lang country) action
- _ -> action
+ case parseBCP47 l of
+ Right lang -> withTextStyle (Language lang) action
+ Left _ -> do
+ report $ InvalidLang l
+ action
diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs
index efb553ac2..b56f2d468 100644
--- a/src/Text/Pandoc/Writers/Shared.hs
+++ b/src/Text/Pandoc/Writers/Shared.hs
@@ -46,11 +46,12 @@ module Text.Pandoc.Writers.Shared (
, gridTable
)
where
-import Control.Monad (liftM, zipWithM, mplus)
+import Control.Monad (liftM, zipWithM, guard)
import Data.Aeson (FromJSON (..), Result (..), ToJSON (..), Value (Object),
encode, fromJSON)
+import Data.Char (isAscii, isLetter, isUpper, isLower)
import qualified Data.HashMap.Strict as H
-import Data.List (groupBy, intersperse, transpose)
+import Data.List (groupBy, intersperse, transpose, intercalate)
import qualified Data.Map as M
import Data.Maybe (isJust)
import qualified Data.Text as T
@@ -60,45 +61,82 @@ import Text.Pandoc.Class (PandocMonad, report)
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Pretty
-import Text.Pandoc.Shared (splitBy)
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Pandoc.XML (escapeStringForXML)
+import qualified Text.Parsec as P
-- | Represents BCP 47 language/country code.
-data Lang = Lang String String
+data Lang = Lang{ langLanguage :: String
+ , langScript :: String
+ , langRegion :: String
+ , langVariants :: [String] }
+ deriving (Eq, Ord, Show)
-- | Render a Lang as BCP 47.
renderLang :: Lang -> String
-renderLang (Lang la co) = la ++ if null co
- then ""
- else '-':co
+renderLang lang = intercalate "-" (langLanguage lang : filter (not . null)
+ ([langScript lang, langRegion lang] ++ langVariants lang))
-- | Get the contents of the `lang` metadata field or variable.
getLang :: PandocMonad m => WriterOptions -> Meta -> m (Maybe Lang)
-getLang opts meta = maybe (return Nothing) parseBCP47 $
- case lookup "lang" (writerVariables opts) of
- Just s -> Just s
- _ -> Nothing
- `mplus`
- case lookupMeta "lang" meta of
- Just (MetaInlines [Str s]) -> Just s
- Just (MetaString s) -> Just s
- _ -> Nothing
+getLang opts meta = case
+ (case lookup "lang" (writerVariables opts) of
+ Just s -> Just s
+ _ ->
+ case lookupMeta "lang" meta of
+ Just (MetaInlines [Str s]) -> Just s
+ Just (MetaString s) -> Just s
+ _ -> Nothing) of
+ Nothing -> return Nothing
+ Just s -> case parseBCP47 s of
+ Left _ -> do
+ report $ InvalidLang s
+ return Nothing
+ Right l -> return (Just l)
--- | Parse a BCP 47 string as a Lang, issuing a warning if there
--- are issues.
-parseBCP47 :: PandocMonad m => String -> m (Maybe Lang)
+-- | Parse a BCP 47 string as a Lang.
+parseBCP47 :: String -> Either String Lang
parseBCP47 lang =
- case splitBy (== '-') lang of
- [la,co]
- | length la == 2 && length co == 2
- -> return $ Just $ Lang la co
- [la]
- | length la == 2
- -> return $ Just $ Lang la ""
- _ -> do
- report $ InvalidLang lang
- return Nothing
+ case P.parse bcp47 "lang" lang of
+ Right r -> Right r
+ Left e -> Left $ show e
+ where bcp47 = do
+ language <- pLanguage
+ script <- P.option "" pScript
+ region <- P.option "" pRegion
+ variants <- P.many pVariant
+ () <$ P.char '-' P.<|> P.eof
+ return $ Lang{ langLanguage = language
+ , langScript = script
+ , langRegion = region
+ , langVariants = variants }
+ asciiLetter = P.satisfy (\c -> isAscii c && isLetter c)
+ pLanguage = do
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return cs
+ pScript = P.try $ do
+ P.char '-'
+ x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c)
+ xs <- P.count 3
+ (P.satisfy (\c -> isAscii c && isLetter c && isLower c))
+ return (x:xs)
+ pRegion = P.try $ do
+ P.char '-'
+ cs <- P.many1 asciiLetter
+ let lcs = length cs
+ guard $ lcs == 2 || lcs == 3
+ return cs
+ pVariant = P.try $ do
+ P.char '-'
+ ds <- P.option "" (P.count 1 P.digit)
+ cs <- P.many1 asciiLetter
+ let var = ds ++ cs
+ guard $ if null ds
+ then length var >= 5 && length var <= 8
+ else length var == 4
+ return var
-- | Create JSON value for template from a 'Meta' and an association list
-- of variables, specified at the command line or in the writer.