summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/Pretty.hs
diff options
context:
space:
mode:
author: John MacFarlane <fiddlosopher@gmail.com> 2012-01-27 13:30:15 -0800
committer: John MacFarlane <fiddlosopher@gmail.com> 2012-01-27 13:30:47 -0800
commit: 5b3c0a10819a317cda5d6acaf2144a017bf55d75 (patch)
tree: 5651218ed35a6d2695adceed934bf9abac80bad5 /src/Text/Pandoc/Pretty.hs
parent: 663cfc2fbd0ff83f74dcde53e3cc98130fb645e0 (diff)
Made charWidth in Text.Pandoc.Pretty complete.
Used the unicode spec EastAsianWidth.txt. All characters marked W or F get width 2. Closes #377.
Diffstat (limited to 'src/Text/Pandoc/Pretty.hs')
-rw-r--r-- src/Text/Pandoc/Pretty.hs 64
1 file changed, 35 insertions, 29 deletions
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index c3979348e..b06f2f384 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -468,36 +468,42 @@ doubleQuotes = inside (char '"') (char '"')
-- | Number of display columns occupied by a character.
--
-- Returns @0@ for the combining diacritical marks U+0300..U+036F, @2@ for
-- the ranges treated as East Asian wide or fullwidth, and @1@ for
-- everything else (including ranges of ambiguous width).
--
-- The guards are tested in ascending code point order, so the first
-- matching range decides the result.  Code points that fall into a gap
-- between two listed ranges are handled by the final 'otherwise' and get
-- width @1@.
charWidth :: Char -> Int
charWidth c
  | c <  '\x0300'                     = 1
  | c >= '\x0300'  && c <= '\x036F'   = 0  -- combining
  | c >= '\x0370'  && c <= '\x10FC'   = 1
  | c >= '\x1100'  && c <= '\x115F'   = 2
  | c >= '\x1160'  && c <= '\x11A2'   = 1
  | c >= '\x11A3'  && c <= '\x11A7'   = 2
  | c >= '\x11A8'  && c <= '\x11F9'   = 1
  | c >= '\x11FA'  && c <= '\x11FF'   = 2
  | c >= '\x1200'  && c <= '\x2328'   = 1
  | c >= '\x2329'  && c <= '\x232A'   = 2
  | c >= '\x232B'  && c <= '\x2E31'   = 1
  | c >= '\x2E80'  && c <= '\x303E'   = 2
  | c == '\x303F'                     = 1
  | c >= '\x3041'  && c <= '\x3247'   = 2
  | c >= '\x3248'  && c <= '\x324F'   = 1  -- ambiguous
  | c >= '\x3250'  && c <= '\x4DBF'   = 2
  | c >= '\x4DC0'  && c <= '\x4DFF'   = 1
  | c >= '\x4E00'  && c <= '\xA4C6'   = 2
  | c >= '\xA4D0'  && c <= '\xA95F'   = 1
  | c >= '\xA960'  && c <= '\xA97C'   = 2
  | c >= '\xA980'  && c <= '\xABF9'   = 1
  | c >= '\xAC00'  && c <= '\xD7FB'   = 2
  | c >= '\xD800'  && c <= '\xDFFF'   = 1
  | c >= '\xE000'  && c <= '\xF8FF'   = 1  -- ambiguous
  | c >= '\xF900'  && c <= '\xFAFF'   = 2
  | c >= '\xFB00'  && c <= '\xFDFD'   = 1
  | c >= '\xFE00'  && c <= '\xFE0F'   = 1  -- ambiguous
  | c >= '\xFE10'  && c <= '\xFE19'   = 2
  | c >= '\xFE20'  && c <= '\xFE26'   = 1
  | c >= '\xFE30'  && c <= '\xFE6B'   = 2
  -- The fullwidth forms sit inside what would otherwise be one long
  -- narrow stretch (U+FE70..U+16A38); they must be carved out here or
  -- they would be reported as single-column.
  | c >= '\xFE70'  && c <= '\xFF00'   = 1
  | c >= '\xFF01'  && c <= '\xFF60'   = 2  -- fullwidth
  | c >= '\xFF61'  && c <= '\xFFDF'   = 1
  | c >= '\xFFE0'  && c <= '\xFFE6'   = 2  -- fullwidth
  | c >= '\xFFE7'  && c <= '\x16A38'  = 1
  | c >= '\x1B000' && c <= '\x1B001'  = 2
  | c >= '\x1D000' && c <= '\x1F1FF'  = 1
  | c >= '\x1F200' && c <= '\x1F251'  = 2
  | c >= '\x1F300' && c <= '\x1F773'  = 1
  | c >= '\x20000' && c <= '\x3FFFD'  = 2
  | otherwise                         = 1
-- | Get real length of string, taking into account combining and double-wide