summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorMatthew Pickering <matthewtpickering@gmail.com>2014-08-09 20:46:59 +0100
committerJesse Rosenthal <jrosenthal@jhu.edu>2014-08-09 22:37:12 -0400
commit504465c6a39a2fe8c5ecf744afa328f21ef250df (patch)
tree5e418f4259100e35b8ae1bcc011fc8a51eff6db7 /lib
parent2deaa7096f186c3a87a2cbf4f3ca8a042328246e (diff)
lib: Added symbol.txt and file to generate codepoint to unicode mapping
Diffstat (limited to 'lib')
-rw-r--r--lib/fonts/Makefile6
-rw-r--r--lib/fonts/parseUnicodeMapping.hs40
-rw-r--r--lib/fonts/symbol.txt256
3 files changed, 302 insertions, 0 deletions
diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile
new file mode 100644
index 000000000..5693ee054
--- /dev/null
+++ b/lib/fonts/Makefile
@@ -0,0 +1,6 @@
+symbol.hs: symbol.txt
+ runghc parseUnicodeMapping.hs symbol.txt
+
+.PHONY: clean
+clean:
+ -rm symbol.hs
diff --git a/lib/fonts/parseUnicodeMapping.hs b/lib/fonts/parseUnicodeMapping.hs
new file mode 100644
index 000000000..4f7ff692b
--- /dev/null
+++ b/lib/fonts/parseUnicodeMapping.hs
@@ -0,0 +1,40 @@
+import System.FilePath
+import Text.Parsec
+import Data.Char
+import System.Environment
+import Control.Applicative hiding (many)
+import Data.List
+
+main :: IO ()
+main = (head <$> getArgs) >>= parseUnicodeMapping
+
+
+parseUnicodeMapping :: FilePath -> IO ()
+parseUnicodeMapping fname = do
+ fin <- readFile fname
+ let mapname = dropExtension . takeFileName $ fname
+ let res = runParse fin
+ let header = "-- Generated from " ++ fname ++ "\n" ++
+ mapname ++ " :: [(Char, Char)]\n" ++ mapname ++" =\n [ "
+ let footer = "]"
+ writeFile (replaceExtension fname ".hs")
+ (header ++ (concat $ intersperse "\n , " (map show res)) ++ footer)
+
+type Unicode = Char
+
+runParse :: String -> [(Char, Unicode)]
+runParse s= either (error . show) id (parse parseMap "" s)
+
+anyline = manyTill anyChar newline
+
+getHexChar :: Parsec String () Char
+getHexChar = do
+ [(c,_)] <- readLitChar . ("\\x" ++) <$> many1 hexDigit
+ return c
+
+parseMap :: Parsec String () [(Char, Unicode)]
+parseMap = do
+ skipMany (char '#' >> anyline)
+ many (flip (,) <$> getHexChar <* tab <*> getHexChar <* anyline)
+
+
diff --git a/lib/fonts/symbol.txt b/lib/fonts/symbol.txt
new file mode 100644
index 000000000..b98baf6cf
--- /dev/null
+++ b/lib/fonts/symbol.txt
@@ -0,0 +1,256 @@
+#
+# Name: Adobe Symbol Encoding to Unicode
+# Unicode version: 2.0
+# Table version: 1.0
+# Date: 2011 July 12
+#
+# Copyright (c) 1991-2011 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). No
+# claims are made as to fitness for any particular purpose. No warranties of
+# any kind are expressed or implied. The recipient agrees to determine
+# applicability of information provided. If this file has been provided on
+# magnetic media by Unicode, Inc., the sole remedy for any claim will be
+# exchange of defective media within 90 days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+# Format: 4 tab-delimited fields:
+#
+# (1) The Unicode value (in hexadecimal)
+# (2) The Symbol Encoding code point (in hexadecimal)
+# (3) # Unicode name
+# (4) # PostScript character name
+#
+# General Notes:
+#
+# The Unicode values in this table were produced as the result of applying
+# the algorithm described in the section "Populating a Unicode space" in the
+# document "Unicode and Glyph Names," at
+# http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
+# to the characters in Symbol. Note that some characters, such as "space",
+# are mapped to 2 Unicode values. 29 characters have assignments in the
+# Corporate Use Subarea; these are indicated by "(CUS)" in field 4. Refer to
+# the above document for more details.
+#
+# 2011 July 12: The above link is no longer valid. For comparable,
+# more current information, see the document, "Glyph", at:
+# <http://www.adobe.com/devnet/opentype/archives/glyph.html>
+#
+# Revision History:
+#
+# [v1.0, 2011 July 12]
+# Updated terms of use to current wording.
+# Updated contact information and document link.
+# No changes to the mapping data.
+#
+# [v0.2, 30 March 1999]
+# Different algorithm to produce Unicode values (see notes above) results in
+# some character codes being mapped to 2 Unicode values; use of Corporate
+# Use subarea values; addition of the euro character; changed assignments of
+# some characters such as the COPYRIGHT SIGNs and RADICAL EXTENDER. Updated
+# Unicode names to Unicode 2.0 names.
+#
+# [v0.1, 5 May 1995] First release.
+#
+# Use the Unicode reporting form <http://www.unicode.org/reporting.html>
+# for any questions or comments or to report errors in the data.
+#
+0020 20 # SPACE # space
+00A0 20 # NO-BREAK SPACE # space
+0021 21 # EXCLAMATION MARK # exclam
+2200 22 # FOR ALL # universal
+0023 23 # NUMBER SIGN # numbersign
+2203 24 # THERE EXISTS # existential
+0025 25 # PERCENT SIGN # percent
+0026 26 # AMPERSAND # ampersand
+220B 27 # CONTAINS AS MEMBER # suchthat
+0028 28 # LEFT PARENTHESIS # parenleft
+0029 29 # RIGHT PARENTHESIS # parenright
+2217 2A # ASTERISK OPERATOR # asteriskmath
+002B 2B # PLUS SIGN # plus
+002C 2C # COMMA # comma
+2212 2D # MINUS SIGN # minus
+002E 2E # FULL STOP # period
+002F 2F # SOLIDUS # slash
+0030 30 # DIGIT ZERO # zero
+0031 31 # DIGIT ONE # one
+0032 32 # DIGIT TWO # two
+0033 33 # DIGIT THREE # three
+0034 34 # DIGIT FOUR # four
+0035 35 # DIGIT FIVE # five
+0036 36 # DIGIT SIX # six
+0037 37 # DIGIT SEVEN # seven
+0038 38 # DIGIT EIGHT # eight
+0039 39 # DIGIT NINE # nine
+003A 3A # COLON # colon
+003B 3B # SEMICOLON # semicolon
+003C 3C # LESS-THAN SIGN # less
+003D 3D # EQUALS SIGN # equal
+003E 3E # GREATER-THAN SIGN # greater
+003F 3F # QUESTION MARK # question
+2245 40 # APPROXIMATELY EQUAL TO # congruent
+0391 41 # GREEK CAPITAL LETTER ALPHA # Alpha
+0392 42 # GREEK CAPITAL LETTER BETA # Beta
+03A7 43 # GREEK CAPITAL LETTER CHI # Chi
+0394 44 # GREEK CAPITAL LETTER DELTA # Delta
+2206 44 # INCREMENT # Delta
+0395 45 # GREEK CAPITAL LETTER EPSILON # Epsilon
+03A6 46 # GREEK CAPITAL LETTER PHI # Phi
+0393 47 # GREEK CAPITAL LETTER GAMMA # Gamma
+0397 48 # GREEK CAPITAL LETTER ETA # Eta
+0399 49 # GREEK CAPITAL LETTER IOTA # Iota
+03D1 4A # GREEK THETA SYMBOL # theta1
+039A 4B # GREEK CAPITAL LETTER KAPPA # Kappa
+039B 4C # GREEK CAPITAL LETTER LAMDA # Lambda
+039C 4D # GREEK CAPITAL LETTER MU # Mu
+039D 4E # GREEK CAPITAL LETTER NU # Nu
+039F 4F # GREEK CAPITAL LETTER OMICRON # Omicron
+03A0 50 # GREEK CAPITAL LETTER PI # Pi
+0398 51 # GREEK CAPITAL LETTER THETA # Theta
+03A1 52 # GREEK CAPITAL LETTER RHO # Rho
+03A3 53 # GREEK CAPITAL LETTER SIGMA # Sigma
+03A4 54 # GREEK CAPITAL LETTER TAU # Tau
+03A5 55 # GREEK CAPITAL LETTER UPSILON # Upsilon
+03C2 56 # GREEK SMALL LETTER FINAL SIGMA # sigma1
+03A9 57 # GREEK CAPITAL LETTER OMEGA # Omega
+2126 57 # OHM SIGN # Omega
+039E 58 # GREEK CAPITAL LETTER XI # Xi
+03A8 59 # GREEK CAPITAL LETTER PSI # Psi
+0396 5A # GREEK CAPITAL LETTER ZETA # Zeta
+005B 5B # LEFT SQUARE BRACKET # bracketleft
+2234 5C # THEREFORE # therefore
+005D 5D # RIGHT SQUARE BRACKET # bracketright
+22A5 5E # UP TACK # perpendicular
+005F 5F # LOW LINE # underscore
+F8E5 60 # RADICAL EXTENDER # radicalex (CUS)
+03B1 61 # GREEK SMALL LETTER ALPHA # alpha
+03B2 62 # GREEK SMALL LETTER BETA # beta
+03C7 63 # GREEK SMALL LETTER CHI # chi
+03B4 64 # GREEK SMALL LETTER DELTA # delta
+03B5 65 # GREEK SMALL LETTER EPSILON # epsilon
+03C6 66 # GREEK SMALL LETTER PHI # phi
+03B3 67 # GREEK SMALL LETTER GAMMA # gamma
+03B7 68 # GREEK SMALL LETTER ETA # eta
+03B9 69 # GREEK SMALL LETTER IOTA # iota
+03D5 6A # GREEK PHI SYMBOL # phi1
+03BA 6B # GREEK SMALL LETTER KAPPA # kappa
+03BB 6C # GREEK SMALL LETTER LAMDA # lambda
+00B5 6D # MICRO SIGN # mu
+03BC 6D # GREEK SMALL LETTER MU # mu
+03BD 6E # GREEK SMALL LETTER NU # nu
+03BF 6F # GREEK SMALL LETTER OMICRON # omicron
+03C0 70 # GREEK SMALL LETTER PI # pi
+03B8 71 # GREEK SMALL LETTER THETA # theta
+03C1 72 # GREEK SMALL LETTER RHO # rho
+03C3 73 # GREEK SMALL LETTER SIGMA # sigma
+03C4 74 # GREEK SMALL LETTER TAU # tau
+03C5 75 # GREEK SMALL LETTER UPSILON # upsilon
+03D6 76 # GREEK PI SYMBOL # omega1
+03C9 77 # GREEK SMALL LETTER OMEGA # omega
+03BE 78 # GREEK SMALL LETTER XI # xi
+03C8 79 # GREEK SMALL LETTER PSI # psi
+03B6 7A # GREEK SMALL LETTER ZETA # zeta
+007B 7B # LEFT CURLY BRACKET # braceleft
+007C 7C # VERTICAL LINE # bar
+007D 7D # RIGHT CURLY BRACKET # braceright
+223C 7E # TILDE OPERATOR # similar
+20AC A0 # EURO SIGN # Euro
+03D2 A1 # GREEK UPSILON WITH HOOK SYMBOL # Upsilon1
+2032 A2 # PRIME # minute
+2264 A3 # LESS-THAN OR EQUAL TO # lessequal
+2044 A4 # FRACTION SLASH # fraction
+2215 A4 # DIVISION SLASH # fraction
+221E A5 # INFINITY # infinity
+0192 A6 # LATIN SMALL LETTER F WITH HOOK # florin
+2663 A7 # BLACK CLUB SUIT # club
+2666 A8 # BLACK DIAMOND SUIT # diamond
+2665 A9 # BLACK HEART SUIT # heart
+2660 AA # BLACK SPADE SUIT # spade
+2194 AB # LEFT RIGHT ARROW # arrowboth
+2190 AC # LEFTWARDS ARROW # arrowleft
+2191 AD # UPWARDS ARROW # arrowup
+2192 AE # RIGHTWARDS ARROW # arrowright
+2193 AF # DOWNWARDS ARROW # arrowdown
+00B0 B0 # DEGREE SIGN # degree
+00B1 B1 # PLUS-MINUS SIGN # plusminus
+2033 B2 # DOUBLE PRIME # second
+2265 B3 # GREATER-THAN OR EQUAL TO # greaterequal
+00D7 B4 # MULTIPLICATION SIGN # multiply
+221D B5 # PROPORTIONAL TO # proportional
+2202 B6 # PARTIAL DIFFERENTIAL # partialdiff
+2022 B7 # BULLET # bullet
+00F7 B8 # DIVISION SIGN # divide
+2260 B9 # NOT EQUAL TO # notequal
+2261 BA # IDENTICAL TO # equivalence
+2248 BB # ALMOST EQUAL TO # approxequal
+2026 BC # HORIZONTAL ELLIPSIS # ellipsis
+F8E6 BD # VERTICAL ARROW EXTENDER # arrowvertex (CUS)
+F8E7 BE # HORIZONTAL ARROW EXTENDER # arrowhorizex (CUS)
+21B5 BF # DOWNWARDS ARROW WITH CORNER LEFTWARDS # carriagereturn
+2135 C0 # ALEF SYMBOL # aleph
+2111 C1 # BLACK-LETTER CAPITAL I # Ifraktur
+211C C2 # BLACK-LETTER CAPITAL R # Rfraktur
+2118 C3 # SCRIPT CAPITAL P # weierstrass
+2297 C4 # CIRCLED TIMES # circlemultiply
+2295 C5 # CIRCLED PLUS # circleplus
+2205 C6 # EMPTY SET # emptyset
+2229 C7 # INTERSECTION # intersection
+222A C8 # UNION # union
+2283 C9 # SUPERSET OF # propersuperset
+2287 CA # SUPERSET OF OR EQUAL TO # reflexsuperset
+2284 CB # NOT A SUBSET OF # notsubset
+2282 CC # SUBSET OF # propersubset
+2286 CD # SUBSET OF OR EQUAL TO # reflexsubset
+2208 CE # ELEMENT OF # element
+2209 CF # NOT AN ELEMENT OF # notelement
+2220 D0 # ANGLE # angle
+2207 D1 # NABLA # gradient
+F6DA D2 # REGISTERED SIGN SERIF # registerserif (CUS)
+F6D9 D3 # COPYRIGHT SIGN SERIF # copyrightserif (CUS)
+F6DB D4 # TRADE MARK SIGN SERIF # trademarkserif (CUS)
+220F D5 # N-ARY PRODUCT # product
+221A D6 # SQUARE ROOT # radical
+22C5 D7 # DOT OPERATOR # dotmath
+00AC D8 # NOT SIGN # logicalnot
+2227 D9 # LOGICAL AND # logicaland
+2228 DA # LOGICAL OR # logicalor
+21D4 DB # LEFT RIGHT DOUBLE ARROW # arrowdblboth
+21D0 DC # LEFTWARDS DOUBLE ARROW # arrowdblleft
+21D1 DD # UPWARDS DOUBLE ARROW # arrowdblup
+21D2 DE # RIGHTWARDS DOUBLE ARROW # arrowdblright
+21D3 DF # DOWNWARDS DOUBLE ARROW # arrowdbldown
+25CA E0 # LOZENGE # lozenge
+2329 E1 # LEFT-POINTING ANGLE BRACKET # angleleft
+F8E8 E2 # REGISTERED SIGN SANS SERIF # registersans (CUS)
+F8E9 E3 # COPYRIGHT SIGN SANS SERIF # copyrightsans (CUS)
+F8EA E4 # TRADE MARK SIGN SANS SERIF # trademarksans (CUS)
+2211 E5 # N-ARY SUMMATION # summation
+F8EB E6 # LEFT PAREN TOP # parenlefttp (CUS)
+F8EC E7 # LEFT PAREN EXTENDER # parenleftex (CUS)
+F8ED E8 # LEFT PAREN BOTTOM # parenleftbt (CUS)
+F8EE E9 # LEFT SQUARE BRACKET TOP # bracketlefttp (CUS)
+F8EF EA # LEFT SQUARE BRACKET EXTENDER # bracketleftex (CUS)
+F8F0 EB # LEFT SQUARE BRACKET BOTTOM # bracketleftbt (CUS)
+F8F1 EC # LEFT CURLY BRACKET TOP # bracelefttp (CUS)
+F8F2 ED # LEFT CURLY BRACKET MID # braceleftmid (CUS)
+F8F3 EE # LEFT CURLY BRACKET BOTTOM # braceleftbt (CUS)
+F8F4 EF # CURLY BRACKET EXTENDER # braceex (CUS)
+232A F1 # RIGHT-POINTING ANGLE BRACKET # angleright
+222B F2 # INTEGRAL # integral
+2320 F3 # TOP HALF INTEGRAL # integraltp
+F8F5 F4 # INTEGRAL EXTENDER # integralex (CUS)
+2321 F5 # BOTTOM HALF INTEGRAL # integralbt
+F8F6 F6 # RIGHT PAREN TOP # parenrighttp (CUS)
+F8F7 F7 # RIGHT PAREN EXTENDER # parenrightex (CUS)
+F8F8 F8 # RIGHT PAREN BOTTOM # parenrightbt (CUS)
+F8F9 F9 # RIGHT SQUARE BRACKET TOP # bracketrighttp (CUS)
+F8FA FA # RIGHT SQUARE BRACKET EXTENDER # bracketrightex (CUS)
+F8FB FB # RIGHT SQUARE BRACKET BOTTOM # bracketrightbt (CUS)
+F8FC FC # RIGHT CURLY BRACKET TOP # bracerighttp (CUS)
+F8FD FD # RIGHT CURLY BRACKET MID # bracerightmid (CUS)
+F8FE FE # RIGHT CURLY BRACKET BOTTOM # bracerightbt (CUS)