summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoozbeh Pournader <roozbeh@google.com>2014-07-31 20:09:19 -0700
committerJames Godfrey-Kittle <jamesgk@google.com>2015-04-16 12:16:04 -0700
commit985c16569167e1cc7d6d1dcf15df77135e67913f (patch)
treef12335c6e41e83360acd1eca05a3f12691f0ed44
parent8f7caa4ca489caca851ec0d8426d7857b6d7a051 (diff)
Add scripts for testing minimal character coverage.
-rw-r--r--Makefile5
-rw-r--r--res/char_requirements.tsv47
-rwxr-xr-xscripts/coverage_test.py139
3 files changed, 190 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index dabcd2b..8e3ceda 100644
--- a/Makefile
+++ b/Makefile
@@ -54,7 +54,10 @@ glass: out/android/Roboto-Thin.ttf
mkdir -p out/glass
python scripts/touchup_for_glass.py $< out/glass/Roboto-Thin.ttf
-test: test-android
+test: test-android test-coverage
test-android:
python scripts/run_android_tests.py
+
+test-coverage:
+ python scripts/coverage_test.py
diff --git a/res/char_requirements.tsv b/res/char_requirements.tsv
new file mode 100644
index 0000000..67a665a
--- /dev/null
+++ b/res/char_requirements.tsv
@@ -0,0 +1,47 @@
+# Only the second, sixth, and seventh columns are parsed. They give the
+# code point range, whether the block should be completely supported, and,
+# if not, which characters should be included or excluded.
+ Code point range Total # of graphic characters in Unicode 7.0 Roboto (v1.200310) coverage # of characters to add to Roboto for Unicode 7.0 support should be fully covered to support LGC? What characters to cover if full coverage is not necessary
+Basic Latin 0000..007F 95 95 0 1
+Latin-1 Supplement 0080..00FF 96 96 0 1
+Latin Extended-A 0100..017F 128 128 0 1
+Latin Extended-B 0180..024F 208 17 191 1
+IPA Extensions 0250..02AF 96 1 95 1
+Spacing Modifier Letters 02B0..02FF 80 11 69 1
+Combining Diacritical Marks 0300..036F 112 6 106 1
+Greek and Coptic 0370..03FF 135 75 46 0 Everything except 03E2..03EF
+Cyrillic 0400..04FF 256 255 1 1
+Cyrillic Supplement 0500..052F 48 20 28 1
+Combining Diacritical Marks Extended 1AB0..1AFF 15 0 15 1
+Phonetic Extensions 1D00..1D7F 128 128 1
+Phonetic Extensions Supplement 1D80..1DBF 64 64 1
+Combining Diacritical Marks Supplement 1DC0..1DFF 58 58 1
+Latin Extended Additional 1E00..1EFF 256 100 156 1
+Greek Extended 1F00..1FFF 233 12 221 1
+General Punctuation 2000..206F 111 35 54 0 Everything except 2028..202E, 2060..2064, and 2066..206F
+Superscripts and Subscripts 2070..209F 42 2 40 1
+Currency Symbols 20A0..20CF 29 5 24 1
+Combining Diacritical Marks for Symbols 20D0..20FF 33 5 0 20DB, 20DC, 20E3, 20E8, 20F0
+Letterlike Symbols 2100..214F 80 6 17 0 2100, 2101, 2103, 2105, 2106, 2109, 2113, 2116, 2117, 211E..2123, 2125, 2126, 212A, 212B, 2132, 213B, 214D, 214F
+Number Forms 2150..218F 58 4 54 1
+Arrows 2190..21FF 112 2 0 2191, 2193
+Mathematical Operators 2200..22FF 256 12 0 0 2202, 2206, 220F, 2211, 2212, 221A, 221E, 222B, 2248, 2260, 2264, 2265
+Miscellaneous Technical 2300..23FF 44 0 0
+Control Pictures 2400..243F 39 1 0 2423
+Box Drawing 2500..257F 128 0 0
+Block Elements 2580..259F 32 0 0
+Geometric Shapes 25A0..25FF 96 1 0 0
+Miscellaneous Symbols 2600..26FF 256 0 0
+Miscellaneous Mathematical Symbols-A 27C0..27EF 48 10 0 27E6..27EF
+Miscellaneous Symbols and Arrows 2B37..2B5E 8 0 2B4E, 2B4F, 2B5A..2B5F
+Latin Extended-C 2C60..2C7F 32 32 1
+Cyrillic Extended-A 2DE0..2DFF 32 32 1
+Supplemental Punctuation 2E00..2E7F 69 69 1
+Cyrillic Extended-B A640..A69F 95 95 1
+Modifier Tone Letters A700..A71F 32 32 1
+Latin Extended-D A720..A7FF 152 152 1
+Latin Extended-E AB30..ABBF 50 50 1
+Alphabetic Presentation Forms FB00..FB4F 58 4 3 0 FB00..FB06
+Combining Half Marks FE20..FE2F 14 14 1
+Specials FFF0..FFFF 5 2 0 0
+Enclosed Alphanumeric Supplement 1F100..1F1FF 2 0 1F16A, 1F16B
diff --git a/scripts/coverage_test.py b/scripts/coverage_test.py
new file mode 100755
index 0000000..bb25041
--- /dev/null
+++ b/scripts/coverage_test.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+"""Routines for checking character coverage of Roboto fonts.
+
+This script checks that the fonts in out/RobotoTTF and out/RobotoCondensedTTF
+cover all characters required in the Roboto extension contract.
+
+The data is in res/char_requirements.tsv.
+"""
+
+__author__ = (
+ "roozbeh@google.com (Roozbeh Pournader) and "
+ "cibu@google.com (Cibu Johny)")
+
+import sys
+
+import glob
+
+from fontTools import ttLib
+from nototools import coverage
+from nototools import font_data
+from nototools import unicode_data
+
+
+def load_fonts():
+ """Load all fonts built for Android."""
+ all_fonts = (glob.glob('out/RobotoTTF/*.ttf')
+ + glob.glob('out/RobotoCondensedTTF/*.ttf'))
+ all_fonts = [ttLib.TTFont(font) for font in all_fonts]
+ return all_fonts
+
+
+def _character_name(code):
+ """Returns the printable name of a character."""
+ return unicode_data.name(unichr(code), '<Unassigned>')
+
+
+def _print_char(code, additional_info=None):
+ """Print a Unicode character as code and name and perhaps extra info."""
+ sys.stdout.write('U+%04X %s' % (code, _character_name(code)))
+
+ if additional_info is not None:
+ sys.stdout.write('\t' + additional_info)
+
+ sys.stdout.write('\n')
+
+
+def _range_string_to_set(range_str):
+ """Convert a range encoding in a string to a set."""
+ if '..' in range_str:
+ range_start, range_end = range_str.split('..')
+ range_start = int(range_start, 16)
+ range_end = int(range_end, 16)
+ return set(range(range_start, range_end+1))
+ else:
+ return {int(range_str, 16)}
+
+
+def _multiple_range_string_to_set(ranges_str):
+ """Convert a string of multiple ranges to a set."""
+ char_set = set()
+ for range_str in ranges_str.split(', '):
+ if range_str.startswith('and '):
+ range_str = range_str[4:] # drop the 'and '
+ char_set.update(_range_string_to_set(range_str))
+ return char_set
+
+
+def _defined_characters_in_range(range_str):
+ """Given a range string, returns defined Unicode characters in the range."""
+ characters = set()
+ for code in _range_string_to_set(range_str):
+ if unicode_data.is_defined(code):
+ characters.add(code)
+ return characters
+
+
+_EXCEPTION_STARTER = 'Everything except '
+
+
+def _find_required_chars(block_range, full_coverage_required, exceptions):
+ """Finds required coverage based on a row of the spreadsheet."""
+ chars_defined_in_block = _defined_characters_in_range(block_range)
+ if full_coverage_required:
+ return chars_defined_in_block
+ else:
+ if not exceptions:
+ return set()
+ if exceptions.startswith(_EXCEPTION_STARTER):
+ exceptions = exceptions[len(_EXCEPTION_STARTER):]
+ chars_to_exclude = _multiple_range_string_to_set(exceptions)
+ return chars_defined_in_block - chars_to_exclude
+ else:
+ chars_to_limit_to = _multiple_range_string_to_set(exceptions)
+ return chars_defined_in_block & chars_to_limit_to
+
+
+def main():
+ """Checks the coverage of all Roboto fonts."""
+ with open('res/char_requirements.tsv') as char_reqs_file:
+ char_reqs_data = char_reqs_file.read()
+
+ # The format of the data to be parsed is like the following:
+ # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F
+ # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t
+ required_set = set()
+ for line in char_reqs_data.split('\n'):
+ if line.startswith('#'): # Skip comment lines
+ continue
+ line = line.split('\t')
+ if not line[0]:
+ continue # Skip the column-header row (empty first column) and empty lines
+ block_range = line[1]
+ full_coverage_required = (line[5] == '1')
+ exceptions = line[6]
+ required_set.update(
+ _find_required_chars(block_range,
+ full_coverage_required,
+ exceptions))
+
+ # Skip ASCII and C1 controls
+ required_set -= set(range(0, 0x20) + range(0x7F, 0xA0))
+
+ missing_char_found = False
+ for font in load_fonts():
+ font_coverage = coverage.character_set(font)
+ missing_chars = required_set - font_coverage
+ if missing_chars:
+ missing_char_found = True
+ font_name = font_data.font_name(font)
+ print 'Characters missing from %s:' % font_name
+ for char in sorted(missing_chars):
+ _print_char(char)
+ print
+
+ if missing_char_found:
+ sys.exit(1)
+
+if __name__ == '__main__':
+ main()