diff options
author | Roozbeh Pournader <roozbeh@google.com> | 2014-07-31 20:09:19 -0700 |
---|---|---|
committer | James Godfrey-Kittle <jamesgk@google.com> | 2015-04-16 12:16:04 -0700 |
commit | 985c16569167e1cc7d6d1dcf15df77135e67913f (patch) | |
tree | f12335c6e41e83360acd1eca05a3f12691f0ed44 | |
parent | 8f7caa4ca489caca851ec0d8426d7857b6d7a051 (diff) |
Add scripts for testing minimal character coverage.
-rw-r--r-- | Makefile | 5 | ||||
-rw-r--r-- | res/char_requirements.tsv | 47 | ||||
-rwxr-xr-x | scripts/coverage_test.py | 139 |
3 files changed, 190 insertions, 1 deletions
@@ -54,7 +54,10 @@ glass: out/android/Roboto-Thin.ttf mkdir -p out/glass python scripts/touchup_for_glass.py $< out/glass/Roboto-Thin.ttf -test: test-android +test: test-android test-coverage test-android: python scripts/run_android_tests.py + +test-coverage: + python scripts/coverage_test.py diff --git a/res/char_requirements.tsv b/res/char_requirements.tsv new file mode 100644 index 0000000..67a665a --- /dev/null +++ b/res/char_requirements.tsv @@ -0,0 +1,47 @@ +# The only information parsed are the second, sixth, and seventh columns, +# which include the code point range, if the block should be completely +# supported, and if not, which characters should be included or excluded. + Code point range Total # of graphic characters in Unicode 7.0 Roboto (v1.200310) coverage # of characters to add to Roboto for Unicode 7.0 support should be fully covered to support LGC? What characters to cover if full coverage is not necessary +Basic Latin 0000..007F 95 95 0 1 +Latin-1 Supplement 0080..00FF 96 96 0 1 +Latin Extended-A 0100..017F 128 128 0 1 +Latin Extended-B 0180..024F 208 17 191 1 +IPA Extensions 0250..02AF 96 1 95 1 +Spacing Modifier Letters 02B0..02FF 80 11 69 1 +Combining Diacritical Marks 0300..036F 112 6 106 1 +Greek and Coptic 0370..03FF 135 75 46 0 Everything except 03E2..03EF +Cyrillic 0400..04FF 256 255 1 1 +Cyrillic Supplement 0500..052F 48 20 28 1 +Combining Diacritical Marks Extended 1AB0..1AFF 15 0 15 1 +Phonetic Extensions 1D00..1D7F 128 128 1 +Phonetic Extensions Supplement 1D80..1DBF 64 64 1 +Combining Diacritical Marks Supplement 1DC0..1DFF 58 58 1 +Latin Extended Additional 1E00..1EFF 256 100 156 1 +Greek Extended 1F00..1FFF 233 12 221 1 +General Punctuation 2000..206F 111 35 54 0 Everything except 2028..202E, 2060..2064, and 2066..206F +Superscripts and Subscripts 2070..209F 42 2 40 1 +Currency Symbols 20A0..20CF 29 5 24 1 +Combining Diacritical Marks for Symbols 20D0..20FF 33 5 0 20DB, 20DC, 20E3, 20E8, 20F0 +Letterlike Symbols 2100..214F 80 6 17 
0 2100, 2101, 2103, 2105, 2106, 2109, 2113, 2116, 2117, 211E..2123, 2125, 2126, 212A, 212B, 2132, 213B, 214D, 214F +Number Forms 2150..218F 58 4 54 1 +Arrows 2190..21FF 112 2 0 2191, 2193 +Mathematical Operators 2200..22FF 256 12 0 0 2202, 2206, 220F, 2211, 2212, 221A, 221E, 222B, 2248, 2260, 2264, 2265 +Miscellaneous Technical 2300..23FF 44 0 0 +Control Pictures 2400..243F 39 1 0 2423 +Box Drawing 2500..257F 128 0 0 +Block Elements 2580..259F 32 0 0 +Geometric Shapes 25A0..25FF 96 1 0 0 +Miscellaneous Symbols 2600..26FF 256 0 0 +Miscellaneous Mathematical Symbols-A 27C0..27EF 48 10 0 27E6..27EF +Miscellaneous Symbols and Arrows 2B37..2B5E 8 0 2B4E, 2B4F, 2B5A..2B5F +Latin Extended-C 2C60..2C7F 32 32 1 +Cyrillic Extended-A 2DE0..2DFF 32 32 1 +Supplemental Punctuation 2E00..2E7F 69 69 1 +Cyrillic Extended-B A640..A69F 95 95 1 +Modifier Tone Letters A700..A71F 32 32 1 +Latin Extended-D A720..A7FF 152 152 1 +Latin Extended-E AB30..ABBF 50 50 1 +Alphabetic Presentation Forms FB00..FB4F 58 4 3 0 FB00..FB06 +Combining Half Marks FE20..FE2F 14 14 1 +Specials FFF0..FFFF 5 2 0 0 +Enclosed Alphanumeric Supplement 1F100..1F1FF 2 0 1F16A, 1F16B diff --git a/scripts/coverage_test.py b/scripts/coverage_test.py new file mode 100755 index 0000000..bb25041 --- /dev/null +++ b/scripts/coverage_test.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +"""Routines for checking character coverage of Roboto fonts. + +This scripts takes the name of the directory where the fonts are and checks +that they cover all characters required in the Roboto extension contract. + +The data is in res/char_requirements.tsv. 
__author__ = (
    "roozbeh@google.com (Roozbeh Pournader) and "
    "cibu@google.com (Cibu Johny)")

import glob
import sys

from fontTools import ttLib
from nototools import coverage
from nototools import font_data
from nototools import unicode_data

try:
    unichr  # Python 2 built-in.
except NameError:
    unichr = chr  # Python 3: chr() already returns a Unicode string.


# Number of tab-separated columns in a row of res/char_requirements.tsv.
_NUM_COLUMNS = 7


def load_fonts():
    """Load the Roboto and Roboto Condensed TrueType fonts.

    The fonts are looked up relative to the current working directory, in
    out/RobotoTTF/ and out/RobotoCondensedTTF/.

    Returns:
        A list of ttLib.TTFont objects, one per .ttf file found.
    """
    font_paths = (glob.glob('out/RobotoTTF/*.ttf') +
                  glob.glob('out/RobotoCondensedTTF/*.ttf'))
    return [ttLib.TTFont(path) for path in font_paths]


def _character_name(code):
    """Return the printable name of the character with code point `code`.

    '<Unassigned>' is handed to unicode_data.name as its second argument
    (presumably the fallback for characters that have no name).
    """
    return unicode_data.name(unichr(code), '<Unassigned>')


def _print_char(code, additional_info=None):
    """Write one line describing a character to standard output.

    Args:
        code: Integer code point of the character.
        additional_info: Optional string appended after a tab.
    """
    sys.stdout.write('U+%04X %s' % (code, _character_name(code)))

    if additional_info is not None:
        sys.stdout.write('\t' + additional_info)

    sys.stdout.write('\n')


def _range_string_to_set(range_str):
    """Convert a hexadecimal range such as '2028..202E' or '2423' to a set.

    Args:
        range_str: Either a single hexadecimal code point, or two of them
            separated by '..' (both ends inclusive).

    Returns:
        A set of integer code points.

    Raises:
        ValueError: If a bound is not valid hexadecimal, or if the string
            contains more than one '..' separator.
    """
    if '..' in range_str:
        first, last = (int(bound, 16) for bound in range_str.split('..'))
        return set(range(first, last + 1))
    return {int(range_str, 16)}


def _multiple_range_string_to_set(ranges_str):
    """Convert a ', '-separated list of ranges to a single set.

    An item may be prefixed with 'and ' (as in 'A, B, and C'); the prefix
    is dropped before the item is parsed.

    Args:
        ranges_str: A string such as '2028..202E, 2060..2064, and 2066'.

    Returns:
        The union of all the listed ranges, as a set of integers.
    """
    char_set = set()
    for range_str in ranges_str.split(', '):
        if range_str.startswith('and '):
            range_str = range_str[len('and '):]
        char_set.update(_range_string_to_set(range_str))
    return char_set


def _defined_characters_in_range(range_str):
    """Return the code points in a range that unicode_data reports as defined.

    Args:
        range_str: A range string accepted by _range_string_to_set.

    Returns:
        A set of integer code points.
    """
    return {code for code in _range_string_to_set(range_str)
            if unicode_data.is_defined(code)}


_EXCEPTION_STARTER = 'Everything except '


def _find_required_chars(block_range, full_coverage_required, exceptions):
    """Find the required characters described by one row of the spreadsheet.

    Args:
        block_range: Range string for the whole Unicode block.
        full_coverage_required: True if every defined character of the
            block is required.
        exceptions: Only consulted when full coverage is not required.
            Empty means nothing is required. A value starting with
            'Everything except ' lists characters to exclude from the
            block; any other value lists the only characters required.

    Returns:
        A set of integer code points, always limited to the characters
        defined in the block.
    """
    chars_defined_in_block = _defined_characters_in_range(block_range)
    if full_coverage_required:
        return chars_defined_in_block
    if not exceptions:
        return set()
    if exceptions.startswith(_EXCEPTION_STARTER):
        chars_to_exclude = _multiple_range_string_to_set(
            exceptions[len(_EXCEPTION_STARTER):])
        return chars_defined_in_block - chars_to_exclude
    chars_to_limit_to = _multiple_range_string_to_set(exceptions)
    return chars_defined_in_block & chars_to_limit_to


def _parse_requirements(char_reqs_data):
    """Compute the required character set from the requirements TSV text.

    The format of the data to be parsed is like the following:
    General Punctuation\\t2000..206F\\t111\\t35\\t54\\t0\\tEverything except
        2028..202E, 2060..2064, and 2066..206F
    Currency Symbols\\t20A0..20CF\\t29\\t5\\t24\\t1\\t

    Only the second, sixth and seventh columns are used.
    """
    required_set = set()
    for line in char_reqs_data.split('\n'):
        if line.startswith('#'):  # Skip comment lines
            continue
        # Tolerate CRLF line endings, which would otherwise leave a stray
        # '\r' on the last column and break the "== '1'" test below.
        fields = line.rstrip('\r').split('\t')
        if not fields[0]:
            continue  # Skip the header line and empty lines
        # A row whose trailing empty columns were stripped is still valid;
        # pad it rather than failing with an IndexError.
        fields += [''] * (_NUM_COLUMNS - len(fields))
        block_range = fields[1]
        full_coverage_required = (fields[5] == '1')
        exceptions = fields[6]
        required_set.update(
            _find_required_chars(block_range,
                                 full_coverage_required,
                                 exceptions))
    return required_set


def main():
    """Check the character coverage of all Roboto fonts.

    Reads res/char_requirements.tsv, prints the required characters that
    each loaded font is missing, and exits with status 1 if any font is
    missing at least one of them.
    """
    with open('res/char_requirements.tsv') as char_reqs_file:
        char_reqs_data = char_reqs_file.read()

    required_set = _parse_requirements(char_reqs_data)

    # Skip the C0 controls (U+0000..U+001F) and DEL plus the C1 controls
    # (U+007F..U+009F).
    required_set -= set(range(0, 0x20)) | set(range(0x7F, 0xA0))

    missing_char_found = False
    for font in load_fonts():
        font_coverage = coverage.character_set(font)
        missing_chars = required_set - font_coverage
        if missing_chars:
            missing_char_found = True
            font_name = font_data.font_name(font)
            # Parenthesised single-argument print behaves the same as the
            # Python 2 print statement and is also valid Python 3.
            print('Characters missing from %s:' % font_name)
            for char in sorted(missing_chars):
                _print_char(char)
            print('')

    if missing_char_found:
        sys.exit(1)


if __name__ == '__main__':
    main()