1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
#!/usr/bin/env python3
__doc__ = '''Warn for differences in glyph inventory and encoding between UFO and input file (e.g., glyph_data.csv).
Input file can be:
- simple text file with one glyph name per line
- csv file with headers, using headers "glyph_name" and, if present, "USV"'''
__url__ = 'https://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2020-2023 SIL International (https://www.sil.org)'
__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'
from silfont.core import execute
# Command-line argument specification; cmd() passes this list to execute().
# Each entry is a tuple: the option name(s), a dict of help/type/default
# settings, and a trailing dict of extra options (presumably interpreted by
# silfont.core -- confirm there).
argspec = [
    (
        'ifont',
        {'help': 'Input UFO'},
        {'type': 'infont'},
    ),
    (
        '-i', '--input',
        {
            'help': 'Input text file, default glyph_data.csv in current directory',
            'default': 'glyph_data.csv',
        },
        {'type': 'incsv'},
    ),
    (
        '--indent',
        {
            'help': 'size of indent (default 10)',
            'type': int,
            'default': 10,
        },
        {},
    ),
    (
        '-l', '--log',
        {'help': 'Log file'},
        {'type': 'outfile', 'def': '_checkinventory.log'},
    ),
]
def doit(args):
    """Report glyph inventory and encoding differences between a UFO and an input list.

    Glyph names (and, when the input is a csv with a "USV" header, codepoints)
    are read from ``args.input`` and compared with ``args.ifont.deflayer``.
    Every difference is reported through ``args.logger``: 'W' for differences
    and per-line input problems, 'P' for the summary lines, 'S' for unusable
    input.

    Args:
        args: parsed command-line namespace. This function reads ``ifont``,
            ``input``, ``logger``, ``indent``, ``quiet`` and ``paramsobj``.

    Returns:
        None; all results are emitted via the logger.
    """
    font = args.ifont
    incsv = args.input
    logger = args.logger
    indent = ' ' * args.indent

    if not (args.quiet or 'scrlevel' in args.paramsobj.sets['command line']):
        logger.raisescrlevel('W')  # Raise level to W if not already W or higher

    def csvWarning(msg, exception=None):
        """Log a 'W' message prefixed with the current input line number."""
        m = f'glyph_data line {incsv.line_num}: {msg}'
        if exception is not None:
            # Python 3 exceptions have no .message attribute; str() gives the text.
            m += '; ' + str(exception)
        logger.log(m, 'W')

    # Get glyph names and encoding from input file
    glyphFromCSVuid = {}      # codepoint -> glyph name that first claimed it
    uidsFromCSVglyph = {}     # glyph name -> set of codepoints

    # Identify file format (plain text or csv) from first line
    # If csv file, it must have headers for "glyph_name" and "USV"
    fl = incsv.firstline
    if fl is None:
        logger.log('Empty input file', 'S')
        # NOTE(review): 'S' presumably aborts inside the logger; the explicit
        # return keeps this function well-defined even if it does not.
        return
    numfields = len(fl)
    incsv.numfields = numfields
    usvCol = None  # Use this as a flag later to determine whether to check USV inventory

    if numfields > 1:  # More than 1 column, so must have headers
        # Required columns:
        try:
            nameCol = fl.index('glyph_name')
        except ValueError as e:
            logger.log('Missing csv input field: ' + str(e), 'S')
            return
        except Exception as e:
            logger.log('Error reading csv input field: ' + str(e), 'S')
            return
        # Optional columns:
        usvCol = fl.index('USV') if 'USV' in fl else None

        next(incsv.reader, None)  # Skip first line with headers in

        glyphList = set()
        for line in incsv:
            gname = line[nameCol]
            if not gname or line[0].strip().startswith('#'):
                continue  # No need to include cases where name is blank or comment
            if gname in glyphList:
                csvWarning(f'glyph name {gname} previously seen; ignored')
                continue
            glyphList.add(gname)

            # Compare against None: the USV column may legitimately be index 0.
            if usvCol is not None:
                # Process USV field, which can be:
                #   empty string -- unencoded glyph
                #   single USV -- encoded glyph
                #   USVs connected by '_' -- ligature (in glyph_data for test generation, not glyph encoding)
                #   space-separated list of the above, where presence of multiple USVs indicates multiply-encoded glyph
                for usv in line[usvCol].split():
                    if '_' in usv:
                        # ignore ligatures -- these are for test generation, not encoding
                        continue
                    try:
                        uid = int(usv, 16)
                    except ValueError as e:
                        csvWarning("invalid USV '%s' (%s); ignored: " % (usv, e))
                        # Skip this value; otherwise uid would be unbound or
                        # left over from the previous iteration.
                        continue

                    if uid in glyphFromCSVuid:
                        csvWarning('USV %04X previously seen; ignored' % uid)
                    else:
                        # Remember this glyph encoding
                        glyphFromCSVuid[uid] = gname
                        uidsFromCSVglyph.setdefault(gname, set()).add(uid)
    elif numfields == 1:  # Simple text file.
        glyphList = set(line[0] for line in incsv)
    else:
        logger.log('Invalid csv file', 'S')
        return

    # Get the list of glyphs in the UFO
    ufoList = set(font.deflayer.keys())

    notInUFO = glyphList - ufoList
    notInGlyphData = ufoList - glyphList

    if notInUFO:
        logger.log('Glyphs present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W')

    if notInGlyphData:
        logger.log('Glyphs present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W')

    if not notInUFO and not notInGlyphData:
        logger.log('No glyph inventory differences found', 'P')

    if usvCol is not None:
        # We can check encoding of glyphs in common
        inBoth = glyphList & ufoList  # Glyphs we want to examine

        # Encodings are compared as "name|XXXX" strings so they sort readably in the log.
        csvEncodings = set(
            f'{gname}|{uid:04X}'
            for gname in inBoth if gname in uidsFromCSVglyph
            for uid in uidsFromCSVglyph[gname]
        )
        ufoEncodings = set(
            f'{gname}|{int(u.hex, 16):04X}'
            for gname in inBoth
            for u in font.deflayer[gname]['unicode']
        )

        notInUFO = csvEncodings - ufoEncodings
        notInGlyphData = ufoEncodings - csvEncodings

        if notInUFO:
            logger.log('Encodings present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W')

        if notInGlyphData:
            logger.log('Encodings present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W')

        if not notInUFO and not notInGlyphData:
            logger.log('No glyph encoding differences found', 'P')

    else:
        logger.log('Glyph encodings not compared', 'P')
def cmd():
    """Entry point: hand doit and argspec to execute() with 'UFO' as its first argument."""
    execute('UFO', doit, argspec)
# Run the command only when this file is executed as a script.
if __name__ == '__main__':
    cmd()
|