summaryrefslogtreecommitdiffstats
path: root/src/silfont/scripts/psfcheckglyphinventory.py
blob: 4a805d460cf0356f3e491bab6f4356876faffc74 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
__doc__ = '''Warn for differences in glyph inventory and encoding between UFO and input file (e.g., glyph_data.csv). 
Input file can be: 
    - simple text file with one glyph name per line
    - csv file with headers, using headers "glyph_name" and, if present, "USV"'''
__url__ = 'https://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2020-2023 SIL International (https://www.sil.org)'
__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

from silfont.core import execute

# Argument specification handed to execute() by cmd(); one tuple per command-line argument.
argspec = [
    (
        'ifont',
        {'help': 'Input UFO'},
        {'type': 'infont'},
    ),
    (
        '-i', '--input',
        {
            'help': 'Input text file, default glyph_data.csv in current directory',
            'default': 'glyph_data.csv',
        },
        {'type': 'incsv'},
    ),
    (
        '--indent',
        {
            'help': 'size of indent (default 10)',
            'type': int,
            'default': 10,
        },
        {},
    ),
    (
        '-l', '--log',
        {'help': 'Log file'},
        {'type': 'outfile', 'def': '_checkinventory.log'},
    ),
]

def doit(args):
    """Report glyph inventory and encoding differences between a UFO and an input list.

    Glyph names (and, for csv input that has a "USV" header, encodings) are read
    from ``args.input`` and compared with the glyphs in ``args.ifont.deflayer``.
    Every difference is reported through ``args.logger`` at level 'W'; each
    comparison that finds nothing logs a 'P' message instead.

    Args:
        args: parsed command-line arguments. Attributes read here are ``ifont``,
            ``input``, ``logger``, ``indent``, ``quiet`` and, when ``quiet`` is
            false, ``paramsobj``.

    Returns:
        None. All results are emitted through the logger.
    """
    font = args.ifont
    incsv = args.input
    logger = args.logger
    indent = ' ' * args.indent

    if not (args.quiet or 'scrlevel' in args.paramsobj.sets['command line']):
        logger.raisescrlevel('W')  # Raise level to W if not already W or higher

    def csvWarning(msg, exception=None):
        # Warn about the input line the reader is currently positioned on
        m = f'glyph_data line {incsv.line_num}: {msg}'
        if exception is not None:
            m += '; ' + str(exception)  # Python 3 exceptions have no .message attribute
        logger.log(m, 'W')

    def reportDifferences(kind, noneFoundMsg, notInUFO, notInGlyphData):
        # Log each non-empty difference set as a sorted, indented list, or noneFoundMsg if both are empty
        if notInUFO:
            logger.log(f'{kind} present in glyph_data but missing from UFO:\n' + '\n'.join(indent + g for g in sorted(notInUFO)), 'W')
        if notInGlyphData:
            logger.log(f'{kind} present in UFO but missing from glyph_data:\n' + '\n'.join(indent + g for g in sorted(notInGlyphData)), 'W')
        if not notInUFO and not notInGlyphData:
            logger.log(noneFoundMsg, 'P')

    # Get glyph names and encoding from input file
    glyphFromCSVuid = {}
    uidsFromCSVglyph = {}

    # Identify file format (plain text or csv) from first line
    # If csv file, it must have headers for "glyph_name" and "USV"
    # NOTE(review): level 'S' is presumably fatal inside the logger; the explicit
    # returns below make sure we never carry on with unusable input if it is not.
    fl = incsv.firstline
    if fl is None:
        logger.log('Empty input file', 'S')
        return
    numfields = len(fl)
    incsv.numfields = numfields
    usvCol = None  # Stays None when there is no USV column, i.e. encodings cannot be checked
    if numfields > 1:  # More than 1 column, so must have headers
        # Required columns:
        try:
            nameCol = fl.index('glyph_name')
        except ValueError as e:
            logger.log('Missing csv input field: ' + str(e), 'S')
            return
        except Exception as e:
            logger.log('Error reading csv input field: ' + str(e), 'S')
            return
        # Optional columns:
        usvCol = fl.index('USV') if 'USV' in fl else None

        next(incsv.reader, None)  # Skip first line with headers in

        glyphList = set()
        for line in incsv:
            gname = line[nameCol]
            if len(gname) == 0 or line[0].strip().startswith('#'):
                continue    # No need to include cases where name is blank or comment
            if gname in glyphList:
                csvWarning(f'glyph name {gname} previously seen; ignored')
                continue
            glyphList.add(gname)

            # Compare against None: the USV column may legitimately be column 0
            if usvCol is None:
                continue

            # Process USV field, which can be:
            #   empty string -- unencoded glyph
            #   single USV -- encoded glyph
            #   USVs connected by '_' -- ligature (in glyph_data for test generation, not glyph encoding)
            #   space-separated list of the above, where presence of multiple USVs indicates multiply-encoded glyph
            for usv in line[usvCol].split():
                if '_' in usv:
                    # ignore ligatures -- these are for test generation, not encoding
                    continue
                try:
                    uid = int(usv, 16)
                except ValueError as e:
                    csvWarning("invalid USV '%s' (%s); ignored: " % (usv, e))
                    continue  # No usable uid for this entry; do not fall through with a stale one

                if uid in glyphFromCSVuid:
                    csvWarning('USV %04X previously seen; ignored' % uid)
                else:
                    # Remember this glyph encoding
                    glyphFromCSVuid[uid] = gname
                    uidsFromCSVglyph.setdefault(gname, set()).add(uid)
    elif numfields == 1:   # Simple text file.
        glyphList = {line[0] for line in incsv}
    else:
        logger.log('Invalid csv file', 'S')
        return

    # Get the list of glyphs in the UFO
    ufoList = set(font.deflayer.keys())

    reportDifferences('Glyphs', 'No glyph inventory differences found',
                      glyphList - ufoList, ufoList - glyphList)

    if usvCol is None:
        logger.log('Glyph encodings not compared', 'P')
        return

    # We can check encoding of glyphs in common
    inBoth = glyphList & ufoList   # Glyphs we want to examine

    # Encodings are compared as "glyphname|XXXX" strings so that they sort and print directly
    csvEncodings = {f'{gname}|{uid:04X}' for gname in inBoth for uid in uidsFromCSVglyph.get(gname, ())}
    ufoEncodings = {f'{gname}|{int(u.hex, 16):04X}' for gname in inBoth for u in font.deflayer[gname]['unicode']}

    reportDifferences('Encodings', 'No glyph encoding differences found',
                      csvEncodings - ufoEncodings, ufoEncodings - csvEncodings)


def cmd():
    """Command-line entry point: hand doit and argspec to execute() with tool type 'UFO'."""
    execute('UFO', doit, argspec)


if __name__ == '__main__':
    cmd()