src/silfont/scripts/psfcheckclassorders.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

#!/usr/bin/env python3
'''verify classes defined in xml have correct ordering where needed

Looks for comment lines in the classes.xml file that match the string:
  *NEXT n CLASSES MUST MATCH*
where n is the number of upcoming class definitions that must result in the
same glyph alignment when glyph names are sorted by TTF order (as described
in the glyph_data.csv file).
'''
__url__ = 'https://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2019 SIL International  (https://www.sil.org)'
__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

import re
import types
from xml.etree import ElementTree as ET
from silfont.core import execute

argspec = [
    ('classes', {'help': 'class definition in XML format', 'nargs': '?', 'default': 'classes.xml'}, {'type': 'infile'}),
    ('glyphdata', {'help': 'Glyph info csv file', 'nargs': '?', 'default': 'glyph_data.csv'}, {'type': 'incsv'}),
    ('--gname', {'help': 'Column header for glyph name', 'default': 'glyph_name'}, {}),
    ('--sort', {'help': 'Column header(s) for sort order', 'default': 'sort_final'}, {}),
]

# Dictionary of glyphName : sortValue
sorts = dict()

# Keep track of glyphs mentioned in classes but not in glyph_data.csv
missingGlyphs = set()

def doit(args):
    logger = args.logger

    # Read input csv to get glyph sort order
    incsv = args.glyphdata
    fl = incsv.firstline
    if fl is None: logger.log("Empty input file", "S")
    if args.gname in fl:
        glyphnpos = fl.index(args.gname)
    else:
        logger.log("No" + args.gname + "field in csv headers", "S")
    if args.sort in fl:
        sortpos = fl.index(args.sort)
    else:
        logger.log('No "' + args.sort + '" heading in csv headers"', "S")
    next(incsv.reader, None)  # Skip first line with containing headers
    for line in incsv:
        glyphn = line[glyphnpos]
        if len(glyphn) == 0:
            continue	# No need to include cases where name is blank
        sorts[glyphn] = float(line[sortpos])

    # RegEx we are looking for in comments
    matchCountRE = re.compile("\*NEXT ([1-9]\d*) CLASSES MUST MATCH\*")

    # parse classes.xml but include comments
    class MyTreeBuilder(ET.TreeBuilder):
        def comment(self, data):
            res = matchCountRE.search(data)
            if res:
                # record the count of classes that must match
                self.start(ET.Comment, {})
                self.data(res.group(1))
                self.end(ET.Comment)
    doc = ET.parse(args.classes, parser=ET.XMLParser(target=MyTreeBuilder())).getroot()

    # process results looking for both class elements and specially formatted comments
    matchCount = 0
    refClassList = None
    refClassName = None

    for child in doc:
        if isinstance(child.tag, types.FunctionType):
            # Special type used for comments
            if matchCount > 0:
                logger.log("Unexpected match request '{}': matching {} is not yet complete".format(child.text, refClassName), "E")
                ref = None
            matchCount = int(child.text)
            # print "Match count = {}".format(matchCount)

        elif child.tag == 'class':
            l = orderClass(child, logger)  # Do this so we record classes whether we match them or not.
            if matchCount > 0:
                matchCount -= 1
                className = child.attrib['name']
                if refClassName is None:
                    refClassList = l
                    refLen = len(refClassList)
                    refClassName = className
                else:
                    # compare ref list and l
                    if len(l) != refLen:
                        logger.log("Class {} (length {}) and {} (length {}) have unequal length".format(refClassName, refLen, className, len(l)), "E")
                    else:
                        errCount = 0
                        for i in range(refLen):
                            if l[i][0] != refClassList[i][0]:
                                logger.log ("Class {} and {} inconsistent order glyphs {} and {}".format(refClassName, className, refClassList[i][2], l[i][2]), "E")
                                errCount += 1
                                if errCount > 5:
                                    logger.log ("Abandoning compare between Classes {} and {}".format(refClassName, className), "E")
                                    break
                if matchCount == 0:
                    refClassName = None

    # List glyphs mentioned in classes.xml but not present in glyph_data:
    if len(missingGlyphs):
        logger.log('Glyphs mentioned in classes.xml but not present in glyph_data: ' + ', '.join(sorted(missingGlyphs)), 'W')


classes = {}  # Keep record of all classes we've seen so we can flatten references

def orderClass(classElement, logger):
    # returns a list of tuples, each containing (indexWithinClass, sortOrder, glyphName)
    # list is sorted by sortOrder
    glyphList = classElement.text.split()
    res = []
    for i in range(len(glyphList)):
        token = glyphList[i]
        if token.startswith('@'):
            # Nested class
            cname = token[1:]
            if cname in classes:
                res.extend(classes[cname])
            else:
                logger.log("Invalid fea: class {} referenced before being defined".format(cname),"S")
        else:
            # simple glyph name -- make sure it is in glyph_data:
            if token in sorts:
                res.append((i, sorts[token], token))
            else:
                missingGlyphs.add(token)

    classes[classElement.attrib['name']] = res
    return sorted(res, key=lambda x: x[1])


def cmd() : execute(None,doit,argspec)
if __name__ == "__main__": cmd()