summaryrefslogtreecommitdiffstats
path: root/src/silfont/scripts/psfcheckclassorders.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/silfont/scripts/psfcheckclassorders.py')
-rw-r--r--src/silfont/scripts/psfcheckclassorders.py142
1 files changed, 142 insertions, 0 deletions
diff --git a/src/silfont/scripts/psfcheckclassorders.py b/src/silfont/scripts/psfcheckclassorders.py
new file mode 100644
index 0000000..1dcd517
--- /dev/null
+++ b/src/silfont/scripts/psfcheckclassorders.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+'''verify classes defined in xml have correct ordering where needed
+
+Looks for comment lines in the classes.xml file that match the string:
+ *NEXT n CLASSES MUST MATCH*
+where n is the number of upcoming class definitions that must result in the
+same glyph alignment when glyph names are sorted by TTF order (as described
+in the glyph_data.csv file).
+'''
+__url__ = 'https://github.com/silnrsi/pysilfont'
+__copyright__ = 'Copyright (c) 2019 SIL International (https://www.sil.org)'
+__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
+__author__ = 'Bob Hallissy'
+
+import re
+import types
+from xml.etree import ElementTree as ET
+from silfont.core import execute
+
+argspec = [
+ ('classes', {'help': 'class definition in XML format', 'nargs': '?', 'default': 'classes.xml'}, {'type': 'infile'}),
+ ('glyphdata', {'help': 'Glyph info csv file', 'nargs': '?', 'default': 'glyph_data.csv'}, {'type': 'incsv'}),
+ ('--gname', {'help': 'Column header for glyph name', 'default': 'glyph_name'}, {}),
+ ('--sort', {'help': 'Column header(s) for sort order', 'default': 'sort_final'}, {}),
+]
+
+# Dictionary of glyphName : sortValue
+sorts = dict()
+
+# Keep track of glyphs mentioned in classes but not in glyph_data.csv
+missingGlyphs = set()
+
+def doit(args):
+ logger = args.logger
+
+ # Read input csv to get glyph sort order
+ incsv = args.glyphdata
+ fl = incsv.firstline
+ if fl is None: logger.log("Empty input file", "S")
+ if args.gname in fl:
+ glyphnpos = fl.index(args.gname)
+ else:
+ logger.log("No" + args.gname + "field in csv headers", "S")
+ if args.sort in fl:
+ sortpos = fl.index(args.sort)
+ else:
+ logger.log('No "' + args.sort + '" heading in csv headers"', "S")
+ next(incsv.reader, None) # Skip first line with containing headers
+ for line in incsv:
+ glyphn = line[glyphnpos]
+ if len(glyphn) == 0:
+ continue # No need to include cases where name is blank
+ sorts[glyphn] = float(line[sortpos])
+
+ # RegEx we are looking for in comments
+ matchCountRE = re.compile("\*NEXT ([1-9]\d*) CLASSES MUST MATCH\*")
+
+ # parse classes.xml but include comments
+ class MyTreeBuilder(ET.TreeBuilder):
+ def comment(self, data):
+ res = matchCountRE.search(data)
+ if res:
+ # record the count of classes that must match
+ self.start(ET.Comment, {})
+ self.data(res.group(1))
+ self.end(ET.Comment)
+ doc = ET.parse(args.classes, parser=ET.XMLParser(target=MyTreeBuilder())).getroot()
+
+ # process results looking for both class elements and specially formatted comments
+ matchCount = 0
+ refClassList = None
+ refClassName = None
+
+ for child in doc:
+ if isinstance(child.tag, types.FunctionType):
+ # Special type used for comments
+ if matchCount > 0:
+ logger.log("Unexpected match request '{}': matching {} is not yet complete".format(child.text, refClassName), "E")
+ ref = None
+ matchCount = int(child.text)
+ # print "Match count = {}".format(matchCount)
+
+ elif child.tag == 'class':
+ l = orderClass(child, logger) # Do this so we record classes whether we match them or not.
+ if matchCount > 0:
+ matchCount -= 1
+ className = child.attrib['name']
+ if refClassName is None:
+ refClassList = l
+ refLen = len(refClassList)
+ refClassName = className
+ else:
+ # compare ref list and l
+ if len(l) != refLen:
+ logger.log("Class {} (length {}) and {} (length {}) have unequal length".format(refClassName, refLen, className, len(l)), "E")
+ else:
+ errCount = 0
+ for i in range(refLen):
+ if l[i][0] != refClassList[i][0]:
+ logger.log ("Class {} and {} inconsistent order glyphs {} and {}".format(refClassName, className, refClassList[i][2], l[i][2]), "E")
+ errCount += 1
+ if errCount > 5:
+ logger.log ("Abandoning compare between Classes {} and {}".format(refClassName, className), "E")
+ break
+ if matchCount == 0:
+ refClassName = None
+
+ # List glyphs mentioned in classes.xml but not present in glyph_data:
+ if len(missingGlyphs):
+ logger.log('Glyphs mentioned in classes.xml but not present in glyph_data: ' + ', '.join(sorted(missingGlyphs)), 'W')
+
+
+classes = {} # Keep record of all classes we've seen so we can flatten references
+
+def orderClass(classElement, logger):
+ # returns a list of tuples, each containing (indexWithinClass, sortOrder, glyphName)
+ # list is sorted by sortOrder
+ glyphList = classElement.text.split()
+ res = []
+ for i in range(len(glyphList)):
+ token = glyphList[i]
+ if token.startswith('@'):
+ # Nested class
+ cname = token[1:]
+ if cname in classes:
+ res.extend(classes[cname])
+ else:
+ logger.log("Invalid fea: class {} referenced before being defined".format(cname),"S")
+ else:
+ # simple glyph name -- make sure it is in glyph_data:
+ if token in sorts:
+ res.append((i, sorts[token], token))
+ else:
+ missingGlyphs.add(token)
+
+ classes[classElement.attrib['name']] = res
+ return sorted(res, key=lambda x: x[1])
+
+
+
+def cmd() : execute(None,doit,argspec)
+if __name__ == "__main__": cmd()