diff options
Diffstat (limited to 'src/silfont/scripts/psfsubset.py')
-rw-r--r-- | src/silfont/scripts/psfsubset.py | 124 |
1 files changed, 124 insertions, 0 deletions
#!/usr/bin/env python3
__doc__ = '''Subset an existing UFO based on a csv or text list of glyph names or USVs to keep.
'''
__url__ = 'https://github.com/silnrsi/pysilfont'
__copyright__ = 'Copyright (c) 2018-2023 SIL International (https://www.sil.org)'
__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
__author__ = 'Bob Hallissy'

from silfont.core import execute
from xml.etree import ElementTree as ET
import re

# Command-line specification handed to silfont's execute() by cmd() below.
argspec = [
    ('ifont',{'help': 'Input font file'}, {'type': 'infont'}),
    ('ofont',{'help': 'Output font file','nargs': '?' }, {'type': 'outfont'}),
    ('-i','--input',{'help': 'Input csv file'}, {'type': 'incsv'}),
    ('--header', {'help': 'Column header for glyph list', 'default': 'glyph_name'}, {}),
    ('--filter', {'help': 'Column header for filter status', 'default': None}, {}),
    ('-l','--log',{'help': 'Log file'}, {'type': 'outfile', 'def': '_subset.log'})]


def _locate_columns(incsv, header, filtername, logger):
    """Decide which csv columns hold the glyph list and the optional filter flag.

    incsv      -- csv reader object from args.input (uses .firstline and .reader)
    header     -- column header naming the glyph list (args.header)
    filtername -- column header naming the filter status, or None (args.filter)
    logger     -- logger used for error reporting

    Returns (dataCol, filterCol); filterCol is None when no filter is in use.

    NOTE(review): every error here is logged at level "S", which is presumably
    fatal in silfont's logger -- the code after each such call relies on not
    being reached. Confirm against silfont.core.
    """
    dataCol = None
    filterCol = None

    fl = incsv.firstline
    if fl is None: logger.log("Empty input file", "S")
    numfields = len(fl)

    if numfields == 1 and header not in fl:
        dataCol = 0     # Default for plain csv
        if filtername:
            # A header-less list has no column that could be the filter;
            # previously this surfaced later as a NameError on filterCol.
            logger.log("--filter requires a csv file with column headers", "S")
    elif numfields >= 1:    # Must have headers
        try:
            dataCol = fl.index(header)
        except ValueError as e:
            logger.log(f'Missing csv header field: {e}', 'S')
        except Exception as e:
            logger.log(f'Error reading csv header field: {e}', 'S')
        if filtername:
            try:
                filterCol = fl.index(filtername)
            except ValueError as e:
                logger.log(f'Missing csv filter field: {e}', 'S')
            except Exception as e:
                logger.log(f'Error reading csv filter field: {e}', 'S')
        next(incsv.reader, None)    # Skip first line with headers in
    else:
        logger.log("Invalid csv file", "S")

    return dataCol, filterCol


def _glyphs_from_csv(incsv, deflayer, dataCol, filterCol, logger):
    """Read the csv rows and return the set of glyph names explicitly requested.

    Each value in column dataCol is either a 4-6 digit hex USV or a glyph name.
    When filterCol is not None, only rows whose filter field is exactly "Y"
    are used. Rows that are too short, and names not present in deflayer,
    are reported with a warning and ignored.
    """
    # Create mappings to find glyph name from decimal usv:
    dusv2gname = {int(ucode.hex, 16): gname for gname in deflayer for ucode in deflayer[gname]['unicode']}

    usvRE = re.compile('[0-9a-f]{4,6}$',re.IGNORECASE)  # matches 4-6 digit hex
    lastCol = dataCol if filterCol is None else max(dataCol, filterCol)

    requested = set()
    for r in incsv:
        if len(r) <= lastCol:
            # Short row: indexing it would raise IndexError, so skip it instead.
            logger.log("Line %d has too few fields; ignored" % incsv.line_num, 'W')
            continue
        if filterCol is not None and r[filterCol].strip() != "Y":
            continue
        gname = r[dataCol].strip()
        if usvRE.match(gname):
            # data is USV, not glyph name
            dusv = int(gname,16)
            if dusv in dusv2gname:
                requested.add(dusv2gname[dusv])
                continue
            # The USV wasn't in the font... try it as a glyph name
        if gname not in deflayer:
            logger.log("Glyph '%s' not in font; line %d ignored" % (gname, incsv.line_num), 'W')
            continue
        requested.add(gname)
    return requested


def _with_components(requested, deflayer, logger):
    """Return requested plus every glyph reachable through component references.

    Components are found via './outline/component[@base]' in each glyph's
    etree. A component whose base glyph is not in deflayer is reported with a
    warning and skipped, rather than being looked up (which raised KeyError).
    """
    toProcess = set(requested)
    toKeep = set()
    while toProcess:
        gname = toProcess.pop()     # retrieves an arbitrary item from the set
        if gname in toKeep:
            continue                # Already processed this one
        toKeep.add(gname)

        # If it has any components we haven't already processed, queue them
        for component in deflayer[gname].etree.findall('./outline/component[@base]'):
            cname = component.get('base')
            if cname in toKeep:
                continue
            if cname not in deflayer:
                logger.log("Component '%s' of glyph '%s' not in font; ignored" % (cname, gname), 'W')
                continue
            toProcess.add(cname)
    return toKeep


def _prune_lib(font, toKeep):
    """Rebuild the glyph-order and postscript-name lib entries to list only toKeep.

    Does nothing when the font object has no "lib" attribute; entries that are
    absent from font.lib are left absent.
    """
    if "lib" not in font.__dict__:
        return

    # Clean up and rebuild sort orders
    for orderName in ('public.glyphOrder', 'com.schriftgestaltung.glyphOrder'):
        if orderName in font.lib:
            glyphOrder = font.lib.getval(orderName)     # This is an array
            array = ET.Element("array")
            for gname in glyphOrder:
                if gname in toKeep:
                    ET.SubElement(array, "string").text = gname
            font.lib.setelem(orderName, array)

    # Clean up and rebuild psnames
    if 'public.postscriptNames' in font.lib:
        psnames = font.lib.getval('public.postscriptNames')     # This is a dict keyed by glyphnames
        psdict = ET.Element("dict")     # not named "dict": that would shadow the builtin
        for gname in psnames:
            if gname in toKeep:
                ET.SubElement(psdict, "key").text = gname
                ET.SubElement(psdict, "string").text = psnames[gname]
        font.lib.setelem("public.postscriptNames", psdict)


def doit(args) :
    """Subset args.ifont to the glyphs listed in args.input and return the font.

    Reads glyph names or USVs from the csv, adds the glyphs they use as
    components, deletes every other glyph from the default layer, then trims
    the glyph-order and postscript-name lib entries to match.
    """
    font = args.ifont
    incsv = args.input
    logger = args.logger
    deflayer = font.deflayer

    # From the csv, assemble the set of glyphs explicitly asked for
    dataCol, filterCol = _locate_columns(incsv, args.header, args.filter, logger)
    requested = _glyphs_from_csv(incsv, deflayer, dataCol, filterCol, logger)

    # Generate a complete list of glyphs to keep:
    toKeep = _with_components(requested, deflayer, logger)

    # Generate a complete list of glyphs to delete:
    toDelete = set(deflayer).difference(toKeep)

    # Remove any glyphs not in the toKeep set
    for gname in toDelete:
        logger.log("Deleting " + gname, "V")
        deflayer.delGlyph(gname)
    # Internal consistency check: toKeep only ever contains names found in deflayer
    assert len(deflayer) == len(toKeep), "len(deflayer) != len(toKeep)"
    logger.log("Retained %d glyphs, deleted %d glyphs." % (len(toKeep), len(toDelete)), "P")

    _prune_lib(font, toKeep)

    return font

def cmd() : execute("UFO",doit,argspec)

if __name__ == "__main__": cmd()