summaryrefslogtreecommitdiffstats
path: root/src/silfont/ftml.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/silfont/ftml.py')
-rw-r--r--src/silfont/ftml.py433
1 files changed, 433 insertions, 0 deletions
diff --git a/src/silfont/ftml.py b/src/silfont/ftml.py
new file mode 100644
index 0000000..cd0a59f
--- /dev/null
+++ b/src/silfont/ftml.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+'Classes and functions for use handling FTML objects in pysilfont scripts'
+__url__ = 'https://github.com/silnrsi/pysilfont'
+__copyright__ = 'Copyright (c) 2016 SIL International (https://www.sil.org)'
+__license__ = 'Released under the MIT License (https://opensource.org/licenses/MIT)'
+__author__ = 'David Raymond'
+
+from xml.etree import ElementTree as ET
+from fontTools import ttLib
+import re
+from xml.sax.saxutils import quoteattr
+import silfont.core
+import silfont.etutil as ETU
+
# Regular expression for parsing a local font name such as "Gentium Plus Bold Italic".
# Named groups: rest (the family name - letters and spaces only), bold, italic and regular;
# each of the last three is None when that word is absent.
# A name containing any other character (digits, "-", ...) does not match at all.
fontspec = re.compile(r"""^ # beginning of string
 (?P<rest>[A-Za-z ]+?) # Font Family Name
 \s*(?P<bold>Bold)? # Bold
 \s*(?P<italic>Italic)? # Italic
 \s*(?P<regular>Regular)? # Regular
 $""", re.VERBOSE) # end of string
+
class Fxml(ETU.ETelement):
    """A complete FTML document held in memory.

    Exactly one of ``file``, ``xmlstring`` and ``testgrouplabel`` must be
    supplied:

    * ``file`` - an open file object (not a file name) containing FTML
    * ``xmlstring`` - a string containing FTML
    * ``testgrouplabel`` - label for the single, empty testgroup of a new
      minimal FTML document

    ``logger`` and ``params`` default to fresh ``silfont.core.loggerobj()``
    and ``silfont.core.parameters()`` objects.

    Attributes set here: ``version``, ``stylesheet`` (dict of the
    xml-stylesheet pseudo-attributes, empty if there are none) and
    ``filename``.  ``head`` and ``testgroups`` are read by create_element()
    and are expected to be populated by process_subelements().
    """

    def __init__(self, file = None, xmlstring = None, testgrouplabel = None, logger = None, params = None):
        self.logger = logger if logger is not None else silfont.core.loggerobj()
        self.params = params if params is not None else silfont.core.parameters()
        self.parseerrors = None
        # NOTE(review): the code below relies on "X" and "S" log calls not returning
        # (e.g. self.element would be unset after a parse failure) - confirm in silfont.core
        if not exactlyoneof(file, xmlstring, testgrouplabel):
            self.logger.log("Must supply exactly one of file, xmlstring and testgrouplabel", "X")

        if testgrouplabel:  # Create minimal valid ftml
            xmlstring = '<ftml version="1.0"><head></head><testgroup label=' + quoteattr(testgrouplabel) + '></testgroup></ftml>'

        # ET.parse would also work on file name, but other code assumes file object
        if file and not hasattr(file, 'read'):
            self.logger.log("'file' is not a file object", "X")

        try:
            if file:
                self.element = ET.parse(file).getroot()
            else:
                self.element = ET.fromstring(xmlstring)
        except Exception as e:
            self.logger.log("Error parsing FTML input: " + str(e), "S")

        super(Fxml, self).__init__(self.element)

        self.version = getattrib(self.element, "version")
        if self.version != "1.0":
            self.logger.log("ftml items must have a version of 1.0", "S")

        self.process_subelements((
            ("head", "head", Fhead, True, False),
            ("testgroup", "testgroups", Ftestgroup, True, True)),
            offspec = False)

        self.stylesheet = {}
        if file:  # If reading from file, look to see if a stylesheet is present in xml processing instructions
            file.seek(0)  # Have to re-read file since ElementTree does not support processing instructions
            for line in file:
                if line[0:2] != "<?":
                    break  # Stop at the first line that is not a processing instruction
                pi = line.strip()
                target = pi[:-2].split(None, 1)  # [:-2] removes the trailing ?>
                if target and target[0] == "<?xml-stylesheet":
                    # Match name="value" / name='value' pairs so that values holding
                    # "=" or spaces, and runs of white space between pairs, are handled
                    for name, _quote, value in re.findall(r'''([^\s=]+)\s*=\s*(["'])(.*?)\2''', pi):
                        self.stylesheet[name] = value
                    break

        self.filename = file if file else None

        if self.parseerrors:
            self.logger.log("Errors parsing ftml element:", "E")
            for error in self.parseerrors:
                self.logger.log("  " + error, "E")
            self.logger.log("Invalid FTML", "S")

    def save(self, file):
        """Serialise the document with ETU.ETWriter and write it to the file object ``file``.

        The serialised text is also kept in ``self.outxmlstr``.
        """
        self.outxmlstr = ""
        element = self.create_element()
        etw = ETU.ETWriter(element, inlineelem = ["em"])
        self.outxmlstr = etw.serialize_xml()
        file.write(self.outxmlstr)

    def create_element(self):
        """Return a new ElementTree 'ftml' element built from the current object contents."""
        element = ET.Element('ftml', version = str(self.version))
        if self.stylesheet:  # Create dummy .pi attribute for style sheet processing instruction
            ## Spec is not clear about what order attributes should be in, so they are sorted by name
            pseudo_attribs = "".join(
                ' ' + attrib + '="' + self.stylesheet[attrib] + '"'
                for attrib in sorted(self.stylesheet))
            element.attrib['.pi'] = "xml-stylesheet" + pseudo_attribs
        element.append(self.head.create_element())
        for testgroup in self.testgroups:
            element.append(testgroup.create_element())
        return element
+
class Fhead(ETU.ETelement):
    """The <head> element of an FTML document.

    After construction ``fontscale`` is an int (or None), ``styles`` is a dict
    of Fstyle objects keyed by style name (or None), ``widths`` is a plain
    dict (or None) and ``elements`` is a dict copy of ``self._contents``,
    kept so that non-standard sub-elements can be written back out.
    """

    def __init__(self, parent, element):
        self.parent = parent
        self.logger = parent.logger
        super().__init__(element)

        subelement_specs = (
            ("comment", "comment", None, False, False),
            ("fontscale", "fontscale", None, False, False),
            ("fontsrc", "fontsrc", Ffontsrc, False, True),
            ("styles", "styles", ETU.ETelement, False, False),  # Basic elements for now; Fstyle objects are built below
            ("title", "title", None, False, False),
            ("widths", "widths", _Fwidth, False, False),
        )
        self.process_subelements(subelement_specs, offspec = True)

        if self.fontscale is not None:
            self.fontscale = int(self.fontscale)

        if self.styles is not None:
            by_name = {}
            for styleelem in self.styles["style"]:
                style = Fstyle(self, element = styleelem)
                by_name[style.name] = style
                if style.parseerrors:
                    shown_name = style.name if style.name is not None else ""
                    self.parseerrors.append("Errors parsing style element: " + shown_name)
                    for error in style.parseerrors:
                        self.parseerrors.append("  " + error)
            self.styles = by_name

        if self.widths is not None:
            self.widths = self.widths.widthsdict  # Keep just the dict from the temporary _Fwidth object

        # Dictionary of all elements, particularly for handling non-standard elements
        self.elements = dict(self._contents)

    def findstyle(self, name = None, feats = None, lang = None):
        """Return the first style whose feats and lang both equal those given, else None.

        If ``name`` is supplied the style's name must match it as well.
        """
        if self.styles is None:
            return None
        for style in self.styles.values():
            if not (style.feats == feats and style.lang == lang):
                continue
            if name is None or name == style.name:
                return style
        return None

    def addstyle(self, name, feats = None, lang = None):
        """Return the matching existing style, or create, register and return a new one called ``name``."""
        found = self.findstyle(name, feats, lang)
        if found is not None:
            return found
        if self.styles is None:
            self.styles = {}
        if name in self.styles:
            self.logger.log("Adding duplicate style name " + name, "X")
        created = Fstyle(self, name = name, feats = feats, lang = lang)
        self.styles[name] = created
        return created

    def create_element(self):
        """Return a new ElementTree 'head' element built from the current object contents."""
        head = ET.Element('head')

        # In-spec sub-elements first, in alphabetic order
        if self.comment:
            ET.SubElement(head, 'comment').text = self.comment
        if self.fontscale:
            ET.SubElement(head, 'fontscale').text = str(self.fontscale)
        if isinstance(self.fontsrc, list):
            # There may be several fontsrc elements
            for fontsrc in self.fontsrc:
                head.append(fontsrc.create_element())
        elif self.fontsrc is not None:
            head.append(self.fontsrc.create_element())
        if self.styles:
            styles_elem = ET.SubElement(head, 'styles')
            for stylename in sorted(self.styles):
                styles_elem.append(self.styles[stylename].create_element())
        if self.title:
            ET.SubElement(head, 'title').text = self.title
        if self.widths is not None:
            widths_elem = ET.SubElement(head, 'widths')
            for key in sorted(self.widths):
                value = self.widths[key]
                if value is not None:
                    widths_elem.set(key, value)

        # Then whatever non-spec elements were read
        in_spec = ("comment", "fontscale", "fontsrc", "styles", "title", "widths")
        for tag in sorted(self.elements):
            if tag in in_spec:
                continue
            for elem in self.elements[tag]:
                head.append(elem)

        return head
+
class Ffontsrc(ETU.ETelement):
    """A <fontsrc> element from the FTML head.

    This library only supports a single font in the fontsrc as recommended by
    the FTML spec.  Currently it only supports simple url() and local() values.

    Exactly one of ``element`` (an existing ElementTree element) and ``text``
    (the fontsrc value; a plain file name or font name is accepted too) must
    be supplied.  ``label``, if given, is set as the element's label attribute.

    If the text cannot be parsed the problem is added to ``self.parseerrors``
    and the rest of the initialisation (including the base-class __init__) is
    skipped.  Otherwise ``text``, ``url``, ``local`` and ``fontfamily`` are
    set; ``bold`` and ``italic`` are only set for local() fonts, or by
    addfontinfo().
    """

    def __init__(self, parent, element = None, text = None, label=None):
        self.parent = parent
        self.logger = parent.logger
        self.parseerrors = []

        if not exactlyoneof(element, text):
            self.logger.log("Must supply exactly one of element and text", "X")

        try:
            (txt, url, local) = parsefontsrc(text, allowplain=True) if text else parsefontsrc(element.text)
        except ValueError as e:
            txt = text if text else element.text
            self.parseerrors.append(str(e) + ": " + txt)
        else:
            if text:
                element = ET.Element("fontsrc")
                element.text = txt
            if label:
                element.set('label', label)
            super(Ffontsrc, self).__init__(element)
            self.process_attributes((
                ("label", "label", False),),
                others=False)
            self.text = txt
            self.url = url
            self.local = local
            if self.local:  # Parse font name to find if bold, italic etc
                results = fontspec.match(self.local)
                if results is None:
                    # fontspec only copes with letters and spaces, so names such as
                    # "Gentium-Bold" or "Noto Sans 2" do not match.  Rather than failing
                    # with AttributeError, use the whole name as the family.
                    self.fontfamily = self.local
                    self.bold = False
                    self.italic = False
                else:
                    self.fontfamily = results.group('rest')
                    self.bold = results.group('bold') is not None
                    self.italic = results.group('italic') is not None
            else:
                self.fontfamily = None  # If details are needed call addfontinfo()

    def addfontinfo(self):
        """Set fontfamily, bold and italic by looking inside the font file at ``self.url``."""
        (ff, bold, italic) = getfontinfo(self.url)
        self.fontfamily = ff
        self.bold = bold
        self.italic = italic

    def create_element(self):
        """Return a new ElementTree 'fontsrc' element built from the current object contents."""
        element = ET.Element("fontsrc")
        element.text = self.text
        if self.label:
            element.set("label", self.label)
        return element
+
class Fstyle(ETU.ETelement):
    """A <style> element from the FTML head.

    Build it either from an existing ElementTree ``element`` or from ``name``
    plus optional ``feats`` and ``lang``; the two forms cannot be mixed.
    ``feats`` may be passed as a dict or as a string in the form handled by
    string_to_dict(); when it is a string after attribute processing it is
    converted to a dict.
    """

    def __init__(self, parent, element = None, name = None, feats = None, lang = None):
        self.parent = parent
        self.logger = parent.logger
        if element is not None:
            if name or feats or lang:
                # Must call the logger's log() method - the logger object is not itself used as a callable elsewhere
                self.logger.log("Can't supply element and other parameters", "X")
        else:
            if name is None:
                self.logger.log("Must supply element or name to Fstyle", "X")
            element = self.element = ET.Element("style", name = name)
            if feats is not None:
                if isinstance(feats, dict):
                    feats = self.dict_to_string(feats)
                element.set('feats', feats)
            if lang is not None:
                element.set('lang', lang)
        super(Fstyle, self).__init__(element)

        self.process_attributes((
            ("feats", "feats", False),
            ("lang", "lang", False),
            ("name", "name", True)),
            others = False)

        if isinstance(self.feats, str):
            self.feats = self.string_to_dict(self.feats)

    def string_to_dict(self, string):
        """Convert a feature string such as "'smcp' 1, 'liga' off" into a dict.

        The string is split on ","; each item must be a quoted four-character
        tag followed by white space and a number, "on" or "off".  Items that
        do not fit are logged at level "E" and left out.  Values stay strings.
        """
        feats = {}
        for item in string.split(','):
            item = item.strip()
            m = re.match(r'''(?P<quote>['"])(\w{4})(?P=quote)\s+(\d+|on|off)$''', item)
            if m:
                feats[m.group(2)] = m.group(3)
            else:
                self.logger.log(f'Invalid feature syntax "{item}"', 'E')
        return feats

    def dict_to_string(self, dict):
        """Convert a feature dict into a string such as "'liga' off, 'smcp' 1".

        Features are written in sorted tag order; entries whose value is None
        are left out.  Values are expected to be strings.
        """
        # The parameter keeps its original name "dict" so keyword callers still work
        return ", ".join(
            "'" + name + "' " + dict[name]
            for name in sorted(dict)
            if dict[name] is not None)

    def create_element(self):
        """Return a new ElementTree 'style' element built from the current object contents."""
        element = ET.Element("style", name = self.name)
        if self.feats:
            element.set("feats", self.dict_to_string(self.feats))
        if self.lang:
            element.set("lang", self.lang)
        return element
+
+
class _Fwidth(ETU.ETelement):  # Only used temporarily whilst parsing xml
    """Short-lived wrapper for the <widths> element.

    Its only product is ``widthsdict``, a dict holding the comment, label,
    string, stylename and table attribute values.
    """

    def __init__(self, parent, element):
        super().__init__(element)
        self.parent = parent
        self.logger = parent.logger

        attribute_specs = (
            ("comment", "comment", False),
            ("label", "label", False),
            ("string", "string", False),
            ("stylename", "stylename", False),
            ("table", "table", False),
        )
        self.process_attributes(attribute_specs, others = False)

        # Gather the processed attributes into one plain dict
        self.widthsdict = dict(
            comment=self.comment,
            label=self.label,
            string=self.string,
            stylename=self.stylename,
            table=self.table,
        )
+
class Ftestgroup(ETU.ETelement):
    """A <testgroup> element, built from an existing element or from a label.

    Exactly one of ``element`` and ``label`` must be supplied.  When the
    group contains sub-testgroups they are merged into ``self.tests`` in
    document order, so ``self.tests`` can hold both Ftest and Ftestgroup
    objects.
    """

    def __init__(self, parent, element = None, label = None):
        self.parent = parent
        self.logger = parent.logger
        if not exactlyoneof(element, label):
            self.logger.log("Must supply exactly one of element and label", "X")

        if label:
            element = ET.Element("testgroup", label = label)

        super().__init__(element)

        # This is a sub-group when its parent is itself a testgroup
        self.subgroup = type(parent) is Ftestgroup

        attribute_specs = (
            ("background", "background", False),
            ("label", "label", True),
        )
        self.process_attributes(attribute_specs, others = False)

        subelement_specs = (
            ("comment", "comment", None, False, False),
            ("test", "tests", Ftest, False, True),
            ("testgroup", "testgroups", Ftestgroup, False, True),
        )
        self.process_subelements(subelement_specs, offspec = False)

        has_subgroups = self.testgroups != []
        if self.subgroup and has_subgroups:
            parent.parseerrors.append("Only one level of testgroup nesting permitted")

        # Merge any sub-testgroups into tests, following the order of the child elements
        if has_subgroups:
            pending_groups = list(self.testgroups)  # Work on a copy so self.testgroups is preserved
            merged = []
            for child in self.element:
                tag = child.tag
                if tag == "test":
                    merged.append(self.tests.pop(0))
                elif tag == "testgroup":
                    merged.append(pending_groups.pop(0))
            self.tests = merged

    def create_element(self):
        """Return a new ElementTree 'testgroup' element built from the current object contents."""
        group = ET.Element("testgroup")
        if self.background:
            group.set("background", self.background)
        group.set("label", self.label)
        if self.comment:
            ET.SubElement(group, 'comment').text = self.comment
        for test in self.tests:
            group.append(test.create_element())
        return group
+
+class Ftest(ETU.ETelement) :
+ def __init__(self, parent, element = None, label = None, string = None) :
+ self.parent = parent
+ self.logger = parent.logger
+ if not exactlyoneof(element, (label, string)) : self.logger.log("Must supply exactly one of element and label/string","X")
+
+ if label :
+ element = ET.Element("test", label = label)
+ x = ET.SubElement(element,"string") ; x.text = string
+
+ super(Ftest,self).__init__(element)
+
+ self.process_attributes((
+ ("background", "background", False),
+ ("label", "label", True),
+ ("rtl", "rtl", False),
+ ("stylename", "stylename", False)),
+ others = False)
+
+ self.process_subelements((
+ ("comment", "comment", None, False, False),
+ ("string", "string", _Fstring, True, False)),
+ offspec = False)
+
+ self.string = self.string.string # self.string initially a temporary _Fstring element
+
+ def str(self, noems = False) : # Return formatted version of string
+ string = self.string
+ if noems :
+ string = string.replace("<em>","")
+ string = string.replace("</em>","")
+ return string ## Other formatting options to be added as needed cf ftml2odt
+
+ def create_element(self) :
+ element = ET.Element("test")
+ if self.background : element.set("background", self.background)
+ element.set("label", self.label)
+ if self.rtl : element.set("rtl", self.rtl)
+ if self.stylename : element.set("stylename", self.stylename)
+ if self.comment : x = ET.SubElement(element, "comment") ; x.text = self.comment
+ x = ET.SubElement(element, "string") ; x.text = self.string
+
+ return element
+
class _Fstring(ETU.ETelement):  # Only used temporarily whilst parsing xml
    """Short-lived wrapper for a <string> element.

    Its only product is ``self.string``: the element's text with each <em>
    child written inline as "<em>text</em>" followed by the text after it.
    """

    def __init__(self, parent, element = None):
        self.parent = parent
        self.logger = parent.logger
        super(_Fstring, self).__init__(element)
        self.process_subelements((("em", "em", ETU.ETelement, False, True),), offspec = False)
        # Need to build text of string to include <em> subelements.
        # ElementTree uses None for absent text/tail, so substitute "" - otherwise
        # the literal text "None" would end up in the string
        pieces = [element.text or ""]
        for em in self.em:
            pieces.append("<em>{}</em>{}".format(em.element.text or "", em.element.tail or ""))
        self.string = "".join(pieces)
+
def getattrib(element, attrib):
    """Return the value of attribute ``attrib`` on ``element``, or None if it is not set."""
    return element.attrib.get(attrib)
+
def exactlyoneof(*args):
    """Return True if one and only one of ``args`` is not None.

    The last argument may be a tuple, which is treated as a group: its
    members must be either all None or all not None (otherwise the result is
    False), and the group then counts as a single argument.  An empty tuple
    counts as None.  Calling with no arguments returns False.
    """
    if not args:
        return False

    values = list(args)  # Convert to list so last val can be changed
    last = values[-1]
    if type(last) is tuple:  # Exact type test on purpose: tuple subclasses are ordinary values
        missing = [item is None for item in last]
        if any(missing) and not all(missing):
            return False  # Group is only partly supplied
        values[-1] = last[0] if last else None  # Now valid to test on any item in tuple

    return sum(value is not None for value in values) == 1
+
def parsefontsrc(text, allowplain = False):
    """Check fontsrc text is valid and return normalised (text, url, local) values.

    - if multiple (fallback) fonts are specified, just process the first one
    - just handles simple url() or local() formats
    - if allowplain is set, allows text without url() or local() and decides
      which based on "." in text

    Returns ("url(x)", x, None) for a url and ("local(x)", None, x) for a
    local font.  Raises ValueError("Invalid fontsrc string") when the text
    has a "(...)" form that is neither url() nor local(), or when it has no
    such form and allowplain is not set.
    """
    # If multiple (fallback) fonts are specified, just process the first one.
    # Strip it so white space around the value (e.g. before the ",") cannot
    # be mistaken for part of the value or hide the closing ")"
    text = text.split(",")[0].strip()

    if allowplain and "(" not in text:  # Allow for text without url() or local() form
        kind = "url" if "." in text else "local"
    else:
        kind = text.split("(")[0]
        if kind not in ("url", "local"):
            raise ValueError("Invalid fontsrc string")
        text = text.split("(")[1][:-1].strip()  # Text between "(" and the final ")"

    if kind == "url":
        return ("url(" + text + ")", text, None)
    return ("local(" + text + ")", None, text)
+
def getfontinfo(filename):
    """Peek inside the font for the name, weight and style.

    Returns (fontname, bold, italic).  ``fontname`` is the Font Family name
    (nameID 1) and is None if the font has neither of the name records
    looked for; ``bold`` and ``italic`` are booleans taken from the OS/2
    fsSelection field.
    """
    font = ttLib.TTFont(filename)
    try:
        names = font['name']  # name table from font
        # nameID 1 = Font Family name: platform ID 3, Encoding ID 1,
        # falling back to platform ID 1, Encoding ID 0
        record = names.getName(1, 3, 1)
        if record is None:
            record = names.getName(1, 1, 0)
        fontname = record.toUnicode() if record is not None else None
        fsselection = font['OS/2'].fsSelection  # OS/2 table
    finally:
        font.close()  # Release the underlying file
    # take bold and italic info from OS/2 table, fsSelection bits 0 and 5
    italic = (fsselection & 1) > 0
    bold = (fsselection & 32) > 0
    return (fontname, bold, italic)
+