#!/usr/bin/python3
#-*- coding: UTF-8 -*-

# Data is from https://unicode.org/Public/cldr/44/core.zip

import os
import sys
import glob
import re
import xml.etree.ElementTree as ET

printenumsandexit = ('--printenums' in sys.argv)
printdocsandexit = ('--printdocs' in sys.argv)

# CLDR day identifiers mapped to the Qt enum name and to the index used in day name lists
_DAYENUMS = {
    'mon': 'Qt::Monday',
    'tue': 'Qt::Tuesday',
    'wed': 'Qt::Wednesday',
    'thu': 'Qt::Thursday',
    'fri': 'Qt::Friday',
    'sat': 'Qt::Saturday',
    'sun': 'Qt::Sunday',
}
_DAYINDEXES = {'mon': 0, 'tue': 1, 'wed': 2, 'thu': 3, 'fri': 4, 'sat': 5, 'sun': 6}
# CLDR month types are the strings '1'..'12'
_MONTHINDEXES = {str(number): number - 1 for number in range(1, 13)}


def mapcopy(frommap, tomap):
    """Copy every entry of frommap into tomap (shallow), overwriting existing keys."""
    tomap.update(frommap)


def mapmerge(frommap, tomap, defaultmap):
    """Copy into tomap only the entries of frommap that differ from defaultmap.

    Every key of frommap must be present in defaultmap.
    """
    for key, value in frommap.items():
        if value == defaultmap[key]:
            continue
        tomap[key] = value


def listcopy(fromlist, tolist):
    """Append all entries of fromlist to tolist."""
    tolist.extend(fromlist)


def stripxmltext(fromxmltext):
    """Return XML attribute text without newlines and tabs and with space runs collapsed."""
    result = fromxmltext.replace('\n', '').replace('\t', '')
    # collapse any run of spaces to a single one so that split(' ') yields no empty entries
    result = re.sub(' {2,}', ' ', result)
    return result.strip()


def normalizestring(fromstring):
    """Turn a display name into a string usable as C++ enum identifier.

    Replacements are applied in order, the order matters (e.g. spaces are removed before
    'St.' is handled).
    """
    replacements = (
        (' ', ''),
        ('-', ''),
        (',', ''),
        ("'", ''),
        ('&', 'And'),
        ('(', ''),
        (')', ''),
        ('St.', 'St'),
        ('U.S.', 'UnitedStates'),
        # UTF-8 chars
        (u'ʼ', ''),
        (u'’', ''),
        (u'ü', 'u'),
        (u'å', 'a'),
        (u'ç', 'c'),
        (u'õ', 'o'),
        (u'Å', 'A'),
        (u'ô', 'o'),
        (u'ã', 'a'),
        (u'é', 'e'),
        (u'í', 'i'),
        (u'ā', 'a'),
        (u'á', 'a'),
    )
    result = fromstring
    for old, new in replacements:
        result = result.replace(old, new)
    return result


def xmlmerge(fromxml, fromxml2):
    """Parse both files and return the root of fromxml2 with the children of fromxml prepended.

    Each child is inserted at index 0, thus the children of fromxml end up in reverse order but
    always before those of fromxml2, lookups that take the first match prefer fromxml.
    """
    root = ET.parse(fromxml).getroot()
    root2 = ET.parse(fromxml2).getroot()
    for element in root:
        root2.insert(0, element)
    return root2


def touint(fromstring):
    """Return the code point of a single character string."""
    # NOTE: symbols (plus, minus, etc.) are assumed to be single character which is not true for
    # many of the locales, however the API for those does not handle them as strings thus the first
    # character only is used
    return ord(fromstring)


def tochar(fromstring):
    """Return fromstring as C string literal with explicit terminator or nullptr if it is empty."""
    if fromstring:
        return '"%s\\0"' % fromstring
    return 'nullptr'


def tochararray(fromstringlist):
    """Return the strings as C array initializer, e.g. { "a\\0", nullptr }."""
    return '{ %s }' % ', '.join(tochar(string) for string in fromstringlist)


def todayenum(day):
    """Return the Qt day enum for CLDR day identifier (mon, tue, ...), exit on unknown one."""
    if day in _DAYENUMS:
        return _DAYENUMS[day]
    print('Unknown day: %s' % day)
    sys.exit(1)


def todatetimeformat(fromformat):
    """Convert CLDR date/time pattern to the format understood by QLocale.

    Unsupported fields are dropped along with common adjacent punctuation, the rest is mapped to
    the closest supported field. Quoted literal text is not treated specially.
    """
    # valid are y, m, M, d, h, H, s, a, A, z and t
    unsupportedtags = [
        'g',
        'u',
        'q',
        'l',
        'w',
        'f',
        'j',
    ]
    replacementtags = {
        'MMMMM' : 'MMM', # narrow month name
        'LLLLL' : 'MMM', # stand-alone narrow month name
        'E' : 'ddd', 'EE' : 'ddd', 'EEE' : 'ddd', 'EEEEE' : 'ddd', 'EEEE' : 'dddd', # day of week
        'e' : 'ddd', 'ee' : 'ddd', 'eee' : 'ddd', 'eeeee' : 'ddd', 'eeee' : 'dddd', # local day of week
        'c' : 'ddd', 'cc' : 'ddd', 'ccc' : 'ddd', 'ccccc' : 'ddd', 'cccc' : 'dddd', # stand-alone local day of week
        'K' : 'h', # Hour 0-11
        'k' : 'H', # Hour 1-24
        'z' : 'Z', 'zz' : 'Z', 'zzz' : 'Z', 'zzzz' : 'Z', # timezone
        'Z' : 'Z', 'ZZ' : 'Z', 'ZZZ' : 'Z', 'ZZZZ' : 'Z', # timezone
        'v' : 'Z', 'vv' : 'Z', 'vvv' : 'Z', 'vvvv' : 'Z', # timezone
        'V' : 'Z', 'VV' : 'Z', 'VVV' : 'Z', 'VVVV' : 'Z', # timezone
        'L' : 'M', # stand-alone month names. not supported
    }
    # applied in this order, the second expression folds what the first one expands
    replacementregex = {
        r'y' : 'yyyy', # four-digit year without leading zeroes
        r'yyy{3,}' : 'yyyy', # more than four digits hence convert to four-digit year
        r'S{1,}' : '', # fractional seconds. not supported.
        r'A{1,}' : '', # milliseconds in day. not supported.
        r'a' : 'AP', # AM/PM
    }
    possibleoccurrences = [
        '%s, ', ', %s',
        '%s.', '.%s',
        '%s-', '-%s',
        '(%s)',
        "('%s')",
        '%s ', ' %s',
        '%s',
    ]
    result = fromformat
    for tag in unsupportedtags:
        for occurrence in possibleoccurrences:
            for casedtag in (tag, tag.upper()):
                # longest run first so that the punctuation around it is removed too
                for count in (4, 3, 2, 1):
                    result = result.replace(occurrence % (casedtag * count), '')
    for regex, replacement in replacementregex.items():
        result = re.sub(regex, replacement, result)
    # longest tags first, otherwise single letter tags (e.g. E) would match inside the longer
    # ones (e.g. EEEE) and expand each letter separately
    for tag in sorted(replacementtags, key=len, reverse=True):
        result = result.replace(tag, replacementtags[tag])
    return result


def tomonthslist(fromxmlelements, initialvalues):
    """Return copy of initialvalues (12 entries) with the given <month> elements applied.

    Exits if an element has type other than '1' to '12'.
    """
    result = list(initialvalues)
    for month in fromxmlelements:
        monthtype = month.get('type')
        if monthtype not in _MONTHINDEXES:
            print('Unknown month: %s' % monthtype)
            sys.exit(1)
        result[_MONTHINDEXES[monthtype]] = month.text
    return result


def todayslist(fromxmlelements, initialvalues):
    """Return copy of initialvalues (7 entries, Monday first) with the given <day> elements applied.

    Exits if an element has type other than mon, tue, wed, thu, fri, sat or sun.
    """
    result = list(initialvalues)
    for day in fromxmlelements:
        daytype = day.get('type')
        if daytype not in _DAYINDEXES:
            print('Unknown day: %s' % daytype)
            sys.exit(1)
        result[_DAYINDEXES[daytype]] = day.text
    return result


def tolanguageenum(fromstring):
    """Return the QLocale::Language enum for language code or None if the code is not mapped."""
    for key, value in languagemap.items():
        if fromstring == value['code']:
            return 'QLocale::Language::%s' % key
    # unknown codes are not fatal
    return None


def toscriptenum(fromstring):
    """Return the QLocale::Script enum for script code or None if the code is not mapped."""
    for key, value in scriptmap.items():
        if fromstring == value['code']:
            return 'QLocale::Script::%s' % key
    # unknown codes are not fatal
    return None


def tocountryenum(fromstring):
    """Return the QLocale::Country enum for territory code or None if the code is not mapped."""
    for key, value in countrymap.items():
        if fromstring == value['code']:
            return 'QLocale::Country::%s' % key
    # unknown codes are not fatal
    return None


# printenum prints mapped values that have unique code only, the rest are set to the enum of the
# first occurrence. the reason for doing so is because table lookups for figuring out language,
# script and country required for constructing QLocale from string (named locales) relies on the
# fact that there is only one code for each, if that is not the case constructing copy of locale
# from its name will not copy it correctly.
# printtable skips duplicate code entries entirely
def printenum(frommap, prefix):
    """Print C++ enum body for the map, prefix is one of Language, Country or Script.

    The artificial Any<prefix> and C entries come first, then the entries with unique code in
    sorted order, then aliases (entries whose code was already seen) set to the enum that owns
    the code and finally Last<prefix>.
    """
    special = ('Any%s' % prefix, 'C')
    sortedkeys = sorted(frommap)
    keyscount = 0
    aliascodes = []
    seencodes = set()

    print(' enum %s {' % prefix)
    # print Default and C first
    for key in frommap:
        if key not in special:
            continue
        print(' %s = %d,' % (key, keyscount))
        keyscount += 1
    # now everything except those, save last key for later
    lastkey = ''
    for key in sortedkeys:
        if key in special:
            continue
        code = frommap[key]['code']
        if code in seencodes:
            aliascodes.append(key)
            continue
        seencodes.add(code)
        print(' %s = %d,' % (key, keyscount))
        lastkey = key
        keyscount += 1
    # now aliases
    print('')
    for alias in sorted(aliascodes):
        aliascode = frommap[alias]['code']
        aliasenum = None
        # the first key in sorted order with the same code is the one printed with a value
        for key in sortedkeys:
            if frommap[key]['code'] == aliascode:
                aliasenum = key
                break
        print(' %s = %s,' % (alias, aliasenum))
    # print last key
    print('\n Last%s = %s' % (prefix, lastkey))
    print(' };\n')


def printdoc(frommap, prefix):
    """Print documentation \\value lines for every entry except Any<prefix> and C."""
    special = ('Any%s' % prefix, 'C')
    print('// %s' % prefix)
    for key in sorted(frommap):
        if key in special:
            continue
        print(' \\value %s' % key)
    print('')


def printtable(frommap, prefix):
    """Print C++ lookup table (enum, name, code) for the map, one entry per unique code."""
    special = ('Any%s' % prefix, 'C')
    lowerprefix = prefix.lower()
    seencodes = set()

    print('''static const struct %sTblData { const QLocale::%s %s; const char* name; const char* code; } %sTbl[] = {''' % (lowerprefix, prefix, lowerprefix, lowerprefix))
    # print Default and C first
    for key in frommap:
        if key not in special:
            continue
        code = frommap[key]['code']
        name = frommap[key]['name']
        print(' { QLocale::%s::%s, %s, %s },' % (prefix, key, tochar(name), tochar(code)))
    # now everything except those but only unique code values
    for key in sorted(frommap):
        if key in special:
            continue
        code = frommap[key]['code']
        if code in seencodes:
            continue
        seencodes.add(code)
        name = frommap[key]['name']
        print(' { QLocale::%s::%s, %s, %s },' % (prefix, key, tochar(name), tochar(code)))
    print('};')
    print('static const qint16 %sTblSize = sizeof(%sTbl) / sizeof(%sTblData);\n' % (lowerprefix, lowerprefix, lowerprefix))


def printlocaledata(frommap, key):
    """Print the QLocalePrivate initializer for frommap[key] unless the entry has to be skipped."""
    value = frommap[key]
    # skip table entries without country (non-territory), unless it is artificial, this is done to
    # preserve the assumption in QLocalePrivate::findLocale that "AnyCountry" means "find me a
    # language, no matter what country it is spoken in" if "AnyCountry" is passed to it as argument
    # and also shrinks the table
    if value['country'] == 'QLocale::Country::AnyCountry' and not key == 'C':
        return
    # HACK: skip table entries the language of which is unknown
    if key == 'apc_SY' or key == 'skr_PK':
        return
    # HACK: skip table entries with and without specific script
    if key == 'ha_Arab_NG':
        return

    # the order of the fields is the order of the QLocalePrivate members
    enumkeys = (
        'language', 'script', 'country',
        'first_day_of_week', 'weekend_start', 'weekend_end',
    )
    uintkeys = (
        'decimal', 'group', 'list', 'percent', 'minus', 'plus', 'exponential', 'zero',
    )
    charkeys = (
        'short_date_format', 'long_date_format', 'short_time_format', 'long_time_format',
        'am', 'pm',
    )
    arraykeys = (
        'standalone_short_month_names', 'standalone_long_month_names',
        'standalone_narrow_month_names',
        'short_month_names', 'long_month_names', 'narrow_month_names',
        'standalone_short_day_names', 'standalone_long_day_names',
        'standalone_narrow_day_names',
        'short_day_names', 'long_day_names', 'narrow_day_names',
    )
    fields = [str(value[fieldkey]) for fieldkey in enumkeys]
    fields.extend(str(touint(value[fieldkey])) for fieldkey in uintkeys)
    fields.extend(tochar(value[fieldkey]) for fieldkey in charkeys)
    fields.extend(tochararray(value[fieldkey]) for fieldkey in arraykeys)
    print(' { %s }, // %s' % (', '.join(fields), key))


def printaliastable(frommap, prefix):
    """Print C++ table of (original, substitute) code pairs, prefix names the table."""
    print('''static const struct %sAliasTblData { const char* original; const char* substitute; } %sAliasTbl[] = {''' % (prefix, prefix))
    for key in sorted(frommap):
        # territories and scripts entries can contain multiple replacements, add one for each
        for value in frommap[key].split(' '):
            print(' { "%s", "%s" },' % (key, value))
    print('};')
    print('static const qint16 %sAliasTblSize = sizeof(%sAliasTbl) / sizeof(%sAliasTblData);\n' % (prefix, prefix, prefix))


# main maps
languagemap = {}
countrymap = {}
scriptmap = {}
localemap = {}
likelysubtagsmap = {}
languagealiasmap = {}
countryaliasmap = {}
scriptaliasmap = {}
# cross-reference maps
localeparentmap = {}
localeparentvaluesmap = {}
localescriptmap = {}
localefirstdaymap = {}
localeweekendstartmap = {}
localeweekendendmap = {}
localenumberingmap = {}
# regular expressions
localeregex = re.compile(r'([^_|\-|\.|@]+)+')

# artificial entries
languagemap['AnyLanguage'] = {
    'code': '',
    'name': 'Default',
}
languagemap['C'] = {
    'code': 'C',
    'name': 'C',
}
countrymap['AnyCountry'] = {
    'code': '',
    'name': 'Default',
}
scriptmap['AnyScript'] = {
    'code': '',
    'name': 'Default',
}

# locale to parent parsing
tree = ET.parse('common/supplemental/supplementalData.xml')
root = tree.getroot()
for parentlocale in root.findall('./parentLocales/parentLocale'):
    parentlocaleparent = parentlocale.get('parent')
    parentlocalelocales = parentlocale.get('locales')
    localeparentmap[parentlocaleparent] = parentlocalelocales.split(' ')

# locale to script parsing
# only languages with one primary script are mapped because if there are multiple it should be
# specified in the locale data, see:
# https://sites.google.com/site/cldr/development/updating-codes/update-language-script-info/language-script-description
# secondary scripts are not taken into account at all
for suppllanguage in root.findall('./languageData/language'):
    suppllanguagetype = suppllanguage.get('type')
    suppllanguagescripts = suppllanguage.get('scripts')
    suppllanguagealt = suppllanguage.get('alt')
    if not suppllanguagescripts or suppllanguagealt == 'secondary':
        # alternative entry, skip it
        continue
    suppllanguagescriptslist = suppllanguagescripts.split(' ')
    if not len(suppllanguagescriptslist) == 1:
        # skip entries without definitive primary script
        continue
    suppllanguageterritories = suppllanguage.get('territories')
    if not suppllanguageterritories:
        # territories is optional, if not specified use artificial value to map all languages of
        # that type to the script
        suppllanguageterritories = 'AnyTerritory'
    localescriptmap[suppllanguagetype] = {
        'script': suppllanguagescripts,
        'territories': suppllanguageterritories.split(' '),
    }

# locale to first day parsing
for firstday in root.findall('./weekData/firstDay'):
    firstdayday = firstday.get('day')
    firstdayterritories = firstday.get('territories')
    localefirstdaymap[todayenum(firstdayday)] = stripxmltext(firstdayterritories).split(' ')

# locale to weekend start parsing
for weekstart in root.findall('./weekData/weekendStart'):
    weekstartday = weekstart.get('day')
    weekstartterritories = weekstart.get('territories')
    localeweekendstartmap[todayenum(weekstartday)] = stripxmltext(weekstartterritories).split(' ')

# locale to weekend end parsing
for weekend in root.findall('./weekData/weekendEnd'):
    weekendday = weekend.get('day')
    weekendterritories = weekend.get('territories')
    localeweekendendmap[todayenum(weekendday)] = stripxmltext(weekendterritories).split(' ')

# locale to numbering system parsing
tree = ET.parse('common/supplemental/numberingSystems.xml')
root = tree.getroot()
for numberingsystem in root.findall('./numberingSystems/numberingSystem'):
    numberingsystemid = numberingsystem.get('id')
    numberingsystemdigits = numberingsystem.get('digits')
    if numberingsystemdigits:
        # either digits or rules is set
        localenumberingmap[numberingsystemid] = stripxmltext(numberingsystemdigits)

# language parsing
tree = ET.parse('common/main/en.xml')
root = tree.getroot()
for language in root.findall('./localeDisplayNames/languages/language'):
    languagetype = language.get('type')
    normallanguage = normalizestring(language.text)
    if normallanguage in ('Nauru', 'Tokelau', 'Tuvalu'):
        # country and language are the same, suffix to solve enum clashes
        normallanguage = '%sLanguage' % normallanguage
    languagemap[normallanguage] = {
        'code': languagetype,
        'name': language.text,
    }

if printenumsandexit:
    printenum(languagemap, 'Language')
elif printdocsandexit:
    printdoc(languagemap, 'Language')
else:
    printtable(languagemap, 'Language')

# country parsing
for country in root.findall('./localeDisplayNames/territories/territory'):
    countrytype = country.get('type')
    normalcountry = normalizestring(country.text)
    countrymap[normalcountry] = {
        'code': countrytype,
        'name': country.text,
    }

if printenumsandexit:
    printenum(countrymap, 'Country')
elif printdocsandexit:
    printdoc(countrymap, 'Country')
else:
    printtable(countrymap, 'Country')

# scripts parsing
for script in root.findall('./localeDisplayNames/scripts/script'):
    scripttype = script.get('type')
    normalscript = normalizestring(script.text)
    if not normalscript.endswith('Script'):
        # suffix script if needed
        normalscript = '%sScript' % normalscript
    if normalscript in ('UnknownScript', 'CommonScript'):
        # only interested in specific scripts
        continue
    scriptmap[normalscript] = {
        'code': scripttype,
        'name': script.text,
    }

if printenumsandexit:
    printenum(scriptmap, 'Script')
    sys.exit(0)
elif printdocsandexit:
    printdoc(scriptmap, 'Script')
    sys.exit(0)
else:
    printtable(scriptmap, 'Script')

# these defaults are used as parent locales fallback, C uses them as actual values because root
# contains UTF-8 characters and for compatibility.
# for the rest defaults are set from root
localedefaults = {
    # enums
    'language': 'QLocale::Language::AnyLanguage',
    'script': 'QLocale::Script::AnyScript',
    'country': 'QLocale::Country::AnyCountry',
    'first_day_of_week': 'Qt::Monday',
    'weekend_start': 'Qt::Saturday',
    'weekend_end': 'Qt::Sunday',
    # characters
    'decimal': '.',
    'group': ',',
    'list': ';',
    'percent': '%',
    'zero': '0',
    'minus': '-',
    'plus': '+',
    'exponential': 'e', # default in CLDR is E
    # strings
    'short_date_format': 'd MMM yyyy', # default in CLDR is y-MM-dd
    'long_date_format': 'd MMMM yyyy',
    'short_time_format': 'HH:mm:ss', # default in CLDR is HH:mm
    'long_time_format': 'HH:mm:ss z',
    'am': 'AM',
    'pm': 'PM',
    # arrays
    'standalone_short_month_names': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'standalone_long_month_names': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'standalone_narrow_month_names': ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'],
    'short_month_names': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'long_month_names': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'narrow_month_names': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'],
    'standalone_short_day_names': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
    'standalone_long_day_names': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    'standalone_narrow_day_names': ['M', 'T', 'W', 'T', 'F', 'S', 'S'],
    'short_day_names': ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'],
    'long_day_names': ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
    'narrow_day_names': ['1', '2', '3', '4', '5', '6', '7'],
}
# artificial entries
localemap['C'] = {}
mapcopy(localedefaults, localemap['C'])
localemap['C']['language'] = 'QLocale::Language::C'

# CLDR symbol element name to locale map key
_SYMBOLKEYS = (
    ('decimal', 'decimal'),
    ('group', 'group'),
    ('list', 'list'),
    ('percentSign', 'percent'),
    ('minusSign', 'minus'),
    ('plusSign', 'plus'),
    ('exponential', 'exponential'),
)
# CLDR context and width types to the parts the locale map keys are made of
_CONTEXTPREFIXES = {'stand-alone': 'standalone_', 'format': ''}
_WIDTHNAMES = {'wide': 'long', 'abbreviated': 'short', 'narrow': 'narrow'}


def _readnames(calendar, kind, tolist, values):
    """Update the month or day (kind) name lists in values from the calendar element."""
    for context in calendar.findall('./%ss/%sContext' % (kind, kind)):
        prefix = _CONTEXTPREFIXES.get(context.get('type'))
        if prefix is None:
            continue
        for width in context.findall('./%sWidth' % kind):
            widthname = _WIDTHNAMES.get(width.get('type'))
            if widthname is None:
                continue
            key = '%s%s_%s_names' % (prefix, widthname, kind)
            values[key] = tolist(width.findall('./%s' % kind), values[key])


# locales parsing
# TODO: accept only "contributed" or "approved" values
def readlocale(fromxml, tomap, isparent):
    """Read one CLDR locale file into tomap, keyed by the file base name.

    fromxml is the path to the locale XML, tomap is the map to store the values in and isparent
    tells if the locale is read as parent locale, i.e. based on the script defaults and without
    country and script enums. Locales with variant are not stored.
    """
    locale = os.path.splitext(os.path.basename(fromxml))[0]
    if '_' in locale:
        # merge parent locales (non-territory) into the current one so that data can be read
        # from them. they do not have territory and currency data is based on territory so
        # cross-reference is not possible without doing so
        localeparent = locale.split('_')[0]
        xmlparent = os.path.join(os.path.dirname(fromxml), '%s.xml' % localeparent)
        root = xmlmerge(fromxml, xmlparent)
    else:
        root = ET.parse(fromxml).getroot()

    variant = root.find('./identity/variant')
    if variant is not None:
        # TODO: variants are not supported by QLocale
        return

    language = root.find('./identity/language')
    langtype = language.get('type')
    country = root.find('./identity/territory')
    countrytype = None
    scripttype = None
    numbertype = 'latn' # CLDR default

    values = {}
    tomap[locale] = values
    if isparent:
        mapcopy(localedefaults, values)
    else:
        mapcopy(localeparentvaluesmap['root'], values)

    # set defaults from parent locale if territory is specified
    if country is not None:
        for parent, children in localeparentmap.items():
            if locale not in children:
                continue
            if parent not in localeparentvaluesmap:
                # reference to locale without data
                continue
            mapcopy(localeparentvaluesmap[parent], values)

    # then from main locale (non-territory) filling the blanks that even parent locales do not fill
    if not isparent:
        mapmerge(localemap[langtype], values, localedefaults)

    # find the enums from mapped values
    values['language'] = tolanguageenum(langtype)
    if not isparent and country is not None:
        countrytype = country.get('type')
        values['country'] = tocountryenum(countrytype)
    else:
        # territory often is not specified, use language code as fallback
        countrytype = langtype.upper()

    # script is specified either in the locale or supplemental data
    script = root.find('./identity/script')
    if script is not None:
        scripttype = script.get('type')
    elif not isparent and langtype in localescriptmap:
        # scripts map is partial, pick from what is mapped
        scriptterritories = localescriptmap[langtype]['territories']
        if 'AnyTerritory' in scriptterritories or countrytype in scriptterritories:
            scripttype = localescriptmap[langtype]['script']

    defaultnumbersystem = root.find('./numbers/defaultNumberingSystem')
    if defaultnumbersystem is not None:
        numbertype = defaultnumbersystem.text

    # find values from supplemental maps
    if not isparent and scripttype:
        values['script'] = toscriptenum(scripttype)

    supplementalmaps = (
        ('first_day_of_week', localefirstdaymap),
        ('weekend_start', localeweekendstartmap),
        ('weekend_end', localeweekendendmap),
    )
    for valuekey, supplementalmap in supplementalmaps:
        # if the territory is listed for more than one day the last one wins
        for dayenum, territories in supplementalmap.items():
            if countrytype in territories:
                values[valuekey] = dayenum

    # find from locale data
    for symbol in root.findall('./numbers/symbols'):
        if not symbol.get('numberSystem') == numbertype:
            # should be the locale numeric system
            continue
        for symboltag, valuekey in _SYMBOLKEYS:
            element = symbol.find('./%s' % symboltag)
            # only single character symbols can be stored, see touint(). empty elements have
            # no text at all
            if element is not None and element.text and len(element.text) == 1:
                values[valuekey] = element.text
        # zero is from cross-reference numeric system map,
        # taking the first character works even for UTF-8 chars
        values['zero'] = localenumberingmap[numbertype][0]
        # locale numeric system was found, break
        break

    for calendar in root.findall('./dates/calendars/calendar'):
        if not calendar.get('type') == 'gregorian':
            # all values should be from gregorian calendar
            continue

        for kind in ('date', 'time'):
            for formatlength in calendar.findall('./%sFormats/%sFormatLength' % (kind, kind)):
                formattype = formatlength.get('type')
                if formattype not in ('short', 'long'):
                    continue
                pattern = formatlength.find('./%sFormat/pattern' % kind)
                values['%s_%s_format' % (formattype, kind)] = todatetimeformat(pattern.text)

        for dayperiodwidth in calendar.findall('./dayPeriods/dayPeriodContext/dayPeriodWidth'):
            if not dayperiodwidth.get('type') == 'wide':
                # all values should be in wide format
                continue
            for dayperiod in dayperiodwidth.findall('dayPeriod'):
                dayperiodtype = dayperiod.get('type')
                if dayperiodtype in ('am', 'pm'):
                    values[dayperiodtype] = dayperiod.text

        # month/day names
        _readnames(calendar, 'month', tomonthslist, values)
        _readnames(calendar, 'day', todayslist, values)

        # gregorian calendar was found, break
        break


# read parent locales first, sorted so that the output does not depend on the order in which the
# filesystem lists the files
for xml in sorted(glob.glob('common/main/*.xml')):
    xmlbase = os.path.splitext(os.path.basename(xml))[0]
    if xmlbase not in localeparentmap:
        continue
    readlocale(xml, localeparentvaluesmap, True)

# now everything including those
for xml in sorted(glob.glob('common/main/*.xml')):
    if xml.endswith('/root.xml'):
        # root is not actual locale
        continue
    readlocale(xml, localemap, False)

print('''static const QLocalePrivate localeTbl[] = {''')

# print C first
printlocaledata(localemap, 'C')

# now everything except that
for key in sorted(localemap):
    if key == 'C':
        continue
    printlocaledata(localemap, key)

print('};')
print('static const qint16 localeTblSize = sizeof(localeTbl) / sizeof(QLocalePrivate);\n')

# likely subtags parsing
tree = ET.parse('common/supplemental/likelySubtags.xml')
root = tree.getroot()
for likelysubtag in root.findall('./likelySubtags/likelySubtag'):
    likelysubtagfrom = likelysubtag.get('from')
    likelysubtagto = likelysubtag.get('to')
    # split code into language, script and country to make it possible to match against regardless
    # of separators and remap to enums so that it is possible to substitute both named and enumed
    # locale searches at the cost of not covering all named cases
    likelysubtagfromsplit = localeregex.findall(likelysubtagfrom)
    likelysubtagfromsplitlen = len(likelysubtagfromsplit)
    likelysubtagtosplit = localeregex.findall(likelysubtagto)
    likelysubtagtosplitlen = len(likelysubtagtosplit)
    likelyfromlanguage = None
    likelyfromscript = None
    likelyfromcountry = None
    if likelysubtagfromsplitlen == 1:
        likelyfromlanguage = tolanguageenum(likelysubtagfromsplit[0])
        likelyfromscript = 'QLocale::Script::AnyScript'
        likelyfromcountry = 'QLocale::Country::AnyCountry'
    elif likelysubtagfromsplitlen == 2:
        # the second part is looked up as country, if it is script the entry ends up skipped
        # unless a country happens to have the same code
        likelyfromlanguage = tolanguageenum(likelysubtagfromsplit[0])
        likelyfromscript = 'QLocale::Script::AnyScript'
        likelyfromcountry = tocountryenum(likelysubtagfromsplit[1])
    elif likelysubtagfromsplitlen == 3:
        likelyfromlanguage = tolanguageenum(likelysubtagfromsplit[0])
        likelyfromscript = toscriptenum(likelysubtagfromsplit[1])
        likelyfromcountry = tocountryenum(likelysubtagfromsplit[2])
    elif likelysubtagfromsplitlen > 3 or likelysubtagtosplitlen > 3:
        # the regular expression is intentionally greedy, if there are more than 3 group matches
        # then it is likely a variant and that is not supported case yet
        print(likelysubtagfrom, likelysubtagfromsplit)
        print(likelysubtagto, likelysubtagtosplit)
        sys.exit(1)

    # the substitute is expected to be complete, i.e. language, script and country
    likelytolanguage = tolanguageenum(likelysubtagtosplit[0])
    likelytoscript = toscriptenum(likelysubtagtosplit[1])
    likelytocountry = tocountryenum(likelysubtagtosplit[2])
    likelyenums = (
        likelyfromlanguage, likelyfromscript, likelyfromcountry,
        likelytolanguage, likelytoscript, likelytocountry,
    )
    if not all(likelyenums):
        # if there are no enums for the codes skip the entry
        continue
    likelysubtagsmap[likelysubtagfrom] = {
        'fromlanguage' : likelyfromlanguage,
        'fromscript' : likelyfromscript,
        'fromcountry' : likelyfromcountry,
        'tolanguage' : likelytolanguage,
        'toscript' : likelytoscript,
        'tocountry' : likelytocountry,
    }

print('''static const struct subtagAliasTblData { const QLocale::Language fromlanguage; const QLocale::Script fromscript; const QLocale::Country fromcountry; const QLocale::Language tolanguage; const QLocale::Script toscript; const QLocale::Country tocountry; } subtagAliasTbl[] = {''')

for key in sorted(likelysubtagsmap):
    value = likelysubtagsmap[key]
    print(''' { %s, %s, %s, %s, %s, %s },''' % (
            value['fromlanguage'],
            value['fromscript'],
            value['fromcountry'],
            value['tolanguage'],
            value['toscript'],
            value['tocountry'],
        )
    )

print('};')
print('static const qint16 subtagAliasTblSize = sizeof(subtagAliasTbl) / sizeof(subtagAliasTblData);\n')

# language alias parsing
tree = ET.parse('common/supplemental/supplementalMetadata.xml')
root = tree.getroot()
for languagealias in root.findall('./metadata/alias/languageAlias'):
    languagealiastype = languagealias.get('type')
    languagealiasreplacement = languagealias.get('replacement')
    if '_' in languagealiastype or '-' in languagealiastype \
        or '_' in languagealiasreplacement or '-' in languagealiasreplacement:
        # if either the original or the substitute is BCP47 code (language and script/country
        # included) skip it because QLocalePrivate::codeToLanguage() should be dealing with
        # language codes only
        continue
    languagealiasmap[languagealiastype] = languagealiasreplacement

printaliastable(languagealiasmap, 'language')

# country alias parsing
for territoryalias in root.findall('./metadata/alias/territoryAlias'):
    territoryaliastype = territoryalias.get('type')
    territoryaliasreplacement = territoryalias.get('replacement')
    countryaliasmap[territoryaliastype] = territoryaliasreplacement

printaliastable(countryaliasmap, 'country')

# script alias parsing
for scriptalias in root.findall('./metadata/alias/scriptAlias'):
    scriptaliastype = scriptalias.get('type')
    scriptaliasreplacement = scriptalias.get('replacement')
    scriptaliasmap[scriptaliastype] = scriptaliasreplacement

printaliastable(scriptaliasmap, 'script')