katie/scripts/genlocale.py
Ivailo Monev 92b514fc06 quotation data contains only chars so it can be uint in the locale table
Signed-off-by: Ivailo Monev <xakepa10@laimg.moc>
2019-08-09 14:52:38 +00:00

1008 lines
No EOL
38 KiB
Python
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/python3
#-*- coding: UTF-8 -*-
# Data is from https://unicode.org/Public/cldr/35.1/core.zip
import os, sys, glob
import xml.etree.ElementTree as ET
# when --printenums is passed on the command line the enums are printed instead of the lookup
# tables and the script exits right after the script enum is printed
printenumsandexit = ('--printenums' in sys.argv)
def mapcopy(frommap, tomap):
    """Copy every entry of frommap into tomap, replacing entries that already exist.

    The copy is shallow, values are shared between the two maps.
    """
    tomap.update(frommap)
def mapmerge(frommap, tomap, defaultmap):
    """Copy into tomap only the entries of frommap that differ from defaultmap.

    Every key of frommap must be present in defaultmap.
    """
    for key, value in frommap.items():
        if value != defaultmap[key]:
            tomap[key] = value
def listcopy(fromlist, tolist):
    """Append all entries of fromlist, in order, to the end of tolist."""
    tolist.extend(fromlist)
def stripxmltext(fromxmltext):
    """Return fromxmltext as single line of space separated words.

    New lines and tabs are removed (not replaced), every run of spaces is collapsed into one
    space and leading/trailing whitespace is stripped.
    """
    result = fromxmltext.replace('\n', '')
    result = result.replace('\t', '')
    # repeat until no double-space is left so that runs of any length are collapsed
    while '  ' in result:
        result = result.replace('  ', ' ')
    return result.strip()
def touint(fromstring):
    """Return the code point of the first character of fromstring.

    fromstring must not be empty.
    """
    # NOTE: symbols (plus, minus, etc.) are assumed to be single character which is not true for
    # many of the locales, however the API for those does not handle them as strings thus the first
    # character only is used
    return ord(fromstring[0])
def tochar(fromstring):
    """Return fromstring as C string literal with explicit terminator, Q_NULLPTR if it is empty.

    The string is used as is, nothing in it is escaped.
    """
    if not fromstring:
        return 'Q_NULLPTR'
    return '"%s\\0"' % fromstring
def toescapedchar(fromstring):
    """Same as tochar() except that double quotes in fromstring are backslash-escaped."""
    if not fromstring:
        return 'Q_NULLPTR'
    escaped = fromstring.replace('"', '\\"')
    return '"%s\\0"' % escaped
def tochararray(fromstringlist):
    """Return the strings of fromstringlist as C array initializer of tochar() literals.

    The entries are separated by ", " without separator after the last entry.
    """
    # joining instead of appending ", " to each entry means there is no trailing comma to clean up
    return '{ %s }' % ', '.join(tochar(string) for string in fromstringlist)
def todayenum(day):
    """Return the Qt day of week enum for three-letter day (mon, tue, etc.).

    Prints a message and exits the script with status 1 if the day is not known.
    """
    dayenums = (
        ('mon', 'Qt::Monday'),
        ('tue', 'Qt::Tuesday'),
        ('wed', 'Qt::Wednesday'),
        ('thu', 'Qt::Thursday'),
        ('fri', 'Qt::Friday'),
        ('sat', 'Qt::Saturday'),
        ('sun', 'Qt::Sunday'),
    )
    for dayname, dayenum in dayenums:
        if day == dayname:
            return dayenum
    print('Unknown day: %s' % day)
    sys.exit(1)
def tolistformat(fromformat):
    """Convert the {0}, {1} and {2} placeholders of fromformat to %1, %2 and %3."""
    result = fromformat
    for index in range(3):
        result = result.replace('{%d}' % index, '%%%d' % (index + 1))
    return result
def tocurrencyformat(fromformat, frommap):
    """Convert currency pattern to list of formats where %1 is the number and %2 the symbol.

    The list has one entry for each ';' separated form of fromformat (the second one, if any, is
    the negative form). '-' and '+' are replaced with the 'minus' and 'plus' values of frommap.
    """
    # everything that is part of the number placeholder becomes '#'
    numbertable = str.maketrans('0,.', '###')
    result = []
    # currency format can optionally have negative form separated by ';'
    for fmt in fromformat.split(';'):
        fmt = fmt.translate(numbertable)
        # squeeze the placeholder to single '#', each pass halves the runs
        for _ in range(20):
            fmt = fmt.replace('##', "#")
        fmt = fmt.replace('#', "%1").replace(u'\xa4', "%2")
        fmt = fmt.replace('-', frommap['minus'])
        fmt = fmt.replace('+', frommap['plus'])
        result.append(fmt)
    return result
def todatetimeformat(fromformat):
    """Return fromformat without the date/time tags that are not supported.

    Each unsupported tag, lower and upper case and repeated 1 to 4 times, is removed together with
    the separator next to it when it appears in one of the known forms.
    """
    # valid are y, m, M, d, h, H, s, a, A, z and t
    unsupportedtags = 'guqlwfeckjv'
    # the bare tag is last so that the forms with separator are tried first
    tagforms = ('%s, ', ', %s', '%s.', '.%s', '%s-', '-%s', '(%s)', '%s')
    result = fromformat
    for tag in unsupportedtags:
        for tagform in tagforms:
            for letter in (tag, tag.upper()):
                # longest repetition first
                for count in (4, 3, 2, 1):
                    result = result.replace(tagform % (letter * count), '')
    return result
def tomonthslist(fromxmlelements, initialvalues):
    """Return copy of initialvalues with the month names from fromxmlelements filled in.

    The "type" attribute of each element is the month number ('1' to '12') and decides which
    entry its text replaces. Prints a message and exits the script with status 1 on any other
    type.
    """
    monthtypes = [str(number) for number in range(1, 13)]
    result = list(initialvalues)
    for month in fromxmlelements:
        monthtype = month.get('type')
        if monthtype not in monthtypes:
            print('Unknown month: %s' % monthtype)
            sys.exit(1)
        result[monthtypes.index(monthtype)] = month.text
    return result
def todayslist(fromxmlelements, initialvalues):
    """Return copy of initialvalues with the day names from fromxmlelements filled in.

    The "type" attribute of each element is three-letter day, the result starts with Sunday.
    Prints a message and exits the script with status 1 on any other type.
    """
    daytypes = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
    result = list(initialvalues)
    for day in fromxmlelements:
        daytype = day.get('type')
        if daytype in daytypes:
            result[daytypes.index(daytype)] = day.text
        else:
            print('Unknown day: %s' % daytype)
            sys.exit(1)
    return result
def normalizestring(fromstring):
    """Return fromstring (language, country or script name) in form usable as enum identifier.

    Spaces, dashes, parentheses and apostrophes are removed, '&' becomes 'And', abbreviations are
    expanded and the accented characters known to appear in the names are replaced with their
    ASCII counterparts. The replacements are applied one after another in the order listed.
    """
    replacements = (
        (' ', ''),
        ('-', ''),
        ("'", ''),
        ('&', 'And'),
        ('(', ''),
        (')', ''),
        ('St.', 'St'),
        ('U.S.', 'UnitedStates'),
        # UTF-8 chars
        ('ʼ', ''),
        # typographic apostrophes (e.g. in "Côte d’Ivoire") have to be removed too, otherwise
        # they end up in the identifier
        ('\u2018', ''),
        ('\u2019', ''),
        ('ü', 'u'),
        ('å', 'a'),
        ('ç', 'c'),
        ('õ', 'o'),
        ('Å', 'A'),
        ('ô', 'o'),
        ('ã', 'a'),
        ('é', 'e'),
        ('í', 'i'),
    )
    result = fromstring
    for old, new in replacements:
        result = result.replace(old, new)
    return result
# printenum prints mapped values that have unique code only, the rest are set to the enum of the
# first occurrence. the reason for doing so is because table lookups for figuring out language,
# script and country required for constructing QLocale from string (named locales) relies on the
# fact that there is only one code for each, if that is not the case constructing copy of locale
# from its name will not copy it correctly. printtable skips duplicate code entries entirely
def printenum(frommap, prefix):
    """Print the enum named prefix with one enumerator for each key of frommap.

    frommap maps enumerator names to maps with 'code' entry, prefix is the enum name (e.g.
    'Language'). The artificial 'Any<prefix>' and 'C' keys are printed first, the rest follow
    sorted by name and are numbered consecutively. Keys with code that another key already has
    are printed afterwards as aliases of that key and 'Last<prefix>' is set to the last numbered
    key.
    """
    artificialkeys = ('Any%s' % prefix, 'C')
    keyscount = 0
    aliases = []
    # code to the key that was numbered for it, which is what the aliases of the code are set to
    enumbycode = {}

    print(' enum %s {' % prefix)
    # print Default and C first
    for key in frommap.keys():
        if key not in artificialkeys:
            continue
        print(' %s = %d,' % (key, keyscount))
        keyscount += 1

    # now everything except those, save last key for later
    lastkey = ''
    for key in sorted(frommap.keys()):
        if key in artificialkeys:
            continue
        code = frommap[key]['code']
        if code in enumbycode:
            aliases.append(key)
            continue
        enumbycode[code] = key
        print(' %s = %d,' % (key, keyscount))
        lastkey = key
        keyscount += 1

    # now aliases
    print('')
    for alias in sorted(aliases):
        aliasenum = enumbycode[frommap[alias]['code']]
        print(' %s = %s,' % (alias, aliasenum))

    # print last key
    print('\n Last%s = %s' % (prefix, lastkey))
    print(' };\n')
def printtable(frommap, prefix):
    """Print the name/code/enum lookup table for the enum named prefix.

    frommap maps enumerator names to maps with 'code' and 'name' entries. The artificial
    'Any<prefix>' and 'C' keys are printed first, the rest follow sorted by name. Only the first
    key of each code is printed. The table is followed by constant with its size.
    """
    lowerprefix = prefix.lower()
    artificialkeys = ('Any%s' % prefix, 'C')
    rowformat = ' { %s, %s, QLocale::%s::%s },'
    header = '\n'.join((
        'static const struct %sTblData {',
        'const char* name;',
        'const char* code;',
        'const QLocale::%s %s;',
        '} %sTbl[] = {',
    ))
    print(header % (lowerprefix, prefix, lowerprefix, lowerprefix))

    # print Default and C first
    for key in frommap.keys():
        if key in artificialkeys:
            entry = frommap[key]
            code = entry['code']
            name = entry['name']
            print(rowformat % (tochar(name), tochar(code), prefix, key))

    # now everything except those but only unique code values
    printedcodes = set()
    for key in sorted(frommap.keys()):
        if key in artificialkeys:
            continue
        code = frommap[key]['code']
        if code in printedcodes:
            continue
        printedcodes.add(code)
        name = frommap[key]['name']
        print(rowformat % (tochar(name), tochar(code), prefix, key))

    print('};')
    print('static const qint16 %sTblSize = sizeof(%sTbl) / sizeof(%sTblData);\n' % ((lowerprefix,) * 3))
def printlocaledata(frommap, key):
    """Print the locale table entry for the locale named key.

    frommap maps locale names to maps of locale values (same keys as localedefaults), key is
    the locale name and is printed as comment after the entry. Nothing is printed for locales
    without country, except for C.
    """
    value = frommap[key]
    # skip table entries without country (non-territory), unless it is artificial, this is done to
    # preserve the assumption in QLocalePrivate::findLocale that "AnyCountry" means "find me a
    # language, no matter what country it is spoken in" if "AnyCountry" is passed to it as argument
    # and also shrinks the table
    if value['country'] == 'QLocale::Country::AnyCountry' and not key == 'C':
        return
    # the order of the values must match the order of the placeholders in the template, the
    # comments in the template name the group of values that follows them
    print(''' {
%s, %s, %s,
// week
%s, %s, %s,
// symbols
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
// quotation
%s, %s, %s, %s,
// endonym
%s, %s,
// list pattern
%s, %s, %s, %s,
// date/time format
%s, %s, %s, %s,
// am/pm
%s, %s,
// currency
%s, %s, %s, %s,
%s,
// month names
%s,
%s,
%s,
%s,
%s,
%s,
// day names
%s,
%s,
%s,
%s,
%s,
%s
}, // %s''' % (
            # enums, printed as is
            value['language'],
            value['script'],
            value['country'],
            value['first_day_of_week'],
            value['weekend_start'],
            value['weekend_end'],
            # symbols are printed as code points, digits and rounding as is
            touint(value['decimal']),
            touint(value['group']),
            touint(value['list']),
            touint(value['percent']),
            touint(value['minus']),
            touint(value['plus']),
            touint(value['exponential']),
            value['currency_digits'],
            value['currency_rounding'],
            touint(value['zero']),
            # quotation is printed as code points too
            touint(value['quotation_start']),
            touint(value['quotation_end']),
            touint(value['alternate_quotation_start']),
            touint(value['alternate_quotation_end']),
            # strings
            tochar(value['language_endonym']),
            tochar(value['country_endonym']),
            tochar(value['list_pattern_part_start']),
            tochar(value['list_pattern_part_mid']),
            tochar(value['list_pattern_part_end']),
            tochar(value['list_pattern_part_two']),
            # only the date formats are passed through todatetimeformat() here
            tochar(todatetimeformat(value['short_date_format'])),
            tochar(todatetimeformat(value['long_date_format'])),
            tochar(value['short_time_format']),
            tochar(value['long_time_format']),
            tochar(value['am']),
            tochar(value['pm']),
            tochar(value['currency_symbol']),
            tochar(value['currency_format']),
            tochar(value['currency_negative_format']),
            tochar(value['currency_iso_code']),
            # arrays of strings
            tochararray(value['currency_display_name']),
            tochararray(value['standalone_short_month_names']),
            tochararray(value['standalone_long_month_names']),
            tochararray(value['standalone_narrow_month_names']),
            tochararray(value['short_month_names']),
            tochararray(value['long_month_names']),
            tochararray(value['narrow_month_names']),
            tochararray(value['standalone_short_day_names']),
            tochararray(value['standalone_long_day_names']),
            tochararray(value['standalone_narrow_day_names']),
            tochararray(value['short_day_names']),
            tochararray(value['long_day_names']),
            tochararray(value['narrow_day_names']),
            # locale name for the trailing comment
            key,
        )
    )
# main maps, enum name to code and display name. each starts with its artificial entries
languagemap = {
    'AnyLanguage': {'code': '', 'name': 'Default'},
    'C': {'code': 'C', 'name': 'C'},
}
countrymap = {
    'AnyCountry': {'code': '', 'name': 'Default'},
}
scriptmap = {
    'AnyScript': {'code': '', 'name': 'Default'},
}
# locale name to locale values
localemap = {}

# main lists
imperiallist = []

# cross-reference maps
localeparentmap = {}
localeparentvaluesmap = {}
localescriptmap = {}
localefirstdaymap = {}
localeweekendstartmap = {}
localeweekendendmap = {}
localeiso4217map = {}
localecurrencymap = {}
localenumberingmap = {}
# locale to parent parsing
tree = ET.parse('common/supplemental/supplementalData.xml')
root = tree.getroot()
for parentlocale in root.findall('./parentLocales/parentLocale'):
    # "locales" is space separated list of the locales that have "parent" as parent
    localeparentmap[parentlocale.get('parent')] = parentlocale.get('locales').split(' ')

# locale to script parsing
# only languages with one primary script are mapped because if there are multiple it should be
# specified in the locale data, see:
# https://sites.google.com/site/cldr/development/updating-codes/update-language-script-info/language-script-description
# secondary scripts are not taken into account at all.
for suppllanguage in root.findall('./languageData/language'):
    suppllanguagescripts = suppllanguage.get('scripts')
    if suppllanguage.get('alt') == 'secondary' or not suppllanguagescripts:
        # alternative entry, skip it
        continue
    if len(suppllanguagescripts.split(' ')) != 1:
        # skip entries without definitive primary script
        continue
    # territories is optional, if not specified use artificial value to map all languages of
    # that type to the script
    suppllanguageterritories = suppllanguage.get('territories') or 'AnyTerritory'
    localescriptmap[suppllanguage.get('type')] = {
        'script': suppllanguagescripts,
        'territories': suppllanguageterritories.split(' '),
    }
# week data parsing, each of the maps is Qt day enum to list of the territories it applies to.
# there can be more than one element for the same day thus the territories are added to the list
# of the day instead of replacing it. elements with "alt" attribute are alternatives to the
# regular data and are skipped

# locale to first day parsing
for firstday in root.findall('./weekData/firstDay'):
    if firstday.get('alt'):
        continue
    firstdayday = firstday.get('day')
    firstdayterritories = firstday.get('territories')
    localefirstdaymap.setdefault(todayenum(firstdayday), []).extend(
        stripxmltext(firstdayterritories).split(' '))

# locale to weekend start parsing
for weekstart in root.findall('./weekData/weekendStart'):
    if weekstart.get('alt'):
        continue
    weekstartday = weekstart.get('day')
    weekstartterritories = weekstart.get('territories')
    localeweekendstartmap.setdefault(todayenum(weekstartday), []).extend(
        stripxmltext(weekstartterritories).split(' '))

# locale to weekend end parsing
for weekend in root.findall('./weekData/weekendEnd'):
    if weekend.get('alt'):
        continue
    weekendday = weekend.get('day')
    weekendterritories = weekend.get('territories')
    localeweekendendmap.setdefault(todayenum(weekendday), []).extend(
        stripxmltext(weekendterritories).split(' '))
# locale to iso4217 parsing
for region in root.findall('./currencyData/region'):
    # data includes past currencies too, pick the current currency which is first
    localeiso4217map[region.get('iso3166')] = region.find('currency').get('iso4217')

# locale to currency parsing
for info in root.findall('./currencyData/fractions/info'):
    localecurrencymap[info.get('iso4217')] = {
        'digits': info.get('digits'),
        'rounding': info.get('rounding'),
    }

# locale to numbering system parsing
tree = ET.parse('common/supplemental/numberingSystems.xml')
root = tree.getroot()
for numberingsystem in root.findall('./numberingSystems/numberingSystem'):
    numberingsystemdigits = numberingsystem.get('digits')
    if not numberingsystemdigits:
        # either digits or rules is set, only systems with digits are mapped
        continue
    localenumberingmap[numberingsystem.get('id')] = stripxmltext(numberingsystemdigits)
# language parsing, the English names are what the enum names are made of
tree = ET.parse('common/main/en.xml')
root = tree.getroot()
for language in root.findall('./localeDisplayNames/languages/language'):
    normallanguage = normalizestring(language.text)
    if normallanguage in ('Nauru', 'Tokelau', 'Tuvalu'):
        # countries and language are the same, suffix to solve enum clashes
        normallanguage = '%sLanguage' % normallanguage
    languagemap[normallanguage] = {'code': language.get('type'), 'name': language.text}

if printenumsandexit:
    printenum(languagemap, 'Language')
else:
    printtable(languagemap, 'Language')

# country parsing
for country in root.findall('./localeDisplayNames/territories/territory'):
    countrymap[normalizestring(country.text)] = {
        'code': country.get('type'),
        'name': country.text,
    }

if printenumsandexit:
    printenum(countrymap, 'Country')
else:
    printtable(countrymap, 'Country')

# scripts parsing
for script in root.findall('./localeDisplayNames/scripts/script'):
    normalscript = normalizestring(script.text)
    if not normalscript.endswith('Script'):
        # suffix script if needed
        normalscript = '%sScript' % normalscript
    if normalscript in ('UnknownScript', 'CommonScript'):
        # only interested in specific scripts
        continue
    scriptmap[normalscript] = {'code': script.get('type'), 'name': script.text}

if printenumsandexit:
    printenum(scriptmap, 'Script')
    # the enums are all that was asked for
    sys.exit(0)
printtable(scriptmap, 'Script')
# these defaults are used as parent locales fallback, C uses them as actual values because root
# contains UTF-8 characters and for compatibility. for the rest defaults are set from root
localedefaults = {
    # enums
    'language': 'QLocale::Language::AnyLanguage',
    'script': 'QLocale::Script::AnyScript',
    'country': 'QLocale::Country::AnyCountry',
    'first_day_of_week': 'Qt::Monday',
    'weekend_start': 'Qt::Saturday',
    'weekend_end': 'Qt::Sunday',
    # characters, printed as code points
    'decimal': '.',
    'group': ',',
    'list': ';',
    'percent': '%',
    'zero': '0',
    'minus': '-',
    'plus': '+',
    'exponential': 'e', # default in CLDR is E
    'currency_digits': '2',
    'currency_rounding': '1', # not used, default in CLDR is 0
    # strings, except for the quotation which is printed as code points
    'quotation_start': '"', # default in CLDR is “
    'quotation_end': '"', # default in CLDR is ”
    'alternate_quotation_start': "'", # default in CLDR is ‘
    'alternate_quotation_end': "'", # default in CLDR is ’
    'language_endonym': '',
    'country_endonym': '',
    'list_pattern_part_start': "%1, %2",
    'list_pattern_part_mid': "%1, %2",
    'list_pattern_part_end': "%1, %2",
    'list_pattern_part_two': "%1, %2",
    'short_date_format': 'd MMM yyyy', # default in CLDR is y-MM-dd
    'long_date_format': 'd MMMM yyyy',
    'short_time_format': 'HH:mm:ss', # default in CLDR is HH:mm
    'long_time_format': 'HH:mm:ss z',
    'am': 'AM',
    'pm': 'PM',
    'currency_symbol': '',
    'currency_format': '%1%2',
    'currency_negative_format': '',
    'currency_iso_code': '',
    # arrays, month names start with January and day names with Sunday
    'currency_display_name': ['', '', '', '', '', '', ''], # only the first entry is used
    'standalone_short_month_names': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'standalone_long_month_names': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'standalone_narrow_month_names': ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'],
    'short_month_names': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    'long_month_names': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'narrow_month_names': ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'],
    'standalone_short_day_names': ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],
    'standalone_long_day_names': ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    'standalone_narrow_day_names': ['S', 'M', 'T', 'W', 'T', 'F', 'S'],
    'short_day_names': ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'],
    'long_day_names': ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    'narrow_day_names': ['7', '1', '2', '3', '4', '5', '6'],
}
# artificial entries, C is the defaults with its own language enum
localemap['C'] = {}
mapcopy(localedefaults, localemap['C'])
localemap['C']['language'] = 'QLocale::Language::C'
# locales parsing
# TODO: accept only "contributed" or "approved" values
def readlocale(fromxml, tomap, isparent):
    """Parse the locale file fromxml and store its values in tomap under the locale name.

    fromxml is the path to the locale file, the locale name is its file name without the ".xml"
    extension. tomap is the map that receives the values (map of locale name to map with the same
    keys as localedefaults). isparent should be True when the file is read as parent locale, in
    that case the values start from localedefaults instead of the values of root and neither the
    main locale values nor the script are looked up.

    Nothing is stored for locales with variant. The cross-reference maps have to be filled
    before calling this function, for non-parent locales the values of root and of the main
    (language only) locale have to be read already.
    """
    tree = ET.parse(fromxml)
    root = tree.getroot()

    variant = root.find('./identity/variant')
    if variant is not None:
        # TODO: variants are not supported by QLocale
        return

    language = root.find('./identity/language')
    langtype = language.get('type')
    country = root.find('./identity/territory')
    countrytype = None
    currencytype = None
    scripttype = None
    numbertype = 'latn' # CLDR default

    # the name is taken from the argument so that it does not matter what name the caller uses
    # for the path
    locale = os.path.basename(fromxml)
    locale = locale.replace('.xml', '')

    values = {}
    tomap[locale] = values
    if isparent:
        mapcopy(localedefaults, values)
    else:
        mapcopy(localeparentvaluesmap['root'], values)

    # set defaults from parent locale if territory is specified
    # NOTE(review): this requires the parent to be read already, also when the locale is itself
    # read as parent, confirm that the callers read the files in suitable order
    if country is not None:
        for parent in localeparentmap.keys():
            if locale in localeparentmap[parent]:
                mapcopy(localeparentvaluesmap[parent], values)

    # then from main locale (non-territory) filling the blanks that even parent locales do not fill
    if not isparent:
        mapmerge(localemap[langtype], values, localedefaults)

    # find the enums from mapped values
    for key in languagemap.keys():
        if langtype == languagemap[key]['code']:
            values['language'] = 'QLocale::Language::%s' % key
            break

    if country is not None:
        countrytype = country.get('type')
        for key in countrymap.keys():
            if countrytype == countrymap[key]['code']:
                values['country'] = 'QLocale::Country::%s' % key
                break
    else:
        # territory often is not specified, use language code as fallback
        countrytype = langtype.upper()

    # script is specified either in the locale or supplemental data
    script = root.find('./identity/script')
    if script is not None:
        scripttype = script.get('type')
    elif not isparent:
        # scripts map is partial, pick from what is mapped
        if langtype in localescriptmap.keys():
            scriptterritories = localescriptmap[langtype]['territories']
            if 'AnyTerritory' in scriptterritories \
                or countrytype in scriptterritories:
                scripttype = localescriptmap[langtype]['script']

    # store for later, data is partial so pick from what is mapped
    if countrytype in localeiso4217map.keys():
        currencytype = localeiso4217map[countrytype]

    defaultnumbersystem = root.find('./numbers/defaultNumberingSystem')
    if defaultnumbersystem is not None:
        numbertype = defaultnumbersystem.text

    # find values from supplemental maps
    if not isparent and scripttype:
        for key in scriptmap.keys():
            if scriptmap[key]['code'] == scripttype:
                values['script'] = 'QLocale::Script::%s' % key
                break

    # the week data maps are day enum to list of territories
    weekdatamaps = (
        (localefirstdaymap, 'first_day_of_week'),
        (localeweekendstartmap, 'weekend_start'),
        (localeweekendendmap, 'weekend_end'),
    )
    for weekdatamap, valueskey in weekdatamaps:
        for key in weekdatamap.keys():
            if countrytype in weekdatamap[key]:
                values[valueskey] = key

    # find from locale data
    symbolelements = (
        ('decimal', 'decimal'),
        ('group', 'group'),
        ('list', 'list'),
        ('percentSign', 'percent'),
        ('minusSign', 'minus'),
        ('plusSign', 'plus'),
        ('exponential', 'exponential'),
    )
    for symbol in root.findall('./numbers/symbols'):
        symbolnumbersystem = symbol.get('numberSystem')
        if not symbolnumbersystem == numbertype:
            # should be the locale numeric system
            continue

        for elementname, valueskey in symbolelements:
            element = symbol.find('./%s' % elementname)
            # symbols are stored as single character, longer ones are not picked up
            if element is not None and len(element.text) == 1:
                values[valueskey] = element.text

        # zero is from cross-reference numeric system map,
        # taking the first character works even for UTF-8 chars
        values['zero'] = localenumberingmap[numbertype][0]

        # locale numeric system was found, break
        break

    # digits/rounding data is specific so check if it is mapped
    if currencytype and currencytype in localecurrencymap.keys():
        values['currency_digits'] = localecurrencymap[currencytype]['digits']
        values['currency_rounding'] = localecurrencymap[currencytype]['rounding']

    quotationelements = (
        ('quotationStart', 'quotation_start'),
        ('quotationEnd', 'quotation_end'),
        ('alternateQuotationStart', 'alternate_quotation_start'),
        ('alternateQuotationEnd', 'alternate_quotation_end'),
    )
    for elementname, valueskey in quotationelements:
        element = root.find('./delimiters/%s' % elementname)
        if element is not None:
            values[valueskey] = element.text

    for nativelang in root.findall('./localeDisplayNames/languages/language'):
        nativelangtype = nativelang.get('type')
        if nativelangtype == langtype:
            values['language_endonym'] = nativelang.text
            break

    if countrytype:
        for nativecountry in root.findall('./localeDisplayNames/territories/territory'):
            nativecountrytype = nativecountry.get('type')
            if nativecountrytype == countrytype:
                values['country_endonym'] = nativecountry.text
                break

    listpatternparts = {
        'start': 'list_pattern_part_start',
        'middle': 'list_pattern_part_mid',
        'end': 'list_pattern_part_end',
        '2': 'list_pattern_part_two',
    }
    listpattern = root.find('./listPatterns/listPattern')
    if listpattern is not None:
        for listpatternpart in listpattern.findall('./listPatternPart'):
            listpatternparttype = listpatternpart.get('type')
            if listpatternparttype in listpatternparts:
                values[listpatternparts[listpatternparttype]] = tolistformat(listpatternpart.text)

    # context and width of month/day names to the parts the names of the values are made of,
    # names of any other context or width are not stored
    contextprefixes = {'stand-alone': 'standalone_', 'format': ''}
    widthnames = {'wide': 'long', 'abbreviated': 'short', 'narrow': 'narrow'}
    for calendar in root.findall('./dates/calendars/calendar'):
        calendartype = calendar.get('type')
        if not calendartype == 'gregorian':
            # all values should be from gregorian calendar
            continue

        for dateformat in calendar.findall('./dateFormats/dateFormatLength'):
            dateformattype = dateformat.get('type')
            if dateformattype in ('short', 'long'):
                pattern = dateformat.find('./dateFormat/pattern')
                values['%s_date_format' % dateformattype] = todatetimeformat(pattern.text)

        for timeformat in calendar.findall('./timeFormats/timeFormatLength'):
            timeformattype = timeformat.get('type')
            if timeformattype in ('short', 'long'):
                pattern = timeformat.find('./timeFormat/pattern')
                values['%s_time_format' % timeformattype] = todatetimeformat(pattern.text)

        for dayperiodwidth in calendar.findall('./dayPeriods/dayPeriodContext/dayPeriodWidth'):
            dayperiodwidthtype = dayperiodwidth.get('type')
            if not dayperiodwidthtype == 'wide':
                # all values should be in wide format
                continue
            for dayperiod in dayperiodwidth.findall('dayPeriod'):
                dayperiodtype = dayperiod.get('type')
                if dayperiodtype in ('am', 'pm'):
                    values[dayperiodtype] = dayperiod.text

        # month/day names
        for monthcontext in calendar.findall('./months/monthContext'):
            monthcontexttype = monthcontext.get('type')
            if monthcontexttype not in contextprefixes:
                continue
            for monthwidth in monthcontext.findall('./monthWidth'):
                monthwidthtype = monthwidth.get('type')
                if monthwidthtype not in widthnames:
                    continue
                valueskey = '%s%s_month_names' % (
                    contextprefixes[monthcontexttype], widthnames[monthwidthtype])
                months = monthwidth.findall('./month')
                values[valueskey] = tomonthslist(months, values[valueskey])

        for daycontext in calendar.findall('./days/dayContext'):
            daycontexttype = daycontext.get('type')
            if daycontexttype not in contextprefixes:
                continue
            for daywidth in daycontext.findall('./dayWidth'):
                daywidthtype = daywidth.get('type')
                if daywidthtype not in widthnames:
                    continue
                valueskey = '%s%s_day_names' % (
                    contextprefixes[daycontexttype], widthnames[daywidthtype])
                days = daywidth.findall('./day')
                values[valueskey] = todayslist(days, values[valueskey])

        # gregorian calendar was found, break
        break

    # display name entries after the first one, which is for names without count
    displaynamecounts = ('zero', 'one', 'two', 'few', 'many', 'other')
    if currencytype:
        for elemcurrency in root.findall('./numbers/currencies/currency'):
            elemcurrencytype = elemcurrency.get('type')
            if not elemcurrencytype == currencytype:
                continue

            symbol = elemcurrency.find('./symbol')
            if symbol is not None:
                values['currency_symbol'] = symbol.text

            # work on copy, the list may be shared with the locale the values were copied from
            displaynamelist = list(values['currency_display_name'])
            for displayname in elemcurrency.findall('./displayName'):
                displaynamecount = displayname.get('count')
                # TODO: 0 and 1 are aliases?
                if not displaynamecount:
                    displaynamelist[0] = displayname.text
                elif displaynamecount in displaynamecounts:
                    displaynameindex = displaynamecounts.index(displaynamecount) + 1
                    displaynamelist[displaynameindex] = displayname.text
            values['currency_display_name'] = displaynamelist

            # currency type was found, break
            break

    for currencyformat in root.findall('./numbers/currencyFormats'):
        currencyformatnumbersystem = currencyformat.get('numberSystem')
        if not currencyformatnumbersystem == numbertype:
            # should be the locale numeric system
            continue
        nativecurrencyformat = currencyformat.find('currencyFormatLength/currencyFormat/pattern')
        if nativecurrencyformat is not None:
            formats = tocurrencyformat(nativecurrencyformat.text, values)
            values['currency_format'] = formats[0]
            # negative format is optional
            if len(formats) > 1:
                values['currency_negative_format'] = formats[1]

    values['currency_iso_code'] = currencytype

    # month/day names are set during calendar parsing
# read parent locales first. glob returns the files in arbitrary order thus they are sorted, that
# way the parents are always read in the same order regardless of the file system
for xml in sorted(glob.glob('common/main/*.xml')):
    xmlbase = os.path.basename(xml)
    xmlbase = xmlbase.replace('.xml', '')
    if not xmlbase in localeparentmap.keys():
        continue
    readlocale(xml, localeparentvaluesmap, True)

# now everything including those, sorted so that main (language only) locales are read before
# the locales that are based on them
for xml in sorted(glob.glob('common/main/*.xml')):
    if xml.endswith('/root.xml'):
        # root is not actual locale
        continue
    readlocale(xml, localemap, False)

print('''static const QLocalePrivate localeTbl[] = {''')

# print C first
printlocaledata(localemap, 'C')

# now everything except that
for key in sorted(localemap.keys()):
    if key == 'C':
        continue
    printlocaledata(localemap, key)

print('};')
print('static const qint16 localeTblSize = sizeof(localeTbl) / sizeof(QLocalePrivate);\n')
# imperial parsing, the table lists the countries of the UK and US measurement systems
tree = ET.parse('common/supplemental/supplementalData.xml')
root = tree.getroot()
for measurementsystem in root.findall('./measurementData/measurementSystem'):
    measurementsystemtype = measurementsystem.get('type')
    if measurementsystemtype in ('UK', 'US'):
        territories = measurementsystem.get('territories')
        for territory in territories.split(' '):
            countryenum = None
            for key in countrymap.keys():
                countrycode = countrymap[key]['code']
                if countrycode == territory:
                    countryenum = key
                    break
            if countryenum is None:
                # there is no enum to print for it, bail out like for any other unknown value
                print('Unknown territory: %s' % territory)
                sys.exit(1)
            imperiallist.append(countryenum)

print('''static const QLocale::Country imperialTbl[] = {''')
for string in sorted(imperiallist):
    print(' QLocale::Country::%s,' % string)
print('};')
# the size is the count of entries, same as for the rest of the tables
print('static const qint16 imperialTblSize = sizeof(imperialTbl) / sizeof(QLocale::Country);')