mirror of
https://bitbucket.org/smil3y/katie.git
synced 2025-02-24 02:42:55 +00:00

Combining types which harfbuzz does not support are no longer added to the tables either; this should make obtaining properties of text faster. The special language table used to probe fonts for script support should be reviewed once a new CLDR version is released, to add proper language entries if present in likelySubtags.xml. Signed-off-by: Ivailo Monev <xakepa10@laimg.moc>
251 lines
7.6 KiB
Python
Executable file
251 lines
7.6 KiB
Python
Executable file
#!/usr/bin/python
|
|
|
|
import os, sys
|
|
|
|
def readlines(fromfile):
    """Return the data part of every line of fromfile that carries data.

    Everything from the first '#' of a line onwards is a comment and is
    dropped. Lines that are empty, hold only whitespace or hold only a
    comment (indented or not) are skipped. The data part is returned as it
    appears in the file, surrounding whitespace included.
    """
    lines = []
    with open(fromfile, 'r') as f:
        content = f.read()
    for line in content.splitlines():
        data = line.split('#')[0]
        # An entry without data would make callers fail when they index
        # the ';' separated fields, so blank data is never returned.
        if not data.strip():
            continue
        lines.append(data)
    return lines
|
|
|
|
def mapinsert(tomap, key, value):
    """Append value to the list stored under key in tomap.

    The list is created the first time a key is seen, so tomap ends up
    mapping every key to all values inserted for it, in insertion order.
    """
    tomap.setdefault(key, []).append(value)
|
|
|
|
def mapdecideinsert(toswitchmap, torangemap, value, key):
    """File key under value in either the switch map or the range map.

    key is a hexadecimal code point or a 'min..max' range of them. Single
    code points always go to toswitchmap. A range whose two ends are
    adjacent is split into its two code points, which also go to
    toswitchmap; any other range is stored unchanged in torangemap.
    """
    if '..' not in key:
        mapinsert(toswitchmap, value, key)
        return

    bounds = key.split('..')
    first = bounds[0]
    last = bounds[1]
    lastnum = int(last, 16)
    firstnum = int(first, 16)
    # two neighbouring code points are cheaper as two switch cases
    if abs(firstnum - lastnum) == 1:
        mapinsert(toswitchmap, value, first)
        mapinsert(toswitchmap, value, last)
    else:
        mapinsert(torangemap, value, key)
|
|
|
|
def printswitch(frommap):
    """Print a C switch statement on ucs4 built from frommap.

    frommap maps a return expression to the hexadecimal code points that
    select it. All cases of one expression are printed first and share a
    single return statement printed after them.
    """
    print(' switch (ucs4) {')
    for result, codepoints in frommap.items():
        for codepoint in codepoints:
            print(' case 0x%s:' % codepoint)
        print(' return %s;' % result)
    print(' }')
|
|
|
|
def printifrange(frommap):
    """Print one C range check on ucs4 per range stored in frommap.

    frommap maps a return expression to 'min..max' ranges of hexadecimal
    code points. Every range becomes an if statement followed by its own
    return of that expression.
    """
    for result, spans in frommap.items():
        for span in spans:
            bounds = span.split('..')
            low = bounds[0]
            high = bounds[1]
            print(' if (ucs4 <= 0x%s && ucs4 >= 0x%s)' % (high, low))
            print(' return %s;' % result)
|
|
|
|
# Accumulators filled by mapdecideinsert(). Both map the expression to
# return to a list: single code points (printed as switch cases) in
# switchmap, 'min..max' ranges (printed as if statements) in rangemap.
switchmap = {}
rangemap = {}


def _parseentry(line):
    """Return the first two ';' separated fields of line, stripped."""
    tablesplit = line.split(';')
    return tablesplit[0].strip(), tablesplit[1].strip()


def _printtable(fromfile, prefix, supported=None, skipped=()):
    """Print the switch and if lookup code for one property data file.

    fromfile  -- data file to read, looked up from the working directory
    prefix    -- text put in front of the property value (with its
                 underscores removed) to form the returned expression
    supported -- the only values to emit, None to emit every value
    skipped   -- values which are never emitted
    """
    for line in readlines(fromfile):
        codepoint, value = _parseentry(line)
        if value in skipped:
            continue
        if supported is not None and value not in supported:
            continue
        mapdecideinsert(switchmap, rangemap, prefix + value.replace('_', ''), codepoint)
    printswitch(switchmap)
    printifrange(rangemap)


if 'combining' in sys.argv:
    # Combining class number -> enumerator, only some are supported by
    # harfbuzz. Class 0 (not ordered) is the default and is left out on
    # purpose, like every other class without an entry here.
    combiningnames = {
        '202': 'QUnicodeTables::Combining_AttachedBelow',
        '214': 'QUnicodeTables::Combining_AttachedAbove',
        '216': 'QUnicodeTables::Combining_AttachedAboveRight',
        '218': 'QUnicodeTables::Combining_BelowLeft',
        '220': 'QUnicodeTables::Combining_Below',
        '222': 'QUnicodeTables::Combining_BelowRight',
        '224': 'QUnicodeTables::Combining_Left',
        '226': 'QUnicodeTables::Combining_Right',
        '228': 'QUnicodeTables::Combining_AboveLeft',
        '230': 'QUnicodeTables::Combining_Above',
        '232': 'QUnicodeTables::Combining_AboveRight',
        '233': 'QUnicodeTables::Combining_DoubleBelow',
        '234': 'QUnicodeTables::Combining_DoubleAbove',
        '240': 'QUnicodeTables::Combining_IotaSubscript',
    }
    # deprecated classes have no enumerator, give up if the data uses one
    deprecated = ('200', '204', '208', '210', '212')
    for line in readlines('extracted/DerivedCombiningClass.txt'):
        codepoint, value = _parseentry(line)
        if value in deprecated:
            print('Unhandled deprecated combining type: %s' % value)
            sys.exit(2)
        if value not in combiningnames:
            continue
        mapdecideinsert(switchmap, rangemap, combiningnames[value], codepoint)
    printswitch(switchmap)
    printifrange(rangemap)
elif 'grapheme' in sys.argv:
    # only some are supported by harfbuzz
    _printtable(
        'auxiliary/GraphemeBreakProperty.txt',
        'QUnicodeTables::GraphemeBreak_',
        supported=(
            'CR',
            'LF',
            'Control',
            'Extend',
            'L',
            'V',
            'T',
            'LV',
            'LVT',
        ),
    )
elif 'word' in sys.argv:
    # only some are supported by harfbuzz
    _printtable(
        'auxiliary/WordBreakProperty.txt',
        'QUnicodeTables::WordBreak_',
        supported=(
            'Format',
            'Katakana',
            'ALetter',
            'MidLetter',
            'MidNum',
            'Numeric',
            'ExtendNumLet',
        ),
    )
elif 'sentence' in sys.argv:
    # only some are supported by harfbuzz
    _printtable(
        'auxiliary/SentenceBreakProperty.txt',
        'QUnicodeTables::SentenceBreak_',
        supported=(
            'Sep',
            'Format',
            'Sp',
            'Lower',
            'Upper',
            'OLetter',
            'Numeric',
            'ATerm',
            'STerm',
            'Close',
        ),
    )
elif 'line' in sys.argv:
    # only some are supported by harfbuzz
    _printtable(
        'LineBreak.txt',
        'QUnicodeTables::LineBreak_',
        supported=(
            'OP',
            'CL',
            'QU',
            'GL',
            'NS',
            'EX',
            'SY',
            'IS',
            'PR',
            'PO',
            'NU',
            'AL',
            'ID',
            'IN',
            'HY',
            'BA',
            'BB',
            'B2',
            'ZW',
            'CM',
            'WJ',
            'H2',
            'H3',
            'JL',
            'JV',
            'JT',
            'SA',
            'SG',
            'SP',
            'CR',
            'LF',
            'BK',
        ),
    )
elif 'script' in sys.argv:
    # default is Common
    _printtable('Scripts.txt', 'QUnicodeTables::', skipped=('Common',))
elif 'special' in sys.argv:
    seenscripts = set()
    for line in readlines('Scripts.txt'):
        codepoint, value = _parseentry(line)
        if value in seenscripts:
            # only one per script
            continue
        if value in ('Inherited', 'Common'):
            # both are treated differently
            codepoint = '0'
        # a range is listed by its first code point
        print(' 0x%s, // %s' % (codepoint.split('..')[0], value.replace('_', '')))
        seenscripts.add(value)
else:
    print('''usage: <combining|grapheme|word|sentence|line|script|special>

Data is from https://unicode.org/Public/13.0.0/ucd/UCD.zip''')
    sys.exit(1)
|