Numworks Epsilon  1.4.1
Graphing Calculator Operating System
makeqstrdata.py
Go to the documentation of this file.
1 """
2 Process raw qstr file and output qstr data with length, hash and data bytes.
3 
4 This script works with Python 2.6, 2.7, 3.3 and 3.4.
5 """
6 
7 from __future__ import print_function
8 
9 import re
10 import sys
11 
# Python 2/3 compatibility:
# - iterating through bytes is different
# - codepoint2name lives in a different module
import platform
if platform.python_version().split('.')[0] == '2':
    def bytes_cons(val, enc=None):
        # enc is accepted only so the call signature lines up with
        # bytes(val, enc); on Python 2 the value is wrapped as-is.
        return bytearray(val)
    from htmlentitydefs import codepoint2name
elif platform.python_version().split('.')[0] == '3':
    bytes_cons = bytes
    from html.entities import codepoint2name
# end compatibility code
23 
# add some custom names to map characters that aren't in HTML
# (kept as a table of (character, name) pairs; dict() over a generator is
# used instead of a dict comprehension so Python 2.6 can still run this)
codepoint2name.update(dict((ord(char), name) for char, name in (
    ('-', 'hyphen'),
    (' ', 'space'),
    ('\'', 'squot'),
    (',', 'comma'),
    ('.', 'dot'),
    (':', 'colon'),
    (';', 'semicolon'),
    ('/', 'slash'),
    ('%', 'percent'),
    ('#', 'hash'),
    ('(', 'paren_open'),
    (')', 'paren_close'),
    ('[', 'bracket_open'),
    (']', 'bracket_close'),
    ('{', 'brace_open'),
    ('}', 'brace_close'),
    ('*', 'star'),
    ('!', 'bang'),
    ('\\', 'backslash'),
    ('+', 'plus'),
    ('$', 'dollar'),
    ('=', 'equals'),
    ('?', 'question'),
    ('@', 'at_sign'),
    ('^', 'caret'),
    ('|', 'pipe'),
    ('~', 'tilde'),
)))
53 
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
    """Return the hash of a qstr, truncated to ``bytes_hash`` bytes.

    qstr must iterate as integers (for example ``bytes`` on Python 3 or a
    ``bytearray``).  Starting from 5381, every byte is folded in as
    ``(value * 33) ^ byte``; the result is then reduced to the low
    ``8 * bytes_hash`` bits.

    A hash of zero means "hash not computed", so a result that would be
    zero is returned as 1 instead.
    """
    mask = (1 << (8 * bytes_hash)) - 1
    value = 5381
    for b in qstr:
        # Masking every round does not change the low bits of the result,
        # but it stops the accumulator from growing into an ever larger
        # arbitrary-precision integer on long inputs.
        value = ((value * 33) ^ b) & mask
    # The final mask still matters for empty input, where the loop never runs.
    return (value & mask) or 1
61 
def qstr_escape(qst):
    """Return qst with every character outside [A-Za-z0-9_] spelled out.

    Each such character is replaced by its name from codepoint2name wrapped
    in underscores; a character with no entry there is written as its
    code point in hex (at least two digits, prefixed with 0x) instead.
    """
    def spell_out(found):
        code = ord(found.group(0))
        label = codepoint2name.get(code)
        if label is None:
            label = '0x%02x' % code
        return '_%s_' % label

    return re.sub(r'[^A-Za-z0-9_]', spell_out, qst)
71 
def parse_input_headers(infiles):
    """Read QCFG(...) and Q(...) lines from the given files.

    Returns a pair (qcfgs, qstrs).  qcfgs maps each config name to its value
    text, with one pair of enclosing parentheses removed.  qstrs maps each
    escaped identifier to a tuple (order, ident, qstr).

    If no QCFG line was found in any file, an error is written to stderr and
    the process exits with status 1.
    """
    config = {}
    found = {}
    for path in infiles:
        with open(path, 'rt') as handle:
            for raw in handle:
                text = raw.strip()

                # config lines are handled first and never count as qstrs
                cfg = re.match(r'^QCFG\((.+), (.+)\)', text)
                if cfg:
                    setting = cfg.group(2)
                    if setting.startswith('(') and setting.endswith(')'):
                        # drop the parentheses around the config value
                        setting = setting[1:-1]
                    config[cfg.group(1)] = setting
                    continue

                # anything that is not a Q(...) line is ignored
                entry = re.match(r'^Q\((.*)\)$', text)
                if not entry:
                    continue
                value = entry.group(1)

                # special case to specify control characters
                if value == '\\n':
                    value = '\n'

                # the escaped name doubles as the key used to drop duplicates
                name = qstr_escape(value)
                if name in found:
                    continue

                # The order number normally keeps the original file order.
                # Special method names like __add__ are moved to the top of
                # the list so that their ids fit into a byte, and the empty
                # qstr is sorted above even those.
                if name == "":
                    rank = -200000
                elif name.startswith("__"):
                    rank = len(found) - 100000
                else:
                    rank = len(found)
                found[name] = (rank, name, value)

    if not config:
        sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
        sys.exit(1)

    return config, found
126 
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Render one qstr as the C expression that holds its data.

    cfg_bytes_len and cfg_bytes_hash are the number of bytes used to store
    the length and the hash.  qstr is the string value itself.

    The returned text is a (const byte*) cast applied to two adjacent C
    string literals: the first holds the hash bytes followed by the length
    bytes (each written least-significant byte first, as hex escapes), the
    second holds the data.  The data is written as-is when every character
    is printable ASCII other than backslash and double quote; otherwise the
    whole UTF-8 encoding is written as hex escapes.

    Raises AssertionError if the encoded qstr is too long for its length to
    fit in cfg_bytes_len bytes.
    """
    qbytes = bytes_cons(qstr, 'utf8')
    qlen = len(qbytes)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        # Raised explicitly rather than via "assert False": assert statements
        # are removed under "python -O", which would let an over-long qstr
        # through with a truncated length field.  Carrying the message in the
        # exception also keeps it out of stdout, where the generated output
        # is printed.
        raise AssertionError('qstr is too long: %s' % qstr)
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        qdata = qstr
    else:
        # qstr contains non-printable codes so render entire thing as hex pairs
        qdata = ''.join(('\\x%02x' % b) for b in qbytes)
    qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
    qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
143 
def print_qstr_data(qcfgs, qstrs):
    """Print the generated QDEF lines for all qstrs to stdout.

    qcfgs must contain 'BYTES_IN_LEN' and 'BYTES_IN_HASH'; both are converted
    with int().  qstrs maps identifiers to (order, ident, qstr) tuples, and
    the entries are printed in ascending order number.
    """
    # get config variables
    len_bytes = int(qcfgs['BYTES_IN_LEN'])
    hash_bytes = int(qcfgs['BYTES_IN_HASH'])

    # print out the starter of the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print('')

    # add NULL qstr with no hash or data
    null_hash = '\\x00' * hash_bytes
    null_len = '\\x00' * len_bytes
    print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % (null_hash, null_len))

    # go through each qstr in order-number sequence and print it out
    entries = list(qstrs.values())
    entries.sort(key=lambda entry: entry[0])
    for _, ident, qstr in entries:
        rendered = make_bytes(len_bytes, hash_bytes, qstr)
        print('QDEF(MP_QSTR_%s, %s)' % (ident, rendered))
160 
def do_work(infiles):
    """Parse the given input files and print the resulting qstr data."""
    parsed = parse_input_headers(infiles)
    # parsed is the (qcfgs, qstrs) pair, passed on positionally
    print_qstr_data(*parsed)
164 
# Script entry point: every command-line argument is treated as an input file.
if __name__ == '__main__':
    do_work(sys.argv[1:])
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
def qstr_escape(qst)
Definition: makeqstrdata.py:62
def compute_hash(qstr, bytes_hash)
Definition: makeqstrdata.py:55
def print_qstr_data(qcfgs, qstrs)
def parse_input_headers(infiles)
Definition: makeqstrdata.py:72
def do_work(infiles)