EpsilonDocs/makeqstrdata_8py_source.html

 """
 Process raw qstr file and output qstr data with length, hash and data bytes.

 This script works with Python 2.6, 2.7, 3.3 and 3.4.
 """

 from __future__ import print_function

 import re
 import sys

 # Python 2/3 compatibility:
 #   - iterating through bytes is different
 #   - codepoint2name lives in a different module
 #
 # bytes_cons(val, enc) is the byte-sequence constructor used by the rest of
 # this script.  If the interpreter's major version is neither '2' nor '3',
 # neither branch runs and bytes_cons / codepoint2name are left undefined.
 import platform
 if platform.python_version_tuple()[0] == '2':
     # Python 2: the encoding argument is accepted but ignored.
     bytes_cons = lambda val, enc=None: bytearray(val)
     from htmlentitydefs import codepoint2name
 elif platform.python_version_tuple()[0] == '3':
     # Python 3: the built-in bytes constructor is used directly.
     bytes_cons = bytes
     from html.entities import codepoint2name
 # end compatibility code

 # Extend the HTML entity table with names for characters that HTML does not
 # name, so that qstr_escape can turn them into readable identifier fragments.
 codepoint2name.update(dict((ord(char), name) for char, name in (
     ('-', 'hyphen'),
     (' ', 'space'),
     ('\'', 'squot'),
     (',', 'comma'),
     ('.', 'dot'),
     (':', 'colon'),
     (';', 'semicolon'),
     ('/', 'slash'),
     ('%', 'percent'),
     ('#', 'hash'),
     ('(', 'paren_open'),
     (')', 'paren_close'),
     ('[', 'bracket_open'),
     (']', 'bracket_close'),
     ('{', 'brace_open'),
     ('}', 'brace_close'),
     ('*', 'star'),
     ('!', 'bang'),
     ('\\', 'backslash'),
     ('+', 'plus'),
     ('$', 'dollar'),
     ('=', 'equals'),
     ('?', 'question'),
     ('@', 'at_sign'),
     ('^', 'caret'),
     ('|', 'pipe'),
     ('~', 'tilde'),
 )))

 # this must match the equivalent function in qstr.c
 def compute_hash(qstr, bytes_hash):
     """Return the hash of qstr truncated to bytes_hash bytes.

     qstr must yield integers when iterated.  The accumulator starts at 5381
     and, for every item, is multiplied by 33 and XOR-ed with that item.  Only
     the low 8 * bytes_hash bits of the result are kept.

     The returned value is never zero: a truncated hash of zero is replaced by
     1, because zero means "hash not computed".
     """
     mask = (1 << (8 * bytes_hash)) - 1
     acc = 5381
     for item in qstr:
         acc = (acc * 33) ^ item
     truncated = acc & mask
     return truncated if truncated else 1

 def qstr_escape(qst):
     """Turn qst into text that only contains the characters A-Z, a-z, 0-9 and _.

     Every other character is replaced by '_<name>_', where <name> is the
     character's entry in codepoint2name or, when it has none, its code point
     formatted as '0x' followed by at least two hex digits.
     """
     def replacement(found):
         code = ord(found.group(0))
         label = codepoint2name.get(code)
         if label is None:
             # no symbolic name known for this character: use its hex code
             label = '0x%02x' % code
         return '_%s_' % label

     return re.sub(r'[^A-Za-z0-9_]', replacement, qst)

 def parse_input_headers(infiles):
     """Collect QCFG settings and Q(...) strings from the given input files.

     infiles -- iterable of paths; each file is opened in text mode and read
                line by line, with surrounding whitespace stripped.

     Returns (qcfgs, qstrs):
       qcfgs maps each config name to its value, with one pair of enclosing
             parentheses removed from the value when present;
       qstrs maps each escaped identifier to (order, ident, qstr).

     Writes an error to stderr and exits with status 1 when no QCFG line was
     found in any input file.
     """
     cfg_pattern = re.compile(r'^QCFG\((.+), (.+)\)')
     qstr_pattern = re.compile(r'^Q\((.*)\)$')

     qcfgs = {}
     qstrs = {}
     for path in infiles:
         with open(path, 'rt') as source:
             for raw_line in source:
                 text = raw_line.strip()

                 # config lines take precedence over qstr lines
                 cfg = cfg_pattern.match(text)
                 if cfg:
                     cfg_name, cfg_value = cfg.group(1), cfg.group(2)
                     if cfg_value.startswith('(') and cfg_value.endswith(')'):
                         # drop the parentheses wrapped around the value
                         cfg_value = cfg_value[1:-1]
                     qcfgs[cfg_name] = cfg_value
                     continue

                 # anything that is not a Q(...) line is ignored
                 found = qstr_pattern.match(text)
                 if not found:
                     continue
                 qstr = found.group(1)

                 # the two characters backslash + n stand for a real newline
                 if qstr == '\\n':
                     qstr = '\n'

                 ident = qstr_escape(qstr)
                 if ident in qstrs:
                     # first occurrence wins; later duplicates are dropped
                     continue

                 # The order key normally preserves the position in the input.
                 # Special method names like __add__ are pushed towards the top
                 # so that their ids fit into a byte, and the empty qstr sorts
                 # above all of them.
                 if ident == "":
                     order = -200000
                 elif ident.startswith("__"):
                     order = len(qstrs) - 100000
                 else:
                     order = len(qstrs)
                 qstrs[ident] = (order, ident, qstr)

     if not qcfgs:
         sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
         sys.exit(1)

     return qcfgs, qstrs

 def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
     """Render one qstr as the C expression that holds its hash, length and data.

     cfg_bytes_len  -- number of bytes used for the length field
     cfg_bytes_hash -- number of bytes used for the hash field
     qstr           -- the string to encode

     Returns text of the form '(const byte*)"<hash><len>" "<data>"'.  The hash
     and the length are written as hex escapes, least significant byte first.
     The data is the string itself when it is plain printable ASCII without
     backslash or double quote, otherwise every byte is written as a hex escape.

     Raises AssertionError when the byte length of qstr does not fit into
     cfg_bytes_len bytes.
     """
     qbytes = bytes_cons(qstr, 'utf8')
     qlen = len(qbytes)
     if qlen >= (1 << (8 * cfg_bytes_len)):
         # Raise explicitly instead of using `assert False`: assert statements
         # are removed under `python -O`, which would let an oversized qstr
         # through with a truncated length field.  The diagnostic goes to
         # stderr because stdout carries the generated header.
         print('qstr is too long:', qstr, file=sys.stderr)
         raise AssertionError('qstr is too long')
     qhash = compute_hash(qbytes, cfg_bytes_hash)
     if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
         # qstr is all printable ASCII so render it as-is (for easier debugging)
         qdata = qstr
     else:
         # qstr contains non-printable codes so render entire thing as hex pairs
         qdata = ''.join(('\\x%02x' % b) for b in qbytes)
     qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
     qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
     return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)

 def print_qstr_data(qcfgs, qstrs):
     """Print the generated C header with one QDEF line per qstr to stdout.

     qcfgs -- config mapping; 'BYTES_IN_LEN' and 'BYTES_IN_HASH' are read from
              it and converted with int()
     qstrs -- mapping whose values are (order, ident, qstr) tuples

     The output starts with a comment line and a blank line, followed by the
     MP_QSTR_NULL entry whose hash and length bytes are all zero and whose data
     is empty.  The remaining entries follow in ascending order of their order
     field.
     """
     len_width = int(qcfgs['BYTES_IN_LEN'])
     hash_width = int(qcfgs['BYTES_IN_HASH'])

     preamble = (
         '// This file was automatically generated by makeqstrdata.py',
         '',
         'QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * hash_width, '\\x00' * len_width),
     )
     for text in preamble:
         print(text)

     entries = list(qstrs.values())
     entries.sort(key=lambda entry: entry[0])
     for entry in entries:
         ident, qstr = entry[1], entry[2]
         print('QDEF(MP_QSTR_%s, %s)' % (ident, make_bytes(len_width, hash_width, qstr)))

 def do_work(infiles):
     """Parse the given input files and print the resulting qstr data."""
     print_qstr_data(*parse_input_headers(infiles))

 # Script entry point: every command-line argument is passed on as an input file.
 if __name__ == "__main__":
     do_work(sys.argv[1:])
makeqstrdata.make_bytes
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
Definition: makeqstrdata.py:127

makeqstrdata.qstr_escape
def qstr_escape(qst)
Definition: makeqstrdata.py:62

makeqstrdata.compute_hash
def compute_hash(qstr, bytes_hash)
Definition: makeqstrdata.py:55

makeqstrdata.print_qstr_data
def print_qstr_data(qcfgs, qstrs)
Definition: makeqstrdata.py:144

makeqstrdata.parse_input_headers
def parse_input_headers(infiles)
Definition: makeqstrdata.py:72

makeqstrdata.bytes_cons
bytes_cons
Definition: makeqstrdata.py:17

makeqstrdata.do_work
def do_work(infiles)
Definition: makeqstrdata.py:161