35 #if MICROPY_ENABLE_COMPILER 42 #define MP_LEXER_EOF ((unichar)MP_READER_EOF) 43 #define CUR_CHAR(lex) ((lex)->chr0) 46 return lex->
chr0 == MP_LEXER_EOF;
50 return lex->
chr0 ==
'\n';
54 return lex->
chr0 ==
c;
58 return lex->
chr0 == c1 || lex->
chr0 == c2;
62 return lex->
chr0 == c1 || lex->
chr0 == c2 || lex->
chr0 == c3;
66 return lex->
chr1 ==
c;
70 return lex->
chr1 == c1 || lex->
chr1 == c2;
74 return lex->
chr2 == c1 || lex->
chr2 == c2;
78 return lex->
chr0 == c1 && lex->
chr1 == c2;
99 return chr1 ==
'b' || chr1 ==
'o' || chr1 ==
'x';
103 return lex->
chr1 >=
'0' && lex->
chr1 <=
'7';
107 return is_char_or(lex,
'\'',
'\"')
108 || (is_char_or3(lex,
'r',
'u',
'b') && is_char_following_or(lex,
'\'',
'\"'))
109 || ((is_char_and(lex,
'r',
'b') || is_char_and(lex,
'b',
'r'))
110 && is_char_following_following_or(lex,
'\'',
'\"'));
115 return is_letter(lex) || lex->
chr0 ==
'_' || lex->
chr0 >= 0x80;
119 return is_head_of_identifier(lex) || is_digit(lex);
123 if (lex->
chr0 ==
'\n') {
127 }
else if (lex->
chr0 ==
'\t') {
129 lex->
column = (((lex->
column - 1 + TAB_SIZE) / TAB_SIZE) * TAB_SIZE) + 1;
139 if (lex->
chr1 ==
'\r') {
142 if (lex->
chr2 ==
'\n') {
149 if (lex->
chr2 == MP_LEXER_EOF && lex->
chr1 != MP_LEXER_EOF && lex->
chr1 !=
'\n') {
176 STATIC const char *
const tok_enc =
213 STATIC const char *
const tok_kw[] = {
221 #if MICROPY_PY_ASYNC_AWAIT 259 while (num_digits-- != 0) {
273 char quote_char =
'\'';
274 if (is_char(lex,
'\"')) {
281 if (is_char_and(lex, quote_char, quote_char)) {
291 size_t n_closing = 0;
292 while (!is_end(lex) && (num_quotes > 1 || !is_char(lex,
'\n')) && n_closing < num_quotes) {
293 if (is_char(lex, quote_char)) {
298 if (is_char(lex,
'\\')) {
308 case '\n':
c = MP_LEXER_EOF;
break;
312 case 'a':
c = 0x07;
break;
313 case 'b':
c = 0x08;
break;
314 case 't':
c = 0x09;
break;
315 case 'n':
c = 0x0a;
break;
316 case 'v':
c = 0x0b;
break;
317 case 'f':
c = 0x0c;
break;
318 case 'r':
c = 0x0d;
break;
330 if (!get_hex(lex, (
c ==
'x' ? 2 :
c ==
'u' ? 4 : 8), &num)) {
346 if (
c >=
'0' &&
c <=
'7') {
350 while (is_following_odigit(lex) && --digits != 0) {
352 num = num * 8 + (CUR_CHAR(lex) -
'0');
362 if (
c != MP_LEXER_EOF) {
394 if (n_closing < num_quotes) {
403 bool had_physical_newline =
false;
404 while (!is_end(lex)) {
405 if (is_physical_newline(lex)) {
409 had_physical_newline =
true;
411 }
else if (is_whitespace(lex)) {
413 }
else if (is_char(lex,
'#')) {
415 while (!is_end(lex) && !is_physical_newline(lex)) {
419 }
else if (is_char_and(lex,
'\\',
'\n')) {
427 return had_physical_newline;
432 vstr_reset(&lex->
vstr);
435 bool had_physical_newline = skip_whitespace(lex,
false);
452 size_t num_spaces = lex->
column - 1;
453 if (num_spaces == indent_top(lex)) {
454 }
else if (num_spaces > indent_top(lex)) {
455 indent_push(lex, num_spaces);
458 while (num_spaces < indent_top(lex)) {
462 if (num_spaces != indent_top(lex)) {
467 }
else if (is_end(lex)) {
470 }
else if (is_string_or_bytes(lex)) {
488 if (is_char(lex,
'u')) {
490 }
else if (is_char(lex,
'b')) {
493 if (is_char_following(lex,
'r')) {
497 }
else if (is_char(lex,
'r')) {
500 if (is_char_following(lex,
'b')) {
523 parse_string_literal(lex, is_raw);
526 skip_whitespace(lex,
true);
528 }
while (is_string_or_bytes(lex));
530 }
else if (is_head_of_identifier(lex)) {
538 while (!is_end(lex) && is_tail_of_identifier(lex)) {
549 int cmp =
strcmp(s, tok_kw[i]);
556 }
else if (cmp < 0) {
562 }
else if (is_digit(lex) || (is_char(lex,
'.') && is_following_digit(lex))) {
563 bool forced_integer =
false;
564 if (is_char(lex,
'.')) {
568 if (is_char(lex,
'0') && is_following_base_char(lex)) {
569 forced_integer =
true;
578 while (!is_end(lex)) {
579 if (!forced_integer && is_char_or(lex,
'e',
'E')) {
583 if (is_char(lex,
'+') || is_char(lex,
'-')) {
587 }
else if (is_letter(lex) || is_digit(lex) || is_char(lex,
'.')) {
588 if (is_char_or3(lex,
'.',
'j',
'J')) {
601 const char *t = tok_enc;
602 size_t tok_enc_index = 0;
603 for (; *t != 0 && !is_char(lex, *t); t += 1) {
604 if (*t ==
'e' || *t ==
'c') {
616 }
else if (*t ==
'!') {
618 if (is_char(lex,
'=')) {
625 }
else if (*t ==
'.') {
627 if (is_char_and(lex,
'.',
'.')) {
640 size_t t_index = tok_enc_index;
641 while (*t ==
'c' || *t ==
'e') {
643 if (is_char(lex, t[1])) {
645 tok_enc_index = t_index;
649 }
else if (*t ==
'c') {
656 lex->
tok_kind = tok_enc_kind[tok_enc_index];
710 #if MICROPY_READER_POSIX || MICROPY_READER_VFS 718 #if MICROPY_HELPER_LEXER_UNIX 720 mp_lexer_t *mp_lexer_new_from_fd(
qstr filename,
int fd,
bool close_fd) {
742 void mp_lexer_show_token(
const mp_lexer_t *lex) {
752 printf(
"%c", (
int)
c);
762 #endif // MICROPY_ENABLE_COMPILER
size_t alloc_indent_level
void mp_lexer_to_next(mp_lexer_t *lex)
void mp_lexer_free(mp_lexer_t *lex)
NORETURN void mp_raise_NotImplementedError(const char *msg)
bool unichar_isalpha(unichar c)
void mp_reader_new_file_from_fd(mp_reader_t *reader, int fd, bool close_fd)
#define m_del(type, ptr, num)
void vstr_add_char(vstr_t *vstr, unichar chr)
#define MICROPY_ALLOC_LEXER_INDENT_INIT
mp_lexer_t * mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len)
void vstr_init(vstr_t *vstr, size_t alloc)
bool unichar_isxdigit(unichar c)
#define MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC
void mp_reader_new_file(mp_reader_t *reader, const char *filename)
void vstr_add_byte(vstr_t *vstr, byte v)
mp_lexer_t * mp_lexer_new(qstr src_name, mp_reader_t reader)
qstr qstr_from_str(const char *str)
const byte * utf8_next_char(const byte *s)
bool unichar_isdigit(unichar c)
enum _mp_token_kind_t mp_token_kind_t
mp_lexer_t * mp_lexer_new_from_file(const char *filename)
#define m_del_obj(type, ptr)
bool unichar_isspace(unichar c)
mp_uint_t unichar_xdigit_value(unichar c)
#define m_renew(type, ptr, old_num, new_num)
unichar utf8_get_char(const byte *s)
mp_int_t nested_bracket_level
char * vstr_null_terminated_str(vstr_t *vstr)
bool unichar_isprint(unichar c)
void(* close)(void *data)
#define MICROPY_ALLOC_LEXEL_INDENT_INC
int strcmp(const char *s1, const char *s2)
mp_uint_t(* readbyte)(void *data)
void mp_reader_new_mem(mp_reader_t *reader, const byte *buf, size_t len, size_t free_len)
void vstr_clear(vstr_t *vstr)
void vstr_cut_tail_bytes(vstr_t *vstr, size_t bytes_to_cut)