From 2932a0f3d586d2977f3a38c27a24e2a1d6d0a122 Mon Sep 17 00:00:00 2001 From: TimePath Date: Wed, 6 Sep 2017 18:41:08 +1000 Subject: [PATCH] WIP: back lexer token values with std::string, add parser2.cpp --- CMakeLists.txt | 3 +- ftepp.cpp | 2 +- gmqcc.h | 2 +- lexer.cpp | 76 ++-- lexer.h | 522 ++++++++++++++------------ parser.cpp | 2 +- parser2.cpp | 992 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1305 insertions(+), 294 deletions(-) create mode 100644 parser2.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a350af..39e0842 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,8 @@ set(SOURCE_FILES ir.h lexer.cpp lexer.h opts.cpp - parser.cpp parser.h + # parser.cpp parser.h + parser2.cpp parser.h stat.cpp utf8.cpp util.cpp) diff --git a/ftepp.cpp b/ftepp.cpp index a85d857..cab2409 100644 --- a/ftepp.cpp +++ b/ftepp.cpp @@ -182,7 +182,7 @@ static GMQCC_INLINE char *(*ftepp_predef(const char *name))(ftepp_t *context) { return (i != 0) ? ftepp_predefs[i-1].func : nullptr; } -#define ftepp_tokval(f) ((f)->lex->tok.value) +#define ftepp_tokval(f) ((f)->lex->tok.value.mut()) #define ftepp_ctx(f) ((f)->lex->tok.ctx) static void ftepp_errorat(ftepp_t *ftepp, lex_ctx_t ctx, const char *fmt, ...) 
diff --git a/gmqcc.h b/gmqcc.h index 07f8c95..0696eb8 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -168,7 +168,7 @@ char *stat_mem_strdup(const char *, bool); #define mem_d(PTRN) free((void*)PTRN) #define mem_r(PTRN, SIZE) realloc((void*)PTRN, SIZE) -#define util_strdup(SRC) stat_mem_strdup((char*)(SRC), false) +#define util_strdup(SRC) stat_mem_strdup((const char*)(SRC), false) #define util_strdupe(SRC) stat_mem_strdup((char*)(SRC), true) #define util_isalpha(a) ((((unsigned)(a)|32)-'a') < 26) diff --git a/lexer.cpp b/lexer.cpp index 0bc28cb..28134e2 100644 --- a/lexer.cpp +++ b/lexer.cpp @@ -19,7 +19,6 @@ static const char *keywords_qc[] = { /* For fte/gmgqcc */ static const char *keywords_fg[] = { "switch", "case", "default", - "struct", "union", "break", "continue", "typedef", "goto", @@ -62,8 +61,7 @@ static bool lexwarn(lex_file *lex, int warntype, const char *fmt, ...) static void lex_token_new(lex_file *lex) { - if (lex->tok.value) - vec_shrinkto(lex->tok.value, 0); + lex->tok.value.shrinkto(0); lex->tok.constval.t = TYPE_VOID; lex->tok.ctx.line = lex->sline; @@ -146,29 +144,20 @@ lex_file* lex_open_string(const char *str, size_t len, const char *name) return lex; } -void lex_cleanup(void) +void lex_cleanup() { - size_t i; - for (i = 0; i < vec_size(lex_filenames); ++i) + for (size_t i = 0; i < vec_size(lex_filenames); ++i) mem_d(lex_filenames[i]); vec_free(lex_filenames); } void lex_close(lex_file *lex) { - size_t i; - for (i = 0; i < vec_size(lex->frames); ++i) - mem_d(lex->frames[i].name); vec_free(lex->frames); - if (lex->modelname) - vec_free(lex->modelname); - if (lex->file) fclose(lex->file); - vec_free(lex->tok.value); - /* mem_d(lex->name); collected in lex_filenames */ mem_d(lex); } @@ -314,14 +303,14 @@ static bool isxdigit_only(int ch) /* Append a character to the token buffer */ static void lex_tokench(lex_file *lex, int ch) { - vec_push(lex->tok.value, ch); + lex->tok.value.push(ch); } /* Append a trailing null-byte */ static void 
lex_endtoken(lex_file *lex) { - vec_push(lex->tok.value, 0); - vec_shrinkby(lex->tok.value, 1); + lex->tok.value.push(0); + lex->tok.value.shrinkby(1); } static bool lex_try_pragma(lex_file *lex) @@ -630,7 +619,7 @@ static bool lex_finish_frames(lex_file *lex) return false; for (i = 0; i < vec_size(lex->frames); ++i) { - if (!strcmp(lex->tok.value, lex->frames[i].name)) { + if (lex->frames[i].name == lex->tok.value.c_str()) { lex->frames[i].value = lex->framevalue++; if (lexwarn(lex, WARN_FRAME_MACROS, "duplicate frame macro defined: `%s`", lex->tok.value)) return false; @@ -641,8 +630,8 @@ static bool lex_finish_frames(lex_file *lex) continue; m.value = lex->framevalue++; - m.name = util_strdup(lex->tok.value); - vec_shrinkto(lex->tok.value, 0); + m.name = util_strdup(lex->tok.value.c_str()); + lex->tok.value.shrinkto(0); vec_push(lex->frames, m); } while (true); @@ -892,9 +881,9 @@ static Token GMQCC_WARN lex_finish_digit(lex_file *lex, Token lastch) lex_endtoken(lex); if (lex->tok.ttype == Token::FLOATCONST) - lex->tok.constval.f = strtod(lex->tok.value, nullptr); + lex->tok.constval.f = strtod(lex->tok.value.c_str(), nullptr); else - lex->tok.constval.i = strtol(lex->tok.value, nullptr, 0); + lex->tok.constval.i = strtol(lex->tok.value.c_str(), nullptr, 0); return lex->tok.ttype; } @@ -953,7 +942,7 @@ Token lex_do(lex_file *lex) return (lex->tok.ttype = Token::ERROR); lex_endtoken(lex); /* skip the known commands */ - v = lex->tok.value; + v = lex->tok.value.c_str(); if (!strcmp(v, "frame") || !strcmp(v, "framesave")) { @@ -1004,9 +993,9 @@ Token lex_do(lex_file *lex) if (rc < 0) return (lex->tok.ttype = Token::FATAL); - v = lex->tok.value; + v = lex->tok.value.c_str(); for (frame = 0; frame < vec_size(lex->frames); ++frame) { - if (!strcmp(v, lex->frames[frame].name)) { + if (lex->frames[frame].name == v) { lex->framevalue = lex->frames[frame].value; return lex_do(lex); } @@ -1030,23 +1019,18 @@ Token lex_do(lex_file *lex) if (rc < 0) return (lex->tok.ttype = 
Token::FATAL); - if (lex->modelname) { + if (lex->modelname.size()) { frame_macro m; + m.name = std::move(lex->modelname); m.value = lex->framevalue; - m.name = lex->modelname; - lex->modelname = nullptr; vec_push(lex->frames, m); } - lex->modelname = lex->tok.value; - lex->tok.value = nullptr; + lex->modelname = std::string(lex->tok.value.c_str()); return lex_do(lex); } if (!strcmp(v, "flush")) { - size_t fi; - for (fi = 0; fi < vec_size(lex->frames); ++fi) - mem_d(lex->frames[fi].name); vec_free(lex->frames); /* skip line (fteqcc does it too) */ ch = lex_getch(lex); @@ -1070,7 +1054,7 @@ Token lex_do(lex_file *lex) } for (frame = 0; frame < vec_size(lex->frames); ++frame) { - if (!strcmp(v, lex->frames[frame].name)) { + if (lex->frames[frame].name == v) { lex->tok.constval.i = lex->frames[frame].value; return (lex->tok.ttype = Token::INTCONST); } @@ -1149,6 +1133,9 @@ Token lex_do(lex_file *lex) { case Token::MUL: case Token::DIV: + case Token::MOD: + case Token::ADD: + case Token::SUB: case Token::LT: case Token::GT: case Token::EQ: @@ -1169,7 +1156,7 @@ Token lex_do(lex_file *lex) if (ch == Token::DOT) { lex_tokench(lex, ch); - /* peak ahead once */ + // peak ahead once nextch = lex_getch(lex); if (nextch != Token::DOT) { lex_ungetch(lex, nextch); @@ -1179,7 +1166,7 @@ Token lex_do(lex_file *lex) else return (lex->tok.ttype = Token::OPERATOR); } - /* peak ahead again */ + // peak ahead again nextch = lex_getch(lex); if (nextch != Token::DOT) { lex_ungetch(lex, nextch); @@ -1190,7 +1177,7 @@ Token lex_do(lex_file *lex) else return (lex->tok.ttype = Token::OPERATOR); } - /* fill the token to be "..." */ + // fill the token to be "..." lex_tokench(lex, ch); lex_tokench(lex, ch); lex_endtoken(lex); @@ -1203,7 +1190,7 @@ Token lex_do(lex_file *lex) return (lex->tok.ttype = Token::OPERATOR); } - if (ch == Token::ADD || ch == Token::SUB || /* ++, --, +=, -= and -> as well! 
*/ + if (ch == Token::ADD || ch == Token::SUB || /* ++, --, +=, -= */ ch == Token::GT || ch == Token::LT|| /* <<, >>, <=, >= and >< as well! */ ch == Token::EQ || ch == Token::NOT || /* <=>, ==, != */ ch == Token::AND || ch == Token::OR || /* &&, ||, &=, |= */ @@ -1226,9 +1213,6 @@ Token lex_do(lex_file *lex) lex_tokench(lex, thirdch); else lex_ungetch(lex, thirdch); - - } else if (ch == Token::SUB && nextch == Token::GT) { - lex_tokench(lex, nextch); } else if (ch == Token::AND && nextch == Token::BITNOT) { thirdch = lex_getch(lex); if (thirdch != Token::EQ) { @@ -1290,7 +1274,7 @@ Token lex_do(lex_file *lex) lex_endtoken(lex); lex->tok.ttype = Token::IDENT; - v = lex->tok.value; + v = lex->tok.value.c_str(); if (!strcmp(v, "void")) { lex->tok.ttype = Token::TYPENAME; lex->tok.constval.t = TYPE_VOID; @@ -1369,7 +1353,7 @@ Token lex_do(lex_file *lex) lex->tok.ttype = Token::CHARCONST; /* It's a vector if we can successfully scan 3 floats */ - if (util_sscanf(lex->tok.value, " %f %f %f ", + if (util_sscanf(lex->tok.value.c_str(), " %f %f %f ", &lex->tok.constval.v.x, &lex->tok.constval.v.y, &lex->tok.constval.v.z) == 3) { @@ -1377,10 +1361,10 @@ Token lex_do(lex_file *lex) } else { - if (!lex->flags.preprocessing && strlen(lex->tok.value) > 1) { + if (!lex->flags.preprocessing && strlen(lex->tok.value.c_str()) > 1) { utf8ch_t u8char; /* check for a valid utf8 character */ - if (!OPTS_FLAG(UTF8) || !utf8_to(&u8char, (const unsigned char *)lex->tok.value, 8)) { + if (!OPTS_FLAG(UTF8) || !utf8_to(&u8char, (const unsigned char *)lex->tok.value.c_str(), 8)) { if (lexwarn(lex, WARN_MULTIBYTE_CHARACTER, ( OPTS_FLAG(UTF8) ? 
"invalid multibyte character sequence `%s`" : "multibyte character: `%s`" ), @@ -1391,7 +1375,7 @@ Token lex_do(lex_file *lex) lex->tok.constval.i = u8char; } else - lex->tok.constval.i = lex->tok.value[0]; + lex->tok.constval.i = lex->tok.value.c_str()[0]; } return lex->tok.ttype; diff --git a/lexer.h b/lexer.h index 94db3d2..a98cee3 100644 --- a/lexer.h +++ b/lexer.h @@ -1,97 +1,127 @@ #ifndef GMQCC_LEXER_HDR #define GMQCC_LEXER_HDR -#include "gmqcc.h" - -/* Lexer - * - */ -enum Token : int { // todo: enum class - /* Other tokens which we can return: */ - NONE = 0, - - CR = '\r', - LF = '\n', - WS = ' ', - BACKSLASH = '\\', - - HASH = '#', - DOLLAR = '$', - - DOT = '.', - COMMA = ',', - COLON = ':', - SEMICOLON = ';', - - AND = '&', - OR = '|', - XOR = '^', - BITNOT = '~', - NOT = '!', - - LT = '<', - GT = '>', - EQ = '=', - - MUL = '*', - DIV = '/', - MOD = '%', - - ADD = '+', - SUB = '-', - - QUOT_SINGLE = '\'', - QUOT_DOUBLE = '"', - QUESTION = '?', - - BRACE_OPEN = '{', BRACE_CLOSE = '}', - BRACKET_OPEN = '[', BRACKET_CLOSE = ']', - PAREN_OPEN = '(', PAREN_CLOSE = ')', - - START = 128, - - IDENT, +#include "gmqcc.h" - TYPENAME, +#define TOKENS(X) \ + /* Other tokens which we can return: */ \ + X(NONE, =0) \ + X(CR, = '\r') \ + X(LF, = '\n') \ + X(WS, = ' ') \ + X(BACKSLASH, = '\\') \ + X(HASH, = '#') \ + X(DOLLAR, = '$') \ + X(DOT, = '.') \ + X(COMMA, = ',') \ + X(COLON, = ':') \ + X(SEMICOLON, = ';') \ + X(AND, = '&') \ + X(OR, = '|') \ + X(XOR, = '^') \ + X(BITNOT, = '~') \ + X(NOT, = '!') \ + X(LT, = '<') \ + X(GT, = '>') \ + X(EQ, = '=') \ + X(MUL, = '*') \ + X(DIV, = '/') \ + X(MOD, = '%') \ + X(ADD, = '+') \ + X(SUB, = '-') \ + X(QUOT_SINGLE, = '\'') \ + X(QUOT_DOUBLE, = '"') \ + X(QUESTION, = '?') \ + X(BRACE_OPEN, = '{') \ + X(BRACE_CLOSE, = '}') \ + X(BRACKET_OPEN, = '[') \ + X(BRACKET_CLOSE, = ']') \ + X(PAREN_OPEN, = '(') \ + X(PAREN_CLOSE, = ')') \ + X(START, = 128) \ + X(IDENT, ) \ + X(TYPENAME, ) \ + X(OPERATOR, ) \ + /* loop */ \ + X(KEYWORD, ) 
\ + /* 3 dots, ... */ \ + X(DOTS, ) \ + /* [[ */ \ + X(ATTRIBUTE_OPEN, ) \ + /* ]] */ \ + X(ATTRIBUTE_CLOSE, ) \ + /* for the ftepp only */ \ + X(VA_ARGS, ) \ + /* for the ftepp only */ \ + X(VA_ARGS_ARRAY, ) \ + /* to get the count of vaargs */ \ + X(VA_COUNT, ) \ + /* not the typename but an actual "string" */ \ + X(STRINGCONST, ) \ + X(CHARCONST, ) \ + X(VECTORCONST, ) \ + X(INTCONST, ) \ + X(FLOATCONST, ) \ + X(WHITE, ) \ + X(EOL, ) \ + /* if we add additional tokens before this, the exposed API \ + * should not be broken anyway, but EOF/ERROR/... should \ + * still be at the bottom \ + */ \ + X(END, = 1024) \ + /* We use '< ERROR', so FATAL must come after it and any \ + * other error related tokens as well \ + */ \ + X(ERROR, ) \ + /* internal error, eg out of memory */ \ + X(FATAL, ) \ + /**/ - OPERATOR, +enum Token : int { // todo: enum class +#define X(id, val) id val, + TOKENS(X) +#undef X +}; - KEYWORD, /* loop */ +inline const char *TokenName(Token t) { + switch (t) { + default: + return "UNKNOWN"; +#define X(id, val) case Token::id: return #id; + TOKENS(X) +#undef X + } +} - DOTS, /* 3 dots, ... */ +struct cvec { + std::string value; - ATTRIBUTE_OPEN, /* [[ */ - ATTRIBUTE_CLOSE, /* ]] */ + explicit cvec() = default; - VA_ARGS, /* for the ftepp only */ - VA_ARGS_ARRAY, /* for the ftepp only */ - VA_COUNT, /* to get the count of vaargs */ + char *mut() { + return &value[0]; + } - STRINGCONST, /* not the typename but an actual "string" */ - CHARCONST, - VECTORCONST, - INTCONST, - FLOATCONST, + const char *c_str() { + return value.c_str(); + } - WHITE, - EOL, + void shrinkto(size_t i) { + value.resize(i); + } - /* if we add additional tokens before this, the exposed API - * should not be broken anyway, but EOF/ERROR/... 
should - * still be at the bottom - */ - END = 1024, + void shrinkby(size_t i) { + value.resize(value.size() - i); + } - /* We use '< ERROR', so FATAL must come after it and any - * other error related tokens as well - */ - ERROR, - FATAL /* internal error, eg out of memory */ + void push(char it) { + value.push_back(it); + } }; struct token { Token ttype; - char *value; + cvec value; union { vec3_t v; int i; @@ -102,47 +132,51 @@ struct token { }; struct frame_macro { - char *name; + std::string name; int value; }; struct lex_file { - FILE *file; + FILE *file; const char *open_string; - size_t open_string_length; - size_t open_string_pos; + size_t open_string_length; + size_t open_string_pos; - char *name; - size_t line; - size_t sline; /* line at the start of a token */ - size_t column; + char *name; + size_t line; + size_t sline; /* line at the start of a token */ + size_t column; - Token peek[256]; - size_t peekpos; + std::array peek; + size_t peekpos; - bool eof; + bool eof; - token tok; /* not a pointer anymore */ + token tok; /* not a pointer anymore */ struct { - unsigned noops:1; - unsigned nodigraphs:1; /* used when lexing string constants */ - unsigned preprocessing:1; /* whitespace and EOLs become actual tokens */ - unsigned mergelines:1; /* backslash at the end of a line escapes the newline */ + bool noops:1; + bool nodigraphs:1; /* used when lexing string constants */ + bool preprocessing:1; /* whitespace and EOLs become actual tokens */ + bool mergelines:1; /* backslash at the end of a line escapes the newline */ } flags; /* sizeof == 1 */ int framevalue; frame_macro *frames; - char *modelname; + std::string modelname; size_t push_line; }; -lex_file* lex_open (const char *file); -lex_file* lex_open_string(const char *str, size_t len, const char *name); -void lex_close(lex_file *lex); -Token lex_do (lex_file *lex); -void lex_cleanup(void); +lex_file *lex_open(const char *file); + +lex_file *lex_open_string(const char *str, size_t len, const char 
*name); + +void lex_close(lex_file *lex); + +Token lex_do(lex_file *lex); + +void lex_cleanup(); /* Parser * @@ -157,13 +191,13 @@ enum { #define OP_PREFIX 2 struct oper_info { - const char *op; + const char *op; unsigned int operands; unsigned int id; unsigned int assoc; - signed int prec; + signed int prec; unsigned int flags; - bool folds; + bool folds; }; /* @@ -172,172 +206,172 @@ struct oper_info { * possibility. */ #define opid1(a) ((uint8_t)a) -#define opid2(a,b) (((uint8_t)a<<8) |(uint8_t)b) -#define opid3(a,b,c) (((uint8_t)a<<16)|((uint8_t)b<<8)|(uint8_t)c) +#define opid2(a, b) (((uint8_t)a<<8) |(uint8_t)b) +#define opid3(a, b, c) (((uint8_t)a<<16)|((uint8_t)b<<8)|(uint8_t)c) static const oper_info c_operators[] = { - { "(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ - { "_length", 1, opid3('l','e','n'), ASSOC_RIGHT, 98, OP_PREFIX, true}, - - { "++", 1, opid3('S','+','+'), ASSOC_LEFT, 17, OP_SUFFIX, false}, - { "--", 1, opid3('S','-','-'), ASSOC_LEFT, 17, OP_SUFFIX, false}, - { ".", 2, opid1('.'), ASSOC_LEFT, 17, 0, false}, - { "(", 0, opid1('('), ASSOC_LEFT, 17, 0, false}, /* function call */ - { "[", 2, opid1('['), ASSOC_LEFT, 17, 0, false}, /* array subscript */ - - { "++", 1, opid3('+','+','P'), ASSOC_RIGHT, 16, OP_PREFIX, false}, - { "--", 1, opid3('-','-','P'), ASSOC_RIGHT, 16, OP_PREFIX, false}, - - { "**", 2, opid2('*','*'), ASSOC_RIGHT, 14, 0, true}, - { "!", 1, opid2('!','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - { "~", 1, opid2('~','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - { "+", 1, opid2('+','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, - { "-", 1, opid2('-','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ + {"_length", 1, opid3('l', 'e', 'n'), ASSOC_RIGHT, 98, OP_PREFIX, true}, + + {"++", 1, opid3('S', '+', '+'), ASSOC_LEFT, 17, OP_SUFFIX, false}, + {"--", 1, opid3('S', '-', '-'), ASSOC_LEFT, 17, 
OP_SUFFIX, false}, + {".", 2, opid1('.'), ASSOC_LEFT, 17, 0, false}, + {"(", 0, opid1('('), ASSOC_LEFT, 17, 0, false}, /* function call */ + {"[", 2, opid1('['), ASSOC_LEFT, 17, 0, false}, /* array subscript */ + + {"++", 1, opid3('+', '+', 'P'), ASSOC_RIGHT, 16, OP_PREFIX, false}, + {"--", 1, opid3('-', '-', 'P'), ASSOC_RIGHT, 16, OP_PREFIX, false}, + + {"**", 2, opid2('*', '*'), ASSOC_RIGHT, 14, 0, true}, + {"!", 1, opid2('!', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"~", 1, opid2('~', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"+", 1, opid2('+', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, + {"-", 1, opid2('-', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, /* { "&", 1, opid2('&','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, */ - { "*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, - { "/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, - { "%", 2, opid1('%'), ASSOC_LEFT, 13, 0, true}, - { "><", 2, opid2('>','<'), ASSOC_LEFT, 13, 0, true}, + {"*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, + {"/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, + {"%", 2, opid1('%'), ASSOC_LEFT, 13, 0, true}, + {"><", 2, opid2('>', '<'), ASSOC_LEFT, 13, 0, true}, - { "+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, - { "-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, + {"+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, + {"-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, - { "<<", 2, opid2('<','<'), ASSOC_LEFT, 11, 0, true}, - { ">>", 2, opid2('>','>'), ASSOC_LEFT, 11, 0, true}, + {"<<", 2, opid2('<', '<'), ASSOC_LEFT, 11, 0, true}, + {">>", 2, opid2('>', '>'), ASSOC_LEFT, 11, 0, true}, - { "<", 2, opid1('<'), ASSOC_LEFT, 10, 0, false}, - { ">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, - { "<=>", 2, opid3('<','=','>'), ASSOC_LEFT, 10, 0, true}, - { "<=", 2, opid2('<','='), ASSOC_LEFT, 10, 0, false}, - { ">=", 2, opid2('>','='), ASSOC_LEFT, 10, 0, false}, + {"<", 2, opid1('<'), ASSOC_LEFT, 10, 0, false}, + {">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, + {"<=>", 2, opid3('<', '=', '>'), ASSOC_LEFT, 10, 0, true}, + 
{"<=", 2, opid2('<', '='), ASSOC_LEFT, 10, 0, false}, + {">=", 2, opid2('>', '='), ASSOC_LEFT, 10, 0, false}, - { "==", 2, opid2('=','='), ASSOC_LEFT, 9, 0, true}, - { "!=", 2, opid2('!','='), ASSOC_LEFT, 9, 0, true}, + {"==", 2, opid2('=', '='), ASSOC_LEFT, 9, 0, true}, + {"!=", 2, opid2('!', '='), ASSOC_LEFT, 9, 0, true}, - { "&", 2, opid1('&'), ASSOC_LEFT, 8, 0, true}, + {"&", 2, opid1('&'), ASSOC_LEFT, 8, 0, true}, - { "^", 2, opid1('^'), ASSOC_LEFT, 7, 0, true}, + {"^", 2, opid1('^'), ASSOC_LEFT, 7, 0, true}, - { "|", 2, opid1('|'), ASSOC_LEFT, 6, 0, true}, + {"|", 2, opid1('|'), ASSOC_LEFT, 6, 0, true}, - { "&&", 2, opid2('&','&'), ASSOC_LEFT, 5, 0, true}, + {"&&", 2, opid2('&', '&'), ASSOC_LEFT, 5, 0, true}, - { "||", 2, opid2('|','|'), ASSOC_LEFT, 4, 0, true}, + {"||", 2, opid2('|', '|'), ASSOC_LEFT, 4, 0, true}, - { "?", 3, opid2('?',':'), ASSOC_RIGHT, 3, 0, true}, + {"?", 3, opid2('?', ':'), ASSOC_RIGHT, 3, 0, true}, - { "=", 2, opid1('='), ASSOC_RIGHT, 2, 0, false}, - { "+=", 2, opid2('+','='), ASSOC_RIGHT, 2, 0, false}, - { "-=", 2, opid2('-','='), ASSOC_RIGHT, 2, 0, false}, - { "*=", 2, opid2('*','='), ASSOC_RIGHT, 2, 0, false}, - { "/=", 2, opid2('/','='), ASSOC_RIGHT, 2, 0, false}, - { "%=", 2, opid2('%','='), ASSOC_RIGHT, 2, 0, false}, - { ">>=", 2, opid3('>','>','='), ASSOC_RIGHT, 2, 0, false}, - { "<<=", 2, opid3('<','<','='), ASSOC_RIGHT, 2, 0, false}, - { "&=", 2, opid2('&','='), ASSOC_RIGHT, 2, 0, false}, - { "^=", 2, opid2('^','='), ASSOC_RIGHT, 2, 0, false}, - { "|=", 2, opid2('|','='), ASSOC_RIGHT, 2, 0, false}, + {"=", 2, opid1('='), ASSOC_RIGHT, 2, 0, false}, + {"+=", 2, opid2('+', '='), ASSOC_RIGHT, 2, 0, false}, + {"-=", 2, opid2('-', '='), ASSOC_RIGHT, 2, 0, false}, + {"*=", 2, opid2('*', '='), ASSOC_RIGHT, 2, 0, false}, + {"/=", 2, opid2('/', '='), ASSOC_RIGHT, 2, 0, false}, + {"%=", 2, opid2('%', '='), ASSOC_RIGHT, 2, 0, false}, + {">>=", 2, opid3('>', '>', '='), ASSOC_RIGHT, 2, 0, false}, + {"<<=", 2, opid3('<', '<', '='), 
ASSOC_RIGHT, 2, 0, false}, + {"&=", 2, opid2('&', '='), ASSOC_RIGHT, 2, 0, false}, + {"^=", 2, opid2('^', '='), ASSOC_RIGHT, 2, 0, false}, + {"|=", 2, opid2('|', '='), ASSOC_RIGHT, 2, 0, false}, - { ":", 0, opid2(':','?'), ASSOC_RIGHT, 1, 0, false}, + {":", 0, opid2(':', '?'), ASSOC_RIGHT, 1, 0, false}, - { ",", 2, opid1(','), ASSOC_LEFT, 0, 0, false} + {",", 2, opid1(','), ASSOC_LEFT, 0, 0, false} }; static const oper_info fte_operators[] = { - { "(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ - - { "++", 1, opid3('S','+','+'), ASSOC_LEFT, 15, OP_SUFFIX, false}, - { "--", 1, opid3('S','-','-'), ASSOC_LEFT, 15, OP_SUFFIX, false}, - { ".", 2, opid1('.'), ASSOC_LEFT, 15, 0, false}, - { "(", 0, opid1('('), ASSOC_LEFT, 15, 0, false}, /* function call */ - { "[", 2, opid1('['), ASSOC_LEFT, 15, 0, false}, /* array subscript */ - - { "!", 1, opid2('!','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - { "+", 1, opid2('+','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, - { "-", 1, opid2('-','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - { "++", 1, opid3('+','+','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, - { "--", 1, opid3('-','-','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, - - { "*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, - { "/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, - { "&", 2, opid1('&'), ASSOC_LEFT, 13, 0, true}, - { "|", 2, opid1('|'), ASSOC_LEFT, 13, 0, true}, - - { "+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, - { "-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, - - { "<<", 2, opid2('<','<'), ASSOC_LEFT, 11, 0, true}, - { ">>", 2, opid2('>','>'), ASSOC_LEFT, 11, 0, true}, - - { "<", 2, opid1('<'), ASSOC_LEFT, 10, 0, false}, - { ">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, - { "<=", 2, opid2('<','='), ASSOC_LEFT, 10, 0, false}, - { ">=", 2, opid2('>','='), ASSOC_LEFT, 10, 0, false}, - { "==", 2, opid2('=','='), ASSOC_LEFT, 10, 0, true}, - { "!=", 2, opid2('!','='), ASSOC_LEFT, 10, 0, true}, - - { "?", 3, opid2('?',':'), ASSOC_RIGHT, 
9, 0, true}, - - { "=", 2, opid1('='), ASSOC_RIGHT, 8, 0, false}, - { "+=", 2, opid2('+','='), ASSOC_RIGHT, 8, 0, false}, - { "-=", 2, opid2('-','='), ASSOC_RIGHT, 8, 0, false}, - { "*=", 2, opid2('*','='), ASSOC_RIGHT, 8, 0, false}, - { "/=", 2, opid2('/','='), ASSOC_RIGHT, 8, 0, false}, - { "%=", 2, opid2('%','='), ASSOC_RIGHT, 8, 0, false}, - { "&=", 2, opid2('&','='), ASSOC_RIGHT, 8, 0, false}, - { "|=", 2, opid2('|','='), ASSOC_RIGHT, 8, 0, false}, - { "&~=", 2, opid3('&','~','='), ASSOC_RIGHT, 8, 0, false}, - - { "&&", 2, opid2('&','&'), ASSOC_LEFT, 5, 0, true}, - { "||", 2, opid2('|','|'), ASSOC_LEFT, 5, 0, true}, - - /* Leave precedence 3 for : with -fcorrect-ternary */ - { ",", 2, opid1(','), ASSOC_LEFT, 2, 0, false}, - { ":", 0, opid2(':','?'), ASSOC_RIGHT, 1, 0, false} + {"(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ + + {"++", 1, opid3('S', '+', '+'), ASSOC_LEFT, 15, OP_SUFFIX, false}, + {"--", 1, opid3('S', '-', '-'), ASSOC_LEFT, 15, OP_SUFFIX, false}, + {".", 2, opid1('.'), ASSOC_LEFT, 15, 0, false}, + {"(", 0, opid1('('), ASSOC_LEFT, 15, 0, false}, /* function call */ + {"[", 2, opid1('['), ASSOC_LEFT, 15, 0, false}, /* array subscript */ + + {"!", 1, opid2('!', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"+", 1, opid2('+', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, + {"-", 1, opid2('-', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"++", 1, opid3('+', '+', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, + {"--", 1, opid3('-', '-', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, + + {"*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, + {"/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, + {"&", 2, opid1('&'), ASSOC_LEFT, 13, 0, true}, + {"|", 2, opid1('|'), ASSOC_LEFT, 13, 0, true}, + + {"+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, + {"-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, + + {"<<", 2, opid2('<', '<'), ASSOC_LEFT, 11, 0, true}, + {">>", 2, opid2('>', '>'), ASSOC_LEFT, 11, 0, true}, + + {"<", 2, opid1('<'), 
ASSOC_LEFT, 10, 0, false}, + {">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, + {"<=", 2, opid2('<', '='), ASSOC_LEFT, 10, 0, false}, + {">=", 2, opid2('>', '='), ASSOC_LEFT, 10, 0, false}, + {"==", 2, opid2('=', '='), ASSOC_LEFT, 10, 0, true}, + {"!=", 2, opid2('!', '='), ASSOC_LEFT, 10, 0, true}, + + {"?", 3, opid2('?', ':'), ASSOC_RIGHT, 9, 0, true}, + + {"=", 2, opid1('='), ASSOC_RIGHT, 8, 0, false}, + {"+=", 2, opid2('+', '='), ASSOC_RIGHT, 8, 0, false}, + {"-=", 2, opid2('-', '='), ASSOC_RIGHT, 8, 0, false}, + {"*=", 2, opid2('*', '='), ASSOC_RIGHT, 8, 0, false}, + {"/=", 2, opid2('/', '='), ASSOC_RIGHT, 8, 0, false}, + {"%=", 2, opid2('%', '='), ASSOC_RIGHT, 8, 0, false}, + {"&=", 2, opid2('&', '='), ASSOC_RIGHT, 8, 0, false}, + {"|=", 2, opid2('|', '='), ASSOC_RIGHT, 8, 0, false}, + {"&~=", 2, opid3('&', '~', '='), ASSOC_RIGHT, 8, 0, false}, + + {"&&", 2, opid2('&', '&'), ASSOC_LEFT, 5, 0, true}, + {"||", 2, opid2('|', '|'), ASSOC_LEFT, 5, 0, true}, + + /* Leave precedence 3 for : with -fcorrect-ternary */ + {",", 2, opid1(','), ASSOC_LEFT, 2, 0, false}, + {":", 0, opid2(':', '?'), ASSOC_RIGHT, 1, 0, false} }; static const oper_info qcc_operators[] = { - { "(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ - - { ".", 2, opid1('.'), ASSOC_LEFT, 15, 0, false}, - { "(", 0, opid1('('), ASSOC_LEFT, 15, 0, false}, /* function call */ - { "[", 2, opid1('['), ASSOC_LEFT, 15, 0, false}, /* array subscript */ - - { "!", 1, opid2('!','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - { "+", 1, opid2('+','P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, - { "-", 1, opid2('-','P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, - - { "*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, - { "/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, - { "&", 2, opid1('&'), ASSOC_LEFT, 13, 0, true}, - { "|", 2, opid1('|'), ASSOC_LEFT, 13, 0, true}, - - { "+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, - { "-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, - - { "<", 2, opid1('<'), 
ASSOC_LEFT, 10, 0, false}, - { ">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, - { "<=", 2, opid2('<','='), ASSOC_LEFT, 10, 0, false}, - { ">=", 2, opid2('>','='), ASSOC_LEFT, 10, 0, false}, - { "==", 2, opid2('=','='), ASSOC_LEFT, 10, 0, true}, - { "!=", 2, opid2('!','='), ASSOC_LEFT, 10, 0, true}, - - { "=", 2, opid1('='), ASSOC_RIGHT, 8, 0, false}, - { "+=", 2, opid2('+','='), ASSOC_RIGHT, 8, 0, false}, - { "-=", 2, opid2('-','='), ASSOC_RIGHT, 8, 0, false}, - { "*=", 2, opid2('*','='), ASSOC_RIGHT, 8, 0, false}, - { "/=", 2, opid2('/','='), ASSOC_RIGHT, 8, 0, false}, - { "%=", 2, opid2('%','='), ASSOC_RIGHT, 8, 0, false}, - { "&=", 2, opid2('&','='), ASSOC_RIGHT, 8, 0, false}, - { "|=", 2, opid2('|','='), ASSOC_RIGHT, 8, 0, false}, - - { "&&", 2, opid2('&','&'), ASSOC_LEFT, 5, 0, true}, - { "||", 2, opid2('|','|'), ASSOC_LEFT, 5, 0, true}, - - { ",", 2, opid1(','), ASSOC_LEFT, 2, 0, false}, + {"(", 0, opid1('('), ASSOC_LEFT, 99, OP_PREFIX, false}, /* paren expression - non function call */ + + {".", 2, opid1('.'), ASSOC_LEFT, 15, 0, false}, + {"(", 0, opid1('('), ASSOC_LEFT, 15, 0, false}, /* function call */ + {"[", 2, opid1('['), ASSOC_LEFT, 15, 0, false}, /* array subscript */ + + {"!", 1, opid2('!', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + {"+", 1, opid2('+', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, false}, + {"-", 1, opid2('-', 'P'), ASSOC_RIGHT, 14, OP_PREFIX, true}, + + {"*", 2, opid1('*'), ASSOC_LEFT, 13, 0, true}, + {"/", 2, opid1('/'), ASSOC_LEFT, 13, 0, true}, + {"&", 2, opid1('&'), ASSOC_LEFT, 13, 0, true}, + {"|", 2, opid1('|'), ASSOC_LEFT, 13, 0, true}, + + {"+", 2, opid1('+'), ASSOC_LEFT, 12, 0, true}, + {"-", 2, opid1('-'), ASSOC_LEFT, 12, 0, true}, + + {"<", 2, opid1('<'), ASSOC_LEFT, 10, 0, false}, + {">", 2, opid1('>'), ASSOC_LEFT, 10, 0, false}, + {"<=", 2, opid2('<', '='), ASSOC_LEFT, 10, 0, false}, + {">=", 2, opid2('>', '='), ASSOC_LEFT, 10, 0, false}, + {"==", 2, opid2('=', '='), ASSOC_LEFT, 10, 0, true}, + {"!=", 2, opid2('!', '='), 
ASSOC_LEFT, 10, 0, true}, + + {"=", 2, opid1('='), ASSOC_RIGHT, 8, 0, false}, + {"+=", 2, opid2('+', '='), ASSOC_RIGHT, 8, 0, false}, + {"-=", 2, opid2('-', '='), ASSOC_RIGHT, 8, 0, false}, + {"*=", 2, opid2('*', '='), ASSOC_RIGHT, 8, 0, false}, + {"/=", 2, opid2('/', '='), ASSOC_RIGHT, 8, 0, false}, + {"%=", 2, opid2('%', '='), ASSOC_RIGHT, 8, 0, false}, + {"&=", 2, opid2('&', '='), ASSOC_RIGHT, 8, 0, false}, + {"|=", 2, opid2('|', '='), ASSOC_RIGHT, 8, 0, false}, + + {"&&", 2, opid2('&', '&'), ASSOC_LEFT, 5, 0, true}, + {"||", 2, opid2('|', '|'), ASSOC_LEFT, 5, 0, true}, + + {",", 2, opid1(','), ASSOC_LEFT, 2, 0, false}, }; extern const oper_info *operators; -extern size_t operator_count; +extern size_t operator_count; #endif diff --git a/parser.cpp b/parser.cpp index b2957d4..4f8939d 100644 --- a/parser.cpp +++ b/parser.cpp @@ -74,7 +74,7 @@ static bool parser_next(parser_t &parser) return true; } -#define parser_tokval(p) ((p).lex->tok.value) +#define parser_tokval(p) ((p).lex->tok.value.c_str()) #define parser_token(p) (&((p).lex->tok)) static ast_expression* parser_find_field(parser_t &parser, const char *name) { diff --git a/parser2.cpp b/parser2.cpp new file mode 100644 index 0000000..cbae3d2 --- /dev/null +++ b/parser2.cpp @@ -0,0 +1,992 @@ +#include "parser.h" + +#include + +// todo: field types +// todo: void() types +// todo: template traits for each rule to enable prediction instead of backtracking +// todo: commit to a rule if possible and disable backtracking +// todo: store memo on heap +// todo: parameterize expression parsing instead of separate rules + +#define RVALUE(it) ((void) 0, (it)) + +namespace result { + +#define OK() return Result::OK +#define ERR(msg) return Result((std::string("") + __PRETTY_FUNCTION__ + ": " + msg), ctx.tok) + + struct Result { + static Result OK; + + explicit Result(std::string error, Token t) : tok(t) { + auto tokname = std::string(TokenName(t)); + auto tokval = std::to_string(t); + this->error = std::move(error) + 
" -> " + tokname + " (" + tokval + ")"; + } + + std::string error; + Token tok; + + operator bool() const { + return Result::OK == *this; + } + + bool operator==(const Result &other) const { + return error == other.error; + } + }; + + Result Result::OK = Result("", Token::NONE); +} + +using Result = result::Result; + +/// lexer + +#define LTOK() RVALUE(ctx.lex.tok) +#define CTX() RVALUE(LTOK().ctx) + +#define SOURCELOC() \ + RVALUE((std::string(CTX().file) + ":" + std::to_string(CTX().line) + ":" + std::to_string(CTX().column)).c_str()) + +#define STRING() RVALUE((LTOK().value).c_str()) +#define DEBUG(msg) RVALUE(ctx.debug((std::string("*** `") + STRING() + "`: " + msg).c_str())) + +/// parser + +struct memo_t { + decltype(lex_file::tok) tok; + /// XXX: hack + decltype(lex_file::peek) peek; + size_t peekpos; + size_t line, column; + size_t idx; +}; + +struct ctx_t { + parser_t &parser; + lex_file &lex; + Token tok; + + explicit ctx_t(parser_t &parser) : parser(parser), lex(*parser.lex) { + tok = Token::NONE; + } + + memo_t memo() { + auto idx = lex.file ? 
ftell(lex.file) : lex.open_string_pos; + return memo_t{lex.tok, lex.peek, lex.peekpos, lex.line, lex.column, idx}; + } + + void memo(memo_t memo) { + lex.tok = memo.tok; + tok = memo.tok.ttype; + lex.peek = memo.peek; + lex.peekpos = memo.peekpos; + lex.line = memo.line; + lex.column = memo.column; + if (lex.file) { + fseek(lex.file, memo.idx, SEEK_SET); + } else if (lex.open_string) { + lex.open_string_pos = memo.idx; + } + } + + void next() { + tok = lex_do(parser.lex); + if (tok >= Token::ERROR) { + error("lex error"); + tok = Token::NONE; + } + } + + template + inline void error(const char *fmt, const Ts &...ts) { + return parseerror(parser, fmt, formatNormalize(ts)...); + } + + std::string indent = ""; + + void rule_enter(const char *rule) { + auto &ctx = *this; + debug((std::string(rule) + " : `" + STRING() + "`").c_str()); + indent += " "; + } + + void rule_leave(const char *rule, Result &ret) { + indent.resize(std::max(static_cast(0), indent.size() - 2)); + if (ret) { + debug((std::string(rule) + "::OK").c_str()); + } else { + debug((std::string(rule) + "::ERR").c_str()); + } + } + + void debug(const char *msg) { + printf("%s%s\n", indent.c_str(), msg); + } + +private: + static void parseerror(parser_t &parser, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vcompile_error(parser.lex->tok.ctx, fmt, ap); + va_end(ap); + } +}; + +// entrypoint + +static bool parser_compile(ctx_t &&ctx); + +namespace parse { + +#define RULE(rule) \ +static Result rule(ctx_t &ctx) { ctx.rule_enter(#rule); auto ret = do_##rule(ctx); ctx.rule_leave(#rule, ret); return ret; } \ +static Result do_##rule(ctx_t &ctx) + +#define TRY(...) 
do { \ + auto ret = __VA_ARGS__; \ + if (!ret) { \ + return ret; \ + } \ +} while (false) + +#define PEEK() RVALUE(ctx.tok) +#define ACCEPT(it) RVALUE(parse::accept(ctx, (it))) + + bool accept(ctx_t &ctx, Token t) { + if (PEEK() == t) { + ctx.next(); + return true; + } + return false; + } + +#define EXPECT(tok) EXPECT_2(tok, "unexpected symbol, " + TokenName(tok) + " was expected") +#define EXPECT_2(tok, msg) do { \ + if (!ACCEPT(tok)) ERR(msg); \ +} while (false) + + +#define ACCEPT_IDENT(it) (parse::accept_ident(ctx, (it))) + + bool accept_ident(ctx_t &ctx, const char *ident) { + if (PEEK() == Token::IDENT && strcmp(STRING(), ident) == 0) { + ctx.next(); + return true; + } + return false; + } +} + +using Rule = Result(ctx_t &ctx); + +#define BT() ERR("BT") + +/// rule[0] rule[1..n] +template +Result seq(ctx_t &ctx) { + auto list = {rules...}; + for (auto &rule : list) { + TRY(rule(ctx)); + } + OK(); +} + +/// rule? +#define OPT(...) ([&](ctx_t &) { \ + auto memo = ctx.memo(); \ + if (!__VA_ARGS__(ctx)) ctx.memo(memo); \ + OK(); \ +}) + +/// rule? +template +Result opt(ctx_t &ctx) { + return OPT(rule)(ctx); +} + +/// rule+ +#define CROSS(rule) ([&](ctx_t &) { \ + TRY(rule(ctx)); \ + for (;;) { \ + auto memo = ctx.memo(); \ + if (!rule(ctx)) { \ + ctx.memo(memo); \ + break; \ + } \ + } \ + OK(); \ +}) + +/// rule+ +template +Result cross(ctx_t &ctx) { + return CROSS(rule)(ctx); +} + +/// rule* == (rule+)? +#define STAR(rule) OPT(CROSS(rule)) + +/// rule* == (rule+)? +template +Result star(ctx_t &ctx) { + return STAR(rule)(ctx); +} + +/// rule (s rule)* +template +Result sep(ctx_t &ctx) { + return seq>>(ctx); +} + +/// rule[0] | rule[1..n] +#define ALT(...) 
([&](ctx_t &) { \ + auto memo = ctx.memo(); \ + auto list = {__VA_ARGS__}; \ + for (auto &rule : list) { \ + auto ret = rule(ctx); \ + if (ret) \ + OK(); \ + ctx.memo(memo); \ + } \ + BT(); \ +}) + +/// rule[0] | rule[1..n] +template +Result alt(ctx_t &ctx) { + return ALT(rules...)(ctx); +} + +template +Result tok(ctx_t &ctx) { + EXPECT(t); + OK(); +} + +template +Result lit(ctx_t &ctx) { + static char str_[] = {chars..., 0}; + const char *str = str_; + if (PEEK() == t) { + const char *s = STRING(); + auto diff = strcmp(str, s); + ctx.next(); + if (!diff) OK(); + } + BT(); +} + +template +Result leftop(ctx_t &ctx) { + return sep(ctx); +} + +struct grammar { + + // declarations + + /// : translationUnit? EOF + RULE(compilationUnit) { + TRY(OPT(translationUnit)(ctx)); + TRY(tok(ctx)); + OK(); + } + + /// : externalDeclaration+ + RULE(translationUnit) { + TRY(CROSS(externalDeclaration)(ctx)); + OK(); + } + + /// : pragma + /// | functionDefinition + /// | declaration + /// | ';' + RULE(externalDeclaration) { + TRY(alt< + pragma, + functionDefinition, + declaration, + tok + >(ctx)); + OK(); + } + + RULE(pragma) { + EXPECT(Token::HASH); + if (ACCEPT_IDENT("pragma")) { + if (ACCEPT_IDENT("noref")) { + EXPECT(Token::INTCONST); + OK(); + } + ERR("unknown pragma '" + STRING() + "'"); + } + ERR("unknown pragma '" + STRING() + "'"); + } + + /// : declarationSpecifiers? 
declarator compoundStatement + RULE(functionDefinition) { + TRY(OPT(declarationSpecifiers)(ctx)); + TRY(declarator(ctx)); + TRY(compoundStatement(ctx)); + OK(); + } + + /// : declarationSpecifier+ + RULE(declarationSpecifiers) { + TRY(CROSS(declarationSpecifier)(ctx)); + OK(); + } + + /// : storageClassSpecifier + /// | typeSpecifier + /// | typeQualifier + /// | functionSpecifier + RULE(declarationSpecifier) { + TRY(alt< + storageClassSpecifier, + typeSpecifier, + typeQualifier, + functionSpecifier + >(ctx)); + OK(); + } + + /// : 'typedef' + /// | 'extern' + /// | 'static' + RULE(storageClassSpecifier) { + constexpr auto Typedef = lit; + constexpr auto Extern = lit; + constexpr auto Static = lit; + + TRY(alt< + Typedef, + Extern, + Static + >(ctx)); + OK(); + } + + /// : ('void' | 'char' | 'int' | 'float' | 'entity') + /// | enumSpecifier + /// | typedefName + RULE(typeSpecifier) { + constexpr auto Void = lit; + constexpr auto Char = lit; + constexpr auto Int = lit; + constexpr auto Float = lit; + constexpr auto Entity = lit; + + TRY(alt< + alt< + Void, + Char, + Int, + Float, + Entity + >, + enumSpecifier, + typedefName + >(ctx)); + OK(); + } + + /// : 'enum' '{' enumeratorList ','? '}' + RULE(enumSpecifier) { + constexpr auto Enum = lit; + + TRY(Enum(ctx)); + TRY(tok(ctx)); + TRY(enumeratorList(ctx)); + TRY(OPT(tok)(ctx)); + TRY(tok(ctx)); + OK(); + } + + /// : enumerator (',' enumerator)* + RULE(enumeratorList) { + TRY(sep>(ctx)); + OK(); + } + + /// : enumerationConstant ('=' constantExpression)? 
+ RULE(enumerator) { + TRY(enumerationConstant(ctx)); + TRY(OPT(seq, constantExpression>)(ctx)); + OK(); + } + + /// : Identifier + RULE(enumerationConstant) { + TRY(tok(ctx)); + OK(); + } + + /// : Identifier + RULE(typedefName) { + TRY(tok(ctx)); + OK(); + } + + /// : 'const' + RULE(typeQualifier) { + constexpr auto Const = lit; + + TRY(Const(ctx)); + OK(); + } + + /// : '[[' .+ ']]' + /// todo + RULE(functionSpecifier) { + BT(); + } + + /// : Identifier ( + /// '[' assignmentExpression_15? ']' + /// | '(' parameterTypeList? ')' + /// )* + RULE(declarator) { + TRY(tok(ctx)); + TRY(star, opt, tok>, + seq, opt, tok> + >>(ctx)); + OK(); + } + + /// : ( + /// '[' assignmentExpression_15? ']' + /// | '(' parameterTypeList? ')' + /// )+ + RULE(abstractDeclarator) { + TRY(star, opt, tok>, + seq, opt, tok> + >>(ctx)); + OK(); + } + + /// : parameterList (',' parameterVarargs)? + /// | parameterVarargs + RULE(parameterTypeList) { + TRY(opt, tok>>>, + tok + >>(ctx)); + OK(); + } + + /// : parameterDeclaration (',' parameterDeclaration)* + RULE(parameterList) { + TRY(sep>(ctx)); + OK(); + } + + /// : declarationSpecifiers (declarator | abstractDeclarator?) + RULE(parameterDeclaration) { + TRY(seq> + >(ctx)); + OK(); + } + + /// : declarationSpecifiers initDeclaratorList? ';' + RULE(declaration) { + TRY(declarationSpecifiers(ctx)); + TRY(OPT(initDeclaratorList)(ctx)); + TRY(tok(ctx)); + OK(); + } + + /// : initDeclarator (',' initDeclarator)* + RULE(initDeclaratorList) { + TRY(sep>(ctx)); + OK(); + } + + /// : declarator ('=' initializer)? + RULE(initDeclarator) { + TRY(seq, initializer>>>(ctx)); + OK(); + } + + /// : assignmentExpression_15 + /// | '{' initializerList ','? 
'}' + RULE(initializer) { + TRY(alt< + assignmentExpression_15, + seq, initializerList, opt>, tok> + >(ctx)); + OK(); + } + + /// : initializer (',' initializer)* + RULE(initializerList) { + TRY(sep>(ctx)); + OK(); + } + + // statements + + /// : labeledStatement + /// | compoundStatement + /// | expressionStatement + /// | selectionStatement + /// | iterationStatement + /// | jumpStatement + RULE(statement) { + TRY(alt< + labeledStatement, + compoundStatement, + expressionStatement, + selectionStatement, + iterationStatement, + jumpStatement + >(ctx)); + OK(); + } + + /// : ( + /// Identifier + /// | 'case' constantExpression + /// | 'default' + /// ) ':' statement + RULE(labeledStatement) { + constexpr auto Case = lit; + constexpr auto Default = lit; + + TRY(seq, + seq, + Default + >, tok, statement>(ctx)); + OK(); + } + + /// : '{' blockItem* '}' + RULE(compoundStatement) { + TRY(tok(ctx)); + TRY(STAR(blockItem)(ctx)); + TRY(tok(ctx)); + OK(); + } + + /// : declaration + /// | statement + RULE(blockItem) { + TRY(alt(ctx)); + OK(); + } + + /// : expression? ';' + RULE(expressionStatement) { + TRY(OPT(expression)(ctx)); + TRY(tok(ctx)); + OK(); + } + + /// : 'if' '(' expression ')' statement ('else' statement)? + /// | 'switch' '(' expression ')' statement + RULE(selectionStatement) { + constexpr auto If = lit; + constexpr auto Else = lit; + constexpr auto Switch = lit; + + TRY(alt< + seq, expression, tok, statement, opt>>, + seq, expression, tok, statement> + >(ctx)); + OK(); + } + + /// : 'while' '(' expression ')' statement + /// | 'do' statement 'while' '(' expression ')' ';' + /// | 'for' '(' forCondition ')' statement + RULE(iterationStatement) { + constexpr auto While = lit; + constexpr auto Do = lit; + constexpr auto For = lit; + + TRY(alt< + seq, expression, tok, statement>, + seq, expression, tok, tok>, + seq, forCondition, tok, statement> + >(ctx)); + OK(); + } + + /// : (forDeclaration | expression)? ';' forExpression? ';' forExpression? 
+ RULE(forCondition) { + /// : declarationSpecifiers initDeclaratorList? + constexpr auto forDeclaration = seq>; + + TRY(opt>(ctx)); + TRY(tok(ctx)); + TRY(opt(ctx)); + TRY(tok(ctx)); + TRY(opt(ctx)); + OK(); + } + + /// : 'goto' Identifier ';' + /// | 'continue' ';' + /// | 'break' ';' + /// | 'return' expression? ';' + RULE(jumpStatement) { + constexpr auto Goto = lit; + constexpr auto Continue = lit; + constexpr auto Break = lit; + constexpr auto Return = lit; + + TRY(alt< + seq, tok>, + seq>, + seq>, + seq, tok> + >(ctx)); + OK(); + } + + // expressions + // left associative unless specified otherwise + + RULE(expression) { + return commaExpression_16(ctx); + } + + /// assignmentExpression_15 (',' assignmentExpression_15)* + RULE(commaExpression_16) { + TRY(leftop< + assignmentExpression_15, + tok + >(ctx)); + OK(); + } + + RULE(constantExpression) { + return conditionalExpression(ctx); + } + + /// : postfixExpression_2 assignmentOperator assignmentExpression_15 + /// | conditionalExpression + /// right associative + RULE(assignmentExpression_15) { + constexpr auto assignmentOperator = alt< + tok, + seq, + tok, + tok, + tok, + tok, + seq, tok>, + seq, tok>, + tok, + tok, + tok + >, tok> + >; + TRY(alt< + seq, + conditionalExpression + >(ctx)); + OK(); + } + + /// : logicalOrExpression_14 ('?' expression ':' conditionalExpression)? 
+ /// right associative + RULE(conditionalExpression) { + TRY(logicalOrExpression_14(ctx)); + TRY(OPT(seq, expression, tok, conditionalExpression>)(ctx)); + OK(); + } + + /// : logicalAndExpression_13 ('||' logicalAndExpression_13)* + RULE(logicalOrExpression_14) { + TRY(leftop< + logicalAndExpression_13, + seq, tok> + >(ctx)); + OK(); + } + + /// : inclusiveOrExpression_12 ('&&' inclusiveOrExpression_12)* + RULE(logicalAndExpression_13) { + TRY(leftop< + inclusiveOrExpression_12, + seq, tok> + >(ctx)); + OK(); + } + + /// : exclusiveOrExpression_11 ('|' exclusiveOrExpression_11)* + RULE(inclusiveOrExpression_12) { + TRY(leftop< + exclusiveOrExpression_11, + tok + >(ctx)); + OK(); + } + + /// : andExpression_10 ('^' andExpression_10)* + RULE(exclusiveOrExpression_11) { + TRY(leftop< + andExpression_10, + tok + >(ctx)); + OK(); + } + + /// : equalityExpression_9 ('&' equalityExpression_9)* + RULE(andExpression_10) { + TRY(leftop< + equalityExpression_9, + tok + >(ctx)); + OK(); + } + + /// : relationalExpression_8 (('==' | '!=') relationalExpression_8)* + RULE(equalityExpression_9) { + TRY(leftop< + relationalExpression_8, + seq, + tok + >, tok> + >(ctx)); + OK(); + } + + /// : shiftExpression_7 (('<' | '<=' | '>' | '>=') shiftExpression_7)* + RULE(relationalExpression_8) { + TRY(leftop< + shiftExpression_7, + alt< + seq, opt>>, + seq, opt>> + > + >(ctx)); + OK(); + } + + /// : additiveExpression_6 (('<<' | '>>') additiveExpression_6)* + RULE(shiftExpression_7) { + TRY(leftop< + additiveExpression_6, + alt< + seq, tok>, + seq, tok> + > + >(ctx)); + OK(); + } + + /// : multiplicativeExpression_5 (('+' | '-') multiplicativeExpression_5)* + RULE(additiveExpression_6) { + TRY(leftop< + multiplicativeExpression_5, + alt< + tok, + tok + > + >(ctx)); + OK(); + } + + /// : castExpression_3 (('*' | '/' | '%') castExpression_3)* + RULE(multiplicativeExpression_5) { + TRY(leftop< + castExpression_3, + alt< + tok, + tok, + tok + > + >(ctx)); + OK(); + } + + /// : 
unaryExpression_3 + /// | '(' typeName ')' castExpression_3 + /// right associative + RULE(castExpression_3) { + // no casts yet + return unaryExpression_3(ctx); + } + + /// : postfixExpression_2 + /// | ('++' | '--') unaryExpression_3 + /// | ('+' | '-' | '~' | '!') castExpression_3 + /// right associative + RULE(unaryExpression_3) { + TRY(alt< + postfixExpression_2, + seq, tok>, + seq, tok> + >, unaryExpression_3>, + seq, + tok, + tok, + tok + >, castExpression_3> + >(ctx)); + OK(); + } + + /// : primaryExpression ( + /// | '[' expression ']' + /// | '(' expression? ')' + /// | '.' Identifier # static field + /// | '.' '(' expression ')' # computed field + /// | ('++' | '--') + /// | )* + RULE(postfixExpression_2) { + TRY(primaryExpression(ctx)); + TRY(star, expression, tok>, + seq, opt, tok>, + seq, tok>, + seq, tok, expression, tok>, + seq, tok>, + seq, tok> + >> + >>(ctx)); + OK(); + } + + /// : Identifier + /// | Constant + /// | StringLiteral+ + /// | '...' '(' expression ',' typeSpecifier ')' # absolute va_arg + /// | '(' expression ')' + RULE(primaryExpression) { + TRY(alt< + tok, + tok, + tok, + tok, + tok, + cross>, + seq, tok, expression, tok, typeSpecifier, tok>, + seq, expression, tok> + >(ctx)); + OK(); + } +}; + +static bool parser_compile(ctx_t &&ctx) { + ctx.parser.lex->flags.noops = true; // don't parse operators + ctx.next(); + auto result = grammar::compilationUnit(ctx); + if (result) { + return true; + } + ctx.error(result.error.c_str()); + return false; +} + +// utils + +const int PARSER_HT_SIZE = 512; +const int TYPEDEF_HT_SIZE = 512; + +parser_t *parser_create() { + auto parser = new parser_t; + for (size_t i = 0; i < operator_count; ++i) { + if (operators[i].id == opid1('=')) { + parser->assign_op = &operators[i]; + break; + } + } + if (!parser->assign_op) { + con_err("internal error: initializing parser: failed to find assign operator\n"); + delete parser; + return nullptr; + } + + return parser; +} + +parser_t::parser_t() + : 
lex(nullptr), tok(Token::NONE), ast_cleaned(false), translated(0), crc_globals(0), crc_fields(0), + function(nullptr), + aliases(util_htnew(PARSER_HT_SIZE)), htfields(util_htnew(PARSER_HT_SIZE)), + htglobals(util_htnew(PARSER_HT_SIZE)), assign_op(nullptr), noref(false), max_param_count(1), m_fold(*this), + m_intrin(*this) { + variables.push_back(htfields); + variables.push_back(htglobals); + typedefs.push_back(util_htnew(TYPEDEF_HT_SIZE)); + _blocktypedefs.push_back(0); + + lex_ctx_t empty_ctx; + empty_ctx.file = ""; + empty_ctx.line = 0; + empty_ctx.column = 0; + nil = new ast_value(empty_ctx, "nil", TYPE_NIL); + nil->m_cvq = CV_CONST; + if (OPTS_FLAG(UNTYPED_NIL)) + util_htset(htglobals, "nil", (void *) nil); + + const_vec[0] = new ast_value(empty_ctx, "", TYPE_NOEXPR); + const_vec[1] = new ast_value(empty_ctx, "", TYPE_NOEXPR); + const_vec[2] = new ast_value(empty_ctx, "", TYPE_NOEXPR); + + if (OPTS_OPTION_BOOL(OPTION_ADD_INFO)) { + reserved_version = new ast_value(empty_ctx, "reserved:version", TYPE_STRING); + reserved_version->m_cvq = CV_CONST; + reserved_version->m_hasvalue = true; + reserved_version->m_flags |= AST_FLAG_INCLUDE_DEF; + reserved_version->m_flags |= AST_FLAG_NOREF; + reserved_version->m_constval.vstring = util_strdup(GMQCC_FULL_VERSION_STRING); + } else { + reserved_version = nullptr; + } +} + +parser_t::~parser_t() { + remove_ast(); +} + +void parser_t::remove_ast() { + +} + +bool parser_compile_string(parser_t &parser, const char *name, const char *str, size_t len) { + parser.lex = lex_open_string(str, len, name); + if (!parser.lex) { + con_err("failed to create lexer for string \"%s\"\n", name); + return false; + } + return parser_compile(ctx_t(parser)); +} + +bool parser_compile_file(parser_t &parser, const char *filename) { + parser.lex = lex_open(filename); + if (!parser.lex) { + con_err("failed to open file \"%s\"\n", filename); + return false; + } + return parser_compile(ctx_t(parser)); +} + +ast_expression *parser_find_global(parser_t 
&parser, const char *name) { + auto ctx = ctx_t(parser); + ast_expression *var = (ast_expression *) util_htget(parser.aliases, STRING()); + if (var) + return var; + return (ast_expression *) util_htget(parser.htglobals, name); +} + +bool parser_finish(parser_t &parser, const char *output) { + return true; +} -- 2.39.2