From 9b6598f049c425e918cd7587016105abbc808bda Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Mon, 16 Jul 2012 13:59:10 +0200 Subject: [PATCH] Copying my old lexer --- Makefile | 3 +- gmqcc.h | 91 +------- lexer.c | 632 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lexer.h | 207 ++++++++++++++++++ main.c | 15 +- parser.c | 7 + 6 files changed, 855 insertions(+), 100 deletions(-) create mode 100644 lexer.c create mode 100644 lexer.h create mode 100644 parser.c diff --git a/Makefile b/Makefile index feaf611..9101d2b 100644 --- a/Makefile +++ b/Makefile @@ -17,14 +17,13 @@ ifeq ($(CC), clang) endif OBJ = \ - error.o \ util.o \ code.o \ ast.o \ ir.o OBJ_A = test/ast-test.o OBJ_I = test/ir-test.o -OBJ_C = main.o +OBJ_C = main.o lexer.o parser.o #default is compiler only default: gmqcc diff --git a/gmqcc.h b/gmqcc.h index 85cb9ae..e5d37af 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -179,95 +179,6 @@ typedef char int64_size_is_correct [sizeof(int64_t) == 8?1:-1]; typedef char uintptr_size_is_correct[sizeof(intptr_t) == sizeof(int*)?1:-1]; typedef char intptr_size_is_correct [sizeof(uintptr_t)== sizeof(int*)?1:-1]; -/*===================================================================*/ -/*============================ lex.c ================================*/ -/*===================================================================*/ -typedef struct lex_file_t { - FILE *file; /* file handler */ - char *name; /* name of file */ - char peek [5]; - char lastok[8192]; - - int last; /* last token */ - int current; /* current token */ - int length; /* bytes left to parse */ - int size; /* never changes (size of file) */ - int line; /* what line are we on? 
*/ -} lex_file; - -/* - * It's important that this table never exceed 32 keywords, the ascii - * table starts at 33 (and we don't want conflicts) - */ -enum { - TOKEN_DO , - TOKEN_ELSE , - TOKEN_IF , - TOKEN_WHILE , - TOKEN_BREAK , - TOKEN_CONTINUE , - TOKEN_RETURN , - TOKEN_GOTO , - TOKEN_FOR , /* extension */ - TOKEN_TYPEDEF , /* extension */ - - /* ensure the token types are out of the */ - /* bounds of anyothers that may conflict. */ - TOKEN_FLOAT = 110, - TOKEN_VECTOR , - TOKEN_STRING , - TOKEN_ENTITY , - TOKEN_VOID -}; - -/* - * Lexer state constants, these are numbers for where exactly in - * the lexing the lexer is at. Or where it decided to stop if a lexer - * error occurs. These numbers must be > where the ascii-table ends - * and > the last type token which is TOKEN_VOID - */ -enum { - LEX_COMMENT = 1128, - LEX_CHRLIT , - LEX_STRLIT , - LEX_IDENT -}; - -int lex_token (lex_file *); -void lex_reset (lex_file *); -void lex_close (lex_file *); -void lex_parse (lex_file *); -lex_file *lex_include(lex_file *, const char *); -void lex_init (const char *, lex_file **); - -/*===================================================================*/ -/*========================== error.c ================================*/ -/*===================================================================*/ -#define ERROR_LEX (SHRT_MAX+0) -#define ERROR_PARSE (SHRT_MAX+1) -#define ERROR_INTERNAL (SHRT_MAX+2) -#define ERROR_COMPILER (SHRT_MAX+3) -#define ERROR_PREPRO (SHRT_MAX+4) -int error(lex_file *, int, const char *, ...); - -/*===================================================================*/ -/*========================== parse.c ================================*/ -/*===================================================================*/ -int parse_gen(lex_file *); - -/*===================================================================*/ -/*========================== typedef.c ==============================*/ 
-/*===================================================================*/ -typedef struct typedef_node_t { - char *name; -} typedef_node; - -void typedef_init(); -void typedef_clear(); -typedef_node *typedef_find(const char *); -int typedef_add (lex_file *file, const char *, const char *); - - /*===================================================================*/ /*=========================== util.c ================================*/ /*===================================================================*/ @@ -364,7 +275,7 @@ enum { TYPE_FIELD , TYPE_FUNCTION , TYPE_POINTER , - /* TYPE_INTEGER , */ + TYPE_INTEGER , TYPE_QUATERNION , TYPE_MATRIX , TYPE_VARIANT , diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..867fb94 --- /dev/null +++ b/lexer.c @@ -0,0 +1,632 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +#include "gmqcc.h" +#include "lexer.h" + +MEM_VEC_FUNCTIONS(token, char, value) +/* Print a printf-style error to stdout, prefixed with "name:sline" when lex is non-NULL, followed by a newline. */ +void lexerror(lex_file *lex, const char *fmt, ...) +{ + va_list ap; + + if (lex) + printf("error %s:%lu: ", lex->name, (unsigned long)lex->sline); + else + printf("error: "); + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + + printf("\n"); +} +/* Allocate a zero-filled token; returns NULL if mem_a fails. */ +token* token_new() +{ + token *tok = (token*)mem_a(sizeof(token)); + if (!tok) + return NULL; + memset(tok, 0, sizeof(*tok)); + return tok; +} +/* Unlink self from its prev/next neighbours, clear its value vector and free it. */ +void token_delete(token *self) +{ + if (self->next && self->next->prev == self) + self->next->prev = self->prev; + if (self->prev && self->prev->next == self) + self->prev->next = self->next; + MEM_VECTOR_CLEAR(self, value); + mem_d(self); +} +/* Duplicate one token (value, ctx, ttype, constval); the copy's next/prev stay NULL. Returns NULL on allocation failure. 
*/ +token* token_copy(const token *cp) +{ + token* self = token_new(); + if (!self) + return NULL; + /* copy the value */ + self->value_alloc = cp->value_count + 1; + self->value_count = cp->value_count; + self->value = (char*)mem_a(self->value_alloc); + if (!self->value) { + mem_d(self); + return NULL; + } + memcpy(self->value, cp->value, cp->value_count); + self->value[self->value_alloc-1] = 0; + + /* rest */ + self->ctx = cp->ctx; + 
self->ttype = cp->ttype; + memcpy(&self->constval, &cp->constval, sizeof(self->constval)); + return self; +} +/* Delete t and every token reachable through ->next; t must not be NULL (the do/while dereferences it first). */ +void token_delete_all(token *t) +{ + token *n; + + do { + n = t->next; + token_delete(t); + t = n; + } while(t); +} +/* Copy the list starting at cp, linking next/prev between the copies; on failure the partial copy is freed and NULL is returned. */ +token* token_copy_all(const token *cp) +{ + token *cur; + token *out; + + out = cur = token_copy(cp); + if (!out) + return NULL; + + while (cp->next) { + cp = cp->next; + cur->next = token_copy(cp); + if (!cur->next) { + token_delete_all(out); + return NULL; + } + cur->next->prev = cur; + cur = cur->next; + } + + return out; +} +/* Open file for reading and return a zeroed lexer state with line = 1; NULL (after reporting) if the open or the allocation fails. */ +lex_file* lex_open(const char *file) +{ + lex_file *lex; + FILE *in = fopen(file, "rb"); + + if (!in) { + lexerror(NULL, "open failed: '%s'\n", file); + return NULL; + } + + lex = (lex_file*)mem_a(sizeof(*lex)); + if (!lex) { + fclose(in); + lexerror(NULL, "out of memory\n"); + return NULL; + } + + memset(lex, 0, sizeof(*lex)); + + lex->file = in; + lex->name = util_strdup(file); + lex->line = 1; /* we start counting at 1 */ + + lex->peekpos = 0; + + return lex; +} +/* Close the input file, delete the current token (if any), free the file name and the lexer itself. */ +void lex_close(lex_file *lex) +{ + if (lex->file) + fclose(lex->file); + if (lex->tok) + token_delete(lex->tok); + mem_d(lex->name); + mem_d(lex); +} + +/* Get or put-back data + * The following two functions do NOT understand what kind of data they + * are working on. + * They are merely wrapping get/put in order to count line numbers. 
+ */ +static int lex_getch(lex_file *lex) +{ + int ch; + + if (lex->peekpos) { + lex->peekpos--; + if (lex->peek[lex->peekpos] == '\n') + lex->line++; + return lex->peek[lex->peekpos]; + } + + ch = fgetc(lex->file); + if (ch == '\n') + lex->line++; + return ch; +} +/* Push ch back so the next lex_getch returns it; a pushed-back newline un-counts its line. */ +static void lex_ungetch(lex_file *lex, int ch) +{ + lex->peek[lex->peekpos++] = ch; + if (ch == '\n') + lex->line--; +} + +/* classify characters + * some additions to the is*() functions of ctype.h + */ + +/* Idents are alphanumeric, but they start with alpha or _ */ +static bool isident_start(int ch) +{ + return isalpha(ch) || ch == '_'; +} +/* Any character allowed inside an identifier after the first one. */ +static bool isident(int ch) +{ + return isident_start(ch) || isdigit(ch); +} + +/* isxdigit_only is used when we already know it's not a digit + * and want to see if it's a hex digit anyway. + */ +static bool isxdigit_only(int ch) +{ + return (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'); +} + +/* Skip whitespace and comments and return the first + * non-white character. + * As this makes use of the above getch() ungetch() functions, + * we don't need to care at all about line numbering anymore. + * + * In theory, this function should only be used at the beginning + * of lexing, or when we *know* the next character is part of the token. + * Otherwise, if the parser throws an error, the linenumber may not be + * the line of the error, but the line of the next token AFTER the error. + * + * This is currently only problematic when using c-like string-continuation, + * since comments and whitespaces are allowed between 2 such strings. 
+ * Example: +printf( "line one\n" +// A comment + "A continuation of the previous string" +// This line is skipped + , foo); + + * In this case, if the parser decides it didn't actually want a string, + * and uses lex->line to print an error, it will show the ', foo);' line's + * linenumber. + * + * On the other hand, the parser is supposed to remember the line of the next + * token's beginning. 
In this case we would want skipwhite() to be called + * AFTER reading a token, so that the parser, before reading the NEXT token, + * doesn't store the *comment's* linenumber, but the actual token's linenumber. + * + * THIS SOLUTION + * here is to store the line of the first character after skipping + * the initial whitespace in lex->sline, this happens in lex_do. + */ +static int lex_skipwhite(lex_file *lex) +{ + int ch = 0; + + do + { + ch = lex_getch(lex); + while (ch != EOF && isspace(ch)) ch = lex_getch(lex); + + if (ch == '/') { + ch = lex_getch(lex); + if (ch == '/') + { + /* one line comment */ + ch = lex_getch(lex); + + /* check for special: '/', '/', '*', '/' */ + if (ch == '*') { + ch = lex_getch(lex); + if (ch == '/') { + ch = ' '; + continue; + } + } + + while (ch != EOF && ch != '\n') { + ch = lex_getch(lex); + } + continue; + } + if (ch == '*') + { + /* multiline comment */ + while (ch != EOF) + { + ch = lex_getch(lex); + if (ch == '*') { + ch = lex_getch(lex); + if (ch == '/') { + ch = lex_getch(lex); + break; + } + } + } + if (ch == '/') /* allow *//* direct following comment */ + { + lex_ungetch(lex, ch); + ch = ' '; /* cause TRUE in the isspace check */ + } + continue; + } + /* Otherwise roll back to the slash and break out of the loop */ + lex_ungetch(lex, ch); + ch = '/'; + break; + } + } while (ch != EOF && isspace(ch)); + + return ch; +} + +/* Append a character to the token buffer */ +static bool GMQCC_WARN lex_tokench(lex_file *lex, int ch) +{ + if (!token_value_add(lex->tok, ch)) { + lexerror(lex, "out of memory"); + return false; + } + return true; +} + +/* Append a trailing null-byte */ +static bool GMQCC_WARN lex_endtoken(lex_file *lex) +{ + if (!token_value_add(lex->tok, 0)) { + lexerror(lex, "out of memory"); + return false; + } + lex->tok->value_count--; + return true; +} + +/* Append the remaining identifier characters to the token buffer and push back the first non-identifier character; false if the buffer cannot grow */ +static bool GMQCC_WARN 
lex_finish_ident(lex_file *lex) +{ + int ch; + + ch = lex_getch(lex); + while (ch != EOF && isident(ch)) + { + if 
(!lex_tokench(lex, ch)) + return false; /* bool result: a TOKEN_* value here would read as success */ + ch = lex_getch(lex); + } + + /* last ch was not an ident ch: */ + lex_ungetch(lex, ch); + + return true; +} +/* Append everything up to the closing quote to the token buffer (a backslash keeps the following character verbatim). Returns TOKEN_STRINGCONST, or TOKEN_ERROR at end of file, or TOKEN_FATAL if the buffer cannot grow. */ +static int GMQCC_WARN lex_finish_string(lex_file *lex, int quote) +{ + int ch = 0; + + while (ch != EOF) + { + ch = lex_getch(lex); + if (ch == quote) + return TOKEN_STRINGCONST; + + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + /* as lexer we only care about \" to not terminate the string prematurely */ + if (ch == '\\') { + ch = lex_getch(lex); + if (ch == EOF) { + lexerror(lex, "unexpected end of file"); + lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + return (lex->tok->ttype = TOKEN_ERROR); + } + /* so we just add the next character no matter what it actually is */ + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + } + } + lexerror(lex, "unexpected end of file within string constant"); + lex_ungetch(lex, EOF); /* next token to be TOKEN_EOF */ + return (lex->tok->ttype = TOKEN_ERROR); +} +/* Lex a number whose first digit lastch was already read: decimal or 0x-hex integers (constval.i) and decimal floats with an optional trailing 'f' (constval.f). Returns the resulting token type. */ +static int GMQCC_WARN lex_finish_digit(lex_file *lex, int lastch) +{ + bool ishex = false; + + int ch = lastch; + + /* parse a number... */ + lex->tok->ttype = TOKEN_INTCONST; + + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + ch = lex_getch(lex); + if (ch != '.' 
&& !isdigit(ch)) + { + if (lastch != '0' || ch != 'x') + { + /* end of the number or EOF */ + lex_ungetch(lex, ch); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + + lex->tok->constval.i = lastch - '0'; + return lex->tok->ttype; + } + + ishex = true; + } + + /* EOF would have been caught above */ + + if (ch != '.') + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + while (isdigit(ch) || (ishex && isxdigit_only(ch))) + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + } + } + /* NOT else, '.' can come from above as well */ + if (ch == '.' 
&& !ishex) + { + /* Allow a decimal point in non-hex mode */ + lex->tok->ttype = TOKEN_FLOATCONST; + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + /* continue digits-only */ + ch = lex_getch(lex); + while (isdigit(ch)) + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + ch = lex_getch(lex); + } + } + /* put back the last character */ + /* but do not put back the trailing 'f' of a float */ + if (lex->tok->ttype == TOKEN_FLOATCONST && ch == 'f') + ch = lex_getch(lex); + + /* generally we don't want words to follow numbers: */ + if (isident(ch)) { + lexerror(lex, "unexpected trailing characters after number"); + return (lex->tok->ttype = TOKEN_ERROR); + } + lex_ungetch(lex, ch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + if (lex->tok->ttype == TOKEN_FLOATCONST) + lex->tok->constval.f = strtod(lex->tok->value, NULL); + else + lex->tok->constval.i = strtol(lex->tok->value, NULL, 0); + return lex->tok->ttype; +} +/* Read the next token into lex->tok (deleting the previous one) and return its type: a TOKEN_* value, or the character itself for single-character tokens. lex->sline is set to the line the token starts on. */ +int lex_do(lex_file *lex) +{ + int ch, nextch; + + if (lex->tok) + token_delete(lex->tok); + lex->tok = token_new(); + if (!lex->tok) + return TOKEN_FATAL; + + ch = lex_skipwhite(lex); + lex->sline = lex->line; + lex->tok->ctx.line = lex->sline; + lex->tok->ctx.file = lex->name; + + if (ch == EOF) + return (lex->tok->ttype = TOKEN_EOF); + + /* single-character tokens */ + switch (ch) + { + case ';': + case '(': + case ')': + case '{': + case '}': + case '[': + case ']': + + case ',': + + return (lex->tok->ttype = ch); + default: + break; + } + + if (lex->flags.noops) + { + /* Detect characters early which are normally + * operators OR PART of an operator. 
+ */ + switch (ch) + { + case '+': + case '-': + case '*': + case '/': + case '<': + case '>': + case '=': + case '&': + case '|': + case '^': + case '~': + return (lex->tok->ttype = ch); /* record the type in the token too, like the single-character tokens above */ + default: + break; + } + } + + if (ch == '+' || ch == '-' || /* ++, --, +=, -= and -> as well! 
*/ + ch == '>' || ch == '<' || /* <<, >>, <=, >= */ + ch == '=' || /* == */ + ch == '&' || ch == '|') /* &&, ||, &=, |= */ + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + nextch = lex_getch(lex); + if (nextch == ch || nextch == '=') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else if (ch == '-' && nextch == '>') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else + lex_ungetch(lex, nextch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (ch == '^' || ch == '~' || ch == '!') + { + if (!lex_tokench(lex, ch) || + !lex_endtoken(lex)) + { + return (lex->tok->ttype = TOKEN_FATAL); + } + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (ch == '*' || ch == '/') /* *=, /= */ + { + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + + nextch = lex_getch(lex); + if (nextch == '=') { + if (!lex_tokench(lex, nextch)) + return (lex->tok->ttype = TOKEN_FATAL); + } else + lex_ungetch(lex, nextch); + + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return (lex->tok->ttype = TOKEN_OPERATOR); + } + + if (isident_start(ch)) + { + const char *v; + if (!lex_tokench(lex, ch)) + return (lex->tok->ttype = TOKEN_FATAL); + if (!lex_finish_ident(lex)) { + /* lex_finish_ident fails only when the token buffer cannot grow 
*/ + return (lex->tok->ttype = TOKEN_FATAL); + } + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + lex->tok->ttype = TOKEN_IDENT; + + v = lex->tok->value; + if (!strcmp(v, "void") || + !strcmp(v, "int") || + !strcmp(v, "float") || + !strcmp(v, "vector") ) + { + lex->tok->ttype = TOKEN_TYPENAME; + switch (v[1]) { + case 'o': lex->tok->constval.t = TYPE_VOID; break; + case 'n': lex->tok->constval.t = TYPE_INTEGER; break; + case 'l': lex->tok->constval.t = TYPE_FLOAT; break; + case 'e': lex->tok->constval.t = TYPE_VECTOR; break; + } + } + else if (!strcmp(v, "for") || + !strcmp(v, "while") || + !strcmp(v, "do")) + lex->tok->ttype = TOKEN_KEYWORD; + + return lex->tok->ttype; + } + + if (ch == '"') + { + lex->tok->ttype = lex_finish_string(lex, '"'); + while (lex->tok->ttype == TOKEN_STRINGCONST) + { + /* Allow c style "string" "continuation" */ + ch = lex_skipwhite(lex); + if (ch != '"') { + lex_ungetch(lex, ch); + break; + } + + lex->tok->ttype = lex_finish_string(lex, '"'); + } + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return lex->tok->ttype; + } + + if (ch == '\'') + { + /* we parse character constants like string, + * but return TOKEN_CHARCONST, or a vector type if it fits... + * Likewise actual unescaping has to be done by the parser. + * The difference is we don't allow 'char' 'continuation'. 
+ */ + lex->tok->ttype = lex_finish_string(lex, '\''); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + if (lex->tok->ttype != TOKEN_STRINGCONST) return lex->tok->ttype; /* keep TOKEN_ERROR/TOKEN_FATAL from the scan */ + /* It's a vector if we can successfully scan 3 floats */ + if (sscanf(lex->tok->value, " %f %f %f ", &lex->tok->constval.v.x, &lex->tok->constval.v.y, &lex->tok->constval.v.z) == 3) + { + lex->tok->ttype = TOKEN_VECTORCONST; + } + else lex->tok->ttype = TOKEN_CHARCONST; /* lex_finish_string reports TOKEN_STRINGCONST for either quote */ + return lex->tok->ttype; + } + + if (isdigit(ch)) + { + lex->tok->ttype = lex_finish_digit(lex, ch); + if (!lex_endtoken(lex)) + return (lex->tok->ttype = TOKEN_FATAL); + return lex->tok->ttype; + } + + lexerror(lex, "unknown token"); + return (lex->tok->ttype = TOKEN_ERROR); +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..f49b8ff --- /dev/null +++ b/lexer.h @@ -0,0 +1,207 @@ +#ifndef GMQCC_LEXER_HDR_ +#define GMQCC_LEXER_HDR_ + +typedef struct token_s token; + +#include "ast.h" + +struct token_s { + int ttype; + + MEM_VECTOR_MAKE(char, value); + + union { + vector v; + int i; + double f; + int t; /* type */ + } constval; + + struct token_s *next; + struct token_s *prev; + + lex_ctx ctx; +}; + +token* token_new(); +void token_delete(token*); +token* token_copy(const token *cp); +void token_delete_all(token *t); +token* token_copy_all(const token *cp); + +/* Lexer + * + */ +enum { + /* Other tokens which we can return: */ + TOKEN_NONE = 0, + TOKEN_START = 128, + + TOKEN_IDENT, + + TOKEN_TYPENAME, + + TOKEN_OPERATOR, + + TOKEN_KEYWORD, /* loop */ + + TOKEN_STRINGCONST, /* not the typename but an actual "string" */ + TOKEN_CHARCONST, + TOKEN_VECTORCONST, + TOKEN_INTCONST, + TOKEN_FLOATCONST, + + TOKEN_EOF, + + /* We use '< TOKEN_ERROR', so TOKEN_FATAL must come after it and any + * other error related tokens as well + */ + TOKEN_ERROR, + TOKEN_FATAL /* internal error, eg out of memory */ +}; 
+ +static const char *_tokennames[] = { + "TOKEN_START", + "TOKEN_IDENT", + "TOKEN_TYPENAME", + "TOKEN_OPERATOR", + "TOKEN_KEYWORD", + "TOKEN_STRINGCONST", + "TOKEN_CHARCONST", + "TOKEN_VECTORCONST", + 
"TOKEN_INTCONST", + "TOKEN_FLOATCONST", + "TOKEN_EOF", + "TOKEN_ERROR", + "TOKEN_FATAL", +}; +typedef int +_all_tokennames_added_[ + ((TOKEN_FATAL - TOKEN_START + 1) == + (sizeof(_tokennames)/sizeof(_tokennames[0]))) + ? 1 : -1]; + +typedef struct { + FILE *file; + char *name; + size_t line; + size_t sline; /* line at the start of a token */ + + int peek[256]; /* pushed-back characters; int so that EOF and bytes >= 0x80 come back unchanged */ + size_t peekpos; + + token *tok; + + struct { + bool noops; + } flags; +} lex_file; + +MEM_VECTOR_PROTO(lex_file, char, token); + +lex_file* lex_open (const char *file); +void lex_close(lex_file *lex); +int lex_do (lex_file *lex); + +/* Parser + * + */ + +enum { + ASSOC_LEFT, + ASSOC_RIGHT +}; + +#define OP_SUFFIX 1 +#define OP_PREFIX 2 + +typedef struct { + const char *op; + unsigned int assoc; + unsigned int prec; + unsigned int flags; +} oper_info; + +static const oper_info operators[] = { + { "++", ASSOC_LEFT, 16, OP_SUFFIX}, + { "--", ASSOC_LEFT, 16, OP_SUFFIX}, + + { ".", ASSOC_LEFT, 15, 0 }, + + { "!", ASSOC_RIGHT, 14, 0 }, + { "~", ASSOC_RIGHT, 14, 0 }, + { "+", ASSOC_RIGHT, 14, OP_PREFIX }, + { "-", ASSOC_RIGHT, 14, OP_PREFIX }, + { "++", ASSOC_RIGHT, 14, OP_PREFIX }, + { "--", ASSOC_RIGHT, 14, OP_PREFIX }, +/* { "&", ASSOC_RIGHT, 14, OP_PREFIX }, */ + + { "*", ASSOC_LEFT, 13, 0 }, + { "/", ASSOC_LEFT, 13, 0 }, + { "%", ASSOC_LEFT, 13, 0 }, + + { "+", ASSOC_LEFT, 12, 0 }, + { "-", ASSOC_LEFT, 12, 0 }, + + { "<<", ASSOC_LEFT, 11, 0 }, + { ">>", ASSOC_LEFT, 11, 0 }, + + { "<", ASSOC_LEFT, 10, 0 }, + { ">", ASSOC_LEFT, 10, 0 }, + { "<=", ASSOC_LEFT, 10, 0 }, + { ">=", ASSOC_LEFT, 10, 0 }, + + { "==", ASSOC_LEFT, 9, 0 }, + { "!=", ASSOC_LEFT, 9, 0 }, + + { "&", ASSOC_LEFT, 8, 0 }, + + { "^", ASSOC_LEFT, 7, 0 }, + + { "|", ASSOC_LEFT, 6, 0 }, + + { "&&", ASSOC_LEFT, 5, 0 }, + + { "||", ASSOC_LEFT, 4, 0 }, + + { "?", ASSOC_RIGHT, 3, 0 }, + + { "=", ASSOC_RIGHT, 2, 0 }, + { "+=", ASSOC_RIGHT, 2, 0 }, + { "-=", ASSOC_RIGHT, 2, 
0 }, + { "*=", ASSOC_RIGHT, 2, 0 }, + { "/=", ASSOC_RIGHT, 2, 0 }, + { "%=", 
ASSOC_RIGHT, 2, 0 }, + { ">>=", ASSOC_RIGHT, 2, 0 }, + { "<<=", ASSOC_RIGHT, 2, 0 }, + { "&=", ASSOC_RIGHT, 2, 0 }, + { "^=", ASSOC_RIGHT, 2, 0 }, + { "|=", ASSOC_RIGHT, 2, 0 }, +}; + +typedef struct +{ + lex_file *lex; + int error; + lex_ctx ctx; + + token *tokens; + token *lastok; + + token *tok; /* current token */ + + MEM_VECTOR_MAKE(ast_value*, globals); +} parse_file; + +MEM_VECTOR_PROTO(parse_file, ast_value*, globals); + +parse_file* parse_open(const char *file); +void parse_file_close(parse_file*); + +bool parse(parse_file*); + +bool parse_iskey(parse_file *self, const char *ident); + +void lexerror(lex_file*, const char *fmt, ...); + +#endif diff --git a/main.c b/main.c index 62786d3..195574e 100644 --- a/main.c +++ b/main.c @@ -21,19 +21,18 @@ * SOFTWARE. */ #include "gmqcc.h" -typedef struct { char *name, type; } argitem; -VECTOR_MAKE(argitem, items); +bool parser_compile(const char *filename); int main(int argc, char **argv) { - size_t itr; - util_debug("COM", "starting ...\n"); + if (argc == 2) { + if (!parser_compile(argv[1])) { + printf("There were compile errors\n"); + } + } + util_debug("COM", "cleaning ...\n"); - /* clean list */ - for (itr = 0; itr < items_elements; itr++) - mem_d(items_data[itr].name); - mem_d(items_data); util_meminfo(); return 0; diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..0dd972e --- /dev/null +++ b/parser.c @@ -0,0 +1,7 @@ +#include "gmqcc.h" +#include "lexer.h" + +bool parser_compile(const char *filename) +{ + return false; +} -- 2.39.2