From: Dale Weiler Date: Mon, 9 Apr 2012 23:00:13 +0000 (-0400) Subject: More parsing & parse tree X-Git-Tag: 0.1-rc1~710 X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=2b94dc1d898a17ff30594c2e183c2b8234387f88;p=xonotic%2Fgmqcc.git More parsing & parse tree --- diff --git a/gmqcc b/gmqcc index 9bc7f27..d00f3c7 100755 Binary files a/gmqcc and b/gmqcc differ diff --git a/gmqcc.h b/gmqcc.h index 7fda6fd..bd1d715 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -157,12 +157,11 @@ struct lex_file { #define TOKEN_GOTO 7 #define TOKEN_FOR 8 // extension #define TOKEN_INT 9 // extension -#define TOKEN_BOOL 10 // extension -#define TOKEN_VOID 11 -#define TOKEN_STRING 12 -#define TOKEN_FLOAT 13 -#define TOKEN_VECTOR 14 -#define TOKEN_ENTITY 15 +#define TOKEN_VOID 10 +#define TOKEN_STRING 11 +#define TOKEN_FLOAT 12 +#define TOKEN_VECTOR 13 +#define TOKEN_ENTITY 14 /* * Lexer state constants, these are numbers for where exactly in @@ -190,6 +189,11 @@ int error(int, const char *, ...); /* parse.c */ int parse(struct lex_file *); +struct parsenode { + struct parsenode *next; + int type; /* some token */ +}; + /* cpp.c */ int cpp (struct lex_file *); diff --git a/lex.c b/lex.c index 5333171..62067bb 100644 --- a/lex.c +++ b/lex.c @@ -27,6 +27,10 @@ #include #include "gmqcc.h" +/* + * Keywords are multichar, punctuation lexing is a bit more complicated + * than keyword lexing. + */ static const char *const lex_keywords[] = { "do", "else", "if", "while", "break", "continue", "return", "goto", @@ -34,7 +38,6 @@ static const char *const lex_keywords[] = { /* types */ "int", - "bool", "void", "string", "float", diff --git a/main.c b/main.c index 5134e98..bb5b83e 100644 --- a/main.c +++ b/main.c @@ -34,10 +34,11 @@ int main(int argc, char **argv) { const char *ofile = NULL; const char *ifile = NULL; int i; - if (argc <= 2) + if (argc <= 2) { return usage(*argv); + } - for (i=0; i +#include #include "gmqcc.h" +/* + * These are not lexical tokens: These are parse tree types. 
Most people
+ * perform tokenizing on language punctuation which is wrong. That stuff
+ * is technically already tokenized, it just needs to be parsed into a tree
+ */
+#define PARSE_TYPE_DO       0
+#define PARSE_TYPE_ELSE     1
+#define PARSE_TYPE_IF       2
+#define PARSE_TYPE_WHILE    3
+#define PARSE_TYPE_BREAK    4
+#define PARSE_TYPE_CONTINUE 5
+#define PARSE_TYPE_RETURN   6
+#define PARSE_TYPE_GOTO     7
+#define PARSE_TYPE_FOR      8  // extension
+#define PARSE_TYPE_INT      9  // extension
+#define PARSE_TYPE_BOOL     10 // extension
+#define PARSE_TYPE_VOID     11
+#define PARSE_TYPE_STRING   12
+#define PARSE_TYPE_FLOAT    13
+#define PARSE_TYPE_VECTOR   14
+#define PARSE_TYPE_ENTITY   15
+#define PARSE_TYPE_LAND     16
+#define PARSE_TYPE_LOR      17
+#define PARSE_TYPE_LTEQ     18
+#define PARSE_TYPE_GTEQ     19
+#define PARSE_TYPE_EQEQ     20
+#define PARSE_TYPE_LNEQ     21
+#define PARSE_TYPE_COMMA    22
+#define PARSE_TYPE_LNOT     23
+#define PARSE_TYPE_STAR     24
+#define PARSE_TYPE_DIVIDE   25
+#define PARSE_TYPE_LPARTH   26
+#define PARSE_TYPE_RPARTH   27
+#define PARSE_TYPE_MINUS    28
+#define PARSE_TYPE_ADD      29
+#define PARSE_TYPE_EQUAL    30
+#define PARSE_TYPE_LSS      31 // left subscript
+#define PARSE_TYPE_RSS      32
+#define PARSE_TYPE_LBS      33 // left bracket scope
+#define PARSE_TYPE_RBS      34 // right bracket scope
+#define PARSE_TYPE_ELIP     35 // ...
+#define PARSE_TYPE_DOT      36
+#define PARSE_TYPE_LT       37
+#define PARSE_TYPE_GT       38
+#define PARSE_TYPE_BAND     39
+#define PARSE_TYPE_BOR      40
+#define PARSE_TYPE_DONE     41 // finished statement
+
+/*
+ * Adds a parse type to the parse tree, this is where all the hard
+ * work actually begins.
+ *
+ * Appends a new node of type X after `parsetree` and advances
+ * `parsetree` to it. The node is fully initialised before it is linked
+ * in, so the list always ends in a node whose `next` is NULL.
+ *
+ * Must only be expanded inside a function that returns int and has a
+ * `struct parsenode *parsetree` in scope: when mem_a() fails the macro
+ * returns error(ERROR_INTERNAL, ...) from that function instead of
+ * dereferencing a NULL pointer.
+ */
+#define PARSE_TREE_ADD(X)                                             \
+    do {                                                              \
+        struct parsenode *parsenew = mem_a(sizeof(struct parsenode)); \
+        if (!parsenew)                                                \
+            return error(ERROR_INTERNAL, "Ran out of memory", " ");   \
+        parsenew->next  = NULL;                                       \
+        parsenew->type  = (X);                                        \
+        parsetree->next = parsenew;                                   \
+        parsetree       = parsenew;                                   \
+    } while (0)
 static const char *const parse_punct[] = {
     "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*",
-    "/" , "(" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
-    "."
, "<" , ">" , "#" , "&" , "|" , "$", "@", ":", NULL
-    /*
-     * $,@,: are extensions:
-     * $ is a shorter `self`, so instead of self.frags, $.frags
-     * @ is a constructor
-     * : is compiler builtin functions
-     */
+    "/" , "(" , ")" , "-" , "+" , "=" , "[" , "]", "{", "}", "...",
+    "." , "<" , ">" , "&" , "|" , NULL
 };
+/*
+ * Prints the literal X and leaves the enclosing switch. Only meant to
+ * be used as the body of a case label inside parse_debug().
+ */
+#define STORE(X) {  \
+    printf(X);      \
+    break;          \
+}
+
+/*
+ * Dumps the parse list starting at `tree` to stdout, one line per node
+ * type that has a label below; node types without a case print nothing.
+ * A NULL `tree` is accepted and prints nothing.
+ *
+ * The walk runs until the list is exhausted, so the final node is
+ * reported as well (stopping at `tree->next == NULL` would drop it).
+ */
+void parse_debug(struct parsenode *tree) {
+    while (tree != NULL) {
+        /*
+         * skip blanks
+         * NOTE(review): PARSE_TYPE_DO is also 0, so DO nodes are skipped
+         * here and the "LOOP: DO" case below can never be reached.
+         */
+        if (tree->type == 0) {
+            tree = tree->next;
+            continue;
+        }
+
+        switch (tree->type) {
+            case PARSE_TYPE_ADD:       STORE("ADD \n");
+            case PARSE_TYPE_BAND:      STORE("BITAND \n");
+            case PARSE_TYPE_BOR:       STORE("BITOR \n");
+            case PARSE_TYPE_BREAK:     STORE("BREAK \n");
+            case PARSE_TYPE_COMMA:     STORE("SEPERATOR\n");
+            case PARSE_TYPE_CONTINUE:  STORE("CONTINUE\n");
+            case PARSE_TYPE_DIVIDE:    STORE("DIVIDE\n");
+            case PARSE_TYPE_EQUAL:     STORE("ASSIGNMENT\n");
+            case PARSE_TYPE_GOTO:      STORE("GOTO\n");
+            case PARSE_TYPE_DOT:       STORE("DOT\n");
+
+
+            case PARSE_TYPE_ELIP:      STORE("DECLTYPE: VALIST\n");
+            case PARSE_TYPE_ENTITY:    STORE("DECLTYPE: ENTITY\n");
+            case PARSE_TYPE_INT:       STORE("DECLTYPE: INT\n");
+            case PARSE_TYPE_FLOAT:     STORE("DECLTYPE: FLOAT\n");
+            case PARSE_TYPE_BOOL:      STORE("DECLTYPE: BOOL\n");
+
+            case PARSE_TYPE_GT:        STORE("TEST: GREATER THAN\n");
+            case PARSE_TYPE_LT:        STORE("TEST: LESS THAN\n");
+            case PARSE_TYPE_GTEQ:      STORE("TEST: GREATER THAN OR EQUAL\n");
+            case PARSE_TYPE_LTEQ:      STORE("TEST: LESS THAN OR EQUAL\n");
+            case PARSE_TYPE_LNEQ:      STORE("TEST: NOT EQUAL\n");
+            case PARSE_TYPE_EQEQ:      STORE("TEST: EQUAL-EQUAL\n");
+
+            /* scope brackets are intentionally silent */
+            case PARSE_TYPE_LBS:       break;
+            case PARSE_TYPE_RBS:       break;
+
+            case PARSE_TYPE_LAND:      STORE("LOGICAL: AND\n");
+            case PARSE_TYPE_LNOT:      STORE("LOGICAL: NOT\n");
+            case PARSE_TYPE_LOR:       STORE("LOGICAL: OR\n");
+            /* '(' opens a parenthesised group, ')' closes it */
+            case PARSE_TYPE_LPARTH:    STORE("PARTH: BEG\n");
+            case PARSE_TYPE_RPARTH:    STORE("PARTH: END\n");
+
+            case PARSE_TYPE_FOR:       STORE("LOOP: FOR\n");
+            case PARSE_TYPE_DO:        STORE("LOOP: DO\n");
+
case PARSE_TYPE_ELSE:      STORE("BLOCK: ELSE\n");
+            case PARSE_TYPE_IF:        STORE("BLOCK: IF\n");
+        }
+        tree = tree->next;
+    }
+}
+
+/*
+ * This just skips the token and throws it in the parse tree for later
+ * checking / optimization / codegen, it doesn't do anything with it
+ * like syntax check for legal use -- like it should as it's a TODO item
+ * which is not implemented
+ *
+ * Expands to a case body: it reads the next token into `token`, appends
+ * a node of type X via PARSE_TREE_ADD and breaks out of the switch, so
+ * it needs `token`, `file` and `parsetree` in scope.
+ */
+#define PARSE_TODO(X) {          \
+    token = lex_token(file);     \
+    PARSE_TREE_ADD(X);           \
+    break;                       \
+}
+
 int parse(struct lex_file *file) {
+    struct parsenode *parsetree = NULL;
+    struct parsenode *parseroot = NULL;
+
+    /*
+     * Allocate memory for our parse tree:
+     * the parse tree is just a singly linked list which will contain
+     * all the data for code generation.
+     *
+     * The root is a placeholder node that every PARSE_TREE_ADD appends
+     * behind. mem_a() is not shown to zero its result, so both fields
+     * are set explicitly: parse_debug() reads `type` and `next` of the
+     * root, and type 0 is what it skips as a blank.
+     */
+    parseroot = mem_a(sizeof(struct parsenode));
+    if (!parseroot)
+        return error(ERROR_INTERNAL, "Ran out of memory", " ");
+    parseroot->next = NULL;
+    parseroot->type = 0;
+    parsetree       = parseroot;
+
     int token = 0;
     while ((token = lex_token(file)) != ERROR_LEX      && \
             token != ERROR_COMPILER && \
@@ -51,7 +190,36 @@ int parse(struct lex_file *file) {
                 if (token != '(')
                     error(ERROR_PARSE, "Expected `(` after if\n", "");
+
+                PARSE_TREE_ADD(PARSE_TYPE_IF);
+                break;
+            case TOKEN_ELSE:
+                token = lex_token(file);
+                while ((token == ' ' || token == '\n') && file->length >= 0)
+                    token = lex_token(file);
+
+                PARSE_TREE_ADD(PARSE_TYPE_ELSE);
                 break;
+            case TOKEN_FOR:
+                token = lex_token(file);
+                while ((token == ' ' || token == '\n') && file->length >= 0)
+                    token = lex_token(file);
+
+                PARSE_TREE_ADD(PARSE_TYPE_FOR);
+                break;
+
+            case TOKEN_DO:        PARSE_TODO(PARSE_TYPE_DO);
+            case TOKEN_WHILE:     PARSE_TODO(PARSE_TYPE_WHILE);
+            case TOKEN_BREAK:     PARSE_TODO(PARSE_TYPE_BREAK);
+            case TOKEN_CONTINUE:  PARSE_TODO(PARSE_TYPE_CONTINUE);
+            case TOKEN_RETURN:    PARSE_TODO(PARSE_TYPE_RETURN);
+            case TOKEN_GOTO:      PARSE_TODO(PARSE_TYPE_GOTO);
+            case TOKEN_INT:       PARSE_TODO(PARSE_TYPE_INT);
+            case TOKEN_VOID:      PARSE_TODO(PARSE_TYPE_VOID);
+            case TOKEN_STRING:
PARSE_TODO(PARSE_TYPE_STRING); + case TOKEN_FLOAT: PARSE_TODO(PARSE_TYPE_FLOAT); + case TOKEN_VECTOR: PARSE_TODO(PARSE_TYPE_VECTOR); + case TOKEN_ENTITY: PARSE_TODO(PARSE_TYPE_ENTITY); /* TODO: Preprocessor */ case '#': @@ -63,81 +231,101 @@ int parse(struct lex_file *file) { token = lex_token(file); break; - /* PUNCTUATION PARSING BEGINS */ + /* + * From here down is all language punctuation: There is no + * need to actual create tokens from these because they're already + * tokenized as these individual tokens (which are in a special area + * of the ascii table which doesn't conflict with our other tokens + * which are higer than the ascii table. + */ case '&': /* & */ token = lex_token(file); if (token == '&') { /* && */ token = lex_token(file); - printf("--> LOGICAL AND\n"); + PARSE_TREE_ADD(PARSE_TYPE_LAND); goto end; } + PARSE_TREE_ADD(PARSE_TYPE_BAND); printf("--> BITWISE AND\n"); break; case '|': /* | */ token = lex_token(file); if (token == '|') { /* || */ token = lex_token(file); - printf("--> LOGICAL OR\n"); + PARSE_TREE_ADD(PARSE_TYPE_LOR); goto end; } - printf("--> BITWISE OR\n"); + PARSE_TREE_ADD(PARSE_TYPE_BOR); break; case '!': token = lex_token(file); if (token == '=') { /* != */ token = lex_token(file); - printf("--> LOGICAL NOT EQUAL\n"); + PARSE_TREE_ADD(PARSE_TYPE_LNEQ); goto end; } - printf("--> LOGICAL NOT\n"); + PARSE_TREE_ADD(PARSE_TYPE_LNOT); break; case '<': /* < */ token = lex_token(file); if (token == '=') { /* <= */ token = lex_token(file); - printf("--> LESS THAN OR EQUALL\n"); + PARSE_TREE_ADD(PARSE_TYPE_LTEQ); goto end; } - printf("--> LESS THAN\n"); + PARSE_TREE_ADD(PARSE_TYPE_LT); break; case '>': /* > */ token = lex_token(file); if (token == '=') { /* >= */ token = lex_token(file); - printf("--> GREATER THAN OR EQUAL\n"); + PARSE_TREE_ADD(PARSE_TYPE_GTEQ); goto end; } - printf("--> GREATER THAN\n"); + PARSE_TREE_ADD(PARSE_TYPE_GT); break; case '=': token = lex_token(file); if (token == '=') { /* == */ token = lex_token(file); - 
printf("--> COMPARISION \n"); + PARSE_TREE_ADD(PARSE_TYPE_EQEQ); goto end; } - printf("--> ASSIGNMENT\n"); + PARSE_TREE_ADD(PARSE_TYPE_EQUAL); break; case ';': token = lex_token(file); - printf("--> FINISHED STATMENT\n"); + PARSE_TREE_ADD(PARSE_TYPE_DONE); break; case '-': token = lex_token(file); - printf("--> SUBTRACTION EXPRESSION\n"); + PARSE_TREE_ADD(PARSE_TYPE_MINUS); break; case '+': token = lex_token(file); - printf("--> ASSIGNMENT EXPRRESSION\n"); + PARSE_TREE_ADD(PARSE_TYPE_ADD); + break; + case '(': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_LPARTH); + break; + case ')': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_RPARTH); + break; + case '{': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_LBS); + break; + case '}': + token = lex_token(file); + PARSE_TREE_ADD(PARSE_TYPE_RBS); break; } end:; } + parse_debug(parseroot); lex_reset(file); - // "&&", "||", "<=", ">=", "==", "!=", ";", ",", "!", "*", - //"/" , "(" , "-" , "+" , "=" , "[" , "]", "{", "}", "...", - //"." , "<" , ">" , "#" , "&" , "|" , "$", "@", ":", NULL - return 1; }