From: Dale Weiler Date: Fri, 31 May 2013 03:41:03 +0000 (+0000) Subject: Preliminary restructuring / implementation of the diagnostics system. X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=8ef04978b372bd0598c4e46067e72c8fe83ec64a;p=xonotic%2Fgmqcc.git Preliminary restructuring / implementation of the diagnostics system. --- diff --git a/Makefile b/Makefile index fea3356..3bcb68e 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,7 @@ ifeq ($(track), no) CFLAGS += -DNOTRACK endif -OBJ_D = util.o code.o ast.o ir.o conout.o ftepp.o opts.o fs.o utf8.o correct.o +OBJ_D = util.o code.o ast.o ir.o conout.o ftepp.o opts.o fs.o utf8.o correct.o diag.o OBJ_P = util.o fs.o conout.o opts.o pak.o OBJ_T = test.o util.o conout.o fs.o OBJ_C = main.o lexer.o parser.o fs.o @@ -244,8 +244,9 @@ opts.o: gmqcc.h opts.def fs.o: gmqcc.h opts.def utf8.o: gmqcc.h opts.def correct.o: gmqcc.h opts.def +diag.o: gmqcc.h opts.def pak.o: gmqcc.h opts.def test.o: gmqcc.h opts.def main.o: gmqcc.h opts.def lexer.h lexer.o: gmqcc.h opts.def lexer.h -parser.o: gmqcc.h opts.def lexer.h ast.h ir.h intrin.h +parser.o: gmqcc.h opts.def intrin.h diff --git a/diag.c b/diag.c new file mode 100644 index 0000000..6132942 --- /dev/null +++ b/diag.c @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2012, 2013 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "gmqcc.h" +#include "lexer.h" + +typedef struct { + const char *line; /* contents of the current line */ + size_t *tokens; /* stream of tokens */ + char **values; /* stream of values for tokens */ +} diagnostic_data_t; + +/* + * TODO: these should become a structure passed around which is stored + * in the parser instead of globals. + */ +ht diagnostic_table = NULL; /* map<file, vector<diagnostic_data_t*>> */ +uint32_t diagnostic_item = 0; + +static void diagnostic_line(const char *file, size_t line, diagnostic_data_t ***read, size_t beg, size_t end) { + diagnostic_data_t **datas = NULL; + size_t feed = 0; + + if (!diagnostic_table) + diagnostic_table = util_htnew(1024); + + /* + * Build the data one line at a time if it doesn't already exist. + * We also lex one line at a time for consistency here. + */ + if (!(datas = (diagnostic_data_t**)util_htget(diagnostic_table, file))) { + lex_file *lexer = NULL; + char *line = NULL; + FILE *handle = fs_file_open(file, "r"); + size_t size = 0; + size_t tok = 0; + + + /* + * Now process all data line per line from the file, while inserting + * the contents of each line into data.line, the token stream for + * each line into data.tokens, and the values that are associated + * with that given token into data.values. Then after one line is + * complete, push the data associated with it into the datas vector + * which will be stored alongside the hashtable. 
+ */ + while (fs_file_getline(&line, &size, handle) != EOF) { + diagnostic_data_t *data = mem_a(sizeof(diagnostic_data_t)); + + data->tokens = NULL; + data->values = NULL; + data->line = util_strdup(line); + lexer = lex_open_string(line, strlen(line), file); + lexer->flags.preprocessing = true; /* enable whitespace */ + lexer->flags.mergelines = true; + + /* build token stream */ + while ((tok = lex_do(lexer)) != TOKEN_EOF) { + char *string = NULL; + char *claim = lexer->tok.value; + + for (; claim && *claim; claim ++) + vec_push(string, (*claim == '\t') ? ' ' : *claim); + + vec_push(string, '\0'); + vec_push(data->tokens, tok); + vec_push(data->values, string); + + /* prevent duplicated entries */ + memset(&lexer->tok, 0, sizeof(lexer->tok)); + } + + lex_close(lexer); + vec_push(datas, data); + } + + /*mem_d(data);*/ + util_htset(diagnostic_table, file, datas); + fs_file_close(handle); + } + + /* store the lines request back to the vector */ + if (line - beg + end > vec_size(datas)) { + beg = 1; + end = 1; + } + + for(feed = line - beg; feed < line - beg + end; ++feed) + vec_push((*read), datas[feed]); +} + +static void diagnostic_feed(const char *file, size_t line, size_t beg, size_t end, bool marker, size_t diagnostic) { + diagnostic_data_t **read = NULL; + size_t feed = 0; + size_t space = 6; + size_t len = 0; + size_t itr = 0; + size_t tok = 0; + + /* get line */ + diagnostic_line(file, line, &read, beg, end); + + /* use old token stream to pretty the output */ + for (; feed < vec_size(read); feed++) { + con_out("%4d: ", line); + while ((tok = read[feed]->tokens[itr]) != TOKEN_EOL) { + switch (tok) { + case TOKEN_TYPENAME: + case TOKEN_KEYWORD: + con_out("\033[1;33m%s\033[0m", read[feed]->values[itr]); + break; + + case TOKEN_INTCONST: + case TOKEN_FLOATCONST: + con_out("\033[1;32m%s\033[0m", read[feed]->values[itr]); + break; + + case TOKEN_CHARCONST: + case TOKEN_STRINGCONST: + con_out("\033[1;31m%s\033[0m", read[feed]->values[itr]); + break; + + case 
TOKEN_EOF: + case TOKEN_ERROR: + case TOKEN_EOL: + /* ignore */ + break; + + default: + con_out("%s", read[feed]->values[itr]); + break; + }; + itr++; + } + itr = 0; + con_out("\n"); + } + + switch (diagnostic) { + case DIAGNOSTIC_SEMICOLON: + while (vec_last(read)->line[space] != '=') + space++; + space++; + while (vec_last(read)->line[space] == ' ') + space++; + + while (vec_last(read)->line[space + len] != '\n') + len++; + + break; + + case DIAGNOSTIC_SEMICOLON_SAME: + for (; len < vec_size(vec_last(read)->values); len++) + space += strlen(vec_last(read)->values[len]); + + len = 1; + space -= beg - end; + break; + + case DIAGNOSTIC_ASSIGNMENT: + break; + } + + while (space --) con_out(" "); + while (len --) con_out("~"); + + con_out((marker) ? "^\n" : "\n"); + + vec_free(read); +} + + +static void diagnostic_destory_data(void *data) { + diagnostic_data_t **datas = (diagnostic_data_t **)data; + size_t i,j; + + for (i = 0; i < vec_size(datas); i++) { + vec_free(datas[i]->line); + + /* + * There is always the same number of tokens as + * values, one loop suffices. + */ + for (j = 0; i < vec_size(datas[i]->tokens); i++) { + mem_d(datas[i]->tokens[j]); + mem_d(datas[i]->values[j]); + } + + vec_free(datas[i]->tokens); + vec_free(datas[i]->values); + + mem_d(datas[i]); + } +} + +void diagnostic_destroy() { + if (!diagnostic_table) + return; + + util_htrem(diagnostic_table, diagnostic_destory_data); +} + +void diagnostic_calculate(const char *file, size_t line, size_t diagnostic) { + size_t linebeg = 1; + size_t linecnt = 1; + bool marker = false; + + + switch (diagnostic) { + /* + * Semicolon reports error on nextline, which is why we need + * to increment the beginning line for diagnostics, and also + * enable the marker (to show where it's missing). 
+ */ + case DIAGNOSTIC_SEMICOLON: + linebeg++; + marker = true; + break; + + case DIAGNOSTIC_SEMICOLON_SAME: + linecnt = 1; + linebeg = line-2; + marker = true; + break; + + /* + * Cases that don't need line calculation should break the + * statement and carry on to the feeder. + */ + case DIAGNOSTIC_ASSIGNMENT: + break; + + /* Catches the DIAGNOSTIC_NULL and out of range case */ + default: + return; + } + + diagnostic_feed(file, line, linebeg, linecnt, marker, diagnostic); +} diff --git a/gmqcc.h b/gmqcc.h index 3b221e9..ba1f94c 100644 --- a/gmqcc.h +++ b/gmqcc.h @@ -1004,7 +1004,6 @@ qcint prog_tempstring(qc_program *prog, const char *_str); /*===================== parser.c commandline ========================*/ /*===================================================================*/ struct parser_s; - struct parser_s *parser_create (); bool parser_compile_file (struct parser_s *parser, const char *); bool parser_compile_string(struct parser_s *parser, const char *, const char *, size_t); @@ -1024,6 +1023,20 @@ void ftepp_flush (struct ftepp_s *ftepp); void ftepp_add_define (struct ftepp_s *ftepp, const char *source, const char *name); void ftepp_add_macro (struct ftepp_s *ftepp, const char *name, const char *value); +/*===================================================================*/ +/*============================= diag.c ==============================*/ +/*===================================================================*/ +enum { + DIAGNOSTIC_NULL, + DIAGNOSTIC_SEMICOLON, /* Where semicolon required from next line */ + DIAGNOSTIC_SEMICOLON_SAME, /* Where semicolon required from same line */ + DIAGNOSTIC_ASSIGNMENT +}; + +void diagnostic_destroy(); +void diagnostic_calculate(const char *file, size_t line, size_t diagnostic); + + /*===================================================================*/ /*======================= main.c commandline ========================*/ /*===================================================================*/ diff --git 
a/lexer.c b/lexer.c index a4d3728..30ffba3 100644 --- a/lexer.c +++ b/lexer.c @@ -174,9 +174,10 @@ static void lex_token_new(lex_file *lex) #else if (lex->tok.value) vec_shrinkto(lex->tok.value, 0); - lex->tok.constval.t = 0; - lex->tok.ctx.line = lex->sline; - lex->tok.ctx.file = lex->name; + + lex->tok.constval.t = 0; + lex->tok.ctx.line = lex->sline; + lex->tok.ctx.file = lex->name; #endif } #endif diff --git a/parser.c b/parser.c index 7918f0f..7a9e5c9 100644 --- a/parser.c +++ b/parser.c @@ -31,7 +31,6 @@ /* beginning of locals */ #define PARSER_HT_LOCALS 2 - #define PARSER_HT_SIZE 128 #define TYPEDEF_HT_SIZE 16 @@ -103,6 +102,9 @@ typedef struct parser_s { /* collected information */ size_t max_param_count; + + /* diagnostic */ + size_t diagnostic; /* code generator */ code_t *code; @@ -127,173 +129,6 @@ static ast_value* parser_create_array_getter_proto(parser_t *parser, ast_value * static ast_value *parse_typename(parser_t *parser, ast_value **storebase, ast_value *cached_typedef); -/* map> */ -ht diagnostic_table = NULL; -char **diagnostic_index = NULL; -uint32_t diagnostic_item = 0; - -static void diagnostic_line(parser_t *parser, char ***read, size_t beg, size_t end) { - char **lines = NULL; - size_t feed = 0; - - if (!diagnostic_table) - diagnostic_table = util_htnew(1024); - - if (!(lines = (char**)util_htget(diagnostic_table, parser->lex->name))) { - char *data = NULL; - size_t size = 0; - FILE *handle = fs_file_open(parser->lex->name, "r"); - - while (fs_file_getline(&data, &size, handle) != EOF) { - /* claim memory for string */ - char *claim = util_strdup(data); - - vec_push(lines, claim); - } - mem_d(data); - - util_htset(diagnostic_table, parser->lex->name, lines); - vec_push(diagnostic_index, parser->lex->name); - fs_file_close(handle); - } - - /* store the lines request back to the vector */ - if (parser->lex->line - beg + end > vec_size(lines)) { - beg = 1; - end = 1; - } - - for(feed = parser->lex->line - beg; feed < parser->lex->line - beg + 
end; ++feed) - vec_push((*read), lines[feed]); -} - -static void diagnostic_feed(parser_t *parser, size_t beg, size_t end, bool marker) { - lex_file *lexer = NULL; - char **read = NULL; - char *peek = NULL; - char *find = parser->lex->tok.value; - size_t feed = 0; - size_t space = 0; - size_t len = strlen(find); - int tok = 0; - - diagnostic_line(parser, &read, beg, end); - - for (; feed < vec_size(read); feed++) { - lexer = lex_open_string(read[feed], strlen(read[feed]), parser->lex->name); - lexer->flags.preprocessing = true; /* enable whitespace */ - lexer->flags.mergelines = true; - - con_out("% 4d| ", parser->lex->line - beg + feed + 1); - - /* fancy printing */ - while ((tok = lex_do(lexer)) != TOKEN_EOF) { - switch (tok) { - - case TOKEN_TYPENAME: - case TOKEN_KEYWORD: - con_out("\033[1;33m%s\033[0m", lexer->tok.value); - break; - - case TOKEN_INTCONST: - case TOKEN_FLOATCONST: - con_out("\033[1;32m%s\033[0m", lexer->tok.value); - break; - - case TOKEN_CHARCONST: - case TOKEN_STRINGCONST: - con_out("\033[1;31m%s\033[0m", lexer->tok.value); - break; - - case TOKEN_EOF: - case TOKEN_ERROR: - case TOKEN_EOL: - /* ignore */ - break; - - default: - con_out("%s", lexer->tok.value); - break; - }; - } - lex_close(lexer); - con_out("\n"); - } - - /* MOTHER FUCKING HACK! */ - /* MOTHER FUCKING HACK! */ - if (!strcmp(find, "SEMICOLON")) { - space = 0; - len = 0; - while (vec_last(read)[space] != '=') - space++; - space++; - while (vec_last(read)[space] == ' ') - space++; - - while (vec_last(read)[space + len] != '\n') - len++; - - space += 6; - } else { - /* find it in the last line */ - if ((peek = strstr(vec_last(read), find))) { - space = peek - vec_last(read) + 6; /*% 4d|*/ - } - } - - while (space --) con_out(" "); - while (len --) con_out("~"); - - con_out((marker) ? 
"^\n" : "\n"); /* marker */ - - /* yes we allocate a whole vector each subsection read */ - vec_free(read); -} - -static void diagnostic_destroy() { - char **lines = NULL; - size_t index = 0; - size_t item = 0; - - /* - * TODO: traverse the hash table and free from the buckets. Or even - * better implement an 'iterator' system for it to enumerate items. - * we currently store a vector of strings as "keys" into the hashtable - * such that we can erase all allocated data. This is such a waste of - * space. - */ - if (!diagnostic_index || !diagnostic_table) - return; - - for (; index < vec_size(diagnostic_index); index++) { - lines = (char**)util_htget(diagnostic_table, diagnostic_index[index]); - for (item = 0; item < vec_size(lines); item++) { - mem_d(lines[item]); - } - vec_free(lines); - } - - util_htdel(diagnostic_table); - vec_free (diagnostic_index); -} - -static void diagnostic_calculate(parser_t *parser, const char *fmt) { - size_t linebeg = 1; - size_t linecnt = 1; - bool marker = false; - - if (strstr(fmt, "missing semicolon")) - linebeg++, marker = true; - /* - * special linebeg/ linecnt offset calculations can be done - * here. - */ - - diagnostic_feed(parser, linebeg, linecnt, marker); - parser->lex->tok.value = NULL; /* MOTHER FUCKING HACK! */ -} - static void parseerror(parser_t *parser, const char *fmt, ...) { va_list ap; @@ -304,9 +139,7 @@ static void parseerror(parser_t *parser, const char *fmt, ...) 
vcompile_error(parser->lex->tok.ctx, fmt, ap); va_end(ap); - /* only print when not bailing out */ - if (!strstr(fmt, "bailing out")) - diagnostic_calculate(parser, fmt); + diagnostic_calculate(parser->lex->name, parser->lex->line, parser->diagnostic); } /* returns true if it counts as an error */ @@ -317,6 +150,8 @@ static bool GMQCC_WARN parsewarning(parser_t *parser, int warntype, const char * va_start(ap, fmt); r = vcompile_warning(parser->lex->tok.ctx, warntype, fmt, ap); va_end(ap); + + diagnostic_calculate(parser->lex->name, parser->lex->line, parser->diagnostic); return r; } @@ -1411,11 +1246,14 @@ static bool parser_sy_apply_operator(parser_t *parser, shunt *sy) field->next->vtype == TYPE_FUNCTION && exprs[1]->vtype == TYPE_FUNCTION) { + parser->diagnostic = DIAGNOSTIC_ASSIGNMENT; (void)!compile_warning(ctx, WARN_ASSIGN_FUNCTION_TYPES, "invalid types in assignment: cannot assign %s to %s", ty2, ty1); } - else + else { + parser->diagnostic = DIAGNOSTIC_ASSIGNMENT; compile_error(ctx, "invalid types in assignment: cannot assign %s to %s", ty2, ty1); + } } } else @@ -3130,10 +2968,12 @@ static bool parse_return(parser_t *parser, ast_block *block, ast_expression **ou return false; } - if (parser->tok != ';') + if (parser->tok != ';') { + parser->diagnostic = DIAGNOSTIC_SEMICOLON; parseerror(parser, "missing semicolon after return assignment"); - else if (!parser_next(parser)) + } else if (!parser_next(parser)) { parseerror(parser, "parse error after return assignment"); + } *out = var; return true; @@ -5866,9 +5706,10 @@ skipvar: if (parser->tok != '{' || var->expression.vtype != TYPE_FUNCTION) { if (parser->tok != '=') { const char *obtain = parser_tokval(parser); - if (!strcmp(obtain, "}")) - parseerror(parser, "missing semicolon"); - else + if (!strcmp(obtain, "}")) { + parser->diagnostic = DIAGNOSTIC_SEMICOLON_SAME; + parseerror(parser, "expected semicolon, got `%s`", obtain); + } else parseerror(parser, "missing initializer"); break; } @@ -6081,8 +5922,7 
@@ another: } if (parser->tok != ';') { - /* MOTHER FUCKING HACK! */ - parser->lex->tok.value = "SEMICOLON"; + parser->diagnostic = DIAGNOSTIC_SEMICOLON; parseerror(parser, "missing semicolon after variables"); break; } @@ -6324,8 +6164,10 @@ static bool parser_compile(parser_t *parser) if (!parser_global_statement(parser)) { if (parser->tok == TOKEN_EOF) parseerror(parser, "unexpected eof"); - else if (compile_errors) + else if (compile_errors) { + parser->diagnostic = DIAGNOSTIC_NULL; parseerror(parser, "there have been errors, bailing out"); + } lex_close(parser->lex); parser->lex = NULL; return false;