From: Dale Weiler <killfieldengine@gmail.com>
Date: Fri, 31 May 2013 03:41:03 +0000 (+0000)
Subject: Preliminary restructuring / implementation of the diagnostics system.
X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=8ef04978b372bd0598c4e46067e72c8fe83ec64a;p=xonotic%2Fgmqcc.git

Preliminary restructuring / implementation of the diagnostics system.
---

diff --git a/Makefile b/Makefile
index fea3356..3bcb68e 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,7 @@ ifeq ($(track), no)
 	CFLAGS += -DNOTRACK
 endif
 
-OBJ_D = util.o code.o ast.o ir.o conout.o ftepp.o opts.o fs.o utf8.o correct.o
+OBJ_D = util.o code.o ast.o ir.o conout.o ftepp.o opts.o fs.o utf8.o correct.o diag.o
 OBJ_P = util.o fs.o conout.o opts.o pak.o
 OBJ_T = test.o util.o conout.o fs.o
 OBJ_C = main.o lexer.o parser.o fs.o
@@ -244,8 +244,9 @@ opts.o: gmqcc.h opts.def
 fs.o: gmqcc.h opts.def
 utf8.o: gmqcc.h opts.def
 correct.o: gmqcc.h opts.def
+diag.o: gmqcc.h opts.def lexer.h # diag.c includes lexer.h
 pak.o: gmqcc.h opts.def
 test.o: gmqcc.h opts.def
 main.o: gmqcc.h opts.def lexer.h
 lexer.o: gmqcc.h opts.def lexer.h
-parser.o: gmqcc.h opts.def lexer.h ast.h ir.h intrin.h
+parser.o: gmqcc.h opts.def intrin.h
diff --git a/diag.c b/diag.c
new file mode 100644
index 0000000..6132942
--- /dev/null
+++ b/diag.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2012, 2013
+ *     Dale Weiler
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "gmqcc.h"
+#include "lexer.h"
+
+typedef struct { /* one cached source line together with its lexed form */
+    const char *line;    /* util_strdup'ed copy of the raw line text    */
+    size_t     *tokens;  /* vector of token ids, in lexing order        */
+    char      **values;  /* vector of token texts, parallel to `tokens` */
+} diagnostic_data_t;
+
+/*
+ * TODO: these should become a structure passed around which is stored
+ * in the parser instead of globals.
+ */
+ht        diagnostic_table = NULL; /* map<const char *file, vector<diagnostic_data_t *>>, created on first use */
+uint32_t  diagnostic_item  = 0;    /* NOTE(review): not referenced by any code visible in this file */
+
+/*
+ * Append to `*read` the cached entries of `end` lines of `file`, starting
+ * `beg` lines before `line`; appends nothing if the file is unreadable or
+ * the window does not fit.  Files are lexed once and cached per name.
+ */
+static void diagnostic_line(const char *file, size_t line, diagnostic_data_t ***read, size_t beg, size_t end) {
+    diagnostic_data_t  **datas = NULL;
+    size_t               feed  = 0;
+
+    if (!diagnostic_table)
+         diagnostic_table = util_htnew(1024);
+
+    /* build and cache the per-line data the first time a file is requested */
+    if (!(datas = (diagnostic_data_t**)util_htget(diagnostic_table, file))) {
+        lex_file          *lexer  = NULL;
+        char              *text   = NULL;
+        size_t             size   = 0;
+        size_t             tok    = 0;
+        FILE              *handle = fs_file_open(file, "r");
+
+        if (!handle)
+            return; /* source not readable: no context can be shown */
+
+        /*
+         * Per line: raw text (data->line), token ids (data->tokens) and
+         * token texts (data->values); finished lines are pushed onto datas.
+         */
+        while (fs_file_getline(&text, &size, handle) != EOF) {
+            diagnostic_data_t *data = mem_a(sizeof(diagnostic_data_t));
+
+            data->tokens = NULL;
+            data->values = NULL;
+            data->line   = util_strdup(text);
+
+            /* lex with whitespace tokens enabled so the line can be re-printed */
+            if ((lexer = lex_open_string(text, strlen(text), file))) {
+                lexer->flags.preprocessing = true;
+                lexer->flags.mergelines    = true;
+                while ((tok = lex_do(lexer)) != TOKEN_EOF) {
+                    char *string = NULL;
+                    char *claim  = lexer->tok.value;
+
+                    /* copy the token text, turning tabs into single spaces */
+                    for (; claim && *claim; claim ++)
+                        vec_push(string, (*claim == '\t') ? ' ' : *claim);
+                    vec_push(string, '\0');
+                    vec_push(data->tokens, tok);
+                    vec_push(data->values, string);
+
+                    /* prevent duplicated entries */
+                    memset(&lexer->tok, 0, sizeof(lexer->tok));
+                }
+                lex_close(lexer);
+            }
+            vec_push(datas, data);
+        }
+
+        mem_d(text); /* getline buffer; every line was duplicated above */
+        util_htset(diagnostic_table, file, datas);
+        fs_file_close(handle);
+    }
+
+    /* fall back to the reported line alone when the window is out of range */
+    if (beg > line || line - beg + end > vec_size(datas))
+        beg = end = 1;
+    if (beg > line || line - beg + end > vec_size(datas))
+        return; /* still out of range, e.g. empty file or line 0 */
+
+    for(feed = line - beg; feed < line - beg + end; ++feed)
+        vec_push((*read), datas[feed]);
+}
+
+/*
+ * Print the lines of `file` selected by `line`, `beg` and `end` with simple
+ * ANSI colouring, then an underline row of spaces, '~' characters and, if
+ * `marker` is set, a trailing '^'.  Prints nothing when no line is available.
+ */
+static void diagnostic_feed(const char *file, size_t line, size_t beg, size_t end, bool marker, size_t diagnostic) {
+    diagnostic_data_t  **read  = NULL;
+    diagnostic_data_t   *last  = NULL;
+    size_t               feed  = 0;
+    size_t               space = 6; /* width of the "%4d: " prefix */
+    size_t               len   = 0;
+    size_t               itr   = 0;
+    size_t               col   = 0;
+
+    diagnostic_line(file, line, &read, beg, end);
+    if (!vec_size(read))
+        return; /* nothing was fed back, so there is nothing to underline */
+
+    /* replay the cached token stream, colouring by token class */
+    for (; feed < vec_size(read); feed++) {
+        con_out("%4d: ", (int)line);
+        for (itr = 0; itr < vec_size(read[feed]->tokens) && read[feed]->tokens[itr] != TOKEN_EOL; itr++) {
+            switch (read[feed]->tokens[itr]) {
+                case TOKEN_TYPENAME:
+                case TOKEN_KEYWORD:
+                    con_out("\033[1;33m%s\033[0m", read[feed]->values[itr]);
+                    break;
+                case TOKEN_INTCONST:
+                case TOKEN_FLOATCONST:
+                    con_out("\033[1;32m%s\033[0m", read[feed]->values[itr]);
+                    break;
+                case TOKEN_CHARCONST:
+                case TOKEN_STRINGCONST:
+                    con_out("\033[1;31m%s\033[0m", read[feed]->values[itr]);
+                    break;
+                case TOKEN_EOF:
+                case TOKEN_ERROR:
+                    /* ignore */
+                    break;
+                default:
+                    con_out("%s", read[feed]->values[itr]);
+                    break;
+            }
+        }
+        con_out("\n");
+    }
+
+    last = vec_last(read);
+    switch (diagnostic) {
+        case DIAGNOSTIC_SEMICOLON:
+            /* underline what follows the first '=' up to the end of the line */
+            while (last->line[col] && last->line[col] != '=')
+                col++;
+            if (last->line[col])
+                col++;
+            while (last->line[col] == ' ')
+                col++;
+            while (last->line[col + len] && last->line[col + len] != '\n')
+                len++;
+            space += col; /* prefix width plus column inside the line */
+            break;
+        case DIAGNOSTIC_SEMICOLON_SAME:
+            /* NOTE(review): offset heuristic carried over unchanged */
+            for (; len < vec_size(last->values); len++)
+                space += strlen(last->values[len]);
+            len    = 1;
+            space += end;
+            space  = (beg < space) ? space - beg : 0; /* clamp, never wrap */
+            break;
+        default:
+            break;
+    }
+
+    while (space --) con_out(" ");
+    while (len   --) con_out("~");
+    con_out((marker) ? "^\n" : "\n");
+
+    vec_free(read);
+}
+
+
+/*
+ * Hashtable value destructor: `data` is the vector of per-line entries
+ * cached for one file.  Releases every entry and then the vector itself.
+ */
+static void diagnostic_destory_data(void *data) {
+    diagnostic_data_t **datas = (diagnostic_data_t **)data;
+    size_t              i,j;
+
+    for (i = 0; i < vec_size(datas); i++) {
+        /* the line text comes from util_strdup, it is not a vector */
+        mem_d((void*)datas[i]->line);
+
+        /* token ids are plain integers; only the value strings own memory */
+        for (j = 0; j < vec_size(datas[i]->values); j++)
+            vec_free(datas[i]->values[j]);
+
+        vec_free(datas[i]->tokens);
+        vec_free(datas[i]->values);
+        mem_d(datas[i]);
+    }
+    vec_free(datas);
+}
+
+/* Release all cached diagnostic data; a no-op when nothing was cached. */
+void diagnostic_destroy() {
+    if (diagnostic_table)
+        util_htrem(diagnostic_table, diagnostic_destory_data);
+    diagnostic_table = NULL; /* the old handle must not be looked up again */
+}
+
+/*
+ * Entry point of the diagnostics system.  Depending on the DIAGNOSTIC_*
+ * kind it picks the `beg`, `end` and `marker` arguments and forwards the
+ * request for `file` / `line` to diagnostic_feed.
+ */
+void diagnostic_calculate(const char *file, size_t line, size_t diagnostic) {
+    size_t first   = 1;     /* forwarded as `beg`    */
+    size_t count   = 1;     /* forwarded as `end`    */
+    bool   pointer = false; /* forwarded as `marker` */
+
+    if (diagnostic == DIAGNOSTIC_SEMICOLON) {
+        /*
+         * A missing semicolon is reported on the next line, so the
+         * beginning is moved by one and the marker is switched on to
+         * show where it is missing.
+         */
+        first   = 2;
+        pointer = true;
+    } else if (diagnostic == DIAGNOSTIC_SEMICOLON_SAME) {
+        /*
+         * NOTE(review): the beginning is derived from `line` itself
+         * here (it wraps for line < 2) - confirm that this is intended.
+         */
+        first   = line - 2;
+        pointer = true;
+    } else if (diagnostic != DIAGNOSTIC_ASSIGNMENT) {
+        /*
+         * DIAGNOSTIC_NULL and out of range values: nothing is printed.
+         * DIAGNOSTIC_ASSIGNMENT needs no line calculation and simply
+         * falls through to the feeder with the defaults above.
+         */
+        return;
+    }
+
+    /* hand the computed window over to the feeder */
+    diagnostic_feed(file, line, first, count, pointer, diagnostic);
+}
diff --git a/gmqcc.h b/gmqcc.h
index 3b221e9..ba1f94c 100644
--- a/gmqcc.h
+++ b/gmqcc.h
@@ -1004,7 +1004,6 @@ qcint             prog_tempstring(qc_program *prog, const char *_str);
 /*===================== parser.c commandline ========================*/
 /*===================================================================*/
 struct parser_s;
-
 struct parser_s *parser_create        ();
 bool             parser_compile_file  (struct parser_s *parser, const char *);
 bool             parser_compile_string(struct parser_s *parser, const char *, const char *, size_t);
@@ -1024,6 +1023,20 @@ void            ftepp_flush            (struct ftepp_s *ftepp);
 void            ftepp_add_define       (struct ftepp_s *ftepp, const char *source, const char *name);
 void            ftepp_add_macro        (struct ftepp_s *ftepp, const char *name,   const char *value);
 
+/*===================================================================*/
+/*============================= diag.c ==============================*/
+/*===================================================================*/
+enum {
+    DIAGNOSTIC_NULL,           /* no source context is printed for this kind  */
+    DIAGNOSTIC_SEMICOLON,      /* missing semicolon, reported from next line  */
+    DIAGNOSTIC_SEMICOLON_SAME, /* missing semicolon, reported from same line  */
+    DIAGNOSTIC_ASSIGNMENT      /* set for "invalid types in assignment"       */
+};
+
+void diagnostic_destroy();
+void diagnostic_calculate(const char *file, size_t line, size_t diagnostic);
+
+
 /*===================================================================*/
 /*======================= main.c commandline ========================*/
 /*===================================================================*/
diff --git a/lexer.c b/lexer.c
index a4d3728..30ffba3 100644
--- a/lexer.c
+++ b/lexer.c
@@ -174,9 +174,10 @@ static void lex_token_new(lex_file *lex)
 #else
     if (lex->tok.value)
         vec_shrinkto(lex->tok.value, 0);
-    lex->tok.constval.t  = 0;
-    lex->tok.ctx.line = lex->sline;
-    lex->tok.ctx.file = lex->name;
+        
+    lex->tok.constval.t = 0;
+    lex->tok.ctx.line   = lex->sline;
+    lex->tok.ctx.file   = lex->name;
 #endif
 }
 #endif
diff --git a/parser.c b/parser.c
index 7918f0f..7a9e5c9 100644
--- a/parser.c
+++ b/parser.c
@@ -31,7 +31,6 @@
 
 /* beginning of locals */
 #define PARSER_HT_LOCALS  2
-
 #define PARSER_HT_SIZE    128
 #define TYPEDEF_HT_SIZE   16
 
@@ -103,6 +102,9 @@ typedef struct parser_s {
 
     /* collected information */
     size_t     max_param_count;
+    
+    /* diagnostic */
+    size_t     diagnostic;
 
     /* code generator */
     code_t     *code;
@@ -127,173 +129,6 @@ static ast_value* parser_create_array_getter_proto(parser_t *parser, ast_value *
 static ast_value *parse_typename(parser_t *parser, ast_value **storebase, ast_value *cached_typedef);
 
 
-/* map<string, vector<char>> */
-ht        diagnostic_table = NULL;
-char    **diagnostic_index = NULL;
-uint32_t  diagnostic_item  = 0;
-
-static void diagnostic_line(parser_t *parser, char ***read, size_t beg, size_t end) {
-    char **lines = NULL;
-    size_t feed  = 0;
-
-    if (!diagnostic_table)
-         diagnostic_table = util_htnew(1024);
-
-    if (!(lines = (char**)util_htget(diagnostic_table, parser->lex->name))) {
-        char  *data  = NULL;
-        size_t size  = 0;
-        FILE *handle = fs_file_open(parser->lex->name, "r");
-
-        while (fs_file_getline(&data, &size, handle) != EOF) {
-            /* claim memory for string */
-            char *claim = util_strdup(data);
-
-            vec_push(lines, claim);
-        }
-        mem_d(data);
-        
-        util_htset(diagnostic_table, parser->lex->name, lines);
-        vec_push(diagnostic_index, parser->lex->name);
-        fs_file_close(handle);
-    }
-
-    /* store the lines request back to the vector */
-    if (parser->lex->line - beg + end > vec_size(lines)) {
-        beg = 1;
-        end = 1;
-    }
-
-    for(feed = parser->lex->line - beg; feed < parser->lex->line - beg + end; ++feed)
-        vec_push((*read), lines[feed]);
-}
-
-static void diagnostic_feed(parser_t *parser, size_t beg, size_t end, bool marker) {
-    lex_file *lexer = NULL;
-    char    **read  = NULL;
-    char     *peek  = NULL;
-    char     *find  = parser->lex->tok.value;
-    size_t    feed  = 0;
-    size_t    space = 0;
-    size_t    len   = strlen(find);
-    int       tok   = 0;
-
-    diagnostic_line(parser, &read, beg, end);
-
-    for (; feed < vec_size(read); feed++) {
-        lexer = lex_open_string(read[feed], strlen(read[feed]), parser->lex->name);
-        lexer->flags.preprocessing = true; /* enable whitespace */
-        lexer->flags.mergelines    = true;
-
-        con_out("% 4d| ", parser->lex->line - beg + feed + 1); 
-
-        /* fancy printing */
-        while ((tok = lex_do(lexer)) != TOKEN_EOF) {
-            switch (tok) {
-
-                case TOKEN_TYPENAME:
-                case TOKEN_KEYWORD:
-                    con_out("\033[1;33m%s\033[0m", lexer->tok.value);
-                    break;
-
-                case TOKEN_INTCONST:
-                case TOKEN_FLOATCONST:
-                    con_out("\033[1;32m%s\033[0m", lexer->tok.value);
-                    break;
-
-                case TOKEN_CHARCONST:
-                case TOKEN_STRINGCONST:
-                    con_out("\033[1;31m%s\033[0m", lexer->tok.value);
-                    break;
-
-                case TOKEN_EOF:
-                case TOKEN_ERROR:
-                case TOKEN_EOL:
-                    /* ignore */
-                    break;
-
-                default:
-                    con_out("%s", lexer->tok.value);
-                    break;
-            };
-        }
-        lex_close(lexer);
-        con_out("\n");
-    }
-
-    /* MOTHER FUCKING HACK! */
-    /* MOTHER FUCKING HACK! */
-    if (!strcmp(find, "SEMICOLON")) {
-        space = 0;
-        len = 0;
-        while (vec_last(read)[space] != '=')
-            space++;
-        space++;
-        while (vec_last(read)[space] == ' ')
-            space++;
-
-        while (vec_last(read)[space + len] != '\n')
-            len++;
-
-        space += 6;
-    } else {
-        /* find it in the last line */
-        if ((peek = strstr(vec_last(read), find))) {
-            space = peek - vec_last(read) + 6; /*% 4d|*/
-        }
-    }
-
-    while (space --) con_out(" ");
-    while (len   --) con_out("~");
-
-    con_out((marker) ? "^\n" : "\n"); /* marker */
-
-    /* yes we allocate a whole vector each subsection read */
-    vec_free(read);
-}
-
-static void diagnostic_destroy() {
-    char **lines = NULL;
-    size_t index = 0;
-    size_t item  = 0;
-
-    /*
-     * TODO: traverse the hash table and free from the buckets. Or even
-     * better implement an 'iterator' system for it to enumerate items.
-     * we currently store a vector of strings as "keys" into the hashtable
-     * such that we can erase all allocated data. This is such a waste of
-     * space.
-     */
-    if (!diagnostic_index || !diagnostic_table)
-        return;
-
-    for (; index < vec_size(diagnostic_index); index++) {
-        lines = (char**)util_htget(diagnostic_table, diagnostic_index[index]);
-        for (item = 0; item < vec_size(lines); item++) {
-            mem_d(lines[item]);
-        }
-        vec_free(lines);
-    }
-
-    util_htdel(diagnostic_table);
-    vec_free  (diagnostic_index);
-}
-
-static void diagnostic_calculate(parser_t *parser, const char *fmt) {
-    size_t linebeg = 1;
-    size_t linecnt = 1;
-    bool   marker  = false;
-
-    if (strstr(fmt, "missing semicolon"))
-        linebeg++, marker = true;
-    /*
-     * special linebeg/ linecnt offset calculations can be done
-     * here.
-     */  
-
-    diagnostic_feed(parser, linebeg, linecnt, marker);
-    parser->lex->tok.value = NULL; /* MOTHER FUCKING HACK! */
-}
-
 static void parseerror(parser_t *parser, const char *fmt, ...)
 {
     va_list  ap;
@@ -304,9 +139,7 @@ static void parseerror(parser_t *parser, const char *fmt, ...)
     vcompile_error(parser->lex->tok.ctx, fmt, ap);
     va_end(ap);
 
-    /* only print when not bailing out */
-    if (!strstr(fmt, "bailing out"))
-        diagnostic_calculate(parser, fmt);
+    diagnostic_calculate(parser->lex->name, parser->lex->line, parser->diagnostic);
 }
 
 /* returns true if it counts as an error */
@@ -317,6 +150,8 @@ static bool GMQCC_WARN parsewarning(parser_t *parser, int warntype, const char *
     va_start(ap, fmt);
     r = vcompile_warning(parser->lex->tok.ctx, warntype, fmt, ap);
     va_end(ap);
+    
+    diagnostic_calculate(parser->lex->name, parser->lex->line, parser->diagnostic);
     return r;
 }
 
@@ -1411,11 +1246,14 @@ static bool parser_sy_apply_operator(parser_t *parser, shunt *sy)
                         field->next->vtype == TYPE_FUNCTION &&
                         exprs[1]->vtype == TYPE_FUNCTION)
                     {
+                        parser->diagnostic = DIAGNOSTIC_ASSIGNMENT;
                         (void)!compile_warning(ctx, WARN_ASSIGN_FUNCTION_TYPES,
                                                "invalid types in assignment: cannot assign %s to %s", ty2, ty1);
                     }
-                    else
+                    else {
+                        parser->diagnostic = DIAGNOSTIC_ASSIGNMENT;
                         compile_error(ctx, "invalid types in assignment: cannot assign %s to %s", ty2, ty1);
+                    }
                 }
             }
             else
@@ -3130,10 +2968,12 @@ static bool parse_return(parser_t *parser, ast_block *block, ast_expression **ou
             return false;
         }
 
-        if (parser->tok != ';')
+        if (parser->tok != ';') {
+            parser->diagnostic = DIAGNOSTIC_SEMICOLON;
             parseerror(parser, "missing semicolon after return assignment");
-        else if (!parser_next(parser))
+        } else if (!parser_next(parser)) {
             parseerror(parser, "parse error after return assignment");
+        }
 
         *out = var;
         return true;
@@ -5866,9 +5706,10 @@ skipvar:
         if (parser->tok != '{' || var->expression.vtype != TYPE_FUNCTION) {
             if (parser->tok != '=') {
                 const char *obtain = parser_tokval(parser);
-                if (!strcmp(obtain, "}"))
-                    parseerror(parser, "missing semicolon");
-                else
+                if (!strcmp(obtain, "}")) {
+                    parser->diagnostic = DIAGNOSTIC_SEMICOLON_SAME;
+                    parseerror(parser, "expected semicolon, got `%s`", obtain);
+                } else
                     parseerror(parser, "missing initializer");
                 break;
             }
@@ -6081,8 +5922,7 @@ another:
         }
 
         if (parser->tok != ';') {
-            /* MOTHER FUCKING HACK! */
-            parser->lex->tok.value = "SEMICOLON";
+            parser->diagnostic = DIAGNOSTIC_SEMICOLON;
             parseerror(parser, "missing semicolon after variables");
             break;
         }
@@ -6324,8 +6164,10 @@ static bool parser_compile(parser_t *parser)
             if (!parser_global_statement(parser)) {
                 if (parser->tok == TOKEN_EOF)
                     parseerror(parser, "unexpected eof");
-                else if (compile_errors)
+                else if (compile_errors) {
+                    parser->diagnostic = DIAGNOSTIC_NULL;
                     parseerror(parser, "there have been errors, bailing out");
+                }
                 lex_close(parser->lex);
                 parser->lex = NULL;
                 return false;