From 9f1bda3797380704613e45db6273c5da69c03f7f Mon Sep 17 00:00:00 2001
From: Dale Weiler <killfieldengine@gmail.com>
Date: Tue, 16 Apr 2013 08:11:22 +0000
Subject: [PATCH] Work in progress clang-like diagnostics (lots of work to be
 done yet)

---
 intrin.h |   2 +-
 lexer.c  |   3 +-
 lexer.h  |   2 +-
 main.c   |   6 +-
 parser.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 util.c   |   4 +-
 6 files changed, 173 insertions(+), 8 deletions(-)
diff --git a/intrin.h b/intrin.h
index 69490dc..09ae38c 100644
--- a/intrin.h
+++ b/intrin.h
@@ -413,6 +413,6 @@ ast_expression *intrin_func(parser_t *parser, const char *name) {
         return ((intrin_t*)find)->intrin(parser);
     }
 
-    parseerror(parser, "need function: `%s` compiler depends on it", name);
+    /*parseerror(parser, "need function: `%s` compiler depends on it", name);*/
     return NULL;
 }
diff --git a/lexer.c b/lexer.c
index 6f9def8..a6a0f04 100644
--- a/lexer.c
+++ b/lexer.c
@@ -271,8 +271,9 @@ void lex_close(lex_file *lex)
 
 static int lex_fgetc(lex_file *lex)
 {
-    if (lex->file)
+    if (lex->file) {
         return fs_file_getc(lex->file);
+    }
     if (lex->open_string) {
         if (lex->open_string_pos >= lex->open_string_length)
             return EOF;
diff --git a/lexer.h b/lexer.h
index 9f080dc..811c698 100644
--- a/lexer.h
+++ b/lexer.h
@@ -102,7 +102,7 @@ typedef struct {
 } frame_macro;
 
 typedef struct lex_file_s {
-    FILE   *file;
+    FILE       *file;
     const char *open_string;
     size_t      open_string_length;
     size_t      open_string_pos;
diff --git a/main.c b/main.c
index 9da3769..158d818 100644
--- a/main.c
+++ b/main.c
@@ -780,7 +780,11 @@ srcdone:
             }
         }
     }
-
+{ /* NOTE(review): looks like temporary debugging code -- confirm it is meant to be committed */
+    #define FOO "\xD\xE\xA\xD\xB\xE\xE\xF\xD\xE\xA\xD\xB\xE\xE\xF"
+    char *foo = util_strdup(FOO FOO FOO FOO "hello world!");
+    (void)foo; /* NOTE(review): the duplicated string is never released in the visible code */
+}
 cleanup:
     util_debug("COM", "cleaning ...\n");
     ftepp_finish();
diff --git a/parser.c b/parser.c
index 088cbff..9810513 100644
--- a/parser.c
+++ b/parser.c
@@ -121,12 +121,166 @@ static ast_value* parser_create_array_setter_proto(parser_t *parser, ast_value *
 static ast_value* parser_create_array_getter_proto(parser_t *parser, ast_value *array, const ast_expression *elemtype, const char *funcname);
 static ast_value *parse_typename(parser_t *parser, ast_value **storebase, ast_value *cached_typedef);
 
+
+/* map<string, vector<char*>>: lexer file name -> vector holding a copy of each line of that file */
+ht        diagnostic_table = NULL; /* created on first use by diagnostic_line() */
+char    **diagnostic_index = NULL; /* every key stored in the table; diagnostic_destroy() walks it to free entries */
+uint32_t  diagnostic_item  = 0;    /* NOTE(review): not referenced anywhere in this patch */
+
+/* Append the source lines around the lexer's current line to *read; the strings stay owned by a per-file cache. */
+static void diagnostic_line(parser_t *parser, char ***read, size_t beg, size_t end) {
+    char **lines = NULL;
+    size_t line  = parser->lex->line;
+    size_t feed  = 0;
+
+    if (!diagnostic_table)
+         diagnostic_table = util_htnew(1024);
+
+    if (!(lines = (char**)util_htget(diagnostic_table, parser->lex->name))) {
+        char  *data  = NULL;
+        size_t size  = 0;
+        FILE *handle = fs_file_open(parser->lex->name, "r");
+        if (!handle)
+            return; /* cannot quote a file we cannot open; *read is left untouched */
+        while (fs_file_getline(&data, &size, handle) != EOF) {
+            char *claim = util_strdup(data); /* claim memory for string */
+            vec_push(lines, claim);
+        }
+        mem_d(data);
+        util_htset(diagnostic_table, parser->lex->name, lines);
+        vec_push(diagnostic_index, parser->lex->name);
+        fs_file_close(handle);
+    }
+
+    /* the window [line - beg, line - beg + end) must fit; otherwise show only the current line */
+    if (beg > line || line - beg + end > vec_size(lines)) {
+        if (line < 1 || line > vec_size(lines))
+            return; /* even the current line lies outside what was read */
+        beg = end = 1;
+    }
+    for (feed = line - beg; feed < line - beg + end; ++feed)
+        vec_push((*read), lines[feed]);
+}
+
+/* Print the quoted source lines with ANSI colours per token class, then underline the current token. */
+static void diagnostic_feed(parser_t *parser, size_t beg, size_t end, bool marker) {
+    lex_file *lexer = NULL;
+    char    **read  = NULL;
+    char     *peek  = NULL;
+    char     *find  = parser->lex->tok.value;
+    size_t    feed  = 0;
+    size_t    space = 0;
+    size_t    len   = (find) ? strlen(find) : 0; /* strlen(NULL) is undefined */
+    int       tok   = 0;
+
+    diagnostic_line(parser, &read, beg, end);
+    if (!vec_size(read))
+        return; /* nothing was collected (read is still NULL); vec_last() below needs one line */
+
+    for (; feed < vec_size(read); feed++) {
+        /* the line number is a size_t expression; convert it to match the int conversion */
+        con_out("% 4d| ", (int)(parser->lex->line - beg + feed + 1));
+
+        if (!(lexer = lex_open_string(read[feed], strlen(read[feed]), parser->lex->name))) {
+            con_out("\n"); /* no lexer for this line: keep the numbering, skip the text */
+            continue;
+        }
+        lexer->flags.preprocessing = true; /* enable whitespace */
+        lexer->flags.mergelines    = true;
+
+        /* fancy printing; TOKEN_EOF ends the loop so it needs no case of its own */
+        while ((tok = lex_do(lexer)) != TOKEN_EOF) {
+            switch (tok) {
+                case TOKEN_TYPENAME:
+                case TOKEN_KEYWORD:
+                    con_out("\033[1;33m%s\033[0m", lexer->tok.value);
+                    break;
+                case TOKEN_INTCONST:
+                case TOKEN_FLOATCONST:
+                    con_out("\033[1;32m%s\033[0m", lexer->tok.value);
+                    break;
+                case TOKEN_CHARCONST:
+                case TOKEN_STRINGCONST:
+                    con_out("\033[1;31m%s\033[0m", lexer->tok.value);
+                    break;
+
+                case TOKEN_ERROR:
+                case TOKEN_EOL:
+                    /* ignore */
+                    break;
+
+                default:
+                    con_out("%s", lexer->tok.value);
+                    break;
+            }
+        }
+        lex_close(lexer);
+        con_out("\n");
+    }
+
+    /* underline the first occurrence of the token text in the last line, if it is there */
+    if (!find || !(peek = strstr(vec_last(read), find)))
+        len = 0; /* not on this line: a row of '~' at column 0 would point at the wrong code */
+    else
+        space = peek - vec_last(read) + 6; /*% 4d|*/
+    while (space --) con_out(" ");
+    while (len   --) con_out("~");
+    con_out((marker) ? "^\n" : "\n"); /* marker */
+
+    vec_free(read);
+}
+
+/* Free every cached line, each per-file vector, the key index and the table, then reset the globals. */
+static void diagnostic_destroy(void) {
+    char **lines = NULL;
+    size_t index = 0;
+    size_t item  = 0;
+
+    /*
+     * TODO: traverse the hash table and free from the buckets. Or even
+     * better implement an 'iterator' system for it to enumerate items.
+     * we currently store a vector of strings as "keys" into the hashtable
+     * such that we can erase all allocated data. This is such a waste of
+     * space.
+     */
+    if (!diagnostic_table)
+        return;
+    for (; index < vec_size(diagnostic_index); index++) {
+        lines = (char**)util_htget(diagnostic_table, diagnostic_index[index]);
+        for (item = 0; item < vec_size(lines); item++)
+            mem_d(lines[item]);
+        vec_free(lines);
+    }
+    util_htdel(diagnostic_table);
+    vec_free  (diagnostic_index);
+    diagnostic_table = NULL; /* diagnostic_line() only builds a new table when this is NULL */
+    diagnostic_index = NULL;
+}
+
+/* Choose the line window for the current error and hand it to diagnostic_feed() without a marker. */
+static void diagnostic_calculate(parser_t *parser) {
+    /*
+     * special linebeg/ linecnt offset calculations can be done
+     * here.
+     */
+    const size_t first_line = 1; /* forwarded as diagnostic_feed()'s `beg` */
+    const size_t line_count = 1; /* forwarded as diagnostic_feed()'s `end` */
+
+    diagnostic_feed(parser, first_line, line_count, false);
+}
+
 static void parseerror(parser_t *parser, const char *fmt, ...)
 {
-    va_list ap;
+    va_list  ap;
     va_start(ap, fmt);
+
+    /* forward fmt and its variadic arguments to vcompile_error() together */
+    /* with the context (tok.ctx) of the token the lexer is currently on */
     vcompile_error(parser->lex->tok.ctx, fmt, ap);
     va_end(ap);
+
+    /* skip the source excerpt when the format string contains "bailing out" */
+    if (!strstr(fmt, "bailing out"))
+        diagnostic_calculate(parser);
 }
 
 /* returns true if it counts as an error */
@@ -5588,7 +5742,11 @@ skipvar:
 
         if (parser->tok != '{') {
             if (parser->tok != '=') {
-                parseerror(parser, "missing semicolon or initializer, got: `%s`", parser_tokval(parser));
+                const char *obtain = parser_tokval(parser);
+                if (!strcmp(obtain, "}"))
+                    parseerror(parser, "missing semicolon");
+                else
+                    parseerror(parser, "missing initializer");
                 break;
             }
 
@@ -6282,6 +6440,8 @@ bool parser_finish(const char *output)
         }
     }
 
+    diagnostic_destroy();
+
     ir_builder_delete(ir);
     return retval;
 }
diff --git a/util.c b/util.c
index 92f8e5e..59be9bf 100644
--- a/util.c
+++ b/util.c
@@ -192,8 +192,8 @@ void util_meminfo() {
             Total peak memory:   %f (MB)\n\
             Total leaked memory: %f (MB) in %llu allocations\n",
                 mem_at,
-                (float)(mem_dt)           / 1048576.0f,
-                mem_ab,
+                mem_dt,
+                (float)(mem_ab)           / 1048576.0f,
                 (float)(mem_db)           / 1048576.0f,
                 (float)(mem_pk)           / 1048576.0f,
                 (float)(mem_ab -  mem_db) / 1048576.0f,
-- 
2.39.5