CC      = gcc
CFLAGS += -Wall

# Object files that make up the program. Every entry must be a .o so that
# it is built through the pattern rule below and therefore with $(CFLAGS);
# listing a .c file here would bypass that rule.
OBJ     = main.o      \
          lex.o       \
          error.o     \
          parse.o     \
          typedef.o   \
          util.o      \
          code.o      \
          assembler.o

# Compile each translation unit separately.
%.o: %.c
	$(CC) -c $< -o $@ $(CFLAGS)
+ */ +#include "gmqcc.h" +/* + * This is the assembler, gmqas, this is being implemented because I'm + * not exactly sure how codegen would work for the C compiler as of yet + * and also I plan to allow inline assembly for the compiler. + */ +static const char *const asm_instr[] = { + [INSTR_DONE] = "DONE", + [INSTR_MUL_F] = "MUL_F", + [INSTR_MUL_V] = "MUL_V", + [INSTR_MUL_FV] = "MUL_FV", + [INSTR_MUL_VF] = "MUL_VF", + [INSTR_DIV_F] = "DIV", + [INSTR_ADD_F] = "ADD_F", + [INSTR_ADD_V] = "ADD_V", + [INSTR_SUB_F] = "SUB_F", + [INSTR_SUB_V] = "DUB_V", + [INSTR_EQ_F] = "EQ_F", + [INSTR_EQ_V] = "EQ_V", + [INSTR_EQ_S] = "EQ_S", + [INSTR_EQ_E] = "EQ_E", + [INSTR_EQ_FNC] = "ES_FNC", + [INSTR_NE_F] = "NE_F", + [INSTR_NE_V] = "NE_V", + [INSTR_NE_S] = "NE_S", + [INSTR_NE_E] = "NE_E", + [INSTR_NE_FNC] = "NE_FNC", + [INSTR_LE] = "LE", + [INSTR_GE] = "GE", + [INSTR_LT] = "LT", + [INSTR_GT] = "GT", + [INSTR_LOAD_F] = "FIELD_F", + [INSTR_LOAD_V] = "FIELD_V", + [INSTR_LOAD_S] = "FIELD_S", + [INSTR_LOAD_ENT] = "FIELD_ENT", + [INSTR_LOAD_FLD] = "FIELD_FLD", + [INSTR_LOAD_FNC] = "FIELD_FNC", + [INSTR_ADDRESS] = "ADDRESS", + [INSTR_STORE_F] = "STORE_F", + [INSTR_STORE_V] = "STORE_V", + [INSTR_STORE_S] = "STORE_S", + [INSTR_STORE_ENT] = "STORE_ENT", + [INSTR_STORE_FLD] = "STORE_FLD", + [INSTR_STORE_FNC] = "STORE_FNC", + [INSTR_STOREP_F] = "STOREP_F", + [INSTR_STOREP_V] = "STOREP_V", + [INSTR_STOREP_S] = "STOREP_S", + [INSTR_STOREP_ENT] = "STOREP_ENT", + [INSTR_STOREP_FLD] = "STOREP_FLD", + [INSTR_STOREP_FNC] = "STOREP_FNC", + [INSTR_RETURN] = "RETURN", + [INSTR_NOT_F] = "NOT_F", + [INSTR_NOT_V] = "NOT_V", + [INSTR_NOT_S] = "NOT_S", + [INSTR_NOT_ENT] = "NOT_ENT", + [INSTR_NOT_FNC] = "NOT_FNC", + [INSTR_IF] = "IF", + [INSTR_IFNOT] = "IFNOT", + [INSTR_CALL0] = "CALL0", + [INSTR_CALL1] = "CALL1", + [INSTR_CALL2] = "CALL2", + [INSTR_CALL3] = "CALL3", + [INSTR_CALL4] = "CALL4", + [INSTR_CALL5] = "CALL5", + [INSTR_CALL6] = "CALL6", + [INSTR_CALL7] = "CALL7", + [INSTR_CALL8] = "CALL8", + 
[INSTR_STATE] = "STATE", + [INSTR_GOTO] = "GOTO", + [INSTR_AND] = "AND", + [INSTR_OR] = "OR", + [INSTR_BITAND] = "AND", + [INSTR_BITOR] = "OR" +}; + +/* + * Some assembler keywords not part of the opcodes above: these are + * for creating functions, or constants. + */ +const char *const asm_keys[] = { + "FLOAT" , /* define float */ + "VECTOR" , /* define vector */ + "ENTITY" , /* define ent */ + "FIELD" , /* define field */ + "STRING" , /* define string */ + "FUNCTION" +}; + +static char *const asm_getline(size_t *byte, FILE *fp) { + char *line = NULL; + ssize_t read = getline(&line, byte, fp); + *byte = read; + if (read == -1) { + free (line); + //exit (1); + return NULL; + } + return line; +} + +#define asm_rmnewline(L,S) *((L)+*(S)-1) = '\0' +#define asm_skipwhite(L) \ + while((*(L)==' '||*(L)=='\t')) { \ + (L)++; \ + } + +void asm_init(const char *file, FILE **fp) { + *fp = fopen(file, "r"); + code_init(); +} + +void asm_close(FILE *fp) { + fclose(fp); + code_write(); +} + +void asm_parse(FILE *fp) { + char *data = NULL; + char *skip = NULL; + long line = 1; /* current line */ + size_t size = 0; /* size of line */ + + while ((data = asm_getline(&size, fp)) != NULL) { + skip = data; + asm_skipwhite(skip); + asm_rmnewline(skip, &size); + + #define DECLTYPE(X, CODE) \ + if (!strncmp(X, skip, strlen(X))) { \ + if (skip[strlen(X)] != ':') { \ + printf("%li: Missing `:` after decltype\n",line); \ + exit (1); \ + } \ + skip += strlen(X)+1; \ + asm_skipwhite(skip); \ + if(!isalpha(*skip)) { \ + printf("%li: Invalid identififer: %s\n", line, skip); \ + exit (1); \ + } else { \ + size_t offset_code = code_statements_elements+1; \ + size_t offset_chars = code_strings_elements +1; \ + size_t offset_globals = code_globals_elements +1; \ + size_t offset_functions = code_functions_elements +1; \ + size_t offset_fields = code_fields_elements +1; \ + size_t offset_defs = code_defs_elements +1; \ + CODE \ + /* silent unused warnings */ \ + (void)offset_code; \ + 
(void)offset_chars; \ + (void)offset_globals; \ + (void)offset_functions; \ + (void)offset_fields; \ + (void)offset_defs; \ + } \ + goto end; \ + } + + /* FLOAT */ + DECLTYPE(asm_keys[0], { + code_defs_add((prog_section_def){ + .type = TYPE_FLOAT, + .offset = offset_globals, /* global table */ + .name = offset_chars /* string table TODO */ + }); + float f = 0; /*TODO*/ + code_globals_add(*(int*)&f); + + }); + DECLTYPE(asm_keys[1], { + code_defs_add((prog_section_def){ + .type = TYPE_FLOAT, + .offset = offset_globals, /* global table */ + .name = offset_chars /* string table TODO */ + }); + float f1 = 0; + float f2 = 0; + float f3 = 0; + code_globals_add(*(int*)&f1); + code_globals_add(*(int*)&f2); + code_globals_add(*(int*)&f3); + }); + /* ENTITY */ DECLTYPE(asm_keys[2], {}); + /* FIELD */ DECLTYPE(asm_keys[3], {}); + /* STRING */ + DECLTYPE(asm_keys[4], { + code_defs_add((prog_section_def){ + .type = TYPE_STRING, + .offset = offset_globals, /* offset to offset in string table (for data)*/ + .name = offset_chars /* location of name in string table (for name)*/ + }); + }); + /* FUNCTION */ DECLTYPE(asm_keys[5], {}); + + /* if we make it this far then we have statements */ + { + size_t i = 0; + for (; i < sizeof(asm_instr)/sizeof(*asm_instr); i++) { + if (!strncmp(skip, asm_instr[i], strlen(asm_instr[i]))) { + /* TODO */ + goto end; + } + } + } + + /* if we made it this far something is wrong */ + printf("ERROR"); + + end: + free(data); + } +} diff --git a/code.c b/code.c index c0d4ea4..c2ae9c8 100644 --- a/code.c +++ b/code.c @@ -20,75 +20,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include -#include #include "gmqcc.h" -typedef struct { - uint16_t opcode; - - /* operand 1 */ - union { - int16_t s1; /* signed */ - uint16_t u1; /* unsigned */ - }; - /* operand 2 */ - union { - int16_t s2; /* signed */ - uint16_t u2; /* unsigned */ - }; - /* operand 3 */ - union { - int16_t s3; /* signed */ - uint16_t u3; /* unsigned */ - }; - - /* - * This is the same as the structure in darkplaces - * { - * unsigned short op; - * short a,b,c; - * } - * But this one is more sane to work with, and the - * type sizes are guranteed. - */ -} prog_section_statement; - -typedef struct { - /* The type is (I assume) - * 0 = ev_void - * 1 = ev_string - * 2 = ev_float - * 3 = ev_vector - * 4 = ev_entity - * 5 = ev_field - * 6 = ev_function - * 7 = ev_pointer - * 8 = ev_bad (is this right for uint16_t type?) - */ - uint16_t type; - uint16_t offset; /* offset in file? (what about length) */ - uint32_t name; /* offset in string table? (confused :() */ -} prog_section_both; - -/* - * var and field use the same structure. But lets not use the same - * name just for safety reasons? (still castable ...). - */ -typedef prog_section_both prog_section_def; -typedef prog_section_both prog_section_field; - -typedef struct { - int32_t entry; /* in statement table for instructions */ - uint32_t firstlocal; /* First local in local table */ - uint32_t locals; /* Total ints of params + locals */ - uint32_t profile; /* Always zero (engine uses this) */ - uint32_t name; /* name of function in string table */ - uint32_t file; /* file of the source file */ - uint32_t nargs; /* number of arguments */ - uint8_t argsize[8]; /* size of arguments (keep 8 always?) 
*/ -} prog_section_function; - typedef struct { uint32_t offset; /* Offset in file of where data begins */ uint32_t length; /* Length of section (how many of) */ @@ -146,7 +79,6 @@ VECTOR_MAKE(prog_section_field, code_fields ); VECTOR_MAKE(prog_section_function, code_functions ); VECTOR_MAKE(int, code_globals ); VECTOR_MAKE(char, code_strings ); -prog_header code_header ={0}; void code_init() { /* @@ -205,11 +137,8 @@ void code_test() { code_statements_add((prog_section_statement){INSTR_RETURN, {0}, {0}, {0}}); } -/* program header */ void code_write() { - code_init(); - code_test(); - + prog_header code_header={0}; code_header.version = 6; code_header.crc16 = 0; /* TODO: */ code_header.statements = (prog_section){sizeof(prog_header), code_statements_elements }; @@ -236,5 +165,50 @@ void code_write() { free(code_globals_data); free(code_strings_data); + util_debug("CODE","wrote program.dat\n\ + version: = %d\n\ + crc16: = %d\n\ + statements {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + defs {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + fields {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + functions {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + globals {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + strings {\n\ + .offset = %d\n\ + .length = %d\n\ + }\n\ + entfield: = %d\n", + code_header.version, + code_header.crc16, + code_header.statements.offset, + code_header.statements.length, + code_header.defs.offset, + code_header.defs.length, + code_header.fields.offset, + code_header.fields.length, + code_header.functions.offset, + code_header.functions.length, + code_header.strings.offset, + code_header.strings.length, + code_header.globals.offset, + code_header.globals.length, + code_header.entfield + ); + fclose(fp); } diff --git a/error.c b/error.c index 3f7cfb5..c0df207 100644 --- a/error.c +++ b/error.c @@ -21,9 +21,6 @@ * SOFTWARE. 
/*
 * Fixed-width integer types.
 *
 * A C99 (or newer) compiler provides them in <stdint.h>, and this code
 * base already relies on C99 features such as designated initializers and
 * compound literals.  The hand-written subset below is only a fallback
 * for compilers that do not announce C99 support.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#   include <stdint.h>
#else
#   include <limits.h>
#   if UCHAR_MAX == 0xFF
        typedef signed char    int8_t;
        typedef unsigned char  uint8_t;
#   endif
#   if SHRT_MAX == 0x7FFF
        typedef short          int16_t;
        typedef unsigned short uint16_t;
#   elif INT_MAX == 0x7FFF
        typedef int            int16_t;
        typedef unsigned int   uint16_t;
#   endif
#   if INT_MAX == 0x7FFFFFFF
        typedef int            int32_t;
        typedef unsigned int   uint32_t;
#   elif LONG_MAX == 0x7FFFFFFF
        typedef long           int32_t;
        typedef unsigned long  uint32_t;
#   endif
#   ifdef _LP64 /* long pointer == 64 */
        typedef unsigned long  uintptr_t;
        typedef long           intptr_t;
#   else
        typedef unsigned int   uintptr_t;
        typedef int            intptr_t;
#   endif
#endif

/*
 * Ensure type sizes are correct: a false condition yields an array of
 * negative size, which fails to compile.  Every check has its own name.
 */
typedef char uint8_size_is_correct  [sizeof(uint8_t)  == 1 ? 1 : -1];
typedef char uint16_size_is_correct [sizeof(uint16_t) == 2 ? 1 : -1];
typedef char uint32_size_is_correct [sizeof(uint32_t) == 4 ? 1 : -1];
typedef char int8_size_is_correct   [sizeof(int8_t)   == 1 ? 1 : -1];
typedef char int16_size_is_correct  [sizeof(int16_t)  == 2 ? 1 : -1];
typedef char int32_size_is_correct  [sizeof(int32_t)  == 4 ? 1 : -1];
/* intptr_t / uintptr_t correct size check */
typedef char intptr_size_is_correct [sizeof(intptr_t)  == sizeof(int*) ? 1 : -1];
typedef char uintptr_size_is_correct[sizeof(uintptr_t) == sizeof(int*) ? 1 : -1];
/*
 * One statement of the program: an opcode followed by three 16-bit
 * operands.  Each operand can be read either as signed (sN) or as
 * unsigned (uN) through an anonymous union.
 *
 * This mirrors the structure used by darkplaces,
 *     { unsigned short op; short a,b,c; }
 * but with explicitly sized members.
 */
typedef struct {
    uint16_t opcode;
    union { int16_t s1; uint16_t u1; }; /* operand 1: signed / unsigned */
    union { int16_t s2; uint16_t u2; }; /* operand 2: signed / unsigned */
    union { int16_t s3; uint16_t u3; }; /* operand 3: signed / unsigned */
} prog_section_statement;

/*
 * Record shared by the definition table and the field table.
 *
 * Values of `type`:
 *   0 = ev_void      3 = ev_vector    6 = ev_function
 *   1 = ev_string    4 = ev_entity    7 = ev_pointer -- engine only
 *   2 = ev_float     5 = ev_field     8 = ev_bad     -- engine only
 */
typedef struct {
    uint16_t type;
    uint16_t offset;
    uint32_t name;
} prog_section_both;

/* Same layout under two names, one per table. */
typedef prog_section_both prog_section_def, prog_section_field;

/* Record describing one function of the program. */
typedef struct {
    int32_t  entry;      /* in statement table for instructions  */
    uint32_t firstlocal; /* First local in local table           */
    uint32_t locals;     /* Total ints of params + locals        */
    uint32_t profile;    /* Always zero (engine uses this)       */
    uint32_t name;       /* name of function in string table     */
    uint32_t file;       /* file of the source file              */
    uint32_t nargs;      /* number of arguments                  */
    uint8_t  argsize[8]; /* size of arguments (keep 8 always?)   */
} prog_section_function;
+//=================================================================== +void asm_init (const char *, FILE **); +void asm_close(FILE *); +void asm_parse(FILE *); #endif diff --git a/info.txt b/info.txt deleted file mode 100644 index 8916a97..0000000 --- a/info.txt +++ /dev/null @@ -1,12 +0,0 @@ -there are 3 accessible memory zones - -globals: - array of 32bit ints/floats, mixed, LE, -entities: - structure is up to the engine but the fields are a linear array - of mixed ints/floats, there are globals referring to the offsets - of these in the entity struct so there are ADDRESS and STOREP and - LOAD instructions that use globals containing field offsets. -strings: - a static array in the progs.dat, with file parsing creating - additional constants, and some engine fields are mapped by - address as well to unique string offsets diff --git a/lex.c b/lex.c index bf24f18..5673701 100644 --- a/lex.c +++ b/lex.c @@ -20,11 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include -#include -#include -#include #include "gmqcc.h" /* diff --git a/main.c b/main.c index 89ccec2..d087c4e 100644 --- a/main.c +++ b/main.c @@ -20,33 +20,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include -#include -#include -#include -#include "gmqcc.h" +#include "gmqcc.h" int main(int argc, char **argv) { argc--; argv++; - const char *ifile = argv[0]; + //const char *ifile = argv[0]; + FILE *fp; - FILE *fp = fopen(ifile, "r"); - if (!fp) { - fclose(fp); - return printf("ERROR Source file %s %s\n", ifile, strerror(errno)); - } else { - struct lex_file *lex = lex_open(fp); - lex->name = util_strdup(ifile); - if (!lex) { - fclose(fp); - return 0; - } - parse_gen(lex); - mem_d(lex->name); - lex_close(lex); - } + /*TODO: proper interface swith switches*/ - code_write(); + asm_init ("test.qs", &fp); + asm_parse(fp); + asm_close(fp); return 0; } diff --git a/parse.c b/parse.c index a218e63..e763e56 100644 --- a/parse.c +++ b/parse.c @@ -20,10 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include -#include -#include #include "gmqcc.h" /* compile-time constant for type constants */ diff --git a/typedef.c b/typedef.c index 6c10d53..0118861 100644 --- a/typedef.c +++ b/typedef.c @@ -20,9 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include /* replace if stdint.h doesn't exist! */ -#include #include "gmqcc.h" static typedef_node *typedef_table[1024]; diff --git a/util.c b/util.c index b6264cc..6e02fe7 100644 --- a/util.c +++ b/util.c @@ -20,9 +20,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include -#include -#include #include #include "gmqcc.h"