From: Dale Weiler Date: Mon, 9 Apr 2012 10:42:06 +0000 (-0400) Subject: initial commit X-Git-Tag: 0.1-rc1~716 X-Git-Url: https://git.rm.cloudns.org/?a=commitdiff_plain;h=48a95ec3c9a4a8601f97c8ac1adbd7d94ba15465;p=xonotic%2Fgmqcc.git initial commit --- 48a95ec3c9a4a8601f97c8ac1adbd7d94ba15465 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ae46f26 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +CC = gcc +CFLAGS = -O3 -Wall +OBJ = main.o lex.o error.o parse.o + +%.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) + +gmqcc: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) + +clean: + rm -f *.o dpqcc diff --git a/README b/README new file mode 100644 index 0000000..d069def --- /dev/null +++ b/README @@ -0,0 +1,41 @@ +This is my work in progress QuakeC compiler. There are very few _good_ qc +compilers out there on the internet that can be used in the opensource +community. There are a lot of mediocre compilers, but no one wants those. +This is the solution for that, for once a proper quake c compiler that is +capable of doing proper optimization. The design so far of this compiler +is basic, because it doesn't actually compile code yet. + +gmqcc.h + This is the common header with all definitions, structures, and + constants for everything. + +error.c + This is the error subsystem, this handles the output of good detailed + error messages (not currently, but will), with colors and such. + +lex.c + This is the lexer, a very small basic step-seek lexer that can be easily + changed to add new tokens, very retargetable. + +main.c + This is the core compiler entry, handles switches (will) to toggle on + and off certain compiler features. + +parse.c + This is the parser which goes over all tokens and generates a parse tree + (not currently, but will) and checks for syntax correctness. + +README + This is the file you're currently reading + +Makefile + The makefile, when sources are added you should add their objects to the OBJ = + line otherwise the build will not pick it up. 
Trivial stuff, small + easy to manage makefile, no need to complicate it. + Some targets: + #make gmqcc + Builds gmqcc, creating a gmqcc binary file in the current + directory as the makefile. + + #make clean + Cleans the build files left behind by a previous build diff --git a/error.c b/error.c new file mode 100644 index 0000000..c220d72 --- /dev/null +++ b/error.c @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include + +/* + * Compiler error system, this handles the error printing, and managing + * such as after so many errors just stop the compilation, and other + * intereting like colors for the console. 
/*
 * Error reporting: prints a (optionally coloured) category label followed by
 * a printf-style message on stderr, and aborts the program once more than
 * `error_max` errors have been reported.
 */
#ifndef WIN32
# define CON_BLACK   30
# define CON_RED     31
# define CON_GREEN   32
# define CON_BROWN   33
# define CON_BLUE    34
# define CON_MAGENTA 35
# define CON_CYAN    36
# define CON_WHITE   37
/* ANSI colour used for the label of each error category (same order as error_list). */
static const int error_color[] = {
    CON_RED,
    CON_CYAN,
    CON_MAGENTA
};
#endif

int error_total = 0;  /* number of errors reported so far          */
int error_max   = 10; /* reporting one more than this many bails out */

/*
 * Category labels, indexed by (status - SHRT_MAX).  The order follows the
 * status codes callers pass in: SHRT_MAX+0 is a lexing error, SHRT_MAX+1 a
 * parsing error and SHRT_MAX+2 an internal error.
 */
static const char *const error_list[] = {
    "Lexing Error:",
    "Parsing Error:",
    "Internal Error:"
};

#define ERROR_KINDS ((int)(sizeof(error_list) / sizeof(*error_list)))

/*
 * Report one error.
 *
 *   status  error category, SHRT_MAX + {0, 1, 2}; any other value is
 *           reported under the "Internal Error:" label
 *   msg     printf-style format string, followed by its arguments
 *
 * Returns `status` unchanged so callers can `return error(...)`.
 * Calls exit(-1) instead of reporting when error_max errors were already
 * reported.
 */
int error(int status, const char *msg, ...) {
    char    label[128];
    char    text[1024 * 4];
    va_list va;
    int     kind;

    if (error_total + 1 > error_max) {
        fprintf(stderr, "%d errors and more following, bailing\n", error_total);
        exit (-1);
    }
    error_total++;

    /* map the status onto a table index without ever leaving the tables */
    if (status < SHRT_MAX || status >= SHRT_MAX + ERROR_KINDS)
        kind = ERROR_KINDS - 1;
    else
        kind = status - SHRT_MAX;

#ifndef WIN32
    /*
     * The message itself is printed in the colour of the previous category;
     * wrap around so the first category does not index element -1.
     */
    snprintf(label, sizeof(label), "\033[0;%dm%s \033[0;%dm",
             error_color[kind],
             error_list[kind],
             error_color[(kind + ERROR_KINDS - 1) % ERROR_KINDS]);
#else
    snprintf(label, sizeof(label), "%s ", error_list[kind]);
#endif

    /* bounded formatting: over-long messages are truncated, not overflowed */
    va_start(va, msg);
    vsnprintf(text, sizeof(text), msg ? msg : "", va);
    va_end(va);

    fputs(label, stderr);
    fputs(text,  stderr);

#ifndef WIN32
    fputs("\033[0m", stderr); /* reset the terminal colour */
#endif

    fflush(stderr);

    return status;
}
/*
 * gmqcc.h -- common header: language type ids, instruction opcodes, the
 * lexer state structure, token / lexer-state ids, error codes and the
 * prototypes shared between the translation units.
 */
#ifndef DPQCC_HDR
#define DPQCC_HDR
#include <limits.h> /* SHRT_MAX, used by the ERROR_* codes */
#include <stdio.h>  /* FILE, used by struct lex_file       */
#include <stdlib.h> /* malloc/free behind mem_a/mem_d      */

/* The types supported by the language */
#define TYPE_VOID     0
#define TYPE_STRING   1
#define TYPE_FLOAT    2
#define TYPE_VECTOR   3
#define TYPE_ENTITY   4
#define TYPE_FIELD    5
#define TYPE_FUNCTION 6
#define TYPE_POINTER  7

/*
 * there are 3 accessible memory zones -
 * globals
 *     array of 32bit ints/floats, mixed, LE,
 * entities
 *     structure is up to the engine but the fields are a linear array
 *     of mixed ints/floats, there are globals referring to the offsets
 *     of these in the entity struct so there are ADDRESS and STOREP and
 *     LOAD instructions that use globals containing field offsets.
 * strings
 *     a static array in the progs.dat, with file parsing creating
 *     additional constants, and some engine fields are mapped by
 *     address as well to unique string offsets
 */

/*
 * Instructions
 * These are the external instructions supported by the interpreter;
 * this is what things compile to (from the C code).  These are not the
 * internal instructions for support like int, and such (which are
 * translated).
 */
#define INSTR_DONE      0
/* math */
#define INSTR_MUL_F     1 /* multiplication float         */
#define INSTR_MUL_V     2 /* multiplication vector        */
#define INSTR_MUL_FV    3 /* multiplication float->vector */
#define INSTR_MUL_VF    4 /* multiplication vector->float */
#define INSTR_DIV_F     5
#define INSTR_ADD_F     6
#define INSTR_ADD_V     7
#define INSTR_SUB_F     8
#define INSTR_SUB_V     9
/* compare */
#define INSTR_EQ_F      10
#define INSTR_EQ_V      11
#define INSTR_EQ_S      12
#define INSTR_EQ_E      13
#define INSTR_EQ_FNC    14
#define INSTR_NE_F      15
#define INSTR_NE_V      16
#define INSTR_NE_S      17
#define INSTR_NE_E      18
#define INSTR_NE_FNC    19
/* multi compare */
#define INSTR_LE        20
#define INSTR_GE        21
#define INSTR_LT        22
#define INSTR_GT        23
/* load and store */
#define INSTR_LOAD_F    24
#define INSTR_LOAD_V    25
#define INSTR_LOAD_S    26
#define INSTR_LOAD_ENT  27
#define INSTR_LOAD_FLD  28
#define INSTR_LOAD_FNC  29
#define INSTR_STORE_F   31
#define INSTR_STORE_V   32
#define INSTR_STORE_S   33
#define INSTR_STORE_ENT 34
#define INSTR_STORE_FLD 35
#define INSTR_STORE_FNC 36
/* others */
#define INSTR_ADDRESS   30
#define INSTR_RETURN    37
#define INSTR_NOT_F     38
#define INSTR_NOT_V     39
#define INSTR_NOT_S     40
#define INSTR_NOT_ENT   41
#define INSTR_NOT_FNC   42
#define INSTR_IF        43
#define INSTR_IFNOT     44
#define INSTR_CALL0     45
#define INSTR_CALL1     46
#define INSTR_CALL2     47
#define INSTR_CALL3     48
#define INSTR_CALL4     49
#define INSTR_CALL5     50
#define INSTR_CALL6     51
#define INSTR_CALL7     52
#define INSTR_CALL8     53
#define INSTR_STATE     54
#define INSTR_GOTO      55
#define INSTR_AND       56
#define INSTR_OR        57
/* NOTE(review): 58 is not assigned in this table -- confirm it is intentional */
#define INSTR_BITAND    59
#define INSTR_BITOR     60

/* allocation wrappers, so the allocator can be swapped in one place */
#define mem_a(x) malloc(x)
#define mem_d(x) free (x)

/*
 * This is the smallest lexer I've ever written: and I must say, it's quite
 * a bit nicer than those large bulky complex parsers that most people write
 * which have some sort of a complex state.
 */
struct lex_file {
    /*
     * This is a simple state for lexing, no need to be complex for qc
     * code.  It's trivial stuff.
     */
    FILE *file;         /* underlying source stream                       */
    char  peek[5];      /* pushed-back characters; extend for deeper peeks */
    int   last;         /* number of characters currently held in peek    */
    int   current;      /* write position inside lastok                   */
    int   length;       /* running count of characters left to read       */
    int   size;         /* copy of the initial length, never changed      */
    char  lastok[8192]; /* No token shall ever be bigger than this!       */
};

/*
 * It's important that this table never exceed 32 keywords, the ascii
 * table starts at 33 (which we need)
 */
#define TOKEN_DO       0
#define TOKEN_ELSE     1
#define TOKEN_IF       2
#define TOKEN_WHILE    3
#define TOKEN_BREAK    4
#define TOKEN_CONTINUE 5
#define TOKEN_RETURN   6
#define TOKEN_GOTO     7
#define TOKEN_FOR      8

/*
 * Lexer state constants, these are numbers for where exactly in
 * the lexing the lexer is at.  Or where it decided to stop if a lexer
 * error occurs.
 */
#define LEX_COMMENT 128 /* higher than ascii */
#define LEX_CHRLIT  129
#define LEX_STRLIT  130
#define LEX_IDENT   131
#define LEX_DO      132
#define LEX_ELSE    133
#define LEX_IF      134
#define LEX_WHILE   135
#define LEX_INCLUDE 136
#define LEX_DEFINE  137

/* lex.c */
int              lex_token(struct lex_file *);
void             lex_reset(struct lex_file *);
int              lex_debug(struct lex_file *);
int              lex_close(struct lex_file *);
struct lex_file *lex_open (const char *);

/* errors */
#define ERROR_LEX      (SHRT_MAX+0)
#define ERROR_PARSE    (SHRT_MAX+1)
#define ERROR_INTERNAL (SHRT_MAX+2)
int error(int, const char *, ...);

/* parse.c */
int parse(struct lex_file *);

#endif
whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include "gmqcc.h" + +static const char *const lex_keywords[] = { + "do", "else", "if", "while", + "break", "continue", "return", "goto", + "for" +}; + +struct lex_file *lex_open(const char *name) { + struct lex_file *lex = mem_a(sizeof(struct lex_file)); + if (lex) { + lex->file = fopen(name, "r"); + fseek(lex->file, 0, SEEK_END); + lex->length = ftell(lex->file); + lex->size = lex->length; /* copy, this is never changed */ + fseek(lex->file, 0, SEEK_SET); + lex->last = 0; + + memset(lex->peek, 0, sizeof(lex->peek)); + } + return lex; +} + +int lex_close(struct lex_file *file) { + int ret = -1; + if (file) { + ret = fclose(file->file); + mem_d(file); + } + return ret; +} + +static void lex_addch(int ch, struct lex_file *file) { + if (file->current < sizeof(file->lastok)-1) + file->lastok[file->current++] = (char)ch; + if (file->current == sizeof(file->lastok)-1) + file->lastok[file->current] = (char)'\0'; +} +static inline void lex_clear(struct lex_file *file) { + file->current = 0; +} + +/* + * read in inget/unget character from a lexer stream. + * This doesn't play with file streams, the lexer has + * it's own internal state for this. 
+ */ +static int lex_inget(struct lex_file *file) { + file->length --; + if (file->last > 0) + return file->peek[--file->last]; + return fgetc(file->file); +} +static void lex_unget(int ch, struct lex_file *file) { + if (file->last < sizeof(file->peek)) + file->peek[file->last++] = ch; + file->length ++; +} + +/* + * This is trigraph and digraph support, a feature not qc compiler + * supports. Moving up in this world! + */ +static int lex_trigraph(struct lex_file *file) { + int ch; + if ((ch = lex_inget(file)) != '?') { + lex_unget(ch, file); + return '?'; + } + + ch = lex_inget(file); + switch (ch) { + case '(' : return '[' ; + case ')' : return ']' ; + case '/' : return '\\'; + case '\'': return '^' ; + case '<' : return '{' ; + case '>' : return '}' ; + case '!' : return '|' ; + case '-' : return '~' ; + case '=' : return '#' ; + default: + lex_unget('?', file); + lex_unget(ch , file); + return '?'; + } + return '?'; +} +static int lex_digraph(struct lex_file *file, int first) { + int ch = lex_inget(file); + switch (first) { + case '<': + if (ch == '%') return '{'; + if (ch == ':') return '['; + break; + case '%': + if (ch == '>') return '}'; + if (ch == ':') return '#'; + break; + case ':': + if (ch == '>') return ']'; + break; + } + + lex_unget(ch, file); + return first; +} + +static int lex_getch(struct lex_file *file) { + int ch = lex_inget(file); + if (ch == '?') + return lex_trigraph(file); + if (ch == '<' || ch == ':' || ch == '%') + return lex_digraph (file, ch); + + return ch; +} + +static int lex_get(struct lex_file *file) { + int ch; + if (!isspace(ch = lex_getch(file))) + return ch; + + /* skip over all spaces */ + while (isspace(ch) && ch != '\n') + ch = lex_getch(file); + + if (ch == '\n') + return ch; + + lex_unget(ch, file); + return ' '; +} + +static int lex_skipchr(struct lex_file *file) { + int ch; + int it; + + lex_clear(file); + lex_addch('\'', file); + + for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) { + lex_addch(ch, file); 
+ + if (ch == '\n') + return ERROR_LEX; + if (ch == '\\') + lex_addch(lex_getch(file), file); + } + lex_addch('\'', file); + lex_addch('\0', file); + + if (it > 2) + return ERROR_LEX; + + return LEX_CHRLIT; +} + +static int lex_skipstr(struct lex_file *file) { + int ch; + lex_clear(file); + lex_addch('"', file); + + while ((ch = lex_getch(file)) != '"') { + if (ch == '\n' || ch == EOF) + return ERROR_LEX; + + lex_addch(ch, file); + if (ch == '\\') + lex_addch(lex_inget(file), file); + } + + lex_addch('"', file); + lex_addch('\0', file); + + return LEX_STRLIT; +} +static int lex_skipcmt(struct lex_file *file) { + int ch; + lex_clear(file); + ch = lex_getch(file); + + if (ch == '/') { + lex_addch('/', file); + lex_addch('/', file); + + while ((ch = lex_getch(file)) != '\n') { + if (ch == '\\') { + lex_addch(ch, file); + lex_addch(lex_getch(file), file); + } else { + lex_addch(ch, file); + } + } + lex_addch('\0', file); + return LEX_COMMENT; + } + + if (ch != '*') { + lex_unget(ch, file); + return '/'; + } + + lex_addch('/', file); + + /* hate this */ + do { + lex_addch(ch, file); + while ((ch = lex_getch(file)) != '*') { + if (ch == EOF) + return error(ERROR_LEX, "malformatted comment"," "); + else + lex_addch(ch, file); + } + lex_addch(ch, file); + } while ((ch = lex_getch(file)) != '/'); + + lex_addch('/', file); + lex_addch('\0', file); + + return LEX_COMMENT; +} + +static int lex_getsource(struct lex_file *file) { + int ch = lex_get(file); + + /* skip char/string/comment */ + switch (ch) { + case '\'': return lex_skipchr(file); + case '"': return lex_skipstr(file); + case '/': return lex_skipcmt(file); + default: return ch; + } +} + +int lex_token(struct lex_file *file) { + int ch = lex_getsource(file); + int it; + + /* valid identifier */ + if (ch > 0 && (ch == '_' || isalpha(ch))) { + lex_clear(file); + while (ch > 0 && (isalpha(ch) || isdigit(ch) || ch == '_')) { + lex_addch(ch, file); + ch = lex_getsource(file); + } + lex_unget(ch, file); + lex_addch('\0', 
file); + + /* look inside the table for a keyword .. */ + for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++) + if (!strncmp(file->lastok, lex_keywords[it], sizeof(lex_keywords[it]))) + return it; + + return LEX_IDENT; + } + return ch; +} + +void lex_reset(struct lex_file *file) { + file->current = 0; + file->last = 0; + file->length = file->size; + fseek(file->file, 0, SEEK_SET); + + memset(file->peek, 0, sizeof(file->peek )); + memset(file->lastok, 0, sizeof(file->lastok)); +} + +int lex_debug(struct lex_file *file) { + int list_do = 0; + int list_else = 0; + int list_if = 0; + int list_while = 0; + int list_break = 0; + int list_continue = 0; + int list_return = 0; + int list_goto = 0; + int list_for = 0; + int token = 0; + printf("===========================\nTOKENS: \n===========================\n"); + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { + if (token != -1) { + switch (token) { + case 0: list_do ++; break; + case 1: list_else ++; break; + case 2: list_if ++; break; + case 3: list_while ++; break; + case 4: list_break ++; break; + case 5: list_continue++; break; + case 6: list_return ++; break; + case 7: list_goto ++; break; + case 8: list_for ++; break; + } + } + if (token >= 33 && token <= 126) + putchar(token); + } + printf("\n===========================\nBRANCHES \n===========================\n"); + printf("\t if % 8d\n", list_if); + printf("\t else % 8d\n", list_else); + printf("===========================\nLOOPS \n===========================\n"); + printf("\t for % 8d\n", list_for); + printf("\t while % 8d\n", list_while); + printf("\t do % 8d\n", list_do); + printf("===========================\nSTATEMENTS \n===========================\n"); + printf("\t break % 8d\n", list_break); + printf("\t continue % 8d\n", list_continue); + printf("\t return % 8d\n", list_return); + printf("\t goto % 8d\n", list_goto); + printf("===========================\nIDENTIFIERS\n===========================\n"); + 
lex_reset(file); + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) + if (token == LEX_IDENT) + printf("%s ", file->lastok); + lex_reset(file); + return 1; +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..8718a72 --- /dev/null +++ b/main.c @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "gmqcc.h" + +int main(int argc, char **argv) { + if (argc <= 1) { + printf("Usage: %s infile.qc outfile\n", *argv); + return -1; + } + + struct lex_file *lex = lex_open(argv[1]); + lex_debug(lex); + parse (lex); + lex_close(lex); + + return 0; +} diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..a43981f --- /dev/null +++ b/parse.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include "gmqcc.h" +int parse(struct lex_file *file) { + int token = 0; + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { + switch (token) { + case TOKEN_IF: + token = lex_token(file); + while ((token == ' ' || token == '\n') && file->length >= 0) + token = lex_token(file); + + if (token != '(') + error(ERROR_PARSE, "Expected `(` after if\n", ""); + break; + } + } + lex_reset(file); + + return 1; +}