From 48a95ec3c9a4a8601f97c8ac1adbd7d94ba15465 Mon Sep 17 00:00:00 2001 From: Dale Weiler Date: Mon, 9 Apr 2012 06:42:06 -0400 Subject: [PATCH 1/1] initial commit --- Makefile | 12 ++ README | 41 +++++++ error.c | 87 ++++++++++++++ gmqcc | Bin 0 -> 16675 bytes gmqcc.h | 174 ++++++++++++++++++++++++++++ lex.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.c | 37 ++++++ parse.c | 42 +++++++ 8 files changed, 738 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100644 error.c create mode 100755 gmqcc create mode 100644 gmqcc.h create mode 100644 lex.c create mode 100644 main.c create mode 100644 parse.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ae46f26 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +CC = gcc +CFLAGS = -O3 -Wall +OBJ = main.o lex.o error.o parse.o + +%.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) + +gmqcc: $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) + +clean: + rm -f *.o dpqcc diff --git a/README b/README new file mode 100644 index 0000000..d069def --- /dev/null +++ b/README @@ -0,0 +1,41 @@ +This is my work in progress C compiler. There are very few _good_ qc +compilers out there on the internet that can be used in the opensource +community. There are a lot of mediocre compilers, but no one wants those. +This is the solution for that, for once a proper quake c compiler that is +capable of doing proper optimization. The design so far of this compiler +is basic, because it doesn't actually compile code yet. + +gmqcc.h + This is the common header with all definitions, structures, and + constants for everything. + +error.c + This is the error subsystem, this handles the output of good detailed + error messages (not currently, but will), with colors and such. + +lex.c + This is the lexer, a very small basic step-seek lexer that can be easily + changed to add new tokens, very retargetable. 
+ +main.c + This is the core compiler entry, handles switches (will) to toggle on + and off certain compiler features. + +parse.c + This is the parser which goes over all tokens and generates a parse tree + (not currently, but will) and check for syntax correctness. + +README + This is the file you're currently reading + +Makefile + The makefile, when sources are added you should add their objects to the OBJ = + line otherwise the build will not pick it up. Trivial stuff, small + easy to manage makefile, no need to complicate it. + Some targets: + #make gmqcc + Builds gmqcc, creating a gmqcc binary file in the same + directory as the makefile. + + #make clean + Cleans the build files left behind by a previous build diff --git a/error.c b/error.c new file mode 100644 index 0000000..c220d72 --- /dev/null +++ b/error.c @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Compiler error system: prints a categorised diagnostic to stderr
 * (coloured with ANSI escapes unless WIN32 is defined) and terminates
 * the process once more than error_max diagnostics have been requested.
 */
#ifndef WIN32
#   define CON_BLACK   30
#   define CON_RED     31
#   define CON_GREEN   32
#   define CON_BROWN   33
#   define CON_BLUE    34
#   define CON_MAGENTA 35
#   define CON_CYAN    36
#   define CON_WHITE   37
/* One colour per error category, indexed like error_list below. */
static const int error_color[] = {
    CON_RED,
    CON_CYAN,
    CON_MAGENTA
};
#endif
int error_total = 0;  /* diagnostics emitted so far          */
int error_max   = 10; /* diagnostics allowed before bailing  */

/*
 * Category labels, indexed by (status - SHRT_MAX).  The order follows the
 * ERROR_* values in gmqcc.h: ERROR_LEX is SHRT_MAX+0, ERROR_PARSE is
 * SHRT_MAX+1 and ERROR_INTERNAL is SHRT_MAX+2.
 */
static const char *const error_list[] = {
    "Lexing Error:",
    "Parsing Error:",
    "Internal Error:"
};

enum { ERROR_KINDS = (int)(sizeof(error_list) / sizeof(*error_list)) };

/*
 * Print a diagnostic of category `status` to stderr.
 *
 *   status  one of the ERROR_* codes (SHRT_MAX + 0..2); any other value
 *           is reported under the "Internal Error:" label
 *   msg     printf-style format string, followed by its arguments
 *
 * Returns `status` unchanged so callers can write `return error(...)`.
 * Calls exit(-1) instead of printing once error_max diagnostics have
 * already been emitted.
 */
int error(int status, const char *msg, ...) {
    int     kind;
    va_list va;

    if (error_total + 1 > error_max) {
        fprintf(stderr, "%d errors and more following, bailing\n", error_total);
        exit (-1);
    }
    error_total ++;

    /* never index the tables with an unknown status */
    if (status >= SHRT_MAX && status - SHRT_MAX < ERROR_KINDS)
        kind = status - SHRT_MAX;
    else
        kind = ERROR_KINDS - 1;

    /* category label (coloured), then switch colour for the message text */
#ifndef WIN32
    fprintf(stderr, "\033[0;%dm%s \033[0;%dm",
        error_color[kind],
        error_list [kind],
        /* previous table entry, wrapping so kind 0 stays in bounds */
        error_color[(kind + ERROR_KINDS - 1) % ERROR_KINDS]
    );
#else
    fprintf(stderr, "%s ", error_list[kind]);
#endif

    /* write straight to the stream: no fixed-size staging buffer to overflow */
    va_start (va, msg);
    vfprintf (stderr, msg, va);
    va_end   (va);

    /* reset colour */
#ifndef WIN32
    fputs ("\033[0m", stderr);
#endif

    fflush (stderr);

    return status;
}
zD7zFtl`cVs^u|iyBG7R%?*t`#7vo2DOu{c0KL>v2OXMHKsEssN@=OI2kSCw0 zT^CB6hG@LZlO3v`Y?2S1l-cRB5=Oq0-C39Kp0eou=#=-atRt64O+Nyv?3)E{$bvUy z!MA3?AIpMI&w~52;H$IX-^zmjOBTE&3r^jq;g^{L07mN{TJNLb>$2b#S@1bo@a0+X zE3@Exv*0_j;CBL_h^TdPRt<7u4ifzeeuT#|@TI^n#0;;R;z5qF5Oo+OpKo(>Ym2Yl zs0|oCAHy=OqjkA}H8eB@+qW=(N29@f6{~!0L8Fc}w6^(MSi2EuscUXy{y?BLz#0sH zQxn_P-WF(VF&bFIW}SXm7QCzxVZ+LLsF0HES6Z{?^{&Q&;sC8-mDD&8PU~7FP1>RO zq=tgZW}5KiDFXOh6K;+{3r)C+(PYPC!l`bRbQ4Z(P^rprA+c<%HLma1~bx z-(tdZ36QDHgsT`xMI9!bVx~%+CfppCA2i|SpxAA~)mW0PeI}gZu}b?*xH%3yZNlli zpwa;oPUjAl4w`T}$Ec)--m%AikCBaS!Z5}z$DGhZFW8T&^27~irfMuP?Nc0`Q$5HL zzlDn9Clg51-Y1@>M*KMEPZLj5BHqXOH;JdI5I?~AKM+q-AikgTuMkf|AMfV;OT^QV z$2&QHgm@b2cpK*r5l=%Lui^Yph^L{AS8@LP#OD(4;r#cAr=g6`<@`5@rzH{BIR6Oo zG^BAC=N}@ThBD4L{{Zncgz=%z08IN5@icVtlbjC_PeT?z&iT8Dr=g1Xaefo=G(_5o{-_>Z&}*RAi~3?mN%_UZ z`lH?5MekNbetV}Lc~cL)HB_~xVxZ3j8GYdS&MSxv)Z}_t?01m=u)CYaSC5XFc7?|D zu0F$>IL?Q+Tcx#n%2&`{`{8b%k8-qNATsE6t` zEe2#}Hj-Yjm`ITanXB)0BsA~Dz_`Xw~-1!ws0WM^bC zt(V}lhpz?~zZ+ABcs+V^Y$oI*$KuNXb%#c#Im6Eg`QVq)kZ%ZmOYdtCtlhc{Bu zY+i)^C+DaguQ>O9owQF|^tvPP&)Gwb8M)cnMYCUt{9K5jT(5ftB9rWUs@@2lxqMrj zY@|apvLB7S8B);|1!(%7yDgFS0_uSGc?Vq2JK%Y1WJN*zyO>_op2PQ$TVDky+JE@V z#D|hR_0Pgj6A9E4_rs%v`@i;)+6EHa0I|dOQEoNm?xdVfxqB)11>{Km<4+QacqMPR zhk4iV84-IDHY=kV7KfAeCniVs(5`#--yVA`*WitCylUd`Y5ShFL(!zsnw~N{hzu*tx7-bI(a_+Gt2`FqKqGy5V4Or{sHiAik2rZL6UuJTCe1+ z?Ch@ZLs+@|ZRC^opug{P=pQ!ePvPVIysKWiLE zf6-)K6dKuJw4A?3f-lS7pbq?+Pz(;FbPy0|KxrsxGKg+GNL4wz)})4u29-d}SmF0} zD3X`t;08D-T5u)RQiNKP6*#|Qz)E}*?@Gio>CND4sy#_7)XX@IH9zkp4NyfCmub4r zNqQKZ8K;{!sd!HnAH;mY5Evz)k%Tk+5_zju!q_?h(qtqau!s*3Q|goz8GK zj8m0mTx$;M2|gxP8u_bwH07^xja-MFP;`Ff43A^ftn{_$>^g}O=@lOZ%oW}r>kl2c z(!+X=YaZ51fuyJ}9>L~8ipDQa4OP4z`EEL$B5++L1J_`l>~ESftH|@{2zAy~&hSI< zhHw}kPY+sR(4A2n{! zwQF*_u+1q@>+mqVP7MAz886lO@&!JuGrW-(oZ$!IE8>zf+{!Z;>t)E4b-h&@IqBS+ zi@Z1Vsm~d1LoT+2nh-grM_EPPtIlWgs~|MopW_U_iFGzm!h>B=-{AGhc?7#}pa^p) zIG!fdLzua!7Uu7}ozA`$b-%$Pf~Do1Z|6C~e}t@vZ!&U}K3G3VORXrn0YGMK$p!?! 
zNqjqaVfOxigT3^93~VHKYiY(QEU}dFACmFVQSDh9Mz8mjM&tRX51w*7Xd8rmu3C;A6jr8)CjWpXu&_8}Y9JPR?iL z$1>>$_G449MC4g_pq#HPzTV0UXw^(1b9?U5OpEOh({SHmc)$`Y#?cyFgJVvxA@bXz zzUSk%;UhU8=>Kly4)D7RdT1bL_=wdQ*FZctYv@gjVe21@U?Hx9_S|!2e~HE#4kf0N zHRC5DE~n&ATgkq$U*N)Q=yVQQ9*lN-Z!arKcyB8!8uqR&D|*+vrmX0#wMFkznC?AD zBZDXq`_~}^jZ!zij|0lX1}F_58_01JG4E&W*c&gUlY!yIM!xG;9g7ji4eWF6&RvyO z#`xZUup9|%qJ+Qsqq<4%L{I&NkQScgP z-|oFfumfNtf9>5ZxM#t^8GG-yIIr*7s#Wf(!}T=q-Ve9R&z1!B0jpSqLJ3}@F7ENvP~XV=Rjm*edE19Jbw46$Q|i1~qD?ZC!Ij7L$l zx}fWoj@e5?442yfqoYwj6pec8uVMr$m2B`;u7AE}c+kxAG-FY+8D+_4-1^TogC;ti zU3jpgD3TZSmvA3@5Rpm6e@stHe4ALZr8o^7v&6T6Q}fdsdchLEodccd z1Dt6fP;TZ2Eb$vtduVtZV#us-8!h@an1_6tB`Zf+U>64^ig*kr;ockIepUI&FO!*_jCF)_W@3y zbN7KpFIqA|V@oa?gQFh#$hqe&hV>q@bS4see#&sf)-rO@CA{=6`aZpGxHOvc^X{t= zXrg(?_Jj85I7IWi2*XOxdzX4L>>h$~eb1wAJEzaN>83NL??9L%6m^l!L*ZOxb+pUH z1;cKQDEI=<*tg$-wdf7^Lr{Yt8nsL$KS~2ekJix)+;1V8Hcf1oCnQ1(8Xc`u9&p}k%%Q|WQ9UF%UF!&w0 z9!PH;&XZyV9mV|~&FC9EMwe;Nk{^SI(%i%OvB`a+GE(n8sqeY&3G{a5o@tL00UoQ2 z81A9U$i3JcVw;FNdG9Q&oOQ|@Ia?7K)}KFXtBCwQcFk#YOxL@{l%hY9{mRIh%E)jj zB3vT>b!>HT4{tDdI<Rz~l2&&7&gsP>Z|WZsWc_X}}{&@YvYyq_ zKab=OOcw3jFKM@=oszamS|e$dq#jB8B*jy#hSorHtzr1RXw=36HoM z8`$RMY2JofgpfZqrPTG-}RqZLom0&JYt*r16tO3xM=Fq5#OH zOK{5sDT(@4A&>-kt!4@3KvF_DVzNOlaVx?orVO7IxP-`_a@VX{QMR&rG4yh?ODz?K z&L@#uyxP06L@%rU9CprAQEug`RaMoz75~`K`Rr7$!FKVMvXyH-hePMHQ&9>V70W8h zR#%^|`MiUs)oc7|YP(k3Qm-|)2K-t>D;|_?Z*19ot+uJQ5id?`#;TpXe(sWK_01R{ znIoi`Rn-RCA)uAf6WPVA5>IVYTt$oF546-asZy#C_rI7MzpIHs@*&VtYLWC_Ny|Fg z{B<RDqPXqBb}*D^=Ja_9K%_D**5 zWlLsX@4gBsmnj83h_)0HQ0j1ntc|%2yLWuG!_MXds>km|v@s99Ft5Y4%j$I$gly#w zts~b_P~vcT9rhLDt$LCv9nY?X=;Qe9k0t2Jfw5wTYd7ibvUwd^D973WY1k@u*h}bP z8u|Mh{DvT3AnOTPt?$5*lJV9RAZ7gVDf#muelhgzcan1V3b6wq`gq&j7SQtXUL0>z z&s@o$Ytau?*nbpKf1qyI|CB2(!*TieTX6I$dGcoi$zvbghP-HZ1+}}>p;X4E>SkMnSRfuzeM zy-m^vNw-P5OVY9C|11~o`I!)tWUQ)7H zn~68_W@+;a7ZlFZ=FKhQ|Juy9fIr}G^4GTeXJv|A&sbsmj%K5F6Q~gobc@R26(WD2 zjTN@E8va6Wam95;?PkesZV47{3O3>;!p3@5NZgiMyr5KAzoP~A1T_MpWSbx7!`7BG 
z#)mRERZ9vo*VbgP!p0W7;Z$h&J8*0OgR<88TBDW~`nUKR0=3P4-5a#_{mVCxvbU^vwFZGN#HWUC68*} zE8uEmXiX{oDOL|S#9aw!oJ$>YQdyoW!qKdMlhwoVo20&*bE!k7zK$Y_F{=I=s|Os# zBTm=>LHN6ol(kD<*(n8&%SO7>&XbB;E*WU%N?gS)r7vN&5}EP%DSZ{!JW_w1ETA@0 z-zj|+Uv2|NqDo)I$$c_#s<;Rdo-+HNp2<^8Qu-hA?ceWL8E z^VEN?t3BRK>~9y9&lj}p>X{of|f%WYEMm!Y8iRtBHW&{ywmtM|EgD?v#! z%&}i&=-0SCT(ZV36kb)12{t)?l4X6!kpFVI;C=TRaH;p+GyPTL&xLRpz|X8tZ#q%Z zXK->(e`o5y3^B7lz2!t{oz%~i&*XmxS({AJgPB6Ja1zc zAaPRCR4`={fy8%$3$UvqFnB^?#(U^E;r z$YORn*zt`1zdDPZLf{sBIiupfs)v@31$SdI`)xVL&+WI{fm6Lh8TB@E`ODal>^HUk z?v?n6#MQcfNZP3x??En=vibiQM961~J6@RPvFez$BFzxM>}w8ZyM@F2(7{rt$-`=Hz(hleBl9ujgCPYL(9 z1kUdn0n4RO1^ncN0_XRHfL{!JG=HuDZo#^v$5@oqeFl2FlJNZ!S8?_R;G@}D3Y@;Y znxE1CE4iI>*U{}+bgj1W=0bBkn^gT#ie*#INGGWK3j1 z7JNBynkV+zH704sR!RHnds$U@8*rMROv)=iH)XLOl=d^{;ZA8^oqvmE1-r7?c{~gL zBjB2H&P0z&`zJ5=aP$#b_Ny#*{*(nD%7W*h;TIwrsq08pbuPyp?7$_W-dKNTUDy-v#ZXO|9hY#FLAJwtCfYELQ@vR1Z^j5NF)oNd5 zMfDndYv=Qorg6(lOhxsrzRgXon`)bU{5F%XHrRm&iOp?IemspSTrhvpjf@I>jrASG zEyAa9bf>C5*xbBB)l;^zlvlHCwfB}XpW>(*l~2{e?`iQ`=tDdB+T`yLM<0FAw}p7h zaoWDMv8`@PAeC=U;kc70J`995f28HBYi+{APLXeFY&TdNo>uvJcTn&7Y6F4V9X@|c zJ@b{`vC?}>MF|>UD&gNIGGBS+s$y@YZ`HD8)n#jZYrMskWz>iK6T!OnppW0XqYnf< z9`%JFeHSP{R?GzH!^Cuq?$wEJ4H!Oik(#&jVn*>HA2pU4-%4i6nwlrSuS}QVpK+!`8Q+HbeD&?E zzAbn_iAUSyIKJ9!Z1Dx#{TR8_IMB%PnPL9kGu^QGwlp0R6F`i$m@7uBQQIWGB+U@@ z;f|^tD*joh`uH?mjDM_}4*8aE^R4!8#w7FyN}6ih+x_igHZuA7YPvXm{F;u4FI3Yp hK392*1O9foLX6g}_)s?OBiEGD_40ey6hagAe*uB(X}16X literal 0 HcmV?d00001 diff --git a/gmqcc.h b/gmqcc.h new file mode 100644 index 0000000..da17ac0 --- /dev/null +++ b/gmqcc.h @@ -0,0 +1,174 @@ +/* + * Compiler error system, this handles the error printing, and managing + * such as after so many errors just stop the compilation, and other + * intereting like colors for the console. 
/*
 * gmqcc.h - common header: type ids, VM instruction numbers, lexer state
 * and the prototypes shared by lex.c, error.c, parse.c and main.c.
 */
#ifndef DPQCC_HDR
#define DPQCC_HDR
#include <limits.h> /* SHRT_MAX, base of the ERROR_* codes */
#include <stdio.h>  /* FILE                                */
#include <stdlib.h> /* malloc/free behind mem_a/mem_d      */

/* The types supported by the language */
#define TYPE_VOID     0
#define TYPE_STRING   1
#define TYPE_FLOAT    2
#define TYPE_VECTOR   3
#define TYPE_ENTITY   4
#define TYPE_FIELD    5
#define TYPE_FUNCTION 6
#define TYPE_POINTER  7

/*
 * there are 3 accessible memory zones -
 * globals
 *     array of 32bit ints/floats, mixed, LE,
 * entities
 *     structure is up to the engine but the fields are a linear array
 *     of mixed ints/floats, there are globals referring to the offsets
 *     of these in the entity struct so there are ADDRESS and STOREP and
 *     LOAD instructions that use globals containing field offsets.
 * strings
 *     a static array in the progs.dat, with file parsing creating
 *     additional constants, and some engine fields are mapped by
 *     address as well to unique string offsets
 */

/*
 * Instructions
 * These are the external instructions supported by the interpreter,
 * this is what things compile to (from the C code).  These are not the
 * internal instructions for support like int, and such (which are
 * translated).
 */
#define INSTR_DONE      0
/* math */
#define INSTR_MUL_F     1 /* multiplication float         */
#define INSTR_MUL_V     2 /* multiplication vector        */
#define INSTR_MUL_FV    3 /* multiplication float->vector */
#define INSTR_MUL_VF    4 /* multiplication vector->float */
#define INSTR_DIV_F     5
#define INSTR_ADD_F     6
#define INSTR_ADD_V     7
#define INSTR_SUB_F     8
#define INSTR_SUB_V     9
/* compare */
#define INSTR_EQ_F      10
#define INSTR_EQ_V      11
#define INSTR_EQ_S      12
#define INSTR_EQ_E      13
#define INSTR_EQ_FNC    14
#define INSTR_NE_F      15
#define INSTR_NE_V      16
#define INSTR_NE_S      17
#define INSTR_NE_E      18
#define INSTR_NE_FNC    19
/* multi compare */
#define INSTR_LE        20
#define INSTR_GE        21
#define INSTR_LT        22
#define INSTR_GT        23
/* load and store */
#define INSTR_LOAD_F    24
#define INSTR_LOAD_V    25
#define INSTR_LOAD_S    26
#define INSTR_LOAD_ENT  27
#define INSTR_LOAD_FLD  28
#define INSTR_LOAD_FNC  29
#define INSTR_STORE_F   31
#define INSTR_STORE_V   32
#define INSTR_STORE_S   33
#define INSTR_STORE_ENT 34
#define INSTR_STORE_FLD 35
#define INSTR_STORE_FNC 36
/* others (ADDRESS takes the 30 that the load/store run above skips) */
#define INSTR_ADDRESS   30
#define INSTR_RETURN    37
#define INSTR_NOT_F     38
#define INSTR_NOT_V     39
#define INSTR_NOT_S     40
#define INSTR_NOT_ENT   41
#define INSTR_NOT_FNC   42
#define INSTR_IF        43
#define INSTR_IFNOT     44
#define INSTR_CALL0     45
#define INSTR_CALL1     46
#define INSTR_CALL2     47
#define INSTR_CALL3     48
#define INSTR_CALL4     49
#define INSTR_CALL5     50
#define INSTR_CALL6     51
#define INSTR_CALL7     52
#define INSTR_CALL8     53
#define INSTR_STATE     54
#define INSTR_GOTO      55
#define INSTR_AND       56
#define INSTR_OR        57
/* NOTE(review): 58 is unassigned here - confirm against the target VM */
#define INSTR_BITAND    59
#define INSTR_BITOR     60

/* allocation wrappers, currently plain malloc/free */
#define mem_a(x) malloc(x)
#define mem_d(x) free  (x)

/*
 * This is the smallest lexer I've ever written, and I must say it's much
 * nicer than those large, bulky parsers most people write, which carry
 * some sort of complex state.
 */
struct lex_file {
    /*
     * This is a simple state for lexing, no need to be complex for qc
     * code.  It's trivial stuff.
     */
    FILE *file;         /* stream opened by lex_open                         */
    char  peek[5];      /* pushback stack, extend for depthier peeks         */
    int   last;         /* number of characters currently held in peek       */
    int   current;      /* write position inside lastok                      */
    int   length;       /* characters left to read; negative once exhausted  */
    int   size;         /* copy of the initial length, this is never changed */
    char  lastok[8192]; /* No token shall ever be bigger than this!          */
};

/*
 * It's important that this table never exceed 32 keywords, the ascii
 * table starts at 33 (which we need).  The values are the indices of
 * the matching entries in the keyword table of lex.c.
 */
#define TOKEN_DO       0
#define TOKEN_ELSE     1
#define TOKEN_IF       2
#define TOKEN_WHILE    3
#define TOKEN_BREAK    4
#define TOKEN_CONTINUE 5
#define TOKEN_RETURN   6
#define TOKEN_GOTO     7
#define TOKEN_FOR      8

/*
 * Lexer state constants, these are numbers for where exactly in
 * the lexing the lexer is at.  Or where it decided to stop if a lexer
 * error occurs.
 */
#define LEX_COMMENT 128 /* higher than ascii */
#define LEX_CHRLIT  129
#define LEX_STRLIT  130
#define LEX_IDENT   131
#define LEX_DO      132
#define LEX_ELSE    133
#define LEX_IF      134
#define LEX_WHILE   135
#define LEX_INCLUDE 136
#define LEX_DEFINE  137

/* lex.c */
int              lex_token(struct lex_file *file);
void             lex_reset(struct lex_file *file);
int              lex_debug(struct lex_file *file);
int              lex_close(struct lex_file *file);
struct lex_file *lex_open (const char *name);

/* errors: error.c indexes its tables with (status - SHRT_MAX) */
#define ERROR_LEX      (SHRT_MAX+0)
#define ERROR_PARSE    (SHRT_MAX+1)
#define ERROR_INTERNAL (SHRT_MAX+2)
int error(int status, const char *msg, ...);

/* parse.c */
int parse(struct lex_file *file);

#endif
whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include "gmqcc.h" + +static const char *const lex_keywords[] = { + "do", "else", "if", "while", + "break", "continue", "return", "goto", + "for" +}; + +struct lex_file *lex_open(const char *name) { + struct lex_file *lex = mem_a(sizeof(struct lex_file)); + if (lex) { + lex->file = fopen(name, "r"); + fseek(lex->file, 0, SEEK_END); + lex->length = ftell(lex->file); + lex->size = lex->length; /* copy, this is never changed */ + fseek(lex->file, 0, SEEK_SET); + lex->last = 0; + + memset(lex->peek, 0, sizeof(lex->peek)); + } + return lex; +} + +int lex_close(struct lex_file *file) { + int ret = -1; + if (file) { + ret = fclose(file->file); + mem_d(file); + } + return ret; +} + +static void lex_addch(int ch, struct lex_file *file) { + if (file->current < sizeof(file->lastok)-1) + file->lastok[file->current++] = (char)ch; + if (file->current == sizeof(file->lastok)-1) + file->lastok[file->current] = (char)'\0'; +} +static inline void lex_clear(struct lex_file *file) { + file->current = 0; +} + +/* + * read in inget/unget character from a lexer stream. + * This doesn't play with file streams, the lexer has + * it's own internal state for this. 
+ */ +static int lex_inget(struct lex_file *file) { + file->length --; + if (file->last > 0) + return file->peek[--file->last]; + return fgetc(file->file); +} +static void lex_unget(int ch, struct lex_file *file) { + if (file->last < sizeof(file->peek)) + file->peek[file->last++] = ch; + file->length ++; +} + +/* + * This is trigraph and digraph support, a feature not qc compiler + * supports. Moving up in this world! + */ +static int lex_trigraph(struct lex_file *file) { + int ch; + if ((ch = lex_inget(file)) != '?') { + lex_unget(ch, file); + return '?'; + } + + ch = lex_inget(file); + switch (ch) { + case '(' : return '[' ; + case ')' : return ']' ; + case '/' : return '\\'; + case '\'': return '^' ; + case '<' : return '{' ; + case '>' : return '}' ; + case '!' : return '|' ; + case '-' : return '~' ; + case '=' : return '#' ; + default: + lex_unget('?', file); + lex_unget(ch , file); + return '?'; + } + return '?'; +} +static int lex_digraph(struct lex_file *file, int first) { + int ch = lex_inget(file); + switch (first) { + case '<': + if (ch == '%') return '{'; + if (ch == ':') return '['; + break; + case '%': + if (ch == '>') return '}'; + if (ch == ':') return '#'; + break; + case ':': + if (ch == '>') return ']'; + break; + } + + lex_unget(ch, file); + return first; +} + +static int lex_getch(struct lex_file *file) { + int ch = lex_inget(file); + if (ch == '?') + return lex_trigraph(file); + if (ch == '<' || ch == ':' || ch == '%') + return lex_digraph (file, ch); + + return ch; +} + +static int lex_get(struct lex_file *file) { + int ch; + if (!isspace(ch = lex_getch(file))) + return ch; + + /* skip over all spaces */ + while (isspace(ch) && ch != '\n') + ch = lex_getch(file); + + if (ch == '\n') + return ch; + + lex_unget(ch, file); + return ' '; +} + +static int lex_skipchr(struct lex_file *file) { + int ch; + int it; + + lex_clear(file); + lex_addch('\'', file); + + for (it = 0; it < 2 && ((ch = lex_inget(file)) != '\''); it++) { + lex_addch(ch, file); 
+ + if (ch == '\n') + return ERROR_LEX; + if (ch == '\\') + lex_addch(lex_getch(file), file); + } + lex_addch('\'', file); + lex_addch('\0', file); + + if (it > 2) + return ERROR_LEX; + + return LEX_CHRLIT; +} + +static int lex_skipstr(struct lex_file *file) { + int ch; + lex_clear(file); + lex_addch('"', file); + + while ((ch = lex_getch(file)) != '"') { + if (ch == '\n' || ch == EOF) + return ERROR_LEX; + + lex_addch(ch, file); + if (ch == '\\') + lex_addch(lex_inget(file), file); + } + + lex_addch('"', file); + lex_addch('\0', file); + + return LEX_STRLIT; +} +static int lex_skipcmt(struct lex_file *file) { + int ch; + lex_clear(file); + ch = lex_getch(file); + + if (ch == '/') { + lex_addch('/', file); + lex_addch('/', file); + + while ((ch = lex_getch(file)) != '\n') { + if (ch == '\\') { + lex_addch(ch, file); + lex_addch(lex_getch(file), file); + } else { + lex_addch(ch, file); + } + } + lex_addch('\0', file); + return LEX_COMMENT; + } + + if (ch != '*') { + lex_unget(ch, file); + return '/'; + } + + lex_addch('/', file); + + /* hate this */ + do { + lex_addch(ch, file); + while ((ch = lex_getch(file)) != '*') { + if (ch == EOF) + return error(ERROR_LEX, "malformatted comment"," "); + else + lex_addch(ch, file); + } + lex_addch(ch, file); + } while ((ch = lex_getch(file)) != '/'); + + lex_addch('/', file); + lex_addch('\0', file); + + return LEX_COMMENT; +} + +static int lex_getsource(struct lex_file *file) { + int ch = lex_get(file); + + /* skip char/string/comment */ + switch (ch) { + case '\'': return lex_skipchr(file); + case '"': return lex_skipstr(file); + case '/': return lex_skipcmt(file); + default: return ch; + } +} + +int lex_token(struct lex_file *file) { + int ch = lex_getsource(file); + int it; + + /* valid identifier */ + if (ch > 0 && (ch == '_' || isalpha(ch))) { + lex_clear(file); + while (ch > 0 && (isalpha(ch) || isdigit(ch) || ch == '_')) { + lex_addch(ch, file); + ch = lex_getsource(file); + } + lex_unget(ch, file); + lex_addch('\0', 
file); + + /* look inside the table for a keyword .. */ + for (it = 0; it < sizeof(lex_keywords)/sizeof(*lex_keywords); it++) + if (!strncmp(file->lastok, lex_keywords[it], sizeof(lex_keywords[it]))) + return it; + + return LEX_IDENT; + } + return ch; +} + +void lex_reset(struct lex_file *file) { + file->current = 0; + file->last = 0; + file->length = file->size; + fseek(file->file, 0, SEEK_SET); + + memset(file->peek, 0, sizeof(file->peek )); + memset(file->lastok, 0, sizeof(file->lastok)); +} + +int lex_debug(struct lex_file *file) { + int list_do = 0; + int list_else = 0; + int list_if = 0; + int list_while = 0; + int list_break = 0; + int list_continue = 0; + int list_return = 0; + int list_goto = 0; + int list_for = 0; + int token = 0; + printf("===========================\nTOKENS: \n===========================\n"); + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { + if (token != -1) { + switch (token) { + case 0: list_do ++; break; + case 1: list_else ++; break; + case 2: list_if ++; break; + case 3: list_while ++; break; + case 4: list_break ++; break; + case 5: list_continue++; break; + case 6: list_return ++; break; + case 7: list_goto ++; break; + case 8: list_for ++; break; + } + } + if (token >= 33 && token <= 126) + putchar(token); + } + printf("\n===========================\nBRANCHES \n===========================\n"); + printf("\t if % 8d\n", list_if); + printf("\t else % 8d\n", list_else); + printf("===========================\nLOOPS \n===========================\n"); + printf("\t for % 8d\n", list_for); + printf("\t while % 8d\n", list_while); + printf("\t do % 8d\n", list_do); + printf("===========================\nSTATEMENTS \n===========================\n"); + printf("\t break % 8d\n", list_break); + printf("\t continue % 8d\n", list_continue); + printf("\t return % 8d\n", list_return); + printf("\t goto % 8d\n", list_goto); + printf("===========================\nIDENTIFIERS\n===========================\n"); + 
lex_reset(file); + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) + if (token == LEX_IDENT) + printf("%s ", file->lastok); + lex_reset(file); + return 1; +} diff --git a/main.c b/main.c new file mode 100644 index 0000000..8718a72 --- /dev/null +++ b/main.c @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "gmqcc.h" + +int main(int argc, char **argv) { + if (argc <= 1) { + printf("Usage: %s infile.qc outfile\n", *argv); + return -1; + } + + struct lex_file *lex = lex_open(argv[1]); + lex_debug(lex); + parse (lex); + lex_close(lex); + + return 0; +} diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..a43981f --- /dev/null +++ b/parse.c @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2012 + * Dale Weiler + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include "gmqcc.h" +int parse(struct lex_file *file) { + int token = 0; + while ((token = lex_token(file)) != ERROR_LEX && file->length >= 0) { + switch (token) { + case TOKEN_IF: + token = lex_token(file); + while ((token == ' ' || token == '\n') && file->length >= 0) + token = lex_token(file); + + if (token != '(') + error(ERROR_PARSE, "Expected `(` after if\n", ""); + break; + } + } + lex_reset(file); + + return 1; +} -- 2.39.5