From caee1a7d53590e13a207deee7318d6d912e6de96 Mon Sep 17 00:00:00 2001 From: Cloudwalk Date: Tue, 27 Apr 2021 17:43:24 -0400 Subject: [PATCH] Numerous improvements, such as making the parser actually function. I am generally dissatisfied. --- json.c | 218 +++++++++++++++++++++++++++---------------------------- parser.c | 109 ++++++++++++++++++++-------- parser.h | 28 +++++-- 3 files changed, 208 insertions(+), 147 deletions(-) diff --git a/json.c b/json.c index 4f4770ae..705d1c0b 100644 --- a/json.c +++ b/json.c @@ -21,35 +21,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "darkplaces.h" #include "parser.h" -// taken from json's wikipedia article -const char json_test_string[] = -{ - "{\n" - "\t\"firstName\": \"John\",\n" - "\t\"lastName\": \"Smith\",\n" - "\t\"isAlive\": true,\n" - "\t\"age\": 27,\n" - "\t\"address\": {\n" - "\t\t\"streetAddress\": \"21 2nd Street\",\n" - "\t\t\"city\": \"New York\",\n" - "\t\t\"state\": \"NY\",\n" - "\t\t\"postalCode\": \"10021-3100\"\n" - "\t},\n" - "\t\"phoneNumbers\": [\n" - "\t\t{\n" - "\t\t\t\"type\": \"home\",\n" - "\t\t\t\"number\": \"212 555-1234\"\n" - "\t\t},\n" - "\t\t{\n" - "\t\t\t\"type\": \"office\",\n" - "\t\t\t\"number\": \"646 555-4567\"\n" - "\t\t}\n" - "\t],\n" - "\t\"children\": [],\n" - "\t\"spouse\": null\n" - "}\n\000" -}; - typedef enum qjson_type_e { JSON_TYPE_UNDEFINED = 0, @@ -69,11 +40,11 @@ typedef struct qjson_token_s typedef struct qjson_state_s { qjson_token_t *head, *cur; - qparser_state_t state; + qparser_state_t *state; } qjson_state_t; -static void Json_Parse_Object(struct qjson_state_s *state); -static void Json_Parse_Array(struct qjson_state_s *state); +static inline void Json_Parse_Object(struct qjson_state_s *state); +static inline void Json_Parse_Array(struct qjson_state_s *state); // Checks for C/C++-style comments and ignores them. This is not standard json. static qbool Json_Parse_Comment_SingleLine(struct qparser_state_s *state) @@ -84,7 +55,7 @@ static qbool Json_Parse_Comment_SingleLine(struct qparser_state_s *state) if(*state->pos++ == '/') return true; else - Parse_Error(state, PARSE_ERR_INVAL); + Parse_Error(state, PARSE_ERR_INVAL, "// or /*"); } return false; } @@ -97,7 +68,7 @@ static qbool Json_Parse_CheckComment_Multiline_Start(struct qparser_state_s *sta if(*state->pos++ == '*') return true; else - Parse_Error(state, PARSE_ERR_INVAL); + Parse_Error(state, PARSE_ERR_INVAL, "// or /*"); } return false; } @@ -115,25 +86,23 @@ static qbool Json_Parse_CheckComment_Multiline_End(struct qparser_state_s *state // TODO: handle escape sequences -static void Json_Parse_String(struct qjson_state_s *json) +static inline void Json_Parse_String(struct qjson_state_s *json) { do { - Parse_Next(&json->state, 1); - if(*json->state.pos == '\\') + Parse_Next(json->state, 1); + if(*json->state->pos == '\\') { - Parse_Next(&json->state, 1); + Parse_Next(json->state, 1); continue; } - } while(*json->state.pos != '"'); - - Parse_Next(&json->state, 1); + } while(*json->state->pos != '"'); } // Handles numbers. Json numbers can be either an integer or a double. -static qbool Json_Parse_Number(struct qjson_state_s *json) +static inline qbool Json_Parse_Number(struct qjson_state_s *json) { int i, numsize; - const char *in = json->state.pos; + const unsigned char *in = json->state->pos; //char out[128]; qbool is_float = false; qbool is_exp = false; @@ -145,7 +114,7 @@ static qbool Json_Parse_Number(struct qjson_state_s *json) if(in[i] == '.') { if(is_float || is_exp) - Parse_Error(&json->state, PARSE_ERR_INVAL); + Parse_Error(json->state, PARSE_ERR_INVAL, "a number"); is_float = true; i++; continue; @@ -154,7 +123,7 @@ static qbool Json_Parse_Number(struct qjson_state_s *json) if(in[i] == 'e' || in[i] == 'E') { if(is_exp) - Parse_Error(&json->state, PARSE_ERR_INVAL); + Parse_Error(json->state, PARSE_ERR_INVAL, "a number"); if(in[i+1] == '+' || in[i+1] == '-') i++; is_exp = true; @@ -163,28 +132,26 @@ static qbool Json_Parse_Number(struct qjson_state_s *json) } } // TODO: use strtod() - Parse_Next(&json->state, i); + Parse_Next(json->state, i); return true; } // Parse a keyword. -static qbool Json_Parse_Keyword(struct qjson_state_s *json, const char *keyword) +static inline qbool Json_Parse_Keyword(struct qjson_state_s *json, const char *keyword) { size_t keyword_size = strlen(keyword); - if(!strncmp(keyword, json->state.pos, keyword_size)) + if(!strncmp(keyword, json->state->pos, keyword_size)) { - Parse_Next(&json->state, keyword_size); + Parse_Next(json->state, keyword_size - 1); return true; } return false; } // Parse a value. -static void Json_Parse_Value(struct qjson_state_s *json) +static inline qbool Json_Parse_Value(struct qjson_state_s *json) { - Parse_Next(&json->state, 1); - - switch(Parse_NextToken(&json->state)) + switch(Parse_NextToken(json->state)) { case '"': // string Json_Parse_String(json); @@ -199,102 +166,109 @@ static void Json_Parse_Value(struct qjson_state_s *json) Json_Parse_Number(json); break; default: + if(isdigit(*json->state->pos)) + Json_Parse_Number(json); if(Json_Parse_Keyword(json, "true")) break; if(Json_Parse_Keyword(json, "false")) break; if(Json_Parse_Keyword(json, "null")) break; - if(isdigit(*json->state.pos)) - Json_Parse_Number(json); + //Parse_Error(json->state, PARSE_ERR_INVAL, "a value"); + return false; } + return true; +} + +static inline qbool Json_Parse_Pairs(struct qjson_state_s *json) +{ + do + { + // Parse the key + if(Parse_NextToken(json->state) == '"') + { + Json_Parse_String(json); + + // And its value + if(Parse_NextToken(json->state) == ':') + { + if(!Json_Parse_Value(json)) + Parse_Error(json->state, PARSE_ERR_INVAL, "a value"); + } + else + Parse_Error(json->state, PARSE_ERR_INVAL, ":"); + } + else + return false; + } while (Parse_NextToken(json->state) == ','); + + return true; } // Parse an object. -static void Json_Parse_Object(struct qjson_state_s *json) +static inline void Json_Parse_Object(struct qjson_state_s *json) { + Parse_IncDepth(json->state); + /* * Json objects are basically a data map; key-value pairs. * They end in a comma or a closing curly brace. */ - do { - Parse_Next(&json->state, 1); + Json_Parse_Pairs(json); - // Parse the key - if(Parse_NextToken(&json->state) == '"') - Json_Parse_String(json); - else - goto fail; - - // And its value - if(Parse_NextToken(&json->state) == ':') - Json_Parse_Value(json); - else - goto fail; - } while (Parse_NextToken(&json->state) == ','); + if(Parse_CurrentToken(json->state) != '}') + Parse_Error(json->state, PARSE_ERR_INVAL, ", or }"); - if(Parse_NextToken(&json->state) == '}') - return; -fail: - Parse_Error(&json->state, PARSE_ERR_INVAL); + Parse_DecDepth(json->state); } // Parse an array. -static void Json_Parse_Array(struct qjson_state_s *json) +static inline void Json_Parse_Array(struct qjson_state_s *json) { + Parse_IncDepth(json->state); + /* * Json arrays are basically lists. They can contain * any value, comma-separated, and end with a closing square bracket. */ do { - Json_Parse_Value(json); - } while (Parse_NextToken(&json->state) == ','); + if(!Json_Parse_Value(json)) + break; + } while (Parse_NextToken(json->state) == ','); + + if(Parse_CurrentToken(json->state) != ']') + Parse_Error(json->state, PARSE_ERR_INVAL, ", or ]"); - if(Parse_NextToken(&json->state) == ']') - return; - else - Parse_Error(&json->state, PARSE_ERR_INVAL); + Parse_DecDepth(json->state); } // Main function for the parser. -qjson_token_t *Json_Parse(const char *data) +static qjson_token_t *Json_Parse_Main(qjson_state_t *json) { - struct qjson_state_s json = - { - .head = NULL, - .cur = NULL, - .state = - { - .name = "json", - .buf = data, - .pos = &data[0], - .line = 1, - .col = 1, - .callback = - { - .CheckComment_SingleLine = Json_Parse_Comment_SingleLine, - .CheckComment_Multiline_Start = Json_Parse_CheckComment_Multiline_Start, - .CheckComment_Multiline_End = Json_Parse_CheckComment_Multiline_End - } - } - }; + json->state->callback.CheckComment_SingleLine = Json_Parse_Comment_SingleLine; + json->state->callback.CheckComment_Multiline_Start = Json_Parse_CheckComment_Multiline_Start; + json->state->callback.CheckComment_Multiline_End = Json_Parse_CheckComment_Multiline_End; - if(data == NULL) + if(setjmp(parse_error)) { - Con_Printf(CON_ERROR "Json_Parse: Empty json file\n"); + // actually not sure about this return NULL; } - - if(setjmp(parse_error)) + if(json->state->buf == NULL) { - // actually not sure about this + Con_Printf(CON_ERROR "Json_Parse: Empty json file\n"); return NULL; } - if(Parse_NextToken(&(json.state)) == '{') - Json_Parse_Object(&json); - else + switch(Parse_CurrentToken(json->state)) { + case '{': + Json_Parse_Object(json); + break; + case '[': + Json_Parse_Array(json); + break; + default: Con_Printf(CON_ERROR "Json_Parse: Not a json file\n"); return NULL; } @@ -306,7 +280,31 @@ qjson_token_t *Json_Parse(const char *data) return NULL; } +qjson_token_t *Json_Parse_File(const char *file) +{ + struct qjson_state_s json = + { + .head = NULL, + .cur = NULL, + .state = Parse_LoadFile(file) + }; + + return Json_Parse_Main(&json); +} + +qjson_token_t *Json_Parse(const unsigned char *data) +{ + struct qjson_state_s json = + { + .head = NULL, + .cur = NULL, + .state = Parse_New(data) + }; + + return Json_Parse_Main(&json); +} + void Json_Test_f(cmd_state_t *cmd) { - Json_Parse(json_test_string); + Json_Parse_File("test.json"); } diff --git a/parser.c b/parser.c index 534b236c..b6ecc083 100644 --- a/parser.c +++ b/parser.c @@ -24,23 +24,25 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. jmp_buf parse_error; // Tell the user that their stuff is broken, why it's broken, and where it's broken, so hopefully they fix it. -void Parse_Error(struct qparser_state_s *state, qparser_err_t error) +DP_FUNC_NORETURN void Parse_Error(struct qparser_state_s *state, qparser_err_t error, const char *expected) { - if(!error) - return; - else - { - switch (error) - { - case PARSE_ERR_INVAL: - Con_Printf(CON_ERROR "Parse Error: %s: Unexpected token '%c', line %i, column %i\n", state->name, *state->pos, state->line, state->col); - break; - case PARSE_ERR_EOF: - Con_Printf(CON_ERROR "Parse Error: %s: Unexpected end-of-file\n", state->name); - break; - default: - return; - } + switch (error) + { + case PARSE_ERR_INVAL: + if(!expected) + Sys_Error("Parse_Error: Expected to expect something (expected == NULL)! Your parser is broken."); + Con_Printf(CON_ERROR "Parse Error: %s: Unexpected token '%c', expected %s. Line %i, column %i\n", state->name, *state->pos, expected, state->line, state->col); + break; + case PARSE_ERR_EOF: + if(expected) + Sys_Error("Parse_Error: expected != NULL when it should be NULL. Your parser is broken."); + Con_Printf(CON_ERROR "Parse Error: %s: Unexpected end-of-file\n", state->name); + break; + case PARSE_ERR_DEPTH: + Con_Printf(CON_ERROR "Parse Error: %s: This file is nested too deep. Max depth of %i reached.\n", state->name, PARSER_MAX_DEPTH); + break; + default: + Sys_Error("Parse_Error: Invalid error number %i. Your parser is broken.", error); } longjmp(parse_error, 1); @@ -53,7 +55,7 @@ void Parse_Next(struct qparser_state_s *state, size_t count) state->pos += count; if(!*state->pos) - Parse_Error(state, PARSE_ERR_EOF); + Parse_Error(state, PARSE_ERR_EOF, NULL); } // Skips newlines, and handles different line endings. @@ -76,15 +78,20 @@ newline: } // Skip all whitespace, as we normally know it. -static inline void Parse_Whitespace(struct qparser_state_s *state) +static inline qbool Parse_Skip_Whitespace(struct qparser_state_s *state) { + qbool ret = false; // TODO: Some languages enforce indentation style. Add a callback to override this. while(*state->pos == ' ' || *state->pos == '\t') + { Parse_Next(state, 1); + ret = true; + } + return ret; } // Skips the current line. Only useful for comments. -static inline void Parse_SkipLine(struct qparser_state_s *state) +static inline void Parse_Skip_Line(struct qparser_state_s *state) { while(!Parse_Newline(state)) Parse_Next(state, 1); @@ -94,11 +101,11 @@ static inline qbool Parse_Skip_Comments(struct qparser_state_s *state) { // Make sure these are both defined (or both not defined) if((state->callback.CheckComment_Multiline_Start != NULL) ^ (state->callback.CheckComment_Multiline_End != NULL)) - Sys_Error("Parse_Skip_Comments: CheckComment_Multiline_Start (or _End) == NULL"); + Sys_Error("Parse_Skip_Comments: CheckComment_Multiline_Start (or _End) == NULL. Your parser is broken."); // Assume language doesn't support the respective comment types if one of these are NULL. if(state->callback.CheckComment_SingleLine && state->callback.CheckComment_SingleLine(state)) - Parse_SkipLine(state); + Parse_Skip_Line(state); else if(state->callback.CheckComment_Multiline_Start && state->callback.CheckComment_Multiline_Start(state)) { do @@ -114,25 +121,63 @@ static inline qbool Parse_Skip_Comments(struct qparser_state_s *state) } // Skip all whitespace. -static inline void Parse_Skip(struct qparser_state_s *state) +static inline qbool Parse_SkipToToken(struct qparser_state_s *state) { + qbool ret = false; + /* * Repeat this until we run out of whitespace, newlines, and comments. * state->pos should be left on non-whitespace when this returns. */ - do { - Parse_Whitespace(state); - } while (Parse_Skip_Comments(state) || Parse_Newline(state)); + while(Parse_Skip_Comments(state) || Parse_Skip_Whitespace(state) || Parse_Newline(state)) + ret = true; + + return ret; } -// Skip to the next token that isn't whitespace. Hopefully a valid one. +// Skip to the next token. Advance the pointer at least 1 if we're not sitting on whitespace. char Parse_NextToken(struct qparser_state_s *state) { - /* - * This assumes state->pos is already on whitespace. Most of the time this - * doesn't happen automatically, but advancing the pointer here would break - * comment and newline handling when it does happen automatically. - */ - Parse_Skip(state); + // Check if we will skip first. + if(!Parse_SkipToToken(state)) + { + // If not, advance the pointer. + Parse_Next(state, 1); + // Ensure we didn't land on whitespace and skip that too. + Parse_SkipToToken(state); + } + return *state->pos; +} + +// Return the current token but skip comments. +char Parse_CurrentToken(struct qparser_state_s *state) +{ + Parse_SkipToToken(state); return *state->pos; } + +qparser_state_t *Parse_New(const unsigned char *in) +{ + qparser_state_t *out; + + if(!in) + { + Con_Printf("Parse_New: FS_LoadFile() failed"); + return NULL; + } + + out = (qparser_state_t *)Z_Malloc(sizeof(qparser_state_t)); + + out->buf = in; + out->pos = in; + out->line = 1; + out->col = 1; + out->depth = 0; + + return out; +} + +qparser_state_t *Parse_LoadFile(const char *file) +{ + return Parse_New(FS_LoadFile(file, tempmempool, false, NULL)); +} diff --git a/parser.h b/parser.h index 022d2636..9eeb520c 100644 --- a/parser.h +++ b/parser.h @@ -21,20 +21,23 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "qtypes.h" #include +#define PARSER_MAX_DEPTH 256 + typedef enum qparser_err_e { PARSE_ERR_SUCCESS = 0, PARSE_ERR_INVAL = 1, PARSE_ERR_EOF = 2, - PARSE_ERR_EMPTY = 3 + PARSE_ERR_DEPTH = 3, + PARSE_ERR_EMPTY = 4 } qparser_err_t; typedef struct qparser_state_s { const char *name; - const char *buf; - const char *pos; - int line, col; + const unsigned char *buf; + const unsigned char *pos; + int line, col, depth; struct { @@ -46,6 +49,21 @@ typedef struct qparser_state_s extern jmp_buf parse_error; -void Parse_Error(struct qparser_state_s *state, qparser_err_t error); +void Parse_Error(struct qparser_state_s *state, qparser_err_t error, const char *expected); void Parse_Next(struct qparser_state_s *state, size_t count); char Parse_NextToken(struct qparser_state_s *state); +char Parse_CurrentToken(struct qparser_state_s *state); +qparser_state_t *Parse_New(const unsigned char *in); +qparser_state_t *Parse_LoadFile(const char *file); + +static inline void Parse_IncDepth(struct qparser_state_s *state) +{ + if(state->depth >= PARSER_MAX_DEPTH) + Parse_Error(state, PARSE_ERR_DEPTH, NULL); + state->depth++; +} + +static inline void Parse_DecDepth(struct qparser_state_s *state) +{ + state->depth--; +} \ No newline at end of file -- 2.39.2