From c99dda723c399e2f1218f89a2e0b64352fc07194 Mon Sep 17 00:00:00 2001 From: Samuel Rowe Date: Wed, 24 Jun 2020 14:54:16 +0530 Subject: [PATCH 1/3] Implemented a JSON parser. This commit implements an LL(1) lexer and the parser for the JSON specification. Both the lexer and the parser are integrated in the same structure for the sake of simplicity. There is no buffering of tokens. Any token produced by the lexer is immediately consumed by the parser. In fact, the parser triggers the lexer to create a token only when it requires one. --- .vscode/settings.json | 6 +- CMakeLists.txt | 2 + include/jtk/support/json.h | 69 ++ source/jtk/collection/array/ByteArray.c | 1 + source/jtk/io/InputStreamHelper.c | 2 + source/jtk/support/json.c | 825 ++++++++++++++++++ .../com/jtk/collection/list/ArrayListTest.c | 2 +- .../source/com/jtk/collection/list/suite.json | 10 + 8 files changed, 915 insertions(+), 2 deletions(-) create mode 100644 include/jtk/support/json.h create mode 100644 source/jtk/support/json.c create mode 100644 test/source/com/jtk/collection/list/suite.json diff --git a/.vscode/settings.json b/.vscode/settings.json index 4fd5570..a069092 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,7 @@ { - "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools" + "C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools", + "files.associations": { + "inputstream.h": "c", + "type_traits": "c" + } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index e33fd68..0ff63aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,8 @@ set( ${PROJECT_SOURCE_DIR}/source/jtk/core/StringBuilder.c ${PROJECT_SOURCE_DIR}/source/jtk/core/System.c + ${PROJECT_SOURCE_DIR}/source/jtk/support/json.c + # Input/Output Module ${PROJECT_SOURCE_DIR}/source/jtk/io/DataInputStream.c diff --git a/include/jtk/support/json.h b/include/jtk/support/json.h new file mode 100644 index 0000000..a23aaeb --- /dev/null +++ 
b/include/jtk/support/json.h @@ -0,0 +1,69 @@ +/* + * Copyright 2017-2020 Samuel Rowe + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Tuesday, June 23 2020 + +#ifndef JTK_SUPPORT_JSON_H +#define JTK_SUPPORT_JSON_H + +#include +#include +#include + +struct jtk_JsonError_t { + +}; + +typedef struct jtk_JsonError_t jtk_JsonError_t; + +enum jtk_JsonValueType_t { + JTK_JSON_VALUE_NULL, + JTK_JSON_VALUE_BOOLEAN, + JTK_JSON_VALUE_NUMBER, + JTK_JSON_VALUE_STRING, + JTK_JSON_VALUE_ARRAY, + JTK_JSON_VALUE_OBJECT +}; + +typedef enum jtk_JsonValueType_t jtk_JsonValueType_t; + +struct jtk_JsonValue_t { + jtk_JsonValueType_t type; + union { + bool boolean; + double number; + struct { + int32_t length; + uint8_t* bytes; + } string; + jtk_ArrayList_t* array; + jtk_HashMap_t* object; + }; +}; + +typedef struct jtk_JsonValue_t jtk_JsonValue_t; + +jtk_JsonValue_t* jtk_JsonValue_forObject(); +jtk_JsonValue_t* jtk_JsonValue_forArray(); +jtk_JsonValue_t* jtk_JsonValue_forString(const uint8_t* text, int32_t length); +jtk_JsonValue_t* jtk_JsonValue_forNumber(const uint8_t* text, int32_t length); +jtk_JsonValue_t* jtk_JsonValue_forTrue(); +jtk_JsonValue_t* jtk_JsonValue_forFalse(); + +jtk_JsonValue_t* jtk_parseJson(const uint8_t* sequence, int32_t size, jtk_JsonError_t* error); +uint8_t* jtk_toJson(jtk_JsonValue_t* value, int32_t* size); + +#endif /* JTK_SUPPORT_JSON_H */ \ No newline at end of file diff --git a/source/jtk/collection/array/ByteArray.c 
b/source/jtk/collection/array/ByteArray.c index 87a59a5..6212e83 100644 --- a/source/jtk/collection/array/ByteArray.c +++ b/source/jtk/collection/array/ByteArray.c @@ -17,6 +17,7 @@ // Tuesday, March 17, 2020 #include +#include /******************************************************************************* * ByteArray * diff --git a/source/jtk/io/InputStreamHelper.c b/source/jtk/io/InputStreamHelper.c index 31ff5b1..5086b0c 100644 --- a/source/jtk/io/InputStreamHelper.c +++ b/source/jtk/io/InputStreamHelper.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include /******************************************************************************* * InputStreamHelper * diff --git a/source/jtk/support/json.c b/source/jtk/support/json.c new file mode 100644 index 0000000..5a06108 --- /dev/null +++ b/source/jtk/support/json.c @@ -0,0 +1,825 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define allocate(type, size) ((type*)malloc(sizeof (type) * (size))) +#define deallocate(object) free(object) + +/******************************************************************************* + * ErrorCode * + *******************************************************************************/ + +enum jtk_ErrorCode_t { + JTK_JSON_ERROR_NONE, + ERROR_UNTERMINATED_STRING, + ERROR_MALFORMED_UNICODE_CHARACTER_SEQUENCE, + ERROR_INVALID_ESCAPE_SEQUENCE, + ERROR_INVALID_KEYWORD, + ERROR_UNKNOWN_CHARACTER, + JTK_JSON_ERROR_UNEXPECTED_TOKEN, + JTK_JSON_ERROR_EXPECTED_OBJECT_OR_ARRAY +}; + +typedef enum jtk_ErrorCode_t jtk_ErrorCode_t; + +/******************************************************************************* + * TokenType * + *******************************************************************************/ + +enum TokenType { + TOKEN_UNKNOWN, + TOKEN_WHITESPACE, + TOKEN_NEWLINE, + TOKEN_EOF, + TOKEN_LEFT_BRACE, + TOKEN_RIGHT_BRACE, + TOKEN_LEFT_BRACKET, + TOKEN_RIGHT_BRACKET, + TOKEN_COMMA, + TOKEN_COLON, + TOKEN_TRUE, + 
TOKEN_FALSE, + TOKEN_NULL, + TOKEN_STRING, + TOKEN_NUMBER, +}; + +typedef enum TokenType TokenType; + +/* Indexed by TokenType: one entry per enumerator, in declaration order. */ +static char tokenNames[][25] = { + "", + "", + "", + "", + "{", + "}", + "[", + "]", + ",", + ":", + "true", + "false", + "null", + "", + "" +}; + +/******************************************************************************* + * TokenChannel * + *******************************************************************************/ + +enum TokenChannel { + TOKEN_CHANNEL_DEFAULT, + TOKEN_CHANNEL_HIDDEN, +}; + +typedef enum TokenChannel TokenChannel; + +/******************************************************************************* + * Token * + *******************************************************************************/ + +/** + * A token represents the smallest entity that appears + * in a source code. Each token has two primary attributes: + * a token type (symbol category) and the text associated + * with it. + */ +struct Token { + TokenChannel channel; + TokenType type; + uint8_t* text; + int32_t length; + int32_t startIndex; + int32_t stopIndex; + int32_t startLine; + int32_t stopLine; + int32_t startColumn; + int32_t stopColumn; + int32_t index; + const char* file; +}; + +typedef struct Token Token; + +Token* newToken(TokenChannel channel, TokenType type, + const uint8_t* text, int32_t length, int32_t startIndex, int32_t stopIndex, + int32_t startLine, int32_t stopLine, int32_t startColumn, int32_t stopColumn, + const char* file); + +void deleteToken(Token* token); + +/******************************************************************************* + * Parser * + *******************************************************************************/ + +struct Parser { + + /** + * The input stream of characters. + */ + uint8_t* input; + + /** + * The size of the input stream. + */ + int32_t inputSize; + + /** + * The character at LA(1), this field is always updated + * by read(...). 
+ */ + int32_t la1; + + /** + * The input cursor under which the current look ahead + * character is located. + */ + int32_t index; + + /** + * The zero based line number at which the lexer is currently + * reading. + */ + int32_t line; + + /** + * The zero based column index within the line, where the + * lexer is currently reading. + */ + int32_t column; + + /** + * The starting index of the current token in the input + * stream, inclusive. + */ + int32_t startIndex; + + /** + * The zero based line number at which the current + * token begins, inclusive. + */ + int32_t startLine; + + /** + * The zero based column at which the current token + * begins. It is always relative to the starting line. + */ + int32_t startColumn; + + /** + * Determines whether the lexer has reached the end of + * the input stream. + */ + int32_t hitEndOfStream:1; + + /** + * The token that was most recently emitted. + */ + Token* token; + + /** + * The channel on which the next recognized + * token will be created on. + */ + TokenChannel channel; + + /** + * The text consumed so far to recognize the next + * token. + */ + jtk_StringBuilder_t* text; + + /** + * The token type of the next recognized token. + */ + TokenType type; + + const char* file; + + Token* lt1; +}; + +typedef struct Parser Parser; + +/** + * The primary interface to the lexer. It uses the lookahead + * character (character under the input cursor) to route control + * flow to the appropriate recognition function. And returns the + * next token recognized from the input stream. 
+ */ +static Token* nextToken(Parser* parser); + +Parser* parserNew(); +void parserDelete(Parser* parser); +jtk_JsonValue_t* parse(Parser* parser); +jtk_JsonValue_t* parseObject(Parser* parser); +jtk_JsonValue_t* parseArray(Parser* parser); +jtk_JsonValue_t* parseValue(Parser* parser); + +static void read(Parser* parser); +static Token* createToken(Parser* parser); +static void onNewLine(Parser* parser); +static void decimalIntegerLiteral(Parser* parser); + +Parser* newParser(const uint8_t* input, int32_t inputSize) { + /* The constructor invokes consume() to initialize + * the LA(1) character. Therefore, we assign negative values + * to certain attributes. + */ + + Parser* parser = allocate(Parser, 1); + + parser->input = input; + parser->inputSize = inputSize; + parser->la1 = 0; + parser->index = -1; + parser->line = 1; + parser->column = -1; + parser->startIndex = 0; + parser->startLine = 0; + parser->startColumn = 0; + parser->hitEndOfStream = false; + parser->token = NULL; + parser->channel = TOKEN_CHANNEL_DEFAULT; + parser->text = jtk_StringBuilder_new(); + parser->type = TOKEN_UNKNOWN; + + read(parser); + + parser->lt1 = nextToken(parser); + + return parser; +} + +/******************************************************************************* + * Token * + *******************************************************************************/ + +Token* newToken( + TokenChannel channel, + TokenType type, + const uint8_t* text, + int32_t length, + int32_t startIndex, + int32_t stopIndex, + int32_t startLine, + int32_t stopLine, + int32_t startColumn, + int32_t stopColumn, + const char* file) { + Token* token = allocate(Token, 1); + token->channel = channel; + token->type = type; + token->text = jtk_CString_newEx(text, length); + token->length = length; // This is the length of the text representation! 
+ token->startIndex = startIndex; + token->stopIndex = stopIndex; + token->startLine = startLine; + token->stopLine = stopLine; + token->startColumn = startColumn; + token->stopColumn = stopColumn; + token->file = file; + + return token; +} + +void deleteToken(Token* token) { + jtk_Assert_assertObject(token, "The specified token is null."); + + jtk_CString_delete(token->text); + deallocate(token); +} + +/******************************************************************************* + * Lexer * + *******************************************************************************/ + +#define isEscapeSequence(c) \ + (c == 'b') || \ + (c == 'f') || \ + (c == 'n') || \ + (c == 'r') || \ + (c == 't') || \ + (c == '\\') || \ + (c == '\"') || \ + (c == '\'') + +#define isLetter(c) ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) + + +#define isHexadecimalDigit(c) \ + ((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')) + +/* Destructor */ + +void lexerDelete(Parser* parser) { + jtk_Assert_assertObject(parser, "The specified lexer is null."); + + jtk_StringBuilder_delete(parser->text); + deallocate(parser); +} + +/* Create Token */ + +Token* createToken(Parser* parser) { + int32_t length = jtk_StringBuilder_getSize(parser->text); + length = (parser->type == TOKEN_STRING)? length - 2 : length; + + uint8_t* text = parser->text->m_value; + text = jtk_CString_newEx((parser->type == TOKEN_STRING)? text + 1 : text, + length); + + + Token* token = + newToken( + parser->channel, + parser->type, + text, + length, + parser->startIndex, /* inclusive */ + parser->index, /* exclusive */ + parser->startLine, /* inclusive */ + parser->line, /* inclusive */ + parser->startColumn, /* inclusive */ + parser->column, /* inclusive */ + parser->file + ); + + /* Destroy the text; not required anymore. 
*/ + jtk_CString_delete(text); + + return token; +} + +void onNewLine(Parser* parser) { + parser->line++; + parser->column = 1; +} + +bool isInputStart(Parser* parser) { + return (parser->startLine == 0) && (parser->startColumn == 0); +} + +void read(Parser* parser) { + jtk_StringBuilder_appendCodePoint(parser->text, parser->la1); + + parser->index++; + parser->column++; + /* NOTE: We could have used parser->index >= "length of input stream" + * Unfortunately, the flexible design of both the lexer and input stream fails + * to provide a method to determine the stream length in advance. + * + * NOTE: The getAvailable() function is only temporarily used. However, the + * working of this function is not finalized. Therefore, the following expression + * may be subjected to changes. + */ + if (parser->index >= parser->inputSize) { + parser->la1 = TOKEN_EOF; + } + else { + parser->la1 = parser->input[parser->index]; + } +} + +/* + * Check for a buffered token. If found, remove it from the buffer + * and return it to the user. + */ +Token* nextToken(Parser* parser) { + jtk_Assert_assertObject(parser, "The specified lexer is null."); + + jtk_ErrorCode_t errorCode = JTK_JSON_ERROR_NONE; + + /* We don't exit the loop until + * -- We have a token. + * -- We have reached the end of the stream. + * -- We have encountered an error. (Interestingly, this condition + * is not explicitly checked because errorneous token recognition + * too generate tokens!) 
+ */ + loopEntry: { + parser->token = NULL; + parser->type = TOKEN_UNKNOWN; + jtk_StringBuilder_clear(parser->text); + parser->channel = TOKEN_CHANNEL_DEFAULT; + parser->startIndex = parser->index; + parser->startLine = parser->line; + parser->startColumn = parser->column; + + if (parser->index == parser->inputSize) { + parser->type = TOKEN_EOF; + parser->hitEndOfStream = true; + } + else { + + switch (parser->la1) { + + + case ' ' : { + if (parser->la1 == ' ') { + do { + read(parser); + } + while (parser->la1 == ' '); + } + goto loopEntry; + } + + case '\r' : + case '\n' : { + if (parser->la1 == '\r') { + read(parser); + if (parser->la1 == '\n') { + read(parser); + onNewLine(parser); + } + } + else { + read(parser); + onNewLine(parser); + } + + goto loopEntry; + } + + /* COMMA + * : ',' + * ; + */ + case ',': { + read(parser); + parser->type = TOKEN_COMMA; + break; + } + + /* + * COLON + * : ':' + * ; + */ + case ':': { + read(parser); + parser->type = TOKEN_COLON; + + break; + } + + /* LEFT_BRACE + * : '{' + * ; + */ + case '{': { + read(parser); + parser->type = TOKEN_LEFT_BRACE; + break; + } + + /* RIGHT_BRACE + * : '}' + * ; + */ + case '}' : { + /* Consume and discard the '}' character. */ + read(parser); + /* The lexer has recognized the '}' token. */ + parser->type = TOKEN_RIGHT_BRACE; + break; + } + + /* LEFT_SQUARE_BRACKET + * : '[' + * ; + */ + case '[' : { + /* Consume and discard the '[' character. */ + read(parser); + /* The lexer has recognized the '[' token. */ + parser->type = TOKEN_LEFT_BRACKET; + break; + } + + /* RIGHT_SQUARE_BRACKET + * : ']' + * ; + */ + case ']' : { + /* Consume and discard the ']' character. */ + read(parser); + /* The lexer has recognized the ']' token. 
*/ + parser->type = TOKEN_RIGHT_BRACKET; + break; + } + + /* STRING + * : '"' STRING_CHARACTER* '"' + * ; + * + * STRING_CHARACTER + * : ~["'\\] + * | ESCAPE_SEQUENCE + * ; + * + * ESCAPE_SEQUENCE + * : '\\' [btnfr"'\\] + * | '\\' 'u' HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT HEXADECIMAL_DIGIT + * ; + * + * HEXADECIMAL_DIGIT + * : [0-9a-fA-F] + * ; + */ + case '"': { + read(parser); + + while (parser->la1 != '"') { + if ((parser->index == parser->inputSize) || (parser->la1 == '\n')) { + errorCode = ERROR_UNTERMINATED_STRING; + break; + } + else if (parser->la1 == '\\') { + read(parser); + + if (isEscapeSequence(parser->la1)) { + read(parser); + } + else if (parser->la1 == 'u') { + read(parser); + + int32_t i; + for (i = 0; i < 4; i++) { + if (isHexadecimalDigit(parser->la1)) { + read(parser); + } + else { + errorCode = ERROR_MALFORMED_UNICODE_CHARACTER_SEQUENCE; + break; + } + } + } + else { + errorCode = ERROR_INVALID_ESCAPE_SEQUENCE; + read(parser); + } + } + else { + read(parser); + } + } + + if (parser->la1 == '"') { + read(parser); + } + // else { + // Most likely, we encountered an immature end of line or stream. 
+ // } + + parser->type = TOKEN_STRING; + break; + } + + default: { + /* KEYWORD + * : LETTER LETTER* + * ; + */ + if (isLetter(parser->la1)) { + read(parser); + + while (isLetter(parser->la1)) { + read(parser); + } + + uint8_t* text = parser->text->m_value; // jtk_StringBuilder_toCString(parser->text); + int32_t length = parser->text->m_size; // parser->index - parser->startIndex; + + if (jtk_CString_equals(text, length, tokenNames[(int32_t)TOKEN_FALSE], 5)) { + parser->type = TOKEN_FALSE; + } + else if (jtk_CString_equals(text, length, tokenNames[(int32_t)TOKEN_TRUE], 4)) { + parser->type = TOKEN_TRUE; + } + else if (jtk_CString_equals(text, length, tokenNames[(int32_t)TOKEN_NULL], 4)) { + parser->type = TOKEN_NULL; + } + else { + errorCode = ERROR_INVALID_KEYWORD; + } + } + // else if (isDecimalDigit(parser->la1)) { + // TODO + // } + else { + errorCode = ERROR_UNKNOWN_CHARACTER; + parser->type = TOKEN_UNKNOWN; + read(parser); + } + break; + } + } + } + } + + Token* newToken = createToken(parser); + if (errorCode != JTK_JSON_ERROR_NONE) { + handleError(parser, newToken, TOKEN_UNKNOWN, errorCode); + } + + return newToken; +} + +Token* consume(Parser* parser) { + Token* temporary = parser->lt1; + parser->lt1 = nextToken(parser); + return temporary; +} + +Token* match(Parser* parser, TokenType type) { + jtk_Assert_assertObject(parser, "The specified parser is null."); + + Token* result = parser->lt1; + if (parser->lt1->type == type) { + /* Do not consume the end-of-stream token. 
*/ + if (parser->lt1->type != TOKEN_EOF) { + consume(parser); + } + } + else { + handleError(parser, parser->lt1, type, JTK_JSON_ERROR_UNEXPECTED_TOKEN); + } + return result; +} + +jtk_JsonValue_t* jtk_parseJson(const uint8_t* sequence, int32_t size, + jtk_JsonError_t* error) { + jtk_JsonValue_t* result = NULL; + Parser* parser = newParser(sequence, size); + if (parser->lt1->type == TOKEN_LEFT_BRACKET) { + result = parseArray(parser); + } + else if (parser->lt1->type == TOKEN_LEFT_BRACE) { + result = parseObject(parser); + } + else { + handleError(parser, parser->lt1, TOKEN_UNKNOWN, JTK_JSON_ERROR_EXPECTED_OBJECT_OR_ARRAY); + result = NULL; + } + match(parser, TOKEN_EOF); + return result; +} + +jtk_JsonValue_t* parseObject(Parser* parser) { + match(parser, TOKEN_LEFT_BRACE); + + jtk_JsonValue_t* result = jtk_JsonValue_forObject(); + + if (parser->lt1->type == TOKEN_STRING) { + Token* key = consume(parser); + match(parser, TOKEN_COLON); + jtk_JsonValue_t* value = parseValue(parser); + jtk_HashMap_put(result->object, key->text, value); + + while (parser->lt1->type == TOKEN_COMMA) { + consume(parser); + Token* key = match(parser, TOKEN_STRING); + match(parser, TOKEN_COLON); + jtk_JsonValue_t* value = parseValue(parser); + jtk_HashMap_put(result->object, key->text, value); + } + } + + match(parser, TOKEN_RIGHT_BRACE); + + return result; +} + +#define followValue(token) \ + (token == TOKEN_LEFT_BRACE) || \ + (token == TOKEN_LEFT_BRACKET) || \ + (token == TOKEN_STRING) || \ + (token == TOKEN_NUMBER) || \ + (token == TOKEN_TRUE) || \ + (token == TOKEN_FALSE) || \ + (token == TOKEN_NULL) + +jtk_JsonValue_t* parseArray(Parser* parser) { + match(parser, TOKEN_LEFT_BRACKET); + + jtk_JsonValue_t* value = jtk_JsonValue_forArray(); + if (followValue(parser->lt1->type)) { + jtk_JsonValue_t* result = parseValue(parser); + jtk_ArrayList_add(value->array, result); + + while (parser->lt1->type == TOKEN_COMMA) { + consume(parser); + result = parseValue(parser); + 
jtk_ArrayList_add(value->array, result); + } + } + + match(parser, TOKEN_RIGHT_BRACKET); + + return value; +} + +/* Defined below; declared here because parseValue() builds null values with it. */ +jtk_JsonValue_t* newJsonValue(jtk_JsonValueType_t type); + +/** + * Parses the value that starts at the lookahead token. + * + * Returns the parsed value, or NULL without consuming anything when the + * lookahead token cannot start a value. + */ +jtk_JsonValue_t* parseValue(Parser* parser) { + TokenType type = parser->lt1->type; + jtk_JsonValue_t* result = NULL; + switch (type) { + case TOKEN_STRING: { + Token* literal = parser->lt1; + result = jtk_JsonValue_forString(literal->text, literal->length); + consume(parser); + break; + } + + case TOKEN_NUMBER: { + Token* literal = consume(parser); + result = jtk_JsonValue_forNumber(literal->text, literal->length); + break; + } + + case TOKEN_TRUE: { + consume(parser); + result = jtk_JsonValue_forTrue(); + break; + } + + case TOKEN_FALSE: { + consume(parser); + result = jtk_JsonValue_forFalse(); + break; + } + + case TOKEN_NULL: { + consume(parser); + /* There is no public constructor for null; a bare tagged value is enough. */ + result = newJsonValue(JTK_JSON_VALUE_NULL); + break; + } + + case TOKEN_LEFT_BRACKET: { + result = parseArray(parser); + break; + } + + case TOKEN_LEFT_BRACE: { + result = parseObject(parser); + break; + } + + default: { + /* Not the start of a value; the token is left for the caller. */ + break; + } + } + return result; +} + +jtk_JsonValue_t* newJsonValue(jtk_JsonValueType_t type) { + jtk_JsonValue_t* value = allocate(jtk_JsonValue_t, 1); + value->type = type; + return value; +} + +jtk_JsonValue_t* jtk_JsonValue_forObject() { + jtk_ObjectAdapter_t* adapter = jtk_CStringObjectAdapter_getInstance(); + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_OBJECT); + value->object = jtk_HashMap_new(adapter, NULL); + return value; +} + +jtk_JsonValue_t* jtk_JsonValue_forArray() { + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_ARRAY); + value->array = jtk_ArrayList_new(); + return value; +} + +jtk_JsonValue_t* jtk_JsonValue_forString(const uint8_t* text, int32_t length) { + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_STRING); + value->string.bytes = jtk_CString_newEx(text, length); + value->string.length = length; + return value; +} + +jtk_JsonValue_t* jtk_JsonValue_forNumber(const uint8_t* text, int32_t length) { + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_NUMBER); + value->number = 0; // TODO + return 
value; +} + +/* Creates a boolean value holding false. */ +jtk_JsonValue_t* jtk_JsonValue_forFalse() { + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_BOOLEAN); + /* Store through the union member that matches the BOOLEAN tag. */ + value->boolean = false; + return value; +} + +/* Creates a boolean value holding true. */ +jtk_JsonValue_t* jtk_JsonValue_forTrue() { + jtk_JsonValue_t* value = newJsonValue(JTK_JSON_VALUE_BOOLEAN); + /* Store through the union member that matches the BOOLEAN tag. */ + value->boolean = true; + return value; +} + +void handleError() { + printf("Errro!\n"); +} \ No newline at end of file diff --git a/test/source/com/jtk/collection/list/ArrayListTest.c b/test/source/com/jtk/collection/list/ArrayListTest.c index 64d71b6..fe4a51d 100644 --- a/test/source/com/jtk/collection/list/ArrayListTest.c +++ b/test/source/com/jtk/collection/list/ArrayListTest.c @@ -112,6 +112,6 @@ void jtk_ArrayListTest_testAdd(jtk_TestCase_t* testCase, jtk_ArrayListBundle_t* for (i = 0; i < k; i++) { jtk_TestCase_assertEquals_i(testCase, i, (intptr_t)jtk_ArrayList_getValue(bundle->m_list, i)); } - + printf("Size: %d, Capacity: %d\n", jtk_ArrayList_getSize(bundle->m_list), jtk_ArrayList_getCapacity(bundle->m_list)); } \ No newline at end of file diff --git a/test/source/com/jtk/collection/list/suite.json b/test/source/com/jtk/collection/list/suite.json new file mode 100644 index 0000000..8c7b7df --- /dev/null +++ b/test/source/com/jtk/collection/list/suite.json @@ -0,0 +1,10 @@ +[ + { + "text": "Ensure that the list is not empty.", + "id": "testNotEmpty", + "setUp": "setUpList", + "destroy": "destroy", + "pending": true, + "tags": [ "checker", "coder" ] + } +] \ No newline at end of file From 4bc50d31f95a978cef8234861ceffd8f9e0ba787 Mon Sep 17 00:00:00 2001 From: Samuel Rowe Date: Wed, 24 Jun 2020 18:21:15 +0530 Subject: [PATCH 2/3] Implemented a utility function to convert JSON values to strings. 
--- include/jtk/support/json.h | 18 +++++- source/jtk/support/json.c | 119 ++++++++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 2 deletions(-) diff --git a/include/jtk/support/json.h b/include/jtk/support/json.h index a23aaeb..364b312 100644 --- a/include/jtk/support/json.h +++ b/include/jtk/support/json.h @@ -64,6 +64,22 @@ jtk_JsonValue_t* jtk_JsonValue_forTrue(); jtk_JsonValue_t* jtk_JsonValue_forFalse(); jtk_JsonValue_t* jtk_parseJson(const uint8_t* sequence, int32_t size, jtk_JsonError_t* error); -uint8_t* jtk_toJson(jtk_JsonValue_t* value, int32_t* size); + +/** + * Sometimes you may want to generate JSON instead of parsing it. In such cases, + * you can manually create `jtk_JsonValue_t` objects to create your data and + * invoke `jtk_toJson()`. It will convert the objects to a JSON string. + * + * For example, consider in a text editor application you store all the user + * preferences in a JSON file. Therefore, you will load that file during application + * startup. The preferences will be stored in a `jtk_JsonValue_t` object. When + * your user updates the preferences from your text editor, you need to save it + * to the JSON file. Instead of manually constructing a JSON string, you can just + * edit the `jtk_JsonValue_t` object you initially loaded. When the user wants + * to save the preferences, you simply invoke `jtk_toJson()` and get an equivalent + * string for the user's preferences. You can then write that string to the JSON + * file. 
+ */ +uint8_t* jtk_toJson(jtk_JsonValue_t* value, int32_t* size, bool pretty); #endif /* JTK_SUPPORT_JSON_H */ \ No newline at end of file diff --git a/source/jtk/support/json.c b/source/jtk/support/json.c index 5a06108..f0efde6 100644 --- a/source/jtk/support/json.c +++ b/source/jtk/support/json.c @@ -230,6 +230,11 @@ static Token* createToken(Parser* parser); static void onNewLine(Parser* parser); static void decimalIntegerLiteral(Parser* parser); +static void indent(jtk_StringBuilder_t* builder, int32_t depth); +static void toString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value); +static void toPrettyString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value, int32_t depth, bool member); + + Parser* newParser(const uint8_t* input, int32_t inputSize) { /* The constructor invokes consume() to initialize * the LA(1) character. Therefore, we assign negative values @@ -822,4 +827,116 @@ jtk_JsonValue_t* jtk_JsonValue_forTrue() { void handleError() { printf("Errro!\n"); -} \ No newline at end of file +} + +/* Appends `depth` levels of four-space indentation to the builder. */ +void indent(jtk_StringBuilder_t* builder, int32_t depth) { + jtk_StringBuilder_multiply_z(builder, "    ", 4, depth); +} + +/* + * Appends the compact JSON form of the value to the builder. Only objects, + * arrays and strings are written; every other value type is currently skipped. + */ +void toString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value) { + if (value->type == JTK_JSON_VALUE_OBJECT) { + jtk_StringBuilder_appendCodePoint(builder, '{'); + jtk_Iterator_t* iterator = jtk_HashMap_getEntryIterator(value->object); + bool first = true; + while (jtk_Iterator_hasNext(iterator)) { + jtk_HashMapEntry_t* entry = (jtk_HashMapEntry_t*)jtk_Iterator_getNext(iterator); + + if (!first) { + jtk_StringBuilder_appendCodePoint(builder, ','); + } + + jtk_StringBuilder_appendCodePoint(builder, '"'); + jtk_StringBuilder_append_z(builder, (char*)entry->m_key); + /* Close the key's quote before the separator, as toPrettyString() does. */ + jtk_StringBuilder_appendEx_z(builder, "\":", 2); + toString(builder, (jtk_JsonValue_t*)entry->m_value); + + first = false; + } + jtk_StringBuilder_appendCodePoint(builder, '}'); + } + else if (value->type == JTK_JSON_VALUE_ARRAY) { + 
jtk_StringBuilder_appendCodePoint(builder, '['); + int32_t limit = jtk_ArrayList_getSize(value->array); + int32_t i; + for (i = 0; i < limit; i++) { + jtk_JsonValue_t* element = (jtk_JsonValue_t*)jtk_ArrayList_getValue(value->array, i); + toString(builder, element); + + if (i + 1 < limit) { + jtk_StringBuilder_appendCodePoint(builder, ','); + } + } + jtk_StringBuilder_appendCodePoint(builder, ']'); + } + else if (value->type == JTK_JSON_VALUE_STRING) { + jtk_StringBuilder_appendCodePoint(builder, '"'); + jtk_StringBuilder_appendEx_z(builder, value->string.bytes, value->string.length); + jtk_StringBuilder_appendCodePoint(builder, '"'); + } +} + +void toPrettyString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value, + int32_t depth, bool member) { + if (value->type == JTK_JSON_VALUE_OBJECT) { + jtk_StringBuilder_appendEx_z(builder, "{\n", 2); + jtk_Iterator_t* iterator = jtk_HashMap_getEntryIterator(value->object); + bool first = true; + while (jtk_Iterator_hasNext(iterator)) { + jtk_HashMapEntry_t* entry = (jtk_HashMapEntry_t*)jtk_Iterator_getNext(iterator); + + if (!first) { + jtk_StringBuilder_appendEx_z(builder, ",\n", 2); + } + + indent(builder, depth + 1); + jtk_StringBuilder_appendCodePoint(builder, '"'); + jtk_StringBuilder_append_z(builder, (char*)entry->m_key); + jtk_StringBuilder_appendEx_z(builder, "\": ", 3); + toPrettyString(builder, (jtk_JsonValue_t*)entry->m_value, depth + 1, true); + + first = false; + } + jtk_StringBuilder_appendCodePoint(builder, '\n'); + indent(builder, depth); + jtk_StringBuilder_appendCodePoint(builder, '}'); + } + else if (value->type == JTK_JSON_VALUE_ARRAY) { + jtk_StringBuilder_appendEx_z(builder, "[\n", 2); + int32_t limit = jtk_ArrayList_getSize(value->array); + int32_t i; + for (i = 0; i < limit; i++) { + jtk_JsonValue_t* element = (jtk_JsonValue_t*)jtk_ArrayList_getValue(value->array, i); + indent(builder, depth + 1); + toPrettyString(builder, element, depth + 1, true); + + if (i + 1 < limit) { + 
jtk_StringBuilder_appendCodePoint(builder, ','); + } + jtk_StringBuilder_appendCodePoint(builder, '\n'); + } + indent(builder, depth); + jtk_StringBuilder_appendCodePoint(builder, ']'); + } + else if (value->type == JTK_JSON_VALUE_STRING) { + jtk_StringBuilder_appendCodePoint(builder, '"'); + jtk_StringBuilder_appendEx_z(builder, (char*)value->string.bytes, value->string.length); + jtk_StringBuilder_appendCodePoint(builder, '"'); + } + + if (!member) { + jtk_StringBuilder_appendCodePoint(builder, '\n'); + } +} + +uint8_t* jtk_toJson(jtk_JsonValue_t* value, int32_t* size, bool pretty) { + jtk_StringBuilder_t* builder = jtk_StringBuilder_new(); + if (pretty) { + toPrettyString(builder, value, 0, false); + } + else { + toString(builder, value); + } + uint8_t* result = jtk_StringBuilder_toCString(builder, size); + jtk_StringBuilder_delete(builder); + return result; +} From 04c72ca67a7e0bde4668582d997f5294d5c01a42 Mon Sep 17 00:00:00 2001 From: Samuel Rowe Date: Thu, 25 Jun 2020 10:42:06 +0530 Subject: [PATCH 3/3] Implemented a destructor for JSON values. The destructor recursively traverses the object tree releasing memory accordingly. Since, key-value pairs are stored in a hash map, the key needs to persist longer than the lifetime of a token, which is where we derive the key from. As of now, tokens are not freed. In the future, we need to implement a recyclable token that is destroyed along with the parser. Removed the `TokenChannel` enumeration because the tokens are always generated on the default channel, given we don't generate tokens for whitespace. 
--- include/jtk/support/json.h | 1 + source/jtk/support/json.c | 67 ++++++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/include/jtk/support/json.h b/include/jtk/support/json.h index 364b312..76992c3 100644 --- a/include/jtk/support/json.h +++ b/include/jtk/support/json.h @@ -62,6 +62,7 @@ jtk_JsonValue_t* jtk_JsonValue_forString(const uint8_t* text, int32_t length); jtk_JsonValue_t* jtk_JsonValue_forNumber(const uint8_t* text, int32_t length); jtk_JsonValue_t* jtk_JsonValue_forTrue(); jtk_JsonValue_t* jtk_JsonValue_forFalse(); +void jtk_JsonValue_delete(jtk_JsonValue_t* value); jtk_JsonValue_t* jtk_parseJson(const uint8_t* sequence, int32_t size, jtk_JsonError_t* error); diff --git a/source/jtk/support/json.c b/source/jtk/support/json.c index f0efde6..2dddf94 100644 --- a/source/jtk/support/json.c +++ b/source/jtk/support/json.c @@ -72,17 +72,6 @@ static char tokenNames[][25] = { "" }; -/******************************************************************************* - * TokenChannel * - *******************************************************************************/ - -enum TokenChannel { - TOKEN_CHANNEL_DEFAULT, - TOKEN_CHANNEL_HIDDEN, -}; - -typedef enum TokenChannel TokenChannel; - /******************************************************************************* * Token * *******************************************************************************/ @@ -94,7 +83,6 @@ typedef enum TokenChannel TokenChannel; * with it. 
*/ struct Token { - TokenChannel channel; TokenType type; uint8_t* text; int32_t length; @@ -110,7 +98,7 @@ struct Token { typedef struct Token Token; -Token* newToken(TokenChannel channel, TokenType type, +Token* newToken(TokenType type, const uint8_t* text, int32_t length, int32_t startIndex, int32_t stopIndex, int32_t startLine, int32_t stopLine, int32_t startColumn, int32_t stopColumn, const char* file); @@ -179,19 +167,13 @@ struct Parser { * Determines whether the lexer has reached the end of * the input stream. */ - int32_t hitEndOfStream:1; + int32_t hitEOF:1; /** * The token that was most recently emitted. */ Token* token; - /** - * The channel on which the next recognized - * token will be created on. - */ - TokenChannel channel; - /** * The text consumed so far to recognize the next * token. @@ -252,9 +234,8 @@ Parser* newParser(const uint8_t* input, int32_t inputSize) { parser->startIndex = 0; parser->startLine = 0; parser->startColumn = 0; - parser->hitEndOfStream = false; + parser->hitEOF = false; parser->token = NULL; - parser->channel = TOKEN_CHANNEL_DEFAULT; parser->text = jtk_StringBuilder_new(); parser->type = TOKEN_UNKNOWN; @@ -270,7 +251,6 @@ Parser* newParser(const uint8_t* input, int32_t inputSize) { *******************************************************************************/ Token* newToken( - TokenChannel channel, TokenType type, const uint8_t* text, int32_t length, @@ -282,7 +262,6 @@ Token* newToken( int32_t stopColumn, const char* file) { Token* token = allocate(Token, 1); - token->channel = channel; token->type = type; token->text = jtk_CString_newEx(text, length); token->length = length; // This is the length of the text representation! 
@@ -346,7 +325,6 @@ Token* createToken(Parser* parser) { Token* token = newToken( - parser->channel, parser->type, text, length, @@ -415,14 +393,13 @@ Token* nextToken(Parser* parser) { parser->token = NULL; parser->type = TOKEN_UNKNOWN; jtk_StringBuilder_clear(parser->text); - parser->channel = TOKEN_CHANNEL_DEFAULT; parser->startIndex = parser->index; parser->startLine = parser->line; parser->startColumn = parser->column; if (parser->index == parser->inputSize) { parser->type = TOKEN_EOF; - parser->hitEndOfStream = true; + parser->hitEOF = true; } else { @@ -641,6 +618,7 @@ Token* nextToken(Parser* parser) { return newToken; } +// TODO: Recycle the token object and delete it along with the parser. Token* consume(Parser* parser) { Token* temporary = parser->lt1; parser->lt1 = nextToken(parser); @@ -688,9 +666,10 @@ jtk_JsonValue_t* parseObject(Parser* parser) { if (parser->lt1->type == TOKEN_STRING) { Token* key = consume(parser); + uint8_t* keyText = jtk_CString_newEx(key->text, key->length); /* keyText is a separate string built from the token text (presumably a copy; confirm jtk_CString_newEx), so the map no longer stores key->text itself. */ match(parser, TOKEN_COLON); jtk_JsonValue_t* value = parseValue(parser); - jtk_HashMap_put(result->object, key->text, value); + jtk_HashMap_put(result->object, keyText, value); while (parser->lt1->type == TOKEN_COMMA) { consume(parser); @@ -825,6 +804,36 @@ jtk_JsonValue_t* jtk_JsonValue_forTrue() { return value; } +void jtk_JsonValue_delete(jtk_JsonValue_t* value) { /* Frees a JSON value together with the payload selected by its type tag; a NULL value is a no-op. */ + if (value != NULL) { + if (value->type == JTK_JSON_VALUE_STRING) { /* String: release the byte buffer of the string. */ + jtk_CString_delete(value->string.bytes); + } + else if (value->type == JTK_JSON_VALUE_ARRAY) { /* Array: recursively delete every element, then the list itself. */ + int32_t limit = jtk_ArrayList_getSize(value->array); + int32_t i; + for (i = 0; i < limit; i++) { + jtk_JsonValue_t* temporary = + (jtk_JsonValue_t*)jtk_ArrayList_getValue(value->array, i); + jtk_JsonValue_delete(temporary); + } + jtk_ArrayList_delete(value->array); + } + else if (value->type == JTK_JSON_VALUE_OBJECT) { /* Object: delete the key and the value of every entry, then the iterator and the map. */ + jtk_Iterator_t* iterator = jtk_HashMap_getEntryIterator(value->object); + while (jtk_Iterator_hasNext(iterator)) { +
jtk_HashMapEntry_t* entry = + (jtk_HashMapEntry_t*)jtk_Iterator_getNext(iterator); + jtk_CString_delete((uint8_t*)entry->m_key); /* NOTE(review): assumes each key is a string this function may free, as with the keyText stored by parseObject; confirm for maps populated elsewhere. */ + jtk_JsonValue_delete((jtk_JsonValue_t*)entry->m_value); /* Recurse into the value stored under this key. */ + } + jtk_Iterator_delete(iterator); /* The iterator is released before the map it walks. */ + jtk_HashMap_delete(value->object); + } + deallocate(value); /* Runs for every non-NULL value, whichever branch (if any) matched above. */ + } +} + void handleError() { printf("Errro!\n"); } @@ -852,6 +861,7 @@ void toString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value) { first = false; } + jtk_Iterator_delete(iterator); /* Delete the iterator before the closing brace is appended. */ jtk_StringBuilder_appendCodePoint(builder, '}'); } else if (value->type == JTK_JSON_VALUE_ARRAY) { @@ -896,6 +906,7 @@ void toPrettyString(jtk_StringBuilder_t* builder, jtk_JsonValue_t* value, first = false; } + jtk_Iterator_delete(iterator); /* Delete the iterator before the closing newline, indentation and brace are appended. */ jtk_StringBuilder_appendCodePoint(builder, '\n'); indent(builder, depth); jtk_StringBuilder_appendCodePoint(builder, '}');