From 967be9e750221ab2ab783f95df79bb26d290a45e Mon Sep 17 00:00:00 2001 From: Martial Simon Date: Mon, 15 Sep 2025 01:07:58 +0200 Subject: add: added projects --- myfind/ast_evaluation/parser.c | 258 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 myfind/ast_evaluation/parser.c (limited to 'myfind/ast_evaluation/parser.c') diff --git a/myfind/ast_evaluation/parser.c b/myfind/ast_evaluation/parser.c new file mode 100644 index 0000000..66ab59b --- /dev/null +++ b/myfind/ast_evaluation/parser.c @@ -0,0 +1,258 @@ +#include "parser.h" + +#include +#include +#include +#include +#include + +#include "memory.h" + +enum my_token_type +{ + TOKEN_PLUS = 0, + TOKEN_MINUS, + TOKEN_MULTIPLY, + TOKEN_DIVIDE, + TOKEN_LEFT_PARENTHESIS, + TOKEN_RIGHT_PARENTHESIS, + TOKEN_NUMBER +}; + +struct my_token +{ + enum my_token_type type; + int value; +}; + +struct my_tokens +{ + struct my_token *data; + unsigned length; +}; + +struct token_model +{ + const char *str; + enum my_token_type token_type; +}; + +static struct my_token parse_token(const char *str) +{ + static struct token_model exprs[] = { { "+", TOKEN_PLUS }, + { "-", TOKEN_MINUS }, + { "*", TOKEN_MULTIPLY }, + { "/", TOKEN_DIVIDE }, + { "(", TOKEN_LEFT_PARENTHESIS }, + { ")", TOKEN_RIGHT_PARENTHESIS } }; + + struct my_token token = { TOKEN_NUMBER, 0 }; + for (unsigned i = 0; i < sizeof(exprs) / sizeof(*exprs); ++i) + { + if (!strcmp(str, exprs[i].str)) + { + token.type = exprs[i].token_type; + return token; + } + } + + if (sscanf(str, "%d", &token.value) == 1) + return token; + + errx(1, "Token not recognized: %s", str); +} + +static const char *token_str(struct my_token *token) +{ + static const char *token_strs[] = { "+", "-", "*", "/", "(", ")" }; + static char number_str[11]; + + if (token->type != TOKEN_NUMBER) + return token_strs[token->type]; + + sprintf(number_str, "%d", token->value); + return number_str; +} + +static struct my_expr *new_expr(void) +{ + return my_malloc(sizeof(struct my_expr)); +} + +static struct my_expr *clone_expr(struct my_expr *expr) +{ + return memcpy(new_expr(), expr, sizeof(*expr)); +} + +/** + * Returns the index of the matching right parenthesis + */ +static unsigned find_parenthesis(struct my_token *tokens, unsigned index, + unsigned length) +{ + unsigned level = 1; + + for (unsigned i = index; i < length; ++i) + { + enum my_token_type token_type = tokens[i].type; + if (token_type == TOKEN_LEFT_PARENTHESIS) + ++level; + if (token_type == TOKEN_RIGHT_PARENTHESIS) + --level; + + if (!level) + return i; + } + + errx(1, "Expected token: ')'"); +} + +static struct my_expr *parse_operations(struct my_token *tokens, + unsigned *index, unsigned length); + +static struct my_expr *parse_operand(struct my_token *tokens, unsigned *index, + unsigned length) +{ + assert(*index < length); + struct my_token *token = tokens + (*index)++; + + // Handle negation (e.g. "- 4") + if (token->type == TOKEN_MINUS) + { + if (length == 1) + errx(1, "Expected expression after token: -"); + + struct my_expr expr = { .data.value = 0, .type = EXPR_NUMBER }; + expr.type = EXPR_NEGATION; + expr.data.children.left = parse_operand(tokens, index, length); + return clone_expr(&expr); + } + if (token->type == TOKEN_NUMBER) + { + struct my_expr expr = { EXPR_NUMBER, { .value = token->value } }; + return clone_expr(&expr); + } + else if (token->type == TOKEN_LEFT_PARENTHESIS) + { + unsigned right = find_parenthesis(tokens, *index, length); + struct my_expr *expr = parse_operations(tokens, index, right); + *index = right + 1; + return expr; + } + + errx(1, "Expected tokens: -, '(' or number, but got: %s", token_str(token)); +} + +static struct my_expr *build_op_mult(struct my_expr **expr_stack, + unsigned expr_stack_length, + enum my_expr_type *op_stack, + unsigned op_stack_length) +{ + if (!op_stack_length) + return *expr_stack; + + struct my_expr expr = { .data.value = 0, .type = EXPR_NUMBER }; + expr.type = op_stack[op_stack_length - 1]; + expr.data.children.left = build_op_mult(expr_stack, expr_stack_length - 1, + op_stack, op_stack_length - 1); + + expr.data.children.right = expr_stack[expr_stack_length - 1]; + return clone_expr(&expr); +} + +static struct my_expr *build_op_sum(struct my_expr **expr_stack, + unsigned expr_stack_length, + enum my_expr_type *op_stack, + unsigned op_stack_length) +{ + /* + * Here, i will overflow at the end of the loop. + * No worries: overflow is perfectly defined for unsigned types. + * In that case, 0 - 1 == ~0, which is not < op_stack_length. + */ + for (unsigned i = op_stack_length - 1; i < op_stack_length; --i) + if (op_stack[i] == EXPR_ADDITION || op_stack[i] == EXPR_SUBTRACTION) + { + struct my_expr expr = { .data.value = 0, .type = EXPR_NUMBER }; + expr.type = op_stack[i]; + expr.data.children.left = + build_op_sum(expr_stack, i + 1, op_stack, i); + + expr.data.children.right = + build_op_mult(expr_stack + i + 1, expr_stack_length - (i + 1), + op_stack + i + 1, op_stack_length - (i + 1)); + return clone_expr(&expr); + } + + return build_op_mult(expr_stack, expr_stack_length, op_stack, + op_stack_length); +} + +static struct my_expr *parse_operations(struct my_token *tokens, + unsigned *index, unsigned length) +{ + assert(*index < length); + + unsigned expr_stack_length = 0; + struct my_expr **expr_stack = my_malloc(sizeof(*expr_stack)); + expr_stack[expr_stack_length++] = parse_operand(tokens, index, length); + + unsigned op_stack_length = 0; + enum my_expr_type *op_stack = NULL; + + while (*index < length) + { + struct my_token *token = tokens + (*index)++; + + // If the token is not an operator + if (!(token->type < TOKEN_LEFT_PARENTHESIS)) + errx(1, "Expected +, -, * or /, got %s", token_str(token)); + if (*index >= length) + errx(1, "Expected expression after %s", token_str(token)); + + expr_stack = my_reallocarray(expr_stack, expr_stack_length + 1, + sizeof(*expr_stack)); + expr_stack[expr_stack_length++] = parse_operand(tokens, index, length); + + op_stack = + my_reallocarray(op_stack, op_stack_length + 1, sizeof(*op_stack)); + // Legitimate cast: ops mapping between both enums matches + int expr_type_int = token->type; + op_stack[op_stack_length++] = expr_type_int; + } + + struct my_expr *expr = + build_op_sum(expr_stack, expr_stack_length, op_stack, op_stack_length); + + free(expr_stack); + free(op_stack); + + return expr; +} + +void free_expr(struct my_expr *expr) +{ + if (expr->type != EXPR_NUMBER) + { + if (expr->data.children.right) + free_expr(expr->data.children.right); + if (expr->data.children.left) + free_expr(expr->data.children.left); + } + free(expr); +} + +struct my_expr *parse_expr(char **strs, unsigned length) +{ + struct my_token *tokens = my_calloc(length, sizeof(struct my_token)); + + for (unsigned i = 0; i < length; ++i) + tokens[i] = parse_token(strs[i]); + + unsigned index = 0; + struct my_expr *expr = parse_operations(tokens, &index, length); + + free(tokens); + + return expr; +} -- cgit v1.2.3