From 967be9e750221ab2ab783f95df79bb26d290a45e Mon Sep 17 00:00:00 2001
From: Martial Simon
Date: Mon, 15 Sep 2025 01:07:58 +0200
Subject: add: added projects

---
Review note: the file contents below were revised after the original commit;
the commit id and the blob ids on the "index" lines still name the originals.

 21sh/ll-expr/Makefile                 |  35 +++++++
 21sh/ll-expr/src/ast/ast.c            |  27 ++++++
 21sh/ll-expr/src/ast/ast.h            |  43 +++++++++
 21sh/ll-expr/src/eval/ast_print.c     |  64 +++++++++++++
 21sh/ll-expr/src/eval/rpn_print.c     |  65 ++++++++++++++
 21sh/ll-expr/src/eval/token_printer.c |  38 ++++++++
 21sh/ll-expr/src/lexer/lexer.c        |  98 +++++++++++++++++++++
 21sh/ll-expr/src/lexer/lexer.h        |  66 ++++++++++++++
 21sh/ll-expr/src/lexer/token.h        |  25 +++++
 21sh/ll-expr/src/parser/parser.c      | 162 ++++++++++++++++++++++++++++++++++
 21sh/ll-expr/src/parser/parser.h      |  50 ++++++++++
 11 files changed, 673 insertions(+)
 create mode 100644 21sh/ll-expr/Makefile
 create mode 100644 21sh/ll-expr/src/ast/ast.c
 create mode 100644 21sh/ll-expr/src/ast/ast.h
 create mode 100644 21sh/ll-expr/src/eval/ast_print.c
 create mode 100644 21sh/ll-expr/src/eval/rpn_print.c
 create mode 100644 21sh/ll-expr/src/eval/token_printer.c
 create mode 100644 21sh/ll-expr/src/lexer/lexer.c
 create mode 100644 21sh/ll-expr/src/lexer/lexer.h
 create mode 100644 21sh/ll-expr/src/lexer/token.h
 create mode 100644 21sh/ll-expr/src/parser/parser.c
 create mode 100644 21sh/ll-expr/src/parser/parser.h

(limited to '21sh/ll-expr')

diff --git a/21sh/ll-expr/Makefile b/21sh/ll-expr/Makefile
new file mode 100644
index 0000000..0293956
--- /dev/null
+++ b/21sh/ll-expr/Makefile
@@ -0,0 +1,35 @@
+CC = gcc
+CFLAGS = -Wall -Werror -Wextra -std=c99 -pedantic -Wvla -fsanitize=address
+CPPFLAGS = -Isrc -Isrc/parser -Isrc/lexer -Isrc/ast
+LDFLAGS = -fsanitize=address
+
+OBJS = src/ast/ast.o \
+       src/lexer/lexer.o \
+       src/parser/parser.o
+
+OBJ_AST = src/eval/ast_print.o
+OBJ_RPN = src/eval/rpn_print.o
+OBJ_TOKEN = src/eval/token_printer.o
+
+# None of these targets names a file that its rule creates.
+.PHONY: all ast rpn token clean
+
+all: ast rpn token
+
+# Short aliases, so that `make ast`, `make rpn` and `make token` keep working.
+ast: eval_ast
+rpn: eval_rpn
+token: eval_token
+
+eval_ast: $(OBJS) $(OBJ_AST)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+eval_rpn: $(OBJS) $(OBJ_RPN)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+eval_token: $(OBJS) $(OBJ_TOKEN)
+	$(CC) $(LDFLAGS) -o $@ $^
+
+clean:
+	$(RM) $(OBJS) $(OBJ_AST) $(OBJ_TOKEN) $(OBJ_RPN) eval_*
+
diff --git a/21sh/ll-expr/src/ast/ast.c b/21sh/ll-expr/src/ast/ast.c
new file mode 100644
index 0000000..701d40e
--- /dev/null
+++ b/21sh/ll-expr/src/ast/ast.c
@@ -0,0 +1,27 @@
+#include "ast.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+struct ast *ast_new(enum ast_type type)
+{
+    struct ast *new = calloc(1, sizeof(*new));
+    if (!new)
+        return NULL;
+    new->type = type;
+    return new;
+}
+
+void ast_free(struct ast *ast)
+{
+    if (ast == NULL)
+        return;
+
+    ast_free(ast->left);
+    ast->left = NULL;
+
+    ast_free(ast->right);
+    ast->right = NULL;
+
+    free(ast);
+}
diff --git a/21sh/ll-expr/src/ast/ast.h b/21sh/ll-expr/src/ast/ast.h
new file mode 100644
index 0000000..01e0064
--- /dev/null
+++ b/21sh/ll-expr/src/ast/ast.h
@@ -0,0 +1,43 @@
+#ifndef AST_H
+#define AST_H
+
+#include <sys/types.h>
+
+enum ast_type
+{
+    AST_PLUS,
+    AST_MINUS,
+    AST_MUL,
+    AST_DIV,
+    AST_NUMBER,
+    AST_NEG
+};
+
+/**
+ * This very simple AST structure should be sufficient for a simple AST.
+ * It is however, NOT GOOD ENOUGH for more complicated projects, such as a
+ * shell. Please read the project guide for some insights about other kinds of
+ * ASTs.
+ */
+struct ast
+{
+    enum ast_type type; // The kind of node we're dealing with
+    ssize_t value; // If the node is a number, it stores its value
+    struct ast *left; // The left branch if any, unary or binary
+    struct ast *right; // The right branch of the binary node
+};
+
+/**
+ ** \brief Allocates a new ast with the given type.
+ **
+ ** The node is zero-initialized: its value is 0 and it has no children.
+ ** \return the new node, or NULL if the allocation failed.
+ */
+struct ast *ast_new(enum ast_type type);
+
+/**
+ ** \brief Recursively frees the given ast. NULL is accepted and ignored.
+ */
+void ast_free(struct ast *ast);
+
+#endif /* !AST_H */
diff --git a/21sh/ll-expr/src/eval/ast_print.c b/21sh/ll-expr/src/eval/ast_print.c
new file mode 100644
index 0000000..9d7cbb8
--- /dev/null
+++ b/21sh/ll-expr/src/eval/ast_print.c
@@ -0,0 +1,64 @@
+#include <stdio.h>
+
+#include "lexer.h"
+#include "parser.h"
+
+/* Symbol printed for each binary node type. */
+static const char tab[] = { [AST_PLUS] = '+',
+                            [AST_MINUS] = '-',
+                            [AST_MUL] = '*',
+                            [AST_DIV] = '/' };
+
+/* Prints the tree in fully parenthesized infix notation. */
+static void print_ast(const struct ast *ast)
+{
+    if (ast == NULL)
+        return;
+
+    if (ast->type == AST_NUMBER)
+        printf("%zd", ast->value);
+    else if (ast->type == AST_NEG)
+    {
+        // The operand may be a whole sub-expression, not only a number.
+        printf("-");
+        print_ast(ast->left);
+    }
+    else
+    {
+        printf("(");
+
+        print_ast(ast->left);
+
+        printf("%c", tab[ast->type]);
+
+        print_ast(ast->right);
+
+        printf(")");
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+        return 1;
+
+    struct lexer *lexer = lexer_new(argv[1]);
+    if (lexer == NULL)
+        return 1;
+
+    enum parser_status status = PARSER_OK;
+    struct ast *ast = parse(&status, lexer);
+    if (status != PARSER_OK)
+    {
+        lexer_free(lexer);
+        return 1;
+    }
+
+    print_ast(ast);
+    printf("\n");
+
+    ast_free(ast);
+    lexer_free(lexer);
+
+    return 0;
+}
diff --git a/21sh/ll-expr/src/eval/rpn_print.c b/21sh/ll-expr/src/eval/rpn_print.c
new file mode 100644
index 0000000..defb519
--- /dev/null
+++ b/21sh/ll-expr/src/eval/rpn_print.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+
+#include "lexer.h"
+#include "parser.h"
+
+/* Symbol printed for each binary node type. */
+static const char tab[] = { [AST_PLUS] = '+',
+                            [AST_MINUS] = '-',
+                            [AST_MUL] = '*',
+                            [AST_DIV] = '/' };
+
+/* Prints the tree in reverse polish notation, one space after each item. */
+static void print_ast(const struct ast *ast)
+{
+    if (!ast)
+        return;
+
+    if (ast->type == AST_NUMBER)
+        printf("%zd ", ast->value);
+    else if (ast->type == AST_NEG)
+    {
+        if (ast->left && ast->left->type == AST_NUMBER)
+            printf("-%zd ", ast->left->value);
+        else
+        {
+            // Negating a sub-expression: emit it as `0 <expr> -`.
+            printf("0 ");
+            print_ast(ast->left);
+            printf("- ");
+        }
+    }
+    else
+    {
+        print_ast(ast->left);
+        print_ast(ast->right);
+
+        printf("%c ", tab[ast->type]);
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+        return 1;
+
+    struct lexer *lexer = lexer_new(argv[1]);
+    if (lexer == NULL)
+        return 1;
+
+    enum parser_status status = PARSER_OK;
+    struct ast *ast = parse(&status, lexer);
+    if (status != PARSER_OK)
+    {
+        lexer_free(lexer);
+        return 1;
+    }
+
+    print_ast(ast);
+    printf("\n");
+
+    ast_free(ast);
+    lexer_free(lexer);
+
+    return 0;
+}
diff --git a/21sh/ll-expr/src/eval/token_printer.c b/21sh/ll-expr/src/eval/token_printer.c
new file mode 100644
index 0000000..78096f4
--- /dev/null
+++ b/21sh/ll-expr/src/eval/token_printer.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+
+#include "lexer.h"
+
+/* Character printed for each single-character token type. */
+static const char tab[] = {
+    [TOKEN_PLUS] = '+', [TOKEN_MINUS] = '-',    [TOKEN_MUL] = '*',
+    [TOKEN_DIV] = '/',  [TOKEN_LEFT_PAR] = '(', [TOKEN_RIGHT_PAR] = ')'
+};
+
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+        return 1;
+
+    struct lexer *lexer = lexer_new(argv[1]);
+    if (lexer == NULL)
+        return 1;
+
+    struct token token = lexer_pop(lexer);
+
+    while (token.type != TOKEN_EOF && token.type != TOKEN_ERROR)
+    {
+        if (token.type == TOKEN_NUMBER)
+            printf("%zd\n", token.value);
+        else
+            printf("%c\n", tab[token.type]);
+
+        token = lexer_pop(lexer);
+    }
+
+    if (token.type == TOKEN_EOF)
+        printf("EOF\n");
+
+    lexer_free(lexer);
+
+    return 0;
+}
diff --git a/21sh/ll-expr/src/lexer/lexer.c b/21sh/ll-expr/src/lexer/lexer.c
new file mode 100644
index 0000000..3b3d29f
--- /dev/null
+++ b/21sh/ll-expr/src/lexer/lexer.c
@@ -0,0 +1,98 @@
+#include "lexer.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct lexer *lexer_new(const char *input)
+{
+    // calloc also clears pos and current_tok.
+    struct lexer *new = calloc(1, sizeof(*new));
+    if (new == NULL)
+        return NULL;
+    new->input = input;
+    return new;
+}
+
+void lexer_free(struct lexer *lexer)
+{
+    free(lexer);
+}
+
+struct template
+{
+    char value;
+    enum token_type type;
+};
+
+// Single-character tokens; the NUL that ends the input maps to TOKEN_EOF.
+static const struct template templates[] = {
+    { '+', TOKEN_PLUS }, { '-', TOKEN_MINUS },    { '*', TOKEN_MUL },
+    { '/', TOKEN_DIV },  { '(', TOKEN_LEFT_PAR }, { ')', TOKEN_RIGHT_PAR },
+    { '\0', TOKEN_EOF }
+};
+
+static int is_digit(char c)
+{
+    return c >= '0' && c <= '9';
+}
+
+struct token lexer_next_token(struct lexer *lexer)
+{
+    while (lexer->input[lexer->pos] == ' ')
+        lexer->pos++;
+
+    const char *cur = lexer->input + lexer->pos;
+    const size_t count = sizeof(templates) / sizeof(templates[0]);
+    for (size_t i = 0; i < count; i++)
+    {
+        if (*cur == templates[i].value)
+        {
+            lexer->current_tok.type = templates[i].type;
+            return lexer->current_tok;
+        }
+    }
+
+    if (!is_digit(*cur))
+    {
+        lexer->current_tok.type = TOKEN_ERROR;
+        fprintf(stderr, "lexer: invalid token %c\n", *cur);
+        return lexer->current_tok;
+    }
+
+    // strtol stops at the first non-digit, so the input stays untouched.
+    errno = 0;
+    long value = strtol(cur, NULL, 10);
+    if (errno == ERANGE)
+    {
+        lexer->current_tok.type = TOKEN_ERROR;
+        fprintf(stderr, "lexer: number out of range\n");
+        return lexer->current_tok;
+    }
+
+    lexer->current_tok.type = TOKEN_NUMBER;
+    lexer->current_tok.value = value;
+    return lexer->current_tok;
+}
+
+struct token lexer_peek(struct lexer *lexer)
+{
+    return lexer_next_token(lexer);
+}
+
+struct token lexer_pop(struct lexer *lexer)
+{
+    struct token res = lexer_next_token(lexer);
+    if (res.type == TOKEN_NUMBER)
+    {
+        // Consume every digit that was read, leading zeros included.
+        while (is_digit(lexer->input[lexer->pos]))
+            lexer->pos++;
+    }
+    else if (res.type != TOKEN_EOF)
+    {
+        // EOF is not consumed: pos must never move past the final NUL.
+        lexer->pos++;
+    }
+    return res;
+}
diff --git a/21sh/ll-expr/src/lexer/lexer.h b/21sh/ll-expr/src/lexer/lexer.h
new file mode 100644
index 0000000..40a7cc9
--- /dev/null
+++ b/21sh/ll-expr/src/lexer/lexer.h
@@ -0,0 +1,66 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include "token.h"
+
+/**
+ * \page Lexer
+ *
+ * The lexer cuts some input text into blocks called tokens.
+ *
+ * This process is done **on demand**: the lexer doesn't read the
+ * input more than it needs, and only creates tokens when lexer_peek
+ * or lexer_pop is called.
+ *
+ * "2 + 3" will produce 3 tokens:
+ * - TOKEN_NUMBER { .value = 2 }
+ * - TOKEN_PLUS
+ * - TOKEN_NUMBER { .value = 3 }
+ */
+
+struct lexer
+{
+    const char *input; // The input data
+    size_t pos; // The current offset inside the input data
+    struct token current_tok; // The last token that was lexed
+};
+
+/**
+ * \brief Creates a new lexer given an input string.
+ *
+ * The input is not copied and must outlive the lexer.
+ * \return the new lexer, or NULL if the allocation failed.
+ */
+struct lexer *lexer_new(const char *input);
+
+/**
+ * \brief Frees the given lexer, but not its input.
+ */
+void lexer_free(struct lexer *lexer);
+
+/**
+ * \brief Returns a token from the input string.
+ *
+ * This function skips spaces, then builds a token from the characters at the
+ * current offset without consuming them. lexer_peek and lexer_pop call it.
+ * If the input is invalid, a message is printed on stderr and a TOKEN_ERROR
+ * token is returned.
+ */
+struct token lexer_next_token(struct lexer *lexer);
+
+/**
+ * \brief Returns the next token, but doesn't move forward: calling lexer_peek
+ * multiple times in a row always returns the same result.
+ * This function is meant to help the parser check if the next token matches
+ * some rule.
+ */
+struct token lexer_peek(struct lexer *lexer);
+
+/**
+ * \brief Returns the next token, and removes it from the stream:
+ * calling lexer_pop in a loop will iterate over all tokens until EOF.
+ * TOKEN_EOF itself is never consumed, so it is returned on every later call.
+ */
+struct token lexer_pop(struct lexer *lexer);
+
+#endif /* !LEXER_H */
diff --git a/21sh/ll-expr/src/lexer/token.h b/21sh/ll-expr/src/lexer/token.h
new file mode 100644
index 0000000..b0866fc
--- /dev/null
+++ b/21sh/ll-expr/src/lexer/token.h
@@ -0,0 +1,25 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <sys/types.h>
+
+enum token_type
+{
+    TOKEN_PLUS, // '+'
+    TOKEN_MINUS, // '-'
+    TOKEN_MUL, // '*'
+    TOKEN_DIV, // '/'
+    TOKEN_NUMBER, // "[0-9]+"
+    TOKEN_LEFT_PAR, // '('
+    TOKEN_RIGHT_PAR, // ')'
+    TOKEN_EOF, // end of input marker
+    TOKEN_ERROR // it is not a real token, it is returned in case of invalid
+                // input
+};
+
+struct token
+{
+    enum token_type type; // The kind of token
+    ssize_t value; // If the token is a number, its value
+};
+#endif /* !TOKEN_H */
diff --git a/21sh/ll-expr/src/parser/parser.c b/21sh/ll-expr/src/parser/parser.c
new file mode 100644
index 0000000..d1a1526
--- /dev/null
+++ b/21sh/ll-expr/src/parser/parser.c
@@ -0,0 +1,162 @@
+#include "parser.h"
+
+#include <stdio.h>
+
+/* One precedence level: its two operators and the rule for its operands. */
+struct level
+{
+    enum token_type tokens[2];
+    enum ast_type nodes[2];
+    struct ast *(*operand)(enum parser_status *status, struct lexer *lexer);
+};
+
+/* Flags a syntax error, reports it on stderr and returns NULL. */
+static struct ast *syntax_error(enum parser_status *status, const char *msg)
+{
+    *status = PARSER_UNEXPECTED_TOKEN;
+    fprintf(stderr, "parser: %s\n", msg);
+    return NULL;
+}
+
+/* Allocates a node; on failure flags PARSER_NO_MEMORY and returns NULL. */
+static struct ast *new_node(enum parser_status *status, enum ast_type type)
+{
+    struct ast *node = ast_new(type);
+    if (node == NULL)
+    {
+        *status = PARSER_NO_MEMORY;
+        fprintf(stderr, "parser: out of memory\n");
+    }
+    return node;
+}
+
+/*
+ * Parses `operand { ( op0 | op1 ) operand }` as a left-associative chain.
+ * On error the partially built tree is freed and NULL is returned.
+ */
+static struct ast *parse_level(enum parser_status *status, struct lexer *lexer,
+                               const struct level *level)
+{
+    struct ast *root = level->operand(status, lexer);
+    if (*status != PARSER_OK)
+    {
+        ast_free(root);
+        return NULL;
+    }
+
+    for (;;)
+    {
+        struct token tok = lexer_peek(lexer);
+        if (tok.type != level->tokens[0] && tok.type != level->tokens[1])
+            return root;
+        lexer_pop(lexer);
+
+        int second = tok.type == level->tokens[1];
+        struct ast *node = new_node(status, level->nodes[second]);
+        if (node == NULL)
+        {
+            ast_free(root);
+            return NULL;
+        }
+        node->left = root;
+        root = node;
+
+        root->right = level->operand(status, lexer);
+        if (*status != PARSER_OK)
+        {
+            ast_free(root);
+            return NULL;
+        }
+    }
+}
+
+struct ast *parse(enum parser_status *status, struct lexer *lexer)
+{
+    *status = PARSER_OK;
+
+    // Empty input is valid and produces no tree.
+    if (lexer_peek(lexer).type == TOKEN_EOF)
+        return NULL;
+
+    struct ast *exp = parse_exp(status, lexer);
+    if (*status != PARSER_OK)
+    {
+        ast_free(exp);
+        return NULL;
+    }
+
+    // Anything left after a complete expression is a syntax error.
+    if (lexer_pop(lexer).type != TOKEN_EOF)
+    {
+        ast_free(exp);
+        return syntax_error(status, "unexpected token");
+    }
+    return exp;
+}
+
+struct ast *parse_exp(enum parser_status *status, struct lexer *lexer)
+{
+    static const struct level additive = {
+        { TOKEN_PLUS, TOKEN_MINUS }, { AST_PLUS, AST_MINUS }, parse_sexp
+    };
+    return parse_level(status, lexer, &additive);
+}
+
+struct ast *parse_sexp(enum parser_status *status, struct lexer *lexer)
+{
+    static const struct level multiplicative = {
+        { TOKEN_MUL, TOKEN_DIV }, { AST_MUL, AST_DIV }, parse_texp
+    };
+    return parse_level(status, lexer, &multiplicative);
+}
+
+struct ast *parse_texp(enum parser_status *status, struct lexer *lexer)
+{
+    struct token tok = lexer_pop(lexer);
+    *status = PARSER_OK;
+
+    if (tok.type == TOKEN_NUMBER)
+    {
+        struct ast *number = new_node(status, AST_NUMBER);
+        if (number != NULL)
+            number->value = tok.value;
+        return number;
+    }
+
+    if (tok.type == TOKEN_MINUS)
+    {
+        enum token_type next = lexer_peek(lexer).type;
+        if (next != TOKEN_NUMBER && next != TOKEN_LEFT_PAR)
+            return syntax_error(status, "unexpected token");
+
+        struct ast *neg = new_node(status, AST_NEG);
+        if (neg == NULL)
+            return NULL;
+        neg->left = parse_texp(status, lexer);
+        if (*status != PARSER_OK)
+        {
+            // Do not hand a half-built node to callers that drop it.
+            ast_free(neg);
+            return NULL;
+        }
+        return neg;
+    }
+
+    if (tok.type == TOKEN_LEFT_PAR)
+    {
+        struct ast *exp = parse_exp(status, lexer);
+        if (*status != PARSER_OK)
+        {
+            ast_free(exp);
+            return NULL;
+        }
+        if (lexer_pop(lexer).type != TOKEN_RIGHT_PAR)
+        {
+            ast_free(exp);
+            return syntax_error(status, "expected closing parenthesis");
+        }
+        return exp;
+    }
+
+    return syntax_error(status, "unexpected token");
+}
diff --git a/21sh/ll-expr/src/parser/parser.h b/21sh/ll-expr/src/parser/parser.h
new file mode 100644
index 0000000..057c6bc
--- /dev/null
+++ b/21sh/ll-expr/src/parser/parser.h
@@ -0,0 +1,50 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "ast.h"
+#include "lexer.h"
+
+enum parser_status
+{
+    PARSER_OK,
+    PARSER_UNEXPECTED_TOKEN,
+    PARSER_NO_MEMORY, // an ast node could not be allocated
+};
+
+/**
+ * \brief Parses an expression or nothing.
+ *
+ * input = EOF
+ *       | exp EOF ;
+ *
+ * Every parse function stores its outcome in *status. When it is not
+ * PARSER_OK, NULL is returned and nothing is left for the caller to free.
+ */
+struct ast *parse(enum parser_status *status, struct lexer *lexer);
+
+/**
+ * \brief Parses sexp expressions separated by + and -.
+ *
+ * exp = sexp { ( '+' | '-' ) sexp } ;
+ */
+struct ast *parse_exp(enum parser_status *status, struct lexer *lexer);
+
+/**
+ * \brief Parses texp expressions separated by * and /.
+ *
+ * sexp = texp { ('*' | '/' ) texp } ;
+ */
+struct ast *parse_sexp(enum parser_status *status, struct lexer *lexer);
+
+/**
+ * \brief Parses a number, a parenthesized expression, or the negation of one.
+ *
+ * texp = NUMBER
+ *      | '-' NUMBER
+ *      | '-' '(' exp ')'
+ *      | '(' exp ')'
+ *      ;
+ */
+struct ast *parse_texp(enum parser_status *status, struct lexer *lexer);
+
+#endif /* !PARSER_H */
-- 
cgit v1.2.3