diff options
| author | Martial Simon <msimon_fr@hotmail.com> | 2025-09-15 01:07:58 +0200 |
|---|---|---|
| committer | Martial Simon <msimon_fr@hotmail.com> | 2025-09-15 01:07:58 +0200 |
| commit | 967be9e750221ab2ab783f95df79bb26d290a45e (patch) | |
| tree | 6802900a5e975f9f68b169f0f503f040056d6952 /42sh/src | |
Diffstat (limited to '42sh/src')
58 files changed, 5526 insertions, 0 deletions
diff --git a/42sh/src/42sh.c b/42sh/src/42sh.c new file mode 100644 index 0000000..780d575 --- /dev/null +++ b/42sh/src/42sh.c @@ -0,0 +1,16 @@ +#define _POSIX_C_SOURCE 200809L + +#include "helper.h" + +int main(int argc, char *argv[]) +{ + env_setup(); + + struct string *input = get_input(argc, argv); + + int r = _process_input(input); + + env_unset("PRETTY_PRINT"); + + return r; +} diff --git a/42sh/src/IO/IOBackend.h b/42sh/src/IO/IOBackend.h new file mode 100644 index 0000000..dc190db --- /dev/null +++ b/42sh/src/IO/IOBackend.h @@ -0,0 +1,27 @@ +#ifndef IOBACKEND_H +#define IOBACKEND_H + +#include "utils/libstring.h" + +#undef DEFAULT_RW_SIZE + +/** + * @brief Represents the 'count' parameter used in read/write syscalls. + */ +#define DEFAULT_RW_SIZE 256 + +/** + * @brief Retrieves the input provided to the main program. + * + * This function processes the program arguments and returns them as a single + * heap-allocated string. The returned string always ends with a null byte + * (`\0`). + * + * @param argc The argument count passed to main(). + * @param argv The argument vector passed to main(). + * @return A pointer to a `struct string` containing the input, or `NULL` on + * failure. + */ +struct string *get_input(int argc, char *argv[]); + +#endif /* ! IOBACKEND_H */ diff --git a/42sh/src/IO/Makefile.am b/42sh/src/IO/Makefile.am new file mode 100644 index 0000000..27b5721 --- /dev/null +++ b/42sh/src/IO/Makefile.am @@ -0,0 +1,11 @@ +lib_LIBRARIES = libIO.a + +libIO_a_SOURCES = \ + IOBackend.h \ + string_input.c + +libIO_a_CPPFLAGS = -I$(top_srcdir)/src + +libIO_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libIO.a diff --git a/42sh/src/IO/string_input.c b/42sh/src/IO/string_input.c new file mode 100644 index 0000000..2f252fb --- /dev/null +++ b/42sh/src/IO/string_input.c @@ -0,0 +1,115 @@ +#define _POSIX_C_SOURCE 200809L + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "IO/IOBackend.h" +#include "utils/env.h" + +static void _usage_xerror(void) +{ + errx(2, "Usage:\n\ + \t./42sh [--pretty-print] [FILENAME]\n\ + \t./42sh [-c | --pretty-print] [FILENAME]\n\ + \t./42sh [--pretty-print] [ < FILENAME ]"); +} + +static struct string *_parse_input(FILE *f) +{ + ssize_t r = 0; + struct string *output = string_create(NULL); + char buf[DEFAULT_RW_SIZE + 1] = { 0 }; + do + { + r = fread(buf, sizeof(char), DEFAULT_RW_SIZE, f); + if (r == -1) + { + fprintf(stderr, "_parse_input: fread failed!"); + return NULL; + } + buf[r] = 0; + string_pushstr(output, buf); + } while (r >= DEFAULT_RW_SIZE); + + return output; +} + +static void _add_newline(struct string *s) +{ + if (s->data[s->length - 1] != '\n') + { + string_pushc(s, '\n'); + } +} + +struct string *get_input(int argc, char *argv[]) +{ + int i = 1; + bool s_input = false; + FILE *f = NULL; + while (i < argc && argv[i][0] == '-') + { + if (strcmp(argv[i], "-c") == 0) + { + s_input = true; + i++; + continue; + } + + if (strcmp(argv[i], "--pretty-print") == 0) + { + env_set("PRETTY_PRINT", "TRUE"); + i++; + continue; + } + + _usage_xerror(); + } + if (i == argc) + { + f = stdin; + } + else + { + char *input = argv[i]; + if (s_input) + { + f = fmemopen(input, strlen(input) + 1, "r"); + } + else + { + f = fopen(input, "r"); + env_set("0", input); + } + } + + if (!f) + { + errx(2, "Invalid path!"); + } + + struct string *string_input = _parse_input(f); + if (f != stdin) + { + fclose(f); + } + + int nb_args = 0; + i++; + for (; i < argc; i++) + { + nb_args++; + char buf[16] = { 0 }; + sprintf(buf, "%d", nb_args); + env_set(buf, argv[i]); + } + + char buf2[16] = { 0 }; + sprintf(buf2, "%d", (s_input && nb_args > 0) ? 1 : nb_args); + env_set("#", buf2); + + _add_newline(string_input); + return string_input; +} diff --git a/42sh/src/Makefile.am b/42sh/src/Makefile.am new file mode 100644 index 0000000..95d8eb4 --- /dev/null +++ b/42sh/src/Makefile.am @@ -0,0 +1,28 @@ +# define the subdirectories +SUBDIRS = ast \ + builtins \ + parser \ + lexer \ + utils \ + IO \ + exec + + +bin_PROGRAMS = 42sh + +42sh_SOURCES = helper.h \ + helper.c \ + 42sh.c + +42sh_CPPFLAGS = -I%D% + +42sh_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +42sh_LDADD = \ + parser/libparser.a \ + IO/libIO.a \ + ast/libast.a \ + exec/libeval.a \ + lexer/liblexer.a \ + utils/libutils.a \ + builtins/libbuiltins.a diff --git a/42sh/src/ast/Makefile.am b/42sh/src/ast/Makefile.am new file mode 100644 index 0000000..6967aa1 --- /dev/null +++ b/42sh/src/ast/Makefile.am @@ -0,0 +1,14 @@ +lib_LIBRARIES = libast.a + +libast_a_SOURCES = \ + ast.h \ + ast.c \ + ast_accessors.h \ + ast_accessors.c \ + ast_pretty_print.c + +libast_a_CPPFLAGS = -I$(top_srcdir)/src + +libast_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libast.a diff --git a/42sh/src/ast/ast.c b/42sh/src/ast/ast.c new file mode 100644 index 0000000..41b7a55 --- /dev/null +++ b/42sh/src/ast/ast.c @@ -0,0 +1,164 @@ +#include "ast/ast.h" + +#include <stdio.h> +#include <stdlib.h> + +#include "ast/ast_accessors.h" + +static const size_t ast_size_ltable[] = { + [AST_COMMAND] = sizeof(struct ast_command), + [AST_IF] = sizeof(struct ast_if), + [AST_LOGICAL] = sizeof(struct ast_logical), + [AST_LIST] = sizeof(struct ast_list), + [AST_REDIRECTION] = sizeof(struct ast_redirection), + [AST_PIPELINE] = sizeof(struct ast_pipeline), + [AST_WHILE] = sizeof(struct ast_while), + [AST_ASSIGN] = sizeof(struct ast_assign), + [AST_FOR] = sizeof(struct ast_for), + [AST_FUNCTION] = sizeof(struct ast_function), + [AST_SUBSHELL] = sizeof(struct ast_subshell) +}; + +struct ast *ast_create(enum ast_node type) +{ + struct ast *a = calloc(1, ast_size_ltable[type]); + if (!a) + { + fprintf(stderr, "ast_create: calloc failed.\n"); + return NULL; + } + + a->type = type; + return a; +} + +static void free_str_args(struct string **args) +{ + if (!args) + { + return; + } + size_t i = 0; + while (args[i]) + { + string_free(args[i]); + i++; + } + free(args); +} + +static void _free_ast_command(struct ast *ast) +{ + free_str_args(((struct ast_command *)ast)->args); + free(ast); +} + +static void _free_ast_if(struct ast *ast) +{ + ast_free(get_i(ast, 0)); + ast_free(get_i(ast, 1)); + ast_free(get_i(ast, 2)); + free(ast); +} + +static void _free_ast_list(struct ast *ast) +{ + for (size_t i = 0; i < ((struct ast_list *)ast)->nb_children; i++) + { + ast_free(get_i(ast, i)); + } + free(((struct ast_list *)ast)->children); + free(ast); +} + +static void _free_ast_logical(struct ast *ast) +{ + ast_free(((struct ast_logical *)ast)->left); + ast_free(((struct ast_logical *)ast)->right); + free(ast); +} + +static void _free_ast_pipeline(struct ast *ast) +{ + ast_free(((struct ast_pipeline *)ast)->left); + ast_free(((struct ast_pipeline *)ast)->right); + free(ast); +} + +static void _free_redir_struct(struct ast *ast) +{ + struct ast_redirection *ast_r = ((struct ast_redirection *)ast); + string_free(ast_r->redirect.fd); + string_free(ast_r->redirect.file); + string_free(ast_r->redirect.redir); +} + +static void _free_ast_redirection(struct ast *ast) +{ + ast_free(((struct ast_redirection *)ast)->expr); + _free_redir_struct(ast); + free(ast); +} + +static void _free_ast_while(struct ast *ast) +{ + ast_free(((struct ast_while *)ast)->cond); + ast_free(((struct ast_while *)ast)->body); + free(ast); +} + +static void _free_ast_assign(struct ast *ast) +{ + string_free(((struct ast_assign *)ast)->name); + string_free(((struct ast_assign *)ast)->val); + free(ast); +} + +static void _free_ast_for(struct ast *ast) +{ + struct ast_for *ast_f = ((struct ast_for *)ast); + ast_free(ast_f->left); + ast_free(ast_f->right); + string_free(ast_f->var); + // Someone probably forgot to free the ast itself + free(ast); +} + +static void _free_ast_function(struct ast *ast) +{ + struct ast_function *ast_fun = ((struct ast_function *)ast); + ast_free(ast_fun->body); + string_free(ast_fun->name); + free(ast); +} + +static void _free_ast_subshell(struct ast *ast) +{ + ast_free(((struct ast_subshell *)ast)->body); + free(ast); +} + +typedef void (*free_fct)(struct ast *); + +static const free_fct free_function_ltable[] = { + [AST_COMMAND] = _free_ast_command, + [AST_IF] = _free_ast_if, + [AST_LOGICAL] = _free_ast_logical, + [AST_LIST] = _free_ast_list, + [AST_PIPELINE] = _free_ast_pipeline, + [AST_REDIRECTION] = _free_ast_redirection, + [AST_WHILE] = _free_ast_while, + [AST_ASSIGN] = _free_ast_assign, + [AST_FOR] = _free_ast_for, + [AST_FUNCTION] = _free_ast_function, + [AST_SUBSHELL] = _free_ast_subshell +}; + +void ast_free(struct ast *ast) +{ + if (!ast) + { + return; + } + free_function_ltable[ast->type](ast); +} diff --git a/42sh/src/ast/ast.h b/42sh/src/ast/ast.h new file mode 100644 index 0000000..3a1d20a --- /dev/null +++ b/42sh/src/ast/ast.h @@ -0,0 +1,221 @@ +#ifndef AST_H +#define AST_H + +#include <stdlib.h> + +#include "ast/ast_redirect.h" +#include "lexer/token.h" +#include "utils/libstring.h" + +/** + * @brief Each node type represent a class of elements from the SCL grammar. + */ +enum ast_node +{ + // STEP 1 + AST_COMMAND, + AST_IF, + AST_LOGICAL, + AST_LIST, + + // STEP 2 + AST_PIPELINE, + AST_REDIRECTION, + AST_WHILE, + AST_ASSIGN, + AST_FOR, + + // STEP 3 + AST_FUNCTION, + AST_SUBSHELL +}; + +/** + * @brief Base struct for the AST implementation. + */ +struct ast +{ + enum ast_node type; /**< Type of the targeted AST. Used for inheritance. */ +}; + +/** + * @brief AST which represent any kind of `command`. + * It can be a `shell command` or a `simple command`. + * Both are evaluated the same. + */ +struct ast_command +{ + struct ast base; /**< Base struct for the AST implementation. */ + struct string **args; /**< `args` used to call the given command. */ +}; + +/** + * @brief AST which represent a `if/then/elif/else` block. + * An `if` statement may not contain an `elif` or an `else` block. + * If the `if` statement contains an `elif` block, it is stored as an + * `ast_if` in the `else_body`. + */ +struct ast_if +{ + struct ast base; /**< Base struct for the AST implementation. */ + struct ast *condition; /**< Contains the AST to evaluate as a condition. */ + struct ast *then_body; /**< Contains the AST of the `then` block. */ + struct ast *else_body; /**< Contains the AST of the `else` block. */ +}; + +/** + * @brief AST which represent a shell list. + * A list can be either a `list` or a `compound list`. + * @note Although this struct contains a `nb_children` value, + * it is parsed as a binary tree, thus `nb_children` never exceeds 2. + */ +struct ast_list +{ + struct ast base; /**< Base struct for the AST implementation. */ + size_t nb_children; /**< Number of elements in the list */ + struct ast ** + children; /** Array of ASTs which represents the elements of the list */ +}; + +/** + * @brief AST which represents any logical block. + * A logical block can be either a `not` block, a `and` block or a `or` block. + * The type of the logical block is determined by the `type` value. + * The `type` value can be either `TOKEN_AND`, `TOKEN_OR` or `TOKEN_NEG`. + * The binary logical operators are evaluated following the lazy method. + * @note If the `type` value is not one of the listed above, the evaluation + * of the tree will always exit this the code 1 and write to `stderr`. + */ +struct ast_logical +{ + struct ast base; /**< Base struct for the AST implementation. */ + enum token_type type; /**< Type of the logical operator. */ + struct ast *left; /**< First block to be evaluated (always evaluated). */ + struct ast *right; /**< Second block to be evaluated (may not be evaluated, + see `note`). */ +}; + +/** + * @brief AST which represents a pipeline. + * The pipeline evaluation creates a context in which the output of + * `left` is redirected to the input of `right`. + */ +struct ast_pipeline +{ + struct ast base; /**< Base struct for the AST implementation. */ + struct ast *left; /**< The output of this AST is given to `right`. */ + struct ast *right; /**< This AST gets its input from `left`. */ +}; + +/** + * @brief AST used to represent redirections + * Redirect is a struct used to store info about this redirection + * Expr is the expression who the output shall be redirected + */ +struct ast_redirection +{ + struct ast base; /**< Base struct for the AST implementation. */ + struct redirect redirect; + struct ast *expr; +}; + +/** + * @brief AST used to represent the while loops (and also the until loops) + * Cond is the condition used to know if the loop shall still iterate + * Body is the tree to execute while the condition is still valid + */ +struct ast_while +{ + struct ast base; + struct ast *cond; + struct ast *body; +}; + +/** + * @brief Struct used for variable assignment + * Name is the name of the variable + * Val is the value of said variable + */ +struct ast_assign +{ + struct ast base; + struct string *name; + struct string *val; +}; + +/** + * @brief Left contains the args (the `in { WORD }` part). + * Right contains the body to execute. + */ +struct ast_for +{ + struct ast base; + struct ast *left; + struct ast *right; + struct string *var; +}; + +/** + * @brief AST used for function definition. + * Name contains the name of the function + * Body contains the actual ast that will be executed each time + * the function is called + */ +struct ast_function +{ + struct ast base; + struct string *name; + struct ast *body; +}; + +/** + * @brief AST used for subshell + * Body just contains the compound list to be executed in the subshell + */ +struct ast_subshell +{ + struct ast base; + struct ast *body; +}; + +union ast_caster +{ + struct ast *ast; + struct ast_command *ast_c; + struct ast_if *ast_i; + struct ast_list *ast_l; + struct ast_logical *ast_lo; + struct ast_pipeline *ast_p; + struct ast_redirection *ast_r; + struct ast_while *ast_w; + struct ast_assign *ast_a; + struct ast_for *ast_f; + struct ast_function *ast_func; + struct ast_subshell *ast_sub; +}; + +/** + * @brief Creates a heap-allocated AST of the size of ast_`type`. + * The AST is returned as a `struct ast*` but it can be casted into the right + * ast type struct. + * @param type The type of the AST to be created. + */ +struct ast *ast_create(enum ast_node type); + +/** + * @brief Recursively frees the given AST. Any heap-allocated can be passed. + * @param ast The AST to be freed. + * @note This method does not exit when `ast` is `NULL`. + */ +void ast_free(struct ast *ast); + +/** + * @brief Creates a file named `ast.dot` in the current folder which + * contains the DOT representation of the given AST. + * @param ast The AST to represent. + * @note This function is called only when the program is called with + * the `--pretty-print` flag. + */ +void pretty_print(struct ast *ast); + +#endif /* ! AST_H */ diff --git a/42sh/src/ast/ast_accessors.c b/42sh/src/ast/ast_accessors.c new file mode 100644 index 0000000..39affad --- /dev/null +++ b/42sh/src/ast/ast_accessors.c @@ -0,0 +1,326 @@ +#include "ast_accessors.h" + +static struct ast *_get_from_command(struct ast *ast, size_t index) +{ + (void)ast; + (void)index; + return NULL; +} + +static struct ast *_get_from_if(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_if *)ast)->condition; + case 1: + return ((struct ast_if *)ast)->then_body; + case 2: + return ((struct ast_if *)ast)->else_body; + default: + return NULL; + } +} + +static struct ast *_get_from_list(struct ast *ast, size_t index) +{ + if (index >= ((struct ast_list *)ast)->nb_children) + { + return NULL; + } + return ((struct ast_list *)ast)->children[index]; +} + +static struct ast *_get_from_logical(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_logical *)ast)->left; + case 1: + return ((struct ast_logical *)ast)->right; + default: + return NULL; + } +} + +static struct ast *_get_from_pipeline(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_pipeline *)ast)->left; + case 1: + return ((struct ast_pipeline *)ast)->right; + default: + return NULL; + } +} + +static struct ast *_get_from_redirection(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_redirection *)ast)->expr; + default: + return NULL; + } +} + +static struct ast *_get_from_while(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_while *)ast)->cond; + case 1: + return ((struct ast_while *)ast)->body; + default: + return NULL; + } +} + +static struct ast *_get_from_assign(struct ast *ast, size_t index) +{ + (void)ast; + (void)index; + return NULL; +} + +static struct ast *_get_from_for(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_for *)ast)->left; + case 1: + return ((struct ast_for *)ast)->right; + default: + return NULL; + } +} + +static struct ast *_get_from_function(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_function *)ast)->body; + default: + return NULL; + } +} + +static struct ast *_get_from_subshell(struct ast *ast, size_t index) +{ + switch (index) + { + case 0: + return ((struct ast_subshell *)ast)->body; + default: + return NULL; + } +} + +typedef struct ast *(*ast_getters)(struct ast *, size_t); + +static const ast_getters ast_getters_ltable[] = { + [AST_COMMAND] = _get_from_command, + [AST_IF] = _get_from_if, + [AST_LOGICAL] = _get_from_logical, + [AST_LIST] = _get_from_list, + [AST_PIPELINE] = _get_from_pipeline, + [AST_REDIRECTION] = _get_from_redirection, + [AST_WHILE] = _get_from_while, + [AST_ASSIGN] = _get_from_assign, + [AST_FOR] = _get_from_for, + [AST_FUNCTION] = _get_from_function, + [AST_SUBSHELL] = _get_from_subshell +}; + +struct ast *get_i(struct ast *ast, size_t index) +{ + return ast_getters_ltable[ast->type](ast, index); +} + +struct ast *get_left(struct ast *ast) +{ + return get_i(ast, 0); +} + +struct ast *get_right(struct ast *ast) +{ + return get_i(ast, 1); +} + +static void _set_on_command(struct ast *ast, struct ast *child, size_t index) +{ + (void)ast; + (void)child; + (void)index; + return; +} + +static void _set_on_if(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_if *)ast)->condition = child; + return; + case 1: + ((struct ast_if *)ast)->then_body = child; + return; + case 2: + ((struct ast_if *)ast)->else_body = child; + return; + default: + return; + } +} + +static void _set_on_logical(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_logical *)ast)->left = child; + break; + case 1: + ((struct ast_logical *)ast)->right = child; + break; + default: + return; + } +} + +static void _set_on_list(struct ast *ast, struct ast *child, size_t index) +{ + if (index > ((struct ast_list *)ast)->nb_children) + { + return; + } + if (index == ((struct ast_list *)ast)->nb_children) + { + ((struct ast_list *)ast)->nb_children++; + ((struct ast_list *)ast)->children = realloc( + ((struct ast_list *)ast)->children, + ((struct ast_list *)ast)->nb_children * sizeof(struct ast_command)); + } + ((struct ast_list *)ast)->children[index] = child; +} + +static void _set_on_pipeline(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_pipeline *)ast)->left = child; + break; + case 1: + ((struct ast_pipeline *)ast)->right = child; + break; + default: + return; + } +} + +static void _set_on_redirection(struct ast *ast, struct ast *child, + size_t index) +{ + switch (index) + { + case 0: + ((struct ast_redirection *)ast)->expr = child; + break; + default: + return; + } +} + +static void _set_on_while(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_while *)ast)->cond = child; + break; + case 1: + ((struct ast_while *)ast)->body = child; + break; + default: + return; + } +} + +static void _set_on_assign(struct ast *ast, struct ast *child, size_t index) +{ + (void)ast; + (void)child; + (void)index; + return; +} + +static void _set_on_for(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_for *)ast)->left = child; + return; + case 1: + ((struct ast_for *)ast)->right = child; + return; + default: + return; + } +} + +static void _set_on_function(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_function *)ast)->body = child; + default: + return; + } +} + +static void _set_on_subshell(struct ast *ast, struct ast *child, size_t index) +{ + switch (index) + { + case 0: + ((struct ast_subshell *)ast)->body = child; + default: + return; + } +} + +typedef void (*ast_setters)(struct ast *, struct ast *, size_t); + +static const ast_setters ast_setters_ltable[] = { + [AST_COMMAND] = _set_on_command, [AST_IF] = _set_on_if, + [AST_LOGICAL] = _set_on_logical, [AST_LIST] = _set_on_list, + [AST_PIPELINE] = _set_on_pipeline, [AST_REDIRECTION] = _set_on_redirection, + [AST_WHILE] = _set_on_while, [AST_ASSIGN] = _set_on_assign, + [AST_FOR] = _set_on_for, [AST_FUNCTION] = _set_on_function, + [AST_SUBSHELL] = _set_on_subshell +}; + +void set_i(struct ast *ast, struct ast *child, size_t index) +{ + ast_setters_ltable[ast->type](ast, child, index); +} + +void set_left(struct ast *ast, struct ast *child) +{ + set_i(ast, child, 0); +} + +void set_right(struct ast *ast, struct ast *child) +{ + set_i(ast, child, 1); +} diff --git a/42sh/src/ast/ast_accessors.h b/42sh/src/ast/ast_accessors.h new file mode 100644 index 0000000..757bff5 --- /dev/null +++ b/42sh/src/ast/ast_accessors.h @@ -0,0 +1,63 @@ +#ifndef AST_ACCESSORS_H +#define AST_ACCESSORS_H + +#include <stddef.h> + +#include "ast/ast.h" + +/** + * @brief Returns the i-th child of `ast`. + * The children are stored as follows: + * `index` 0: left child, or expression in unary trees, or `condition` in `if` + * blocks. `index` 1: right child, or `then` expression in `if` blocks. `index` + * 2+: middle children, or `else` expression in `if` blocks (only `index` 2). + * @param ast Any kind of AST. + * @param index The index of the child to get. + * @note All kinds of ASTs can be passed to this function. + * All `index` can be passed to this function too. + * However, if the given type does not have any child or if the `index` is too + * big, `get_i` returns `NULL.` + */ +struct ast *get_i(struct ast *ast, size_t index); + +/** + * @brief Alias for `get_i(ast, 0)`. + * @note See `get_i` for a full explanation. + */ +struct ast *get_left(struct ast *ast); + +/** + * @brief Alias for `get_i(ast, 1)`. + * @note See `get_i` for a full explanation. + */ +struct ast *get_right(struct ast *ast); + +/** + * @brief Sets the `child` as the i-th child of `ast`. + * The children are stored as follows: + * `index` 0: left child, or expression in unary trees, or `condition` in `if` + * blocks. `index` 1: right child, or `then` expression in `if` blocks. `index` + * 2+: middle children, or `else` expression in `if` blocks (only `index` 2). + * @param ast Any kind of AST. + * @param child The child to be assigned in `ast`. + * @param index The position of the child in the `ast`. + * @note All kinds of ASTs can be passed to this function. + * All `index` can be passed to this function too. + * However, if the given type can not contain any child or if the `index` is too + * big, `set_i` does nothing. + */ +void set_i(struct ast *ast, struct ast *child, size_t index); + +/** + * @brief Alias for `set_i(ast, child, 0)`. + * @note See `set_i` for a full explanation. + */ +void set_left(struct ast *ast, struct ast *child); + +/** + * @brief Alias for `set_i(ast, child, 1)`. + * @note See `set_i` for a full explanation. + */ +void set_right(struct ast *ast, struct ast *child); + +#endif /* ! AST_ACCESSORS_H */ diff --git a/42sh/src/ast/ast_pretty_print.c b/42sh/src/ast/ast_pretty_print.c new file mode 100644 index 0000000..17c5b2a --- /dev/null +++ b/42sh/src/ast/ast_pretty_print.c @@ -0,0 +1,247 @@ +#undef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include <stdio.h> +#include <stdlib.h> + +#include "ast/ast.h" +#include "ast/ast_accessors.h" +#include "ast/ast_redirect.h" +#include "lexer/token.h" + +#define BUFFER_SIZE 10000 + +static char *_type_to_string[] = { [AST_LOGICAL] = "logical block", + [AST_COMMAND] = "command", + [AST_IF] = "conditionnal block", + [AST_LIST] = "list", + [AST_PIPELINE] = "pipeline", + [AST_REDIRECTION] = "redirection", + [AST_WHILE] = "while", + [AST_ASSIGN] = "assign", + [AST_FOR] = "for" }; + +typedef char *(*tostring)(struct ast *); + +static char *_args_tostring(struct string **args) +{ + struct string *output = string_create(NULL); + size_t i = 0; + while (args[i]) + { + struct string *c = string_deepcopy(args[i++]); + string_catenate(output, c); + if (args[i]) + string_pushstr(output, " - "); + } + char *data = output->data; + free(output); + return data; +} + +static char *_ast_command_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + char *args_to_string = _args_tostring(((struct ast_command *)ast)->args); + int len = snprintf(buf, BUFFER_SIZE, "%s\\nargs: %s", + _type_to_string[ast->type], args_to_string); + free(args_to_string); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_if_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + int len = snprintf(buf, BUFFER_SIZE, "%s\\n", _type_to_string[ast->type]); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_get_token_type_to_str(struct ast *ast) +{ + enum token_type t = ((struct ast_logical *)ast)->type; + switch (t) + { + case TOKEN_AND: + return "&&"; + case TOKEN_OR: + return "||"; + case TOKEN_NEG: + return "!"; + default: + return "unknown"; + } +} + +static char *_ast_logical_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + int len = snprintf(buf, BUFFER_SIZE, "%s\\n%s", _type_to_string[ast->type], + _get_token_type_to_str(ast)); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_list_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + size_t nb_c = ((struct ast_list *)ast)->nb_children; + int len = snprintf(buf, BUFFER_SIZE, "%s\\n%lu child(ren)", + _type_to_string[ast->type], nb_c); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_pipeline_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + int len = snprintf(buf, BUFFER_SIZE, "%s\\nleft | right", + _type_to_string[ast->type]); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_assign_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + struct string *name = ((struct ast_assign *)ast)->name; + struct string *value = ((struct ast_assign *)ast)->val; + int len = snprintf(buf, BUFFER_SIZE, "%s\\n%s=%s", + _type_to_string[ast->type], name->data, value->data); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_redirection_tostring(struct ast *ast) +{ + struct redirect r = ((struct ast_redirection *)ast)->redirect; + char buf[BUFFER_SIZE] = { 0 }; + int len = + snprintf(buf, BUFFER_SIZE, "%s\\n%s %s %s", _type_to_string[ast->type], + r.fd->data, r.redir->data, r.file->data); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_while_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + int len = snprintf(buf, BUFFER_SIZE, "%s\\nleft is cond\nright is body", + _type_to_string[ast->type]); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static char *_ast_for_tostring(struct ast *ast) +{ + char buf[BUFFER_SIZE] = { 0 }; + int len = snprintf(buf, BUFFER_SIZE, "%s\\nleft is words\nright is body", + _type_to_string[ast->type]); + char *str = malloc((len + 1) * sizeof(char)); + for (int i = 0; i < len; i++) + { + str[i] = buf[i]; + } + str[len] = 0; + return str; +} + +static const tostring _node_to_string_ltable[] = { + [AST_LOGICAL] = _ast_logical_tostring, + [AST_COMMAND] = _ast_command_tostring, + [AST_IF] = _ast_if_tostring, + [AST_LIST] = _ast_list_tostring, + [AST_PIPELINE] = _ast_pipeline_tostring, + [AST_REDIRECTION] = _ast_redirection_tostring, + [AST_WHILE] = _ast_while_tostring, + [AST_ASSIGN] = _ast_assign_tostring, + [AST_FOR] = _ast_for_tostring +}; + +static char *_ast_to_string(struct ast *ast) +{ + return _node_to_string_ltable[ast->type](ast); +} + +static void _pretty_print_internal(struct ast *ast, FILE *f) +{ + if (!ast) + { + return; + } + char *format = _ast_to_string(ast); + fprintf(f, "\t%lu [label=\"%s\"];\n", (size_t)ast, format); + free(format); + size_t i = 0; + struct ast *child = NULL; + do + { + child = get_i(ast, i); + if (!child) + { + break; + } + i++; + fprintf(f, "\t%lu -- %lu;\n", (size_t)ast, (size_t)child); + _pretty_print_internal(child, f); + } while (child); +} + +void pretty_print(struct ast *ast) +{ + FILE *f = fopen("ast.dot", "w"); + if (!f) + { + fprintf(stderr, + "pretty_print: unable to open a file to write the DOT AST.\n"); + return; + } + + fprintf(f, "graph {\n"); + + _pretty_print_internal(ast, f); + + fprintf(f, "}\n"); + fclose(f); +} diff --git a/42sh/src/ast/ast_redirect.h b/42sh/src/ast/ast_redirect.h new file mode 100644 index 0000000..2f573a8 --- /dev/null +++ b/42sh/src/ast/ast_redirect.h @@ -0,0 +1,13 @@ +#ifndef AST_REDIRECT_H +#define AST_REDIRECT_H + +#include "utils/libstring.h" + +struct redirect +{ + struct string *fd; // src + struct string *redir; // ('<' | '>' | '>>' | etc ...) + struct string *file; // dest +}; + +#endif /* ! AST_REDIRECT_H */ diff --git a/42sh/src/builtins/Makefile.am b/42sh/src/builtins/Makefile.am new file mode 100644 index 0000000..4a16c3d --- /dev/null +++ b/42sh/src/builtins/Makefile.am @@ -0,0 +1,20 @@ +lib_LIBRARIES = libbuiltins.a + +libbuiltins_a_SOURCES = \ + builtins.h \ + true.c \ + false.c \ + echo.c \ + exit.c \ + break.c \ + export.c \ + continue.c \ + unset.c \ + dot.c \ + cd.c + +libbuiltins_a_CPPFLAGS = -I$(top_srcdir)/src + +libbuiltins_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libbuiltins.a diff --git a/42sh/src/builtins/break.c b/42sh/src/builtins/break.c new file mode 100644 index 0000000..6718de1 --- /dev/null +++ b/42sh/src/builtins/break.c @@ -0,0 +1,27 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" +#include "utils/env.h" + +int my_break(struct string **args) +{ + char *n = "1"; + if (args[0] != NULL) + { + char *end; + long num = strtol(args[0]->data, &end, 10); + if (*end != '\0' || num <= 0) + { + fprintf(stderr, "break: %s: numeric argument required\n", + args[1]->data); + return 2; + } + n = args[0]->data; + } + env_set("BREAK", n); + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/builtins.h b/42sh/src/builtins/builtins.h new file mode 100644 index 0000000..6b99918 --- /dev/null +++ b/42sh/src/builtins/builtins.h @@ -0,0 +1,21 @@ +#ifndef BUILTINS_H +#define BUILTINS_H + +#include "utils/libstring.h" + +#define NB_BUILTINS 10 + +typedef int (*builtin)(struct string **); + +int echo(struct string **args); +int my_true(struct string **args); +int my_false(struct string **args); +int my_exit(struct string **args); +int cd(struct string **args); +int my_continue(struct string **args); +int my_break(struct string **args); +int dot(struct string **args); +int unset(struct string **args); +int export(struct string **args); + +#endif /* ! BUILTINS_H */ diff --git a/42sh/src/builtins/cd.c b/42sh/src/builtins/cd.c new file mode 100644 index 0000000..90fd9db --- /dev/null +++ b/42sh/src/builtins/cd.c @@ -0,0 +1,74 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" +#include "utils/env.h" + +int cd_pointpoint(char *path, char *old) +{ + char *current = strrchr(path, '/'); + size_t len = current - path; + if (len == 0) + { + fprintf(stderr, "cd: error with ..\n"); + return 2; + } + char *parent = malloc(len + 1); + memcpy(parent, current, len); + parent[len] = '\0'; + old = path; + path = parent; + env_set("OLDPWD", old); + env_set("PWD", path); + free(parent); + + return 0; +} + +int cd(struct string **args) +{ + if (args[0] == NULL || args[1] != NULL) + { + fprintf(stderr, "cd: error too many arguments\n"); + fflush(stdout); + return 2; + } + if (strcmp(env_get("PWD"), "") == 0) + { + fprintf(stderr, "cd: error with PWD\n"); + fflush(stdout); + return 2; + } + char *old = env_get("OLDPWD"); + char *path = env_get("PWD"); + char *tmp = old; + if (strcmp(args[0]->data, "-") == 0) + { + if (strcmp(old, "") == 0) + { + fprintf(stderr, "cd: error with OLDPWD\n"); + fflush(stdout); + return 2; + } + printf("%s\n", old); + old = path; + path = tmp; + } + else if (strcmp(args[0]->data, "..") == 0) + { + int res = cd_pointpoint(path, old); + fflush(stdout); + return res; + } + else + { + old = path; + path = args[0]->data; + } + env_set("OLDPWD", old); + env_set("PWD", path); + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/continue.c b/42sh/src/builtins/continue.c new file mode 100644 index 0000000..295fb8c --- /dev/null +++ b/42sh/src/builtins/continue.c @@ -0,0 +1,27 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" +#include "utils/env.h" + +int my_continue(struct string **args) +{ + char *n = "1"; + if (args[0] != NULL) + { + char *end; + long num = strtol(args[0]->data, &end, 10); + if (*end != '\0' || num <= 0) + { + fprintf(stderr, "continue: %s: numeric argument required\n", + args[1]->data); + return 2; + } + n = args[0]->data; + } + env_set("CONTINUE", n); + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/dot.c b/42sh/src/builtins/dot.c new file mode 100644 index 0000000..6cec097 --- /dev/null +++ b/42sh/src/builtins/dot.c @@ -0,0 +1,43 @@ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "builtins.h" +#include "utils/env.h" + +int dot(struct string **args) +{ + if (args[0] == NULL) + { + fprintf(stderr, ".: filename argument required\n"); + fflush(stdout); + return 2; + } + if (args[1] != NULL) + { + fprintf(stderr, ".: too many arguments\n"); + fflush(stdout); + return 2; + } + + FILE *file = fopen(args[0]->data, "r"); + if (file == NULL) + { + fprintf(stderr, ".: filename does not exist\n"); + fflush(stdout); + return 1; + } + + char line[1024]; + while (fgets(line, sizeof(line), file)) + { + // TODO : execute file + fputs(line, stdout); + } + fclose(file); + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/echo.c b/42sh/src/builtins/echo.c new file mode 100644 index 0000000..f330050 --- /dev/null +++ b/42sh/src/builtins/echo.c @@ -0,0 +1,94 @@ +#include <stdio.h> +#include <string.h> + +#include "builtins.h" + +static void print_echo(struct string **args, int escapes, int newline, int i) +{ + while (args[i] != NULL) + { + if (escapes) + { + size_t j = 0; + while (j < args[i]->length) + { + if (args[i]->data[j] == '\\' && args[i]->data[j] != '\0') + { + j += 1; + switch (args[i]->data[j]) + { + case 'n': + putchar('\n'); + break; + case 't': + putchar('\t'); + break; + case '\\': + putchar('\\'); + break; + default: + putchar('\\'); + putchar(args[i]->data[j]); + break; + } + j += 1; + } + else + { + putchar(args[i]->data[j]); + j += 1; + } + } + } + else + { + fputs(args[i]->data, stdout); + } + if (args[i + 1] != NULL) + { + putchar(' '); + } + i += 1; + } + if (newline) + { + putchar('\n'); + } +} + +int echo(struct string **args) +{ + int newline = 1; + int escapes = 0; + size_t i = 0; + if (args[0] == NULL) + { + putchar('\n'); + return 0; + } + while (args[i] != NULL && args[i]->data[0] == '-') + { + if (args[i]->length == 2 && args[i]->data[1] == 'n') + { + newline = 0; + i += 1; + } + else if (args[i]->length == 2 && args[i]->data[1] == 'e') + { + escapes = 1; + i += 1; + } + else if (args[i]->length == 2 && args[i]->data[1] == 'E') + { + escapes = 0; + i += 1; + } + else + { + break; + } + } + print_echo(args, escapes, newline, i); + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/exit.c b/42sh/src/builtins/exit.c new file mode 100644 index 0000000..943c22f --- /dev/null +++ b/42sh/src/builtins/exit.c @@ -0,0 +1,28 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" + +int my_exit(struct string **args) +{ + unsigned int res = 0; + if (args[0] != NULL) + { + char *c; + long status = strtol(args[0]->data, &c, 10); + if (*c != '\0' || status < 0 || status > 255) + { + fprintf(stderr, "exit: %s: numeric argument required\n", + args[0]->data); + res = 2; + } + else + { + res = (unsigned int)status; + } + } + fflush(stdout); + exit(res); +} diff --git a/42sh/src/builtins/export.c b/42sh/src/builtins/export.c new file mode 100644 index 0000000..358cfc7 --- /dev/null +++ b/42sh/src/builtins/export.c @@ -0,0 +1,68 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" +#include "utils/env.h" + +extern char **environ; + +void printenv(void) +{ + size_t i = 0; + while (environ[i] != NULL) + { + printf("export %s\n", environ[i]); + i += 1; + } +} + +void export_var(struct string *arg) +{ + char *equal = strchr(arg->data, '='); + if (equal != NULL) + { + size_t len = equal - arg->data; + char *var = malloc(len + 1); + memcpy(var, arg->data, len); + char *value = equal + 1; + var[len] = '\0'; + env_set(var, value); + free(var); + } + else + { + env_set(arg->data, ""); + } +} + +int export(struct string **args) +{ + if (args[0] == NULL) + { + printenv(); + fflush(stdout); + return 0; + } + int flagp = 0; + size_t i = 0; + if (strcmp(args[0]->data, "-p") == 0) + { + flagp = 1; + i += 1; + } + if (args[1] == NULL && flagp) + { + printenv(); + fflush(stdout); + return 0; + } + while (args[i] != NULL) + { + export_var(args[i]); + i += 1; + } + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/false.c b/42sh/src/builtins/false.c new file mode 100644 index 0000000..17ab256 --- /dev/null +++ b/42sh/src/builtins/false.c @@ -0,0 +1,10 @@ +#include <stdio.h> + +#include "builtins.h" + +int my_false(struct string **args) +{ + (void)args; + fflush(stdout); + return 1; +} diff --git a/42sh/src/builtins/true.c b/42sh/src/builtins/true.c new file mode 100644 index 0000000..ed7c7d1 --- /dev/null +++ b/42sh/src/builtins/true.c @@ -0,0 +1,10 @@ +#include <stdio.h> + +#include "builtins.h" + +int my_true(struct string **args) +{ + (void)args; + fflush(stdout); + return 0; +} diff --git a/42sh/src/builtins/unset.c b/42sh/src/builtins/unset.c new file mode 100644 index 0000000..77b323a --- /dev/null +++ b/42sh/src/builtins/unset.c @@ -0,0 +1,44 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "builtins.h" +#include "utils/env.h" + +int unset(struct string **args) +{ + size_t i = 0; + int var = 0; + int fun = 0; + while (args[i] != NULL && args[i]->data[0] == '-') + { + if (args[i]->data[1] == 'v') + { + var = 1; + i += 1; + } + else if (args[i]->data[1] == 'f') + { + fun = 1; + i += 1; + } + else + { + break; + } + } + if (var == 0 && fun == 0) + { + var = 1; + fun = 1; + } + // TODO unset with flag -f + while (args[i] != NULL) + { + env_unset(args[i]->data); + i += 1; + } + fflush(stdout); + return 0; +} diff --git a/42sh/src/exec/Makefile.am b/42sh/src/exec/Makefile.am new file mode 100644 index 0000000..a6a9134 --- /dev/null +++ b/42sh/src/exec/Makefile.am @@ -0,0 +1,15 @@ +lib_LIBRARIES = libeval.a + +libeval_a_SOURCES = \ + ast_exec_functions.c \ + ast_exec_functions.h \ + ast_eval.h \ + ast_eval.c \ + ast_exec_redirs.h \ + ast_exec_redirs.c + +libeval_a_CPPFLAGS = -I$(top_srcdir)/src + +libeval_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libeval.a diff --git a/42sh/src/exec/ast_eval.c b/42sh/src/exec/ast_eval.c new file mode 100644 index 0000000..db152f8 --- /dev/null +++ b/42sh/src/exec/ast_eval.c @@ -0,0 +1,20 @@ +#include "exec/ast_eval.h" + +#include "exec/ast_exec_functions.h" + +static const exec_f exec_node_ltable[] = { + [AST_LIST] = exec_ast_list, [AST_IF] = exec_ast_if, + [AST_COMMAND] = exec_ast_command, [AST_LOGICAL] = exec_ast_logical, + [AST_PIPELINE] = exec_ast_pipe, [AST_REDIRECTION] = exec_ast_redirection, + [AST_WHILE] = exec_ast_while, [AST_ASSIGN] = exec_ast_assign, + [AST_FOR] = exec_ast_for, [AST_SUBSHELL] = exec_ast_subshell +}; + +int eval_ast(struct ast *ast) +{ + if (!ast) + { + return SHELL_TRUE; + } + return exec_node_ltable[ast->type](ast); +} diff --git a/42sh/src/exec/ast_eval.h b/42sh/src/exec/ast_eval.h new file mode 100644 index 0000000..785df5a --- /dev/null +++ b/42sh/src/exec/ast_eval.h @@ -0,0 +1,24 @@ +#ifndef AST_EVAL_H +#define AST_EVAL_H + +#include "ast/ast.h" + +/** + * @brief Shell true value, this is not the builtin true. + */ +#define SHELL_TRUE 0 + +/** + * @brief Shell false value, this is not the builtin false. + */ +#define SHELL_FALSE 1 + +/** + * @brief Returns the exit code of the given AST. + * @param ast The AST to evaluate. It is given as a `struct ast*` + * and uses an inheritance-like principle. + * An AST can be of any type from the `enum ast_node`. + */ +int eval_ast(struct ast *ast); + +#endif /* ! AST_EVAL_H */ diff --git a/42sh/src/exec/ast_exec_functions.c b/42sh/src/exec/ast_exec_functions.c new file mode 100644 index 0000000..9885997 --- /dev/null +++ b/42sh/src/exec/ast_exec_functions.c @@ -0,0 +1,491 @@ +#define _POSIX_C_SOURCE 200809L + +#include "exec/ast_exec_functions.h" + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ast/ast_accessors.h" +#include "ast/ast_redirect.h" +#include "exec/ast_exec_redirs.h" +#include "lexer/expansion.h" +#include "utils/env.h" + +#define XCODE_TOENV(R) \ + char buf[16] = { 0 }; \ + sprintf(buf, "%d", (R)); \ + env_set("?", buf) + +static char **convert_args(struct string **args) +{ + size_t l = 0; + while (args[l]) + { + l++; + } + + char **c_args = calloc(l + 1, sizeof(char *)); + if (!c_args) + { + errx(EXIT_FAILURE, "convert_args: calloc failed."); + } + + for (size_t i = 0; i < l; i++) + { + c_args[i] = calloc(args[i]->length + 1, sizeof(char)); + if (!c_args[i]) + { + errx(EXIT_FAILURE, "convert_args: calloc failed."); + } + c_args[i] = strncpy(c_args[i], args[i]->data, args[i]->length); + } + + return c_args; +} + +static void free_args(char **args) +{ + size_t i = 0; + while (args[i]) + { + free(args[i]); + i++; + } + free(args); +} + +static builtin fetch_builtin(struct string *s) +{ + if (!s) + { + return NULL; + } + if (STRINGS_ARE_EQUAL(s->data, "false")) + { + return my_false; + } + if (STRINGS_ARE_EQUAL(s->data, "true")) + { + return my_true; + } + if (STRINGS_ARE_EQUAL(s->data, "echo")) + { + return echo; + } + if (STRINGS_ARE_EQUAL(s->data, ".")) + { + return dot; + } + if (STRINGS_ARE_EQUAL(s->data, "exit")) + { + return my_exit; + } + if (STRINGS_ARE_EQUAL(s->data, "cd")) + { + return cd; + } + if (STRINGS_ARE_EQUAL(s->data, "export")) + { + return export; + } + return NULL; +} + +static int exec_program(struct string **args) +{ + char **argv = convert_args(args); + if (!argv[0]) + { + free_args(argv); + return 127; + } + pid_t p = fork(); + if (!p) + { + execvp(argv[0], argv); + if (errno == ENOEXEC) + { + fprintf(stderr, + "exec_program: the file \'%s\' is not executable.\n", + argv[0]); + free_args(argv); + exit(126); + } + fprintf(stderr, "exec_program: command \'%s\' not found.\n", argv[0]); + free_args(argv); + exit(127); + } + else + { + int status = 0; + waitpid(p, &status, 0); + free_args(argv); + if (WIFEXITED(status)) + { + return WEXITSTATUS(status); + } + return SHELL_FALSE; + } +} + +static struct string **_copy_args(struct string **args) +{ + size_t len = 0; + while (args[len]) + { + len++; + } + struct string **cpy = calloc(len + 1, sizeof(struct string *)); + for (size_t i = 0; i < len; i++) + { + cpy[i] = string_deepcopy(args[i]); + } + return cpy; +} + +static void _free_str_args(struct string **args) +{ + size_t i = 0; + while (args[i]) + { + string_free(args[i]); + i++; + } + free(args); +} + +int exec_ast_command(struct ast *ast) +{ + union ast_caster ast_cast; + ast_cast.ast = ast; + struct string **cpy = _copy_args(ast_cast.ast_c->args); + if (!ast_cast.ast_c->args) + { + _free_str_args(cpy); + fprintf(stderr, "exec_ast_command: invalid args for a command exec.\n"); + XCODE_TOENV(SHELL_FALSE); + return SHELL_FALSE; + } + + size_t i = 0; + size_t j = 0; + while (ast_cast.ast_c->args[i]) + { + struct string *cv = expand_word(ast_cast.ast_c->args[i]); + if (cv->length > 0) + { + string_free(ast_cast.ast_c->args[j]); + ast_cast.ast_c->args[j++] = cv; + } + else + { + string_free(cv); + } + i++; + } + while (ast_cast.ast_c->args[j]) + { + string_free(ast_cast.ast_c->args[j]); + ast_cast.ast_c->args[j++] = NULL; + } + if (i == 0) + { + // No args ! + XCODE_TOENV(SHELL_TRUE); + return SHELL_TRUE; + } + builtin f = fetch_builtin(ast_cast.ast_c->args[0]); + int r = SHELL_TRUE; + if (f != NULL) + { + r = (*f)(ast_cast.ast_c->args + 1); + } + else + { + r = exec_program(ast_cast.ast_c->args); + } + _free_str_args(ast_cast.ast_c->args); + ast_cast.ast_c->args = cpy; + XCODE_TOENV(r); + return r; +} + +int exec_ast_list(struct ast *ast) +{ + union ast_caster ast_cast; + ast_cast.ast = ast; + int status = SHELL_TRUE; + for (size_t i = 0; i < ast_cast.ast_l->nb_children; i++) + { + status = eval_ast(get_i(ast_cast.ast, i)); + } + XCODE_TOENV(status); + return status; +} + +int exec_ast_if(struct ast *ast) +{ + int k = SHELL_TRUE; + if (eval_ast(get_i(ast, 0)) == SHELL_TRUE) + { + k = eval_ast(get_i(ast, 1)); + } + else + { + k = eval_ast(get_i(ast, 2)); + } + XCODE_TOENV(k); + return k; +} + +int exec_ast_logical(struct ast *ast) +{ + union ast_caster ast_cast; + ast_cast.ast = ast; + int k = SHELL_TRUE; + switch (ast_cast.ast_lo->type) + { + case TOKEN_NEG: + k = !eval_ast(get_left(ast)); + break; + case TOKEN_AND: + if (eval_ast(get_left(ast)) == SHELL_TRUE) + { + k = eval_ast(get_right(ast)); + break; + } + k = SHELL_FALSE; + break; + case TOKEN_OR: + if (eval_ast(get_left(ast)) == SHELL_FALSE) + { + k = eval_ast(get_right(ast)); + break; + } + k = SHELL_TRUE; + break; + default: + errx(SHELL_FALSE, + "exec_ast_logical: invalid token type for logical operation. Got: " + "%d", + ast_cast.ast_lo->type); + } + XCODE_TOENV(k); + return k; +} + +int exec_ast_pipe(struct ast *ast) +{ + union ast_caster ast_cast; + ast_cast.ast = ast; + + struct ast *left = ast_cast.ast_p->left; + struct ast *right = ast_cast.ast_p->right; + if (!left || !right) + { + XCODE_TOENV(EXIT_FAILURE); + return EXIT_FAILURE; + } + + int fds[2]; + if (pipe(fds) == -1) + { + XCODE_TOENV(EXIT_FAILURE); + return EXIT_FAILURE; + } + + pid_t p1 = fork(); + if (!p1) + { + close(fds[0]); + dup2(fds[1], STDOUT_FILENO); + int r1 = eval_ast(left); + exit(r1); + } + + pid_t p2 = fork(); + if (!p2) + { + close(fds[1]); + dup2(fds[0], STDIN_FILENO); + int r2 = eval_ast(right); + exit(r2); + } + + close(fds[0]); + close(fds[1]); + + int status = 0; + waitpid(p1, &status, 0); + waitpid(p2, &status, 0); + + if (WIFEXITED(status)) + { + int k = WEXITSTATUS(status); + XCODE_TOENV(k); + return k; + } + XCODE_TOENV(SHELL_FALSE); + return SHELL_FALSE; +} + +inline static bool _is_a_std_fd(int fd) +{ + return fd >= STDIN_FILENO && fd <= STDERR_FILENO; +} + +static int _exec_ast_redir(struct ast_redirection *ast, int left_fd, + int right_fd) +{ + // Save std fds + int cpy_stdin = dup(STDIN_FILENO); + int cpy_stdout = dup(STDOUT_FILENO); + int cpy_stderr = dup(STDERR_FILENO); + if (right_fd == CLOSE_FD) + { + close(left_fd); + } + else + { + dup2(right_fd, left_fd); + } + + int r = eval_ast(ast->expr); + + if (!_is_a_std_fd(right_fd)) + { + close(right_fd); + } + if (!_is_a_std_fd(left_fd)) + { + close(left_fd); + } + + // Restore std fds + dup2(cpy_stdin, STDIN_FILENO); + dup2(cpy_stdout, STDOUT_FILENO); + dup2(cpy_stderr, STDERR_FILENO); + XCODE_TOENV(r); + return r; +} + +int exec_ast_redirection(struct ast *ast) +{ + union ast_caster ast_cast; + ast_cast.ast = ast; + + int left_fd = 0; + int right_fd = 0; + find_fds(ast_cast.ast_r->redirect, &left_fd, &right_fd); + if (left_fd == BAD_FD || right_fd == BAD_FD) + { + fprintf(stderr, "redirection: bad fd.\n"); + XCODE_TOENV(SHELL_FALSE); + return SHELL_FALSE; + } + + int k = _exec_ast_redir(ast_cast.ast_r, left_fd, right_fd); + XCODE_TOENV(k); + return k; +} + +int exec_ast_while(struct ast *ast) +{ + int k = SHELL_TRUE; + while (true) + { + int r = eval_ast(((struct ast_while *)ast)->cond); + if (r != SHELL_TRUE) + { + break; + } + k = eval_ast(((struct ast_while *)ast)->body); + } + XCODE_TOENV(k); + return k; +} + +int exec_ast_assign(struct ast *ast) +{ + struct ast_assign *ast_a = (struct ast_assign *)ast; + struct string *expanded = expand_word(ast_a->val); + env_set(ast_a->name->data, expanded->data); + string_free(expanded); + XCODE_TOENV(SHELL_TRUE); + return SHELL_TRUE; +} + +int exec_ast_for(struct ast *ast) +{ + if (!get_left(ast)) + { + XCODE_TOENV(SHELL_TRUE); + return SHELL_TRUE; + } + struct string **args = ((struct ast_command *)get_left(ast))->args; + struct string *name = ((struct ast_for *)ast)->var; + int k = SHELL_TRUE; + size_t i = 0; + while (args[i]) + { + struct string *value = args[i]; + i++; + env_set(name->data, value->data); + k = eval_ast(get_right(ast)); + } + XCODE_TOENV(k); + return k; +} + +static int _exec_fork(struct ast *ast) +{ + pid_t child = fork(); + + if (child == -1) + { + XCODE_TOENV(SHELL_FALSE); + return SHELL_FALSE; + } + else if (child) + { + // Parent process + + int status; + // Why did I put 0 last time I used fork(2) + waitpid(child, &status, 0); + + if (!WIFEXITED(status)) + { + XCODE_TOENV(SHELL_FALSE); + return SHELL_FALSE; + } + + return WEXITSTATUS(status); + } + else + { + // Child process + + int ret = eval_ast(ast); + // Need to exit or else we will have to exit from the main function + // So we will execute the remainder of the ast twice + // Which is not what we want + exit(ret); + } +} + +int exec_ast_subshell(struct ast *ast) +{ + struct ast *sub = get_left(ast); + int fork_res = _exec_fork(sub); + // TODO:The exit codes might not be the correct one + XCODE_TOENV(fork_res); + return fork_res; +} diff --git a/42sh/src/exec/ast_exec_functions.h b/42sh/src/exec/ast_exec_functions.h new file mode 100644 index 0000000..ba2fbcd --- /dev/null +++ b/42sh/src/exec/ast_exec_functions.h @@ -0,0 +1,77 @@ +#ifndef AST_EXEC_FUNCTIONS_H +#define AST_EXEC_FUNCTIONS_H + +#include "builtins/builtins.h" +#include "exec/ast_eval.h" + +#define BASE_RW_SIZE 256 + +typedef int (*exec_f)(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * A command can be a shell builtin or an external binary file called with + * `execvp`. + * @param ast An `AST_COMMAND` AST to evaluate. It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_command(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * It performs a lazy evaluation. + * @param ast An `AST_LOGICAL` AST to evaluate. It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_logical(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of the last + * `AST_COMMAND`. + * @param ast An `AST_LIST` AST to evaluate. It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_list(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * It performs a lazy evaluation. + * @param ast An `AST_IF` AST to evaluate. It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_if(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * It redirects the standard output of `ast->left` to the standard input of + * `ast->right`. + * @param ast An `AST_PIPELINE` AST to evaluate. It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_pipe(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * It gets the redirection details from `ast->redir` which is a `struct + * redirect`. + * @param ast An `AST_REDIRECTION` AST to evaluate. It is given as a `struct + * ast*` and uses inheritance-like principle. + */ +int exec_ast_redirection(struct ast *ast); + +int exec_ast_while(struct ast *ast); + +int exec_ast_assign(struct ast *ast); + +int exec_ast_for(struct ast *ast); + +/** + * @brief Evaluates the given AST and return the exit code of it. + * It gets the ast corresponding to the subshell and executes it in a new + * environment thanks to the fork(2) function. + * @param ast An `AST_SUBSHELL` AST to evaluate It is given as a `struct ast*` + * and uses inheritance-like principle. + */ +int exec_ast_subshell(struct ast *ast); + +#endif /* ! AST_EXEC_FUNCTIONS_H */ diff --git a/42sh/src/exec/ast_exec_redirs.c b/42sh/src/exec/ast_exec_redirs.c new file mode 100644 index 0000000..5cd013c --- /dev/null +++ b/42sh/src/exec/ast_exec_redirs.c @@ -0,0 +1,149 @@ +#define _POSIX_C_SOURCE 200809L + +#include "exec/ast_exec_redirs.h" + +#include <err.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "ast/ast.h" +#include "ast/ast_accessors.h" +#include "ast/ast_redirect.h" +#include "utils/env.h" +#include "utils/libstring.h" + +inline static long _get_open_max(void) +{ + return sysconf(_SC_OPEN_MAX); +} + +static bool _file_exists(char *filename) +{ + struct stat s; + return stat(filename, &s) == 0; +} + +inline static bool _is_a_digit(char c) +{ + return c >= '0' && c <= '9'; +} + +static bool _is_a_number(struct string *s) +{ + size_t i = 0; + while (i < s->length && _is_a_digit(s->data[i])) + { + i++; + } + return i == s->length; +} + +/** + * @brief Assigns the fd described by s to out_fd if s + * describes an integer which is less than _SC_OPEN_MAX. + */ +static void _assign_if_valid_int_fd(struct string *s, int *out_fd) +{ + if (_is_a_number(s)) + { + int fd = atoi(s->data); + if (fd < _get_open_max()) + { + *out_fd = fd; + return; + } + } + *out_fd = BAD_FD; +} + +/** + * @brief Assigns the fd described by s to out_fd if s + * describes an integer which is less than _SC_OPEN_MAX. + * s can also be exactly '-', in which case out_fd contains + * the value CLOSE_FD. + */ +static void _assign_if_valid_fd_and(struct string *s, int *out_fd) +{ + if (_is_a_number(s)) + { + int fd = atoi(s->data); + int cpy_fd = dup(fd); + *out_fd = cpy_fd; + return; + } + if (s->length == 1 && s->data[0] == '-') + { + *out_fd = CLOSE_FD; + return; + } + *out_fd = BAD_FD; +} + +void find_fds(struct redirect r, int *left_fd, int *right_fd) +{ + char *rd = r.redir->data; + static const int filemode = 0644; + + // Default value is no valid left fd is passed in `r`. + *left_fd = BAD_FD; + *right_fd = BAD_FD; + + if (STRINGS_ARE_EQUAL(rd, ">")) + { + // Sec. 2.7.2: noclobber must avoid overwritting on an existing file + if (env_get("noclobber") != NULL && _file_exists(r.file->data)) + { + fprintf(stderr, "redirection: Unable to overwrite on %s.\n", + r.file->data); + return; + } + *right_fd = open(r.file->data, O_WRONLY | O_CREAT | O_TRUNC, filemode); + _assign_if_valid_int_fd(r.fd, left_fd); + } + if (STRINGS_ARE_EQUAL(rd, ">>")) + { + *right_fd = open(r.file->data, O_WRONLY | O_CREAT | O_APPEND, filemode); + _assign_if_valid_int_fd(r.fd, left_fd); + } + if (STRINGS_ARE_EQUAL(rd, ">|")) + { + // Sec. 2.7.2: this redir bypasses noclobber + *right_fd = open(r.file->data, O_WRONLY | O_CREAT | O_TRUNC, filemode); + _assign_if_valid_int_fd(r.fd, left_fd); + } + if (STRINGS_ARE_EQUAL(rd, ">&")) + { + _assign_if_valid_int_fd(r.fd, left_fd); + _assign_if_valid_fd_and(r.file, right_fd); + if (*right_fd == BAD_FD) + { + // Sure why not after all, + // Because when it is about bash posix + // 'undefined' in the SCL means: + // OH <&{filename} is AMBIGUOUS + // BUT >&{filename} is PERFECTLY FINE + *right_fd = + open(r.file->data, O_WRONLY | O_CREAT | O_TRUNC, filemode); + } + } + if (STRINGS_ARE_EQUAL(rd, "<")) + { + *right_fd = open(r.file->data, O_RDONLY); + _assign_if_valid_int_fd(r.fd, left_fd); + } + if (STRINGS_ARE_EQUAL(rd, "<&")) + { + _assign_if_valid_int_fd(r.fd, left_fd); + _assign_if_valid_fd_and(r.file, right_fd); + } + if (STRINGS_ARE_EQUAL(rd, "<>")) + { + *right_fd = open(r.file->data, O_RDWR | O_CREAT, filemode); + _assign_if_valid_int_fd(r.fd, left_fd); + } +} diff --git a/42sh/src/exec/ast_exec_redirs.h b/42sh/src/exec/ast_exec_redirs.h new file mode 100644 index 0000000..4fd32de --- /dev/null +++ b/42sh/src/exec/ast_exec_redirs.h @@ -0,0 +1,11 @@ +#ifndef AST_EXEC_REDIRS_H +#define AST_EXEC_REDIRS_H + +#include "ast/ast_redirect.h" + +#define BAD_FD -1 +#define CLOSE_FD -2 + +void find_fds(struct redirect r, int *left_fd, int *right_fd); + +#endif /* ! AST_EXEC_REDIRS_H */ diff --git a/42sh/src/helper.c b/42sh/src/helper.c new file mode 100644 index 0000000..7f936fc --- /dev/null +++ b/42sh/src/helper.c @@ -0,0 +1,43 @@ +#include "helper.h" + +int _process_input(struct string *input) +{ + struct lexer *lexer = lexer_new(input); + if (!lexer) + { + errx(BAD_GRAMMAR, "main: unable to create lexer."); + } + + enum parser_state status = OK; + + struct ast *ast = NULL; + int r = SHELL_TRUE; + do + { + ast_free(ast); + ast = parse(lexer, &status); + + if (status == ERROR) + { + errx(BAD_GRAMMAR, "main: parse failed."); + } + + if (env_get("PRETTY_PRINT") != NULL) + { + pretty_print(ast); + } + if (ast) + { + r = eval_ast(ast); + } + } while (ast != NULL && status != ERROR); + if (status == ERROR) + { + ast_free(ast); + } + + string_free(lexer->input); + lexer_free(lexer); + + return r; +} diff --git a/42sh/src/helper.h b/42sh/src/helper.h new file mode 100644 index 0000000..6032c1c --- /dev/null +++ b/42sh/src/helper.h @@ -0,0 +1,17 @@ +#ifndef HELPER_H +#define HELPER_H + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> + +#include "IO/IOBackend.h" +#include "exec/ast_exec_functions.h" +#include "parser/parser.h" +#include "utils/env.h" + +#define BAD_GRAMMAR 2 + +int _process_input(struct string *input); + +#endif /* ! HELPER_H */ diff --git a/42sh/src/lexer/Makefile.am b/42sh/src/lexer/Makefile.am new file mode 100644 index 0000000..4eda36d --- /dev/null +++ b/42sh/src/lexer/Makefile.am @@ -0,0 +1,15 @@ +lib_LIBRARIES = liblexer.a + +liblexer_a_SOURCES = \ + token.h \ + utils.h \ + lexer.h \ + lexer.c \ + expansion.h \ + expansion.c + +liblexer_a_CPPFLAGS = -I$(top_srcdir)/src + +liblexer_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = liblexer.a diff --git a/42sh/src/lexer/expansion.c b/42sh/src/lexer/expansion.c new file mode 100644 index 0000000..d648009 --- /dev/null +++ b/42sh/src/lexer/expansion.c @@ -0,0 +1,386 @@ +#include <ctype.h> +#include <lexer/expansion.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils/env.h" + +#define BUFFER_SIZE 51 + +#define ERROR_CHECK(MSG) \ + if (str[*i] == '\0') \ + { \ + string_free(input); \ + return clean_exit(MSG, error); \ + } + +#define DQUOTEESCAPED "$\\\n`\"" +// Identifies double-quote escaped characters +#define ISDQUOTEESCAPED(C) strchr(DQUOTEESCAPED, (int)C) + +#define SPECIAL "@*?$#" +// Identifies special variable names +#define ISSPECIAL(C) strchr(SPECIAL, (int)C) + +// error = 1 indicates a missing } +// error = 0 and NULL returned indicates an unrecognized var name +// error = 0 and anything else than NULL returned is the var name +static struct string *get_var_name(char *str, int *error) +{ + struct string *res = string_create(NULL); + int i = 0; + if (str[i] == '{') + { + while (str[i] && str[i] != '}') + { + if (str[i] == '\\' && str[i + 1] == '}') + i++; + string_pushc(res, str[i]); + i++; + } + if (!str[i]) + { + string_free(res); + *error = 1; + return NULL; + } + *error = 0; + return res; + } + else if (ISSPECIAL(str[i]) || isdigit(str[i])) + { + string_pushc(res, str[i]); + *error = 0; + return res; + } + else if (!isalpha(str[i])) + { + *error = 0; + string_free(res); + return NULL; + } + else + { + while (isalnum(str[i]) || str[i] == '_') + string_pushc(res, str[i++]); + return res; + } +} + +// Useful to automate the same exit process accross the few functions +// that often do this +static struct string *clean_exit(char *txt, int *error) +{ + fprintf(stderr, "%s", txt); + *error = 1; + return NULL; +} + +// Creates the fork() in order to make a subshell for the command expansion +// (cf section 2.6.3 of the SCL) +// Only called in expand_substitution() +static struct string *fork_subshell(struct string *input, int j, char *str, + int *error) +{ + int fds[2]; + if (pipe(fds) == -1) + { + return clean_exit("pipe() faild to create 2 fds\n", error); + } + + struct string *output; + pid_t child = fork(); + + if (child == -1) + { + // Fork not working + return clean_exit("fork() faild to produce a children\n", error); + } + else if (child) + { + // Parent process + close(fds[1]); + + output = string_create(NULL); + + char buff[BUFFER_SIZE]; + + buff[BUFFER_SIZE - 1] = 0; + + int r; + + int status; + waitpid(child, &status, 0); + + // Check if the child terminated normally + if (!WIFEXITED(status)) + { + close(fds[0]); + string_free(output); + return clean_exit("Child process failed miserably\n", error); + } + + while ((r = read(fds[0], buff, 50))) + { + buff[r] = 0; + if (!string_pushstr(output, buff)) + { + string_free(output); + return clean_exit("Failed to transfer from pipe\n", error); + } + } + + close(fds[0]); + return output; + } + else + { + // Child process + str += j; + close(fds[0]); + + if (dup2(fds[1], STDOUT_FILENO) == -1) + { + // We are forced to return NULL + // how are we going to know if something wnet wrong ? + exit(-1); + } + + _process_input(input); + + close(fds[1]); + exit(1); + } +} + +static int look_for_next(char *in, int i, char c) +{ + int escaped = 0; + while (in[i] && (in[i] != c || (escaped && in[i] == c))) + { + if (in[i] == '\\') + escaped ^= 1; + else + escaped = 0; + i++; + } + return i; +} + +// Removes all the <newline> characters at the end of the string obtained by +// the command substitution (Also section 2.6.3 of the SCL) +static void trimming_newline(struct string *txt) +{ + if (!txt->length) + { + return; + } + char *str = txt->data; + size_t len = txt->length; + + size_t i = len - 1; + while (str[i] == '\n') + { + str[i] = 0; + len--; + } + + // I am scared and I know this isn't useful but just in case + txt->data = str; + txt->length = len; +} + +// Performs the substitution (forks and get back the stdout) +struct string *expand_substitution(char *str, int *i, int *error, char delim) +{ + int j = *i; + struct string *input = string_create(NULL); + if (input == NULL) + { + return clean_exit("Could not create string for input\n", error); + } + + if (delim == '`') + { + *i = look_for_next(str, j, delim); + + ERROR_CHECK("Could not match `\n") + + str[*i] = '\0'; + } + // Sadly, there is no other way around this + else + { + int escaped = 0; + int par_count = 1; + + while (str[*i] != 0) + { + if (str[*i] == '\\') + { + escaped ^= 1; + } + else if (str[*i] == '\'') + { + (*i) += 1; + while (str[*i] != '\0' && str[*i] != '\'') + { + (*i) += 1; + } + ERROR_CHECK("Missing matching '\n") + } + else if ((str[*i] == '\"' || str[*i] == '`') && !escaped) + { + (*i) += 1; + *i = look_for_next(str, *i, str[(*i) - 1]); + + ERROR_CHECK("Missing matching `\n") + } + else if (str[*i] == '(' && !escaped) + { + par_count++; + } + else if (str[*i] == delim && !escaped) + { + par_count--; + if (!par_count) + { + str[*i] = 0; + break; + } + } + else + { + escaped = 0; + } + + (*i)++; + } + } + + string_pushstr(input, str + j); + struct string *output = fork_subshell(input, j, str, error); + string_free(input); + + trimming_newline(output); + str[*i] = delim; + return output; +} + +static int expand_var(struct string *res, char *input, int i) +{ + // Will only be called after a '$' was read + + int e = 0; + struct string *name = get_var_name(input + i + 1, &e); + + if (e) + { + string_free(name); + fprintf(stderr, "Missing } in variable expansion\n"); + return -1; + } + else if (name == NULL) + { + string_pushc(res, input[i]); + i++; + } + else + { + // Get the value associated to the name + char *value = env_get(name->data); + // Concatenate the strings if the variable has a value + if (value) + string_pushstr(res, value); + if (input[++i] == '{') + i += 2; + i += name->length; + string_free(name); + } + return i; +} + +static int expand_dquotes(char *input, int i, struct string *res) +{ + while (input[i] != '"') + { + if (input[i] == '$') + { + if ((i = i + expand_var(res, input, i)) == -1) + { + string_free(res); + return -1; + } + continue; + } + if ((input[i] == '`' || (input[i] == '$' && input[i + 1] == '('))) + { + int e = 0; + i += (input[i] == '$' ? 2 : 1); + struct string *output = + expand_substitution(input, &i, &e, input[i]); + if (!e) + { + string_free(res); + return -1; + } + + // +1 for the last parenthesis/backquote + i++; + string_catenate(res, output); + continue; + } + if (input[i] == '\\' && ISDQUOTEESCAPED(input[i + 1])) + i++; + string_pushc(res, input[i]); + i++; + } + return i; +} + +struct string *expand_word(struct string *word) +{ + char *input = word->data; + int escape = 0; + struct string *res = string_create(NULL); + for (int i = 0; input[i]; i++) + { + if (!escape && input[i] == '\'') + { + while (input[++i] != '\'') + string_pushc(res, input[i]); + } + else if (!escape && input[i] == '"') + { + i++; + + if ((i = expand_dquotes(input, i, res)) == -1) + return NULL; + } + else if (!escape && input[i] == '\\') + escape ^= 1; + else + { + // We don't care if we are after a backslash, we just include this + // char + if (input[i] == '$' && !escape) + { + if ((i = i + expand_var(res, input, i)) == -1) + { + string_free(res); + return NULL; + } + continue; + } + string_pushc(res, input[i]); + escape = 0; + } + } + + // string_free(word); + return res; +} diff --git a/42sh/src/lexer/expansion.h b/42sh/src/lexer/expansion.h new file mode 100644 index 0000000..4729cb8 --- /dev/null +++ b/42sh/src/lexer/expansion.h @@ -0,0 +1,13 @@ +#ifndef EXPANSION_H +#define EXPANSION_H + +#include <stddef.h> +#include <utils/libstring.h> + +#include "helper.h" + +struct string *expand_substitution(char *str, int *i, int *error, char delim); + +struct string *expand_word(struct string *word); + +#endif /* ! EXPANSION_H */ diff --git a/42sh/src/lexer/lexer.c b/42sh/src/lexer/lexer.c new file mode 100644 index 0000000..eac77ab --- /dev/null +++ b/42sh/src/lexer/lexer.c @@ -0,0 +1,359 @@ +#include "lexer.h" + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "expansion.h" +#include "utils.h" + +#define NEWLINEESCAPE (in[lexer->pos] == '\\' && in[lexer->pos + 1] == '\n') +#define SEPARATORS " \n;&|<>()" +#define ISSEPARATOR(I) strchr(SEPARATORS, (int)I) != NULL + +struct lexer *lexer_new(struct string *input) +{ + struct lexer *new = malloc(sizeof(struct lexer)); + new->input = input; + new->pos = 0; + new->processed = 0; + new->current_tok.value = NULL; + new->current_tok.type = TOKEN_EOF; + return new; +} + +void lexer_free(struct lexer *lexer) +{ + free(lexer); +} + +static void yeet_comment(struct lexer *l) +{ + while (l->input->data[l->pos] && l->input->data[l->pos] != '\n') + l->pos++; +} + +static int create_ionumber(struct lexer *l) +{ + struct string *str = string_create(NULL); + int i; + for (i = l->pos; isdigit(l->input->data[i]); i++) + string_pushc(str, l->input->data[i]); + if (l->input->data[i] == '<' || l->input->data[i] == '>') + { + l->current_tok.type = TOKEN_IONUMBER; + l->current_tok.value = str; + return 1; + } + else + { + string_free(str); + return 0; + } +} + +static int look_for_next(char *in, int i, char c) +{ + int escaped = 0; + while (in[i] && (in[i] != c || (escaped && in[i] == c))) + { + if (in[i] == '\\') + escaped ^= 1; + else + escaped = 0; + i++; + } + return i; +} + +static struct string *substitution(char *in, int i) +{ + char split = in[i]; + in[i] = '\0'; + struct string *new_input = string_create(in); + + if (split == '$') + { + i++; + } + + i++; + int error = 0; + struct string *substitute = + expand_substitution(in, &i, &error, (split == '`') ? '`' : ')'); + + if (error) + { + string_free(new_input); + return NULL; + } + + string_catenate(new_input, substitute); + + struct string *end_input = string_create(in + i + 1); + string_catenate(new_input, end_input); + + return new_input; +} + +static char extract_word(struct lexer *l, int *end) +{ + char *in = l->input->data; + int i = l->pos; + // True if we are currently inside quotes + int escaped = 0; + + while (in[i]) + { + if (!escaped && ((in[i] == '$' && in[i + 1] == '(') || in[i] == '`')) + { + struct string *new_input = substitution(in, i); + if (new_input == NULL) + { + l->current_tok.type = TOKEN_ERROR; + return '\0'; + } + + string_free(l->input); + l->input = new_input; + in = l->input->data; + } + // Checking that we are NOT in a quote and have a separator + if (!escaped && ISSEPARATOR(in[i])) + break; + if (!escaped && in[i] == '\'') + { + i++; + while (in[i] && in[i] != '\'') + i++; + if (!in[i]) + { + l->current_tok.type = TOKEN_ERROR; + return '\0'; + } + } + else if (!escaped && in[i] == '"') + { + i = look_for_next(in, i + 1, in[i]); + if (!in[i]) + { + l->current_tok.type = TOKEN_ERROR; + return '\0'; + } + } + else if (in[i] == '\\') + { + escaped ^= 1; + } + else + escaped = 0; + i++; + } + + char tmp = in[i]; + in[i] = '\0'; + *end = i; + return tmp; +} + +static enum token_type word_or_ass(struct token t) +{ + char *word = t.value->data; + if (isdigit(word[0])) + { + return TOKEN_WORD; + } + for (int i = 0; word[i]; i++) + { + if (word[i] == '=') + { + return TOKEN_ASS_WORD; + } + if (word[i] != '_' && !isalnum(word[i])) + { + return TOKEN_WORD; + } + } + return TOKEN_WORD; +} + +static void lex_word(struct lexer *l) +{ + int i = 0; + char tmp = extract_word(l, &i); + if (l->current_tok.type == TOKEN_ERROR) + return; + char *in = l->input->data; + + l->current_tok.type = TOKEN_ERROR; + // Identifies reserved words + for (int n = 0; reserved_words[n].word != NULL; n++) + { + if (STRINGS_ARE_EQUAL(in + l->pos, reserved_words[n].word)) + l->current_tok.type = reserved_words[n].type; + } + + // If we couldn't identify a reserved word + if (l->current_tok.type == TOKEN_ERROR) + { + struct string *pp = string_create(in + l->pos); + if (pp == NULL) + { + l->current_tok.type = TOKEN_ERROR; + in[i] = tmp; + return; + } + l->current_tok.value = pp; + // Set the token type + l->current_tok.type = word_or_ass(l->current_tok); + } + in[i] = tmp; +} + +static struct token set_ttype(struct lexer *lexer, enum token_type type) +{ + lexer->current_tok.type = type; + return lexer->current_tok; +} + +static struct token lex_and_or(struct lexer *l) +{ + if (l->input->data[l->pos] == '|' && l->input->data[l->pos + 1] == '|') + l->current_tok.type = TOKEN_OR; + else if (l->input->data[l->pos] == '|' && l->input->data[l->pos + 1] != '|') + l->current_tok.type = TOKEN_PIPE; + else if (l->input->data[l->pos] == '&' && l->input->data[l->pos + 1] == '&') + l->current_tok.type = TOKEN_AND; + else + l->current_tok.type = TOKEN_ERROR; + return l->current_tok; +} + +static struct token lex_redirect(struct lexer *l) +{ + l->current_tok.type = TOKEN_REDIR; + struct string *val = (l->current_tok.value = string_create(NULL)); + string_pushc(val, l->input->data[l->pos]); + if ((l->input->data[l->pos + 1] == '>' || l->input->data[l->pos + 1] == '&') + || (l->input->data[l->pos] == '>' && l->input->data[l->pos + 1] == '|')) + string_pushc(val, l->input->data[l->pos + 1]); + return l->current_tok; +} + +static void lex_special(struct lexer *l) +{ + // If the first char is a digit and we recognized a number followed by a + // redir + if (isdigit(l->input->data[l->pos]) && create_ionumber(l)) + // return immediately + return; + lex_word(l); +} + +struct token lexer_next_token(struct lexer *lexer) +{ + if (lexer->pos >= lexer->input->length) + { + lexer->current_tok.type = TOKEN_EOF; + lexer->current_tok.value = NULL; + return lexer->current_tok; + } + char *in = lexer->input->data; + + for (; in[lexer->pos] && (in[lexer->pos] == ' ' || NEWLINEESCAPE); + lexer->pos++) + { + if (in[lexer->pos] == '\\' && in[lexer->pos + 1] == '\n') + lexer->pos++; + continue; + } + + switch (in[lexer->pos]) + { + case ';': + return set_ttype(lexer, TOKEN_SEMICOLON); + case '\n': + return set_ttype(lexer, TOKEN_NEWLINE); + case '\0': + return set_ttype(lexer, TOKEN_EOF); + case '(': + return set_ttype(lexer, TOKEN_PAR_LEFT); + case ')': + return set_ttype(lexer, TOKEN_PAR_RIGHT); + case '{': + return set_ttype(lexer, TOKEN_CURLY_LEFT); + case '}': + return set_ttype(lexer, TOKEN_CURLY_RIGHT); + case '|': + /* FALLTHROUGH */ + case '&': + return lex_and_or(lexer); + case '<': + /* FALLTHROUGH */ + case '>': + return lex_redirect(lexer); + case '#': + yeet_comment(lexer); + return lexer_next_token(lexer); + default: + lex_special(lexer); + return lexer->current_tok; + } +} + +static void move_pos(struct lexer *lexer) +{ + enum token_type t = lexer->current_tok.type; + if (t == TOKEN_EOF) + { + return; + } + if (t == TOKEN_IF || t == TOKEN_FI || t == TOKEN_IN || t == TOKEN_DO + || t == TOKEN_OR || t == TOKEN_AND) + lexer->pos += 2; + else if (t == TOKEN_FOR) + lexer->pos += 3; + else if (t == TOKEN_ELSE || t == TOKEN_ELIF || t == TOKEN_THEN + || t == TOKEN_DONE) + lexer->pos += 4; + else if (t == TOKEN_WHILE || t == TOKEN_UNTIL) + lexer->pos += 5; + else if (t == TOKEN_WORD || t == TOKEN_IONUMBER || t == TOKEN_REDIR + || t == TOKEN_ASS_WORD) + lexer->pos += lexer->current_tok.value->length; + else + lexer->pos++; +} + +struct token lexer_peek(struct lexer *lexer) +{ + if (lexer->processed) + return lexer->current_tok; + lexer->processed = 1; + struct token res = lexer_next_token(lexer); + + move_pos(lexer); + + return res; +} + +struct token lexer_pop(struct lexer *lexer) +{ + struct token res; + + if (lexer->processed) + { + res = lexer->current_tok; + lexer->processed = 0; + return res; + } + + res = lexer_next_token(lexer); + + move_pos(lexer); + + lexer->processed = 0; + + return res; +} diff --git a/42sh/src/lexer/lexer.h b/42sh/src/lexer/lexer.h new file mode 100644 index 0000000..8d8cdf5 --- /dev/null +++ b/42sh/src/lexer/lexer.h @@ -0,0 +1,48 @@ +#ifndef LEXER_H +#define LEXER_H + +#include <lexer/token.h> +#include <stddef.h> +#include <utils/libstring.h> + +// True if C could be used as a word +#define ISWORD(C) \ + C == TOKEN_WORD || C == TOKEN_THEN || C == TOKEN_ELIF || C == TOKEN_ELSE \ + || C == TOKEN_IF || C == TOKEN_WHILE || C == TOKEN_UNTIL \ + || C == TOKEN_DO || C == TOKEN_DONE || C == TOKEN_FOR || C == TOKEN_IN \ + || C == TOKEN_NEG || C == TOKEN_FI || C == TOKEN_CURLY_LEFT \ + || C == TOKEN_CURLY_RIGHT + +struct lexer +{ + struct string *input; // input data + size_t pos; // the current offset inside the input data + char processed; + struct token current_tok; // next (if processed) token +}; + +// Creates a new lexer given an input string +struct lexer *lexer_new(struct string *input); + +// Frees the given lexer, not its input +void lexer_free(struct lexer *lexer); + +// Returns a token from the input string +// If the token is a WORD, copies the word to the current_tok.value field +struct token lexer_next_token(struct lexer *lexer); + +/* +** Processes the next token if necessary +** (previous call to lexer_pop or first call) +*/ +// Returns the next token +struct token lexer_peek(struct lexer *lexer); + +/* +** Processes the next token if necessary +** (previous call to lexer_pop or first call) +*/ +// Returns the next token and moves the cursor forward +struct token lexer_pop(struct lexer *lexer); + +#endif /* ! LEXER_H */ diff --git a/42sh/src/lexer/token.h b/42sh/src/lexer/token.h new file mode 100644 index 0000000..89d772a --- /dev/null +++ b/42sh/src/lexer/token.h @@ -0,0 +1,54 @@ +#ifndef TOKEN_H +#define TOKEN_H + +#include <utils/libstring.h> + +enum token_type +{ + // STEP 1 + TOKEN_NEWLINE, + TOKEN_EOF, + TOKEN_ERROR, + TOKEN_WORD, + TOKEN_IF, + TOKEN_THEN, + TOKEN_ELIF, + TOKEN_ELSE, + TOKEN_SEMICOLON, + TOKEN_FI, + TOKEN_HASHTAG, + + // STEP 2 + TOKEN_REDIR, + TOKEN_PIPE, + TOKEN_NEG, + TOKEN_WHILE, + TOKEN_UNTIL, + TOKEN_DO, + TOKEN_FOR, + TOKEN_DONE, + TOKEN_AND, + TOKEN_OR, + TOKEN_ESCAPE, + TOKEN_ASS_WORD, + TOKEN_DOUBLEQUOTE, + TOKEN_DOLLAR, + TOKEN_IN, + TOKEN_IONUMBER, + + // STEP 3 + TOKEN_PAR_RIGHT, + TOKEN_PAR_LEFT, + TOKEN_CURLY_RIGHT, + TOKEN_CURLY_LEFT, + + // STEP 4 +}; + +struct token +{ + enum token_type type; + struct string *value; +}; + +#endif /* ! TOKEN_H */ diff --git a/42sh/src/lexer/utils.h b/42sh/src/lexer/utils.h new file mode 100644 index 0000000..3edd83f --- /dev/null +++ b/42sh/src/lexer/utils.h @@ -0,0 +1,23 @@ +#ifndef UTILS_H +#define UTILS_H + +#include <lexer/token.h> + +struct reserved_word +{ + const char *word; + enum token_type type; +}; + +static struct reserved_word reserved_words[] = { + { "if", TOKEN_IF }, { "then", TOKEN_THEN }, + { "elif", TOKEN_ELIF }, { "else", TOKEN_ELSE }, + { "fi", TOKEN_FI }, { "while", TOKEN_WHILE }, + { "until", TOKEN_UNTIL }, { "do", TOKEN_DO }, + { "done", TOKEN_DONE }, { "for", TOKEN_FOR }, + { "in", TOKEN_IN }, { "!", TOKEN_NEG }, + { "}", TOKEN_CURLY_RIGHT }, { "{", TOKEN_CURLY_LEFT }, + { NULL, TOKEN_ERROR } +}; + +#endif /* ! UTILS_H */ diff --git a/42sh/src/parser/Makefile.am b/42sh/src/parser/Makefile.am new file mode 100644 index 0000000..f6c38fb --- /dev/null +++ b/42sh/src/parser/Makefile.am @@ -0,0 +1,21 @@ +lib_LIBRARIES = libparser.a + +libparser_a_SOURCES = \ + parser.h \ + parser.c \ + parser_functions.h \ + parser_commands.c \ + parser_conditionnals.c \ + parser_lists.c \ + parser_operators.c \ + parser_redir.c \ + parser_utils.h \ + parser_utils.c \ + parser_loops.c \ + parser_fundec.c + +libparser_a_CPPFLAGS = -I$(top_srcdir)/src + +libparser_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libparser.a diff --git a/42sh/src/parser/parser.c b/42sh/src/parser/parser.c new file mode 100644 index 0000000..7c6a1b2 --- /dev/null +++ b/42sh/src/parser/parser.c @@ -0,0 +1,40 @@ +#include "parser/parser.h" + +#include <stdlib.h> + +#include "ast/ast_accessors.h" +#include "lexer/token.h" +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + return NULL; + } + + struct ast *res = parse_list(lexer, state); + + next = lexer_peek(lexer); + + if (error_check(res, state, next)) + { + return NULL; + } + + lexer_pop(lexer); + + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + return res; + } + else + { + cleanup(res, state); + return NULL; + } +} diff --git a/42sh/src/parser/parser.h b/42sh/src/parser/parser.h new file mode 100644 index 0000000..c029a63 --- /dev/null +++ b/42sh/src/parser/parser.h @@ -0,0 +1,17 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "ast/ast.h" +#include "lexer/lexer.h" +#include "parser/parser_utils.h" + +/** + * @brief The main function to be called in the main loop. It will parse the + * current expression given by the lexer. Return NULL and set the state variable + * to `ERROR` if an error occured, else return the AST for this expression. + * @param lexer The lexer used to create the AST. + * @param state A pointger used to store the current state of the pointer. + */ +struct ast *parse(struct lexer *lexer, enum parser_state *state); + +#endif /* ! PARSER_H */ diff --git a/42sh/src/parser/parser_commands.c b/42sh/src/parser/parser_commands.c new file mode 100644 index 0000000..3321773 --- /dev/null +++ b/42sh/src/parser/parser_commands.c @@ -0,0 +1,393 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +static struct ast *cons_redirs(struct lexer *lexer, enum parser_state *state, + struct ast *root) +{ + struct token next = lexer_peek(lexer); + + while (TST_REDIR) + { + // The redir constructions are not consistents between the two + // grammars that can create redirs + // This is normal as theses redirs don't serve the same purposes + // This redir here is meant to redir a chuncky command + // While the one on the prefix grammar is meant for redirs mixed + // with assignments of variables + struct ast *redir = parse_redirection(lexer, state); + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + set_left(redir, root); + root = redir; + } + + return root; +} + +struct ast *parse_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (error_check(NULL, state, next)) + { + return NULL; + } + + // Case where we are in a loop + if (next.type == TOKEN_IF || next.type == TOKEN_WHILE + || next.type == TOKEN_UNTIL || next.type == TOKEN_FOR + || next.type == TOKEN_CURLY_LEFT || next.type == TOKEN_PAR_LEFT) + { + struct ast *loops = parse_shell_command(lexer, state); + next = lexer_peek(lexer); + + if (error_check(loops, state, next)) + { + return NULL; + } + + return cons_redirs(lexer, state, loops); + } + // Case where we're sure we have a prefix + else if (next.type == TOKEN_ASS_WORD || TST_REDIR) + { + return parse_simple_command(lexer, state); + } + // Ambiguous case where a WORD can be used in simple_command or in fundec + // We know that parse_simple will crash if something after the WORD is + // a parenthesis. So we just save the word in case and call simple command + else + { + next = lexer_peek(lexer); + if (next.type != TOKEN_WORD) + { + QUICK_CLEANUP + } + + struct string *cpy = + string_create((next.value) ? next.value->data : NULL); + if (cpy == NULL) + { + QUICK_CLEANUP + } + struct ast *temp = parse_simple_command(lexer, state); + + next = lexer_peek(lexer); + + // I have the feeling that this can be problematic in the case where + // we have a function definiton and a subshell + if (next.type == TOKEN_PAR_LEFT) + { + ast_free(temp); + temp = NULL; + } + + if (temp != NULL) + { + string_free(cpy); + return temp; + } + else + { + // Just like it never happened... + *state = OK; + + if (next.type == TOKEN_WORD) + { + string_free(cpy); + temp = parse_funcdec(lexer, state, NULL); + } + else + { + temp = parse_funcdec(lexer, state, cpy); + } + + next = lexer_peek(lexer); + if (error_check(temp, state, next)) + { + return NULL; + } + + return cons_redirs(lexer, state, temp); + } + } +} + +struct ast *parse_shell_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + switch (next.type) + { + case TOKEN_IF: + return parse_rule_if(lexer, state); + case TOKEN_WHILE: + return parse_rule_while(lexer, state); + case TOKEN_UNTIL: + return parse_rule_until(lexer, state); + case TOKEN_FOR: + return parse_rule_for(lexer, state); + case TOKEN_CURLY_LEFT: + lexer_pop(lexer); + struct ast *lst = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(lst, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_CURLY_RIGHT) + { + cleanup(lst, state); + return NULL; + } + + lexer_pop(lexer); + return lst; + case TOKEN_PAR_LEFT: + lexer_pop(lexer); + struct ast *subshell = ast_create(AST_SUBSHELL); + + if (subshell == NULL) + { + QUICK_CLEANUP + } + + struct ast *body = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + set_left(subshell, body); + + if (error_check(subshell, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_PAR_RIGHT) + { + cleanup(subshell, state); + return NULL; + } + + lexer_pop(lexer); + return subshell; + default: + // This case should not happen but just in case + for pedantic warnings + cleanup(NULL, state); + return NULL; + } +} + +struct ast *parse_simple_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + struct ast *root = NULL; + struct ast *up = NULL; + + // Should be enough to guard against the case where we have no prefix/WORDs + if (next.type != TOKEN_ASS_WORD && next.type != TOKEN_IONUMBER + && next.type != TOKEN_REDIR && next.type != TOKEN_WORD) + { + // Honnestly, it's quite useful and also puts me below 40 lines + QUICK_CLEANUP + } + struct ast *lst = ast_create(AST_LIST); + + if (lst == NULL) + { + QUICK_CLEANUP + } + + size_t i = 0; + + while (next.type == TOKEN_ASS_WORD || TST_REDIR) + { + if (next.type == TOKEN_ASS_WORD) + { + struct ast *var = parse_prefix(lexer, state); + next = lexer_peek(lexer); + if (error_check(var, state, next)) + { + return NULL; + } + + set_i(lst, var, i); + i++; + } + else + { + struct ast *redir = parse_prefix(lexer, state); + next = lexer_peek(lexer); + if (error_check(redir, state, next)) + { + return NULL; + } + + if (root == NULL || up == NULL) + { + root = redir; + up = redir; + set_left(up, lst); + } + else + { + set_left(up, redir); + set_left(redir, lst); + up = redir; + } + } + + next = lexer_peek(lexer); + } + + struct ast *son = parse_element(lexer, state, lst, up); + if (root == NULL) + { + root = son; + } + next = lexer_peek(lexer); + + if (error_check(root, state, next)) + { + // You never know if it was only variable assignment + // If so, this would never be attached to root + // cleanup(lst, state); // this one may not be necessary ~ + return NULL; + } + + return (root == NULL) ? lst : root; +} + +// This function takes care of the reallocation and all the checks that +// comes with calling realloc. (Mainly freeing stuff and exiting) +static struct string **_realloc_ast_command(struct ast *ast, size_t *capacity) +{ + struct ast_command *ast_c = ((struct ast_command *)ast); + size_t i = 0; + while (i < *capacity && ast_c->args[i]) + { + i++; + } + struct string **p = + realloc(ast_c->args, (i + CMDSIZE) * sizeof(struct string *)); + if (!p) + { + errx(EXIT_FAILURE, "Realloc failed."); + } + ast_c->args = p; + *capacity = i + CMDSIZE; + memset(ast_c->args + i, 0, (CMDSIZE) * sizeof(struct string *)); + return p; +} + +// Does not EXACTLY follows the grammar but it's close enough +// Here is the pattern it produces: +// element = { (WORD | redirection) }; +// Plus, it directly changes the tree instead of returning an ast to be +// attached to the parent node +struct ast *parse_element(struct lexer *lexer, enum parser_state *state, + struct ast *lst, struct ast *up) +{ + struct ast *root = up; + struct token next = lexer_peek(lexer); + struct ast *base = ast_create(AST_COMMAND); + int child_filled = 0; + if (base == NULL) + { + QUICK_CLEANUP; + } + + struct string **space = calloc(11, sizeof(struct string *)); + if (space == NULL) + { + cleanup(base, state); + return NULL; + } + + size_t capacity = CMDSIZE; + size_t i = 0; + + ((struct ast_command *)base)->args = space; + + while (ISWORD(next.type) || TST_REDIR) + { + if (!(ISWORD(next.type))) + { + struct ast *redir = parse_redirection(lexer, state); + // Because state is at ERROR state, I'm sure the 2 will be executed + if (error_check(root, state, next) && error_check(lst, state, next)) + { + return NULL; + } + if (root == NULL) + { + root = redir; + up = redir; + } + set_left(up, redir); + set_left(redir, lst); + up = redir; + } + else + { + if (!child_filled) + { + child_filled = 1; + } + if (i == capacity) + { + space = _realloc_ast_command(base, &capacity); + } + + litteral_reserved_word(&next); + space[i] = next.value; + i++; + lexer_pop(lexer); + } + next = lexer_peek(lexer); + } + + // In case we created the child for nothing and he wasn't filled + if (!child_filled) + { + ast_free(base); + } + else + { + size_t lst_end = list_length(lst); + set_i(lst, base, lst_end); + } + + // No need to reassign the space variable to the ast here because the worst + // that can happens is if it has been moved becasue of realloc() and this + // case has been dealt with + + return (root == NULL) ? lst : root; +} + +struct ast *parse_prefix(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + if (next.type == TOKEN_ASS_WORD) + { + struct ast *assign = assign_setup(next); + lexer_pop(lexer); + if (assign == NULL) + { + QUICK_CLEANUP + } + return assign; + } + else if (TST_REDIR) + { + return parse_redirection(lexer, state); + } + else + { + QUICK_CLEANUP + } +} diff --git a/42sh/src/parser/parser_conditionnals.c b/42sh/src/parser/parser_conditionnals.c new file mode 100644 index 0000000..ef1f67a --- /dev/null +++ b/42sh/src/parser/parser_conditionnals.c @@ -0,0 +1,152 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_rule_if(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + // Not mandatory because we checked before going in here + if (next.type != TOKEN_IF) + { + QUICK_CLEANUP + } + + struct ast *root = ast_create(AST_IF); + + if (root == NULL) + { + *state = ERROR; + return NULL; + } + + lexer_pop(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + set_left(root, cond); + + if (next.type != TOKEN_THEN) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + cond = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + + set_right(root, cond); + + if (next.type != TOKEN_FI) + { + cond = parse_else_clause(lexer, state); + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + set_i(root, cond, 2); + } + + lexer_pop(lexer); + + return root; +} + +struct ast *parse_else_clause(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + struct ast *root = NULL; + + if (next.type == TOKEN_ELSE) + { + lexer_pop(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + + if (error_check(cond, state, next)) + { + return NULL; + } + + return cond; + } + else if (next.type != TOKEN_ELIF) + { + cleanup(NULL, state); + return NULL; + } + + lexer_pop(lexer); + + // Doesn' it seems stupid to just manually re-creating the IF again? + // Can't I just call parse_if there ? But we won't be following the grammar + // This is a late-game optimization that might not be worth it + root = ast_create(AST_IF); + if (root == NULL) + { + cleanup(root, state); + return NULL; + } + + struct ast *cond = parse_compound_list(lexer, state); + set_left(root, cond); + + if (error_check(root, state, next)) + { + return NULL; + } + + next = lexer_peek(lexer); + + if (next.type != TOKEN_THEN) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + set_right(root, parse_compound_list(lexer, state)); + + if (error_check(root, state, next)) + { + return NULL; + } + + next = lexer_peek(lexer); + + if (next.type == TOKEN_ELSE || next.type == TOKEN_ELIF) + { + cond = parse_else_clause(lexer, state); + if (error_check(root, state, next)) + { + return NULL; + } + set_i(root, cond, 2); + } + + return root; +} diff --git a/42sh/src/parser/parser_functions.h b/42sh/src/parser/parser_functions.h new file mode 100644 index 0000000..bc87cf6 --- /dev/null +++ b/42sh/src/parser/parser_functions.h @@ -0,0 +1,198 @@ +#ifndef PARSER_FUNCTIONS_H +#define PARSER_FUNCTIONS_H + +#include "ast/ast.h" +#include "lexer/lexer.h" +#include "parser/parser_utils.h" + +#define QUICK_CLEANUP \ + cleanup(NULL, state); \ + return NULL; + +#define TST_REDIR next.type == TOKEN_IONUMBER || next.type == TOKEN_REDIR + +/** + * @brief This function parses the list grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `list = and_or { ';' and_or } [ ';' ] ;` + */ +struct ast *parse_list(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the and_or grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `and_or = pipeline { ( '&&' | '||' ) {'\\n'} pipeline } ;` + */ +struct ast *parse_and_or(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the pipeline grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `pipeline = ['!'] command { '|' {'\\n'} command } ;` + */ +struct ast *parse_pipeline(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `command = simple_command + * | shell command { redirection } + * | fundec { redirection } + * ;` + */ +struct ast *parse_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the simple_command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `simple_command = prefix { prefix } + * | { prefix } WORD { element } + * ;` + */ +struct ast *parse_simple_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the element grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form (different from subject): + * + * `element = { (WORD | redirection) };` + */ +struct ast *parse_element(struct lexer *lexer, enum parser_state *state, + struct ast *lst, struct ast *up); + +/** + * @brief This function parses the shell_command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `shell_command = rule_if + * | rule_while + * | rule_until + * | rule_for + * | '{' compound_list '}' + * | '(' compound_list ')' + * ;` + */ +struct ast *parse_shell_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_if grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_if = 'if' compound_list 'then' compound_list [else_clause] 'fi' ;` + */ +struct ast *parse_rule_if(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the else_clause grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `else_clause = 'else' compound_list + * | 'elif' compound_list 'then' compound_list [else_clause]` + */ +struct ast *parse_else_clause(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_while grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_while = 'while' compound_list 'do' compound_list 'done' ;` + */ +struct ast *parse_rule_while(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_until grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_until = 'until' compound_list 'do' compound_list 'done' ;` + */ +struct ast *parse_rule_until(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_for grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_for = 'for' WORD ( [';'] | [ {'\\n'} 'in' { WORD } ( ';' | '\\n' ) ] ) + * {'\\n'} 'do' compound_list 'done' + * ;` + */ +struct ast *parse_rule_for(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the prefix grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `prefix = ASSIGNMENT_WORD + * | redirection + * ;` + */ +struct ast *parse_prefix(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the redirection grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `redirection = [IONUMBER] ( '>' | '<' | '>>' | '>&' | '<&' | '>|' | '<>' ) + * WORD + * ;` + */ +struct ast *parse_redirection(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the compound_list grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `compound_list = { '\\n' } and_or { ( ';' | '\\n' ) { '\\n' } and_or } [ ';' + * ] { '\\n' } ;` + */ +struct ast *parse_compound_list(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the funcdec grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @param txt The string to use as the name of the function. NULL means it must + * be fetched from the lexer. + * @note This change in declaration is to accomodate for the way we manage the + * conflict between the simple_command and the fundec rules (both start w/ WORD) + * @note Here is the Extended Backus-Naur form: + * + * funcdec = WORD '(' ')' {'\n'} shell_command ; + */ +struct ast *parse_funcdec(struct lexer *lexer, enum parser_state *state, + struct string *txt); +#endif /* ! PARSER_FUNCTIONS_H */ diff --git a/42sh/src/parser/parser_fundec.c b/42sh/src/parser/parser_fundec.c new file mode 100644 index 0000000..08d4816 --- /dev/null +++ b/42sh/src/parser/parser_fundec.c @@ -0,0 +1,63 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_funcdec(struct lexer *lexer, enum parser_state *state, + struct string *txt) +{ + struct ast *function = ast_create(AST_FUNCTION); + if (function == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + if (txt == NULL && next.type != TOKEN_WORD) + { + cleanup(function, state); + return NULL; + } + else if (txt == NULL) + { + ((struct ast_function *)function)->name = next.value; + lexer_pop(lexer); + next = lexer_peek(lexer); + } + else + { + // If we used the word in the txt variable, no need to peek + ((struct ast_function *)function)->name = txt; + } + + if (next.type != TOKEN_PAR_LEFT) + { + cleanup(function, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (next.type != TOKEN_PAR_RIGHT) + { + cleanup(function, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct ast *body = parse_shell_command(lexer, state); + next = lexer_peek(lexer); + + if (error_check(function, state, next)) + { + return NULL; + } + + set_left(function, body); + + return function; +} diff --git a/42sh/src/parser/parser_lists.c b/42sh/src/parser/parser_lists.c new file mode 100644 index 0000000..4b5e204 --- /dev/null +++ b/42sh/src/parser/parser_lists.c @@ -0,0 +1,128 @@ +#include <err.h> +#include <stdio.h> + +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_list(struct lexer *lexer, enum parser_state *state) +{ + struct ast *node = parse_and_or(lexer, state); + struct ast *ast_list = NULL; + + // Who forgot to check that node can be NULL, I sure did + if (node == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + size_t i = 0; + + if (next.type == TOKEN_SEMICOLON) + { + ast_list = ast_create(AST_LIST); + + if (ast_list == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(ast_list, node); + i++; + } + + // If we enter this loop, this means we have a list of more than 1 element + while (next.type == TOKEN_SEMICOLON) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + + // Case where we have only one ';', meaning it is the end of the list + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + // set_right(ast_list, NULL); + return ast_list; + } + + struct ast *right = parse_and_or(lexer, state); + set_i(ast_list, right, i); + i++; + + // Was checking if right was NULL, but in that case state is ERROR + // and error_check will catch it + + next = lexer_peek(lexer); + if (error_check(ast_list, state, next)) + { + return NULL; + } + } + + return (ast_list == NULL) ? node : ast_list; +} + +struct ast *parse_compound_list(struct lexer *lexer, enum parser_state *state) +{ + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct ast *node = parse_and_or(lexer, state); + struct ast *ast_lst = NULL; + + struct token next = lexer_peek(lexer); + + if (error_check(node, state, next)) + { + return NULL; + } + + size_t i = 0; + + while (next.type == TOKEN_SEMICOLON || next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + + clean_cons_tokens(TOKEN_NEWLINE, lexer); + next = lexer_peek(lexer); + + // This condition might need to be expanded in the future + // But I don't know if there is a better way to do this + if (next.type == TOKEN_THEN || next.type == TOKEN_FI + || next.type == TOKEN_ELSE || next.type == TOKEN_ELIF + || next.type == TOKEN_DO || next.type == TOKEN_DONE + || next.type == TOKEN_CURLY_RIGHT || next.type == TOKEN_PAR_LEFT) + { + return (ast_lst == NULL) ? node : ast_lst; + } + + if (ast_lst == NULL) + { + ast_lst = ast_create(AST_LIST); + if (ast_lst == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(ast_lst, node); + i++; + } + + next = lexer_peek(lexer); + + struct ast *right = parse_and_or(lexer, state); + + // Can put the i++ in the set_i function but I'm too scared + set_i(ast_lst, right, i); + i++; + + next = lexer_peek(lexer); + if (error_check(ast_lst, state, next)) + { + return NULL; + } + } + + return (ast_lst == NULL) ? node : ast_lst; +} diff --git a/42sh/src/parser/parser_loops.c b/42sh/src/parser/parser_loops.c new file mode 100644 index 0000000..bf1033a --- /dev/null +++ b/42sh/src/parser/parser_loops.c @@ -0,0 +1,265 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +struct ast *parse_rule_while(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + // Because we use this function for both while and until loops, this needs + // to be accounted for. The construction is exactly the same otherwise. + // The only difference is that the condition is NOT'ed in parse_rule_until + if (next.type != TOKEN_WHILE && next.type != TOKEN_UNTIL) + { + QUICK_CLEANUP + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(NULL, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_DO) + { + cleanup(cond, state); + return NULL; + } + + lexer_pop(lexer); + + struct ast *body = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(cond, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_DONE) + { + cleanup(cond, state); + cleanup(body, state); + return NULL; + } + + lexer_pop(lexer); + + struct ast *while_loop = ast_create(AST_WHILE); + + if (while_loop == NULL) + { + cleanup(cond, state); + cleanup(body, state); + return NULL; + } + + set_left(while_loop, cond); + set_right(while_loop, body); + + return while_loop; +} + +// Warning, does not respect the grammar for parse_rule_until. +// This is because we reuse the parse_rule_while method (the ast representation +// being exatly the same, minus the condition being negated) +struct ast *parse_rule_until(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_UNTIL) + { + QUICK_CLEANUP + } + + struct ast *while_root = parse_rule_while(lexer, state); + next = lexer_peek(lexer); + + if (error_check(while_root, state, next)) + { + return NULL; + } + + struct ast *inverted = ast_create(AST_LOGICAL); + if (inverted == NULL) + { + cleanup(while_root, state); + return NULL; + } + + ((struct ast_logical *)inverted)->type = TOKEN_NEG; + + set_left(inverted, ((struct ast_while *)while_root)->cond); + ((struct ast_while *)while_root)->cond = inverted; + + return while_root; +} + +static struct string **_realloc_ast_command(struct ast *ast, size_t *capacity) +{ + struct ast_command *ast_c = ((struct ast_command *)ast); + size_t i = 0; + while (ast_c->args[i]) + { + i++; + } + struct string **p = + realloc(ast_c->args, (i + CMDSIZE) * sizeof(struct string *)); + if (!p) + { + errx(EXIT_FAILURE, "Realloc failed."); + } + ast_c->args = p; + *capacity = i + CMDSIZE; + memset(ast_c->args + i, 0, (CMDSIZE) * sizeof(struct string *)); + return p; +} + +static void fill_cmd(struct ast *ast, struct lexer *lexer) +{ + struct token next = lexer_peek(lexer); + size_t i = 0; + size_t capacity = CMDSIZE; + + struct string **arr = calloc(CMDSIZE, sizeof(struct string *)); + + ((struct ast_command *)ast)->args = arr; + while (ISWORD(next.type)) + { + if (i == capacity) + { + arr = _realloc_ast_command(ast, &capacity); + ((struct ast_command *)ast)->args = arr; + } + + litteral_reserved_word(&next); + arr[i] = next.value; + i++; + lexer_pop(lexer); + next = lexer_peek(lexer); + } +} + +static struct ast *fill_for_body(struct ast *root, struct lexer *lexer, + enum parser_state *state) +{ + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_DO) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *cmp_lst = parse_compound_list(lexer, state); + + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + set_right(root, cmp_lst); + + if (next.type != TOKEN_DONE) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + return root; +} + +struct ast *parse_rule_for(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_FOR) + { + QUICK_CLEANUP; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + // It can be a keyword interpreted as a normal word + litteral_reserved_word(&next); + struct ast *for_node = ast_create(AST_FOR); + + // Cannot do both at the same time because I couldn't know why the code exit + // Was it because of the tokenor the ast and should I free something ? + if (for_node == NULL) + { + QUICK_CLEANUP; + } + + if (!(ISWORD(next.type))) + { + cleanup(for_node, state); + return NULL; + } + + ((struct ast_for *)for_node)->var = next.value; + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (next.type == TOKEN_SEMICOLON) + { + lexer_pop(lexer); + } + else if (next.type == TOKEN_NEWLINE || next.type == TOKEN_IN) + { + if (next.type == TOKEN_NEWLINE) + { + clean_cons_tokens(TOKEN_NEWLINE, lexer); + // We only need to peek if we destroyed tokens + next = lexer_peek(lexer); + } + + if (next.type == TOKEN_IN) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *lst = ast_create(AST_COMMAND); + if (lst == NULL) + { + cleanup(for_node, state); + return NULL; + } + + fill_cmd(lst, lexer); + set_left(for_node, lst); + + next = lexer_peek(lexer); + if (next.type != TOKEN_SEMICOLON && next.type != TOKEN_NEWLINE) + { + cleanup(for_node, state); + return NULL; + } + + lexer_pop(lexer); + } + } + else if (next.type != TOKEN_DO) + { + cleanup(for_node, state); + return NULL; + } + + return fill_for_body(for_node, lexer, state); +} diff --git a/42sh/src/parser/parser_operators.c b/42sh/src/parser/parser_operators.c new file mode 100644 index 0000000..31cebbb --- /dev/null +++ b/42sh/src/parser/parser_operators.c @@ -0,0 +1,122 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_and_or(struct lexer *lexer, enum parser_state *state) +{ + struct ast *node = parse_pipeline(lexer, state); + struct token next = lexer_peek(lexer); + + if (error_check(node, state, next)) + { + QUICK_CLEANUP + } + + while (next.type == TOKEN_AND || next.type == TOKEN_OR) + { + struct ast *logical = ast_create(AST_LOGICAL); + if (logical == NULL) + { + cleanup(node, state); + return NULL; + } + + ((struct ast_logical *)logical)->type = next.type; + + lexer_pop(lexer); + next = lexer_peek(lexer); + + while (next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + set_left(logical, node); + + struct ast *right = parse_pipeline(lexer, state); + + // So it's ready for next iteration + usable in error_check() + next = lexer_peek(lexer); + + // If there IS an error, then it MUST have returned NULL + // So no need to free 'right' + if (error_check(logical, state, next)) + { + return NULL; + } + + set_right(logical, right); + node = logical; + } + return node; +} + +struct ast *parse_pipeline(struct lexer *lexer, enum parser_state *state) +{ + struct ast *negation = NULL; + struct token next = lexer_peek(lexer); + + if (next.type == TOKEN_NEG) + { + negation = ast_create(AST_LOGICAL); + if (negation == NULL) + { + QUICK_CLEANUP + } + + union ast_caster cast; + cast.ast = negation; + cast.ast_lo->type = next.type; + + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + struct ast *node = parse_command(lexer, state); + next = lexer_peek(lexer); + + if (negation != NULL) + { + set_left(negation, node); + node = negation; + } + + // If there is an error here (aka node failed), just negation is alloc'ed + if (error_check(negation, state, next)) + { + return NULL; + } + + while (next.type == TOKEN_PIPE) + { + lexer_pop(lexer); + struct ast *pipe = ast_create(AST_PIPELINE); + + if (pipe == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(pipe, node); + + while (next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + set_right(pipe, parse_command(lexer, state)); + + next = lexer_peek(lexer); + + if (error_check(pipe, state, next)) + { + return NULL; + } + + node = pipe; + } + + return node; +} diff --git a/42sh/src/parser/parser_redir.c b/42sh/src/parser/parser_redir.c new file mode 100644 index 0000000..9a06d0b --- /dev/null +++ b/42sh/src/parser/parser_redir.c @@ -0,0 +1,59 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +struct ast *parse_redirection(struct lexer *lexer, enum parser_state *state) +{ + // The caller (prefix grammar in this case) will have to attach its + // own ast as the left child of the redir ast + struct ast *redir = ast_create(AST_REDIRECTION); + if (redir == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + union ast_caster cast; + cast.ast = redir; + + if (next.type == TOKEN_IONUMBER) + { + cast.ast_r->redirect.fd = next.value; + lexer_pop(lexer); + next = lexer_peek(lexer); + } + else + { + cast.ast_r->redirect.fd = string_create("1"); + } + + if (next.type != TOKEN_REDIR) + { + cleanup(redir, state); + return NULL; + } + + cast.ast_r->redirect.redir = next.value; + if (next.value->data[0] == '<' && cast.ast_r->redirect.fd->data[0] == '1') + { + cast.ast_r->redirect.fd->data[0] = '0'; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (!(ISWORD(next.type))) + { + cleanup(redir, state); + errx(2, "redir: bad grammar (bad redir)."); + } + + litteral_reserved_word(&next); + cast.ast_r->redirect.file = next.value; + lexer_pop(lexer); + + ((struct ast_redirection *)redir)->redirect = cast.ast_r->redirect; + return redir; +} diff --git a/42sh/src/parser/parser_utils.c b/42sh/src/parser/parser_utils.c new file mode 100644 index 0000000..f30c60f --- /dev/null +++ b/42sh/src/parser/parser_utils.c @@ -0,0 +1,147 @@ +#include "parser_utils.h" + +#include "ast/ast.h" +#include "utils/libstring.h" + +void cleanup(struct ast *root, enum parser_state *state) +{ + if (root != NULL) + { + ast_free(root); + } + *state = ERROR; +} + +int error_check(struct ast *root, enum parser_state *state, struct token next) +{ + if (next.type == TOKEN_ERROR || *state == ERROR) + { + cleanup(root, state); + return 1; + } + else + { + return 0; + } +} + +struct ast *assign_setup(struct token next) +{ + struct ast *root = ast_create(AST_ASSIGN); + if (root == NULL) + { + return NULL; + } + struct string *var_name = string_create(NULL); + struct string *value = string_create(NULL); + + char *txt = next.value->data; + + size_t i = 0; + + while (txt[i] != '=') + { + if (!string_pushc(var_name, txt[i])) + { + string_free(var_name); + string_free(value); + // Noone will free it so we'll have to do it by ourselves + string_free(next.value); + ast_free(root); + return NULL; + } + i++; + } + + i++; + + while (txt[i] != '\0') + { + if (!string_pushc(value, txt[i])) + { + string_free(var_name); + string_free(value); + // Noone will free it so we'll have to do it by ourselves + string_free(next.value); + ast_free(root); + return NULL; + } + i++; + } + + ((struct ast_assign *)root)->name = var_name; + ((struct ast_assign *)root)->val = value; + + // Beause it will no be inside the ast, it won't be naturally freed + string_free(next.value); + return root; +} + +size_t list_length(struct ast *lst) +{ + union ast_caster cast; + cast.ast = lst; + return cast.ast_l->nb_children; +} + +// This is the function you're supposed to call when a keyword is considered +// as a simple word +void litteral_reserved_word(struct token *next) +{ + // This is pretty efficient thanks to the switch case statement + // I don't think there is a cleaner way to do this (37 lines) + switch (next->type) + { + case TOKEN_IF: + next->value = string_create("if"); + return; + case TOKEN_THEN: + next->value = string_create("then"); + return; + case TOKEN_ELIF: + next->value = string_create("elif"); + return; + case TOKEN_ELSE: + next->value = string_create("else"); + return; + case TOKEN_FI: + next->value = string_create("fi"); + return; + case TOKEN_WHILE: + next->value = string_create("while"); + return; + case TOKEN_UNTIL: + next->value = string_create("until"); + return; + case TOKEN_DO: + next->value = string_create("do"); + return; + case TOKEN_DONE: + next->value = string_create("done"); + return; + case TOKEN_FOR: + next->value = string_create("for"); + return; + case TOKEN_IN: + next->value = string_create("in"); + return; + case TOKEN_NEG: + next->value = string_create("!"); + return; + default: + // This catches any TOKEN_WORD and at this point, you're not even + // supposed to have anything else + return; + } +} + +void clean_cons_tokens(enum token_type type, struct lexer *lexer) +{ + struct token next = lexer_peek(lexer); + + while (next.type == type) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } +} diff --git a/42sh/src/parser/parser_utils.h b/42sh/src/parser/parser_utils.h new file mode 100644 index 0000000..142913c --- /dev/null +++ b/42sh/src/parser/parser_utils.h @@ -0,0 +1,75 @@ +#ifndef PARSER_UTILS_H +#define PARSER_UTILS_H + +#include "ast/ast.h" +#include "ast/ast_accessors.h" +#include "lexer/lexer.h" + +#define CMDSIZE 10 + +enum parser_state +{ + OK, + ERROR, +}; + +/** + * @brief Method used by error_check() but made available to the developper + * to ease the error management if an error where to occur. It sets state + * to the `ERROR` value and frees the ast if it is not NULL. + * @param root The root of the ast that will be freed. + * @param state The current state of the lexer that will be changed to `ERROR`. + * @note Because giving NULL for the root argument will not free anything, + * it can be used to set the state variable to the `ERROR` status. + */ +void cleanup(struct ast *root, enum parser_state *state); + +/** + * @brief This method will return an integer a nonzero value if an error was + * detected. If so, cleanup() will be called. An error is detected if the state + * argument is set to `ERROR` or if the token next is of type `TOKEN_ERROR`. + * @param root The root of the ast to be freed if there is an error. + * @param state The current state of the parser. Set to `ERROR` if needed. + * @param next The next token given by the lexer. + * @note This function allows for quick and efficient error checks: + * if (error_check(root, state, next)) + * { + * return NULL; + * } + */ +int error_check(struct ast *root, enum parser_state *state, struct token next); + +/** + * @brief This creates a node as an ast_assign and fills it with the variable + * next and its value found within said token. Data in the token needs to be + * parsed before doing anything + * @param next The token containing the value to fill in the ast + */ +struct ast *assign_setup(struct token next); + +/** + * @brief This function returns the number of elements in an ast_list + * @param lst The ast_list we want to know how much childrens it has + * @note Tre result is undefined if the ast type is not AST_LIST + */ +size_t list_length(struct ast *lst); + +/** + * @brief This function will fill the token with the appropriate string + * when encountering a reserved word that we want to use as a word + * @param next the token to fill + * @note NULL is set in the token if string cannot be created + */ +void litteral_reserved_word(struct token *next); + +/** + * @brief This function will pop all the token with the same type as the next + * parameter. This is useful because a lot of grammars can have consecutive + * tokens of the same type you need to get rid of. Stops when encountering + * another type of token on the lexer + * @param type The type we want to get rid of + * @param lexer The lexer that will give us the tokens + * @note the function won't do anything if it cannot find the matching type + */ +void clean_cons_tokens(enum token_type type, struct lexer *lexer); +#endif /* ! PARSER_UTILS_H */ diff --git a/42sh/src/utils/Makefile.am b/42sh/src/utils/Makefile.am new file mode 100644 index 0000000..2418637 --- /dev/null +++ b/42sh/src/utils/Makefile.am @@ -0,0 +1,13 @@ +lib_LIBRARIES = libutils.a + +libutils_a_SOURCES = \ + libstring.h \ + basicstring.c \ + env.c \ + env.h + +libutils_a_CPPFLAGS = -I$(top_srcdir)/src + +libutils_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libutils.a diff --git a/42sh/src/utils/basicstring.c b/42sh/src/utils/basicstring.c new file mode 100644 index 0000000..6efbe1f --- /dev/null +++ b/42sh/src/utils/basicstring.c @@ -0,0 +1,124 @@ +#include <stdio.h> +#include <stdlib.h> + +#include "libstring.h" + +struct string *string_create(char *s) +{ + if (!s) + { + struct string *r = calloc(1, sizeof(struct string)); + char *data = calloc((BASE_STRING_SIZE + 1), sizeof(char)); + if (!r || !data) + { + fprintf(stderr, "string_create: calloc failed.\n"); + return NULL; + } + r->data = data; + r->capacity = BASE_STRING_SIZE; + return r; + } + + struct string *str = malloc(1 * sizeof(struct string)); + if (!str) + { + fprintf(stderr, "string_create: malloc failed.\n"); + return NULL; + } + + // Align the capacity on the string base size + size_t l = strlen(s); + size_t m = (l + 1) % BASE_STRING_SIZE; + str->length = l; + str->capacity = (l + 1) + (BASE_STRING_SIZE - m); + str->data = calloc(str->capacity, sizeof(char)); + if (!str->data) + { + fprintf(stderr, "string_create: malloc failed.\n"); + return NULL; + } + memcpy(str->data, s, l); + + return str; +} + +void string_free(struct string *str) +{ + if (str) + { + free(str->data); + free(str); + } +} + +static bool _resize_string(struct string *str, int p) +{ + char *new_data = realloc( + str->data, ((p >= 0) ? str->capacity * 2 : str->capacity / 2) + 1); + if (!new_data) + { + fprintf(stderr, "_resize_string: realloc failed.\n"); + return false; + } + + str->data = new_data; + str->capacity = ((p >= 0) ? str->capacity * 2 : str->capacity / 2); + return true; +} + +bool string_pushc(struct string *str, char c) +{ + if (str->length == str->capacity) + { + if (!_resize_string(str, 1)) + { + return false; + } + } + str->data[str->length] = c; + str->length++; + str->data[str->length] = 0; + return true; +} + +bool string_pushstr(struct string *str, char *s) +{ + if (!s || !s[0]) + { + return true; + } + size_t l = strlen(s); + while (str->length + l >= str->capacity) + { + if (!_resize_string(str, 1)) + { + return false; + } + } + + memcpy(str->data + str->length, s, l); + str->length += l; + str->data[str->length] = 0; + return true; +} + +bool string_catenate(struct string *dst, struct string *src) +{ + if (!src) + { + return true; + } + + if (!dst || !string_pushstr(dst, src->data)) + { + return false; + } + + string_free(src); + return true; +} + +struct string *string_deepcopy(struct string *str) +{ + return string_create(str->data); +} diff --git a/42sh/src/utils/env.c b/42sh/src/utils/env.c new file mode 100644 index 0000000..e3095a4 --- /dev/null +++ b/42sh/src/utils/env.c @@ -0,0 +1,125 @@ +#define _DEFAULT_SOURCE +#define _POSIX_C_SOURCE 200809L + +#include "utils/env.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "utils/libstring.h" + +void env_set(const char *name, const char *value) +{ + if (setenv(name, value, 1) == -1) + { + fprintf(stderr, "env_set: unable to set \'%s\'=\'%s\'.\n", name, value); + } +} + +void env_unset(const char *name) +{ + if (unsetenv(name) == -1) + { + fprintf(stderr, "env_unset: unable to unset \'%s\'.\n", name); + } +} + +static void _generate_random(void) +{ + int d = rand() % MAX_RAND; + char buf[16] = { 0 }; + sprintf(buf, "%d", d); + env_set("RANDOM", buf); +} + +static struct string *_cat_args(void) +{ + int nb_args = atoi(env_get("#")); + struct string *s = string_create(NULL); + for (int i = 1; i <= nb_args; i++) + { + char buf[16] = { 0 }; + sprintf(buf, "%d", i); + string_pushstr(s, env_get(buf)); + if (i != nb_args) + { + string_pushc(s, ' '); + } + } + return s; +} + +char *env_get(const char *name) +{ + if (STRINGS_ARE_EQUAL(name, "RANDOM")) + { + _generate_random(); + } + if (STRINGS_ARE_EQUAL(name, "*")) + { + struct string *s = _cat_args(); + env_set("*", s->data); + string_free(s); + } + if (STRINGS_ARE_EQUAL(name, "@")) + { + struct string *s = string_create(NULL); + string_pushc(s, '\"'); + struct string *args = _cat_args(); + string_pushstr(s, args->data); + string_pushc(s, '\"'); + if (s->length > 2) + { + env_set("@", s->data); + } + string_free(s); + string_free(args); + } + return getenv(name); +} + +void env_clear(void) +{ + if (clearenv()) + { + fprintf(stderr, "env_clear: unable to clear the environment.\n"); + } +} + +static void _set_pwd(void) +{ + char buf[MAX_PATH_SIZE] = { 0 }; + char *path = getcwd(buf, MAX_PATH_SIZE); + env_set("PWD", path); + env_set("OLDPWD", path); +} + +static void _set_uid(void) +{ + uid_t uid = getuid(); + // Magic value but UID is an integer (POSIX requirement) + // So it won't be more that 10 chars long + char buf[16] = { 0 }; + sprintf(buf, "%u", uid); + env_set("UID", buf); +} + +static void _set_pid(void) +{ + pid_t pid = getpid(); + // Magic value but PID is an integer (POSIX requirement) + // So it won't be more that 10 chars long + char buf[16] = { 0 }; + sprintf(buf, "%d", pid); + env_set("$", buf); +} + +void env_setup(void) +{ + _set_pwd(); + _set_uid(); + _set_pid(); + env_set("IFS", DEFAULT_IFS); + env_set("?", "0"); +} diff --git a/42sh/src/utils/env.h b/42sh/src/utils/env.h new file mode 100644 index 0000000..e349651 --- /dev/null +++ b/42sh/src/utils/env.h @@ -0,0 +1,41 @@ +#ifndef ENV_H +#define ENV_H + +#include <stdlib.h> + +#define DEFAULT_IFS " \t\n" + +#define MAX_PATH_SIZE 4096 +#define MAX_RAND 32768 + +/** + * @brief Sets the variable `name`=`value` into the environment. + */ +void env_set(const char *name, const char *value); + +/** + * @brief Unsets the variable `name` from the environment. + */ +void env_unset(const char *name); + +/** + * @brief Gets the value of the variable `name`. + * @return A string that contains the value. + */ +char *env_get(const char *name); + +/** + * @brief Clears all of the variables that are in the environment. + */ +void env_clear(void); + +/** + * @brief Sets a few variables to default values (that can be found in `env.h`). + * Here is the list of the variables affected by this function: + * `PWD`, + * `OLDPWD`, + * `IFS` + */ +void env_setup(void); + +#endif /* ! ENV_H */ diff --git a/42sh/src/utils/libstring.h b/42sh/src/utils/libstring.h new file mode 100644 index 0000000..e30097c --- /dev/null +++ b/42sh/src/utils/libstring.h @@ -0,0 +1,87 @@ +#ifndef LIBSTRING_H +#define LIBSTRING_H + +#include <stdbool.h> +#include <string.h> + +/** + * @brief Represents the default capacity of the string + */ +#define BASE_STRING_SIZE 8 + +#define STRINGS_ARE_EQUAL(A, B) (strcmp(A, B) == 0) + +/** + * @brief Represents a dynamically allocated string. + * + * This struct manages a dynamically allocated null-terminated string + * along with its length and capacity. + */ +struct string +{ + size_t length; /**< The current length of the string (excluding the null + terminator). */ + size_t capacity; /**< The allocated capacity of the string (always a + multiple of `BASE_STRING_SIZE`). */ + char *data; /**< A pointer to the null-terminated character data. */ +}; + +/** + * @brief Creates a heap-allocated copy of the given string. + * + * The capacity is always a multiple of `BASE_STRING_SIZE`. If `s` is + * heap-allocated, the caller must free it later. The resulting string struct + * and data are freed when `string_free` is called. + * + * @param s A null-terminated string that can be null. + * @return A pointer to a newly heap-allocated `struct string`. + */ +struct string *string_create(char *s); + +/** + * @brief Frees the `struct string` and its associated data. + * + * This function frees the struct `str` and its heap-allocated null-terminated + * character data. + * + * @param str A pointer to the `struct string` to be freed. + */ +void string_free(struct string *str); + +/** + * @brief Pushes a single character into the string. + * + * Resizes the string's data and capacity if needed. + * + * @param str A pointer to the `struct string` where the character will be + * added. + * @param c The character to push into the string. + * @return `true` if the push succeeds, `false` otherwise. + */ +bool string_pushc(struct string *str, char c); + +/** + * @brief Pushes a null-terminated string into the string. + * + * @param str A pointer to the `struct string` where the string will be added. + * @param s A null-terminated string to push into `str`. + * @return `true` if the operation succeeds, `false` otherwise. + */ +bool string_pushstr(struct string *str, char *s); + +/** + * @brief Concatenates two `struct string` objects and frees the source string. + * + * The `src` struct and its data are freed after concatenation. + * + * @param dst A pointer to the destination `struct string` to which `src` will + * be concatenated. + * @param src A pointer to the source `struct string` that will be concatenated + * and freed. + * @return `true` if the concatenation succeeds, `false` otherwise. + */ +bool string_catenate(struct string *dst, struct string *src); + +struct string *string_deepcopy(struct string *str); + +#endif /* ! LIBSTRING_H */ |
