diff options
| author | Martial Simon <msimon_fr@hotmail.com> | 2025-09-15 01:07:58 +0200 |
|---|---|---|
| committer | Martial Simon <msimon_fr@hotmail.com> | 2025-09-15 01:07:58 +0200 |
| commit | 967be9e750221ab2ab783f95df79bb26d290a45e (patch) | |
| tree | 6802900a5e975f9f68b169f0f503f040056d6952 /42sh/src/parser | |
Diffstat (limited to '42sh/src/parser')
| -rw-r--r-- | 42sh/src/parser/Makefile.am | 21 | ||||
| -rw-r--r-- | 42sh/src/parser/parser.c | 40 | ||||
| -rw-r--r-- | 42sh/src/parser/parser.h | 17 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_commands.c | 393 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_conditionnals.c | 152 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_functions.h | 198 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_fundec.c | 63 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_lists.c | 128 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_loops.c | 265 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_operators.c | 122 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_redir.c | 59 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_utils.c | 147 | ||||
| -rw-r--r-- | 42sh/src/parser/parser_utils.h | 75 |
13 files changed, 1680 insertions, 0 deletions
diff --git a/42sh/src/parser/Makefile.am b/42sh/src/parser/Makefile.am new file mode 100644 index 0000000..f6c38fb --- /dev/null +++ b/42sh/src/parser/Makefile.am @@ -0,0 +1,21 @@ +lib_LIBRARIES = libparser.a + +libparser_a_SOURCES = \ + parser.h \ + parser.c \ + parser_functions.h \ + parser_commands.c \ + parser_conditionnals.c \ + parser_lists.c \ + parser_operators.c \ + parser_redir.c \ + parser_utils.h \ + parser_utils.c \ + parser_loops.c \ + parser_fundec.c + +libparser_a_CPPFLAGS = -I$(top_srcdir)/src + +libparser_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic + +noinst_LIBRARIES = libparser.a diff --git a/42sh/src/parser/parser.c b/42sh/src/parser/parser.c new file mode 100644 index 0000000..7c6a1b2 --- /dev/null +++ b/42sh/src/parser/parser.c @@ -0,0 +1,40 @@ +#include "parser/parser.h" + +#include <stdlib.h> + +#include "ast/ast_accessors.h" +#include "lexer/token.h" +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + return NULL; + } + + struct ast *res = parse_list(lexer, state); + + next = lexer_peek(lexer); + + if (error_check(res, state, next)) + { + return NULL; + } + + lexer_pop(lexer); + + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + return res; + } + else + { + cleanup(res, state); + return NULL; + } +} diff --git a/42sh/src/parser/parser.h b/42sh/src/parser/parser.h new file mode 100644 index 0000000..c029a63 --- /dev/null +++ b/42sh/src/parser/parser.h @@ -0,0 +1,17 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "ast/ast.h" +#include "lexer/lexer.h" +#include "parser/parser_utils.h" + +/** + * @brief The main function to be called in the main loop. It will parse the + * current expression given by the lexer. Return NULL and set the state variable + * to `ERROR` if an error occured, else return the AST for this expression. + * @param lexer The lexer used to create the AST. + * @param state A pointger used to store the current state of the pointer. + */ +struct ast *parse(struct lexer *lexer, enum parser_state *state); + +#endif /* ! PARSER_H */ diff --git a/42sh/src/parser/parser_commands.c b/42sh/src/parser/parser_commands.c new file mode 100644 index 0000000..3321773 --- /dev/null +++ b/42sh/src/parser/parser_commands.c @@ -0,0 +1,393 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +static struct ast *cons_redirs(struct lexer *lexer, enum parser_state *state, + struct ast *root) +{ + struct token next = lexer_peek(lexer); + + while (TST_REDIR) + { + // The redir constructions are not consistents between the two + // grammars that can create redirs + // This is normal as theses redirs don't serve the same purposes + // This redir here is meant to redir a chuncky command + // While the one on the prefix grammar is meant for redirs mixed + // with assignments of variables + struct ast *redir = parse_redirection(lexer, state); + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + set_left(redir, root); + root = redir; + } + + return root; +} + +struct ast *parse_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (error_check(NULL, state, next)) + { + return NULL; + } + + // Case where we are in a loop + if (next.type == TOKEN_IF || next.type == TOKEN_WHILE + || next.type == TOKEN_UNTIL || next.type == TOKEN_FOR + || next.type == TOKEN_CURLY_LEFT || next.type == TOKEN_PAR_LEFT) + { + struct ast *loops = parse_shell_command(lexer, state); + next = lexer_peek(lexer); + + if (error_check(loops, state, next)) + { + return NULL; + } + + return cons_redirs(lexer, state, loops); + } + // Case where we're sure we have a prefix + else if (next.type == TOKEN_ASS_WORD || TST_REDIR) + { + return parse_simple_command(lexer, state); + } + // Ambiguous case where a WORD can be used in simple_command or in fundec + // We know that parse_simple will crash if something after the WORD is + // a parenthesis. So we just save the word in case and call simple command + else + { + next = lexer_peek(lexer); + if (next.type != TOKEN_WORD) + { + QUICK_CLEANUP + } + + struct string *cpy = + string_create((next.value) ? next.value->data : NULL); + if (cpy == NULL) + { + QUICK_CLEANUP + } + struct ast *temp = parse_simple_command(lexer, state); + + next = lexer_peek(lexer); + + // I have the feeling that this can be problematic in the case where + // we have a function definiton and a subshell + if (next.type == TOKEN_PAR_LEFT) + { + ast_free(temp); + temp = NULL; + } + + if (temp != NULL) + { + string_free(cpy); + return temp; + } + else + { + // Just like it never happened... + *state = OK; + + if (next.type == TOKEN_WORD) + { + string_free(cpy); + temp = parse_funcdec(lexer, state, NULL); + } + else + { + temp = parse_funcdec(lexer, state, cpy); + } + + next = lexer_peek(lexer); + if (error_check(temp, state, next)) + { + return NULL; + } + + return cons_redirs(lexer, state, temp); + } + } +} + +struct ast *parse_shell_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + switch (next.type) + { + case TOKEN_IF: + return parse_rule_if(lexer, state); + case TOKEN_WHILE: + return parse_rule_while(lexer, state); + case TOKEN_UNTIL: + return parse_rule_until(lexer, state); + case TOKEN_FOR: + return parse_rule_for(lexer, state); + case TOKEN_CURLY_LEFT: + lexer_pop(lexer); + struct ast *lst = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(lst, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_CURLY_RIGHT) + { + cleanup(lst, state); + return NULL; + } + + lexer_pop(lexer); + return lst; + case TOKEN_PAR_LEFT: + lexer_pop(lexer); + struct ast *subshell = ast_create(AST_SUBSHELL); + + if (subshell == NULL) + { + QUICK_CLEANUP + } + + struct ast *body = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + set_left(subshell, body); + + if (error_check(subshell, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_PAR_RIGHT) + { + cleanup(subshell, state); + return NULL; + } + + lexer_pop(lexer); + return subshell; + default: + // This case should not happen but just in case + for pedantic warnings + cleanup(NULL, state); + return NULL; + } +} + +struct ast *parse_simple_command(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + struct ast *root = NULL; + struct ast *up = NULL; + + // Should be enough to guard against the case where we have no prefix/WORDs + if (next.type != TOKEN_ASS_WORD && next.type != TOKEN_IONUMBER + && next.type != TOKEN_REDIR && next.type != TOKEN_WORD) + { + // Honnestly, it's quite useful and also puts me below 40 lines + QUICK_CLEANUP + } + struct ast *lst = ast_create(AST_LIST); + + if (lst == NULL) + { + QUICK_CLEANUP + } + + size_t i = 0; + + while (next.type == TOKEN_ASS_WORD || TST_REDIR) + { + if (next.type == TOKEN_ASS_WORD) + { + struct ast *var = parse_prefix(lexer, state); + next = lexer_peek(lexer); + if (error_check(var, state, next)) + { + return NULL; + } + + set_i(lst, var, i); + i++; + } + else + { + struct ast *redir = parse_prefix(lexer, state); + next = lexer_peek(lexer); + if (error_check(redir, state, next)) + { + return NULL; + } + + if (root == NULL || up == NULL) + { + root = redir; + up = redir; + set_left(up, lst); + } + else + { + set_left(up, redir); + set_left(redir, lst); + up = redir; + } + } + + next = lexer_peek(lexer); + } + + struct ast *son = parse_element(lexer, state, lst, up); + if (root == NULL) + { + root = son; + } + next = lexer_peek(lexer); + + if (error_check(root, state, next)) + { + // You never know if it was only variable assignment + // If so, this would never be attached to root + // cleanup(lst, state); // this one may not be necessary ~ + return NULL; + } + + return (root == NULL) ? lst : root; +} + +// This function takes care of the reallocation and all the checks that +// comes with calling realloc. (Mainly freeing stuff and exiting) +static struct string **_realloc_ast_command(struct ast *ast, size_t *capacity) +{ + struct ast_command *ast_c = ((struct ast_command *)ast); + size_t i = 0; + while (i < *capacity && ast_c->args[i]) + { + i++; + } + struct string **p = + realloc(ast_c->args, (i + CMDSIZE) * sizeof(struct string *)); + if (!p) + { + errx(EXIT_FAILURE, "Realloc failed."); + } + ast_c->args = p; + *capacity = i + CMDSIZE; + memset(ast_c->args + i, 0, (CMDSIZE) * sizeof(struct string *)); + return p; +} + +// Does not EXACTLY follows the grammar but it's close enough +// Here is the pattern it produces: +// element = { (WORD | redirection) }; +// Plus, it directly changes the tree instead of returning an ast to be +// attached to the parent node +struct ast *parse_element(struct lexer *lexer, enum parser_state *state, + struct ast *lst, struct ast *up) +{ + struct ast *root = up; + struct token next = lexer_peek(lexer); + struct ast *base = ast_create(AST_COMMAND); + int child_filled = 0; + if (base == NULL) + { + QUICK_CLEANUP; + } + + struct string **space = calloc(11, sizeof(struct string *)); + if (space == NULL) + { + cleanup(base, state); + return NULL; + } + + size_t capacity = CMDSIZE; + size_t i = 0; + + ((struct ast_command *)base)->args = space; + + while (ISWORD(next.type) || TST_REDIR) + { + if (!(ISWORD(next.type))) + { + struct ast *redir = parse_redirection(lexer, state); + // Because state is at ERROR state, I'm sure the 2 will be executed + if (error_check(root, state, next) && error_check(lst, state, next)) + { + return NULL; + } + if (root == NULL) + { + root = redir; + up = redir; + } + set_left(up, redir); + set_left(redir, lst); + up = redir; + } + else + { + if (!child_filled) + { + child_filled = 1; + } + if (i == capacity) + { + space = _realloc_ast_command(base, &capacity); + } + + litteral_reserved_word(&next); + space[i] = next.value; + i++; + lexer_pop(lexer); + } + next = lexer_peek(lexer); + } + + // In case we created the child for nothing and he wasn't filled + if (!child_filled) + { + ast_free(base); + } + else + { + size_t lst_end = list_length(lst); + set_i(lst, base, lst_end); + } + + // No need to reassign the space variable to the ast here because the worst + // that can happens is if it has been moved becasue of realloc() and this + // case has been dealt with + + return (root == NULL) ? lst : root; +} + +struct ast *parse_prefix(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + if (next.type == TOKEN_ASS_WORD) + { + struct ast *assign = assign_setup(next); + lexer_pop(lexer); + if (assign == NULL) + { + QUICK_CLEANUP + } + return assign; + } + else if (TST_REDIR) + { + return parse_redirection(lexer, state); + } + else + { + QUICK_CLEANUP + } +} diff --git a/42sh/src/parser/parser_conditionnals.c b/42sh/src/parser/parser_conditionnals.c new file mode 100644 index 0000000..ef1f67a --- /dev/null +++ b/42sh/src/parser/parser_conditionnals.c @@ -0,0 +1,152 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_rule_if(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + // Not mandatory because we checked before going in here + if (next.type != TOKEN_IF) + { + QUICK_CLEANUP + } + + struct ast *root = ast_create(AST_IF); + + if (root == NULL) + { + *state = ERROR; + return NULL; + } + + lexer_pop(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + set_left(root, cond); + + if (next.type != TOKEN_THEN) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + cond = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + + set_right(root, cond); + + if (next.type != TOKEN_FI) + { + cond = parse_else_clause(lexer, state); + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + set_i(root, cond, 2); + } + + lexer_pop(lexer); + + return root; +} + +struct ast *parse_else_clause(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + struct ast *root = NULL; + + if (next.type == TOKEN_ELSE) + { + lexer_pop(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + + if (cond == NULL) + { + cleanup(root, state); + return NULL; + } + + if (error_check(cond, state, next)) + { + return NULL; + } + + return cond; + } + else if (next.type != TOKEN_ELIF) + { + cleanup(NULL, state); + return NULL; + } + + lexer_pop(lexer); + + // Doesn' it seems stupid to just manually re-creating the IF again? + // Can't I just call parse_if there ? But we won't be following the grammar + // This is a late-game optimization that might not be worth it + root = ast_create(AST_IF); + if (root == NULL) + { + cleanup(root, state); + return NULL; + } + + struct ast *cond = parse_compound_list(lexer, state); + set_left(root, cond); + + if (error_check(root, state, next)) + { + return NULL; + } + + next = lexer_peek(lexer); + + if (next.type != TOKEN_THEN) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + set_right(root, parse_compound_list(lexer, state)); + + if (error_check(root, state, next)) + { + return NULL; + } + + next = lexer_peek(lexer); + + if (next.type == TOKEN_ELSE || next.type == TOKEN_ELIF) + { + cond = parse_else_clause(lexer, state); + if (error_check(root, state, next)) + { + return NULL; + } + set_i(root, cond, 2); + } + + return root; +} diff --git a/42sh/src/parser/parser_functions.h b/42sh/src/parser/parser_functions.h new file mode 100644 index 0000000..bc87cf6 --- /dev/null +++ b/42sh/src/parser/parser_functions.h @@ -0,0 +1,198 @@ +#ifndef PARSER_FUNCTIONS_H +#define PARSER_FUNCTIONS_H + +#include "ast/ast.h" +#include "lexer/lexer.h" +#include "parser/parser_utils.h" + +#define QUICK_CLEANUP \ + cleanup(NULL, state); \ + return NULL; + +#define TST_REDIR next.type == TOKEN_IONUMBER || next.type == TOKEN_REDIR + +/** + * @brief This function parses the list grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `list = and_or { ';' and_or } [ ';' ] ;` + */ +struct ast *parse_list(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the and_or grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `and_or = pipeline { ( '&&' | '||' ) {'\\n'} pipeline } ;` + */ +struct ast *parse_and_or(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the pipeline grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `pipeline = ['!'] command { '|' {'\\n'} command } ;` + */ +struct ast *parse_pipeline(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `command = simple_command + * | shell command { redirection } + * | fundec { redirection } + * ;` + */ +struct ast *parse_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the simple_command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `simple_command = prefix { prefix } + * | { prefix } WORD { element } + * ;` + */ +struct ast *parse_simple_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the element grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form (different from subject): + * + * `element = { (WORD | redirection) };` + */ +struct ast *parse_element(struct lexer *lexer, enum parser_state *state, + struct ast *lst, struct ast *up); + +/** + * @brief This function parses the shell_command grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `shell_command = rule_if + * | rule_while + * | rule_until + * | rule_for + * | '{' compound_list '}' + * | '(' compound_list ')' + * ;` + */ +struct ast *parse_shell_command(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_if grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_if = 'if' compound_list 'then' compound_list [else_clause] 'fi' ;` + */ +struct ast *parse_rule_if(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the else_clause grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `else_clause = 'else' compound_list + * | 'elif' compound_list 'then' compound_list [else_clause]` + */ +struct ast *parse_else_clause(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_while grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_while = 'while' compound_list 'do' compound_list 'done' ;` + */ +struct ast *parse_rule_while(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_until grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_until = 'until' compound_list 'do' compound_list 'done' ;` + */ +struct ast *parse_rule_until(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the rule_for grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `rule_for = 'for' WORD ( [';'] | [ {'\\n'} 'in' { WORD } ( ';' | '\\n' ) ] ) + * {'\\n'} 'do' compound_list 'done' + * ;` + */ +struct ast *parse_rule_for(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the prefix grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `prefix = ASSIGNMENT_WORD + * | redirection + * ;` + */ +struct ast *parse_prefix(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the redirection grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `redirection = [IONUMBER] ( '>' | '<' | '>>' | '>&' | '<&' | '>|' | '<>' ) + * WORD + * ;` + */ +struct ast *parse_redirection(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the compound_list grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @note Here is the Extended Backus-Naur form: + * + * `compound_list = { '\\n' } and_or { ( ';' | '\\n' ) { '\\n' } and_or } [ ';' + * ] { '\\n' } ;` + */ +struct ast *parse_compound_list(struct lexer *lexer, enum parser_state *state); + +/** + * @brief This function parses the funcdec grammar + * @param lexer The lexer used to parse the current expression. + * @param state Current state of the parser. Set to `ERROR` when needed. + * @param txt The string to use as the name of the function. NULL means it must + * be fetched from the lexer. + * @note This change in declaration is to accomodate for the way we manage the + * conflict between the simple_command and the fundec rules (both start w/ WORD) + * @note Here is the Extended Backus-Naur form: + * + * funcdec = WORD '(' ')' {'\n'} shell_command ; + */ +struct ast *parse_funcdec(struct lexer *lexer, enum parser_state *state, + struct string *txt); +#endif /* ! PARSER_FUNCTIONS_H */ diff --git a/42sh/src/parser/parser_fundec.c b/42sh/src/parser/parser_fundec.c new file mode 100644 index 0000000..08d4816 --- /dev/null +++ b/42sh/src/parser/parser_fundec.c @@ -0,0 +1,63 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_funcdec(struct lexer *lexer, enum parser_state *state, + struct string *txt) +{ + struct ast *function = ast_create(AST_FUNCTION); + if (function == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + if (txt == NULL && next.type != TOKEN_WORD) + { + cleanup(function, state); + return NULL; + } + else if (txt == NULL) + { + ((struct ast_function *)function)->name = next.value; + lexer_pop(lexer); + next = lexer_peek(lexer); + } + else + { + // If we used the word in the txt variable, no need to peek + ((struct ast_function *)function)->name = txt; + } + + if (next.type != TOKEN_PAR_LEFT) + { + cleanup(function, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (next.type != TOKEN_PAR_RIGHT) + { + cleanup(function, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct ast *body = parse_shell_command(lexer, state); + next = lexer_peek(lexer); + + if (error_check(function, state, next)) + { + return NULL; + } + + set_left(function, body); + + return function; +} diff --git a/42sh/src/parser/parser_lists.c b/42sh/src/parser/parser_lists.c new file mode 100644 index 0000000..4b5e204 --- /dev/null +++ b/42sh/src/parser/parser_lists.c @@ -0,0 +1,128 @@ +#include <err.h> +#include <stdio.h> + +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_list(struct lexer *lexer, enum parser_state *state) +{ + struct ast *node = parse_and_or(lexer, state); + struct ast *ast_list = NULL; + + // Who forgot to check that node can be NULL, I sure did + if (node == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + size_t i = 0; + + if (next.type == TOKEN_SEMICOLON) + { + ast_list = ast_create(AST_LIST); + + if (ast_list == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(ast_list, node); + i++; + } + + // If we enter this loop, this means we have a list of more than 1 element + while (next.type == TOKEN_SEMICOLON) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + + // Case where we have only one ';', meaning it is the end of the list + if (next.type == TOKEN_EOF || next.type == TOKEN_NEWLINE) + { + // set_right(ast_list, NULL); + return ast_list; + } + + struct ast *right = parse_and_or(lexer, state); + set_i(ast_list, right, i); + i++; + + // Was checking if right was NULL, but in that case state is ERROR + // and error_check will catch it + + next = lexer_peek(lexer); + if (error_check(ast_list, state, next)) + { + return NULL; + } + } + + return (ast_list == NULL) ? node : ast_list; +} + +struct ast *parse_compound_list(struct lexer *lexer, enum parser_state *state) +{ + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct ast *node = parse_and_or(lexer, state); + struct ast *ast_lst = NULL; + + struct token next = lexer_peek(lexer); + + if (error_check(node, state, next)) + { + return NULL; + } + + size_t i = 0; + + while (next.type == TOKEN_SEMICOLON || next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + + clean_cons_tokens(TOKEN_NEWLINE, lexer); + next = lexer_peek(lexer); + + // This condition might need to be expanded in the future + // But I don't know if there is a better way to do this + if (next.type == TOKEN_THEN || next.type == TOKEN_FI + || next.type == TOKEN_ELSE || next.type == TOKEN_ELIF + || next.type == TOKEN_DO || next.type == TOKEN_DONE + || next.type == TOKEN_CURLY_RIGHT || next.type == TOKEN_PAR_LEFT) + { + return (ast_lst == NULL) ? node : ast_lst; + } + + if (ast_lst == NULL) + { + ast_lst = ast_create(AST_LIST); + if (ast_lst == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(ast_lst, node); + i++; + } + + next = lexer_peek(lexer); + + struct ast *right = parse_and_or(lexer, state); + + // Can put the i++ in the set_i function but I'm too scared + set_i(ast_lst, right, i); + i++; + + next = lexer_peek(lexer); + if (error_check(ast_lst, state, next)) + { + return NULL; + } + } + + return (ast_lst == NULL) ? node : ast_lst; +} diff --git a/42sh/src/parser/parser_loops.c b/42sh/src/parser/parser_loops.c new file mode 100644 index 0000000..bf1033a --- /dev/null +++ b/42sh/src/parser/parser_loops.c @@ -0,0 +1,265 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +struct ast *parse_rule_while(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + // Because we use this function for both while and until loops, this needs + // to be accounted for. The construction is exactly the same otherwise. + // The only difference is that the condition is NOT'ed in parse_rule_until + if (next.type != TOKEN_WHILE && next.type != TOKEN_UNTIL) + { + QUICK_CLEANUP + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *cond = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(NULL, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_DO) + { + cleanup(cond, state); + return NULL; + } + + lexer_pop(lexer); + + struct ast *body = parse_compound_list(lexer, state); + next = lexer_peek(lexer); + + if (error_check(cond, state, next)) + { + return NULL; + } + + if (next.type != TOKEN_DONE) + { + cleanup(cond, state); + cleanup(body, state); + return NULL; + } + + lexer_pop(lexer); + + struct ast *while_loop = ast_create(AST_WHILE); + + if (while_loop == NULL) + { + cleanup(cond, state); + cleanup(body, state); + return NULL; + } + + set_left(while_loop, cond); + set_right(while_loop, body); + + return while_loop; +} + +// Warning, does not respect the grammar for parse_rule_until. +// This is because we reuse the parse_rule_while method (the ast representation +// being exatly the same, minus the condition being negated) +struct ast *parse_rule_until(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_UNTIL) + { + QUICK_CLEANUP + } + + struct ast *while_root = parse_rule_while(lexer, state); + next = lexer_peek(lexer); + + if (error_check(while_root, state, next)) + { + return NULL; + } + + struct ast *inverted = ast_create(AST_LOGICAL); + if (inverted == NULL) + { + cleanup(while_root, state); + return NULL; + } + + ((struct ast_logical *)inverted)->type = TOKEN_NEG; + + set_left(inverted, ((struct ast_while *)while_root)->cond); + ((struct ast_while *)while_root)->cond = inverted; + + return while_root; +} + +static struct string **_realloc_ast_command(struct ast *ast, size_t *capacity) +{ + struct ast_command *ast_c = ((struct ast_command *)ast); + size_t i = 0; + while (ast_c->args[i]) + { + i++; + } + struct string **p = + realloc(ast_c->args, (i + CMDSIZE) * sizeof(struct string *)); + if (!p) + { + errx(EXIT_FAILURE, "Realloc failed."); + } + ast_c->args = p; + *capacity = i + CMDSIZE; + memset(ast_c->args + i, 0, (CMDSIZE) * sizeof(struct string *)); + return p; +} + +static void fill_cmd(struct ast *ast, struct lexer *lexer) +{ + struct token next = lexer_peek(lexer); + size_t i = 0; + size_t capacity = CMDSIZE; + + struct string **arr = calloc(CMDSIZE, sizeof(struct string *)); + + ((struct ast_command *)ast)->args = arr; + while (ISWORD(next.type)) + { + if (i == capacity) + { + arr = _realloc_ast_command(ast, &capacity); + ((struct ast_command *)ast)->args = arr; + } + + litteral_reserved_word(&next); + arr[i] = next.value; + i++; + lexer_pop(lexer); + next = lexer_peek(lexer); + } +} + +static struct ast *fill_for_body(struct ast *root, struct lexer *lexer, + enum parser_state *state) +{ + clean_cons_tokens(TOKEN_NEWLINE, lexer); + + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_DO) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *cmp_lst = parse_compound_list(lexer, state); + + next = lexer_peek(lexer); + if (error_check(root, state, next)) + { + return NULL; + } + + set_right(root, cmp_lst); + + if (next.type != TOKEN_DONE) + { + cleanup(root, state); + return NULL; + } + + lexer_pop(lexer); + + return root; +} + +struct ast *parse_rule_for(struct lexer *lexer, enum parser_state *state) +{ + struct token next = lexer_peek(lexer); + + if (next.type != TOKEN_FOR) + { + QUICK_CLEANUP; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + // It can be a keyword interpreted as a normal word + litteral_reserved_word(&next); + struct ast *for_node = ast_create(AST_FOR); + + // Cannot do both at the same time because I couldn't know why the code exit + // Was it because of the tokenor the ast and should I free something ? + if (for_node == NULL) + { + QUICK_CLEANUP; + } + + if (!(ISWORD(next.type))) + { + cleanup(for_node, state); + return NULL; + } + + ((struct ast_for *)for_node)->var = next.value; + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (next.type == TOKEN_SEMICOLON) + { + lexer_pop(lexer); + } + else if (next.type == TOKEN_NEWLINE || next.type == TOKEN_IN) + { + if (next.type == TOKEN_NEWLINE) + { + clean_cons_tokens(TOKEN_NEWLINE, lexer); + // We only need to peek if we destroyed tokens + next = lexer_peek(lexer); + } + + if (next.type == TOKEN_IN) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + + struct ast *lst = ast_create(AST_COMMAND); + if (lst == NULL) + { + cleanup(for_node, state); + return NULL; + } + + fill_cmd(lst, lexer); + set_left(for_node, lst); + + next = lexer_peek(lexer); + if (next.type != TOKEN_SEMICOLON && next.type != TOKEN_NEWLINE) + { + cleanup(for_node, state); + return NULL; + } + + lexer_pop(lexer); + } + } + else if (next.type != TOKEN_DO) + { + cleanup(for_node, state); + return NULL; + } + + return fill_for_body(for_node, lexer, state); +} diff --git a/42sh/src/parser/parser_operators.c b/42sh/src/parser/parser_operators.c new file mode 100644 index 0000000..31cebbb --- /dev/null +++ b/42sh/src/parser/parser_operators.c @@ -0,0 +1,122 @@ +#include "parser/parser_functions.h" +#include "parser/parser_utils.h" + +struct ast *parse_and_or(struct lexer *lexer, enum parser_state *state) +{ + struct ast *node = parse_pipeline(lexer, state); + struct token next = lexer_peek(lexer); + + if (error_check(node, state, next)) + { + QUICK_CLEANUP + } + + while (next.type == TOKEN_AND || next.type == TOKEN_OR) + { + struct ast *logical = ast_create(AST_LOGICAL); + if (logical == NULL) + { + cleanup(node, state); + return NULL; + } + + ((struct ast_logical *)logical)->type = next.type; + + lexer_pop(lexer); + next = lexer_peek(lexer); + + while (next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + set_left(logical, node); + + struct ast *right = parse_pipeline(lexer, state); + + // So it's ready for next iteration + usable in error_check() + next = lexer_peek(lexer); + + // If there IS an error, then it MUST have returned NULL + // So no need to free 'right' + if (error_check(logical, state, next)) + { + return NULL; + } + + set_right(logical, right); + node = logical; + } + return node; +} + +struct ast *parse_pipeline(struct lexer *lexer, enum parser_state *state) +{ + struct ast *negation = NULL; + struct token next = lexer_peek(lexer); + + if (next.type == TOKEN_NEG) + { + negation = ast_create(AST_LOGICAL); + if (negation == NULL) + { + QUICK_CLEANUP + } + + union ast_caster cast; + cast.ast = negation; + cast.ast_lo->type = next.type; + + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + struct ast *node = parse_command(lexer, state); + next = lexer_peek(lexer); + + if (negation != NULL) + { + set_left(negation, node); + node = negation; + } + + // If there is an error here (aka node failed), just negation is alloc'ed + if (error_check(negation, state, next)) + { + return NULL; + } + + while (next.type == TOKEN_PIPE) + { + lexer_pop(lexer); + struct ast *pipe = ast_create(AST_PIPELINE); + + if (pipe == NULL) + { + cleanup(node, state); + return NULL; + } + + set_left(pipe, node); + + while (next.type == TOKEN_NEWLINE) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } + + set_right(pipe, parse_command(lexer, state)); + + next = lexer_peek(lexer); + + if (error_check(pipe, state, next)) + { + return NULL; + } + + node = pipe; + } + + return node; +} diff --git a/42sh/src/parser/parser_redir.c b/42sh/src/parser/parser_redir.c new file mode 100644 index 0000000..9a06d0b --- /dev/null +++ b/42sh/src/parser/parser_redir.c @@ -0,0 +1,59 @@ +#include <err.h> + +#include "parser_functions.h" +#include "parser_utils.h" + +struct ast *parse_redirection(struct lexer *lexer, enum parser_state *state) +{ + // The caller (prefix grammar in this case) will have to attach its + // own ast as the left child of the redir ast + struct ast *redir = ast_create(AST_REDIRECTION); + if (redir == NULL) + { + QUICK_CLEANUP + } + + struct token next = lexer_peek(lexer); + + union ast_caster cast; + cast.ast = redir; + + if (next.type == TOKEN_IONUMBER) + { + cast.ast_r->redirect.fd = next.value; + lexer_pop(lexer); + next = lexer_peek(lexer); + } + else + { + cast.ast_r->redirect.fd = string_create("1"); + } + + if (next.type != TOKEN_REDIR) + { + cleanup(redir, state); + return NULL; + } + + cast.ast_r->redirect.redir = next.value; + if (next.value->data[0] == '<' && cast.ast_r->redirect.fd->data[0] == '1') + { + cast.ast_r->redirect.fd->data[0] = '0'; + } + + lexer_pop(lexer); + next = lexer_peek(lexer); + + if (!(ISWORD(next.type))) + { + cleanup(redir, state); + errx(2, "redir: bad grammar (bad redir)."); + } + + litteral_reserved_word(&next); + cast.ast_r->redirect.file = next.value; + lexer_pop(lexer); + + ((struct ast_redirection *)redir)->redirect = cast.ast_r->redirect; + return redir; +} diff --git a/42sh/src/parser/parser_utils.c b/42sh/src/parser/parser_utils.c new file mode 100644 index 0000000..f30c60f --- /dev/null +++ b/42sh/src/parser/parser_utils.c @@ -0,0 +1,147 @@ +#include "parser_utils.h" + +#include "ast/ast.h" +#include "utils/libstring.h" + +void cleanup(struct ast *root, enum parser_state *state) +{ + if (root != NULL) + { + ast_free(root); + } + *state = ERROR; +} + +int error_check(struct ast *root, enum parser_state *state, struct token next) +{ + if (next.type == TOKEN_ERROR || *state == ERROR) + { + cleanup(root, state); + return 1; + } + else + { + return 0; + } +} + +struct ast *assign_setup(struct token next) +{ + struct ast *root = ast_create(AST_ASSIGN); + if (root == NULL) + { + return NULL; + } + struct string *var_name = string_create(NULL); + struct string *value = string_create(NULL); + + char *txt = next.value->data; + + size_t i = 0; + + while (txt[i] != '=') + { + if (!string_pushc(var_name, txt[i])) + { + string_free(var_name); + string_free(value); + // Noone will free it so we'll have to do it by ourselves + string_free(next.value); + ast_free(root); + return NULL; + } + i++; + } + + i++; + + while (txt[i] != '\0') + { + if (!string_pushc(value, txt[i])) + { + string_free(var_name); + string_free(value); + // Noone will free it so we'll have to do it by ourselves + string_free(next.value); + ast_free(root); + return NULL; + } + i++; + } + + ((struct ast_assign *)root)->name = var_name; + ((struct ast_assign *)root)->val = value; + + // Beause it will no be inside the ast, it won't be naturally freed + string_free(next.value); + return root; +} + +size_t list_length(struct ast *lst) +{ + union ast_caster cast; + cast.ast = lst; + return cast.ast_l->nb_children; +} + +// This is the function you're supposed to call when a keyword is considered +// as a simple word +void litteral_reserved_word(struct token *next) +{ + // This is pretty efficient thanks to the switch case statement + // I don't think there is a cleaner way to do this (37 lines) + switch (next->type) + { + case TOKEN_IF: + next->value = string_create("if"); + return; + case TOKEN_THEN: + next->value = string_create("then"); + return; + case TOKEN_ELIF: + next->value = string_create("elif"); + return; + case TOKEN_ELSE: + next->value = string_create("else"); + return; + case TOKEN_FI: + next->value = string_create("fi"); + return; + case TOKEN_WHILE: + next->value = string_create("while"); + return; + case TOKEN_UNTIL: + next->value = string_create("until"); + return; + case TOKEN_DO: + next->value = string_create("do"); + return; + case TOKEN_DONE: + next->value = string_create("done"); + return; + case TOKEN_FOR: + next->value = string_create("for"); + return; + case TOKEN_IN: + next->value = string_create("in"); + return; + case TOKEN_NEG: + next->value = string_create("!"); + return; + default: + // This catches any TOKEN_WORD and at this point, you're not even + // supposed to have anything else + return; + } +} + +void clean_cons_tokens(enum token_type type, struct lexer *lexer) +{ + struct token next = lexer_peek(lexer); + + while (next.type == type) + { + lexer_pop(lexer); + next = lexer_peek(lexer); + } +} diff --git a/42sh/src/parser/parser_utils.h b/42sh/src/parser/parser_utils.h new file mode 100644 index 0000000..142913c --- /dev/null +++ b/42sh/src/parser/parser_utils.h @@ -0,0 +1,75 @@ +#ifndef PARSER_UTILS_H +#define PARSER_UTILS_H + +#include "ast/ast.h" +#include "ast/ast_accessors.h" +#include "lexer/lexer.h" + +#define CMDSIZE 10 + +enum parser_state +{ + OK, + ERROR, +}; + +/** + * @brief Method used by error_check() but made available to the developper + * to ease the error management if an error where to occur. It sets state + * to the `ERROR` value and frees the ast if it is not NULL. + * @param root The root of the ast that will be freed. + * @param state The current state of the lexer that will be changed to `ERROR`. + * @note Because giving NULL for the root argument will not free anything, + * it can be used to set the state variable to the `ERROR` status. + */ +void cleanup(struct ast *root, enum parser_state *state); + +/** + * @brief This method will return an integer a nonzero value if an error was + * detected. If so, cleanup() will be called. An error is detected if the state + * argument is set to `ERROR` or if the token next is of type `TOKEN_ERROR`. + * @param root The root of the ast to be freed if there is an error. + * @param state The current state of the parser. Set to `ERROR` if needed. + * @param next The next token given by the lexer. + * @note This function allows for quick and efficient error checks: + * if (error_check(root, state, next)) + * { + * return NULL; + * } + */ +int error_check(struct ast *root, enum parser_state *state, struct token next); + +/** + * @brief This creates a node as an ast_assign and fills it with the variable + * next and its value found within said token. Data in the token needs to be + * parsed before doing anything + * @param next The token containing the value to fill in the ast + */ +struct ast *assign_setup(struct token next); + +/** + * @brief This function returns the number of elements in an ast_list + * @param lst The ast_list we want to know how much childrens it has + * @note Tre result is undefined if the ast type is not AST_LIST + */ +size_t list_length(struct ast *lst); + +/** + * @brief This function will fill the token with the appropriate string + * when encountering a reserved word that we want to use as a word + * @param next the token to fill + * @note NULL is set in the token if string cannot be created + */ +void litteral_reserved_word(struct token *next); + +/** + * @brief This function will pop all the token with the same type as the next + * parameter. This is useful because a lot of grammars can have consecutive + * tokens of the same type you need to get rid of. Stops when encountering + * another type of token on the lexer + * @param type The type we want to get rid of + * @param lexer The lexer that will give us the tokens + * @note the function won't do anything if it cannot find the matching type + */ +void clean_cons_tokens(enum token_type type, struct lexer *lexer); +#endif /* ! PARSER_UTILS_H */ |
