7 files changed, 898 insertions, 0 deletions
diff --git a/42sh/src/lexer/Makefile.am b/42sh/src/lexer/Makefile.am
new file mode 100644
index 0000000..4eda36d
--- /dev/null
+++ b/42sh/src/lexer/Makefile.am
@@ -0,0 +1,15 @@
+# Declare liblexer.a once, as a convenience library that is built but not
+# installed (noinst_).
+noinst_LIBRARIES = liblexer.a
+
+liblexer_a_SOURCES =  \
+  token.h \
+  utils.h \
+  lexer.h \
+  lexer.c \
+  expansion.h \
+  expansion.c
+
+liblexer_a_CPPFLAGS = -I$(top_srcdir)/src
+
+liblexer_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic
diff --git a/42sh/src/lexer/expansion.c b/42sh/src/lexer/expansion.c
new file mode 100644
index 0000000..d648009
--- /dev/null
+++ b/42sh/src/lexer/expansion.c
@@ -0,0 +1,386 @@
+#include <ctype.h>
+#include <lexer/expansion.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils/env.h"
+
+#define BUFFER_SIZE 51 // pipe read buffer: 50 bytes of data + 1 for the NUL
+// Early exit on unterminated input; needs str, i, input and error in scope
+#define ERROR_CHECK(MSG)                                                       \
+    if (str[*i] == '\0')                                                       \
+    {                                                                          \
+        string_free(input);                                                    \
+        return clean_exit(MSG, error);                                         \
+    }
+
+#define DQUOTEESCAPED "$\\\n`\""
+// True if C is a character that a backslash escapes inside double quotes.
+// NUL is excluded explicitly: strchr() matches the terminator of the set.
+#define ISDQUOTEESCAPED(C) ((C) != '\0' && strchr(DQUOTEESCAPED, (C)) != NULL)
+#define SPECIAL "@*?$#"
+// True if C is a one-character special parameter name (NUL never matches)
+#define ISSPECIAL(C) ((C) != '\0' && strchr(SPECIAL, (C)) != NULL)
+
+// Parses the parameter name following a '$' (`str` points just past the '$').
+// error = 1 indicates a missing }
+// error = 0 and NULL returned indicates an unrecognized var name
+// error = 0 and anything else than NULL returned is the var name (no braces)
+static struct string *get_var_name(char *str, int *error)
+{
+    struct string *res = string_create(NULL);
+    int i = 0;
+    *error = 0;
+    if (str[i] == '{')
+    {
+        // Start after the '{' so that it does not end up in the name
+        i++;
+        while (str[i] && str[i] != '}')
+        {
+            if (str[i] == '\\' && str[i + 1] == '}')
+                i++;
+            string_pushc(res, str[i]);
+            i++;
+        }
+        if (!str[i])
+        {
+            string_free(res);
+            *error = 1;
+            return NULL;
+        }
+        return res;
+    }
+    // The NUL test matters: strchr() also matches the terminator of SPECIAL
+    if (str[i] && (ISSPECIAL(str[i]) || isdigit((unsigned char)str[i])))
+    {
+        string_pushc(res, str[i]);
+        return res;
+    }
+    // A name starts with a letter or an underscore
+    if (!isalpha((unsigned char)str[i]) && str[i] != '_')
+    {
+        string_free(res);
+        return NULL;
+    }
+    while (isalnum((unsigned char)str[i]) || str[i] == '_')
+        string_pushc(res, str[i++]);
+    return res;
+}
+
+// Shared failure path of the expansion helpers: prints txt on stderr,
+// raises the error flag and yields the NULL that the caller returns
+static struct string *clean_exit(char *txt, int *error)
+{
+    *error = 1;
+    fputs(txt, stderr);
+    return NULL;
+}
+
+// Creates the fork() in order to make a subshell for the command expansion
+// (cf section 2.6.3 of the SCL)
+// Only called in expand_substitution()
+// Returns what the child wrote on its stdout, or NULL with *error set to 1.
+// j and str are not needed here; they are only kept for the signature.
+static struct string *fork_subshell(struct string *input, int j, char *str,
+                                    int *error)
+{
+    (void)j;
+    (void)str;
+    int fds[2];
+    if (pipe(fds) == -1)
+        return clean_exit("pipe() faild to create 2 fds\n", error);
+
+    // Flush first: the child leaves through exit(), which would otherwise
+    // write its inherited copy of our pending stdout data into the pipe
+    fflush(stdout);
+
+    pid_t child = fork();
+    if (child == -1)
+    {
+        // Fork not working: do not leak the pipe
+        close(fds[0]);
+        close(fds[1]);
+        return clean_exit("fork() faild to produce a children\n", error);
+    }
+
+    if (child == 0)
+    {
+        // Child process: run the command with stdout wired to the pipe
+        close(fds[0]);
+        if (dup2(fds[1], STDOUT_FILENO) == -1)
+            exit(-1);
+
+        _process_input(input);
+
+        close(fds[1]);
+        exit(1); // NOTE(review): non-zero status even on success; intended?
+    }
+
+    // Parent process
+    close(fds[1]);
+
+    struct string *output = string_create(NULL);
+    char buff[BUFFER_SIZE];
+    ssize_t r = 0;
+    int failed = (output == NULL);
+
+    // Drain the pipe BEFORE waiting: a child blocked on a full pipe would
+    // never exit and waitpid() would never return
+    while (!failed && (r = read(fds[0], buff, BUFFER_SIZE - 1)) > 0)
+    {
+        buff[r] = 0;
+        failed = !string_pushstr(output, buff);
+    }
+    close(fds[0]);
+
+    // Always reap the child, even after a failure, to avoid a zombie
+    int status = 0;
+    int reaped = (waitpid(child, &status, 0) == child);
+
+    if (failed || r < 0)
+    {
+        if (output != NULL)
+            string_free(output);
+        return clean_exit("Failed to transfer from pipe\n", error);
+    }
+
+    // Check if the child terminated normally
+    if (!reaped || !WIFEXITED(status))
+    {
+        string_free(output);
+        return clean_exit("Child process failed miserably\n", error);
+    }
+    return output;
+}
+
+// Returns the index of the first unescaped `c` in `in` at or after `i`,
+// or the index of the terminating NUL when there is none
+static int look_for_next(char *in, int i, char c)
+{
+    int escaped = 0;
+    for (; in[i] != '\0'; i++)
+    {
+        if (in[i] == c && !escaped)
+            break;
+        escaped = (in[i] == '\\') ? !escaped : 0;
+    }
+    return i;
+}
+
+// Removes all the <newline> characters at the end of the string obtained by
+// the command substitution (Also section 2.6.3 of the SCL)
+// A NULL txt (failed substitution) is tolerated and ignored.
+static void trimming_newline(struct string *txt)
+{
+    if (txt == NULL || !txt->length)
+    {
+        return;
+    }
+    char *str = txt->data;
+    size_t len = txt->length;
+
+    // Walk back from the end; `len > 0` keeps the index in bounds when the
+    // output is made of newlines only
+    while (len > 0 && str[len - 1] == '\n')
+    {
+        len--;
+        str[len] = 0;
+    }
+
+    txt->length = len;
+}
+
+// Performs the substitution (forks and get back the stdout)
+// str[*i] is the first char of the command; delim is the closing '`' or ')'.
+// Returns the output without its trailing newlines and leaves *i on the
+// closing delimiter; on failure returns NULL with *error set to 1.
+struct string *expand_substitution(char *str, int *i, int *error, char delim)
+{
+    int j = *i;
+    struct string *input = string_create(NULL);
+    if (input == NULL)
+    {
+        return clean_exit("Could not create string for input\n", error);
+    }
+
+    if (delim == '`')
+    {
+        *i = look_for_next(str, j, delim);
+        ERROR_CHECK("Could not match `\n")
+    }
+    // Sadly, there is no other way around this
+    else
+    {
+        int escaped = 0;
+        int par_count = 1;
+        while (str[*i] != 0)
+        {
+            if (str[*i] == '\\')
+            {
+                escaped ^= 1;
+            }
+            else if (str[*i] == '\'' && !escaped)
+            {
+                (*i) += 1;
+                while (str[*i] != '\0' && str[*i] != '\'')
+                {
+                    (*i) += 1;
+                }
+                ERROR_CHECK("Missing matching '\n")
+            }
+            else if ((str[*i] == '\"' || str[*i] == '`') && !escaped)
+            {
+                (*i) += 1;
+                *i = look_for_next(str, *i, str[(*i) - 1]);
+                ERROR_CHECK("Missing matching `\n")
+            }
+            else if (str[*i] == '(' && !escaped)
+            {
+                par_count++;
+            }
+            else if (str[*i] == delim && !escaped)
+            {
+                par_count--;
+                if (!par_count)
+                    break;
+            }
+            else
+            {
+                escaped = 0;
+            }
+            (*i)++;
+        }
+        // The scan also stops on the NUL when the ')' is missing: writing
+        // delim over that NUL further down would unterminate str
+        ERROR_CHECK("Could not match )\n")
+    }
+
+    // Isolate the command, run it, then put the delimiter back
+    str[*i] = '\0';
+    string_pushstr(input, str + j);
+    struct string *output = fork_subshell(input, j, str, error);
+    string_free(input);
+    str[*i] = delim;
+    // NULL when the subshell failed; *error is already set in that case
+    if (output != NULL)
+        trimming_newline(output);
+    return output;
+}
+
+// Expands the variable whose '$' sits at input[i] and appends the result
+// to res. Returns the index at which the caller should resume, or -1 when
+// get_var_name() reports a missing '}'.
+static int expand_var(struct string *res, char *input, int i)
+{
+    int missing_brace = 0;
+    struct string *name = get_var_name(input + i + 1, &missing_brace);
+
+    if (missing_brace)
+    {
+        string_free(name);
+        fprintf(stderr, "Missing } in variable expansion\n");
+        return -1;
+    }
+
+    if (name == NULL)
+    {
+        // Not a variable name: the '$' stands for itself
+        string_pushc(res, input[i]);
+        return i + 1;
+    }
+
+    // A variable without a value expands to nothing
+    char *value = env_get(name->data);
+    if (value)
+        string_pushstr(res, value);
+
+    // Skip the '$', name->length chars and two more for the "${" form
+    int next = i + 1 + (int)name->length + (input[i + 1] == '{' ? 2 : 0);
+    string_free(name);
+    return next;
+}
+
+// Expands a double-quoted section into res; input[i] follows the opening '"'.
+// Returns the index of the closing '"', or -1 (res freed) on any error.
+static int expand_dquotes(char *input, int i, struct string *res)
+{
+    while (input[i] && input[i] != '"')
+    {
+        // Command substitution goes first: "$(" also starts with a '$'
+        if (input[i] == '`' || (input[i] == '$' && input[i + 1] == '('))
+        {
+            int e = 0;
+            char delim = (input[i] == '$') ? ')' : '`';
+            i += (delim == ')') ? 2 : 1;
+            struct string *output = expand_substitution(input, &i, &e, delim);
+            if (e || output == NULL)
+                goto fail;
+            i++; // +1 for the last parenthesis/backquote
+            string_catenate(res, output);
+        }
+        else if (input[i] == '$' && input[i + 1] != '\0')
+        {
+            if ((i = expand_var(res, input, i)) == -1) // new absolute index
+                goto fail;
+        }
+        else
+        {
+            // A backslash is dropped only before a DQUOTEESCAPED char
+            i += (input[i] == '\\' && input[i + 1] != '\0'
+                  && ISDQUOTEESCAPED(input[i + 1]));
+            string_pushc(res, input[i++]);
+        }
+    }
+    if (input[i] == '"')
+        return i;
+fail:
+    string_free(res);
+    return -1;
+}
+
+// Returns a new string holding `word` after quote removal and variable
+// expansion, or NULL on error. `word` itself is not freed.
+struct string *expand_word(struct string *word)
+{
+    char *input = word->data;
+    int escape = 0;
+    struct string *res = string_create(NULL);
+    for (int i = 0; input[i]; i++)
+    {
+        if (!escape && input[i] == '\'')
+        {
+            for (i++; input[i] && input[i] != '\''; i++)
+                string_pushc(res, input[i]);
+            if (!input[i])
+                break; // unterminated quote: do not step past the NUL
+        }
+        else if (!escape && input[i] == '"')
+        {
+            if ((i = expand_dquotes(input, i + 1, res)) == -1)
+                return NULL; // expand_dquotes() already freed res
+        }
+        else if (!escape && input[i] == '\\')
+            escape = 1;
+        else if (!escape && input[i] == '$' && input[i + 1] != '\0')
+        {
+            // expand_var() returns the next index to read: undo the loop i++
+            if ((i = expand_var(res, input, i)) == -1)
+            {
+                string_free(res);
+                return NULL;
+            }
+            i--;
+        }
+        else
+        {
+            // Plain or backslash-escaped char: copied as is
+            string_pushc(res, input[i]);
+            escape = 0;
+        }
+    }
+    return res;
+}
diff --git a/42sh/src/lexer/expansion.h b/42sh/src/lexer/expansion.h
new file mode 100644
index 0000000..4729cb8
--- /dev/null
+++ b/42sh/src/lexer/expansion.h
@@ -0,0 +1,13 @@
+#ifndef EXPANSION_H
+#define EXPANSION_H
+
+#include <stddef.h>
+#include <utils/libstring.h>
+
+#include "helper.h"
+// Runs the command starting at str[*i] in a subshell and returns its output
+struct string *expand_substitution(char *str, int *i, int *error, char delim);
+// Returns a newly created string holding the expanded form of word
+struct string *expand_word(struct string *word);
+
+#endif /* ! EXPANSION_H */
diff --git a/42sh/src/lexer/lexer.c b/42sh/src/lexer/lexer.c
new file mode 100644
index 0000000..eac77ab
--- /dev/null
+++ b/42sh/src/lexer/lexer.c
@@ -0,0 +1,359 @@
+#include "lexer.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "expansion.h"
+#include "utils.h"
+// NEWLINEESCAPE relies on variables named `in` and `lexer` being in scope
+#define NEWLINEESCAPE (in[lexer->pos] == '\\' && in[lexer->pos + 1] == '\n')
+#define SEPARATORS " \n;&|<>()" // strchr() also matches the final NUL
+#define ISSEPARATOR(I) (strchr(SEPARATORS, (int)(I)) != NULL)
+
+// Allocates a lexer reading `input` from offset 0; returns NULL on OOM.
+struct lexer *lexer_new(struct string *input)
+{
+    struct lexer *new = malloc(sizeof(*new));
+    if (new == NULL)
+        return NULL;
+    *new = (struct lexer){ .input = input };
+    new->current_tok.type = TOKEN_EOF;
+    return new;
+}
+
+void lexer_free(struct lexer *lexer)
+{
+    free(lexer); // only the lexer itself; its input string is left alone
+}
+
+static void yeet_comment(struct lexer *l)
+{
+    // Skip to the end of the line; the '\n' (or final NUL) is left unread
+    l->pos += strcspn(l->input->data + l->pos, "\n");
+}
+
+// Lexes a run of digits directly followed by '<' or '>' as an IO number.
+// Returns 1 and sets the current token if it matched, 0 otherwise.
+static int create_ionumber(struct lexer *l)
+{
+    const char *in = l->input->data;
+    size_t end = l->pos;
+    while (isdigit((unsigned char)in[end])) // ctype needs unsigned char
+        end++;
+    if (in[end] != '<' && in[end] != '>')
+        return 0;
+    struct string *str = string_create(NULL);
+    if (str == NULL)
+        return 0;
+    for (size_t k = l->pos; k < end; k++)
+        string_pushc(str, in[k]);
+    l->current_tok = (struct token){ TOKEN_IONUMBER, str };
+    return 1;
+}
+
+static int look_for_next(char *in, int i, char c)
+{
+    // Scan forward to the first `c` that no backslash escapes; the scan
+    // ends on the terminating NUL when there is no such character
+    int escaped = 0;
+    while (in[i] != '\0' && (escaped || in[i] != c))
+    {
+        // A backslash escapes the next char unless it is escaped itself
+        escaped = (in[i] == '\\' && !escaped);
+        i++;
+    }
+    return i;
+}
+
+// Builds a new input in which the command substitution that starts at in[i]
+// ('`' or "$(") is replaced by the output of the command.
+// Returns NULL when the substitution fails; in[i] is then put back.
+static struct string *substitution(char *in, int i)
+{
+    const int start = i;
+    const char split = in[start];
+
+    // Cut `in` at the opener to copy what precedes the substitution
+    in[start] = '\0';
+    struct string *new_input = string_create(in);
+
+    // Step over the opener: one char for '`', two for "$("
+    i += (split == '$') ? 2 : 1;
+    int error = 0;
+    struct string *substitute =
+        expand_substitution(in, &i, &error, (split == '`') ? '`' : ')');
+
+    if (error || substitute == NULL)
+    {
+        in[start] = split;
+        string_free(new_input);
+        return NULL;
+    }
+
+    string_catenate(new_input, substitute);
+    string_catenate(new_input, string_create(in + i + 1));
+    return new_input;
+}
+// NUL-terminates the word starting at l->pos; returns the replaced char
+static char extract_word(struct lexer *l, int *end)
+{
+    char *in = l->input->data;
+    int i = l->pos;
+    // True if the previous char is a backslash that is not itself escaped
+    int escaped = 0;
+
+    while (in[i])
+    {
+        if (!escaped && ((in[i] == '$' && in[i + 1] == '(') || in[i] == '`'))
+        {
+            struct string *new_input = substitution(in, i);
+            if (new_input == NULL)
+            {
+                l->current_tok.type = TOKEN_ERROR;
+                return '\0';
+            }
+
+            string_free(l->input);
+            l->input = new_input;
+            in = l->input->data; // keep scanning at i, now in the new input
+        }
+        // An unescaped separator (or the final NUL) ends the word
+        if (!escaped && ISSEPARATOR(in[i]))
+            break;
+        if (!escaped && in[i] == '\'')
+        {
+            i++;
+            while (in[i] && in[i] != '\'')
+                i++;
+            if (!in[i])
+            {
+                l->current_tok.type = TOKEN_ERROR;
+                return '\0';
+            }
+        }
+        else if (!escaped && in[i] == '"')
+        {
+            i = look_for_next(in, i + 1, in[i]);
+            if (!in[i])
+            {
+                l->current_tok.type = TOKEN_ERROR;
+                return '\0';
+            }
+        }
+        else if (in[i] == '\\')
+        {
+            escaped ^= 1;
+        }
+        else
+            escaped = 0;
+        i++;
+    }
+    // Cut the input at the word's end; lex_word() restores the char later
+    char tmp = in[i];
+    in[i] = '\0';
+    *end = i; // index of the replaced char
+    return tmp;
+}
+
+// Tells an assignment word (NAME=value) from a plain word. NAME must be
+// non-empty, must not start with a digit and may only hold alphanumerics
+// and '_'. Returns TOKEN_ASS_WORD or TOKEN_WORD.
+static enum token_type word_or_ass(struct token t)
+{
+    char *word = t.value->data;
+    if (isdigit((unsigned char)word[0]))
+        return TOKEN_WORD;
+    for (int i = 0; word[i]; i++)
+    {
+        // "=value" has no name in front of the '=': it is a plain word
+        if (word[i] == '=')
+            return (i > 0) ? TOKEN_ASS_WORD : TOKEN_WORD;
+        if (word[i] != '_' && !isalnum((unsigned char)word[i]))
+        {
+            return TOKEN_WORD;
+        }
+    }
+    return TOKEN_WORD;
+}
+
+// Lexes the word starting at l->pos: a reserved word yields its own token
+// type, anything else becomes a WORD / ASS_WORD token holding a copy of
+// the text. Failures are reported through TOKEN_ERROR.
+static void lex_word(struct lexer *l)
+{
+    int end = 0;
+    char saved = extract_word(l, &end);
+    if (l->current_tok.type == TOKEN_ERROR)
+        return;
+    // Read the buffer only now: extract_word() may have replaced the input
+    char *in = l->input->data;
+    char *word = in + l->pos;
+    // Identifies reserved words
+    enum token_type type = TOKEN_ERROR;
+    for (int n = 0; reserved_words[n].word != NULL; n++)
+        if (STRINGS_ARE_EQUAL(word, reserved_words[n].word))
+            type = reserved_words[n].type;
+    l->current_tok.type = type;
+
+    // Not a reserved word: keep the text and classify it
+    if (type == TOKEN_ERROR)
+    {
+        struct string *pp = string_create(word);
+        if (pp != NULL)
+        {
+            l->current_tok.value = pp;
+            l->current_tok.type = word_or_ass(l->current_tok);
+        }
+    }
+    // Put back the char that extract_word() replaced by a NUL
+    in[end] = saved;
+}
+
+static struct token set_ttype(struct lexer *lexer, enum token_type type)
+{
+    lexer->current_tok.type = type; // current_tok.value is left as it was
+    return lexer->current_tok;
+}
+
+static struct token lex_and_or(struct lexer *l)
+{
+    // "||" -> OR, "|" -> PIPE, "&&" -> AND; a lone '&' is rejected as an error
+    const char *cur = l->input->data + l->pos;
+    enum token_type type = TOKEN_ERROR;
+    if (cur[0] == '|')
+        type = (cur[1] == '|') ? TOKEN_OR : TOKEN_PIPE;
+    else if (cur[0] == '&' && cur[1] == '&')
+        type = TOKEN_AND;
+    l->current_tok.type = type;
+    return l->current_tok;
+}
+
+static struct token lex_redirect(struct lexer *l)
+{
+    // One-char operator, widened to two chars for >>, <>, >&, <& and >|
+    const char *op = l->input->data + l->pos;
+    l->current_tok = (struct token){ TOKEN_REDIR, string_create(NULL) };
+    string_pushc(l->current_tok.value, op[0]);
+    if (op[1] == '>' || op[1] == '&' || (op[0] == '>' && op[1] == '|'))
+        string_pushc(l->current_tok.value, op[1]);
+    return l->current_tok;
+}
+
+static void lex_special(struct lexer *l)
+{
+    // Digits directly followed by '<' or '>' form an IO number, which
+    // create_ionumber() stores as the current token. Everything else,
+    // other digit runs included, is lexed as a word.
+    int starts_with_digit = isdigit(l->input->data[l->pos]);
+    if (!starts_with_digit || !create_ionumber(l))
+        lex_word(l);
+}
+
+// Lexes the token found at the current position into lexer->current_tok
+// and returns a copy of it. The cursor only moves over blanks, escaped
+// newlines and comments here, never over the token itself.
+struct token lexer_next_token(struct lexer *lexer)
+{
+    if (lexer->pos >= lexer->input->length)
+    {
+        lexer->current_tok.value = NULL;
+        return set_ttype(lexer, TOKEN_EOF);
+    }
+
+    // NEWLINEESCAPE reads the variables named `in` and `lexer`
+    char *in = lexer->input->data;
+
+    // Skip spaces (one char) and backslash-newline pairs (two chars)
+    while (in[lexer->pos] == ' ' || NEWLINEESCAPE)
+        lexer->pos += (in[lexer->pos] == ' ') ? 1 : 2;
+
+    switch (in[lexer->pos])
+    {
+    case '\0':
+        return set_ttype(lexer, TOKEN_EOF);
+    case '\n':
+        return set_ttype(lexer, TOKEN_NEWLINE);
+    case ';':
+        return set_ttype(lexer, TOKEN_SEMICOLON);
+    case '(':
+        return set_ttype(lexer, TOKEN_PAR_LEFT);
+    case ')':
+        return set_ttype(lexer, TOKEN_PAR_RIGHT);
+    case '{':
+        return set_ttype(lexer, TOKEN_CURLY_LEFT);
+    case '}':
+        return set_ttype(lexer, TOKEN_CURLY_RIGHT);
+    case '|': /* FALLTHROUGH */
+    case '&':
+        return lex_and_or(lexer);
+    case '<': /* FALLTHROUGH */
+    case '>':
+        return lex_redirect(lexer);
+    case '#':
+        // A comment runs up to the end of the line: lex what follows it
+        yeet_comment(lexer);
+        return lexer_next_token(lexer);
+    default:
+        // Anything else starts an IO number or a word
+        lex_special(lexer);
+        return lexer->current_tok;
+    }
+}
+
+static void move_pos(struct lexer *lexer)
+{
+    // Reserved words and operators have a fixed width; tokens that carry
+    // text advance by the length of that text; anything else is one char.
+    const enum token_type t = lexer->current_tok.type;
+    size_t step = 1;
+    if (t == TOKEN_EOF)
+        return;
+    if (t == TOKEN_WORD || t == TOKEN_ASS_WORD || t == TOKEN_IONUMBER
+        || t == TOKEN_REDIR)
+        step = lexer->current_tok.value->length;
+    else if (t == TOKEN_WHILE || t == TOKEN_UNTIL)
+        step = 5;
+    else if (t == TOKEN_ELSE || t == TOKEN_ELIF || t == TOKEN_THEN
+             || t == TOKEN_DONE)
+        step = 4;
+    else if (t == TOKEN_FOR)
+        step = 3;
+    else if (t == TOKEN_IF || t == TOKEN_FI || t == TOKEN_IN || t == TOKEN_DO
+             || t == TOKEN_OR || t == TOKEN_AND)
+        step = 2;
+    lexer->pos += step;
+}
+
+struct token lexer_peek(struct lexer *lexer)
+{
+    struct token tok = lexer->current_tok; // pending token, if any
+    if (!lexer->processed)
+    {
+        lexer->processed = 1;
+        tok = lexer_next_token(lexer);
+        move_pos(lexer); // step over it so the next lex starts after it
+    }
+    return tok;
+}
+
+struct token lexer_pop(struct lexer *lexer)
+{
+    // A token buffered by lexer_peek() is consumed as is: the cursor was
+    // already moved past it when it was peeked, so it must not be moved
+    // a second time here
+    struct token tok = lexer->current_tok;
+
+    if (!lexer->processed)
+    {
+        // Nothing buffered: lex one token and step over it
+        tok = lexer_next_token(lexer);
+        move_pos(lexer);
+    }
+
+    // Either way no token is pending any more
+    lexer->processed = 0;
+
+    return tok;
+}
diff --git a/42sh/src/lexer/lexer.h b/42sh/src/lexer/lexer.h
new file mode 100644
index 0000000..8d8cdf5
--- /dev/null
+++ b/42sh/src/lexer/lexer.h
@@ -0,0 +1,48 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <lexer/token.h>
+#include <stddef.h>
+#include <utils/libstring.h>
+
+// True if C could be used as a word (C is evaluated more than once)
+#define ISWORD(C) \
+    ((C) == TOKEN_WORD || (C) == TOKEN_THEN || (C) == TOKEN_ELIF \
+     || (C) == TOKEN_ELSE || (C) == TOKEN_IF || (C) == TOKEN_WHILE \
+     || (C) == TOKEN_UNTIL || (C) == TOKEN_DO || (C) == TOKEN_DONE \
+     || (C) == TOKEN_FOR || (C) == TOKEN_IN || (C) == TOKEN_NEG \
+     || (C) == TOKEN_FI || (C) == TOKEN_CURLY_LEFT || (C) == TOKEN_CURLY_RIGHT)
+
+struct lexer
+{
+    struct string *input; // input data
+    size_t pos; // the current offset inside the input data
+    char processed; // 1 while current_tok was peeked but not popped yet
+    struct token current_tok; // next (if processed) token
+};
+
+// Creates a new lexer reading the given input string from its first char
+struct lexer *lexer_new(struct string *input);
+
+// Frees the given lexer, not its input
+void lexer_free(struct lexer *lexer);
+
+// Lexes the token at the current offset without stepping over it
+// If the token is a WORD, copies the word to the current_tok.value field
+struct token lexer_next_token(struct lexer *lexer);
+
+/*
+** Processes the next token if necessary
+** (previous call to lexer_pop or first call)
+*/
+// Returns the next token and keeps it buffered for the following peek/pop
+struct token lexer_peek(struct lexer *lexer);
+
+/*
+** Returns the token buffered by lexer_peek if there is one,
+** otherwise processes the next token
+*/
+// Either way the token is consumed: the next peek/pop lexes a new one
+struct token lexer_pop(struct lexer *lexer);
+
+#endif /* ! LEXER_H */
diff --git a/42sh/src/lexer/token.h b/42sh/src/lexer/token.h
new file mode 100644
index 0000000..89d772a
--- /dev/null
+++ b/42sh/src/lexer/token.h
@@ -0,0 +1,54 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <utils/libstring.h>
+
+enum token_type
+{
+    // STEP 1
+    TOKEN_NEWLINE, // '\n'
+    TOKEN_EOF, // end of the input
+    TOKEN_ERROR, // lexing failure (unmatched quote, lone '&', ...)
+    TOKEN_WORD,
+    TOKEN_IF,
+    TOKEN_THEN,
+    TOKEN_ELIF,
+    TOKEN_ELSE,
+    TOKEN_SEMICOLON, // ';'
+    TOKEN_FI,
+    TOKEN_HASHTAG, // NOTE(review): not produced by lexer.c in this change
+
+    // STEP 2
+    TOKEN_REDIR, // redirection operator, its text is in the token value
+    TOKEN_PIPE, // '|'
+    TOKEN_NEG, // "!"
+    TOKEN_WHILE,
+    TOKEN_UNTIL,
+    TOKEN_DO,
+    TOKEN_FOR,
+    TOKEN_DONE,
+    TOKEN_AND, // "&&"
+    TOKEN_OR, // "||"
+    TOKEN_ESCAPE, // NOTE(review): not produced by lexer.c in this change
+    TOKEN_ASS_WORD, // assignment word (contains '=' after name characters)
+    TOKEN_DOUBLEQUOTE, // NOTE(review): not produced by lexer.c in this change
+    TOKEN_DOLLAR, // NOTE(review): not produced by lexer.c in this change
+    TOKEN_IN,
+    TOKEN_IONUMBER, // digits directly followed by '<' or '>'
+
+    // STEP 3
+    TOKEN_PAR_RIGHT, // ')'
+    TOKEN_PAR_LEFT, // '('
+    TOKEN_CURLY_RIGHT, // '}'
+    TOKEN_CURLY_LEFT, // '{'
+
+    // STEP 4
+};
+
+struct token
+{
+    enum token_type type;
+    struct string *value; // text of WORD, ASS_WORD, IONUMBER and REDIR tokens
+};
+
+#endif /* ! TOKEN_H */
diff --git a/42sh/src/lexer/utils.h b/42sh/src/lexer/utils.h
new file mode 100644
index 0000000..3edd83f
--- /dev/null
+++ b/42sh/src/lexer/utils.h
@@ -0,0 +1,23 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <lexer/token.h>
+
+struct reserved_word
+{
+    const char *word; // spelling that lex_word() compares the input against
+    enum token_type type; // token type produced for that spelling
+};
+// NULL-terminated table of reserved words; const so that it stays read-only
+static const struct reserved_word reserved_words[] = {
+    { "if", TOKEN_IF },         { "then", TOKEN_THEN },
+    { "elif", TOKEN_ELIF },     { "else", TOKEN_ELSE },
+    { "fi", TOKEN_FI },         { "while", TOKEN_WHILE },
+    { "until", TOKEN_UNTIL },   { "do", TOKEN_DO },
+    { "done", TOKEN_DONE },     { "for", TOKEN_FOR },
+    { "in", TOKEN_IN },         { "!", TOKEN_NEG },
+    { "}", TOKEN_CURLY_RIGHT }, { "{", TOKEN_CURLY_LEFT },
+    { NULL, TOKEN_ERROR }
+};
+
+#endif /* ! UTILS_H */