summary | refs | log | tree | commit | diff
path: root/42sh/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to '42sh/src/lexer')
-rw-r--r-- 42sh/src/lexer/Makefile.am 15
-rw-r--r-- 42sh/src/lexer/expansion.c 386
-rw-r--r-- 42sh/src/lexer/expansion.h 13
-rw-r--r-- 42sh/src/lexer/lexer.c 359
-rw-r--r-- 42sh/src/lexer/lexer.h 48
-rw-r--r-- 42sh/src/lexer/token.h 54
-rw-r--r-- 42sh/src/lexer/utils.h 23
7 files changed, 898 insertions, 0 deletions
diff --git a/42sh/src/lexer/Makefile.am b/42sh/src/lexer/Makefile.am
new file mode 100644
index 0000000..4eda36d
--- /dev/null
+++ b/42sh/src/lexer/Makefile.am
@@ -0,0 +1,15 @@
+# Static library bundling the lexer and the word-expansion code.
+# The archive is declared under a single primary: listing the same library
+# as both installed (lib_) and not installed (noinst_) is contradictory.
+noinst_LIBRARIES = liblexer.a
+
+liblexer_a_SOURCES = \
+    token.h \
+    utils.h \
+    lexer.h \
+    lexer.c \
+    expansion.h \
+    expansion.c
+
+# Headers are included as <lexer/...> and <utils/...>, relative to src/
+liblexer_a_CPPFLAGS = -I$(top_srcdir)/src
+
+liblexer_a_CFLAGS = -std=c99 -Werror -Wall -Wextra -Wvla -pedantic
diff --git a/42sh/src/lexer/expansion.c b/42sh/src/lexer/expansion.c
new file mode 100644
index 0000000..d648009
--- /dev/null
+++ b/42sh/src/lexer/expansion.c
@@ -0,0 +1,386 @@
+#include <ctype.h>
+#include <lexer/expansion.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils/env.h"
+
+// Size of the buffer used to read the output of a command substitution
+// (one byte is reserved for the terminating NUL)
+#define BUFFER_SIZE 51
+
+// Leaves the *enclosing function* when the scan index reached the end of the
+// string. Expects `str`, `i` (an int *), `input` and `error` to be in scope
+// where it is expanded: frees `input` and returns clean_exit(MSG, error).
+// Kept as a bare if-block because it is invoked without a trailing semicolon.
+#define ERROR_CHECK(MSG) \
+    if (str[*i] == '\0') \
+    { \
+        string_free(input); \
+        return clean_exit(MSG, error); \
+    }
+
+#define DQUOTEESCAPED "$\\\n`\""
+// Identifies double-quote escaped characters
+// The explicit NUL test is required: strchr() also matches the terminating
+// '\0' of the set, which would make the end of a string look "escapable".
+#define ISDQUOTEESCAPED(C) \
+    ((C) != '\0' && strchr(DQUOTEESCAPED, (int)(C)) != NULL)
+
+#define SPECIAL "@*?$#"
+// Identifies special variable names
+// Same NUL guard as above, so that a '$' ending a word is not taken for a
+// special parameter.
+#define ISSPECIAL(C) ((C) != '\0' && strchr(SPECIAL, (int)(C)) != NULL)
+
+// Extracts the variable name that follows a '$' (`str` points just after it).
+// Recognised forms:
+//   {name}  - the braces are NOT part of the returned name; "\}" inside the
+//             braces stands for a literal '}'
+//   a single special parameter (@ * ? $ #) or a single digit
+//   a name starting with a letter or '_' followed by alphanumerics and '_'
+//
+// error = 1 indicates a missing }
+// error = 0 and NULL returned indicates an unrecognized var name
+// error = 0 and anything else than NULL returned is the var name
+// The returned string must be released by the caller.
+static struct string *get_var_name(char *str, int *error)
+{
+    *error = 0;
+
+    // <ctype.h> functions require an unsigned char value
+    unsigned char first = (unsigned char)str[0];
+    int braced = first == '{';
+    // The NUL test keeps the end of the string from matching ISSPECIAL
+    int special = first != '\0' && (ISSPECIAL(first) || isdigit(first));
+    int named = isalpha(first) || first == '_';
+
+    if (!braced && !special && !named)
+        return NULL;
+
+    struct string *res = string_create(NULL);
+    if (res == NULL)
+        return NULL;
+
+    if (braced)
+    {
+        // Start after the opening brace so that it is not stored in the name
+        int i = 1;
+        while (str[i] && str[i] != '}')
+        {
+            if (str[i] == '\\' && str[i + 1] == '}')
+                i++;
+            string_pushc(res, str[i]);
+            i++;
+        }
+        if (!str[i])
+        {
+            string_free(res);
+            *error = 1;
+            return NULL;
+        }
+        return res;
+    }
+
+    if (special)
+    {
+        string_pushc(res, str[0]);
+        return res;
+    }
+
+    for (int i = 0; isalnum((unsigned char)str[i]) || str[i] == '_'; i++)
+        string_pushc(res, str[i]);
+    return res;
+}
+
+// Common failure path shared by the expansion functions: writes `txt` on
+// stderr, raises the flag pointed to by `error` and yields NULL so that
+// callers can simply `return clean_exit(...)`.
+static struct string *clean_exit(char *txt, int *error)
+{
+    *error = 1;
+    fputs(txt, stderr);
+    return NULL;
+}
+
+// Creates the fork() in order to make a subshell for the command expansion
+// (cf section 2.6.3 of the SCL)
+// Only called in expand_substitution()
+//
+// input: command text, handed to _process_input() in the child
+// j, str: not needed here, kept so that the signature does not change
+// error: set to 1 (through clean_exit) whenever something fails
+//
+// Returns a new string holding everything the child wrote on its standard
+// output (trailing newlines are left untouched), or NULL on failure.
+static struct string *fork_subshell(struct string *input, int j, char *str,
+                                    int *error)
+{
+    (void)j;
+    (void)str;
+
+    int fds[2];
+    if (pipe(fds) == -1)
+    {
+        return clean_exit("pipe() faild to create 2 fds\n", error);
+    }
+
+    // Anything still buffered would be duplicated in the child and end up
+    // in the pipe when the child exits
+    fflush(stdout);
+
+    pid_t child = fork();
+    if (child == -1)
+    {
+        // Fork not working: do not leak the pipe
+        close(fds[0]);
+        close(fds[1]);
+        return clean_exit("fork() faild to produce a children\n", error);
+    }
+
+    if (child == 0)
+    {
+        // Child process
+        close(fds[0]);
+
+        if (dup2(fds[1], STDOUT_FILENO) == -1)
+        {
+            // Nothing can be reported through the pipe at this point
+            exit(-1);
+        }
+
+        _process_input(input);
+
+        close(fds[1]);
+        // NOTE(review): exits with status 1 even when everything went fine;
+        // the parent only checks WIFEXITED, so the value is unchanged here
+        exit(1);
+    }
+
+    // Parent process
+    close(fds[1]);
+
+    char *failure = NULL;
+    struct string *output = string_create(NULL);
+    if (output == NULL)
+        failure = "Could not create string for output\n";
+
+    // Drain the pipe BEFORE waiting for the child: a child producing more
+    // than the pipe can hold would otherwise block forever on write()
+    char buff[BUFFER_SIZE];
+    ssize_t r = 0;
+    while (failure == NULL && (r = read(fds[0], buff, sizeof(buff) - 1)) > 0)
+    {
+        buff[r] = '\0';
+        if (!string_pushstr(output, buff))
+            failure = "Failed to transfer from pipe\n";
+    }
+    if (failure == NULL && r < 0)
+        failure = "Failed to transfer from pipe\n";
+
+    // Closing the read end first guarantees that a child still writing
+    // terminates instead of blocking, so waitpid() cannot hang
+    close(fds[0]);
+
+    // Check if the child terminated normally
+    int status = 0;
+    if (waitpid(child, &status, 0) == -1 || !WIFEXITED(status))
+    {
+        if (failure == NULL)
+            failure = "Child process failed miserably\n";
+    }
+
+    if (failure != NULL)
+    {
+        if (output != NULL)
+            string_free(output);
+        return clean_exit(failure, error);
+    }
+    return output;
+}
+
+// Scans `in` starting at index `i` and returns the index of the first `c`
+// that is not escaped by a backslash (the escape state is tracked from the
+// starting index), or the index of the terminating NUL if there is none.
+static int look_for_next(char *in, int i, char c)
+{
+    int escaped = 0;
+    for (; in[i] != '\0'; i++)
+    {
+        if (in[i] == c && !escaped)
+            break;
+        // A backslash toggles the state, any other character clears it
+        escaped = (in[i] == '\\') ? !escaped : 0;
+    }
+    return i;
+}
+
+// Removes all the <newline> characters at the end of the string obtained by
+// the command substitution (Also section 2.6.3 of the SCL)
+// Works in place: the removed characters are overwritten with NUL and the
+// length is updated. A NULL string is ignored.
+static void trimming_newline(struct string *txt)
+{
+    if (txt == NULL || txt->data == NULL)
+    {
+        return;
+    }
+
+    size_t len = txt->length;
+    // `len > 0` keeps the index from wrapping when the whole string is
+    // made of newlines
+    while (len > 0 && txt->data[len - 1] == '\n')
+    {
+        len--;
+        txt->data[len] = '\0';
+    }
+
+    txt->length = len;
+}
+
+// Performs the substitution (forks and get back the stdout)
+//
+// str: buffer holding the command; it is temporarily NUL-terminated at the
+//      closing delimiter and restored before returning
+// i: on entry, index of the first character of the command; on return,
+//    index of the closing delimiter (or of the end of the string when the
+//    delimiter is missing)
+// error: set to 1 on failure
+// delim: closing delimiter, '`' or ')'
+//
+// Returns the output of the command without its trailing newline(s), or
+// NULL on failure.
+struct string *expand_substitution(char *str, int *i, int *error, char delim)
+{
+    int j = *i;
+    struct string *input = string_create(NULL);
+    if (input == NULL)
+    {
+        return clean_exit("Could not create string for input\n", error);
+    }
+
+    if (delim == '`')
+    {
+        *i = look_for_next(str, j, delim);
+        if (str[*i] == '\0')
+        {
+            string_free(input);
+            return clean_exit("Could not match `\n", error);
+        }
+    }
+    // Sadly, there is no other way around this
+    else
+    {
+        int escaped = 0;
+        int par_count = 1;
+
+        for (; str[*i] != '\0'; (*i)++)
+        {
+            char c = str[*i];
+            if (c == '\\')
+            {
+                escaped ^= 1;
+            }
+            else if (escaped)
+            {
+                // Escaped character: taken literally, quotes included
+                escaped = 0;
+            }
+            else if (c == '\'')
+            {
+                do
+                {
+                    (*i)++;
+                } while (str[*i] != '\0' && str[*i] != '\'');
+                if (str[*i] == '\0')
+                {
+                    string_free(input);
+                    return clean_exit("Missing matching '\n", error);
+                }
+            }
+            else if (c == '"' || c == '`')
+            {
+                *i = look_for_next(str, *i + 1, c);
+                if (str[*i] == '\0')
+                {
+                    string_free(input);
+                    return clean_exit(c == '"' ? "Missing matching \"\n"
+                                               : "Missing matching `\n",
+                                      error);
+                }
+            }
+            else if (c == '(')
+            {
+                par_count++;
+            }
+            else if (c == delim && --par_count == 0)
+            {
+                break;
+            }
+        }
+
+        // The end of the string was reached with parentheses still open:
+        // writing `delim` back below would destroy the terminating NUL
+        if (str[*i] == '\0')
+        {
+            string_free(input);
+            return clean_exit("Missing matching )\n", error);
+        }
+    }
+
+    // Cut the buffer at the closing delimiter for the time of the call
+    str[*i] = '\0';
+    string_pushstr(input, str + j);
+    struct string *output = fork_subshell(input, j, str, error);
+    string_free(input);
+    str[*i] = delim;
+
+    // fork_subshell() returns NULL when it fails
+    if (output != NULL)
+        trimming_newline(output);
+    return output;
+}
+
+// Expands the variable reference starting at input[i], which must be a '$'.
+// The value of the variable, if it has one, is appended to `res`; when no
+// variable name follows, the '$' itself is appended.
+// Returns the index of the first character after the reference, or -1 when
+// a closing } is missing.
+static int expand_var(struct string *res, char *input, int i)
+{
+    int missing_brace = 0;
+    struct string *name = get_var_name(&input[i + 1], &missing_brace);
+
+    if (missing_brace)
+    {
+        string_free(name);
+        fputs("Missing } in variable expansion\n", stderr);
+        return -1;
+    }
+
+    if (name == NULL)
+    {
+        // Not a variable: keep the dollar as a regular character
+        string_pushc(res, input[i]);
+        return i + 1;
+    }
+
+    // Concatenate the value if the variable has one
+    char *value = env_get(name->data);
+    if (value)
+        string_pushstr(res, value);
+
+    // Skip the '$', both braces when present, then the name itself
+    int next = i + 1;
+    if (input[next] == '{')
+        next += 2;
+    next += name->length;
+
+    string_free(name);
+    return next;
+}
+
+// Expands the content of a double-quoted section.
+// input[i] is the first character after the opening quote. Variables and
+// command substitutions are expanded, backslash escapes are resolved and
+// the result is appended to `res`.
+// Returns the index of the closing quote, or -1 on error; in that case
+// `res` has been freed.
+static int expand_dquotes(char *input, int i, struct string *res)
+{
+    while (input[i] != '\0' && input[i] != '"')
+    {
+        // Must be tested before the plain '$' case, which would otherwise
+        // swallow the "$(" form
+        if (input[i] == '`' || (input[i] == '$' && input[i + 1] == '('))
+        {
+            // Pick the closing delimiter before moving past the opener
+            char delim = (input[i] == '`') ? '`' : ')';
+            i += (input[i] == '$') ? 2 : 1;
+
+            int e = 0;
+            struct string *output = expand_substitution(input, &i, &e, delim);
+            if (e || output == NULL)
+            {
+                string_free(res);
+                return -1;
+            }
+
+            // +1 for the last parenthesis/backquote
+            i++;
+            string_catenate(res, output);
+            continue;
+        }
+        if (input[i] == '$')
+        {
+            // expand_var() returns the new index, not an offset
+            i = expand_var(res, input, i);
+            if (i == -1)
+            {
+                string_free(res);
+                return -1;
+            }
+            continue;
+        }
+        if (input[i] == '\\' && input[i + 1] != '\0'
+            && ISDQUOTEESCAPED(input[i + 1]))
+            i++;
+        string_pushc(res, input[i]);
+        i++;
+    }
+
+    if (input[i] == '\0')
+    {
+        // The closing quote is missing: never walk past the terminator
+        fprintf(stderr, "Missing matching \"\n");
+        string_free(res);
+        return -1;
+    }
+    return i;
+}
+
+// Builds the expanded form of a word: single quotes are removed and their
+// content copied as is, double-quoted sections go through expand_dquotes(),
+// a backslash makes the next character literal and $name references are
+// replaced by their value.
+// `word` itself is neither modified nor freed.
+// Returns a new string, or NULL on error.
+struct string *expand_word(struct string *word)
+{
+    char *input = word->data;
+    int escape = 0;
+    struct string *res = string_create(NULL);
+    if (res == NULL)
+        return NULL;
+
+    for (int i = 0; input[i]; i++)
+    {
+        if (escape)
+        {
+            // We are after a backslash, we just include this char
+            string_pushc(res, input[i]);
+            escape = 0;
+        }
+        else if (input[i] == '\'')
+        {
+            i++;
+            while (input[i] != '\0' && input[i] != '\'')
+                string_pushc(res, input[i++]);
+            if (input[i] == '\0')
+            {
+                // Unterminated quote: stop before leaving the buffer
+                fprintf(stderr, "Missing matching '\n");
+                string_free(res);
+                return NULL;
+            }
+        }
+        else if (input[i] == '"')
+        {
+            // On error expand_dquotes() has already released res
+            if ((i = expand_dquotes(input, i + 1, res)) == -1)
+                return NULL;
+        }
+        else if (input[i] == '\\')
+        {
+            escape = 1;
+        }
+        else if (input[i] == '$')
+        {
+            // expand_var() returns the index following the reference
+            int next = expand_var(res, input, i);
+            if (next == -1)
+            {
+                string_free(res);
+                return NULL;
+            }
+            // -1 because the loop increment brings us to `next`
+            i = next - 1;
+        }
+        else
+        {
+            string_pushc(res, input[i]);
+        }
+    }
+
+    return res;
+}
diff --git a/42sh/src/lexer/expansion.h b/42sh/src/lexer/expansion.h
new file mode 100644
index 0000000..4729cb8
--- /dev/null
+++ b/42sh/src/lexer/expansion.h
@@ -0,0 +1,13 @@
+#ifndef EXPANSION_H
+#define EXPANSION_H
+
+#include <stddef.h>
+#include <utils/libstring.h>
+
+#include "helper.h"
+
+// Runs the command substitution found in `str`.
+// On entry *i is the index of the first character of the command; `delim`
+// is the closing delimiter ('`' or ')'). On success *i is the index of that
+// closing delimiter.
+// Returns the text written by the command on its standard output, or NULL
+// with *error set to 1.
+struct string *expand_substitution(char *str, int *i, int *error, char delim);
+
+// Returns a newly created string holding `word` after quote removal,
+// backslash handling and variable expansion, or NULL on error.
+// `word` is not freed.
+struct string *expand_word(struct string *word);
+
+#endif /* ! EXPANSION_H */
diff --git a/42sh/src/lexer/lexer.c b/42sh/src/lexer/lexer.c
new file mode 100644
index 0000000..eac77ab
--- /dev/null
+++ b/42sh/src/lexer/lexer.c
@@ -0,0 +1,359 @@
+#include "lexer.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "expansion.h"
+#include "utils.h"
+
+// True when the lexer is on a backslash immediately followed by a newline.
+// Relies on `in` and `lexer` being in scope where it is expanded.
+#define NEWLINEESCAPE \
+    ((in[lexer->pos] == '\\') && (in[lexer->pos + 1] == '\n'))
+// Characters ending a word when they are neither quoted nor escaped
+#define SEPARATORS " \n;&|<>()"
+// True if I ends a word. Note that strchr() also matches the terminating
+// NUL, so '\0' counts as a separator.
+// Fully parenthesized so that it can be negated or combined safely.
+#define ISSEPARATOR(I) (strchr(SEPARATORS, (int)(I)) != NULL)
+
+// Allocates a lexer reading from `input`.
+// The input is referenced, not copied. The lexer starts at offset 0 with no
+// token processed yet.
+// Returns NULL if the allocation fails.
+struct lexer *lexer_new(struct string *input)
+{
+    struct lexer *new = malloc(sizeof(*new));
+    if (new == NULL)
+        return NULL;
+
+    new->input = input;
+    new->pos = 0;
+    new->processed = 0;
+    new->current_tok.value = NULL;
+    new->current_tok.type = TOKEN_EOF;
+    return new;
+}
+
+// Releases the lexer structure only: neither the input string nor the value
+// of the current token is freed here.
+void lexer_free(struct lexer *lexer)
+{
+ free(lexer);
+}
+
+// Skips a comment: moves the cursor forward until it sits on the newline
+// ending the comment, or on the end of the input.
+static void yeet_comment(struct lexer *l)
+{
+    for (;;)
+    {
+        char c = l->input->data[l->pos];
+        if (c == '\0' || c == '\n')
+            return;
+        l->pos++;
+    }
+}
+
+// Tries to read an IO number at the cursor: a run of digits immediately
+// followed by '<' or '>'.
+// On success the current token becomes a TOKEN_IONUMBER whose value holds
+// the digits, and 1 is returned. Otherwise the current token is left
+// untouched and 0 is returned. The cursor is never moved.
+static int create_ionumber(struct lexer *l)
+{
+    const char *in = l->input->data;
+
+    // <ctype.h> functions require an unsigned char value
+    size_t end = l->pos;
+    while (isdigit((unsigned char)in[end]))
+        end++;
+
+    if (in[end] != '<' && in[end] != '>')
+        return 0;
+
+    // Allocate only once we know that the digits are an IO number
+    struct string *str = string_create(NULL);
+    if (str == NULL)
+        return 0;
+    for (size_t k = l->pos; k < end; k++)
+        string_pushc(str, in[k]);
+
+    l->current_tok.type = TOKEN_IONUMBER;
+    l->current_tok.value = str;
+    return 1;
+}
+
+// Returns the index, at or after `i`, of the first character equal to `c`
+// that does not directly follow an escaping backslash. When no such
+// character exists, the index of the terminating NUL is returned.
+static int look_for_next(char *in, int i, char c)
+{
+    int escaped = 0;
+    while (in[i] != '\0')
+    {
+        if (!escaped && in[i] == c)
+            return i;
+        if (in[i] == '\\')
+            escaped = !escaped;
+        else
+            escaped = 0;
+        i++;
+    }
+    return i;
+}
+
+// Builds a new input in which the command substitution starting at in[i]
+// ("$(" or '`') is replaced by the output of the command:
+//     <text before> + <command output> + <text after the closing delimiter>
+// Returns the new input, or NULL on failure; in that case `in` is left as
+// it was received.
+// NOTE(review): who owns `substitute` and `end_input` once they went through
+// string_catenate() is not visible from here - confirm they are not leaked.
+static struct string *substitution(char *in, int i)
+{
+    int start = i;
+    char split = in[start];
+
+    // Cut the buffer to copy what precedes the substitution
+    in[start] = '\0';
+    struct string *new_input = string_create(in);
+    if (new_input == NULL)
+    {
+        in[start] = split;
+        return NULL;
+    }
+
+    // Skip "$(" or "`"
+    i += (split == '$') ? 2 : 1;
+
+    int error = 0;
+    struct string *substitute =
+        expand_substitution(in, &i, &error, (split == '`') ? '`' : ')');
+
+    if (error || substitute == NULL)
+    {
+        // Do not leave the caller with a truncated input
+        in[start] = split;
+        string_free(new_input);
+        return NULL;
+    }
+
+    string_catenate(new_input, substitute);
+
+    // in[i] is the closing delimiter
+    struct string *end_input = string_create(in + i + 1);
+    string_catenate(new_input, end_input);
+
+    return new_input;
+}
+
+// Delimits the word starting at the cursor.
+// The word ends on the first separator that is neither quoted nor escaped,
+// or at the end of the input. That character is overwritten with NUL so
+// that the word can be read as a C string; it is returned so that the caller
+// can put it back, and its index is stored in *end.
+// Command substitutions met on the way are expanded, which REPLACES
+// l->input by a new string (the previous one is freed).
+// On error (failed substitution, unterminated quote) the current token type
+// is set to TOKEN_ERROR and '\0' is returned; *end is not written.
+static char extract_word(struct lexer *l, int *end)
+{
+ char *in = l->input->data;
+ int i = l->pos;
+ // True when the previous character is a backslash that is not itself
+ // escaped
+ int escaped = 0;
+
+ while (in[i])
+ {
+ if (!escaped && ((in[i] == '$' && in[i + 1] == '(') || in[i] == '`'))
+ {
+ struct string *new_input = substitution(in, i);
+ if (new_input == NULL)
+ {
+ l->current_tok.type = TOKEN_ERROR;
+ return '\0';
+ }
+
+ // The index is kept: scanning resumes on the first character of
+ // the substituted text
+ string_free(l->input);
+ l->input = new_input;
+ in = l->input->data;
+ }
+ // Checking that we are NOT in a quote and have a separator
+ if (!escaped && ISSEPARATOR(in[i]))
+ break;
+ if (!escaped && in[i] == '\'')
+ {
+ // Skip up to the closing single quote
+ i++;
+ while (in[i] && in[i] != '\'')
+ i++;
+ if (!in[i])
+ {
+ l->current_tok.type = TOKEN_ERROR;
+ return '\0';
+ }
+ }
+ else if (!escaped && in[i] == '"')
+ {
+ // Skip up to the closing, unescaped double quote
+ i = look_for_next(in, i + 1, in[i]);
+ if (!in[i])
+ {
+ l->current_tok.type = TOKEN_ERROR;
+ return '\0';
+ }
+ }
+ else if (in[i] == '\\')
+ {
+ escaped ^= 1;
+ }
+ else
+ escaped = 0;
+ i++;
+ }
+
+ // Temporarily terminate the word; the caller restores in[i]
+ char tmp = in[i];
+ in[i] = '\0';
+ *end = i;
+ return tmp;
+}
+
+// Tells whether the value of `t` is an assignment word.
+// It is one when a '=' follows a non-empty name made only of alphanumerics
+// and '_' that does not start with a digit.
+// Returns TOKEN_ASS_WORD in that case, TOKEN_WORD otherwise.
+static enum token_type word_or_ass(struct token t)
+{
+    const char *word = t.value->data;
+
+    // <ctype.h> functions require an unsigned char value
+    if (isdigit((unsigned char)word[0]))
+    {
+        return TOKEN_WORD;
+    }
+    for (int i = 0; word[i]; i++)
+    {
+        if (word[i] == '=')
+        {
+            // "=value" has no name in front of the '=': plain word
+            return i > 0 ? TOKEN_ASS_WORD : TOKEN_WORD;
+        }
+        if (word[i] != '_' && !isalnum((unsigned char)word[i]))
+        {
+            return TOKEN_WORD;
+        }
+    }
+    return TOKEN_WORD;
+}
+
+// Lexes the word starting at the cursor into the current token.
+// A reserved word gets its dedicated token type. Anything else is copied in
+// the token value and typed by word_or_ass(). The type is TOKEN_ERROR when
+// the word could not be delimited or copied.
+static void lex_word(struct lexer *l)
+{
+    int end = 0;
+    char saved = extract_word(l, &end);
+    if (l->current_tok.type == TOKEN_ERROR)
+        return;
+
+    // extract_word() may have replaced the input, so read it afterwards
+    char *in = l->input->data;
+    char *word = in + l->pos;
+
+    // Identifies reserved words
+    enum token_type kind = TOKEN_ERROR;
+    for (int n = 0; reserved_words[n].word != NULL; n++)
+    {
+        if (STRINGS_ARE_EQUAL(word, reserved_words[n].word))
+            kind = reserved_words[n].type;
+    }
+    l->current_tok.type = kind;
+
+    // If we couldn't identify a reserved word
+    if (kind == TOKEN_ERROR)
+    {
+        struct string *copy = string_create(word);
+        if (copy != NULL)
+        {
+            l->current_tok.value = copy;
+            // Set the token type
+            l->current_tok.type = word_or_ass(l->current_tok);
+        }
+    }
+
+    // Put back the character extract_word() replaced with NUL
+    in[end] = saved;
+}
+
+// Sets the type of the current token and returns that token.
+// The token value is left as it was.
+static struct token set_ttype(struct lexer *lexer, enum token_type type)
+{
+ lexer->current_tok.type = type;
+ return lexer->current_tok;
+}
+
+// Lexes an operator starting with '|' or '&':
+//   "||" -> TOKEN_OR, "|" -> TOKEN_PIPE, "&&" -> TOKEN_AND
+// Anything else (a lone '&' included) gives TOKEN_ERROR.
+// The cursor is not moved.
+static struct token lex_and_or(struct lexer *l)
+{
+    const char *cur = l->input->data + l->pos;
+    enum token_type type = TOKEN_ERROR;
+
+    if (cur[0] == '|')
+        type = (cur[1] == '|') ? TOKEN_OR : TOKEN_PIPE;
+    else if (cur[0] == '&' && cur[1] == '&')
+        type = TOKEN_AND;
+
+    l->current_tok.type = type;
+    return l->current_tok;
+}
+
+// Lexes a redirection operator starting at the cursor ('<' or '>').
+// The token value receives the operator text: the first character alone, or
+// two characters when the second one is '>' or '&', or when the operator
+// is ">|". The cursor is not moved.
+static struct token lex_redirect(struct lexer *l)
+{
+    const char *cur = l->input->data + l->pos;
+    struct string *val = string_create(NULL);
+
+    l->current_tok.type = TOKEN_REDIR;
+    l->current_tok.value = val;
+    string_pushc(val, cur[0]);
+
+    int two_chars =
+        cur[1] == '>' || cur[1] == '&' || (cur[0] == '>' && cur[1] == '|');
+    if (two_chars)
+        string_pushc(val, cur[1]);
+
+    return l->current_tok;
+}
+
+// Lexes whatever is not a single-character token or an operator: an IO
+// number when the cursor is on digits followed by a redirection, a word
+// otherwise.
+static void lex_special(struct lexer *l)
+{
+    // <ctype.h> functions require an unsigned char value
+    unsigned char first = (unsigned char)l->input->data[l->pos];
+
+    // If the first char is a digit and we recognized a number followed by a
+    // redir, return immediately
+    if (isdigit(first) && create_ionumber(l))
+        return;
+    lex_word(l);
+}
+
+// Lexes the token located at the cursor and stores it in current_tok.
+// Spaces and backslash-newline pairs in front of the token are skipped
+// (which moves the cursor), and so are comments. The cursor is NOT moved
+// past the token itself: that is the job of move_pos().
+// Returns a copy of the current token.
+struct token lexer_next_token(struct lexer *lexer)
+{
+    if (lexer->pos >= lexer->input->length)
+    {
+        lexer->current_tok.value = NULL;
+        lexer->current_tok.type = TOKEN_EOF;
+        return lexer->current_tok;
+    }
+
+    char *in = lexer->input->data;
+
+    // Skip blanks and line continuations
+    while (in[lexer->pos] != '\0')
+    {
+        if (in[lexer->pos] == ' ')
+            lexer->pos += 1;
+        else if (in[lexer->pos] == '\\' && in[lexer->pos + 1] == '\n')
+            lexer->pos += 2;
+        else
+            break;
+    }
+
+    switch (in[lexer->pos])
+    {
+    case '|':
+    case '&':
+        return lex_and_or(lexer);
+    case '<':
+    case '>':
+        return lex_redirect(lexer);
+    case '#':
+        yeet_comment(lexer);
+        return lexer_next_token(lexer);
+    case '\0':
+        return set_ttype(lexer, TOKEN_EOF);
+    case '\n':
+        return set_ttype(lexer, TOKEN_NEWLINE);
+    case ';':
+        return set_ttype(lexer, TOKEN_SEMICOLON);
+    case '(':
+        return set_ttype(lexer, TOKEN_PAR_LEFT);
+    case ')':
+        return set_ttype(lexer, TOKEN_PAR_RIGHT);
+    case '{':
+        return set_ttype(lexer, TOKEN_CURLY_LEFT);
+    case '}':
+        return set_ttype(lexer, TOKEN_CURLY_RIGHT);
+    default:
+        lex_special(lexer);
+        return lexer->current_tok;
+    }
+}
+
+// Moves the cursor past the current token.
+// Reserved words and operators advance by their fixed length, tokens
+// carrying a value by the length of that value, every other token by one
+// character. TOKEN_EOF does not move the cursor.
+static void move_pos(struct lexer *lexer)
+{
+    size_t step = 1;
+
+    switch (lexer->current_tok.type)
+    {
+    case TOKEN_EOF:
+        return;
+    case TOKEN_IF:
+    case TOKEN_FI:
+    case TOKEN_IN:
+    case TOKEN_DO:
+    case TOKEN_OR:
+    case TOKEN_AND:
+        step = 2;
+        break;
+    case TOKEN_FOR:
+        step = 3;
+        break;
+    case TOKEN_ELSE:
+    case TOKEN_ELIF:
+    case TOKEN_THEN:
+    case TOKEN_DONE:
+        step = 4;
+        break;
+    case TOKEN_WHILE:
+    case TOKEN_UNTIL:
+        step = 5;
+        break;
+    case TOKEN_WORD:
+    case TOKEN_IONUMBER:
+    case TOKEN_REDIR:
+    case TOKEN_ASS_WORD:
+        step = lexer->current_tok.value->length;
+        break;
+    default:
+        break;
+    }
+
+    lexer->pos += step;
+}
+
+// Returns the next token without consuming it.
+// The first call lexes the token, moves the cursor past it and marks it as
+// processed. The following calls return that same token until lexer_pop()
+// is called.
+struct token lexer_peek(struct lexer *lexer)
+{
+    struct token tok = lexer->current_tok;
+
+    if (!lexer->processed)
+    {
+        lexer->processed = 1;
+        tok = lexer_next_token(lexer);
+        move_pos(lexer);
+    }
+    return tok;
+}
+
+// Returns the next token and consumes it.
+// A token already processed by lexer_peek() is returned as is. Otherwise a
+// new token is lexed and the cursor moved past it. In both cases the
+// "processed" mark is cleared.
+struct token lexer_pop(struct lexer *lexer)
+{
+    struct token tok = lexer->current_tok;
+
+    if (!lexer->processed)
+    {
+        tok = lexer_next_token(lexer);
+        move_pos(lexer);
+    }
+
+    lexer->processed = 0;
+    return tok;
+}
diff --git a/42sh/src/lexer/lexer.h b/42sh/src/lexer/lexer.h
new file mode 100644
index 0000000..8d8cdf5
--- /dev/null
+++ b/42sh/src/lexer/lexer.h
@@ -0,0 +1,48 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <lexer/token.h>
+#include <stddef.h>
+#include <utils/libstring.h>
+
+// True if C could be used as a word
+// The argument and the whole expansion are parenthesized so that the macro
+// can be negated or combined with other conditions.
+#define ISWORD(C) \
+    ((C) == TOKEN_WORD || (C) == TOKEN_THEN || (C) == TOKEN_ELIF \
+     || (C) == TOKEN_ELSE || (C) == TOKEN_IF || (C) == TOKEN_WHILE \
+     || (C) == TOKEN_UNTIL || (C) == TOKEN_DO || (C) == TOKEN_DONE \
+     || (C) == TOKEN_FOR || (C) == TOKEN_IN || (C) == TOKEN_NEG \
+     || (C) == TOKEN_FI || (C) == TOKEN_CURLY_LEFT \
+     || (C) == TOKEN_CURLY_RIGHT)
+
+// State of a lexer working on one input string
+struct lexer
+{
+ struct string *input; // input data
+ size_t pos; // the current offset inside the input data
+ char processed; // non-zero when current_tok was peeked and not yet popped
+ struct token current_tok; // next (if processed) token
+};
+
+// Creates a new lexer given an input string
+// The input is referenced, not copied
+struct lexer *lexer_new(struct string *input);
+
+// Frees the given lexer, not its input
+void lexer_free(struct lexer *lexer);
+
+// Returns the token located at the current offset of the input string
+// If the token is a WORD, copies the word to the current_tok.value field
+// The offset is not moved past the token
+struct token lexer_next_token(struct lexer *lexer);
+
+/*
+** Processes the next token if necessary
+** (previous call to lexer_pop or first call)
+*/
+// Returns the next token; the same token is returned again by the following
+// calls to lexer_peek and by the next call to lexer_pop
+struct token lexer_peek(struct lexer *lexer);
+
+/*
+** Processes the next token if necessary
+** (no previous call to lexer_peek since the last lexer_pop)
+*/
+// Returns the next token and consumes it
+struct token lexer_pop(struct lexer *lexer);
+
+#endif /* ! LEXER_H */
diff --git a/42sh/src/lexer/token.h b/42sh/src/lexer/token.h
new file mode 100644
index 0000000..89d772a
--- /dev/null
+++ b/42sh/src/lexer/token.h
@@ -0,0 +1,54 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <utils/libstring.h>
+
+// Kinds of tokens. The STEP comments group the values as they appear in the
+// original file.
+// NOTE(review): TOKEN_HASHTAG, TOKEN_ESCAPE, TOKEN_DOUBLEQUOTE and
+// TOKEN_DOLLAR are not produced by the lexer code of this change - confirm
+// whether they are still needed.
+enum token_type
+{
+ // STEP 1
+ TOKEN_NEWLINE,
+ TOKEN_EOF,
+ TOKEN_ERROR,
+ TOKEN_WORD,
+ TOKEN_IF,
+ TOKEN_THEN,
+ TOKEN_ELIF,
+ TOKEN_ELSE,
+ TOKEN_SEMICOLON,
+ TOKEN_FI,
+ TOKEN_HASHTAG,
+
+ // STEP 2
+ TOKEN_REDIR, // redirection operator, its text is in the token value
+ TOKEN_PIPE, // "|"
+ TOKEN_NEG, // "!"
+ TOKEN_WHILE,
+ TOKEN_UNTIL,
+ TOKEN_DO,
+ TOKEN_FOR,
+ TOKEN_DONE,
+ TOKEN_AND, // "&&"
+ TOKEN_OR, // "||"
+ TOKEN_ESCAPE,
+ TOKEN_ASS_WORD, // assignment word (name=value)
+ TOKEN_DOUBLEQUOTE,
+ TOKEN_DOLLAR,
+ TOKEN_IN,
+ TOKEN_IONUMBER, // digits immediately followed by '<' or '>'
+
+ // STEP 3
+ TOKEN_PAR_RIGHT,
+ TOKEN_PAR_LEFT,
+ TOKEN_CURLY_RIGHT,
+ TOKEN_CURLY_LEFT,
+
+ // STEP 4
+};
+
+// A lexed token
+struct token
+{
+ enum token_type type;
+ // Text of the token; set by the lexer for words, assignment words,
+ // IO numbers and redirections
+ struct string *value;
+};
+
+#endif /* ! TOKEN_H */
diff --git a/42sh/src/lexer/utils.h b/42sh/src/lexer/utils.h
new file mode 100644
index 0000000..3edd83f
--- /dev/null
+++ b/42sh/src/lexer/utils.h
@@ -0,0 +1,23 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <lexer/token.h>
+
+// Associates the spelling of a reserved word with its token type
+struct reserved_word
+{
+ const char *word; // spelling, NULL in the entry ending a table
+ enum token_type type;
+};
+
+// Reserved words known by the lexer, ended by an entry whose word is NULL.
+// The table is only ever read, hence const. Being static in a header, every
+// file including it gets its own copy.
+static const struct reserved_word reserved_words[] = {
+    { "if", TOKEN_IF },         { "then", TOKEN_THEN },
+    { "elif", TOKEN_ELIF },     { "else", TOKEN_ELSE },
+    { "fi", TOKEN_FI },         { "while", TOKEN_WHILE },
+    { "until", TOKEN_UNTIL },   { "do", TOKEN_DO },
+    { "done", TOKEN_DONE },     { "for", TOKEN_FOR },
+    { "in", TOKEN_IN },         { "!", TOKEN_NEG },
+    { "}", TOKEN_CURLY_RIGHT }, { "{", TOKEN_CURLY_LEFT },
+    { NULL, TOKEN_ERROR }
+};
+
+#endif /* ! UTILS_H */