[RFC PATCH 21/28] cli: lil: Add a distinct parsing step

Sean Anderson seanga2 at gmail.com
Thu Jul 1 08:16:04 CEST 2021


This adds a parser to LIL (as separate from the interpreter). This is
necessary to detect syntax errors before evaluating anything. Before this,
running a script like

	echo some message; echo syntax error}

would result in "some message" being printed before the error was
discovered. This is not only rather surprising, but also makes things like
Hush's secondary prompt impossible to implement. In addition, the original
parser would accept almost any input, and silently return NULL if it
encountered problems. This made it difficult to determine if a command had
been mis-parsed, since an empty command would just evaluate to "".

The grammar is not the same as LIL originally. Several ideas have been
taken from TCL proper as well. In order to simplify the parser, it has been
rewritten to be LL(1), except for line continuations which are LL(2). In
particular, multi-line comments and command/variable substitutions partially
through unquoted words (e.g. a$b) have been removed. Some other characters
such as unescaped, unmatched }s are now syntax errors. On the other hand,
some things such as escaped characters in unquoted words have been added
back (as seen in TCL). Unlike TCL, comments may be placed almost anywhere.
The exact grammar is subject to change, but I have tried to make it as sane
as I can get it.

The grammar has been documented in (extended) EBNF. The names of the
nonterminals are the same as are used in the dodekalogue [1]. Each
nonterminal foo has a function parse_foo() which recognizes it.

[1] https://www.tcl.tk/man/tcl8.6/TclCmd/Tcl.htm

Signed-off-by: Sean Anderson <seanga2 at gmail.com>
---

 cmd/Kconfig       |    4 +-
 common/cli.c      |    2 +-
 common/cli_lil.c  | 1880 ++++++++++++++++++++++++++++++++++++---------
 include/cli_lil.h |   11 +-
 test/cmd/lil.c    |   73 +-
 5 files changed, 1527 insertions(+), 443 deletions(-)

diff --git a/cmd/Kconfig b/cmd/Kconfig
index bba72bbdc2..7ff8e4a7e5 100644
--- a/cmd/Kconfig
+++ b/cmd/Kconfig
@@ -43,8 +43,8 @@ if LIL
 config LIL_FULL
 	bool "Enable all LIL features"
 	help
-	  This enables all LIL builtin functions, as well as expression support
-	  for arithmetic and bitwise operations.
+	  This enables all LIL builtin functions, expression support for
+	  arithmetic and bitwise operations, and expanded error messages.
 
 config LIL_POOLS
 	bool "Use memory pools for LIL structures"
diff --git a/common/cli.c b/common/cli.c
index ad5d76d563..391fee0ec7 100644
--- a/common/cli.c
+++ b/common/cli.c
@@ -49,7 +49,7 @@ static const struct lil_callbacks env_callbacks = {
 static int lil_run(const char *cmd)
 {
 	int err;
-	struct lil_value *result = lil_parse(lil, cmd, 0, 0);
+	struct lil_value *result = lil_parse_eval(lil, cmd, 0, true);
 	const char *err_msg, *strres = lil_to_string(result);
 
 	/* The result may be very big, so use puts */
diff --git a/common/cli_lil.c b/common/cli_lil.c
index 06fd37c383..2ed96ebc2d 100644
--- a/common/cli_lil.c
+++ b/common/cli_lil.c
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <linux/err.h>
 
 #define HASHMAP_CELLS 256
 #define HASHMAP_CELLMASK 0xFF
@@ -121,14 +122,27 @@ struct lil_list {
 };
 
 /**
- * struct lil_symbol - A symbol parsed by the parser
+ * enum lil_symbol_type - The type of data in a symbol
  * @LIL_SYMBOL_VALUE: A plain old string and length
  * @LIL_SYMBOL_VARIABLE: A name of a variable to be substituted
  * @LIL_SYMBOL_LIST: A list of symbols
- * @value: A literal value or name of variable
- * @list: A list of commands in the script
- * @word: Another word to be evaluated
- * @type: The type of word
+ * @LIL_SYMBOL_COMMAND: A command to be run
+ * @LIL_SYMBOL_SCRIPT: A script to be run
+ */
+enum lil_symbol_type {
+	LIL_SYMBOL_VALUE = 0,
+	LIL_SYMBOL_LIST,
+	LIL_SYMBOL_VARIABLE,
+	LIL_SYMBOL_COMMAND,
+	LIL_SYMBOL_SCRIPT,
+};
+
+/**
+ * struct lil_symbol - A symbol parsed by the parser
+ * @value: A literal value
+ * @list: A list of commands, words, or symbols
+ * @symbol: Another symbol to be evaluated
+ * @type: The type of symbol
  */
 struct lil_symbol {
 	union {
@@ -136,13 +150,7 @@ struct lil_symbol {
 		struct lil_symbol *symbol;
 		struct lil_list list;
 	};
-	enum {
-		LIL_SYMBOL_VALUE = 0,
-		LIL_SYMBOL_LIST,
-		LIL_SYMBOL_VARIABLE,
-		LIL_SYMBOL_COMMAND,
-		LIL_SYMBOL_SCRIPT,
-	} type;
+	enum lil_symbol_type type;
 };
 
 /**
@@ -162,14 +170,64 @@ struct lil_func {
 	lil_func_proc_t proc;
 };
 
+/**
+ * struct lil_position - A position within a script
+ * @head: The absolute offset
+ * @line: The current line (as delineated by newlines)
+ * @column: The column within the current line
+ *
+ * @head is advanced by every pop(). @line and @column are only updated by
+ * pop() when CONFIG_LIL_FULL is enabled; in that case @column is reset to 1
+ * after each newline.
+ */
+struct lil_position {
+	size_t head;
+	size_t line;
+	size_t column;
+};
+
+/**
+ * struct lil_parser_error - Errors encountered while parsing a script
+ * @PERR_NONE: There is no error.
+ * @PERR_OOM: We ran out of memory.
+ * @PERR_EXPECTED: The parser was expecting a specific character but got
+ *                 something else or ran out of input.
+ * @PERR_UNEXPECTED: The parser encountered an unexpected character or ran out
+ *                   of input.
+ * @type: The type of error
+ * @func: The name of the function which caused the error. Only recorded when
+ *        CONFIG_LIL_DEBUG is enabled.
+ * @expected: The character we expected to find
+ * @matching: The position of the character which made us expect @expected
+ *
+ * @expected is only valid when @type is %PERR_EXPECTED. @matching is valid when
+ * @type is %PERR_EXPECTED or when @type is %PERR_UNEXPECTED and the parser is
+ * at the end of the file. Both fields are only recorded when CONFIG_LIL_FULL is
+ * enabled.
+ */
+struct lil_parser_error {
+	struct lil_position matching;
+	const char *func;
+	enum {
+		/* Must stay zero: parse functions test "if (p->err.type)" */
+		PERR_NONE = 0,
+		PERR_OOM,
+		PERR_EXPECTED,
+		PERR_UNEXPECTED,
+	} type;
+	char expected;
+};
+
+/**
+ * struct lil_parser - State used when parsing a script
+ * @code: The script which is being parsed
+ * @len: The length of the script
+ * @depth: Not referenced by the parse functions visible in this part of the
+ *         patch; presumably a nesting/recursion depth counter (TODO: confirm
+ *         against the users of this field)
+ * @pos: Our current position within @code
+ * @err: The current error (if any)
+ */
+struct lil_parser {
+	const char *code;
+	size_t len;
+	size_t depth;
+	struct lil_position pos;
+	struct lil_parser_error err;
+};
+
 /**
  * struct lil - The current state of the interpreter
- * @code: The code which is being interpreted
- * @rootcode: The top-level code (e.g. the code for the initial call to
- *            lil_parse())
- * @clen: The length of @code
- * @head: The first uninterpreted part of this code, as an index of @code
- * @ignoreeol: Whether to treat newlines as whitespace or command terminators
  * @cmd: A list of the current commands
  * @cmds: The number of commands in @cmd
  * @cmdmap: A hash map mapping command names to pointers to commands in @cmd
@@ -185,14 +243,9 @@ struct lil_func {
  * @error: The current error status
  * @err_head: The offset in @code which caused the @error
  * @err_msg: An optional string describing the current @error
- * @parse_depth: The depth of recursive function calls
+ * @depth: The depth of recursive function calls
  */
 struct lil {
-	const char *code; /* need save on parse */
-	const char *rootcode;
-	size_t clen; /* need save on parse */
-	size_t head; /* need save on parse */
-	int ignoreeol;
 	struct lil_func **cmd;
 	size_t cmds;
 	struct hashmap cmdmap;
@@ -203,7 +256,7 @@ struct lil {
 	enum lil_error err;
 	char *err_msg;
 	struct lil_callbacks callbacks;
-	size_t parse_depth;
+	size_t depth;
 };
 
 /**
@@ -231,7 +284,6 @@ struct expreval {
 	} error;
 };
 
-static struct lil_value *next_word(struct lil *lil);
 static void register_stdcmds(struct lil *lil);
 static void lil_set_error(struct lil *lil, enum lil_error err, const char *msg);
 static void lil_set_errorf(struct lil *lil, enum lil_error err,
@@ -560,51 +612,6 @@ struct lil_value *lil_list_get(struct lil_list *list, size_t index)
 	return &sym->value;
 }
 
-static int needs_escape(const char *str)
-{
-	size_t i;
-
-	if (!str || !str[0])
-		return 1;
-
-	for (i = 0; str[i]; i++)
-		if (ispunct(str[i]) || isspace(str[i]))
-			return 1;
-
-	return 0;
-}
-
-struct lil_value *lil_list_to_value(struct lil_list *list, int do_escape)
-{
-	struct lil_value *val = alloc_value(NULL);
-	size_t i, j;
-
-	for (i = 0; i < list->c; i++) {
-		struct lil_value *item = lil_list_get(list, i);
-		int escape =
-			do_escape ? needs_escape(lil_to_string(item)) : 0;
-
-		if (i)
-			lil_append_char(val, ' ');
-
-		if (escape) {
-			lil_append_char(val, '{');
-			for (j = 0; j < item->l; j++) {
-				if (item->d[j] == '{')
-					lil_append_string(val, "}\"\\o\"{");
-				else if (item->d[j] == '}')
-					lil_append_string(val, "}\"\\c\"{");
-				else
-					lil_append_char(val, item->d[j]);
-			}
-			lil_append_char(val, '}');
-		} else {
-			lil_append_val(val, item);
-		}
-	}
-	return val;
-}
-
 struct lil_env *lil_alloc_env(struct lil_env *parent)
 {
 	struct lil_env *env;
@@ -888,275 +895,1167 @@ struct lil *lil_new(const struct lil_callbacks *callbacks)
 	return lil;
 }
 
-static int islilspecial(char ch)
+/**
+ * DOC: Syntax
+ *
+ * Syntax is EBNF, except that [], {}, ?, *, and + have been borrowed from
+ * regular expressions and , is optional. In addition, a - b matches strings
+ * which match a but which do not match b (set difference).
+ *
+ * ::
+ *
+ *  script = whitespace* command? ( terminator whitespace* command? )* ;
+ *
+ *  command = word ( ( whitespace - continuation ) whitespace* word )*
+ *            whitespace* ;
+ *
+ *  word = single-quote | double-quote | brace | bracket | dollar
+ *       | ( escape-sequence | continuation |
+ *           ( character - word-special - space ) )+ ;
+ *
+ *  single-quote    = "'" ( subcommand | dollar | escape-sequence | continuation
+ *                        | ( character - "'" ) )* "'" ;
+ *  double-quote    = '"' ( subcommand | dollar | escape-sequence | continuation
+ *                        | ( character - '"' ) )* '"' ;
+ *  escape-sequence = '\\' ( character - '\n' ) ;
+ *  brace           = '{' ( brace | '\\' character | comment
+ *                        | ( character - '}' - '#' ) )* '}' ;
+ *  bracket         = '[' script ']' ;
+ *  dollar          = '$' word ;
+ *
+ *  whitespace   = space | continuation ;
+ *  continuation = '\\\n' ;
+ *  terminator   = '\n' | ';' | comment ;
+ *  comment      = '#' ( character - '\n' )* '\n' ;
+ *
+ *  space        = ' ' | '\f' | '\r' | '\t' | '\v' ;
+ *  word-special = '$' | '{' | '}' | '[' | ']' | '"' | "'" | '\\'
+ *               | terminator ;
+ *  character    = [\x00-\xff] ;
+ *
+ * In addition to the full syntax above, many commands expect arguments
+ * formatted as lists. This syntax is similar to the above, with the following
+ * exceptions:
+ *
+ * - Lists are words separated by whitespace.
+ * - Neither ``$`` nor ``[`` substitutions are performed.
+ * - ``#`` and ``;`` have no special meaning.
+ * - ``\n`` is considered whitespace.
+ *
+ * ::
+ *
+ *  list = space* word? ( space+ word )* space* ;
+ *
+ *  word = single-quote | double-quote | brace
+ *       | ( escape-sequence | continuation | ( character - space ) )+ ;
+ *
+ *  single-quote    = "'" ( escape-sequence | continuation
+ *                        | ( character - "'" ) )* "'" ;
+ *  double-quote    = '"' ( escape-sequence | continuation
+ *                        | ( character - '"' ) )* '"' ;
+ *  escape-sequence = '\\' ( character - '\n' ) ;
+ *  brace           = '{' ( brace | '\\' character | ( character - '}' ) )* '}' ;
+ *
+ *  continuation    = '\\\n' ;
+ *  space           = ' ' | '\f' | '\n' | '\r' | '\t' | '\v' ;
+ *  character       = [\x00-\xff] ;
+ *
+ * Because of the similarity of these grammars, they may be parsed using the
+ * same functions. Where differences occur, they are selected by a boolean
+ * parameter.
+ *
+ * In general, each parse function must determine two things: what symbol to try
+ * and parse when there are multiple possible choices, and where the end of the
+ * symbol is. To choose from symbols, we consider the FIRST set of the possible
+ * symbols. The FIRST set is the set of terminals which may begin a symbol. For
+ * example, in the grammar
+ *
+ * ::
+ *
+ *  number   = sign? integer fraction? ;
+ *  integer  = digit+ ;
+ *  fraction = '.' digit+ ;
+ *  sign     = '+' | '-' ;
+ *  digit    = [0-9] ;
+ *
+ * the FIRST sets are
+ *
+ * ::
+ *
+ *  FIRST(sign)     = '+' | '-' ;
+ *  FIRST(digit)    = [0-9] ;
+ *  FIRST(fraction) = '.' ;
+ *  FIRST(integer)  = FIRST(digit) ;
+ *                  = [0-9] ;
+ *  FIRST(number)   = FIRST(sign) | FIRST(integer) ;
+ *                  = '+' | '-' | [0-9] ;
+ *
+ * A parser, when deciding whether to parse a sign or an integer, may observe
+ * whether the number begins with FIRST(sign) or FIRST(integer). To prevent
+ * backtracking, the FIRST sets of all symbols which must be picked from must
+ * be disjoint. When this is not the case (like for escape-sequence and
+ * continuation), the analogous SECOND set may be used.
+ *
+ * The FOLLOW set of a symbol is the union of all FIRST sets which may come
+ * after it. For the above grammar, the FOLLOW sets are
+ *
+ * ::
+ *
+ *  FOLLOW(number)   = ;
+ *  FOLLOW(sign)     = FIRST(integer) ;
+ *                   = [0-9] ;
+ *  FOLLOW(integer)  = FIRST(fraction) | ;
+ *                   = '.' | ;
+ *  FOLLOW(fraction) = ;
+ *  FOLLOW(digit)    = FOLLOW(integer) | FOLLOW(fraction) ;
+ *                   = '.' | ;
+ *
+ * The parser, when deciding whether it is done parsing an integer, may consider
+ * whether the current character matches FOLLOW(integer). To prevent
+ * backtracking, the FOLLOW sets of each symbol must not contain characters
+ * which may be present at the end of the symbol. In general, the FOLLOW set of
+ * a symbol is interesting if the symbol has a trailing repeating portion which
+ * is ended only by the next symbol.
+ */
+
+static struct lil_symbol *parse_word(struct lil_parser *p, bool islist);
+static struct lil_list *parse_script(struct lil_parser *p);
+
+/**
+ * eof() - Whether we have reached the end of input
+ * @p: The parser
+ *
+ * Return: %true if there are no more characters left
+ */
+static bool eof(struct lil_parser *p)
 {
-	return ch == '$' || ch == '{' || ch == '}' || ch == '[' || ch == ']' ||
-	       ch == '"' || ch == '\'' || ch == ';';
+	return p->pos.head >= p->len;
 }
 
-static int eolchar(char ch)
+/**
+ * peek() - Peek at the next character to parse
+ * @p: The parser
+ *
+ * eof() for @p must be %false.
+ *
+ * Return: The character which would be returned by pop().
+ */
+static char peek(struct lil_parser *p)
 {
-	return ch == '\n' || ch == '\r' || ch == ';';
+	return p->code[p->pos.head];
 }
 
-static int ateol(struct lil *lil)
+/**
+ * peek2() - Peek two characters ahead
+ * @p: The parser
+ *
+ * NB: Unlike peek(), peek2() checks for eof().
+ *
+ * Return: The character which would be returned by pop()ing twice, or %-1 if no
+ * such character is present (due to the end of input).
+ */
+static char peek2(struct lil_parser *p)
 {
-	return !(lil->ignoreeol) && eolchar(lil->code[lil->head]);
+	return p->pos.head + 1 < p->len ? p->code[p->pos.head + 1] : -1;
 }
 
-static void lil_skip_spaces(struct lil *lil)
+/**
+ * pop() - Advance the parser by one character
+ * @p: The parser
+ *
+ * Return: The character which is next in the input
+ */
+static char pop(struct lil_parser *p)
 {
-	while (lil->head < lil->clen) {
-		if (lil->code[lil->head] == '#') {
-			if (lil->code[lil->head + 1] == '#' &&
-			    lil->code[lil->head + 2] != '#') {
-				lil->head += 2;
-				while (lil->head < lil->clen) {
-					if ((lil->code[lil->head] == '#') &&
-					    (lil->code[lil->head + 1] == '#') &&
-					    (lil->code[lil->head + 2] != '#')) {
-						lil->head += 2;
-						break;
-					}
-					lil->head++;
-				}
-			} else {
-				while (lil->head < lil->clen &&
-				       !eolchar(lil->code[lil->head]))
-					lil->head++;
+	char ret = p->code[p->pos.head++];
+
+#if IS_ENABLED(CONFIG_LIL_FULL)
+	p->pos.column++;
+	if (ret == '\n') {
+		p->pos.line++;
+		p->pos.column = 1;
+	}
+#endif
+	return ret;
+}
+
+/*
+ * Error-reporting helpers. Each one records an error in (p)->err; none of them
+ * free anything or return from the calling function, so the caller is still
+ * responsible for cleaning up and returning.
+ */
+
+/*
+ * Record the name of the function reporting the error. This is only stored
+ * when CONFIG_LIL_DEBUG is enabled; otherwise the macro expands to nothing.
+ */
+#if IS_ENABLED(CONFIG_LIL_DEBUG)
+#define set_err_func(p, _func) (p)->err.func = _func
+#else
+#define set_err_func(p, _func)
+#endif
+
+/* Report that an allocation failed */
+#define err_oom(p) do { \
+	(p)->err.type = PERR_OOM; \
+	set_err_func(p, __func__); \
+} while (0)
+
+/*
+ * Report that the character at the current position is not allowed here. The
+ * parser must not be at the end of input.
+ */
+#define err_unexpected_1(p) do { \
+	assert(!eof(p)); \
+	(p)->err.type = PERR_UNEXPECTED; \
+	set_err_func(p, __func__); \
+} while (0)
+
+/*
+ * err_expected() reports that character c was required (e.g. a closing
+ * delimiter); _matching is the position of the construct which required it.
+ * err_unexpected_2() reports that the input ended unexpectedly; the parser
+ * must be at the end of input.
+ *
+ * The expected character and the matching position are only stored when
+ * CONFIG_LIL_FULL is enabled. Otherwise only the error type is recorded, c is
+ * ignored, and _matching is evaluated and discarded.
+ */
+#if IS_ENABLED(CONFIG_LIL_FULL)
+#define err_expected(p, c, _matching) do { \
+	(p)->err.type = PERR_EXPECTED; \
+	set_err_func(p, __func__); \
+	(p)->err.expected = (c); \
+	(p)->err.matching = (_matching); \
+} while (0)
+
+#define err_unexpected_2(p, _matching) do { \
+	assert(eof(p)); \
+	(p)->err.type = PERR_UNEXPECTED; \
+	set_err_func(p, __func__); \
+	(p)->err.matching = (_matching); \
+} while (0)
+#else /* CONFIG_LIL_FULL */
+#define UNUSED(x) (void)(x)
+
+#define err_expected(p, c, _matching) do { \
+	(p)->err.type = PERR_EXPECTED; \
+	set_err_func(p, __func__); \
+	UNUSED(_matching); \
+} while (0)
+
+#define err_unexpected_2(p, _matching) do { \
+	assert(eof(p)); \
+	(p)->err.type = PERR_UNEXPECTED; \
+	set_err_func(p, __func__); \
+	UNUSED(_matching); \
+} while (0)
+#endif
+
+/*
+ * err_unexpected(p) reports an unexpected character at the current position;
+ * err_unexpected(p, matching) reports an unexpected end of input. The variant
+ * is chosen by pasting the argument count onto "err_unexpected_".
+ * NOTE(review): __concat() and __count_args() are defined outside this patch
+ * section; this assumes __count_args() expands to the number of arguments.
+ */
+#define err_unexpected(p, ...) \
+	__concat(err_unexpected_, __count_args(p, ##__VA_ARGS__)) (p, ##__VA_ARGS__)
+
+/**
+ * _expect() - Expect a specific character next
+ * @p: The parser
+ * @sym: The symbol to return on success; freed with lil_free_symbol() on error
+ * @expected: The character to expect
+ * @matching: The position of the thing we expected to match
+ * @func: The name of the calling function, recorded as the origin of the error
+ *        (only stored when CONFIG_LIL_DEBUG is enabled)
+ *
+ * If @sym is %NULL (an earlier parsing step already failed), it is passed
+ * through unchanged: no input is consumed and the existing error is kept.
+ * Otherwise one character is consumed, even if it turns out not to be
+ * @expected.
+ *
+ * Call this through the expect() wrapper, which supplies @func.
+ *
+ * Return: @sym, or %NULL on error
+ */
+static struct lil_symbol *_expect(struct lil_parser *p, struct lil_symbol *sym,
+				  char expected, struct lil_position *matching,
+				  const char *func)
+{
+	char got;
+
+	if (!sym)
+		return sym;
+
+	if (eof(p))
+		goto err;
+
+	got = pop(p);
+	if (got != expected)
+		goto err;
+	return sym;
+
+err:
+	lil_free_symbol(sym);
+	err_expected(p, expected, *matching);
+	/* err_expected() recorded "_expect"; replace it with the real caller */
+	set_err_func(p, func);
+	return NULL;
+}
+
+#define expect(p, sym, expected, matching) \
+	_expect(p, sym, expected, matching, __func__)
+
+/*
+ * Groups of case labels for use inside a switch on the next character. The
+ * colon of the last label is deliberately left out so that the macros read
+ * like ordinary labels at the point of use, e.g. "CASE_SPACE:".
+ */
+
+/* The "space" characters of the script grammar; '\n' is not one of them */
+#define CASE_SPACE \
+	case '\f': \
+	case '\r': \
+	case '\t': \
+	case '\v': \
+	case ' ' \
+
+/*
+ * Characters which may begin whitespace. A backslash only begins whitespace
+ * when it starts a continuation, so users must still check peek2() for '\n'.
+ */
+#define CASE_WHITESPACE \
+	CASE_SPACE: \
+	case '\\'
+
+/* Characters which may begin a terminator ('#' begins a comment) */
+#define CASE_TERMINATOR \
+	case '\n': \
+	case ';': \
+	case '#'
+
+/**
+ * parse_continuation() - Parse a line continuation
+ * @p: The parser
+ *
+ * Consumes two characters. The caller must already have checked, using peek()
+ * and peek2(), that they are a backslash followed by a newline.
+ *
+ * FIRST(continuation) = '\\' ;
+ * SECOND(continuation) = '\n' ;
+ */
+static void parse_continuation(struct lil_parser *p)
+{
+	char c;
+
+	/*
+	 * Consume the input outside of assert() so that the parser still
+	 * advances if assert() does not evaluate its argument. Otherwise
+	 * callers which loop until the continuation is gone would never
+	 * terminate.
+	 */
+	c = pop(p);
+	assert(c == '\\');
+	c = pop(p);
+	assert(c == '\n');
+}
+
+/**
+ * parse_whitespace() - Parse a single unit of whitespace, if present
+ * @p: The parser
+ *
+ * Consumes one space character or one line continuation. Anything else is
+ * left untouched. The parser must not be at the end of input, and if the next
+ * character is a backslash the caller must have checked that it begins a
+ * continuation.
+ *
+ * FIRST(whitespace) = space | '\\' ;
+ */
+static void parse_whitespace(struct lil_parser *p)
+{
+	char next = peek(p);
+
+	if (next == '\\') {
+		parse_continuation(p);
+	} else if (next == '\f' || next == '\r' || next == '\t' ||
+		   next == '\v' || next == ' ') {
+		pop(p);
+	}
+}
+
+/**
+ * parse_dollar() - Parse a variable reference
+ * @p: The parser
+ *
+ * The next character must be '$'. It is consumed, and the word following it
+ * is parsed as the name of the variable. A '$' which is the last character of
+ * the input is reported as an unexpected end of input.
+ *
+ * FIRST(dollar) = '$' ;
+ * FOLLOW(dollar) = FOLLOW(word) ;
+ *
+ * Return: A symbol containing the name of the variable, or %NULL on error
+ */
+static struct lil_symbol *parse_dollar(struct lil_parser *p)
+{
+	struct lil_position savepos = p->pos;
+	struct lil_symbol *sym = calloc(1, sizeof(*sym));
+	char c;
+
+	if (!sym) {
+		err_oom(p);
+		return NULL;
+	}
+
+	/* Keep the pop() outside of assert() so it always happens */
+	c = pop(p);
+	assert(c == '$');
+
+	/* Set the type first so that every error path frees the same shape */
+	sym->type = LIL_SYMBOL_VARIABLE;
+
+	/*
+	 * parse_word() peeks at the next character without checking for the
+	 * end of input, so a trailing '$' must be rejected here.
+	 */
+	if (eof(p)) {
+		err_unexpected(p, savepos);
+		lil_free_symbol(sym);
+		return NULL;
+	}
+
+	sym->symbol = parse_word(p, false);
+	if (!sym->symbol) {
+		lil_free_symbol(sym);
+		return NULL;
+	}
+	return sym;
+}
+
+/**
+ * parse_bracket() - Parse a script enclosed in brackets
+ * @p: The parser
+ *
+ * The next character must be '['. It is consumed, the script following it is
+ * parsed, and a closing ']' is then required. If the ']' is missing, the
+ * position of the '[' is reported as the matching position.
+ *
+ * FIRST(bracket) = '[' ;
+ * FOLLOW(bracket) = FOLLOW(word) ;
+ *
+ * Return: A symbol containing the script, or %NULL on error
+ */
+static struct lil_symbol *parse_bracket(struct lil_parser *p)
+{
+	struct lil_position savepos = p->pos;
+	struct lil_list *script;
+	char c;
+
+	/* Keep the pop() outside of assert() so it always happens */
+	c = pop(p);
+	assert(c == '[');
+
+	script = parse_script(p);
+	/*
+	 * expect() passes a NULL symbol straight through.
+	 * NOTE(review): this relies on list_to_symbol() mapping a NULL list
+	 * to a NULL symbol; list_to_symbol() is defined outside this section.
+	 */
+	return expect(p, list_to_symbol(script), ']', &savepos);
+}
+
+/**
+ * parse_comment() - Parse a comment
+ * @p: The parser
+ *
+ * The next character must be '#'. Everything up to and including the next
+ * newline is consumed. A comment which is ended by the end of input instead
+ * of a newline is accepted as well.
+ *
+ * FIRST(comment) = '#' ;
+ */
+static void parse_comment(struct lil_parser *p)
+{
+	char c;
+
+	/* Keep the pop() outside of assert() so it always happens */
+	c = pop(p);
+	assert(c == '#');
+
+	while (!eof(p)) {
+		if (pop(p) == '\n')
+			return;
+	}
+}
+
+/**
+ * parse_brace() - Parse a value enclosed in braces
+ * @p: The parser
+ * @islist: If we are parsing a list
+ *
+ * This function is used to parse braces in scripts and lists. If we are parsing
+ * a list, then comments are not parsed.
+ *
+ * FIRST(brace) = '{' ;
+ * FOLLOW(brace) = FOLLOW(word) ;
+ *
+ * Return: A symbol containing a value, or %NULL on error
+ */
+static struct lil_symbol *parse_brace(struct lil_parser *p, bool islist)
+{
+	struct lil_value *val = alloc_value(NULL);
+	struct lil_position savepos = p->pos;
+
+	if (!val)
+		goto oom;
+
+	assert(pop(p) == '{');
+	while (!eof(p)) {
+		switch (peek(p)) {
+		case '{': {
+			bool fail;
+			struct lil_symbol *brace = parse_brace(p, islist);
+
+			if (!brace)
+				goto err;
+
+			fail = lil_append_char(val, '{') ||
+			       lil_append_val(val, &brace->value) ||
+			       lil_append_char(val, '}');
+
+			lil_free_symbol(brace);
+			if (fail)
+				goto oom;
+			break;
+		}
+		case '#':
+			if (islist)
+				goto character;
+			parse_comment(p);
+			break;
+		case '\\': {
+			struct lil_position escapepos = p->pos;
+
+			if (lil_append_char(val, pop(p)))
+				goto oom;
+
+			if (eof(p)) {
+				err_unexpected(p, escapepos);
+				goto err;
 			}
-		} else if (lil->code[lil->head] == '\\' &&
-			   eolchar(lil->code[lil->head + 1])) {
-			lil->head++;
-			while (lil->head < lil->clen &&
-			       eolchar(lil->code[lil->head]))
-				lil->head++;
-		} else if (eolchar(lil->code[lil->head])) {
-			if (lil->ignoreeol)
-				lil->head++;
+		}
+		fallthrough;
+		default:
+character:
+			if (lil_append_char(val, pop(p)))
+				goto oom;
+			break;
+		case '}':
+			pop(p);
+			return value_to_symbol(val);
+		}
+	}
+	err_expected(p, '}', savepos);
+	goto err;
+
+oom:
+	err_oom(p);
+err:
+	lil_free_value(val);
+	return NULL;
+}
+
+/**
+ * parse_escape() - Parse an escape sequence
+ * @p: The parser
+ *
+ * The next character must be a backslash. It and the character after it are
+ * consumed. The letters a, b, f, n, t, r and v are translated to the
+ * corresponding control characters; any other character stands for itself.
+ * A backslash followed by a newline is a continuation, not an escape
+ * sequence, and must be handled by the caller before calling this function.
+ *
+ * FIRST(escape-sequence) = '\\' ;
+ * SECOND(escape-sequence) = character - '\n' ;
+ *
+ * Return: The character parsed. If there was an error, then @p->err.type will
+ * be set. The return value is not meaningful in that case, so callers must
+ * check @p->err.type instead of comparing the result against a sentinel.
+ */
+static char parse_escape(struct lil_parser *p)
+{
+	struct lil_position savepos = p->pos;
+	char c;
+
+	/* Keep the pop() outside of assert() so it always happens */
+	c = pop(p);
+	assert(c == '\\');
+
+	/* A lone backslash at the end of input escapes nothing */
+	if (eof(p)) {
+		err_unexpected(p, savepos);
+		return -1;
+	}
+
+	c = pop(p);
+	switch (c) {
+	case 'a':
+		return '\a';
+	case 'b':
+		return '\b';
+	case 'f':
+		return '\f';
+	case 'n':
+		return '\n';
+	case 't':
+		return '\t';
+	case 'r':
+		return '\r';
+	case 'v':
+		return '\v';
+	case '\n':
+		/* Callers must have dealt with continuations already */
+		assert(0);
+		fallthrough;
+	default:
+		return c;
+	}
+}
+
+/**
+ * parse_quote() - Parse a value in quotes
+ * @p: The parser
+ * @q: The quote character, either ``'`` or ``"``. The special value %-1 may
+ *     also be used to specify that no enclosing quotes are expected.
+ * @islist: If we are parsing a list
+ *
+ * This function is used both for parsing scripts and lists. When used in
+ * scripts, we parse a list of symbols which must be later evaluated and
+ * concatenated. When parsing a list, we just parse a value because lists do not
+ * contain $ or [ substitutions.
+ *
+ * When @q is %-1, then no enclosing quotes are parsed. This may be used to
+ * parse a string into a form which may have substitutions performed on it.
+ *
+ * FIRST(quote) = "'" | '"' ;
+ * FOLLOW(quote) = FOLLOW(word) ;
+ *
+ * Return: A list of symbols, a value (if @islist), or %NULL on error.
+ */
+static struct lil_symbol *parse_quote(struct lil_parser *p, char q, bool islist)
+{
+	struct lil_position savepos = p->pos;
+	struct lil_list *list = islist ? NULL : lil_alloc_list();
+	struct lil_value *val = alloc_value(NULL);
+
+	if ((!islist && !list) || !val)
+		goto oom;
+
+	if (q != -1) {
+		assert(q == '\'' || q == '"');
+		assert(pop(p) == q);
+	}
+
+	while (!eof(p)) {
+		char c = peek(p);
+
+		switch (c) {
+		case '$':
+		case '[': {
+			struct lil_symbol *sym;
+
+			if (islist)
+				goto character;
+
+			if (val->l) {
+				if (lil_list_append(list, val))
+					goto oom;
+				val = alloc_value(NULL);
+				if (!val)
+					goto oom;
+			}
+
+			if (c == '[')
+				sym = parse_bracket(p);
 			else
-				break;
-		} else if (isspace(lil->code[lil->head])) {
-			lil->head++;
-		} else {
+				sym = parse_dollar(p);
+
+			if (!sym)
+				goto err;
+
+			if (lil_list_append(list, sym))
+				goto oom;
+
+			break;
+		}
+		case '\'':
+		case '"':
+			if (c == q) {
+				pop(p);
+				goto out;
+			}
+			goto character;
+		case '\\':
+			if (peek2(p) == '\n') {
+				parse_continuation(p);
+				continue;
+			}
+
+			c = parse_escape(p);
+			if (p->err.type)
+				goto err;
+			goto character_post_pop;
+		default:
+character:
+			c = pop(p);
+character_post_pop:
+			if (lil_append_char(val, c))
+				goto oom;
 			break;
 		}
 	}
+
+	if (q == -1)
+		goto out;
+	err_expected(p, q, savepos);
+	goto err;
+
+out:
+	if (islist)
+		return value_to_symbol(val);
+	else if (lil_list_append(list, val))
+		goto oom;
+	return list_to_symbol(list);
+
+oom:
+	err_oom(p);
+err:
+	lil_free_list(list);
+	lil_free_value(val);
+	return NULL;
 }
 
-static struct lil_value *get_bracketpart(struct lil *lil)
+/**
+ * parse_word() - Parse a word
+ * @p: The parser
+ *
+ * This function is used to parse words for both scripts and lists. For lists,
+ * $ and [ substitution is not performed. In addition, there are fewer illegal
+ * characters (since we no longer need to worry about some cases of nesting).
+ * Because of this, the FIRST and FOLLOW sets for parsing scripts are:
+ *
+ * FIRST(word) = character - ']' - '\\' - '}' - terminator - FIRST(whitespace) ;
+ * FOLLOW(word) = "'" | '"' | ']' | FIRST(terminator) | space | ;
+ *
+ * and the sets when parsing lists are:
+ *
+ * FIRST(word) = character - space ;
+ * FOLLOW(word) = space | ;
+ *
+ * Return: A symbol for one word, or %NULL on error. If we are parsing a list,
+ *         this symbol will always have type %LIL_SYMBOL_VALUE.
+ */
+static struct lil_symbol *parse_word(struct lil_parser *p, bool islist)
 {
-	size_t cnt = 1;
-	struct lil_value *val, *cmd = alloc_value(NULL);
-	int save_eol = lil->ignoreeol;
+	struct lil_value *word;
 
-	lil->ignoreeol = 0;
-	lil->head++;
-	while (lil->head < lil->clen) {
-		if (lil->code[lil->head] == '[') {
-			lil->head++;
-			cnt++;
-			lil_append_char(cmd, '[');
-		} else if (lil->code[lil->head] == ']') {
-			lil->head++;
-			if (--cnt == 0)
-				break;
-			else
-				lil_append_char(cmd, ']');
-		} else {
-			lil_append_char(cmd, lil->code[lil->head++]);
-		}
+	switch (peek(p)) {
+	case '\'':
+	case '"':
+		return parse_quote(p, peek(p), islist);
+	case '{':
+		return parse_brace(p, islist);
+	case '[':
+		if (islist)
+			break;
+		return parse_bracket(p);
+	case '$':
+		if (islist)
+			break;
+		return parse_dollar(p);
+	case '\\':
+		if (peek2(p) == '\n')
+			goto terminator;
+		break;
+	case ']':
+	case '}':
+	case ';':
+	case '#':
+		if (islist)
+			break;
+		fallthrough;
+	case '\n':
+	CASE_SPACE:
+terminator:
+		err_unexpected(p);
+		return NULL;
 	}
 
-	val = lil_parse_value(lil, cmd, 0);
-	lil_free_value(cmd);
-	lil->ignoreeol = save_eol;
-	return val;
-}
+	word = alloc_value(NULL);
+	if (!word)
+		goto oom;
 
-static struct lil_value *get_dollarpart(struct lil *lil)
-{
-	struct lil_value *val, *name, *tmp;
+	do {
+		char c = peek(p);
 
-	lil->head++;
-	name = next_word(lil);
-	tmp = alloc_value("set ");
-	lil_append_val(tmp, name);
-	lil_free_value(name);
-
-	val = lil_parse_value(lil, tmp, 0);
-	lil_free_value(tmp);
-	return val;
-}
-
-static struct lil_value *next_word(struct lil *lil)
-{
-	struct lil_value *val;
-	size_t start;
-
-	lil_skip_spaces(lil);
-	if (lil->code[lil->head] == '$') {
-		val = get_dollarpart(lil);
-	} else if (lil->code[lil->head] == '{') {
-		size_t cnt = 1;
-
-		lil->head++;
-		val = alloc_value(NULL);
-		while (lil->head < lil->clen) {
-			if (lil->code[lil->head] == '{') {
-				lil->head++;
-				cnt++;
-				lil_append_char(val, '{');
-			} else if (lil->code[lil->head] == '}') {
-				lil->head++;
-				if (--cnt == 0)
-					break;
-				else
-					lil_append_char(val, '}');
-			} else {
-				lil_append_char(val, lil->code[lil->head++]);
+		switch (c) {
+		case ']':
+		case '\'':
+		case '"':
+		case ';':
+		case '#':
+			if (islist)
+				goto character;
+			fallthrough;
+		case '\n':
+		CASE_SPACE:
+			return value_to_symbol(word);
+		case '{':
+		case '}':
+		case '[':
+		case '$':
+			if (islist)
+				goto character;
+			err_unexpected(p);
+			return NULL;
+		case '\\':
+			if (peek2(p) == '\n') {
+				parse_continuation(p);
+				continue;
 			}
+
+			c = parse_escape(p);
+			if (p->err.type)
+				goto err;
+			goto character_post_pop;
+		default:
+character:
+			c = pop(p);
+character_post_pop:
+			if (lil_append_char(word, c))
+				goto oom;
 		}
-	} else if (lil->code[lil->head] == '[') {
-		val = get_bracketpart(lil);
-	} else if (lil->code[lil->head] == '"' ||
-		   lil->code[lil->head] == '\'') {
-		char sc = lil->code[lil->head++];
+	} while (!eof(p));
 
-		val = alloc_value(NULL);
-		while (lil->head < lil->clen) {
-			if (lil->code[lil->head] == '[' ||
-			    lil->code[lil->head] == '$') {
-				struct lil_value *tmp =
-					lil->code[lil->head] == '$' ?
-						      get_dollarpart(lil) :
-						      get_bracketpart(lil);
+	return value_to_symbol(word);
 
-				lil_append_val(val, tmp);
-				lil_free_value(tmp);
-				lil->head--; /* avoid skipping the char below */
-			} else if (lil->code[lil->head] == '\\') {
-				lil->head++;
-				switch (lil->code[lil->head]) {
-				case 'b':
-					lil_append_char(val, '\b');
-					break;
-				case 't':
-					lil_append_char(val, '\t');
-					break;
-				case 'n':
-					lil_append_char(val, '\n');
-					break;
-				case 'v':
-					lil_append_char(val, '\v');
-					break;
-				case 'f':
-					lil_append_char(val, '\f');
-					break;
-				case 'r':
-					lil_append_char(val, '\r');
-					break;
-				case '0':
-					lil_append_char(val, 0);
-					break;
-				case 'a':
-					lil_append_char(val, '\a');
-					break;
-				case 'c':
-					lil_append_char(val, '}');
-					break;
-				case 'o':
-					lil_append_char(val, '{');
-					break;
-				default:
-					lil_append_char(val,
-							lil->code[lil->head]);
-					break;
+oom:
+	err_oom(p);
+err:
+	lil_free_value(word);
+	return NULL;
+}
+
+/**
+ * parse_command() - Parse a command
+ * @p: The parser
+ *
+ * FIRST(command) = FIRST(word) ;
+ * FOLLOW(command) = ']' | FIRST(terminator) | ;
+ *
+ * Words are read one at a time with parse_word() and appended to the command.
+ * After each word the parser must be at the end of its input, at a ``]``, at a
+ * terminator, or at whitespace (including a backslash-newline continuation).
+ * Anything else is reported with err_unexpected(). Whitespace between words is
+ * consumed with parse_whitespace(); if it is followed by ``]`` or a terminator
+ * the command is complete, otherwise another word is parsed.
+ *
+ * Return: A list of words which compose the command, or %NULL on error
+ */
+static struct lil_list *parse_command(struct lil_parser *p)
+{
+	struct lil_symbol *word;
+	struct lil_list *command = lil_alloc_list();
+
+	if (!command)
+		goto oom;
+	list_to_symbol(command)->type = LIL_SYMBOL_COMMAND;
+
+	do {
+		word = parse_word(p, false);
+		if (!word)
+			goto err;
+
+		if (lil_list_append(command, word))
+			goto oom; /* NOTE(review): word does not appear to be freed on this path -- confirm */
+
+		if (eof(p))
+			return command;
+
+		switch (peek(p)) {
+		CASE_WHITESPACE:
+			do {
+				switch (peek(p)) {
+				case '\\':
+					if (peek2(p) != '\n')
+						break;
+					fallthrough;
+				CASE_SPACE:
+					parse_whitespace(p);
+					continue;
+				case ']':
+				CASE_TERMINATOR:
+					return command;
 				}
-			} else if (lil->code[lil->head] == sc) {
-				lil->head++;
+
 				break;
-			} else {
-				lil_append_char(val, lil->code[lil->head]);
-			}
-			lil->head++;
+			} while (!eof(p));
+
+			continue;
+		case ']':
+		CASE_TERMINATOR:
+			return command;
 		}
-	} else {
-		start = lil->head;
-		while (lil->head < lil->clen &&
-		       !isspace(lil->code[lil->head]) &&
-		       !islilspecial(lil->code[lil->head]))
-			lil->head++;
-		val = alloc_value_len(lil->code + start, lil->head - start);
-	}
-	return val ? val : alloc_value(NULL);
+
+		err_unexpected(p);
+		goto err;
+	} while (!eof(p));
+
+	return command;
+
+oom:
+	err_oom(p);
+err:
+	lil_free_list(command);
+	return NULL;
 }
 
-static struct lil_list *substitute(struct lil *lil)
+/**
+ * parse_terminator() - Parse the end of a command
+ * @p: The parser
+ *
+ * FIRST(terminator) = ';' | '\n' | '#' ;
+ *
+ * A newline or semicolon is consumed with pop(); a ``#`` is handed to
+ * parse_comment(). The caller must only call this when peek() is one of those
+ * characters: any other character trips the assert() at the end.
+ */
+static void parse_terminator(struct lil_parser *p)
 {
-	struct lil_list *words = lil_alloc_list();
+	switch (peek(p)) {
+	case '\n':
+	case ';':
+		pop(p);
+		return;
+	case '#':
+		parse_comment(p);
+		return;
+	}
+	assert(0);
+}
 
-	lil_skip_spaces(lil);
-	while (lil->head < lil->clen && !ateol(lil) && !lil->err) {
-		struct lil_value *w = alloc_value(NULL);
+/**
+ * parse_script() - Parse a script
+ * @p: The parser
+ *
+ * FIRST(script) = FIRST(whitespace) | FIRST(command) | FIRST(terminator) ;
+ * FOLLOW(script) = ']' | ;
+ *
+ * Leading whitespace (including backslash-newline continuations) is skipped
+ * before each command. Terminators are consumed with parse_terminator(), and
+ * a ``]`` ends the script without being consumed. Everything else is parsed as
+ * a command with parse_command() and appended to the script.
+ *
+ * Return: A symbol containing a list of commands which compose the script, or
+ *         %NULL on error
+ */
+static struct lil_list *parse_script(struct lil_parser *p)
+{
+	struct lil_list *command;
+	struct lil_list *script = lil_alloc_list();
 
-		do {
-			size_t head = lil->head;
-			struct lil_value *wp = next_word(lil);
+	if (!script)
+		goto oom;
+	list_to_symbol(script)->type = LIL_SYMBOL_SCRIPT;
 
-			if (head ==
-			    lil->head) { /* something wrong, the parser can't proceed */
-				lil_free_value(w);
-				lil_free_value(wp);
-				lil_free_list(words);
-				return NULL;
+	do {
+		while (!eof(p)) {
+			switch (peek(p)) {
+			case '\\':
+				if (peek2(p) != '\n')
+					break;
+				fallthrough;
+			CASE_SPACE:
+				parse_whitespace(p);
+				continue;
 			}
+			break;
+		}
 
-			lil_append_val(w, wp);
-			lil_free_value(wp);
-		} while (lil->head < lil->clen &&
-			 !eolchar(lil->code[lil->head]) &&
-			 !isspace(lil->code[lil->head]) && !lil->err);
-		lil_skip_spaces(lil);
+		switch (peek(p)) {
+		case '\\':
+			if (peek2(p) != '\n')
+				break;
+			fallthrough;
+		CASE_SPACE:
+			err_unexpected(p);
+			goto err;
+		CASE_TERMINATOR:
+			parse_terminator(p);
+			continue;
+		case ']':
+			return script;
+		}
 
-		lil_list_append(words, w);
+		command = parse_command(p);
+		if (!command)
+			goto err;
+
+		if (lil_list_append(script, list_to_symbol(command))) {
+			/* The script never took ownership, so free it here */
+			lil_free_list(command);
+			goto oom;
+		}
+	} while (!eof(p));
+	return script;
+
+oom:
+	err_oom(p);
+err:
+	lil_free_list(script);
+	return NULL;
+}
+
+#if IS_ENABLED(CONFIG_LIL_DEBUG)
+/*
+ * Recursively print @sym, indented by @level tabs. Values are printed as
+ * strings. Scripts, commands, and lists print a tag followed by each of their
+ * elements one level deeper. Variables print a tag followed by the symbol they
+ * wrap. Any other type prints "<unknown>".
+ */
+static void do_print_symbol(struct lil_symbol *sym, unsigned int level)
+{
+	unsigned int i;
+
+	for (i = 0; i < level; i++)
+		putc('\t');
+
+	switch (sym->type) {
+	case LIL_SYMBOL_VALUE:
+		puts(lil_to_string(&sym->value));
+		putc('\n');
+		break;
+	case LIL_SYMBOL_SCRIPT:
+		puts("<script>\n");
+		goto list;
+	case LIL_SYMBOL_COMMAND:
+		puts("<command>\n");
+		goto list;
+	case LIL_SYMBOL_LIST:
+		puts("<list>\n");
+list:
+		for (i = 0; i < sym->list.c; i++)
+			do_print_symbol(sym->list.v[i], level + 1);
+		break;
+	case LIL_SYMBOL_VARIABLE:
+		puts("<variable>\n");
+		do_print_symbol(sym->symbol, level + 1);
+		break;
+	default:
+		puts("<unknown>\n");
+		break;
+	}
+}
+
+/*
+ * Helper function for printing out symbols; insert where you would like to
+ * debug something. Prints @sym and everything it contains, starting at
+ * indentation level 0.
+ */
+static void __maybe_unused print_symbol(struct lil_symbol *sym)
+{
+	do_print_symbol(sym, 0);
+}
+#endif
+
+/**
+ * lil_parser_init() - Reset a parser to the start of its input
+ * @p: The parser to initialize
+ *
+ * Clears any pending error in @p->err and rewinds @p->pos to the first
+ * character (and, with CONFIG_LIL_FULL, to line 1, column 1). @p->code and
+ * @p->len are not touched.
+ */
+static void lil_parser_init(struct lil_parser *p)
+{
+	p->err.type = PERR_NONE;
+	p->pos.head = 0;
+#if IS_ENABLED(CONFIG_LIL_FULL)
+	p->pos.column = 1;
+	p->pos.line = 1;
+#endif
+}
+
+/**
+ * parser_set_error() - Report a parser error to the interpreter
+ * @lil: The interpreter to record the error on
+ * @p: The parser whose @p->err describes the error
+ *
+ * Translates @p->err into a call to lil_set_error(). %PERR_NONE is a no-op.
+ * %PERR_OOM becomes %LIL_ERR_OOM. %PERR_EXPECTED and %PERR_UNEXPECTED become
+ * %LIL_ERR_EOF if the parser is at the end of its input and %LIL_ERR_SYNTAX
+ * otherwise. With CONFIG_LIL_FULL the message includes line:column positions;
+ * without it the message is just "syntax error".
+ */
+static void parser_set_error(struct lil *lil, struct lil_parser *p)
+{
+	switch (p->err.type) {
+	case PERR_OOM:
+		lil_set_error(lil, LIL_ERR_OOM,
+			      IS_ENABLED(CONFIG_LIL_DEBUG) ? p->err.func : NULL);
+		return;
+	case PERR_EXPECTED:
+	case PERR_UNEXPECTED: {
+		enum lil_error err;
+
+		if (eof(p))
+			err = LIL_ERR_EOF;
+		else
+			err = LIL_ERR_SYNTAX;
+
+		if (IS_ENABLED(CONFIG_LIL_FULL)) {
+			char fmt[] = "character '%c'";
+			char ubuf[sizeof(fmt)], buf[128];
+			char *unexpected = ubuf;
+			size_t pos = 0;
+
+			if (eof(p))
+				unexpected = "end of file";
+			else if (peek(p))
+				snprintf(ubuf, sizeof(ubuf), fmt, peek(p));
+			else
+				unexpected = "character '\\0'";
+
+/*
+ * NOTE(review): pos is advanced by snprintf()'s return value with no clamp. If
+ * that can ever exceed sizeof(buf), sizeof(buf) - pos would wrap -- confirm
+ * that the longest possible message fits in buf.
+ */
+#define format(fmt, ...) \
+	pos += snprintf(buf + pos, sizeof(buf) - pos, fmt, __VA_ARGS__)
+#define format_pos(pos) format("%zu:%zu", pos.line, pos.column)
+
+			format_pos(p->pos);
+			format(": unexpected %s", unexpected);
+			if (p->err.type == PERR_EXPECTED || eof(p)) {
+				format(" while parsing '%c' at ",
+				       p->code[p->err.matching.head]);
+				format_pos(p->err.matching);
+			}
+			if (p->err.type == PERR_EXPECTED)
+				format("; expected '%c'", p->err.expected);
+			if (IS_ENABLED(CONFIG_LIL_DEBUG))
+				format(" in %s", p->err.func);
+
+			lil_set_error(lil, err, buf);
+		} else {
+			lil_set_error(lil, err, "syntax error");
+		}
+		return;
+	}
+	case PERR_NONE:
+		return;
+	}
+	/* Only reached for an error type not handled by the switch above */
+	log_debug("unknown error %d\n", p->err.type);
+	assert(0);
+	lil_set_error(lil, LIL_ERR_CASE, NULL);
+}
+
+/**
+ * lil_parse() - Parse a script
+ * @lil: The interpreter. If parsing fails, the error is recorded here (see
+ *       parser_set_error()).
+ * @code: The script to parse
+ * @codelen: The length of @code, or %0 to use strlen(). Consequently @code
+ *           must be NUL-terminated whenever %0 is passed.
+ *
+ * Return: A list of commands in the script, which may be passed to lil_eval(),
+ *         or %NULL on error.
+ */
+struct lil_list *lil_parse(struct lil *lil, const char *code, size_t codelen)
+{
+	struct lil_list *script;
+	struct lil_parser p;
+
+	p.code = code;
+	p.len = codelen ? codelen : strlen(code);
+	lil_parser_init(&p);
+	script = parse_script(&p);
+	if (script && !eof(&p)) {
+		lil_free_list(script);
+		script = NULL;
+		err_unexpected(&p);
 	}
 
-	return words;
+	parser_set_error(lil, &p);
+	return script;
 }
 
-struct lil_list *lil_subst_to_list(struct lil *lil, struct lil_value *code)
+static struct lil_list *substitute(struct lil *lil, struct lil_list *list);
+
+/**
+ * concat() - Convert a list into a value
+ * @lil: The interpreter
+ * @list: The list to convert
+ *
+ * This evaluates each symbol in the list, and then concatenates them together
+ * with no spaces. For example, the list ``{a $b [c]}`` might result in ``ade``
+ * if the value of variable ``b`` is ``d`` and the result of the command ``c``
+ * is ``e``.
+ *
+ * This is used for evaluating quoted words. For example, the word
+ * ``"a${b}[c]"`` would be parsed as a list, and therefore needs to be pasted
+ * together without the usual intervening spaces.
+ *
+ * Return: A value containing the evaluated, concatenated symbols, or %NULL on
+ *         error
+ */
+static struct lil_value *concat(struct lil *lil, struct lil_list *list)
 {
-	const char *save_code = lil->code;
-	size_t save_clen = lil->clen;
-	size_t save_head = lil->head;
-	int save_igeol = lil->ignoreeol;
-	struct lil_list *words;
+	size_t i;
+	/* Must start out NULL: the error path frees it unconditionally */
+	struct lil_list *parts = NULL;
+	struct lil_value *val = alloc_value(NULL);
 
-	lil->code = lil_to_string(code);
-	lil->clen = code->l;
-	lil->head = 0;
-	lil->ignoreeol = 1;
+	if (!val)
+		goto oom;
 
-	words = substitute(lil);
-	if (!words)
-		words = lil_alloc_list();
+	assert(list_to_symbol(list)->type == LIL_SYMBOL_LIST);
+	parts = substitute(lil, list);
+	if (!parts)
+		goto err;
 
-	lil->code = save_code;
-	lil->clen = save_clen;
-	lil->head = save_head;
-	lil->ignoreeol = save_igeol;
-	return words;
-}
-
-struct lil_value *lil_subst_to_value(struct lil *lil, struct lil_value *code)
-{
-	struct lil_list *words = lil_subst_to_list(lil, code);
-	struct lil_value *val;
-
-	val = lil_list_to_value(words, 0);
-	lil_free_list(words);
+	for (i = 0; i < parts->c; i++) {
+		assert(parts->v[i]->type == LIL_SYMBOL_VALUE);
+		if (lil_append_val(val, &parts->v[i]->value))
+			goto oom;
+	}
+	lil_free_list(parts);
 	return val;
+
+oom:
+	lil_set_error_oom(lil);
+err:
+	lil_free_list(parts);
+	lil_free_value(val);
+	return NULL;
+}
+
+/**
+ * substitute_symbol() - Substitute one symbol
+ * @lil: The interpreter
+ * @sym: The symbol to substitute
+ *
+ * This performs substitutions on one symbol. For values, this returns a clone
+ * of the value. Lists (from parse_quote()) are concat()enated. Variables are
+ * dereferenced: the wrapped symbol is substituted first to get the variable's
+ * name, and a clone of that variable's value is returned.
+ * Sub-scripts (from parse_bracket()) are lil_eval()ed. NB: it is an error to
+ * pass a command to this function! Use substitute() instead.
+ *
+ * Return: The value of the symbol in the current context, or %NULL on error
+ */
+static struct lil_value *substitute_symbol(struct lil *lil,
+					   struct lil_symbol *sym)
+{
+	switch (sym->type) {
+	case LIL_SYMBOL_VALUE:
+		return lil_clone_value(&sym->value);
+	case LIL_SYMBOL_LIST:
+		return concat(lil, &sym->list);
+	case LIL_SYMBOL_VARIABLE: {
+		struct lil_value *val, *var =
+			substitute_symbol(lil, sym->symbol);
+
+		if (!var)
+			return NULL;
+
+		assert(value_to_symbol(var)->type == LIL_SYMBOL_VALUE);
+		val = lil_get_var(lil, lil_to_string(var));
+		lil_free_value(var); /* var only held the variable's name */
+		return lil_clone_value(val);
+	}
+	case LIL_SYMBOL_SCRIPT:
+		return lil_eval(lil, &sym->list, false);
+	case LIL_SYMBOL_COMMAND:
+		break; /* not valid here; reported as an invalid type below */
+	}
+	log_debug("invalid type %d\n", sym->type);
+	assert(0);
+	lil_set_error(lil, LIL_ERR_CASE, NULL);
+	return NULL;
+}
+
+/**
+ * substitute() - Substitute a list of symbols
+ * @lil: The interpreter
+ * @list: The list of symbols
+ *
+ * This performs substitutions using substitute_symbol() on each of the symbols
+ * in the list. NB: it is an error to pass a script to this function. Use
+ * lil_eval() or substitute_symbol() instead.
+ *
+ * The returned list has the same symbol type (command or list) as @list.
+ *
+ * Return: A list of &struct lil_value, or %NULL on error.
+ */
+static struct lil_list *substitute(struct lil *lil, struct lil_list *list)
+{
+	size_t i;
+	enum lil_symbol_type type = list_to_symbol(list)->type;
+	struct lil_value *val = NULL;
+	struct lil_list *values;
+
+	values = lil_alloc_list();
+	if (!values)
+		goto oom;
+	assert(type == LIL_SYMBOL_COMMAND || type == LIL_SYMBOL_LIST);
+	list_to_symbol(values)->type = type;
+
+	for (i = 0; i < list->c; i++) {
+		/*
+		 * Use the function-scope val (no shadowing declaration here)
+		 * so that the error path below frees a value which could not
+		 * be appended.
+		 */
+		val = substitute_symbol(lil, list->v[i]);
+		if (!val)
+			goto err;
+
+		if (lil_list_append(values, val))
+			goto oom;
+	}
+	return values;
+
+oom:
+	lil_set_error_oom(lil);
+err:
+	lil_free_value(val);
+	lil_free_list(values);
+	return NULL;
 }
 
 static struct lil_value *run_cmd(struct lil *lil, struct lil_func *cmd,
@@ -1197,7 +2096,7 @@ static struct lil_value *run_cmd(struct lil *lil, struct lil_func *cmd,
 					    val, LIL_SETVAR_LOCAL_NEW);
 			}
 		}
-		r = lil_parse_value(lil, cmd->code, 1);
+		r = lil_parse_value(lil, cmd->code, true);
 
 		lil_pop_env(lil);
 	}
@@ -1205,102 +2104,329 @@ static struct lil_value *run_cmd(struct lil *lil, struct lil_func *cmd,
 	return r;
 }
 
-struct lil_value *lil_parse(struct lil *lil, const char *code, size_t codelen,
-			    int funclevel)
+/**
+ * lil_eval() - Evaluate a script
+ * @lil: The interpreter
+ * @script: The script to be evaluated
+ * @new_frame: Whether this represents a new stack frame. If @new_frame is set,
+ *             then subcommands which set &struct lil.breakrun (such as
+ *             ``return``) will stop here. Otherwise, we will pass the buck and
+ *             let our caller deal with it. Top-level calls to lil_eval()
+ *             should always set @new_frame.
+ *
+ * This evaluates a script. Each command in the script is substitute()ed, and
+ * then is passed to run_cmd(). Evaluation stops early on an error, on ctrl-c,
+ * or when a command sets breakrun in the current environment.
+ *
+ * Return: The result of the last command in @script, the environment's retval
+ * if @new_frame is set and a retval was set, or %NULL on error. %NULL is also
+ * returned without an error if @script contains no commands.
+ */
+struct lil_value *lil_eval(struct lil *lil, struct lil_list *script,
+			   bool new_frame)
 {
-	const char *save_code = lil->code;
-	size_t save_clen = lil->clen;
-	size_t save_head = lil->head;
-	struct lil_value *val = NULL;
-	struct lil_list *words = NULL;
+	size_t i;
+	struct lil_value *ret = NULL;
 
-	if (!save_code)
-		lil->rootcode = code;
-	lil->code = code;
-	lil->clen = codelen ? codelen : strlen(code);
-	lil->head = 0;
+	/* A fresh top-level evaluation starts with a clean error state */
+	if (!lil->depth)
+		lil->err = LIL_ERR_NONE;
 
-	lil_skip_spaces(lil);
-	lil->parse_depth++;
-	if (CONFIG_LIL_RECLIMIT && lil->parse_depth > CONFIG_LIL_RECLIMIT) {
+	/*
+	 * Check the limit before touching lil->depth, so that this early
+	 * return leaves the counter balanced.
+	 */
+	if (CONFIG_LIL_RECLIMIT && lil->depth > CONFIG_LIL_RECLIMIT) {
 		lil_set_error(lil, LIL_ERR_DEPTH, "recursion limit reached");
-		goto cleanup;
+		return NULL;
 	}
 
-	if (lil->parse_depth == 1)
-		lil->err = LIL_ERR_NONE;
+	/*
+	 * Always paired with the decrement at the end of this function, even
+	 * when CONFIG_LIL_RECLIMIT is 0 (no limit).
+	 */
+	lil->depth++;
+	assert(list_to_symbol(script)->type == LIL_SYMBOL_SCRIPT);
+	for (i = 0; i < script->c; i++) {
+		struct lil_list *command;
 
-	if (funclevel)
-		lil->env->breakrun = 0;
+		if (ret)
+			lil_free_value(ret);
+		ret = NULL;
 
-	while (lil->head < lil->clen && !lil->err) {
-		if (words)
-			lil_free_list(words);
+		assert(script->v[i]->type == LIL_SYMBOL_COMMAND);
+		command = substitute(lil, &script->v[i]->list);
+		if (!command)
+			break;
 
-		if (val)
-			lil_free_value(val);
-		val = NULL;
+		if (command->c) {
+			const char *funcname =
+				lil_to_string(lil_list_get(command, 0));
+			struct lil_func *func = lil_find_cmd(lil, funcname);
+
+			/* An empty command name is silently ignored */
+			if (func)
+				ret = run_cmd(lil, func, command);
+			else if (funcname[0])
+				lil_set_error_nocmd(lil, funcname);
+		}
+		lil_free_list(command);
 
 		if (ctrlc()) {
-			lil_set_error(lil, LIL_ERR_INTR, "interrupted");
-			goto cleanup;
+			lil_set_error_intr(lil);
+			break;
+		} else if (lil->err || lil->env->breakrun) {
+			break;
 		}
-
-		words = substitute(lil);
-		if (!words || lil->err)
-			goto cleanup;
-
-		if (words->c) {
-			const char *cmdname =
-				lil_to_string(lil_list_get(words, 0));
-			struct lil_func *cmd = lil_find_cmd(lil, cmdname);
-
-			if (!cmd) {
-				if (cmdname[0]) {
-					lil_set_error_nocmd(lil, cmdname);
-					goto cleanup;
-				}
-			} else {
-				val = run_cmd(lil, cmd, words);
-			}
-
-			if (lil->env->breakrun)
-				goto cleanup;
-		}
-
-		lil_skip_spaces(lil);
-		while (ateol(lil))
-			lil->head++;
-		lil_skip_spaces(lil);
 	}
 
-cleanup:
-	if (words)
-		lil_free_list(words);
-	lil->code = save_code;
-	lil->clen = save_clen;
-	lil->head = save_head;
-
-	if (funclevel && lil->env->retval_set) {
-		if (val)
-			lil_free_value(val);
-		val = lil->env->retval;
+	if (new_frame && lil->env->retval_set) {
+		lil_free_value(ret);
+		ret = lil->env->retval;
 		lil->env->retval = NULL;
-		lil->env->retval_set = 0;
-		lil->env->breakrun = 0;
+		lil->env->retval_set = false;
+		lil->env->breakrun = false;
 	}
 
-	lil->parse_depth--;
-	return val ? val : alloc_value(NULL);
+	if (lil->err) {
+		lil_free_value(ret);
+		ret = NULL;
+	}
+
+	lil->depth--;
+	return ret;
+}
+
+/**
+ * lil_parse_eval() - Parse a script and evaluate it
+ * @lil: The interpreter
+ * @code: The script to parse
+ * @codelen: The length of @code, passed through to lil_parse()
+ * @new_frame: Passed through to lil_eval()
+ *
+ * The parsed script is freed before returning.
+ *
+ * Return: Whatever lil_eval() returns, or %NULL if lil_parse() failed
+ */
+struct lil_value *lil_parse_eval(struct lil *lil, const char *code,
+				 size_t codelen, bool new_frame)
+{
+	struct lil_list *parsed;
+	struct lil_value *ret = NULL;
+
+	parsed = lil_parse(lil, code, codelen);
+	if (parsed) {
+		ret = lil_eval(lil, parsed, new_frame);
+		lil_free_list(parsed);
+	}
+
+	return ret;
+}
+
+/**
+ * parse_list() - Parse a list
+ * @p: The parser
+ *
+ * FIRST(list) = FIRST(space) | FIRST(word) | ;
+ * FOLLOW(list) = ;
+ *
+ * Items are separated by any run of spaces and newlines; leading and trailing
+ * separators are ignored. Each item is parsed with parse_word() in list mode
+ * and must come back as a plain value.
+ *
+ * Return: A list of words in the list, or %NULL on error.
+ */
+static struct lil_list *parse_list(struct lil_parser *p)
+{
+	struct lil_list *list = lil_alloc_list();
+
+	if (!list)
+		goto oom;
+
+	while (!eof(p)) {
+		switch (peek(p)) {
+		case '\n':
+		CASE_SPACE:
+			pop(p);
+			continue;
+		}
+		break;
+	}
+
+	if (eof(p))
+		return list;
+
+	do {
+		struct lil_symbol *sym = parse_word(p, true);
+
+		if (!sym)
+			goto err;
+
+		assert(sym->type == LIL_SYMBOL_VALUE);
+		if (lil_list_append(list, &sym->value)) {
+			/* The list never took ownership, so free it here */
+			lil_free_value(&sym->value);
+			goto oom;
+		}
+
+		if (eof(p))
+			return list;
+
+		do {
+			switch (peek(p)) {
+			case '\n':
+			CASE_SPACE:
+				pop(p);
+				continue;
+			}
+			break;
+		} while (!eof(p));
+	} while (!eof(p));
+
+	return list;
+
+oom:
+	err_oom(p);
+err:
+	lil_free_list(list);
+	return NULL;
+}
+
+/* FIXME: rename this to something better */
+/**
+ * lil_subst_to_list() - Split a value into a list of items
+ * @lil: The interpreter; any parse error is recorded on it
+ * @code: The value to split
+ *
+ * The contents of @code are parsed with parse_list(). No evaluation is done
+ * here.
+ *
+ * Return: The parsed list. If parsing fails, a newly-allocated empty list is
+ * returned instead (see the FIXME below), which may itself be %NULL if that
+ * allocation fails.
+ */
+struct lil_list *lil_subst_to_list(struct lil *lil, struct lil_value *code)
+{
+	struct lil_list *list;
+	struct lil_parser p;
+
+	p.code = code->d;
+	p.len = code->l;
+	lil_parser_init(&p);
+	list = parse_list(&p);
+	assert(!list || eof(&p));
+	parser_set_error(lil, &p);
+
+	/*
+	 * FIXME: Callers of this function do not expect NULL, so we must always
+	 * allocate something for them. Of course, lil_alloc_list can also fail,
+	 * so we're screwed either way.
+	 */
+	if (!list)
+		list = lil_alloc_list();
+	return list;
+}
+
+/**
+ * lil_subst_to_value() - Perform substitutions on a value
+ * @lil: The interpreter; any parse error is recorded on it
+ * @code: The value to substitute
+ *
+ * The contents of @code are parsed with parse_quote() and the resulting list
+ * is evaluated and concatenated with substitute_symbol().
+ *
+ * Return: The substituted value, or %NULL on error
+ */
+struct lil_value *lil_subst_to_value(struct lil *lil, struct lil_value *code)
+{
+	struct lil_symbol *sym;
+	struct lil_value *val;
+	struct lil_parser p;
+
+	p.code = code->d;
+	p.len = code->l;
+	lil_parser_init(&p);
+	sym = parse_quote(&p, -1, false);
+	parser_set_error(lil, &p);
+	if (!sym)
+		return NULL;
+
+	assert(eof(&p));
+	assert(sym->type == LIL_SYMBOL_LIST);
+	val = substitute_symbol(lil, sym);
+	/* The parsed list is only an intermediate; don't leak it */
+	lil_free_list(&sym->list);
+	return val;
 }
 
+/*
+ * Parse and evaluate the script held in @val. A NULL or empty @val is not
+ * parsed at all; a newly-allocated empty value is returned instead. Otherwise
+ * this returns whatever lil_parse_eval() returns.
+ */
 struct lil_value *lil_parse_value(struct lil *lil, struct lil_value *val,
-				  int funclevel)
+				  bool new_frame)
 {
 	if (!val || !val->d || !val->l)
 		return alloc_value(NULL);
 
-	return lil_parse(lil, val->d, val->l, funclevel);
+	return lil_parse_eval(lil, val->d, val->l, new_frame);
+}
+
+/**
+ * enum needs - Whether an item needs special treatment
+ * @NEEDS_NOTHING: This item needs no changes
+ * @NEEDS_BRACES: This item needs to be enclosed in braces
+ * @NEEDS_DOUBLE: This item needs to be enclosed in double quotes
+ * @NEEDS_SINGLE: This item needs to be enclosed in single quotes
+ * @NEEDS_QUOTES: This item needs to be enclosed in (some kind of) quotes
+ */
+enum needs {
+	NEEDS_NOTHING,
+	NEEDS_BRACES,
+	NEEDS_DOUBLE,
+	NEEDS_SINGLE,
+	NEEDS_QUOTES = NEEDS_DOUBLE,
+};
+
+/**
+ * item_needs() - Determine how a list item must be quoted
+ * @str: The item's contents
+ * @n: The number of bytes of @str to examine
+ *
+ * A %NULL @str, or one whose first byte is NUL, needs braces. Otherwise, any
+ * brace, backslash, quote, newline, or space character means the item needs
+ * at least braces. If the braces in the item are unbalanced, or the item ends
+ * in an odd number of backslashes, braces cannot be used and the item needs
+ * quotes instead. Single quotes are chosen over double quotes when the item
+ * contains more double quotes than single quotes.
+ *
+ * Return: The kind of quoting which the item needs
+ */
+static enum needs item_needs(const char *str, size_t n)
+{
+	bool was_backslash = false;
+	int nesting = 0;
+	enum needs needs = NEEDS_NOTHING;
+	size_t i, sq = 0, dq = 0;
+
+	if (!str || !str[0])
+		return NEEDS_BRACES;
+
+	for (i = 0; i < n; i++) {
+		switch (str[i]) {
+		case '{':
+			nesting++;
+			goto braces;
+		case '}':
+			nesting--;
+			if (nesting < 0)
+				needs = NEEDS_QUOTES;
+			goto braces;
+		case '\\':
+			was_backslash = !was_backslash;
+			if (needs == NEEDS_NOTHING)
+				needs = NEEDS_BRACES;
+			continue; /* skip the reset of was_backslash below */
+		case '\'':
+			sq++;
+			goto braces;
+		case '"':
+			dq++;
+			fallthrough;
+		case '\n':
+		CASE_SPACE:
+braces:
+			if (needs == NEEDS_NOTHING)
+				needs = NEEDS_BRACES;
+		}
+		was_backslash = false;
+	}
+
+	if (nesting || was_backslash)
+		needs = NEEDS_QUOTES;
+
+	if (needs == NEEDS_QUOTES && dq > sq)
+		needs = NEEDS_SINGLE;
+	return needs;
+}
+
+/**
+ * lil_list_to_value() - Convert a list into a space-separated value
+ * @list: The list to convert
+ * @do_escape: Whether to quote items so that the result can be split back
+ *             into the same items. If unset, items are appended verbatim.
+ *
+ * Items are joined with single spaces. With @do_escape, each item is wrapped
+ * in braces or quotes as decided by item_needs(); inside quotes, backslashes
+ * and the quote character itself are escaped with a backslash.
+ *
+ * Return: The new value, or %NULL if memory could not be allocated
+ */
+struct lil_value *lil_list_to_value(struct lil_list *list, bool do_escape)
+{
+	struct lil_value *val = alloc_value(NULL);
+	size_t i, j;
+
+	if (!val)
+		return NULL;
+
+	for (i = 0; i < list->c; i++) {
+		char q;
+		struct lil_value *item = lil_list_get(list, i);
+		enum needs needs;
+
+		if (do_escape)
+			needs = item_needs(lil_to_string(item), item->l);
+		else
+			needs = NEEDS_NOTHING;
+
+		if (i && lil_append_char(val, ' '))
+			goto err;
+
+		switch (needs) {
+		case NEEDS_NOTHING:
+			if (lil_append_val(val, item))
+				goto err;
+			continue;
+		case NEEDS_BRACES:
+			if (lil_append_char(val, '{') ||
+			    lil_append_val(val, item) ||
+			    lil_append_char(val, '}'))
+				goto err;
+			continue;
+		case NEEDS_DOUBLE:
+			q = '"';
+			goto quote;
+		case NEEDS_SINGLE:
+			q = '\'';
+quote:
+			if (lil_append_char(val, q))
+				goto err;
+			for (j = 0; j < item->l; j++) {
+				char c = item->d[j];
+
+				if (c == '\\' || c == q)
+					if (lil_append_char(val, '\\'))
+						goto err;
+				if (lil_append_char(val, c))
+					goto err;
+			}
+			if (lil_append_char(val, q))
+				goto err;
+		}
+	}
+	return val;
+
+err:
+	lil_free_value(val);
+	return NULL;
 }
 
 static void lil_set_error(struct lil *lil, enum lil_error err, const char *msg)
@@ -1801,7 +2927,7 @@ struct lil_value *lil_eval_expr(struct lil *lil, struct lil_value *code)
 	struct expreval ee;
 
 	if (ctrlc()) {
-		lil_set_error(lil, LIL_ERR_INTR, "interrupted");
+		lil_set_error_intr(lil);
 		return NULL;
 	}
 
@@ -2076,18 +3202,6 @@ static struct lil_value *fnc_reflect(struct lil *lil, size_t argc,
 	if (!strcmp(type, "error"))
 		return lil->err_msg ? lil_alloc_string(lil->err_msg) : NULL;
 
-	if (!strcmp(type, "this")) {
-		struct lil_env *env = lil->env;
-
-		while (env != lil->rootenv && !env->func)
-			env = env->parent;
-
-		if (env == lil->rootenv)
-			return lil_alloc_string(lil->rootcode);
-
-		return env->func ? env->func->code : NULL;
-	}
-
 	if (!strcmp(type, "name")) {
 		struct lil_env *env = lil->env;
 
@@ -2564,10 +3678,12 @@ static struct lil_value *fnc_foreach(struct lil *lil, size_t argc,
 		lil_set_var(lil, varname, lil_list_get(list, i),
 			    LIL_SETVAR_LOCAL_ONLY);
 		rv = lil_parse_value(lil, argv[codeidx], 0);
-		if (rv->l)
-			lil_list_append(rlist, rv);
-		else
-			lil_free_value(rv);
+		if (rv) {
+			if (rv->l)
+				lil_list_append(rlist, rv);
+			else
+				lil_free_value(rv);
+		}
 
 		if (lil->env->breakrun || lil->err)
 			break;
diff --git a/include/cli_lil.h b/include/cli_lil.h
index 40c822401e..290329372a 100644
--- a/include/cli_lil.h
+++ b/include/cli_lil.h
@@ -133,10 +133,13 @@ void lil_free(struct lil *lil);
 
 int lil_register(struct lil *lil, const char *name, lil_func_proc_t proc);
 
-struct lil_value *lil_parse(struct lil *lil, const char *code, size_t codelen,
-			    int funclevel);
+struct lil_list *lil_parse(struct lil *lil, const char *code, size_t codelen);
+struct lil_value *lil_eval(struct lil *lil, struct lil_list *script,
+			   bool new_frame);
+struct lil_value *lil_parse_eval(struct lil *lil, const char *code,
+				 size_t codelen, bool new_frame);
 struct lil_value *lil_parse_value(struct lil *lil, struct lil_value *val,
-				  int funclevel);
+				  bool new_frame);
 
 enum lil_error lil_error(struct lil *lil, const char **msg);
 
@@ -158,7 +161,7 @@ void lil_free_list(struct lil_list *list);
 int lil_list_append(struct lil_list *list, void *item);
 size_t lil_list_size(struct lil_list *list);
 struct lil_value *lil_list_get(struct lil_list *list, size_t index);
-struct lil_value *lil_list_to_value(struct lil_list *list, int do_escape);
+struct lil_value *lil_list_to_value(struct lil_list *list, bool do_escape);
 
 struct lil_list *lil_subst_to_list(struct lil *lil, struct lil_value *code);
 struct lil_value *lil_subst_to_value(struct lil *lil, struct lil_value *code);
diff --git a/test/cmd/lil.c b/test/cmd/lil.c
index 896b2fed15..fb33fa83a6 100644
--- a/test/cmd/lil.c
+++ b/test/cmd/lil.c
@@ -23,7 +23,7 @@ const char helpers[] =
 	"proc assert_err {cmd} {"
 		"set ok 1;"
 		"try {upeval $cmd; set ok 0} {};"
-		"assert {$ok};"
+		"if not $ok { error $cmd }"
 	"};"
 	"proc asserteq {expr1 expr2} {"
 		"set val1 [upeval 'expr \"$expr1\"'];"
@@ -65,14 +65,13 @@ static const struct {
 	{"and",
 		"proc and args {"
 			"foreach [slice $args 1] {"
-				"upeval 'downeval \\'set v \\'\\[${i}\\]';"
-				"if not $v { return 0 }"
+				"upeval 'downeval \"set v \\[${i}\\]\"';"
+				"if not $v { return 0 };"
 			"};"
 			"return 1"
 		"};"
 		"set a 0;"
-		"set final [and {set a 3} {return 0} {set a 32}];"
-		"asserteq 0 {$final};"
+		"asserteq 0 {[and {set a 3} {return 0} {set a 32}]};"
 		"assert 3 {$a};"
 	},
 	{"assert",
@@ -108,11 +107,10 @@ static const struct {
 		"asserteq -6 {1 +~ (2*3 )};"
 		"asserteq -6 {~(2*3)+1};"
 		"asserteq 0 {1*!(2+2)};"
-		"asserteq -1 {~!(!{})};"
+		"asserteq -1 {~!(!)};"
 		"asserteq 1 {1 +~*(2*3)};"
 		"asserteq 1 {'hello'};"
 		"asserteq 0 {0};"
-		"asserteq 0 {{}};"
 		"asserteq 1 {()};"
 		"asserteq 1 {( )};"
 		"asserteq_str '' {[expr]};"
@@ -144,6 +142,7 @@ static const struct {
 			"return $ret"
 		"};"
 		"set list [list {bad's day} {good's day} eh??];"
+		"asserteq_list [lapply $list length] [list 9 10 4];"
 		"asserteq_list [lapply $list split] [list "
 			"[list {bad's} day] "
 			"[list {good's} day] "
@@ -159,37 +158,22 @@ static const struct {
 		"asserteq_str baz {[index $l 2]};"
 		"append l 'Hello, world!';"
 		"asserteq_list $l [list foo bar baz bad 'Hello, world!'];"
-		"set l [subst $l];"
-		"asserteq_list $l [list foo bar baz bad Hello, world!];"
 		"lmap $l foox barx bamia;"
 		"asserteq_str foo {$foox};"
 		"asserteq_str bar {$barx};"
 		"asserteq_str baz {$bamia};"
-		"set l {one	# linebreaks are ignored in list parsing mode\n"
+		"set l {one	# linebreaks are whitespace in lists\n"
 		"\n"
-		"two;three      # a semicolon still counts as line break\n"
-		"               # (which in list mode is treated as a\n"
-		"               # separator for list entries)\n"
-		"# of course a semicolon inside quotes is treated like normal\n"
-		"three';'and';a;half'\n"
-		"# like in code mode, a semicolon will stop the comment; four\n"
-		"\n"
-		"# below we have a quote, square brackets for inline\n"
-		"# expansions are still taken into consideration\n"
-		"[quote {this line will be ignored completely\n"
-		"        as will this line and instead be replaced\n"
-		"        with the 'five' below since while in code\n"
-		"        mode (that is, inside the brackets here)\n"
-		"        linebreaks are still processed}\n"
-		" quote five]\n"
+		"two;three      # a semicolon does not count as a line break\n"
+		"# a semicolon will not stop the comment; four\n"
 		"\n"
 		"# The curly brackets are also processed so the next three\n"
 		"# lines will show up as three separate lines\n"
 		"{six\n"
 		"seven\n"
 		"eight}}\n"
-		"asserteq_list $l [list one two three 'three;and;a;half' four "
-		"five 'six\\nseven\\neight'];"
+		"asserteq_list $l [list one 'two;three' "
+			"'six\\nseven\\neight'];"
 	},
 	{"local",
 		"proc bits-for {x} {"
@@ -210,33 +194,13 @@ static const struct {
 		"asserteq 45 {$x};"
 		"asserteq 6 {$bitsx}"
 	},
-	{"multiline comment",
+	{"comment",
 		"# this line will not be executed, but the following will\n"
 		"set ok1 1\n"
-		"## This is a multiline comment\n"
-		"   which, as the name implies,\n"
-		"   spans multiple lines.\n"
-		"set ok2 1\n"
-		"   the code above wouldn't execute,\n"
-		"   but this will --> ##set ok3 1\n"
-		"### more than two #s will not count as multiline comments\n"
-		"set ok4 1\n"
-		"# Note that semicolons can be used as linebreaks so\n"
-		"# this code will be executed: ; set ok5 1\n"
-		"##\n"
-		"   ...however inside multiline comments semicolons do not\n"
-		"   stop the comment section (pretty much like linebreaks)\n"
-		"   and this code will not be executed: ; set ok6 1\n"
-		"##\n"
-		"# Also note that unlike in regular code, semicolons cannot\n"
-		"# be escaped in single-line comments, e.g.: ; set ok7 1\n"
+		"# Note that semicolons cannot be used as linebreaks so\n"
+		"# this code will not be executed: ; set ok5 1\n"
 		"asserteq_str 1 {$ok1};"
-		"assert {![reflect has-var ok2]}"
-		"asserteq_str 1 {$ok3};"
-		"asserteq_str 1 {$ok4};"
-		"asserteq_str 1 {$ok5};"
-		"assert {![reflect has-var ok6]}"
-		"asserteq_str 1 {$ok7};"
+		"assert {! [reflect has-var ok5]}"
 	},
 	{"multiline code",
 		"asserteq_list [list hello \\\n"
@@ -270,7 +234,7 @@ static const struct {
 		"asserteq 10 {[strpos $a string]};"
 		"asserteq 16 {[strpos $b string]};"
 		"asserteq -78 {[compare $a $b]};"
-		"assert {![streq $a $b]};"
+		"assert {! [streq $a $b]};"
 		"asserteq_str 'This is a foo' {[repstr $a string foo]};"
 		"asserteq_str 'This is another foo' {[repstr $b string foo]};"
 		"asserteq_list [split $a] [list This is a string];"
@@ -321,9 +285,10 @@ static int lib_test_lil(struct unit_test_state *uts)
 		enum lil_error err;
 		struct lil *lil = lil_new(NULL);
 
-		lil_free_value(lil_parse(lil, helpers, sizeof(helpers) - 1, 0));
+		lil_free_value(lil_parse_eval(lil, helpers, sizeof(helpers) - 1,
+					      true));
 		ut_asserteq(LIL_ERR_NONE, lil_error(lil, &err_msg));
-		lil_free_value(lil_parse(lil, lil_tests[i].cmd, 0, 0));
+		lil_free_value(lil_parse_eval(lil, lil_tests[i].cmd, 0, true));
 		err = lil_error(lil, &err_msg);
 		if (err) {
 			ut_failf(uts, __FILE__, __LINE__, __func__,
-- 
2.32.0



More information about the U-Boot mailing list