[PATCH v2 10/12] slre: implement support for ranges in character classes
Rasmus Villemoes
ravi at prevas.dk
Tue May 13 10:40:32 CEST 2025
When trying to use U-Boot's regex facility, it is a rather large
gotcha that the [a-z] range syntax is not supported. It doesn't require
a lot of extra code to implement that; we just let the regular parsing
emit the start and end literal symbols as usual, and emit a new
"escape" code RANGE between them.
At match time, this means the code will first just see an 'a' and try
to match that, and only then recognize that it's actually part of a
range and then do the 'a' <= ch <= 'z' test.
Of course, this means that a '-' in the middle of a [] pair no longer
matches a literal dash, but I highly doubt anybody relies on
that. Putting the '-' first or last, or escaping it with '\', continues
to work, just as in most other RE engines.
Reviewed-by: Simon Glass <sjg at chromium.org>
Signed-off-by: Rasmus Villemoes <ravi at prevas.dk>
---
lib/slre.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/lib/slre.c b/lib/slre.c
index 87dfde720e9..117815a6d60 100644
--- a/lib/slre.c
+++ b/lib/slre.c
@@ -30,7 +30,7 @@
#include <slre.h>
enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL,
- STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT};
+ STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT, RANGE};
#ifdef SLRE_TEST
static struct {
@@ -55,7 +55,8 @@ static struct {
{"QUEST", 1, "o"}, /* Match zero or one time, "?" */
{"SPACE", 0, ""}, /* Match whitespace, "\s" */
{"NONSPACE", 0, ""}, /* Match non-space, "\S" */
- {"DIGIT", 0, ""} /* Match digit, "\d" */
+ {"DIGIT", 0, ""}, /* Match digit, "\d" */
+ {"RANGE", 0, ""}, /* Range separator - */
};
#endif /* SLRE_TEST */
@@ -260,6 +261,15 @@ anyof(struct slre *r, const char **re)
return;
/* NOTREACHED */
break;
+ case '-':
+ if (r->data_size == old_data_size || **re == ']') {
+ /* First or last character, just match - itself. */
+ store_char_in_data(r, '-');
+ break;
+ }
+ store_char_in_data(r, 0);
+ store_char_in_data(r, RANGE);
+ break;
case '\\':
esc = get_escape_char(re);
if ((esc & 0xff) == 0) {
@@ -487,6 +497,14 @@ is_any_of(const unsigned char *p, int len, const char *s, int *ofs)
if (isdigit(ch))
goto match;
break;
+ case RANGE:
+ /*
+ * a-z is represented in the data array as {'a', \0, RANGE, 'z'}
+ */
+ ++i;
+ if (p[i - 3] <= (unsigned char)ch && (unsigned char)ch <= p[i])
+ goto match;
+ break;
}
continue;
}
--
2.49.0
More information about the U-Boot mailing list