From 509d798eeb603a48235e1bb534ad414e56c766f0 Mon Sep 17 00:00:00 2001 From: Troels Jessen Date: Tue, 4 Nov 2025 14:55:07 +0100 Subject: [PATCH 1/4] Treat unicode characters as errors --- src/slash.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/slash.c b/src/slash.c index d68f028..bf5d26a 100644 --- a/src/slash.c +++ b/src/slash.c @@ -870,12 +870,21 @@ char *slash_readline(struct slash *slash) char *ret = slash->buffer; int c, esc[3]; bool done = false, escaped = false; + bool quote[2] = { false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + int mightbeminus = 0; /* Reset buffer */ slash_reset(slash); slash_refresh(slash, 0); while (!done && ((c = slash_getchar(slash)) >= 0)) { + if (!quote[0] && c == '\"') { + quote[1] = !quote[1]; + } + if (!quote[1] && c == '\'') { + quote[0] = !quote[0]; + } + if (escaped) { esc[0] = c; esc[1] = slash_getchar(slash); @@ -999,9 +1008,43 @@ char *slash_readline(struct slash *slash) /* Unknown control */ break; } - } else if (isprint(c)) { - /* Add to buffer */ - slash_insert(slash, c); + } else { + /* Check for non-ASCII characters outside quotes */ + if ((c&0x80) != 0 && !quote[0] && !quote[1]) { + + /* Convert unicode minus from Ubuntu calculator to ASCII dash */ + if (mightbeminus == 0 && c == 0xE2) { + mightbeminus = 1; + continue; + } else if (mightbeminus == 1 && c == 0x88) { + mightbeminus = 2; + continue; + } else if (mightbeminus == 2 && c == 0x92) { + /* Yep, it's a minus sign */ + slash_insert(slash, '-'); + mightbeminus = 0; + continue; + } else { + mightbeminus = 0; + } + + printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF); + + /* Discard the rest of the line */ + do { c = slash_getchar(slash); } + while (c != '\n' && c != '\r' && c >= 0); + + slash_reset(slash); + done = true; + break; + } + mightbeminus = 0; + + /* If inside a quote, unrecognised characters are ignored */ 
+ if (isprint(c)) { + /* Add to buffer */ + slash_insert(slash, c); + } } slash->last_char = c; From 75d5d657235504894cff35fc38a6532ecd6ebe28 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Lab Date: Tue, 4 Nov 2025 19:50:12 +0100 Subject: [PATCH 2/4] Add handling of unicode in slash_execute(), in the same manner as in slash_readline() --- src/slash.c | 90 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 29 deletions(-) diff --git a/src/slash.c b/src/slash.c index bf5d26a..fee42b6 100644 --- a/src/slash.c +++ b/src/slash.c @@ -403,8 +403,51 @@ __attribute__((weak)) int slash_prompt(struct slash *slash) { slash_process_cmd_line_hook_t slash_process_cmd_line_hook = NULL; +static const int minus_unicode_bytes[] = { 0xE2, 0x88, 0x92 }; + +static bool has_bad_unicode (struct slash *slash, unsigned char *c, bool in_quotes, int *mightbeminus) { + /* Check for non-ASCII characters outside quotes */ + if ((*c & 0x80) != 0 && !in_quotes) { + if (*c == minus_unicode_bytes[*mightbeminus]) { + *mightbeminus = *mightbeminus + 1; + if (*mightbeminus == sizeof(minus_unicode_bytes)/sizeof(minus_unicode_bytes[0])) { + /* Yep, it's a minus sign */ + *c = '-'; + memmove(c-2, c, strlen(c)+1); + *mightbeminus = 0; + } + return false; + } else { + *mightbeminus = 0; + printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", *c&0xFF); + return true; + } + } else { + *mightbeminus = 0; + } + return false; +} + +static bool has_unicode(struct slash *slash, char *line) { + bool result = false; + bool quote[2] = { false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + int mightbeminus = 0; + for (unsigned char *c = (unsigned char *)line; *c != '\0'; c++) { + if (*c == '\"' && !quote[0]) quote[1] = !quote[1]; + else if (*c == '\'' && !quote[1]) quote[0] = !quote[0]; + if(has_bad_unicode(slash, c, quote[0] || quote[1], &mightbeminus)) { + result = true; + break; + } + } + return result; +} + int 
slash_execute(struct slash *slash, char *line) { + if (has_unicode(slash, line)) { + return EINVAL; + } struct slash_command *command; char *args, *argv[SLASH_ARG_MAX]; char *processed_cmd_line = NULL, *line_to_use; @@ -878,12 +921,8 @@ char *slash_readline(struct slash *slash) slash_refresh(slash, 0); while (!done && ((c = slash_getchar(slash)) >= 0)) { - if (!quote[0] && c == '\"') { - quote[1] = !quote[1]; - } - if (!quote[1] && c == '\'') { - quote[0] = !quote[0]; - } + if (c == '\"' && !quote[0]) quote[1] = !quote[1]; + else if (c == '\'' && !quote[1]) quote[0] = !quote[0]; if (escaped) { esc[0] = c; @@ -1010,33 +1049,26 @@ char *slash_readline(struct slash *slash) } } else { /* Check for non-ASCII characters outside quotes */ - if ((c&0x80) != 0 && !quote[0] && !quote[1]) { - - /* Convert unicode minus from Ubuntu calculator to ASCII dash */ - if (mightbeminus == 0 && c == 0xE2) { - mightbeminus = 1; - continue; - } else if (mightbeminus == 1 && c == 0x88) { - mightbeminus = 2; - continue; - } else if (mightbeminus == 2 && c == 0x92) { - /* Yep, it's a minus sign */ - slash_insert(slash, '-'); - mightbeminus = 0; + if ((c & 0x80) != 0 && !quote[0] && !quote[1]) { + if (c == minus_unicode_bytes[mightbeminus]) { + mightbeminus++; + if (mightbeminus == sizeof(minus_unicode_bytes)/sizeof(minus_unicode_bytes[0])) { + slash_insert(slash, '-'); + slash_refresh(slash, 0); + mightbeminus = 0; + } continue; } else { - mightbeminus = 0; - } - - printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF); + printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF); - /* Discard the rest of the line */ - do { c = slash_getchar(slash); } - while (c != '\n' && c != '\r' && c >= 0); + /* Discard the rest of the line */ + do { c = slash_getchar(slash); } + while (c != '\n' && c != '\r' && c >= 0); - slash_reset(slash); - done = true; - break; + slash_reset(slash); + done = true; + break; + } } mightbeminus = 0; From 
848c1df15e190d46934514c820d5fca1ad13bfb6 Mon Sep 17 00:00:00 2001 From: Troels Jessen Date: Tue, 4 Nov 2025 20:59:17 +0100 Subject: [PATCH 3/4] Ignore in-line comments --- src/slash.c | 65 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/src/slash.c b/src/slash.c index fee42b6..c605a9c 100644 --- a/src/slash.c +++ b/src/slash.c @@ -428,14 +428,19 @@ static bool has_bad_unicode (struct slash *slash, unsigned char *c, bool in_quot return false; } +static bool in_quote(unsigned char c, bool quote[3]) { + if (c == '\"' && !quote[0]) quote[1] = !quote[1]; + else if (c == '\'' && !quote[1]) quote[0] = !quote[0]; + else if (c == '#' && !quote[0] && !quote[1]) quote[2] = true; + return quote[0] || quote[1] || quote[2]; +} + static bool has_unicode(struct slash *slash, char *line) { bool result = false; - bool quote[2] = { false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ int mightbeminus = 0; for (unsigned char *c = (unsigned char *)line; *c != '\0'; c++) { - if (*c == '\"' && !quote[0]) quote[1] = !quote[1]; - else if (*c == '\'' && !quote[1]) quote[0] = !quote[0]; - if(has_bad_unicode(slash, c, quote[0] || quote[1], &mightbeminus)) { + if(has_bad_unicode(slash, c, in_quote(*c, quote), &mightbeminus)) { result = true; break; } @@ -443,6 +448,16 @@ static bool has_unicode(struct slash *slash, char *line) { return result; } +static void strip_comment(char *line) { + bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + for (int i = 1; line[i] != '\0'; i++) { + if(!in_quote(line[i-1], quote) && line[i] == '#') { + line[i] = '\0'; + return; + } + } +} + int slash_execute(struct slash *slash, char *line) { if (has_unicode(slash, line)) { @@ -461,6 +476,8 @@ int slash_execute(struct slash *slash, char *line) 
return SLASH_SUCCESS; } + strip_comment(line); + if(NULL != slash_process_cmd_line_hook) { processed_cmd_line = slash_process_cmd_line_hook(line); } @@ -913,7 +930,7 @@ char *slash_readline(struct slash *slash) char *ret = slash->buffer; int c, esc[3]; bool done = false, escaped = false; - bool quote[2] = { false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ int mightbeminus = 0; /* Reset buffer */ @@ -921,9 +938,6 @@ char *slash_readline(struct slash *slash) slash_refresh(slash, 0); while (!done && ((c = slash_getchar(slash)) >= 0)) { - if (c == '\"' && !quote[0]) quote[1] = !quote[1]; - else if (c == '\'' && !quote[1]) quote[0] = !quote[0]; - if (escaped) { esc[0] = c; esc[1] = slash_getchar(slash); @@ -1049,26 +1063,25 @@ char *slash_readline(struct slash *slash) } } else { /* Check for non-ASCII characters outside quotes */ - if ((c & 0x80) != 0 && !quote[0] && !quote[1]) { - if (c == minus_unicode_bytes[mightbeminus]) { - mightbeminus++; - if (mightbeminus == sizeof(minus_unicode_bytes)/sizeof(minus_unicode_bytes[0])) { - slash_insert(slash, '-'); - slash_refresh(slash, 0); - mightbeminus = 0; - } - continue; - } else { - printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF); + if (c == minus_unicode_bytes[mightbeminus]) { + mightbeminus++; + if (mightbeminus == sizeof(minus_unicode_bytes)/sizeof(minus_unicode_bytes[0])) { + slash_insert(slash, '-'); + slash_refresh(slash, 0); + mightbeminus = 0; + } + continue; + } + if (!in_quote((unsigned char)c, quote) && (c & 0x80) != 0) { + printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF); - /* Discard the rest of the line */ - do { c = slash_getchar(slash); } - while (c != '\n' && c != '\r' && c >= 0); + /* Discard the rest of the line */ + do { c = slash_getchar(slash); } + while (c != '\n' && c != '\r' && c >= 
0); - slash_reset(slash); - done = true; - break; - } + slash_reset(slash); + done = true; + break; } mightbeminus = 0; From 51c9c6a5ca804052179b373f7bc6c530d46a572b Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Lab Date: Wed, 5 Nov 2025 09:08:33 +0100 Subject: [PATCH 4/4] Trim leading white spaces before looking up commands This allows typing " cmd" instead of "cmd" and not being thrown out with an "Unknown command". Update code comments after handling of "#" --- src/slash.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/slash.c b/src/slash.c index c605a9c..fbb581f 100644 --- a/src/slash.c +++ b/src/slash.c @@ -437,7 +437,7 @@ static bool in_quote(unsigned char c, bool quote[3]) { static bool has_unicode(struct slash *slash, char *line) { bool result = false; - bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + bool quote[3] = { false, false, false }; /* Index 0 represents double quote, index 1 represents single quote, index 2 represents a comment */ int mightbeminus = 0; for (unsigned char *c = (unsigned char *)line; *c != '\0'; c++) { if(has_bad_unicode(slash, c, in_quote(*c, quote), &mightbeminus)) { @@ -449,7 +449,7 @@ static bool has_unicode(struct slash *slash, char *line) { } static void strip_comment(char *line) { - bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + bool quote[3] = { false, false, false }; /* Index 0 represents double quote, index 1 represents single quote, index 2 represents a comment */ for (int i = 1; line[i] != '\0'; i++) { if(!in_quote(line[i-1], quote) && line[i] == '#') { line[i] = '\0'; @@ -458,8 +458,9 @@ static void strip_comment(char *line) { } } -int slash_execute(struct slash *slash, char *line) +int slash_execute(struct slash *slash, char *org_line) { + char *line = org_line; if (has_unicode(slash, line)) { return EINVAL; } @@ -475,7 +476,9 @@ int slash_execute(struct 
slash *slash, char *line) if (line[0] == '#') { return SLASH_SUCCESS; } - + /* Skip leading white spaces */ + while (*line && isspace((unsigned int) *line)) + line++; strip_comment(line); if(NULL != slash_process_cmd_line_hook) { @@ -930,7 +933,7 @@ char *slash_readline(struct slash *slash) char *ret = slash->buffer; int c, esc[3]; bool done = false, escaped = false; - bool quote[3] = { false, false, false }; /* Index 0 represent double quote, index 1 represent single quote */ + bool quote[3] = { false, false, false }; /* Index 0 represents double quote, index 1 represents single quote, index 2 represents a comment */ int mightbeminus = 0; /* Reset buffer */