diff --git a/src/slash.c b/src/slash.c
index d68f028..fbb581f 100644
--- a/src/slash.c
+++ b/src/slash.c
@@ -403,8 +403,108 @@ __attribute__((weak)) int slash_prompt(struct slash *slash) {
 
 slash_process_cmd_line_hook_t slash_process_cmd_line_hook = NULL;
 
-int slash_execute(struct slash *slash, char *line)
+/* UTF-8 encoding of U+2212 MINUS SIGN; rewritten to an ASCII '-' when met outside quotes */
+static const int minus_unicode_bytes[] = { 0xE2, 0x88, 0x92 };
+
+/**
+ * Examine one byte of a command line for non-ASCII content outside quotes.
+ *
+ * A complete UTF-8 minus sign is rewritten in place to an ASCII '-' and the
+ * rest of the string is moved up to close the two byte gap.
+ *
+ * @param slash        Unused.
+ * @param pos          In/out cursor on the byte to examine. Once a minus sign has
+ *                     been rewritten it is moved back onto the new '-', so that the
+ *                     caller's next increment lands on the byte that followed it.
+ * @param in_quotes    True when the byte is inside quotes or a comment.
+ * @param mightbeminus In/out number of minus sign bytes matched so far.
+ * @return true if the byte is non-ASCII, outside quotes and not part of a minus sign.
+ */
+static bool has_bad_unicode(struct slash *slash, unsigned char **pos, bool in_quotes, int *mightbeminus) {
+	const int minus_len = (int)(sizeof(minus_unicode_bytes) / sizeof(minus_unicode_bytes[0]));
+	unsigned char *c = *pos;
+
+	(void)slash;
+
+	/* ASCII bytes, and anything inside quotes or a comment, are always accepted */
+	if ((*c & 0x80) == 0 || in_quotes) {
+		*mightbeminus = 0;
+		return false;
+	}
+
+	if (*c != minus_unicode_bytes[*mightbeminus]) {
+		*mightbeminus = 0;
+		printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", *c&0xFF);
+		return true;
+	}
+
+	*mightbeminus = *mightbeminus + 1;
+	if (*mightbeminus == minus_len) {
+		/* Yep, it's a minus sign: collapse the sequence into a single '-' */
+		unsigned char *first = c - (minus_len - 1);
+
+		*first = '-';
+		memmove(first + 1, c + 1, strlen((char *)(c + 1)) + 1);
+		*pos = first;
+		*mightbeminus = 0;
+	}
+	return false;
+}
+
+/**
+ * Feed one character into the quote/comment tracking state.
+ *
+ * quote[0] is toggled by single quotes, quote[1] by double quotes, and
+ * quote[2] is latched by a '#' met outside quotes (start of a comment).
+ *
+ * @return true if any of the three flags is set after seeing c.
+ */
+static bool in_quote(unsigned char c, bool quote[3]) {
+	if (c == '\"' && !quote[0]) quote[1] = !quote[1];
+	else if (c == '\'' && !quote[1]) quote[0] = !quote[0];
+	else if (c == '#' && !quote[0] && !quote[1]) quote[2] = true;
+	return quote[0] || quote[1] || quote[2];
+}
+
+/**
+ * Check a command line for non-ASCII characters outside quotes and comments.
+ *
+ * UTF-8 minus signs met outside quotes are replaced in place by an ASCII '-'.
+ *
+ * @return true if the line holds a rejected character.
+ */
+static bool has_unicode(struct slash *slash, char *line) {
+	bool quote[3] = { false, false, false }; /* [0] single quote, [1] double quote, [2] comment */
+	int mightbeminus = 0;
+
+	for (unsigned char *c = (unsigned char *)line; *c != '\0'; c++) {
+		if (has_bad_unicode(slash, &c, in_quote(*c, quote), &mightbeminus)) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/* Terminate the line at the first '#' that is not inside quotes */
+static void strip_comment(char *line) {
+	bool quote[3] = { false, false, false }; /* [0] single quote, [1] double quote, [2] comment */
+
+	/* Scan from index 0 so that an empty line is never read past its terminator */
+	for (size_t i = 0; line[i] != '\0'; i++) {
+		in_quote((unsigned char)line[i], quote);
+		if (quote[2]) {
+			line[i] = '\0';
+			return;
+		}
+	}
+}
+
+int slash_execute(struct slash *slash, char *org_line)
 {
+	char *line = org_line;
+	if (has_unicode(slash, line)) {
+		return EINVAL;
+	}
 	struct slash_command *command;
 	char *args, *argv[SLASH_ARG_MAX];
 	char *processed_cmd_line = NULL, *line_to_use;
@@ -417,6 +517,15 @@ int slash_execute(struct slash *slash, char *line)
 	if (line[0] == '#') {
 		return SLASH_SUCCESS;
 	}
+	/* Skip heading white spaces */
+	while (*line && isspace((unsigned char) *line)) {
+		line++;
+	}
+	/* A '#' reached only after skipping white space starts a comment-only line too */
+	if (line[0] == '#') {
+		return SLASH_SUCCESS;
+	}
+	strip_comment(line);
 
 	if(NULL != slash_process_cmd_line_hook) {
 		processed_cmd_line = slash_process_cmd_line_hook(line);
@@ -870,6 +979,8 @@ char *slash_readline(struct slash *slash)
 	char *ret = slash->buffer;
 	int c, esc[3];
 	bool done = false, escaped = false;
+	bool quote[3] = { false, false, false }; /* [0] single quote, [1] double quote, [2] comment */
+	int mightbeminus = 0;
 
 	/* Reset buffer */
 	slash_reset(slash);
@@ -999,9 +1110,35 @@ char *slash_readline(struct slash *slash)
 				/* Unknown control */
 				break;
 			}
-		} else if (isprint(c)) {
-			/* Add to buffer */
-			slash_insert(slash, c);
+		} else {
+			/* Check for non-ASCII characters outside quotes */
+			if (c == minus_unicode_bytes[mightbeminus]) {
+				mightbeminus++;
+				if (mightbeminus == sizeof(minus_unicode_bytes)/sizeof(minus_unicode_bytes[0])) {
+					slash_insert(slash, '-');
+					slash_refresh(slash, 0);
+					mightbeminus = 0;
+				}
+				continue;
+			}
+			if (!in_quote((unsigned char)c, quote) && (c & 0x80) != 0) {
+				printf(" Got non-ascii character 0x%02x outside of quotes, ignoring line\n", c&0xFF);
+
+				/* Discard the rest of the line */
+				do { c = slash_getchar(slash); }
+				while (c != '\n' && c != '\r' && c >= 0);
+
+				slash_reset(slash);
+				done = true;
+				break;
+			}
+			mightbeminus = 0;
+
+			/* If inside a quote, unrecognised characters are ignored */
+			if (isprint(c)) {
+				/* Add to buffer */
+				slash_insert(slash, c);
+			}
 		}
 
 		slash->last_char = c;