Skip to content

Commit f3e4b55

Browse files
committed
Allow to parse macro identifiers in variable decls
1 parent 18000b1 commit f3e4b55

File tree

2 files changed

+102
-24
lines changed

2 files changed

+102
-24
lines changed

grammar.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ module.exports = grammar({
6767
$._external_end_of_statement,
6868
$._preproc_unary_operator,
6969
$.hollerith_constant,
70+
$.macro_identifier,
7071
],
7172

7273
extras: $ => [
@@ -870,6 +871,7 @@ module.exports = grammar({
870871
$.derived_type,
871872
alias($.procedure_declaration, $.procedure),
872873
$.declared_type,
874+
$.macro_identifier,
873875
)),
874876
optional(seq(',',
875877
commaSep1(

src/scanner.c

Lines changed: 100 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "tree_sitter/alloc.h"
2+
#include "tree_sitter/array.h"
23
#include "tree_sitter/parser.h"
34
#include <ctype.h>
45
#include <wctype.h>
@@ -13,10 +14,12 @@ enum TokenType {
1314
END_OF_STATEMENT,
1415
PREPROC_UNARY_OPERATOR,
1516
HOLLERITH_CONSTANT,
17+
MACRO_IDENTIFIER,
1618
};
1719

1820
// State owned by the external scanner; a pointer to it is the `payload`
// handed to the create/scan/serialize/deserialize/destroy entry points.
typedef struct {
1921
// NOTE(review): presumably true while the scanner is inside a line
// continuation -- confirm against the line-continuation scan helpers.
bool in_line_continuation;
22+
// Names loaded from the CODEE_TS_MACRO_IDS environment variable when the
// scanner is created. Each entry is a separately allocated string that is
// compared against scanned identifiers and released when the scanner is
// destroyed.
Array(char *) MacroIdentifiers;
2023
} Scanner;
2124

2225
typedef enum {
@@ -301,31 +304,44 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301304
return true;
302305
}
303306

304-
static bool scan_string_literal_kind(TSLexer *lexer) {
305-
// Strictly, it's allowed for the kind to be an integer literal, in
306-
// practice I've not seen it
307+
typedef Array(char) String;
308+
309+
// Returns NULL on error, otherwise an allocated char array for an identifier
310+
// Consumes an identifier from the lexer and returns its characters.
//
// Returns NULL when the lookahead is not alphabetic (nothing is consumed in
// that case). Otherwise returns a heap-allocated String holding every
// consumed character; the contents are NOT NUL-terminated. Ownership passes
// to the caller, who must release both the contents (array_delete) and the
// String itself (ts_free).
//
// Side effect on the lexer: mark_end is called each time an underscore is
// about to be consumed, so the marked token end sits just before the most
// recent underscore. This keeps a trailing underscore out of a kind
// identifier; callers wanting the whole identifier must mark the end again.
static String *scan_identifier(TSLexer *lexer) {
    if (!iswalpha(lexer->lookahead)) {
        return NULL;
    }

    String *name = ts_calloc(1, sizeof(String));

    for (;;) {
        if (!is_identifier_char(lexer->lookahead) || lexer->eof(lexer)) {
            break;
        }
        array_push(name, lexer->lookahead);
        if (lexer->lookahead == '_') {
            lexer->mark_end(lexer);
        }
        advance(lexer);
    }

    if (name->size > 0) {
        return name;
    }

    // Nothing was collected: release the empty buffer and report failure.
    array_delete(name);
    ts_free(name);
    return NULL;
}
332+
333+
// Decides whether an already-scanned identifier is a string literal kind
// prefix, i.e. it ends in '_' and is immediately followed by a quote
// character (as in `kind_"text"`).
//
// `identifier` holds the characters already consumed from `lexer`; the lexer
// lookahead is the first character after them. On success the result symbol
// is set to STRING_LITERAL_KIND and true is returned; otherwise the lexer is
// left untouched and false is returned.
static bool scan_string_literal_kind(TSLexer *lexer, String *identifier) {
    if (identifier->size == 0) {
        return false;
    }

    const bool ends_with_underscore =
        identifier->contents[identifier->size - 1] == '_';
    const bool quote_follows =
        lexer->lookahead == '"' || lexer->lookahead == '\'';

    if (!(ends_with_underscore && quote_follows)) {
        return false;
    }

    lexer->result_symbol = STRING_LITERAL_KIND;
    return true;
}
331347

@@ -393,6 +409,28 @@ static bool scan_string_literal(TSLexer *lexer) {
393409
return false;
394410
}
395411

412+
static bool scan_macro_identifier(Scanner *scanner, TSLexer *lexer,
413+
String *identifier) {
414+
unsigned num_macro_ids = scanner->MacroIdentifiers.size;
415+
if (num_macro_ids == 0) {
416+
return false;
417+
}
418+
419+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
420+
char *macro_id = *array_get(&scanner->MacroIdentifiers, i);
421+
unsigned macro_id_len = strlen(macro_id);
422+
if (identifier->size != macro_id_len) {
423+
continue;
424+
}
425+
if (strncmp(macro_id, identifier->contents, identifier->size) == 0) {
426+
lexer->mark_end(lexer);
427+
lexer->result_symbol = MACRO_IDENTIFIER;
428+
return true;
429+
}
430+
}
431+
return false;
432+
}
433+
396434
/// Need an external scanner to catch '!' before it's parsed as a comment
397435
static bool scan_preproc_unary_operator(TSLexer *lexer) {
398436
const char next_char = lexer->lookahead;
@@ -467,19 +505,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467505
return true;
468506
}
469507

470-
if (valid_symbols[STRING_LITERAL_KIND]) {
508+
// These symbols both scan for an identifier, we need to combine the logic
509+
// and they always need to be the last to look for since we can't backtrack
510+
if (valid_symbols[STRING_LITERAL_KIND] || valid_symbols[MACRO_IDENTIFIER]) {
511+
String *identifier = scan_identifier(lexer);
512+
bool identifier_result = false;
471513
// This may need a lot of lookahead, so should (probably) always
472514
// be the last token to look for
473-
if (scan_string_literal_kind(lexer)) {
515+
if (identifier && valid_symbols[STRING_LITERAL_KIND]) {
516+
if (scan_string_literal_kind(lexer, identifier)) {
517+
identifier_result = true;
518+
}
519+
}
520+
if (!identifier_result && identifier && valid_symbols[MACRO_IDENTIFIER]) {
521+
if (scan_macro_identifier(scanner, lexer, identifier)) {
522+
identifier_result = true;
523+
}
524+
}
525+
if (identifier) {
526+
ts_free(identifier);
527+
}
528+
if (identifier_result) {
474529
return true;
475530
}
476531
}
477-
478532
return false;
479533
}
480534

481535
void *tree_sitter_fortran_external_scanner_create() {
482-
return ts_calloc(1, sizeof(bool));
536+
Scanner *result = (Scanner *)ts_calloc(1, sizeof(Scanner));
537+
char *macro_ids = getenv("CODEE_TS_MACRO_IDS");
538+
if (!macro_ids) {
539+
return result;
540+
}
541+
char *macro_id = strtok(macro_ids, ":");
542+
Array(char *) *macroIdsResult = &result->MacroIdentifiers;
543+
while (macro_id) {
544+
int length = strlen(macro_id);
545+
char *new_str = (char *)ts_malloc((length + 1) * sizeof(char));
546+
strncpy(new_str, macro_id, length);
547+
array_push(macroIdsResult, new_str);
548+
// Keep splitting
549+
macro_id = strtok(NULL, ":");
550+
}
551+
return result;
483552
}
484553

485554
bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
@@ -491,20 +560,27 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491560
unsigned tree_sitter_fortran_external_scanner_serialize(void *payload,
492561
char *buffer) {
493562
Scanner *scanner = (Scanner *)payload;
494-
buffer[0] = (char)scanner->in_line_continuation;
495-
return 1;
563+
unsigned size = sizeof(*scanner);
564+
memcpy(buffer, scanner, size);
565+
return size;
496566
}
497567

498568
void tree_sitter_fortran_external_scanner_deserialize(void *payload,
499569
const char *buffer,
500570
unsigned length) {
501571
Scanner *scanner = (Scanner *)payload;
502572
if (length > 0) {
503-
scanner->in_line_continuation = buffer[0];
573+
unsigned size = sizeof(*scanner);
574+
memcpy(scanner, buffer, size);
504575
}
505576
}
506577

507578
void tree_sitter_fortran_external_scanner_destroy(void *payload) {
508579
Scanner *scanner = (Scanner *)payload;
580+
for (size_t i = 0, end = scanner->MacroIdentifiers.size; i < end; ++i) {
581+
char *str = *array_get(&scanner->MacroIdentifiers, i);
582+
ts_free(str);
583+
}
584+
array_delete(&scanner->MacroIdentifiers);
509585
ts_free(scanner);
510586
}

0 commit comments

Comments
 (0)