1
1
#include "tree_sitter/alloc.h"
2
+ #include "tree_sitter/array.h"
2
3
#include "tree_sitter/parser.h"
3
4
#include <ctype.h>
4
5
#include <wctype.h>
@@ -13,10 +14,12 @@ enum TokenType {
13
14
END_OF_STATEMENT ,
14
15
PREPROC_UNARY_OPERATOR ,
15
16
HOLLERITH_CONSTANT ,
17
+ MACRO_IDENTIFIER ,
16
18
};
17
19
18
20
typedef struct {
19
21
bool in_line_continuation ;
22
+ Array (char * ) MacroIdentifiers ;
20
23
} Scanner ;
21
24
22
25
typedef enum {
@@ -301,31 +304,44 @@ static bool scan_end_line_continuation(Scanner *scanner, TSLexer *lexer) {
301
304
return true;
302
305
}
303
306
304
- static bool scan_string_literal_kind (TSLexer * lexer ) {
305
- // Strictly, it's allowed for the kind to be an integer literal, in
306
- // practice I've not seen it
307
+ typedef Array (char ) String ;
308
+
309
+ // Returns NULL on error, otherwise an allocated char array for an identifier
310
+ static String * scan_identifier (TSLexer * lexer ) {
307
311
if (!iswalpha (lexer -> lookahead )) {
308
- return false ;
312
+ return NULL ;
309
313
}
310
-
311
- lexer -> result_symbol = STRING_LITERAL_KIND ;
312
-
313
- // We need two characters of lookahead to see `_"`
314
- char current_char = '\0' ;
315
-
314
+ String * possible_identifier = ts_calloc (1 , sizeof (String ));
316
315
while (is_identifier_char (lexer -> lookahead ) && !lexer -> eof (lexer )) {
317
- current_char = lexer -> lookahead ;
318
- // Don't capture the trailing underscore as part of the kind identifier
319
- if (lexer -> lookahead == '_' ) {
320
- lexer -> mark_end (lexer );
321
- }
322
- advance (lexer );
316
+ array_push (possible_identifier , lexer -> lookahead );
317
+ // Don't capture the trailing underscore as part of the kind identifier
318
+ // If another user of this function wants to mark the end again after
319
+ // the identifier they're free to do so
320
+ if (lexer -> lookahead == '_' ) {
321
+ lexer -> mark_end (lexer );
322
+ }
323
+ advance (lexer );
324
+ }
325
+ if (possible_identifier -> size == 0 ) {
326
+ array_delete (possible_identifier );
327
+ ts_free (possible_identifier );
328
+ return NULL ;
329
+ }
330
+ return possible_identifier ;
331
+ }
332
+
333
+ static bool scan_string_literal_kind (TSLexer * lexer , String * identifier ) {
334
+ if (identifier -> size == 0 ) {
335
+ return false;
323
336
}
324
337
325
- if ((current_char != '_' ) || (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
338
+ char last_char = identifier -> contents [identifier -> size - 1 ];
339
+ if ((last_char != '_' ) ||
340
+ (lexer -> lookahead != '"' && lexer -> lookahead != '\'' )) {
326
341
return false;
327
342
}
328
343
344
+ lexer -> result_symbol = STRING_LITERAL_KIND ;
329
345
return true;
330
346
}
331
347
@@ -393,6 +409,28 @@ static bool scan_string_literal(TSLexer *lexer) {
393
409
return false;
394
410
}
395
411
412
+ static bool scan_macro_identifier (Scanner * scanner , TSLexer * lexer ,
413
+ String * identifier ) {
414
+ unsigned num_macro_ids = scanner -> MacroIdentifiers .size ;
415
+ if (num_macro_ids == 0 ) {
416
+ return false;
417
+ }
418
+
419
+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
420
+ char * macro_id = * array_get (& scanner -> MacroIdentifiers , i );
421
+ unsigned macro_id_len = strlen (macro_id );
422
+ if (identifier -> size != macro_id_len ) {
423
+ continue ;
424
+ }
425
+ if (strncmp (macro_id , identifier -> contents , identifier -> size ) == 0 ) {
426
+ lexer -> mark_end (lexer );
427
+ lexer -> result_symbol = MACRO_IDENTIFIER ;
428
+ return true;
429
+ }
430
+ }
431
+ return false;
432
+ }
433
+
396
434
/// Need an external scanner to catch '!' before it's parsed as a comment
397
435
static bool scan_preproc_unary_operator (TSLexer * lexer ) {
398
436
const char next_char = lexer -> lookahead ;
@@ -467,19 +505,50 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
467
505
return true;
468
506
}
469
507
470
- if (valid_symbols [STRING_LITERAL_KIND ]) {
508
+ // These symbols both scan for an identifier, we need to combine the logic
509
+ // and they always need to be the last to look for since we can't backtrack
510
+ if (valid_symbols [STRING_LITERAL_KIND ] || valid_symbols [MACRO_IDENTIFIER ]) {
511
+ String * identifier = scan_identifier (lexer );
512
+ bool identifier_result = false;
471
513
// This may need a lot of lookahead, so should (probably) always
472
514
// be the last token to look for
473
- if (scan_string_literal_kind (lexer )) {
515
+ if (identifier && valid_symbols [STRING_LITERAL_KIND ]) {
516
+ if (scan_string_literal_kind (lexer , identifier )) {
517
+ identifier_result = true;
518
+ }
519
+ }
520
+ if (!identifier_result && identifier && valid_symbols [MACRO_IDENTIFIER ]) {
521
+ if (scan_macro_identifier (scanner , lexer , identifier )) {
522
+ identifier_result = true;
523
+ }
524
+ }
525
+ if (identifier ) {
526
+ ts_free (identifier );
527
+ }
528
+ if (identifier_result ) {
474
529
return true;
475
530
}
476
531
}
477
-
478
532
return false;
479
533
}
480
534
481
535
void * tree_sitter_fortran_external_scanner_create () {
482
- return ts_calloc (1 , sizeof (bool ));
536
+ Scanner * result = (Scanner * )ts_calloc (1 , sizeof (Scanner ));
537
+ char * macro_ids = getenv ("CODEE_TS_MACRO_IDS" );
538
+ if (!macro_ids ) {
539
+ return result ;
540
+ }
541
+ char * macro_id = strtok (macro_ids , ":" );
542
+ Array (char * ) * macroIdsResult = & result -> MacroIdentifiers ;
543
+ while (macro_id ) {
544
+ int length = strlen (macro_id );
545
+ char * new_str = (char * )ts_malloc ((length + 1 ) * sizeof (char ));
546
+ strncpy (new_str , macro_id , length );
547
+ array_push (macroIdsResult , new_str );
548
+ // Keep splitting
549
+ macro_id = strtok (NULL , ":" );
550
+ }
551
+ return result ;
483
552
}
484
553
485
554
bool tree_sitter_fortran_external_scanner_scan (void * payload , TSLexer * lexer ,
@@ -491,20 +560,27 @@ bool tree_sitter_fortran_external_scanner_scan(void *payload, TSLexer *lexer,
491
560
unsigned tree_sitter_fortran_external_scanner_serialize (void * payload ,
492
561
char * buffer ) {
493
562
Scanner * scanner = (Scanner * )payload ;
494
- buffer [0 ] = (char )scanner -> in_line_continuation ;
495
- return 1 ;
563
+ unsigned size = sizeof (* scanner );
564
+ memcpy (buffer , scanner , size );
565
+ return size ;
496
566
}
497
567
498
568
void tree_sitter_fortran_external_scanner_deserialize (void * payload ,
499
569
const char * buffer ,
500
570
unsigned length ) {
501
571
Scanner * scanner = (Scanner * )payload ;
502
572
if (length > 0 ) {
503
- scanner -> in_line_continuation = buffer [0 ];
573
+ unsigned size = sizeof (* scanner );
574
+ memcpy (scanner , buffer , size );
504
575
}
505
576
}
506
577
507
578
void tree_sitter_fortran_external_scanner_destroy (void * payload ) {
508
579
Scanner * scanner = (Scanner * )payload ;
580
+ for (size_t i = 0 , end = scanner -> MacroIdentifiers .size ; i < end ; ++ i ) {
581
+ char * str = * array_get (& scanner -> MacroIdentifiers , i );
582
+ ts_free (str );
583
+ }
584
+ array_delete (& scanner -> MacroIdentifiers );
509
585
ts_free (scanner );
510
586
}
0 commit comments