Skip to content

Commit 1e7f87b

Browse files
committed
Implement goto and label statement support
Enable parsing and analysis of C labels and goto so the compiler can accept common patterns (for example error-handling cleanup) and be more compatible with real-world C code. Provide diagnostics for duplicate, undefined, and unused labels, and warn when a goto can bypass a variable's initialization. Close #280
1 parent c4f778e commit 1e7f87b

File tree

6 files changed

+224
-2
lines changed

6 files changed

+224
-2
lines changed

COMPLIANCE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ This document tracks compliance gaps and non-standard behaviors.
1717

1818
### Control Flow
1919
- `if`/`else` statements
20+
- `goto` and label statements
2021
- `while`, `do-while`, `for` loops
2122
- `switch`/`case`/`default` statements
2223
- `break`, `continue`, `return` statements
@@ -96,7 +97,6 @@ This document tracks compliance gaps and non-standard behaviors.
9697

9798
| Feature | Status | Description |
9899
|---------|--------|-------------|
99-
| `goto` and labels | Missing | No arbitrary jumps |
100100
| Designated initializers | Missing | No `.field = value` syntax |
101101
| Compound literals | Partial | Limited support |
102102
| Flexible array members | Missing | No `[]` at struct end |
@@ -115,6 +115,7 @@ This document tracks compliance gaps and non-standard behaviors.
115115
- Escape sequence: `\e` for ESC character
116116
- `void*` arithmetic (treated as `char*`)
117117
- `sizeof(void)` returns 0 (should be error)
118+
- Computed goto
118119

119120
### Implementation-Specific
120121
- Array compound literals in scalar context use first element

src/defs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define MAX_LOCALS 1600
2121
#define MAX_FIELDS 64
2222
#define MAX_TYPES 256
23+
#define MAX_LABELS 256
2324
#define MAX_IR_INSTR 80000
2425
#define MAX_BB_PRED 128
2526
#define MAX_BB_DOM_SUCC 64
@@ -179,6 +180,7 @@ typedef enum {
179180
T_break,
180181
T_default,
181182
T_continue,
183+
T_goto,
182184
T_const, /* const qualifier */
183185
/* C pre-processor directives */
184186
T_cppd_include,
@@ -270,6 +272,7 @@ typedef enum {
270272
OP_branch, /* conditional jump */
271273
OP_jump, /* unconditional jump */
272274
OP_func_ret, /* returned value */
275+
OP_label, /* for goto label */
273276

274277
/* function pointer */
275278
OP_address_of_func, /* resolve function entry */
@@ -567,6 +570,13 @@ struct ref_block {
567570
* type, parameters) with SSA-related information (e.g., basic blocks, control
568571
* flow) to support parsing, analysis, optimization, and code generation.
569572
*/
573+
574+
/* A label defined inside a function body, recorded so that goto statements
 * naming it can be connected to the basic block the label starts.
 */
typedef struct {
575+
char label_name[MAX_ID_LEN]; /* label identifier as written in the source */
576+
basic_block_t *bb; /* basic block created at the label definition */
577+
bool used; /* set to true once a goto targeting this label is resolved */
578+
} label_t;
579+
570580
struct func {
571581
/* Syntactic info */
572582
var_t return_def;

src/globals.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14811481
printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name,
14821482
bb->else_->bb_label_name);
14831483
break;
1484+
case OP_jump:
1485+
print_indent(1);
1486+
printf("jmp %s", bb->next->bb_label_name);
1487+
break;
1488+
case OP_label:
1489+
print_indent(0);
1490+
printf("%s:", insn->str);
1491+
break;
14841492
case OP_push:
14851493
print_indent(1);
14861494
printf("push %%%s", rs1->var_name);

src/lexer.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 17
15+
#define NUM_KEYWORDS 18
1616

1717
/* Token mapping structure for elegant initialization */
1818
typedef struct {
@@ -85,6 +85,7 @@ void lex_init_keywords()
8585
{"break", T_break},
8686
{"default", T_default},
8787
{"continue", T_continue},
88+
{"goto", T_goto},
8889
{"union", T_union},
8990
{"const", T_const},
9091
};
@@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing)
786787
keyword = T_enum;
787788
} else if (!memcmp(token_str, "case", 4))
788789
keyword = T_case;
790+
else if (!memcmp(token_str, "goto", 4))
791+
keyword = T_goto;
789792
break;
790793

791794
case 5: /* 5-letter keywords: while, break, union, const */

src/parser.c

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ int break_exit_idx = 0;
2626
basic_block_t *continue_bb[MAX_NESTING];
2727
int continue_pos_idx = 0;
2828

29+
/* Label utilities.
 *
 * labels / label_idx: labels defined so far in the function being parsed.
 * backpatch_bb / backpatch_bb_idx: basic blocks that end in a goto whose
 * label had not been defined yet when the goto was parsed. They are
 * connected to their targets at the end of read_func_body(), which then
 * resets both counters to 0 for the next function.
 */
30+
label_t labels[MAX_LABELS];
31+
int label_idx = 0;
32+
basic_block_t *backpatch_bb[MAX_LABELS];
33+
int backpatch_bb_idx = 0;
34+
2935
/* stack of the operands of 3AC */
3036
var_t *operand_stack[MAX_OPERAND_STACK_SIZE];
3137
int operand_stack_idx = 0;
@@ -40,6 +46,23 @@ void parse_array_init(var_t *var,
4046
basic_block_t **bb,
4147
bool emit_code);
4248

49+
50+
label_t *find_label(char *name)
51+
{
52+
for (int i = 0; i < label_idx; i++) {
53+
if (!strcmp(name, labels[i].label_name))
54+
return &labels[i];
55+
}
56+
return NULL;
57+
}
58+
59+
/* Record a new label for the function currently being parsed.
 *
 * name: label identifier; copied into the entry and truncated to
 *       MAX_ID_LEN - 1 characters if longer.
 * bb:   basic block that begins at the label.
 *
 * Reports an error and records nothing when the label table is full.
 * Slots of labels[] are reused from one function to the next (label_idx is
 * reset to 0 after each function body), so every field is initialized here.
 */
void add_label(char *name, basic_block_t *bb)
{
    if (label_idx >= MAX_LABELS) {
        error("Too many labels in function");
        return;
    }

    label_t *l = &labels[label_idx++];

    /* strncpy() leaves the destination unterminated when the source fills
     * it, so copy one byte less and terminate explicitly.
     */
    strncpy(l->label_name, name, MAX_ID_LEN - 1);
    l->label_name[MAX_ID_LEN - 1] = 0;
    l->bb = bb;

    /* Clear the flag a label of a previous function may have left behind;
     * otherwise the unused-label warning would be wrongly suppressed.
     */
    l->used = false;
}
65+
4366
char *gen_name_to(char *buf)
4467
{
4568
sprintf(buf, ".t%d", global_var_idx++);
@@ -997,6 +1020,59 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb)
9971020
return else_;
9981021
}
9991022

1023+
/* Parse the rest of a 'goto <label>;' statement and emit its control flow.
 * The caller has already consumed the 'goto' keyword.
 *
 * parent: enclosing block, used to create basic blocks, the temporary and
 *         the instructions.
 * bb:     basic block preceding the goto.
 *
 * Returns the basic block in which the statements following the goto are
 * placed, or NULL after an error has been reported.
 */
basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb)
{
    /* Since a goto splits the current program into two basic blocks and makes
     * the subsequent basic block unreachable, this causes problems for later
     * CFG operations. Therefore, we create a fake if that always executes to
     * wrap the goto, and connect the unreachable basic block to the else
     * branch. Finally, return this else block.
     *
     * before:
     *   code1;
     *   goto label;
     *   code2;
     *
     * after:
     *   code1;
     *   if (1) goto label;
     *   code2;
     */

    char token[MAX_ID_LEN];
    if (!lex_peek(T_identifier, token)) {
        error("Expected identifier after 'goto'");
        return NULL;
    }

    lex_expect(T_identifier);
    lex_expect(T_semicolon);

    /* Condition block: load the constant 1 and branch on it */
    basic_block_t *fake_if = bb_create(parent);
    bb_connect(bb, fake_if, NEXT);
    var_t *val = require_var(parent);
    gen_name_to(val->var_name);
    val->init_val = 1;
    add_insn(parent, fake_if, OP_load_constant, val, NULL, NULL, 0, NULL);
    add_insn(parent, fake_if, OP_branch, NULL, val, NULL, 0, NULL);

    basic_block_t *then_ = bb_create(parent);
    basic_block_t *else_ = bb_create(parent);
    bb_connect(fake_if, then_, THEN);
    bb_connect(fake_if, else_, ELSE);

    /* The jump instruction carries the label name; read_func_body() reads it
     * back from the block's last instruction to resolve forward references.
     */
    add_insn(parent, then_, OP_jump, NULL, NULL, NULL, 0, token);
    label_t *label = find_label(token);
    if (label) {
        /* Backward goto: the target block already exists */
        label->used = true;
        bb_connect(then_, label->bb, NEXT);
        return else_;
    }

    /* Forward goto: remember the block so it can be connected once the whole
     * function body has been parsed. Guard the fixed-size table first.
     */
    if (backpatch_bb_idx >= MAX_LABELS) {
        error("Too many unresolved goto statements in function");
        return NULL;
    }

    backpatch_bb[backpatch_bb_idx++] = then_;
    return else_;
}
1075+
10001076
basic_block_t *handle_struct_variable_decl(block_t *parent,
10011077
basic_block_t *bb,
10021078
char *token)
@@ -4169,6 +4245,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41694245
return do_while_end;
41704246
}
41714247

4248+
if (lex_accept(T_goto))
4249+
return handle_goto_statement(parent, bb);
4250+
41724251
/* empty statement */
41734252
if (lex_accept(T_semicolon))
41744253
return bb;
@@ -4753,6 +4832,22 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
47534832
return bb;
47544833
}
47554834

4835+
if (lex_peek(T_identifier, token)) {
4836+
lex_accept(T_identifier);
4837+
if (lex_accept(T_colon)) {
4838+
label_t *l = find_label(token);
4839+
if (l) {
4840+
error("label redefinition");
4841+
return NULL;
4842+
}
4843+
basic_block_t *n = bb_create(parent);
4844+
bb_connect(bb, n, NEXT);
4845+
add_label(token, n);
4846+
add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token);
4847+
return n;
4848+
}
4849+
}
4850+
47564851
error("Unrecognized statement token");
47574852
return NULL;
47584853
}
@@ -4794,6 +4889,28 @@ void read_func_body(func_t *func)
47944889
basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs);
47954890
if (body)
47964891
bb_connect(body, func->exit, NEXT);
4892+
4893+
/* Resolve forward gotos: each recorded block ends with the OP_jump that
 * carries the name of its target label.
 */
for (int i = 0; i < backpatch_bb_idx; i++) {
    basic_block_t *bb = backpatch_bb[i];
    insn_t *g = bb->insn_list.tail;
    label_t *label = find_label(g->str);
    if (!label) {
        error("goto label undefined");
        /* Never dereference the missing label if error() returns */
        continue;
    }

    label->used = true;
    bb_connect(bb, label->bb, NEXT);
}

/* Diagnose labels that no goto refers to */
for (int i = 0; i < label_idx; i++) {
    label_t *label = &labels[i];
    if (label->used)
        continue;

    printf("Warning: unused label %s\n", label->label_name);
}

/* Labels are function-scoped: start the next function with empty tables */
backpatch_bb_idx = 0;
label_idx = 0;
47974914
}
47984915

47994916
/* if first token is type */

src/ssa.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,80 @@ void unwind_phi(void)
929929
}
930930
}
931931

932+
/* Report whether 'succ' can be reached from 'pred' by following dom_next
 * links (presumably the children in the dominator tree -- confirm against
 * the code that fills dom_next).
 *
 * Only proper descendants match: the function returns false when called
 * with pred == succ and succ does not also appear below pred.
 */
bool is_dominate(basic_block_t *pred, basic_block_t *succ)
{
    for (int k = 0; k < MAX_BB_DOM_SUCC; k++) {
        basic_block_t *child = pred->dom_next[k];

        /* A NULL entry ends the list of children */
        if (!child)
            break;

        /* Direct child, or found somewhere below this child */
        if (child == succ)
            return true;
        if (is_dominate(child, succ))
            return true;
    }

    return false;
}
948+
949+
/* Per-basic-block callback: for every variable allocated in 'bb' (OP_allocat
 * instruction), print a warning for each other basic block on the variable's
 * reference list that is_dominate(bb, ...) does not report as dominated.
 *
 * func: unused; present to match the traversal callback signature.
 * bb:   basic block whose allocations are examined.
 */
void bb_check_var_cross_init(func_t *func, basic_block_t *bb)
{
    UNUSED(func);

    insn_t *cur = bb->insn_list.head;
    while (cur) {
        if (cur->opcode == OP_allocat) {
            var_t *decl = cur->rd;
            ref_block_t *use = decl->ref_block_list.head;

            while (use) {
                /* References inside the allocating block are always fine;
                 * any other referencing block must be dominated by it.
                 */
                if (use->bb != bb && !is_dominate(bb, use->bb))
                    printf("Warning: Variable '%s' cross-initialized\n",
                           decl->var_name);
                use = use->next;
            }
        }
        cur = cur->next;
    }
}
975+
976+
void check_var_cross_init()
977+
{
978+
/**
979+
* A variable's initialization lives in a basic block that does not dominate
980+
* all of its uses, so control flow can reach a use without first passing
981+
* through its initialization (i.e., a possibly-uninitialized use).
982+
*
983+
* For Example:
984+
* goto label; // Jumps directly to 'label', skipping the
985+
* declaration below if (1) { int x; // This line is never
986+
* executed when 'goto' is taken label: x = 5; // Uses 'x'
987+
* after its declaration was bypassed
988+
* }
989+
*/
990+
991+
bb_traversal_args_t *args = arena_alloc_traversal_args();
992+
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
993+
/* Skip function declarations without bodies */
994+
if (!func->bbs)
995+
continue;
996+
997+
args->func = func;
998+
args->bb = func->bbs;
999+
1000+
func->visited++;
1001+
args->postorder_cb = bb_check_var_cross_init;
1002+
bb_forward_traversal(args);
1003+
}
1004+
}
1005+
9321006
#ifdef __SHECC__
9331007
#else
9341008
void bb_dump_connection(FILE *fd,
@@ -1112,6 +1186,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb)
11121186
sprintf(str, "<BRANCH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11131187
insn->rs1->subscript);
11141188
break;
1189+
case OP_jump:
1190+
sprintf(str, "<JUMP>");
1191+
break;
1192+
case OP_label:
1193+
sprintf(str, "<LABEL>");
1194+
break;
11151195
case OP_push:
11161196
sprintf(str, "<PUSH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11171197
insn->rs1->subscript);
@@ -1281,6 +1361,9 @@ void ssa_build(void)
12811361
build_df();
12821362

12831363
solve_globals();
1364+
1365+
check_var_cross_init();
1366+
12841367
solve_phi_insertion();
12851368
solve_phi_params();
12861369

0 commit comments

Comments
 (0)