Skip to content

Commit e0871d0

Browse files
committed
Implement goto and label statement support
Enable parsing and analysis of C labels and goto so the compiler can accept common patterns (for example error-handling cleanup) and be more compatible with real-world C code. Provide diagnostics for duplicate, undefined, and unused labels, and warn when a goto can bypass a variable's initialization. Close #280
1 parent c4f778e commit e0871d0

File tree

6 files changed

+220
-2
lines changed

6 files changed

+220
-2
lines changed

COMPLIANCE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ This document tracks compliance gaps and non-standard behaviors.
1717

1818
### Control Flow
1919
- `if`/`else` statements
20+
- `goto` and label statements
2021
- `while`, `do-while`, `for` loops
2122
- `switch`/`case`/`default` statements
2223
- `break`, `continue`, `return` statements
@@ -96,7 +97,6 @@ This document tracks compliance gaps and non-standard behaviors.
9697

9798
| Feature | Status | Description |
9899
|---------|--------|-------------|
99-
| `goto` and labels | Missing | No arbitrary jumps |
100100
| Designated initializers | Missing | No `.field = value` syntax |
101101
| Compound literals | Partial | Limited support |
102102
| Flexible array members | Missing | No `[]` at struct end |
@@ -115,6 +115,7 @@ This document tracks compliance gaps and non-standard behaviors.
115115
- Escape sequence: `\e` for ESC character
116116
- `void*` arithmetic (treated as `char*`)
117117
- `sizeof(void)` returns 0 (should be error)
118+
- Computed goto
118119

119120
### Implementation-Specific
120121
- Array compound literals in scalar context use first element

src/defs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define MAX_LOCALS 1600
2121
#define MAX_FIELDS 64
2222
#define MAX_TYPES 256
23+
#define MAX_LABELS 256
2324
#define MAX_IR_INSTR 80000
2425
#define MAX_BB_PRED 128
2526
#define MAX_BB_DOM_SUCC 64
@@ -179,6 +180,7 @@ typedef enum {
179180
T_break,
180181
T_default,
181182
T_continue,
183+
T_goto,
182184
T_const, /* const qualifier */
183185
/* C pre-processor directives */
184186
T_cppd_include,
@@ -270,6 +272,7 @@ typedef enum {
270272
OP_branch, /* conditional jump */
271273
OP_jump, /* unconditional jump */
272274
OP_func_ret, /* returned value */
275+
OP_label, /* for goto label */
273276

274277
/* function pointer */
275278
OP_address_of_func, /* resolve function entry */
@@ -567,6 +570,13 @@ struct ref_block {
567570
* type, parameters) with SSA-related information (e.g., basic blocks, control
568571
* flow) to support parsing, analysis, optimization, and code generation.
569572
*/
573+
574+
typedef struct {
575+
char label_name[MAX_ID_LEN];
576+
basic_block_t *bb;
577+
bool used;
578+
} label_t;
579+
570580
struct func {
571581
/* Syntactic info */
572582
var_t return_def;

src/globals.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14811481
printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name,
14821482
bb->else_->bb_label_name);
14831483
break;
1484+
case OP_jump:
1485+
print_indent(1);
1486+
printf("jmp %s", bb->next->bb_label_name);
1487+
break;
1488+
case OP_label:
1489+
print_indent(0);
1490+
printf("%s:", insn->str);
1491+
break;
14841492
case OP_push:
14851493
print_indent(1);
14861494
printf("push %%%s", rs1->var_name);

src/lexer.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 17
15+
#define NUM_KEYWORDS 18
1616

1717
/* Token mapping structure for elegant initialization */
1818
typedef struct {
@@ -85,6 +85,7 @@ void lex_init_keywords()
8585
{"break", T_break},
8686
{"default", T_default},
8787
{"continue", T_continue},
88+
{"goto", T_goto},
8889
{"union", T_union},
8990
{"const", T_const},
9091
};
@@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing)
786787
keyword = T_enum;
787788
} else if (!memcmp(token_str, "case", 4))
788789
keyword = T_case;
790+
else if (!memcmp(token_str, "goto", 4))
791+
keyword = T_goto;
789792
break;
790793

791794
case 5: /* 5-letter keywords: while, break, union, const */

src/parser.c

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ int break_exit_idx = 0;
2626
basic_block_t *continue_bb[MAX_NESTING];
2727
int continue_pos_idx = 0;
2828

29+
/* Label utilities */
30+
label_t labels[MAX_LABELS];
31+
int label_idx = 0;
32+
basic_block_t *backpatch_bb[MAX_LABELS];
33+
int backpatch_bb_idx = 0;
34+
2935
/* stack of the operands of 3AC */
3036
var_t *operand_stack[MAX_OPERAND_STACK_SIZE];
3137
int operand_stack_idx = 0;
@@ -40,6 +46,23 @@ void parse_array_init(var_t *var,
4046
basic_block_t **bb,
4147
bool emit_code);
4248

49+
50+
label_t *find_label(char *name)
51+
{
52+
for (int i = 0; i < label_idx; i++) {
53+
if (!strcmp(name, labels[i].label_name))
54+
return &labels[i];
55+
}
56+
return NULL;
57+
}
58+
59+
/* Record a new label for the function being parsed.
 *
 * @name: label identifier; truncated to MAX_ID_LEN - 1 characters
 * @bb:   basic block that starts at the label
 *
 * The labels[] table is recycled for every function (label_idx is reset to 0
 * once a function body has been read), so every field of the slot is written
 * here. In particular 'used' must be cleared: otherwise a slot that belonged
 * to a referenced label of an earlier function would make a never-referenced
 * label look used and silence the unused-label warning.
 */
void add_label(char *name, basic_block_t *bb)
{
    /* Refuse to write past the end of the fixed-size table */
    if (label_idx >= MAX_LABELS) {
        error("Too many labels in function");
        return;
    }

    label_t *l = &labels[label_idx++];

    /* strncpy does not terminate the destination when the source fills it */
    strncpy(l->label_name, name, MAX_ID_LEN - 1);
    l->label_name[MAX_ID_LEN - 1] = '\0';

    l->bb = bb;
    l->used = false;
}
65+
4366
char *gen_name_to(char *buf)
4467
{
4568
sprintf(buf, ".t%d", global_var_idx++);
@@ -997,6 +1020,58 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb)
9971020
return else_;
9981021
}
9991022

1023+
/* Parse the rest of a 'goto <label>;' statement and emit its control flow.
 * The 'goto' keyword itself has already been accepted by the caller.
 *
 * @parent: block the statement belongs to
 * @bb:     basic block that precedes the goto
 *
 * A plain goto would end the current basic block and leave the code that
 * follows it without any predecessor, which causes problems for later CFG
 * operations. The goto is therefore wrapped in a fake 'if' whose condition is
 * the constant 1, and the code following the goto is attached to the else
 * branch of that 'if'.
 *
 * before:
 *     code1;
 *     goto label;
 *     code2;
 *
 * after:
 *     code1;
 *     if (1) goto label;
 *     code2;
 *
 * If the label is already known, the jump block is connected to it at once.
 * Otherwise the jump block is queued in backpatch_bb[] and connected after
 * the whole function body has been read.
 *
 * Return: the else block, where parsing of the following statements
 *         continues, or NULL after an error has been reported.
 */
basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb)
{
    char token[MAX_ID_LEN];

    if (!lex_peek(T_identifier, token)) {
        error("Expected identifier after 'goto'");
        return NULL;
    }
    lex_expect(T_identifier);
    lex_expect(T_semicolon);

    /* Block evaluating the always-true condition of the fake 'if' */
    basic_block_t *fake_if = bb_create(parent);
    bb_connect(bb, fake_if, NEXT);
    var_t *val = require_var(parent);
    gen_name_to(val->var_name);
    val->init_val = 1;
    add_insn(parent, fake_if, OP_load_constant, val, NULL, NULL, 0, NULL);
    add_insn(parent, fake_if, OP_branch, NULL, val, NULL, 0, NULL);

    /* then_ carries the jump; else_ receives the code after the goto */
    basic_block_t *then_ = bb_create(parent);
    basic_block_t *else_ = bb_create(parent);
    bb_connect(fake_if, then_, THEN);
    bb_connect(fake_if, else_, ELSE);

    /* The label name is stored on the jump instruction so that a forward
     * reference can be looked up again when it is back-patched.
     */
    label_t *label = find_label(token);
    add_insn(parent, then_, OP_jump, NULL, NULL, NULL, 0, token);
    if (label) {
        label->used = true;
        bb_connect(then_, label->bb, NEXT);
    } else {
        /* Forward reference: refuse to overflow the fixed-size queue */
        if (backpatch_bb_idx >= MAX_LABELS) {
            error("Too many unresolved goto statements in function");
            return NULL;
        }
        backpatch_bb[backpatch_bb_idx++] = then_;
    }

    return else_;
}
1074+
10001075
basic_block_t *handle_struct_variable_decl(block_t *parent,
10011076
basic_block_t *bb,
10021077
char *token)
@@ -4169,6 +4244,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41694244
return do_while_end;
41704245
}
41714246

4247+
if (lex_accept(T_goto))
4248+
return handle_goto_statement(parent, bb);
4249+
41724250
/* empty statement */
41734251
if (lex_accept(T_semicolon))
41744252
return bb;
@@ -4753,6 +4831,22 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
47534831
return bb;
47544832
}
47554833

4834+
if (lex_peek(T_identifier, token)) {
4835+
lex_accept(T_identifier);
4836+
if (lex_accept(T_colon)) {
4837+
label_t *l = find_label(token);
4838+
if (l) {
4839+
error("label redefinition");
4840+
return NULL;
4841+
}
4842+
basic_block_t *n = bb_create(parent);
4843+
bb_connect(bb, n, NEXT);
4844+
add_label(token, n);
4845+
add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token);
4846+
return n;
4847+
}
4848+
}
4849+
47564850
error("Unrecognized statement token");
47574851
return NULL;
47584852
}
@@ -4794,6 +4888,27 @@ void read_func_body(func_t *func)
47944888
basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs);
47954889
if (body)
47964890
bb_connect(body, func->exit, NEXT);
4891+
4892+
for (int i = 0; i < backpatch_bb_idx; i++) {
4893+
basic_block_t *bb = backpatch_bb[i];
4894+
insn_t *g = bb->insn_list.tail;
4895+
label_t *label = find_label(g->str);
4896+
if (!label) {
4897+
error("goto label undefined");
4898+
} else {
4899+
label->used = true;
4900+
bb_connect(bb, label->bb, NEXT);
4901+
}
4902+
}
4903+
4904+
for (int i = 0; i < label_idx; i++) {
4905+
label_t *label = &labels[i];
4906+
if (!label->used)
4907+
printf("Warning: unused label %s\n", label->label_name);
4908+
}
4909+
4910+
backpatch_bb_idx = 0;
4911+
label_idx = 0;
47974912
}
47984913

47994914
/* if first token is type */

src/ssa.c

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,78 @@ void unwind_phi(void)
929929
}
930930
}
931931

932+
/* Report whether 'succ' can be reached from 'pred' by following dom_next[]
 * links (presumably the children in the dominator tree), at any depth.
 *
 * @pred: block whose dom_next[] subtree is searched
 * @succ: block to look for
 *
 * 'pred' itself is not compared with 'succ'; callers that consider a block to
 * dominate itself must test for equality first.
 *
 * Return: true if 'succ' is found below 'pred', false otherwise.
 */
bool is_dominate(basic_block_t *pred, basic_block_t *succ)
{
    for (int i = 0; i < MAX_BB_DOM_SUCC; i++) {
        basic_block_t *child = pred->dom_next[i];

        /* The first empty slot ends the list of children */
        if (!child)
            break;

        /* Stop at the first match instead of scanning the remaining
         * siblings and their subtrees as well.
         */
        if (child == succ || is_dominate(child, succ))
            return true;
    }

    return false;
}
948+
949+
/* Traversal callback: warn about variables allocated in 'bb' that are
 * referenced from a block which 'bb' does not dominate.
 *
 * @func: function being traversed (unused)
 * @bb:   basic block whose OP_allocat instructions are inspected
 *
 * The block that allocates a variable must dominate every block that
 * references it; otherwise control flow can reach a reference without passing
 * through the allocation. One warning is printed for each such referencing
 * block.
 */
void bb_check_var_cross_init(func_t *func, basic_block_t *bb)
{
    UNUSED(func);

    for (insn_t *cur = bb->insn_list.head; cur; cur = cur->next) {
        if (cur->opcode != OP_allocat)
            continue;

        var_t *decl = cur->rd;
        ref_block_t *use = decl->ref_block_list.head;
        while (use) {
            /* References inside the allocating block itself are fine */
            if (use->bb != bb && !is_dominate(bb, use->bb))
                printf("Warning: Variable '%s' cross-initialized\n",
                       decl->var_name);
            use = use->next;
        }
    }
}
973+
974+
void check_var_cross_init()
975+
{
976+
/**
977+
* A variable's initialization lives in a basic block that does not dominate
978+
* all of its uses, so control flow can reach a use without first passing
979+
* through its initialization (i.e., a possibly-uninitialized use).
980+
*
981+
* For Example:
982+
* goto label; // Jumps directly to 'label', skipping the
983+
* declaration below if (1) { int x; // This line is never
984+
* executed when 'goto' is taken label: x = 5; // Uses 'x'
985+
* after its declaration was bypassed
986+
* }
987+
*/
988+
989+
bb_traversal_args_t *args = arena_alloc_traversal_args();
990+
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
991+
/* Skip function declarations without bodies */
992+
if (!func->bbs)
993+
continue;
994+
995+
args->func = func;
996+
args->bb = func->bbs;
997+
998+
func->visited++;
999+
args->postorder_cb = bb_check_var_cross_init;
1000+
bb_forward_traversal(args);
1001+
}
1002+
}
1003+
9321004
#ifdef __SHECC__
9331005
#else
9341006
void bb_dump_connection(FILE *fd,
@@ -1112,6 +1184,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb)
11121184
sprintf(str, "<BRANCH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11131185
insn->rs1->subscript);
11141186
break;
1187+
case OP_jump:
1188+
sprintf(str, "<JUMP>");
1189+
break;
1190+
case OP_label:
1191+
sprintf(str, "<LABEL>");
1192+
break;
11151193
case OP_push:
11161194
sprintf(str, "<PUSH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11171195
insn->rs1->subscript);
@@ -1281,6 +1359,9 @@ void ssa_build(void)
12811359
build_df();
12821360

12831361
solve_globals();
1362+
1363+
check_var_cross_init();
1364+
12841365
solve_phi_insertion();
12851366
solve_phi_params();
12861367

0 commit comments

Comments
 (0)