Skip to content

Commit 8da1d20

Browse files
committed
Implement goto and label statement support
Enable parsing and analysis of C labels and goto so the compiler can accept common patterns (for example error-handling cleanup) and be more compatible with real-world C code. Provide diagnostics for duplicate, undefined, and unused labels, and warn when a goto can bypass a variable's initialization. Close #280
1 parent c4f778e commit 8da1d20

File tree

6 files changed

+230
-2
lines changed

6 files changed

+230
-2
lines changed

COMPLIANCE.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ This document tracks compliance gaps and non-standard behaviors.
1717

1818
### Control Flow
1919
- `if`/`else` statements
20+
- `goto` and label statements
2021
- `while`, `do-while`, `for` loops
2122
- `switch`/`case`/`default` statements
2223
- `break`, `continue`, `return` statements
@@ -96,7 +97,6 @@ This document tracks compliance gaps and non-standard behaviors.
9697

9798
| Feature | Status | Description |
9899
|---------|--------|-------------|
99-
| `goto` and labels | Missing | No arbitrary jumps |
100100
| Designated initializers | Missing | No `.field = value` syntax |
101101
| Compound literals | Partial | Limited support |
102102
| Flexible array members | Missing | No `[]` at struct end |
@@ -115,6 +115,7 @@ This document tracks compliance gaps and non-standard behaviors.
115115
- Escape sequence: `\e` for ESC character
116116
- `void*` arithmetic (treated as `char*`)
117117
- `sizeof(void)` returns 0 (should be error)
118+
- Computed goto
118119

119120
### Implementation-Specific
120121
- Array compound literals in scalar context use first element

src/defs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define MAX_LOCALS 1600
2121
#define MAX_FIELDS 64
2222
#define MAX_TYPES 256
23+
#define MAX_LABELS 256
2324
#define MAX_IR_INSTR 80000
2425
#define MAX_BB_PRED 128
2526
#define MAX_BB_DOM_SUCC 64
@@ -179,6 +180,7 @@ typedef enum {
179180
T_break,
180181
T_default,
181182
T_continue,
183+
T_goto,
182184
T_const, /* const qualifier */
183185
/* C pre-processor directives */
184186
T_cppd_include,
@@ -270,6 +272,7 @@ typedef enum {
270272
OP_branch, /* conditional jump */
271273
OP_jump, /* unconditional jump */
272274
OP_func_ret, /* returned value */
275+
OP_label, /* for goto label */
273276

274277
/* function pointer */
275278
OP_address_of_func, /* resolve function entry */
@@ -567,6 +570,13 @@ struct ref_block {
567570
* type, parameters) with SSA-related information (e.g., basic blocks, control
568571
* flow) to support parsing, analysis, optimization, and code generation.
569572
*/
573+
574+
typedef struct {
575+
char label_name[MAX_ID_LEN];
576+
basic_block_t *bb;
577+
bool used;
578+
} label_t;
579+
570580
struct func {
571581
/* Syntactic info */
572582
var_t return_def;

src/globals.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14811481
printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name,
14821482
bb->else_->bb_label_name);
14831483
break;
1484+
case OP_jump:
1485+
print_indent(1);
1486+
printf("jmp %s", bb->next->bb_label_name);
1487+
break;
1488+
case OP_label:
1489+
print_indent(0);
1490+
printf("%s:", insn->str);
1491+
break;
14841492
case OP_push:
14851493
print_indent(1);
14861494
printf("push %%%s", rs1->var_name);

src/lexer.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
/* Hash table constants */
1414
#define NUM_DIRECTIVES 11
15-
#define NUM_KEYWORDS 17
15+
#define NUM_KEYWORDS 18
1616

1717
/* Token mapping structure for elegant initialization */
1818
typedef struct {
@@ -85,6 +85,7 @@ void lex_init_keywords()
8585
{"break", T_break},
8686
{"default", T_default},
8787
{"continue", T_continue},
88+
{"goto", T_goto},
8889
{"union", T_union},
8990
{"const", T_const},
9091
};
@@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing)
786787
keyword = T_enum;
787788
} else if (!memcmp(token_str, "case", 4))
788789
keyword = T_case;
790+
else if (!memcmp(token_str, "goto", 4))
791+
keyword = T_goto;
789792
break;
790793

791794
case 5: /* 5-letter keywords: while, break, union, const */

src/parser.c

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ int break_exit_idx = 0;
2626
basic_block_t *continue_bb[MAX_NESTING];
2727
int continue_pos_idx = 0;
2828

29+
/* Label utilities */
30+
label_t labels[MAX_LABELS];
31+
int label_idx = 0;
32+
basic_block_t *backpatch_bb[MAX_LABELS];
33+
int backpatch_bb_idx = 0;
34+
2935
/* stack of the operands of 3AC */
3036
var_t *operand_stack[MAX_OPERAND_STACK_SIZE];
3137
int operand_stack_idx = 0;
@@ -40,6 +46,26 @@ void parse_array_init(var_t *var,
4046
basic_block_t **bb,
4147
bool emit_code);
4248

49+
50+
label_t *find_label(char *name)
51+
{
52+
for (int i = 0; i < label_idx; i++) {
53+
if (!strcmp(name, labels[i].label_name))
54+
return &labels[i];
55+
}
56+
return NULL;
57+
}
58+
59+
/* Register a label for the function currently being parsed.
 *
 * @name: identifier of the label
 * @bb:   basic block that begins at the label
 *
 * Reports an error when the label table is already full.
 */
void add_label(char *name, basic_block_t *bb)
{
    if (label_idx >= MAX_LABELS)
        error("Too many labels in function");

    label_t *l = &labels[label_idx++];

    /* strncpy() does not terminate the destination when the source fills the
     * whole buffer, so copy one byte less and terminate explicitly.
     */
    strncpy(l->label_name, name, MAX_ID_LEN - 1);
    l->label_name[MAX_ID_LEN - 1] = '\0';
    l->bb = bb;

    /* The table is reused for every function (only label_idx is reset), so
     * the slot may still carry 'used' from a label of a previous function.
     * Clear it, otherwise the unused-label warning is silently suppressed.
     */
    l->used = false;
}
68+
4369
char *gen_name_to(char *buf)
4470
{
4571
sprintf(buf, ".t%d", global_var_idx++);
@@ -997,6 +1023,61 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb)
9971023
return else_;
9981024
}
9991025

1026+
/* Parse the remainder of a goto statement (the 'goto' keyword has already
 * been consumed) and emit the corresponding control flow.
 *
 * A bare goto would end the current basic block and leave the code that
 * follows it without any predecessor, which causes problems for later CFG
 * operations. To avoid that, the jump is wrapped in an always-taken
 * conditional: the jump lives in the THEN block and the statements after the
 * goto continue in the ELSE block, which is what this function returns.
 *
 * before:
 *     a = b + c;
 *     goto label;
 *     c *= d;
 *
 * after:
 *     a = b + c;
 *     if (1)
 *         goto label;
 *     c *= d;
 *
 * @parent: enclosing scope used for the generated variable and blocks
 * @bb:     basic block that precedes the goto
 *
 * Return: the ELSE block, where parsing of the following statements resumes.
 */
basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb)
{
    char target_name[MAX_ID_LEN];

    if (!lex_peek(T_identifier, target_name))
        error("Expected identifier after 'goto'");

    lex_expect(T_identifier);
    lex_expect(T_semicolon);

    /* Block holding the constant condition and the branch on it */
    basic_block_t *guard = bb_create(parent);
    bb_connect(bb, guard, NEXT);

    var_t *always_true = require_var(parent);
    gen_name_to(always_true->var_name);
    always_true->init_val = 1;
    add_insn(parent, guard, OP_load_constant, always_true, NULL, NULL, 0,
             NULL);
    add_insn(parent, guard, OP_branch, NULL, always_true, NULL, 0, NULL);

    basic_block_t *jump_bb = bb_create(parent);
    basic_block_t *fallthrough_bb = bb_create(parent);
    bb_connect(guard, jump_bb, THEN);
    bb_connect(guard, fallthrough_bb, ELSE);

    /* The jump carries the label name so a forward reference can be resolved
     * from the instruction itself once the whole body has been parsed.
     */
    add_insn(parent, jump_bb, OP_jump, NULL, NULL, NULL, 0, target_name);

    label_t *target = find_label(target_name);
    if (target) {
        /* Label already defined: wire the edge right away */
        target->used = true;
        bb_connect(jump_bb, target->bb, NEXT);
    } else {
        /* Label not defined yet: remember the block for backpatching */
        if (backpatch_bb_idx > MAX_LABELS - 1)
            error("Too many forward-referenced labels");

        backpatch_bb[backpatch_bb_idx++] = jump_bb;
    }

    return fallthrough_bb;
}
1080+
10001081
basic_block_t *handle_struct_variable_decl(block_t *parent,
10011082
basic_block_t *bb,
10021083
char *token)
@@ -4169,6 +4250,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41694250
return do_while_end;
41704251
}
41714252

4253+
if (lex_accept(T_goto))
4254+
return handle_goto_statement(parent, bb);
4255+
41724256
/* empty statement */
41734257
if (lex_accept(T_semicolon))
41744258
return bb;
@@ -4753,6 +4837,21 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
47534837
return bb;
47544838
}
47554839

4840+
if (lex_peek(T_identifier, token)) {
4841+
lex_accept(T_identifier);
4842+
if (lex_accept(T_colon)) {
4843+
label_t *l = find_label(token);
4844+
if (l)
4845+
error("label redefinition");
4846+
4847+
basic_block_t *n = bb_create(parent);
4848+
bb_connect(bb, n, NEXT);
4849+
add_label(token, n);
4850+
add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token);
4851+
return n;
4852+
}
4853+
}
4854+
47564855
error("Unrecognized statement token");
47574856
return NULL;
47584857
}
@@ -4794,6 +4893,28 @@ void read_func_body(func_t *func)
47944893
basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs);
47954894
if (body)
47964895
bb_connect(body, func->exit, NEXT);
4896+
4897+
for (int i = 0; i < backpatch_bb_idx; i++) {
4898+
basic_block_t *bb = backpatch_bb[i];
4899+
insn_t *g = bb->insn_list.tail;
4900+
label_t *label = find_label(g->str);
4901+
if (!label)
4902+
error("goto label undefined");
4903+
4904+
label->used = true;
4905+
bb_connect(bb, label->bb, NEXT);
4906+
}
4907+
4908+
for (int i = 0; i < label_idx; i++) {
4909+
label_t *label = &labels[i];
4910+
if (label->used)
4911+
continue;
4912+
4913+
printf("Warning: unused label %s\n", label->label_name);
4914+
}
4915+
4916+
backpatch_bb_idx = 0;
4917+
label_idx = 0;
47974918
}
47984919

47994920
/* if first token is type */

src/ssa.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,82 @@ void unwind_phi(void)
929929
}
930930
}
931931

932+
/* Tell whether 'succ' is reachable from 'pred' by following dom_next links.
 *
 * @pred: block whose dom_next entries are searched, recursively
 * @succ: block being looked for
 *
 * Return: true when 'succ' is found below 'pred', false otherwise. 'pred'
 *         itself is never compared against 'succ'.
 */
bool is_dominate(basic_block_t *pred, basic_block_t *succ)
{
    bool dominates = false;
    int slot = 0;

    /* The dom_next array is scanned up to its first empty slot */
    while (slot < MAX_BB_DOM_SUCC && pred->dom_next[slot]) {
        basic_block_t *child = pred->dom_next[slot];

        if (child == succ) {
            dominates = true;
            break;
        }

        if (is_dominate(child, succ))
            dominates = true;

        slot++;
    }

    return dominates;
}
948+
949+
/*
950+
* For any variable, the basic block that defines it must dominate all the
951+
* basic blocks where it is used; otherwise, it is an invalid cross-block
952+
* initialization.
953+
*/
954+
void bb_check_var_cross_init(func_t *func, basic_block_t *bb)
955+
{
956+
UNUSED(func);
957+
958+
for (insn_t *insn = bb->insn_list.head; insn; insn = insn->next) {
959+
if (insn->opcode != OP_allocat)
960+
continue;
961+
962+
var_t *var = insn->rd;
963+
ref_block_t *ref;
964+
for (ref = var->ref_block_list.head; ref; ref = ref->next) {
965+
if (ref->bb == bb)
966+
continue;
967+
968+
if (!is_dominate(bb, ref->bb))
969+
printf("Warning: Variable '%s' cross-initialized\n",
970+
var->var_name);
971+
}
972+
}
973+
}
974+
975+
/**
976+
* A variable's initialization lives in a basic block that does not dominate
977+
* all of its uses, so control flow can reach a use without first passing
978+
* through its initialization (i.e., a possibly-uninitialized use).
979+
*
980+
* For Example:
981+
* // Jumps directly to 'label', skipping the declaration below
982+
* goto label;
983+
* if (1) {
984+
* // This line is never executed when 'goto' is taken
985+
* int x;
986+
* label:
987+
* // Uses 'x' after its declaration was bypassed
988+
* x = 5;
989+
* }
990+
*/
991+
void check_var_cross_init()
992+
{
993+
bb_traversal_args_t *args = arena_alloc_traversal_args();
994+
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
995+
/* Skip function declarations without bodies */
996+
if (!func->bbs)
997+
continue;
998+
999+
args->func = func;
1000+
args->bb = func->bbs;
1001+
1002+
func->visited++;
1003+
args->postorder_cb = bb_check_var_cross_init;
1004+
bb_forward_traversal(args);
1005+
}
1006+
}
1007+
9321008
#ifdef __SHECC__
9331009
#else
9341010
void bb_dump_connection(FILE *fd,
@@ -1112,6 +1188,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb)
11121188
sprintf(str, "<BRANCH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11131189
insn->rs1->subscript);
11141190
break;
1191+
case OP_jump:
1192+
sprintf(str, "<JUMP>");
1193+
break;
1194+
case OP_label:
1195+
sprintf(str, "<LABEL>");
1196+
break;
11151197
case OP_push:
11161198
sprintf(str, "<PUSH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11171199
insn->rs1->subscript);
@@ -1281,6 +1363,9 @@ void ssa_build(void)
12811363
build_df();
12821364

12831365
solve_globals();
1366+
1367+
check_var_cross_init();
1368+
12841369
solve_phi_insertion();
12851370
solve_phi_params();
12861371

0 commit comments

Comments
 (0)