Skip to content

Commit d275d4d

Browse files
committed
Implement goto statement support
Lexer/Parser: add T_goto; parse goto identifier; and identifier: labels. Function-scoped labels with forward references via backpatching. Duplicate labels error; undefined labels error; unused labels warning. IR/CFG: introduce OP_label and OP_jump; update human-readable and graph dumps. Lower goto via an if (1) wrapper to keep CFG well-formed (then: jmp label; else: fallthrough). Label tracking: label_t table and backpatch list (MAX_LABELS=256), reset per function. SSA/semantics: add dominance-based check to warn when goto crosses variable initialization. Notes: no computed goto (GNU extension).
1 parent c4f778e commit d275d4d

File tree

5 files changed

+207
-0
lines changed

5 files changed

+207
-0
lines changed

src/defs.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define MAX_LOCALS 1600
2121
#define MAX_FIELDS 64
2222
#define MAX_TYPES 256
23+
#define MAX_LABELS 256
2324
#define MAX_IR_INSTR 80000
2425
#define MAX_BB_PRED 128
2526
#define MAX_BB_DOM_SUCC 64
@@ -179,6 +180,7 @@ typedef enum {
179180
T_break,
180181
T_default,
181182
T_continue,
183+
T_goto,
182184
T_const, /* const qualifier */
183185
/* C pre-processor directives */
184186
T_cppd_include,
@@ -270,6 +272,7 @@ typedef enum {
270272
OP_branch, /* conditional jump */
271273
OP_jump, /* unconditional jump */
272274
OP_func_ret, /* returned value */
275+
OP_label, /* for goto label */
273276

274277
/* function pointer */
275278
OP_address_of_func, /* resolve function entry */
@@ -567,6 +570,13 @@ struct ref_block {
567570
* type, parameters) with SSA-related information (e.g., basic blocks, control
568571
* flow) to support parsing, analysis, optimization, and code generation.
569572
*/
573+
574+
/* One goto label recorded by the parser for the function being parsed. */
typedef struct {
575+
/* Label identifier, copied from the token that precedes the ':'. */
char label_name[MAX_ID_LEN];
576+
/* Basic block created for the label; goto statements are connected to it. */
basic_block_t *bb;
577+
/* Set to true once a goto targets this label; labels left false are
 * reported as unused when the function body has been parsed.
 */
bool used;
578+
} label_t;
579+
570580
struct func {
571581
/* Syntactic info */
572582
var_t return_def;

src/globals.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,6 +1481,14 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
14811481
printf("br %%%s, %s, %s", rs1->var_name, bb->then_->bb_label_name,
14821482
bb->else_->bb_label_name);
14831483
break;
1484+
case OP_jump:
1485+
print_indent(1);
1486+
printf("jmp %s", bb->next->bb_label_name);
1487+
break;
1488+
case OP_label:
1489+
print_indent(0);
1490+
printf("%s:", insn->str);
1491+
break;
14841492
case OP_push:
14851493
print_indent(1);
14861494
printf("push %%%s", rs1->var_name);

src/lexer.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ void lex_init_keywords()
8585
{"break", T_break},
8686
{"default", T_default},
8787
{"continue", T_continue},
88+
{"goto", T_goto},
8889
{"union", T_union},
8990
{"const", T_const},
9091
};
@@ -786,6 +787,8 @@ token_t lex_token_impl(bool aliasing)
786787
keyword = T_enum;
787788
} else if (!memcmp(token_str, "case", 4))
788789
keyword = T_case;
790+
else if (!memcmp(token_str, "goto", 4))
791+
keyword = T_goto;
789792
break;
790793

791794
case 5: /* 5-letter keywords: while, break, union, const */

src/parser.c

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ int break_exit_idx = 0;
2626
basic_block_t *continue_bb[MAX_NESTING];
2727
int continue_pos_idx = 0;
2828

29+
/* Label utilities: goto/label bookkeeping for the function currently being
 * parsed. Both index counters are reset to 0 at the end of read_func_body().
 */
30+
/* Labels defined so far; the first label_idx entries are valid. */
label_t labels[MAX_LABELS];
31+
int label_idx = 0;
32+
/* Basic blocks ending in an OP_jump whose target label had not been defined
 * when the goto was parsed; they are connected once the body is complete.
 */
basic_block_t *backpatch_bb[MAX_LABELS];
33+
int backpatch_bb_idx = 0;
34+
2935
/* stack of the operands of 3AC */
3036
var_t *operand_stack[MAX_OPERAND_STACK_SIZE];
3137
int operand_stack_idx = 0;
@@ -40,12 +46,30 @@ void parse_array_init(var_t *var,
4046
basic_block_t **bb,
4147
bool emit_code);
4248

49+
50+
label_t *find_label(char *name)
51+
{
52+
for (int i = 0; i < label_idx; i++) {
53+
if (!strcmp(name, labels[i].label_name))
54+
return &labels[i];
55+
}
56+
return NULL;
57+
}
58+
59+
/* Record a new label for the function currently being parsed.
 *
 * @name: label identifier; copied into the table (truncated to fit
 *        MAX_ID_LEN - 1 characters plus the terminator).
 * @bb:   basic block that begins at the label.
 *
 * The caller is expected to have checked for duplicates with find_label().
 * Reports an error when the table already holds MAX_LABELS entries.
 */
void add_label(char *name, basic_block_t *bb)
{
    if (label_idx >= MAX_LABELS) {
        error("Too many labels in function");
        /* Do not write past the table in case error() returns. */
        return;
    }

    label_t *l = &labels[label_idx++];

    /* strncpy() does not terminate the destination when the source fills
     * it, so reserve the last byte and terminate explicitly.
     */
    strncpy(l->label_name, name, MAX_ID_LEN - 1);
    l->label_name[MAX_ID_LEN - 1] = 0;
    l->bb = bb;

    /* Table slots are reused from one function to the next (label_idx is
     * reset, the entries are not cleared), so a stale 'used' flag from a
     * previous function would hide the unused-label warning.
     */
    l->used = false;
}
65+
4366
char *gen_name_to(char *buf)
4467
{
4568
sprintf(buf, ".t%d", global_var_idx++);
4669
return buf;
4770
}
4871

72+
4973
var_t *require_var(block_t *blk)
5074
{
5175
var_list_t *var_list = &blk->locals;
@@ -997,6 +1021,60 @@ basic_block_t *handle_while_statement(block_t *parent, basic_block_t *bb)
9971021
return else_;
9981022
}
9991023

1024+
1025+
1026+
/* Parse the rest of a 'goto identifier;' statement and lower it into the CFG.
 * The caller has already consumed the T_goto token.
 *
 * @parent: enclosing block, used for creating basic blocks and temporaries.
 * @bb:     basic block that precedes the goto.
 *
 * Returns the basic block in which parsing of the following statements
 * continues, or NULL after reporting an error.
 */
basic_block_t *handle_goto_statement(block_t *parent, basic_block_t *bb)
{
    /* Since a goto splits the current program into two basic blocks and makes
     * the subsequent basic block unreachable, this causes problems for later
     * CFG operations. Therefore, we create a fake if that always executes to
     * wrap the goto, and connect the otherwise unreachable basic block to the
     * else branch. Finally, return this else block.
     *
     * before:
     * code1;
     * goto label;
     * code2;
     *
     * after:
     * code1;
     * if (1) goto label;
     * code2;
     */
    char token[MAX_ID_LEN];

    if (!lex_peek(T_identifier, token)) {
        error("Expected identifier after 'goto'");
        return NULL;
    }
    lex_expect(T_identifier);
    lex_expect(T_semicolon);

    /* Condition block: load the constant 1 and branch on it. */
    basic_block_t *fake_if = bb_create(parent);
    bb_connect(bb, fake_if, NEXT);
    var_t *val = require_var(parent);
    gen_name_to(val->var_name);
    val->init_val = 1;
    add_insn(parent, fake_if, OP_load_constant, val, NULL, NULL, 0, NULL);
    add_insn(parent, fake_if, OP_branch, NULL, val, NULL, 0, NULL);

    basic_block_t *then_ = bb_create(parent);
    basic_block_t *else_ = bb_create(parent);
    bb_connect(fake_if, then_, THEN);
    bb_connect(fake_if, else_, ELSE);

    /* The jump carries the label name in its string operand. It must stay
     * the last instruction of then_: read_func_body() reads the name back
     * from the tail instruction when resolving forward references.
     */
    label_t *label = find_label(token);
    add_insn(parent, then_, OP_jump, NULL, NULL, NULL, 0, token);

    if (label) {
        /* Backward reference: the target block already exists. */
        label->used = true;
        bb_connect(then_, label->bb, NEXT);
        return else_;
    }

    /* Forward reference: defer the connection until the whole function body
     * has been parsed. Several gotos may await the same label, so the list
     * can fill up independently of the number of labels.
     */
    if (backpatch_bb_idx >= MAX_LABELS) {
        error("Too many unresolved goto statements in function");
        return NULL;
    }
    backpatch_bb[backpatch_bb_idx++] = then_;
    return else_;
}
1077+
10001078
basic_block_t *handle_struct_variable_decl(block_t *parent,
10011079
basic_block_t *bb,
10021080
char *token)
@@ -4169,6 +4247,9 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
41694247
return do_while_end;
41704248
}
41714249

4250+
if (lex_accept(T_goto))
4251+
return handle_goto_statement(parent, bb);
4252+
41724253
/* empty statement */
41734254
if (lex_accept(T_semicolon))
41744255
return bb;
@@ -4753,6 +4834,22 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
47534834
return bb;
47544835
}
47554836

4837+
if (lex_peek(T_identifier, token)) {
4838+
lex_accept(T_identifier);
4839+
if (lex_accept(T_colon)) {
4840+
label_t *l = find_label(token);
4841+
if (l) {
4842+
error("label redefinition");
4843+
return NULL;
4844+
}
4845+
basic_block_t *n = bb_create(parent);
4846+
bb_connect(bb, n, NEXT);
4847+
add_label(token, n);
4848+
add_insn(parent, n, OP_label, NULL, NULL, NULL, 0, token);
4849+
return n;
4850+
}
4851+
}
4852+
47564853
error("Unrecognized statement token");
47574854
return NULL;
47584855
}
@@ -4794,6 +4891,27 @@ void read_func_body(func_t *func)
47944891
basic_block_t *body = read_code_block(func, NULL, NULL, func->bbs);
47954892
if (body)
47964893
bb_connect(body, func->exit, NEXT);
4894+
4895+
for (int i = 0; i < backpatch_bb_idx; i++) {
4896+
basic_block_t *bb = backpatch_bb[i];
4897+
insn_t *g = bb->insn_list.tail;
4898+
label_t *label = find_label(g->str);
4899+
if (!label) {
4900+
error("goto label undefined");
4901+
} else {
4902+
label->used = true;
4903+
bb_connect(bb, label->bb, NEXT);
4904+
}
4905+
}
4906+
4907+
for (int i = 0; i < label_idx; i++) {
4908+
label_t *label = &labels[i];
4909+
if (!label->used)
4910+
printf("Warning: unused label %s\n", label->label_name);
4911+
}
4912+
4913+
backpatch_bb_idx = 0;
4914+
label_idx = 0;
47974915
}
47984916

47994917
/* if first token is type */

src/ssa.c

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,65 @@ void unwind_phi(void)
929929
}
930930
}
931931

932+
/* Check whether 'succ' can be reached from 'pred' by following dom_next
 * links, i.e. whether 'succ' lies strictly below 'pred' in that tree.
 *
 * @pred: candidate dominating block.
 * @succ: block to look for.
 *
 * Returns true when 'succ' is a direct or transitive dom_next child of
 * 'pred'. The case pred == succ is not treated specially here; the caller
 * bb_check_var_cross_init() skips same-block references before calling.
 */
bool is_dominate(basic_block_t *pred, basic_block_t *succ)
{
    for (int i = 0; i < MAX_BB_DOM_SUCC; i++) {
        basic_block_t *child = pred->dom_next[i];

        /* The first empty slot marks the end of the child list. */
        if (!child)
            break;

        /* Stop at the first match instead of visiting the remaining
         * subtrees; the result cannot change once it is true.
         */
        if (child == succ || is_dominate(child, succ))
            return true;
    }

    return false;
}
948+
949+
/* Traversal callback: warn about variables allocated in 'bb' that are
 * referenced from a block 'bb' does not dominate.
 *
 * For any variable, the basic block that defines it must dominate all the
 * basic blocks where it is used; otherwise a jump may have skipped its
 * initialization. One warning is printed per offending referencing block.
 *
 * @func: unused; present to match the traversal callback signature.
 * @bb:   basic block whose OP_allocat instructions are inspected.
 */
void bb_check_var_cross_init(func_t *func, basic_block_t *bb)
{
    UNUSED(func);

    for (insn_t *cur = bb->insn_list.head; cur; cur = cur->next) {
        if (cur->opcode != OP_allocat)
            continue;

        var_t *var = cur->rd;
        ref_block_t *ref = var->ref_block_list.head;
        while (ref) {
            /* References inside the allocating block itself are fine. */
            if (ref->bb != bb && !is_dominate(bb, ref->bb))
                printf("Warning: Variable '%s' cross-initialized\n",
                       var->var_name);
            ref = ref->next;
        }
    }
}
973+
974+
void check_var_cross_initialization()
975+
{
976+
bb_traversal_args_t *args = arena_alloc_traversal_args();
977+
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
978+
/* Skip function declarations without bodies */
979+
if (!func->bbs)
980+
continue;
981+
982+
args->func = func;
983+
args->bb = func->bbs;
984+
985+
func->visited++;
986+
args->postorder_cb = bb_check_var_cross_init;
987+
bb_forward_traversal(args);
988+
}
989+
}
990+
932991
#ifdef __SHECC__
933992
#else
934993
void bb_dump_connection(FILE *fd,
@@ -1112,6 +1171,12 @@ void bb_dump(FILE *fd, func_t *func, basic_block_t *bb)
11121171
sprintf(str, "<BRANCH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11131172
insn->rs1->subscript);
11141173
break;
1174+
case OP_jump:
1175+
sprintf(str, "<JUMP>");
1176+
break;
1177+
case OP_label:
1178+
sprintf(str, "<LABEL>");
1179+
break;
11151180
case OP_push:
11161181
sprintf(str, "<PUSH %s<SUB>%d</SUB>>", insn->rs1->var_name,
11171182
insn->rs1->subscript);
@@ -1281,6 +1346,9 @@ void ssa_build(void)
12811346
build_df();
12821347

12831348
solve_globals();
1349+
1350+
check_var_cross_initialization();
1351+
12841352
solve_phi_insertion();
12851353
solve_phi_params();
12861354

0 commit comments

Comments
 (0)