Skip to content

Commit c4f778e

Browse files
authored
Merge pull request #306 from hhhh1300/feature/complete-short-type-support
Support short type
2 parents f265042 + 5cb9b03 commit c4f778e

File tree

13 files changed

+334
-40
lines changed

13 files changed

+334
-40
lines changed

COMPLIANCE.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ This document tracks compliance gaps and non-standard behaviors.
77
## Implemented Features
88

99
### Core Language
10-
- Basic types: `int`, `char`, `void`, `_Bool`
10+
- Basic types: `int`, `short`, `char`, `void`, `_Bool`
1111
- Structures and unions with nested definitions
1212
- Enumerations with automatic value assignment
1313
- Function definitions and declarations
@@ -58,7 +58,6 @@ This document tracks compliance gaps and non-standard behaviors.
5858

5959
| Feature | Status | Notes |
6060
|---------|--------|-------|
61-
| `short` | Missing | Only 4-byte integers |
6261
| `long` | Missing | Only 4-byte integers |
6362
| `long long` | Missing | No 64-bit integers |
6463
| `unsigned` | Missing | All integers are signed |

src/arm-codegen.c

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,10 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
121121
elf_offset += 24;
122122
return;
123123
case OP_trunc:
124-
elf_offset += 4;
124+
if (ph2_ir->src1 == 2)
125+
elf_offset += 8;
126+
else
127+
elf_offset += 4;
125128
return;
126129
case OP_sign_ext:
127130
elf_offset += 4;
@@ -261,6 +264,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
261264
case OP_read:
262265
if (ph2_ir->src1 == 1)
263266
emit(__lb(__AL, rd, rn, 0));
267+
else if (ph2_ir->src1 == 2)
268+
emit(__lh(__AL, rd, rn, 0));
264269
else if (ph2_ir->src1 == 4)
265270
emit(__lw(__AL, rd, rn, 0));
266271
else
@@ -269,6 +274,8 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
269274
case OP_write:
270275
if (ph2_ir->dest == 1)
271276
emit(__sb(__AL, rm, rn, 0));
277+
else if (ph2_ir->dest == 2)
278+
emit(__sh(__AL, rm, rn, 0));
272279
else if (ph2_ir->dest == 4)
273280
emit(__sw(__AL, rm, rn, 0));
274281
else
@@ -432,20 +439,27 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
432439
emit(__mov_i(__EQ, rd, 1));
433440
return;
434441
case OP_trunc:
435-
if (rm == 1)
436-
rm = 0xFF;
437-
else if (rm == 2)
438-
rm = 0xFFFF;
439-
else if (rm == 4)
440-
rm = 0xFFFFFFFF;
441-
else
442+
if (rm == 1) {
443+
emit(__and_i(__AL, rd, rn, 0xFF));
444+
} else if (rm == 2) {
445+
emit(__sll_amt(__AL, 0, logic_ls, rd, rn, 16));
446+
emit(__sll_amt(__AL, 0, logic_rs, rd, rd, 16));
447+
} else if (rm == 4) {
448+
emit(__mov_r(__AL, rd, rn));
449+
} else {
442450
fatal("Unsupported truncation operation with invalid target size");
443-
444-
emit(__and_i(__AL, rd, rn, rm));
451+
}
445452
return;
446-
case OP_sign_ext:
447-
/* TODO: Support sign extension to types other than int */
448-
emit(__sxtb(__AL, rd, rn, 0));
453+
case OP_sign_ext: {
454+
/* Decode source size from upper 16 bits */
455+
int source_size = (rm >> 16) & 0xFFFF;
456+
if (source_size == 2) {
457+
emit(__sxth(__AL, rd, rn, 0));
458+
} else {
459+
/* For other cases, use byte extension (original behavior) */
460+
emit(__sxtb(__AL, rd, rn, 0));
461+
}
462+
}
449463
return;
450464
case OP_cast:
451465
/* Generic cast operation - for now, just move the value */

src/arm.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,39 @@ int __zero(int rd)
265265
return __mov_i(__AL, rd, 0);
266266
}
267267

268+
/* ARM halfword transfer (immediate offset) using special encoding
269+
* For halfword: bits[11:8] = imm4H, bits[7:4] = encoding, bits[3:0] = imm4L
270+
* imm4H: upper 4 bits of offset
271+
* imm4L: lower 4 bits of offset
272+
* encoding: 0b1011 for unsigned halfword, 0b1111 for signed halfword
273+
*/
274+
int arm_halfword_transfer(arm_cond_t cond,
275+
int l,
276+
arm_reg rn,
277+
arm_reg rd,
278+
int ofs,
279+
int signed_op)
280+
{
281+
int opcode = 16 + 8 + 4 + l;
282+
283+
if (ofs < 0) {
284+
opcode -= 8;
285+
ofs = -ofs;
286+
}
287+
288+
if (ofs > 255)
289+
error("Halfword offset too large");
290+
291+
/* Halfword encoding: split offset into 4-bit high and low parts */
292+
int imm4H = ((ofs >> 4) & 0xF) << 8;
293+
int imm4L = ofs & 0xF;
294+
295+
/* Encode lower 8 bits: 1011xxxx for unsigned, 1111xxxx for signed */
296+
int encoded_ofs = imm4H | 0xB0 | imm4L | (signed_op << 6);
297+
298+
return arm_encode(cond, opcode, rn, rd, encoded_ofs);
299+
}
300+
268301
int arm_transfer(arm_cond_t cond,
269302
int l,
270303
int size,
@@ -302,6 +335,18 @@ int __sb(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
302335
return arm_transfer(cond, 0, 1, rn, rd, ofs);
303336
}
304337

338+
/* ARM signed halfword load (LDRSH) */
339+
int __lh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
340+
{
341+
return arm_halfword_transfer(cond, 1, rn, rd, ofs, 1);
342+
}
343+
344+
/* ARM halfword store (STRH) */
345+
int __sh(arm_cond_t cond, arm_reg rd, arm_reg rn, int ofs)
346+
{
347+
return arm_halfword_transfer(cond, 0, rn, rd, ofs, 0);
348+
}
349+
305350
int __stmdb(arm_cond_t cond, int w, arm_reg rn, int reg_list)
306351
{
307352
return arm_encode(cond, arm_stmdb + (0x2 << 6) + (w << 1), rn, 0, reg_list);
@@ -373,3 +418,12 @@ int __sxtb(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
373418
return arm_encode(cond, 106, 0xF, rd,
374419
rm | ((rotation >> 3) << 10) | (0x7 << 4));
375420
}
421+
422+
int __sxth(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
423+
{
424+
if (rotation != 0 && rotation != 8 && rotation != 16 && rotation != 24)
425+
fatal("SXTH rotation must be 0, 8, 16, or 24");
426+
427+
return arm_encode(cond, 107, 0xF, rd,
428+
rm | ((rotation >> 3) << 10) | (0x7 << 4));
429+
}

src/defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ typedef enum {
239239
TYPE_void = 0,
240240
TYPE_int,
241241
TYPE_char,
242+
TYPE_short,
242243
TYPE_struct,
243244
TYPE_union,
244245
TYPE_typedef

src/globals.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ type_t *TY_void;
5454
type_t *TY_char;
5555
type_t *TY_bool;
5656
type_t *TY_int;
57+
type_t *TY_short;
5758

5859
/* Arenas */
5960

src/parser.c

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,18 @@ var_t *promote_unchecked(block_t *block,
247247
{
248248
var_t *rd = require_typed_ptr_var(block, target_type, target_ptr);
249249
gen_name_to(rd->var_name);
250-
add_insn(block, *bb, OP_sign_ext, rd, var, NULL,
251-
target_ptr ? PTR_SIZE : target_type->size, NULL);
250+
/* Encode both source and target sizes in src1:
251+
* Lower 16 bits: target size
252+
* Upper 16 bits: source size
253+
* This allows codegen to distinguish between different promotion types
254+
* without changing IR semantics.
255+
*/
256+
int encoded_size = ((var->type->size) << 16);
257+
if (target_ptr)
258+
encoded_size |= PTR_SIZE;
259+
else
260+
encoded_size |= target_type->size;
261+
add_insn(block, *bb, OP_sign_ext, rd, var, NULL, encoded_size, NULL);
252262
return rd;
253263
}
254264

@@ -1598,6 +1608,9 @@ void handle_single_dereference(block_t *parent, basic_block_t **bb)
15981608
case TYPE_char:
15991609
sz = TY_char->size;
16001610
break;
1611+
case TYPE_short:
1612+
sz = TY_short->size;
1613+
break;
16011614
case TYPE_int:
16021615
sz = TY_int->size;
16031616
break;
@@ -1678,6 +1691,9 @@ void handle_multiple_dereference(block_t *parent, basic_block_t **bb)
16781691
case TYPE_char:
16791692
sz = TY_char->size;
16801693
break;
1694+
case TYPE_short:
1695+
sz = TY_short->size;
1696+
break;
16811697
case TYPE_int:
16821698
sz = TY_int->size;
16831699
break;
@@ -1980,6 +1996,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
19801996
add_insn(parent, *bb, OP_load_constant, compound_var, NULL,
19811997
NULL, 0, NULL);
19821998
} else if (cast_or_literal_type->base_type == TYPE_int ||
1999+
cast_or_literal_type->base_type == TYPE_short ||
19832000
cast_or_literal_type->base_type == TYPE_char) {
19842001
/* Handle empty compound literals */
19852002
if (lex_peek(T_close_curly, NULL)) {
@@ -2266,6 +2283,8 @@ int get_pointer_element_size(var_t *ptr_var)
22662283
switch (ptr_var->type->base_type) {
22672284
case TYPE_char:
22682285
return TY_char->size;
2286+
case TYPE_short:
2287+
return TY_short->size;
22692288
case TYPE_int:
22702289
return TY_int->size;
22712290
case TYPE_void:
@@ -2280,6 +2299,8 @@ int get_pointer_element_size(var_t *ptr_var)
22802299
switch (ptr_var->type->base_type) {
22812300
case TYPE_char:
22822301
return TY_char->size;
2302+
case TYPE_short:
2303+
return TY_short->size;
22832304
case TYPE_int:
22842305
return TY_int->size;
22852306
case TYPE_void:
@@ -2388,6 +2409,9 @@ void handle_pointer_arithmetic(block_t *parent,
23882409
case TYPE_char:
23892410
element_size = 1;
23902411
break;
2412+
case TYPE_short:
2413+
element_size = 2;
2414+
break;
23912415
case TYPE_int:
23922416
element_size = 4;
23932417
break;
@@ -2406,6 +2430,9 @@ void handle_pointer_arithmetic(block_t *parent,
24062430
case TYPE_char:
24072431
element_size = 1;
24082432
break;
2433+
case TYPE_short:
2434+
element_size = 2;
2435+
break;
24092436
case TYPE_int:
24102437
element_size = 4;
24112438
break;
@@ -2865,6 +2892,9 @@ void read_lvalue(lvalue_t *lvalue,
28652892
case TYPE_char:
28662893
lvalue->size = TY_char->size;
28672894
break;
2895+
case TYPE_short:
2896+
lvalue->size = TY_short->size;
2897+
break;
28682898
case TYPE_int:
28692899
lvalue->size = TY_int->size;
28702900
break;
@@ -3096,6 +3126,9 @@ void read_lvalue(lvalue_t *lvalue,
30963126
case TYPE_char:
30973127
increment_size = TY_char->size;
30983128
break;
3129+
case TYPE_short:
3130+
increment_size = TY_short->size;
3131+
break;
30993132
case TYPE_int:
31003133
increment_size = TY_int->size;
31013134
break;
@@ -3431,6 +3464,9 @@ bool read_body_assignment(char *token,
34313464
case TYPE_char:
34323465
increment_size = TY_char->size;
34333466
break;
3467+
case TYPE_short:
3468+
increment_size = TY_short->size;
3469+
break;
34343470
case TYPE_int:
34353471
increment_size = TY_int->size;
34363472
break;
@@ -4237,7 +4273,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
42374273
*/
42384274
if (expr_result && expr_result->array_size > 0 &&
42394275
!var->ptr_level && var->array_size == 0 && var->type &&
4240-
var->type->base_type == TYPE_int &&
4276+
(var->type->base_type == TYPE_int ||
4277+
var->type->base_type == TYPE_short) &&
42414278
expr_result->var_name[0] == '.') {
42424279
var_t *first_elem = require_var(parent);
42434280
first_elem->type = var->type;
@@ -4525,7 +4562,8 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
45254562
/* Handle array compound literal to scalar assignment */
45264563
if (expr_result && expr_result->array_size > 0 &&
45274564
!var->ptr_level && var->array_size == 0 && var->type &&
4528-
var->type->base_type == TYPE_int &&
4565+
(var->type->base_type == TYPE_int ||
4566+
var->type->base_type == TYPE_short) &&
45294567
expr_result->var_name[0] == '.') {
45304568
/* Extract first element from compound literal array */
45314569
var_t *first_elem = require_var(parent);
@@ -5210,6 +5248,10 @@ void parse_internal(void)
52105248
TY_int->base_type = TYPE_int;
52115249
TY_int->size = 4;
52125250

5251+
TY_short = add_named_type("short");
5252+
TY_short->base_type = TYPE_short;
5253+
TY_short->size = 2;
5254+
52135255
/* builtin type _Bool was introduced in C99 specification, it is more
52145256
* well-known as macro type bool, which is defined in <stdbool.h> (in
52155257
* shecc, it is defined in 'lib/c.c').

src/reg-alloc.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ void reg_alloc(void)
432432
if (global_insn->rd->ptr_level)
433433
GLOBAL_FUNC->stack_size += PTR_SIZE;
434434
else if (global_insn->rd->type != TY_int &&
435+
global_insn->rd->type != TY_short &&
435436
global_insn->rd->type != TY_char &&
436437
global_insn->rd->type != TY_bool) {
437438
GLOBAL_FUNC->stack_size +=
@@ -585,6 +586,7 @@ void reg_alloc(void)
585586
case OP_allocat:
586587
if ((insn->rd->type == TY_void ||
587588
insn->rd->type == TY_int ||
589+
insn->rd->type == TY_short ||
588590
insn->rd->type == TY_char ||
589591
insn->rd->type == TY_bool) &&
590592
insn->rd->array_size == 0)

0 commit comments

Comments
 (0)