diff --git a/CLAUDE.md b/CLAUDE.md index 5621af6d4..c8f4362fe 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -27,8 +27,6 @@ NEVER amend git commits. Pre-commit checks, - First re-review `git diff HEAD` in totality - Passes `cargo clippy --all-targets` with zero warnings (pre-existing warnings MUST be fixed) -- Changes to (cc/token/ cc/partse/ cc/ir/) have associated unit tests? -- New features And bug fixes have associated integration tests? (cc/tests/) ## Architecture @@ -47,7 +45,7 @@ Other: `calc/`, `cc/`, `cron/`, `datetime/`, `dev/`, `display/`, `file/`, `m4/`, Debug protocol: Update EXISTING wrapper script in `/tmp/*.sh`, run via Bash tool. -DO NOT (1) create file with cat, (2) run `bash` from Bash tool (run script directly, following Update). +DO NOT (1) create file with cat, (2) run `bash` from Bash tool Integration tests use `plib::testing::TestPlan`. Test logic in `$crate/tests/$category/mod.rs`. diff --git a/cc/CLAUDE.md b/cc/CLAUDE.md new file mode 100644 index 000000000..3c971da20 --- /dev/null +++ b/cc/CLAUDE.md @@ -0,0 +1,7 @@ +# cc (pcc) Development Rules + +## Testing Requirements + +1. **All changes must have tests** — every fix and feature needs accompanying tests to prevent regressions. +2. **Changes to `cc/ir/`, `cc/token/`, `cc/parse/` MUST include unit tests.** +3. **All changes must also have e2e integration tests** in `cc/tests/` to ensure full coverage. diff --git a/cc/arch/aarch64/codegen.rs b/cc/arch/aarch64/codegen.rs index e223eb039..4252b9e66 100644 --- a/cc/arch/aarch64/codegen.rs +++ b/cc/arch/aarch64/codegen.rs @@ -3115,6 +3115,11 @@ impl CodeGenerator for Aarch64CodeGen { fn set_pic_mode(&mut self, pic: bool) { self.pic_mode = pic; } + + fn set_shared_mode(&mut self, _shared: bool) { + // AArch64 TLS model selection not yet implemented + // For now, this is a no-op + } } /// Helper enum for atomic bitwise operations diff --git a/cc/arch/aarch64/features.rs b/cc/arch/aarch64/features.rs index 9a5930bcc..679d72c96 100644 --- a/cc/arch/aarch64/features.rs +++ b/cc/arch/aarch64/features.rs @@ -212,7 +212,7 @@ impl Aarch64CodeGen { // FP-relative for alloca safety let actual_offset = self.stack_offset(frame_size, *offset); self.push_lir(Aarch64Inst::Str { - size: OperandSize::B64, + size: op_size, src: scratch1, addr: MemAddr::BaseOffset { base: Reg::X29, diff --git a/cc/arch/codegen.rs b/cc/arch/codegen.rs index 01cc6cf7f..167449461 100644 --- a/cc/arch/codegen.rs +++ b/cc/arch/codegen.rs @@ -661,8 +661,13 @@ pub trait CodeGenerator { /// Set whether to emit basic unwind tables (cfi_startproc/cfi_endproc) fn set_emit_unwind_tables(&mut self, emit: bool); - /// Set position-independent code mode (for shared libraries) + /// Set position-independent code mode (for shared libraries and PIE) fn set_pic_mode(&mut self, pic: bool); + + /// Set shared library mode (for TLS model selection) + /// In shared library mode, TLS uses Initial Exec/General Dynamic model. + /// In PIE/executable mode, TLS uses Local Exec for local variables. 
+ fn set_shared_mode(&mut self, shared: bool); } /// Create a code generator for the given target @@ -670,6 +675,7 @@ pub fn create_codegen( target: Target, emit_unwind_tables: bool, pic_mode: bool, + shared_mode: bool, ) -> Box<dyn CodeGenerator> { use crate::target::Arch; @@ -679,5 +685,6 @@ }; codegen.set_emit_unwind_tables(emit_unwind_tables); codegen.set_pic_mode(pic_mode); + codegen.set_shared_mode(shared_mode); codegen } diff --git a/cc/arch/x86_64/codegen.rs b/cc/arch/x86_64/codegen.rs index f163610e8..7fd489d94 100644 --- a/cc/arch/x86_64/codegen.rs +++ b/cc/arch/x86_64/codegen.rs @@ -51,8 +51,10 @@ pub struct X86_64CodeGen { pub(super) extern_symbols: HashSet<String>, /// Thread-local storage symbols (need TLS access via FS segment) pub(super) tls_symbols: HashSet<String>, - /// Position-independent code mode (for shared libraries) + /// Position-independent code mode (for shared libraries and PIE) pic_mode: bool, + /// Shared library mode (affects TLS model selection) + shared_mode: bool, /// Long double constants to emit (label_bits -> value_bits) pub(super) ld_constants: HashMap<u64, u128>, /// Double constants to emit (label_bits -> f64 value) @@ -74,6 +76,7 @@ impl X86_64CodeGen { extern_symbols: HashSet::new(), tls_symbols: HashSet::new(), pic_mode: false, + shared_mode: false, ld_constants: HashMap::new(), double_constants: HashMap::new(), } @@ -138,10 +141,11 @@ impl X86_64CodeGen { if self.pic_mode && !name.starts_with('.') { return true; } - // External symbols always need GOT access: - // - On macOS: required for dynamic linking - // - On Linux: required for PIE (default) and when linking with shared libs - self.extern_symbols.contains(name) + // External symbols need GOT access on macOS for dynamic linking. + if self.base.target.os == Os::MacOS { + return self.extern_symbols.contains(name); + } + false } /// Emit .loc directive for source line tracking (delegates to base) @@ -1502,8 +1506,10 @@ impl X86_64CodeGen { }); } else if self.tls_symbols.contains(&name) && self.base.target.os == Os::Linux { // Thread-local storage: use FS segment + // Use Initial Exec model for external TLS or when building shared libraries. + // PIE executables can use Local Exec for their own TLS variables. let is_extern_tls = self.extern_symbols.contains(&name); - let use_ie_model = is_extern_tls || self.pic_mode; + let use_ie_model = is_extern_tls || self.shared_mode; if use_ie_model { // Initial Exec: load offset from GOT, then load via FS segment @@ -1859,10 +1865,10 @@ impl X86_64CodeGen { // Check TLS first - TLS symbols need special access pattern even for external symbols if self.tls_symbols.contains(&name) && self.base.target.os == Os::Linux { // Check if this is an external TLS variable (needs Initial Exec model) - // or if we're in PIC mode (shared libraries also need IE model) - // Only non-PIC local TLS can use Local Exec model + // or if we're building a shared library (also needs IE model). + // PIE executables can use Local Exec for their own TLS variables. 
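The TLS model split this hunk introduces is easiest to see from the C side. A minimal sketch (illustrative only, not part of the diff; flag spellings assume a GCC/Clang-style driver):

```c
/* One thread-local variable, three link modes, three TLS models. */
__thread int counter;

int bump(void) { return ++counter; }

/* cc -c tls.c             -> Local Exec: FS-relative offset fixed at link time
 * cc -fpie tls.c -o tls   -> still Local Exec: the executable's own TLS block
 *                            is always first, so shared_mode stays false
 * cc -fpic -shared tls.c  -> Initial Exec (or General Dynamic): a shared
 *                            library's TLS offset is unknown until load time,
 *                            hence use_ie_model = is_extern_tls || shared_mode */
```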
let is_extern_tls = self.extern_symbols.contains(&name); - let use_ie_model = is_extern_tls || self.pic_mode; + let use_ie_model = is_extern_tls || self.shared_mode; if use_ie_model { // Initial Exec TLS model for external symbols: @@ -2158,26 +2164,14 @@ impl X86_64CodeGen { Symbol::global(name.clone()) }; - if self.needs_got_access(&name) { - // External symbols on macOS: load address from GOT, then store - self.push_lir(X86Inst::Mov { - size: OperandSize::B64, - src: GpOperand::Mem(MemAddr::GotPcrel(Symbol::extern_sym(name.clone()))), - dst: GpOperand::Reg(Reg::R11), - }); - self.push_lir(X86Inst::Mov { - size: op_size, - src: GpOperand::Reg(value_reg), - dst: GpOperand::Mem(MemAddr::BaseOffset { - base: Reg::R11, - offset: insn.offset as i32, - }), - }); - } else if self.tls_symbols.contains(&name) && self.base.target.os == Os::Linux { + // Check TLS FIRST before GOT - TLS symbols need special access pattern + // and should not go through the GOT path even in PIC mode + if self.tls_symbols.contains(&name) && self.base.target.os == Os::Linux { // Thread-local storage: use FS segment - // In PIC mode or for external TLS, use Initial Exec model + // Use Initial Exec model for external TLS or when building shared libraries. + // PIE executables can use Local Exec for their own TLS variables. let is_extern_tls = self.extern_symbols.contains(&name); - let use_ie_model = is_extern_tls || self.pic_mode; + let use_ie_model = is_extern_tls || self.shared_mode; if use_ie_model { // Initial Exec: load offset from GOT, then store via FS segment @@ -2199,6 +2193,21 @@ impl X86_64CodeGen { dst: GpOperand::Mem(MemAddr::TlsIE(symbol)), }); } + } else if self.needs_got_access(&name) { + // External symbols on macOS: load address from GOT, then store + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(MemAddr::GotPcrel(Symbol::extern_sym(name.clone()))), + dst: GpOperand::Reg(Reg::R11), + }); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(value_reg), + dst: GpOperand::Mem(MemAddr::BaseOffset { + base: Reg::R11, + offset: insn.offset as i32, + }), + }); } else { // LIR: store to global via RIP-relative self.push_lir(X86Inst::Mov { @@ -3349,12 +3358,115 @@ impl X86_64CodeGen { let expected_loc = self.get_location(expected_ptr); let desired_loc = self.get_location(desired); - // Load expected_ptr (pointer to expected value) into R9 - // We need this for later store-back on failure - self.emit_mov_to_reg(expected_loc, Reg::R9, 64); + // IMPORTANT: Regalloc may have assigned operands to any register including + // R9, R10, R11, or RAX. Loading one operand into a scratch register can + // clobber another operand. + // + // Strategy: We need to load three values into R9, R10, R11 (plus RAX for *expected). + // Any source could be in any of these registers. We use a dependency-aware load order: + // + // 1. Collect which sources are in which registers + // 2. Load into target registers in an order that doesn't clobber unread sources + // + // For simplicity, we use the red zone (128 bytes below RSP) as scratch space. + // We spill all three operands first, then load from there. 
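The clobbering hazard described in the comment above comes from source like the following; a minimal C reproducer (illustrative only, using the GCC/Clang `__atomic` builtins this path lowers):

```c
#include <stdio.h>

int main(void) {
    int slot = 5, expected = 5;
    /* Codegen must stage &slot, &expected, and the desired value in
     * R11/R9/R10 without one load overwriting another operand that the
     * register allocator already placed in one of those registers. */
    int ok = __atomic_compare_exchange_n(&slot, &expected, 7, 0,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    printf("%d %d %d\n", ok, slot, expected); /* 1 7 5 */
    return 0;
}
```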
+ + // Helper lambda to check if a location is a specific register + let is_reg = |loc: &Loc, r: Reg| -> bool { matches!(loc, Loc::Reg(x) if *x == r) }; + + // Use red zone for temporary storage at RSP-8, RSP-16, RSP-24 + let addr_temp = MemAddr::BaseOffset { + base: Reg::Rsp, + offset: -8, + }; + let expected_temp = MemAddr::BaseOffset { + base: Reg::Rsp, + offset: -16, + }; + let desired_temp = MemAddr::BaseOffset { + base: Reg::Rsp, + offset: -24, + }; + + // Step 1: Spill all three operands to red zone. + // We need a temporary register that is NOT one of our targets (R9, R10, R11). + // Use RCX as temp since it's caller-saved and not involved here. + + // Spill addr + if let Loc::Reg(r) = addr_loc { + // Already in a register, just store it + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: GpOperand::Mem(addr_temp.clone()), + }); + } else { + self.emit_mov_to_reg(addr_loc, Reg::Rcx, 64); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rcx), + dst: GpOperand::Mem(addr_temp.clone()), + }); + } + + // Spill expected_ptr + if let Loc::Reg(r) = expected_loc { + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(r), + dst: GpOperand::Mem(expected_temp.clone()), + }); + } else { + self.emit_mov_to_reg(expected_loc, Reg::Rcx, 64); + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Reg(Reg::Rcx), + dst: GpOperand::Mem(expected_temp.clone()), + }); + } - // Load address of atomic variable into R11 - self.emit_mov_to_reg(addr_loc, Reg::R11, 64); + // Spill desired + if let Loc::Reg(r) = desired_loc { + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(r), + dst: GpOperand::Mem(desired_temp.clone()), + }); + } else { + self.emit_mov_to_reg(desired_loc, Reg::Rcx, size); + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Reg(Reg::Rcx), + dst: GpOperand::Mem(desired_temp.clone()), + }); + } + + // Step 2: Load from red zone into target registers. + // Now all values are safely on stack, order doesn't matter. 
+ + // Load addr into R11 + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(addr_temp), + dst: GpOperand::Reg(Reg::R11), + }); + + // Load expected_ptr into R9 + self.push_lir(X86Inst::Mov { + size: OperandSize::B64, + src: GpOperand::Mem(expected_temp), + dst: GpOperand::Reg(Reg::R9), + }); + + // Load desired into R10 + self.push_lir(X86Inst::Mov { + size: op_size, + src: GpOperand::Mem(desired_temp), + dst: GpOperand::Reg(Reg::R10), + }); + + // Suppress unused variable warnings + let _ = is_reg; // Load expected value from *expected_ptr (R9) into RAX self.push_lir(X86Inst::Mov { @@ -3366,9 +3478,6 @@ impl X86_64CodeGen { dst: GpOperand::Reg(Reg::Rax), }); - // Load desired value into R10 - self.emit_mov_to_reg(desired_loc, Reg::R10, size); - // LOCK CMPXCHG: if *addr == RAX, set *addr = R10 and ZF=1 // else RAX = *addr and ZF=0 self.push_lir(X86Inst::LockCmpxchg { @@ -3865,4 +3974,8 @@ impl CodeGenerator for X86_64CodeGen { fn set_pic_mode(&mut self, pic: bool) { self.pic_mode = pic; } + + fn set_shared_mode(&mut self, shared: bool) { + self.shared_mode = shared; + } } diff --git a/cc/arch/x86_64/features.rs b/cc/arch/x86_64/features.rs index 317635f73..fff46ce1a 100644 --- a/cc/arch/x86_64/features.rs +++ b/cc/arch/x86_64/features.rs @@ -230,8 +230,9 @@ impl X86_64CodeGen { }); } Loc::Stack(dst_offset) => { + let adjusted_offset = -(*dst_offset + self.callee_saved_offset); self.push_lir(X86Inst::Mov { - size: OperandSize::B32, + size: lir_arg_size, src: GpOperand::Mem(MemAddr::BaseOffset { base: Reg::Rax, offset: 0, @@ -239,11 +240,11 @@ impl X86_64CodeGen { dst: GpOperand::Reg(Reg::R11), }); self.push_lir(X86Inst::Mov { - size: OperandSize::B32, + size: lir_arg_size, src: GpOperand::Reg(Reg::R11), dst: GpOperand::Mem(MemAddr::BaseOffset { base: Reg::Rbp, - offset: *dst_offset, + offset: adjusted_offset, }), }); } @@ -303,12 +304,13 @@ impl X86_64CodeGen { } } Loc::Stack(dst_offset) => { + let adjusted_offset = -(*dst_offset + self.callee_saved_offset); self.push_lir(X86Inst::Mov { size: lir_arg_size, src: GpOperand::Reg(Reg::Rax), dst: GpOperand::Mem(MemAddr::BaseOffset { base: Reg::Rbp, - offset: *dst_offset, + offset: adjusted_offset, }), }); } @@ -475,6 +477,14 @@ impl X86_64CodeGen { } (Loc::Reg(src_reg), Loc::Reg(dst_reg)) => { // Both src and dest are in registers (containing addresses) + // Choose a temp register that doesn't conflict with src or dst + let temp = if *src_reg != Reg::Rax && *dst_reg != Reg::Rax { + Reg::Rax + } else if *src_reg != Reg::Rdx && *dst_reg != Reg::Rdx { + Reg::Rdx + } else { + Reg::Rcx + }; // Copy gp_offset (4 bytes) self.push_lir(X86Inst::Mov { size: OperandSize::B32, @@ -482,11 +492,11 @@ impl X86_64CodeGen { base: *src_reg, offset: 0, }), - dst: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(temp), }); self.push_lir(X86Inst::Mov { size: OperandSize::B32, - src: GpOperand::Reg(Reg::Rax), + src: GpOperand::Reg(temp), dst: GpOperand::Mem(MemAddr::BaseOffset { base: *dst_reg, offset: 0, @@ -499,11 +509,11 @@ impl X86_64CodeGen { base: *src_reg, offset: 4, }), - dst: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(temp), }); self.push_lir(X86Inst::Mov { size: OperandSize::B32, - src: GpOperand::Reg(Reg::Rax), + src: GpOperand::Reg(temp), dst: GpOperand::Mem(MemAddr::BaseOffset { base: *dst_reg, offset: 4, @@ -516,11 +526,11 @@ impl X86_64CodeGen { base: *src_reg, offset: 8, }), - dst: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(temp), }); self.push_lir(X86Inst::Mov { size: OperandSize::B64, - src: 
GpOperand::Reg(Reg::Rax), + src: GpOperand::Reg(temp), dst: GpOperand::Mem(MemAddr::BaseOffset { base: *dst_reg, offset: 8, @@ -533,11 +543,11 @@ impl X86_64CodeGen { base: *src_reg, offset: 16, }), - dst: GpOperand::Reg(Reg::Rax), + dst: GpOperand::Reg(temp), }); self.push_lir(X86Inst::Mov { size: OperandSize::B64, - src: GpOperand::Reg(Reg::Rax), + src: GpOperand::Reg(temp), dst: GpOperand::Mem(MemAddr::BaseOffset { base: *dst_reg, offset: 16, diff --git a/cc/arch/x86_64/float.rs b/cc/arch/x86_64/float.rs index 804726c03..87dc8e6b2 100644 --- a/cc/arch/x86_64/float.rs +++ b/cc/arch/x86_64/float.rs @@ -1009,12 +1009,13 @@ impl X86_64CodeGen { }); } Loc::Stack(dst_offset) => { + let adjusted_offset = -(*dst_offset + self.callee_saved_offset); self.push_lir(X86Inst::MovFp { size: lir_fp_size, src: XmmOperand::Reg(XmmReg::Xmm15), dst: XmmOperand::Mem(MemAddr::BaseOffset { base: Reg::Rbp, - offset: *dst_offset, + offset: adjusted_offset, }), }); } @@ -1069,12 +1070,13 @@ impl X86_64CodeGen { }); } Loc::Stack(dst_offset) => { + let adjusted_offset = -(*dst_offset + self.callee_saved_offset); self.push_lir(X86Inst::MovFp { size: lir_fp_size, src: XmmOperand::Reg(XmmReg::Xmm15), dst: XmmOperand::Mem(MemAddr::BaseOffset { base: Reg::Rbp, - offset: *dst_offset, + offset: adjusted_offset, }), }); } diff --git a/cc/include/emmintrin.h b/cc/include/emmintrin.h index 64f3884d4..3485b20d6 100644 --- a/cc/include/emmintrin.h +++ b/cc/include/emmintrin.h @@ -10,39 +10,6 @@ #ifndef _EMMINTRIN_H #define _EMMINTRIN_H -/* Include SSE intrinsics (emmintrin.h includes xmmintrin.h in GCC) */ -#include - -/* _mm_pause - inserts a PAUSE instruction for spin-wait loops - * This improves performance in spin-lock scenarios by reducing - * pipeline stalls and power consumption */ -static __inline__ void __attribute__((__always_inline__)) -_mm_pause(void) { - __asm__ __volatile__("pause" ::: "memory"); -} - -/* _mm_clflush - flush cache line containing address p */ -static __inline__ void __attribute__((__always_inline__)) -_mm_clflush(void const *__p) { - __asm__ __volatile__("clflush (%0)" : : "r"(__p) : "memory"); -} - -/* _mm_lfence - load fence */ -static __inline__ void __attribute__((__always_inline__)) -_mm_lfence(void) { - __asm__ __volatile__("lfence" ::: "memory"); -} - -/* _mm_mfence - memory fence */ -static __inline__ void __attribute__((__always_inline__)) -_mm_mfence(void) { - __asm__ __volatile__("mfence" ::: "memory"); -} - -/* _mm_sfence - store fence (from SSE, but commonly used with SSE2) */ -static __inline__ void __attribute__((__always_inline__)) -_mm_sfence(void) { - __asm__ __volatile__("sfence" ::: "memory"); -} +#error "emmintrin.h (SSE2) intrinsics are not supported by pcc yet" #endif /* _EMMINTRIN_H */ diff --git a/cc/include/xmmintrin.h b/cc/include/xmmintrin.h index 014492cc6..b0aebc7b2 100644 --- a/cc/include/xmmintrin.h +++ b/cc/include/xmmintrin.h @@ -10,17 +10,6 @@ #ifndef _XMMINTRIN_H #define _XMMINTRIN_H -/* SSE prefetch hints */ -#define _MM_HINT_T0 1 -#define _MM_HINT_T1 2 -#define _MM_HINT_T2 3 -#define _MM_HINT_NTA 0 - -/* _mm_prefetch - prefetch data into cache */ -static __inline__ void __attribute__((__always_inline__)) -_mm_prefetch(const void *__p, int __i) { - (void)__i; - __builtin_prefetch(__p); -} +#error "xmmintrin.h (SSE) intrinsics are not supported by pcc yet" #endif /* _XMMINTRIN_H */ diff --git a/cc/ir/linearize.rs b/cc/ir/linearize.rs index 2e101d0cb..46cba7723 100644 --- a/cc/ir/linearize.rs +++ b/cc/ir/linearize.rs @@ -49,6 +49,26 @@ struct LocalVarInfo { /// 
For int arr[n][m], this contains [sym_for_n, sym_for_m] /// These are needed to compute runtime strides for outer dimension access. vla_dim_syms: Vec<PseudoId>, + /// True if this local holds a pointer to the actual data (e.g., va_list parameters). + /// When true, linearize_lvalue loads the pointer instead of taking the address. + is_indirect: bool, +} + +struct ResolvedDesignator { + offset: usize, + typ: TypeId, + bit_offset: Option<u32>, + bit_width: Option<u32>, + storage_unit_size: Option<u32>, +} + +struct RawFieldInit { + offset: usize, + field_size: usize, + init: Initializer, + bit_offset: Option<u32>, + bit_width: Option<u32>, + storage_unit_size: Option<u32>, +} /// Information about a static local variable @@ -773,31 +793,75 @@ impl<'a> Linearizer<'a> { let elem_size = (self.types.size_bits(elem_type) / 8) as usize; let mut init_elements = Vec::new(); + let mut element_lists: HashMap<i64, Vec<InitElement>> = HashMap::new(); + let mut element_indices: Vec<i64> = Vec::new(); let mut current_idx: i64 = 0; for element in elements { - // Calculate the actual index considering designators - let actual_idx = if element.designators.is_empty() { - let idx = current_idx; - current_idx += 1; + let mut index = None; + let mut index_pos = None; + for (pos, designator) in element.designators.iter().enumerate() { + if let Designator::Index(idx) = designator { + index = Some(*idx); + index_pos = Some(pos); + break; + } + } + + let element_index = if let Some(idx) = index { + current_idx = idx + 1; idx } else { - // Use the first designator (should be Index for arrays) - let idx = match &element.designators[0] { - Designator::Index(i) => *i, - Designator::Field(_) => current_idx, // Fallback - }; - current_idx = idx + 1; + let idx = current_idx; + current_idx += 1; idx }; - let offset = (actual_idx as usize) * elem_size; - let elem_init = self.ast_init_to_ir(&element.value, elem_type); + let remaining_designators = match index_pos { + Some(pos) => element.designators[pos + 1..].to_vec(), + None => element.designators.clone(), + }; + + let entry = element_lists.entry(element_index).or_insert_with(|| { + element_indices.push(element_index); + Vec::new() + }); + + if remaining_designators.is_empty() { + if let ExprKind::InitList { + elements: nested_elements, + } = &element.value.kind + { + entry.extend(nested_elements.clone()); + continue; + } + } + + entry.push(InitElement { + designators: remaining_designators, + value: element.value.clone(), + }); + } + + element_indices.sort(); + for element_index in element_indices { + let Some(list) = element_lists.get(&element_index) else { + continue; + }; + let offset = (element_index as usize) * elem_size; + let elem_init = if matches!( + self.types.kind(elem_type), + TypeKind::Array | TypeKind::Struct | TypeKind::Union + ) { + self.ast_init_list_to_ir(list, elem_type) + } else if let Some(last) = list.last() { + self.ast_init_to_ir(&last.value, elem_type) + } else { + Initializer::None + }; init_elements.push((offset, elem_init)); } - // Sort elements by offset to ensure proper emission order - // (designated initializers can be in any order) init_elements.sort_by_key(|(offset, _)| *offset); Initializer::Array { @@ -815,73 +879,106 @@ impl<'a> Linearizer<'a> { let resolved_size = (self.types.size_bits(resolved_typ) / 8) as usize; if let Some(composite) = self.types.get(resolved_typ).composite.as_ref() { let members = &composite.members; + let is_union = self.types.kind(resolved_typ) == TypeKind::Union; // Collect field initializations with bitfield info: // (offset, field_size, init, bit_offset, bit_width, 
storage_unit_size) - #[allow(clippy::type_complexity)] - let mut raw_fields: Vec<( - usize, - usize, - Initializer, - Option<u32>, - Option<u32>, - Option<u32>, - )> = Vec::new(); + let mut raw_fields: Vec<RawFieldInit> = Vec::new(); let mut current_field_idx = 0; for element in elements { - // Find the field (by designator or position) - // Use find_member to support anonymous struct/union members (C11 6.7.2.1p13) - let member_info = - if let Some(Designator::Field(name)) = element.designators.first() { - // Designated initializer: .field = value - // First try direct member lookup to update position counter - if let Some((idx, _)) = - members.iter().enumerate().find(|(_, m)| &m.name == name) - { - current_field_idx = idx + 1; - } - // Use find_member which handles anonymous struct/union members - self.types.find_member(resolved_typ, *name) - } else if current_field_idx < members.len() { - // Positional initializer - direct member only - let m = &members[current_field_idx]; - current_field_idx += 1; - Some(crate::types::MemberInfo { - offset: m.offset, - typ: m.typ, - bit_offset: m.bit_offset, - bit_width: m.bit_width, - storage_unit_size: m.storage_unit_size, - }) - } else { - None + if element.designators.is_empty() { + let Some(member) = self.next_positional_member( + members, + is_union, + &mut current_field_idx, + ) else { + continue; }; - - if let Some(member) = member_info { - let offset = member.offset; let field_size = (self.types.size_bits(member.typ) / 8) as usize; let field_init = self.ast_init_to_ir(&element.value, member.typ); - raw_fields.push(( - offset, + raw_fields.push(RawFieldInit { + offset: member.offset, field_size, - field_init, - member.bit_offset, - member.bit_width, - member.storage_unit_size, - )); + init: field_init, + bit_offset: member.bit_offset, + bit_width: member.bit_width, + storage_unit_size: member.storage_unit_size, + }); + continue; + } + + let resolved = + self.resolve_designator_chain(resolved_typ, 0, &element.designators); + let Some(ResolvedDesignator { + offset, + typ: field_type, + bit_offset, + bit_width, + storage_unit_size, + }) = resolved + else { + continue; + }; + if let Some(Designator::Field(name)) = element.designators.first() { + if let Some(next_idx) = self.member_index_for_designator(members, *name) + { + current_field_idx = next_idx; + } } + let field_size = (self.types.size_bits(field_type) / 8) as usize; + let field_init = self.ast_init_to_ir(&element.value, field_type); + raw_fields.push(RawFieldInit { + offset, + field_size, + init: field_init, + bit_offset, + bit_width, + storage_unit_size, + }); } // Sort fields by offset to ensure proper emission order // (designated initializers can be in any order) - raw_fields.sort_by_key(|(offset, _, _, _, _, _)| *offset); + // For bitfields, also sort by bit_offset to keep them together + raw_fields.sort_by(|a, b| { + a.offset + .cmp(&b.offset) + .then_with(|| a.bit_offset.unwrap_or(0).cmp(&b.bit_offset.unwrap_or(0))) + }); + + // Remove duplicate initializations (later one wins, per C semantics) + // For regular fields: duplicate if same offset + // For bitfields: duplicate if same offset AND same bit_offset + let mut idx = 0; + while idx + 1 < raw_fields.len() { + let same_offset = raw_fields[idx].offset == raw_fields[idx + 1].offset; + let both_bitfields = raw_fields[idx].bit_offset.is_some() + && raw_fields[idx + 1].bit_offset.is_some(); + let same_bitfield = both_bitfields + && raw_fields[idx].bit_offset == raw_fields[idx + 1].bit_offset; + + // Only remove if: + // - Same offset for non-bitfields, OR + // - 
Same offset AND same bit_offset for bitfields + if same_offset && (!both_bitfields || same_bitfield) { + raw_fields.remove(idx); + } else { + idx += 1; + } + } // Now pack bitfields that share the same storage unit let mut init_fields: Vec<(usize, usize, Initializer)> = Vec::new(); let mut i = 0; while i < raw_fields.len() { - let (offset, field_size, init, bit_offset, bit_width, storage_unit_size) = - &raw_fields[i]; + let RawFieldInit { + offset, + field_size, + init, + bit_offset, + bit_width, + storage_unit_size, + } = &raw_fields[i]; if let (Some(bit_off), Some(bit_w), Some(storage_size)) = (bit_offset, bit_width, storage_unit_size) @@ -898,8 +995,13 @@ // Look ahead for more bitfields at the same offset let mut j = i + 1; while j < raw_fields.len() { - let (next_off, _, next_init, next_bit_off, next_bit_w, _) = - &raw_fields[j]; + let RawFieldInit { + offset: next_off, + init: next_init, + bit_offset: next_bit_off, + bit_width: next_bit_w, + .. + } = &raw_fields[j]; if *next_off != *offset { break; } @@ -946,6 +1048,132 @@ } } + fn resolve_designator_chain( + &self, + base_type: TypeId, + base_offset: usize, + designators: &[Designator], + ) -> Option<ResolvedDesignator> { + let mut offset = base_offset; + let mut typ = base_type; + let mut bit_offset = None; + let mut bit_width = None; + let mut storage_unit_size = None; + + for (idx, designator) in designators.iter().enumerate() { + match designator { + Designator::Field(name) => { + let mut resolved = typ; + if self.types.kind(resolved) == TypeKind::Array { + resolved = self.types.base_type(resolved)?; + } + resolved = self.resolve_struct_type(resolved); + let member = self.types.find_member(resolved, *name)?; + offset += member.offset; + typ = member.typ; + if idx + 1 == designators.len() { + bit_offset = member.bit_offset; + bit_width = member.bit_width; + storage_unit_size = member.storage_unit_size; + } else { + bit_offset = None; + bit_width = None; + storage_unit_size = None; + } + } + Designator::Index(index) => { + if self.types.kind(typ) != TypeKind::Array { + return None; + } + let elem_type = self.types.base_type(typ)?; + let elem_size = self.types.size_bits(elem_type) / 8; + offset += (*index as usize) * (elem_size as usize); + typ = elem_type; + bit_offset = None; + bit_width = None; + storage_unit_size = None; + } + } + } + + Some(ResolvedDesignator { + offset, + typ, + bit_offset, + bit_width, + storage_unit_size, + }) + } + + fn next_positional_member( + &self, + members: &[crate::types::StructMember], + is_union: bool, + current_field_idx: &mut usize, + ) -> Option<MemberInfo> { + if is_union { + if *current_field_idx > 0 { + return None; + } + let member = members.iter().find(|m| m.name != StringId::EMPTY)?; + *current_field_idx = members.len(); + return Some(MemberInfo { + offset: member.offset, + typ: member.typ, + bit_offset: member.bit_offset, + bit_width: member.bit_width, + storage_unit_size: member.storage_unit_size, + }); + } + + while *current_field_idx < members.len() { + let member = &members[*current_field_idx]; + if member.name == StringId::EMPTY && member.bit_width.is_some() { + *current_field_idx += 1; + continue; + } + if member.name != StringId::EMPTY || member.bit_width.is_none() { + *current_field_idx += 1; + return Some(MemberInfo { + offset: member.offset, + typ: member.typ, + bit_offset: member.bit_offset, + bit_width: member.bit_width, + storage_unit_size: member.storage_unit_size, + }); + } + *current_field_idx += 1; + } + + None + } + + fn member_index_for_designator( 
+ &self, + members: &[crate::types::StructMember], + name: StringId, + ) -> Option<usize> { + for (idx, member) in members.iter().enumerate() { + if member.name == name { + return Some(idx + 1); + } + if member.name == StringId::EMPTY { + let member_type = self.types.get(member.typ); + let is_anon_aggregate = + matches!(member_type.kind, TypeKind::Struct | TypeKind::Union) + && member_type + .composite + .as_ref() + .is_some_and(|composite| composite.tag.is_none()); + if is_anon_aggregate && self.types.find_member(member.typ, name).is_some() { + return Some(idx + 1); + } + } + } + + None + } + // ======================================================================== // Function linearization // ======================================================================== @@ -1039,6 +1267,9 @@ impl<'a> Linearizer<'a> { // Scalar parameters need local storage for SSA-correct reassignment handling let mut scalar_params: Vec<(String, Option<SymbolId>, TypeId, PseudoId)> = Vec::with_capacity(func.params.len()); + // va_list parameters need special handling (pointer storage) + let mut valist_params: Vec<(String, Option<SymbolId>, TypeId, PseudoId)> = + Vec::with_capacity(func.params.len()); for (i, param) in func.params.iter().enumerate() { let name = param @@ -1055,7 +1286,12 @@ // For struct/union types, we'll copy to a local later // so member access works properly let param_kind = self.types.kind(param.typ); - if param_kind == TypeKind::Struct || param_kind == TypeKind::Union { + if param_kind == TypeKind::VaList { + // va_list parameters are special: due to array-to-pointer decay at call site, + // the actual value passed is a pointer to the va_list struct, not the struct itself. + // We'll handle this after function setup. + valist_params.push((name, param.symbol, param.typ, pseudo_id)); + } else if param_kind == TypeKind::Struct || param_kind == TypeKind::Union { struct_params.push((name, param.symbol, param.typ, pseudo_id)); } else if self.types.is_complex(param.typ) { // Complex parameters: copy to local storage so real/imag access works @@ -1079,6 +1315,36 @@ // Entry instruction self.emit(Instruction::new(Opcode::Entry)); + // Handle va_list parameters: store the pointer value (not the struct) + for (name, symbol_id_opt, typ, arg_pseudo) in valist_params { + // va_list params are passed as pointers due to array decay at call site. + // Store the pointer value (8 bytes) to a local. 
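The decay behavior this handles looks like the following in C (illustrative only; `vsum` and `sum` are hypothetical names):

```c
#include <stdarg.h>
#include <stdio.h>

/* On x86-64, va_list is __va_list_tag[1], an array type, so passing it to
 * vsum decays to a pointer: the callee receives a pointer to the caller's
 * va_list state rather than a copy of the struct. */
static int vsum(int n, va_list ap) {
    int total = 0;
    for (int i = 0; i < n; i++)
        total += va_arg(ap, int);
    return total;
}

static int sum(int n, ...) {
    va_list ap;
    va_start(ap, n);
    int total = vsum(n, ap);
    va_end(ap);
    return total;
}

int main(void) {
    printf("%d\n", sum(3, 1, 2, 3)); /* 6 */
    return 0;
}
```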
+ let ptr_type = self.types.pointer_to(typ); + let local_sym = self.alloc_pseudo(); + let sym = Pseudo::sym(local_sym, name.clone()); + if let Some(func) = &mut self.current_func { + func.add_pseudo(sym); + func.add_local(&name, local_sym, ptr_type, false, false, None, None); + } + let ptr_size = self.types.size_bits(ptr_type); + self.emit(Instruction::store( + arg_pseudo, local_sym, 0, ptr_type, ptr_size, + )); + if let Some(symbol_id) = symbol_id_opt { + self.locals.insert( + symbol_id, + LocalVarInfo { + sym: local_sym, + typ, // Keep original va_list type for type checking + vla_size_sym: None, + vla_elem_type: None, + vla_dim_syms: vec![], + is_indirect: true, // va_list param: local holds a pointer + }, + ); + } + } + // Copy struct parameters to local storage so member access works for (name, symbol_id_opt, typ, arg_pseudo) in struct_params { // Create a symbol pseudo for this local variable (its address) @@ -1140,6 +1406,7 @@ impl<'a> Linearizer<'a> { vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); } @@ -1173,6 +1440,7 @@ impl<'a> Linearizer<'a> { vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); } @@ -1207,6 +1475,7 @@ impl<'a> Linearizer<'a> { vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); } @@ -1379,25 +1648,43 @@ impl<'a> Linearizer<'a> { Stmt::Return(expr) => { if let Some(e) = expr { - let typ = self.expr_type(e); + let expr_typ = self.expr_type(e); + // Get the function's actual return type for proper conversion + let func_ret_type = self + .current_func + .as_ref() + .map(|f| f.return_type) + .unwrap_or(expr_typ); if let Some(sret_ptr) = self.struct_return_ptr { self.emit_sret_return(e, sret_ptr, self.struct_return_size); } else if let Some(ret_type) = self.two_reg_return_type { self.emit_two_reg_return(e, ret_type); - } else if self.types.is_complex(typ) { + } else if self.types.is_complex(expr_typ) { let addr = self.linearize_lvalue(e); - let typ_size = self.types.size_bits(typ); - self.emit(Instruction::ret_typed(Some(addr), typ, typ_size)); + let typ_size = self.types.size_bits(func_ret_type); + self.emit(Instruction::ret_typed(Some(addr), func_ret_type, typ_size)); } else { let val = self.linearize_expr(e); + // Convert expression value to function's return type if needed + let converted_val = if expr_typ != func_ret_type + && self.types.kind(func_ret_type) != TypeKind::Void + { + self.emit_convert(val, expr_typ, func_ret_type) + } else { + val + }; // Function types decay to pointers when returned - let typ_size = if self.types.kind(typ) == TypeKind::Function { + let typ_size = if self.types.kind(func_ret_type) == TypeKind::Function { self.target.pointer_width } else { - self.types.size_bits(typ) + self.types.size_bits(func_ret_type) }; - self.emit(Instruction::ret_typed(Some(val), typ, typ_size)); + self.emit(Instruction::ret_typed( + Some(converted_val), + func_ret_type, + typ_size, + )); } } else { self.emit(Instruction::ret(None)); @@ -1528,6 +1815,7 @@ impl<'a> Linearizer<'a> { vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); @@ -1636,13 +1924,27 @@ impl<'a> Linearizer<'a> { val_imag, sym_id, base_bytes, base_typ, base_bits, )); } else { - // Simple scalar initializer - let val = self.linearize_expr(init); - // Convert the value to the target type (important for _Bool normalization) - let init_type = self.expr_type(init); - let converted = self.emit_convert(val, init_type, typ); - let size = 
self.types.size_bits(typ); - self.emit(Instruction::store(converted, sym_id, 0, typ, size)); + // Check for large struct/union initialization (> 64 bits) + // linearize_expr returns an address for large aggregates + let type_kind = self.types.kind(typ); + let type_size = self.types.size_bits(typ); + if (type_kind == TypeKind::Struct || type_kind == TypeKind::Union) + && type_size > 64 + { + // Large struct/union init - source is an address, do block copy + let value_addr = self.linearize_expr(init); + let type_size_bytes = type_size / 8; + + self.emit_block_copy(sym_id, value_addr, type_size_bytes as i64); + } else { + // Simple scalar initializer + let val = self.linearize_expr(init); + // Convert the value to the target type (important for _Bool normalization) + let init_type = self.expr_type(init); + let converted = self.emit_convert(val, init_type, typ); + let size = self.types.size_bits(typ); + self.emit(Instruction::store(converted, sym_id, 0, typ, size)); + } } } } @@ -1812,6 +2114,7 @@ vla_size_sym: Some(size_sym_id), vla_elem_type: Some(elem_type), vla_dim_syms, + is_indirect: false, }, ); } @@ -1873,6 +2176,7 @@ vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); @@ -1920,43 +2224,79 @@ TypeKind::Array => { let elem_type = self.types.base_type(typ).unwrap_or(self.types.int_id); let elem_size = self.types.size_bits(elem_type) / 8; + let mut element_lists: HashMap<i64, Vec<InitElement>> = HashMap::new(); + let mut element_indices: Vec<i64> = Vec::new(); + let mut current_idx: i64 = 0; - for (idx, element) in elements.iter().enumerate() { - // Calculate the actual index considering designators - let actual_idx = if element.designators.is_empty() { - idx as i64 - } else { - // Use the first designator (should be Index for arrays) - match &element.designators[0] { - Designator::Index(i) => *i, - Designator::Field(_) => idx as i64, // Fall back for mismatched designator + for element in elements.iter() { + let mut index = None; + let mut index_pos = None; + for (pos, designator) in element.designators.iter().enumerate() { + if let Designator::Index(idx) = designator { + index = Some(*idx); + index_pos = Some(pos); + break; } + } + + let element_index = if let Some(idx) = index { + current_idx = idx + 1; + idx + } else { + let idx = current_idx; + current_idx += 1; + idx }; - let offset = base_offset + actual_idx * elem_size as i64; + let remaining_designators = match index_pos { + Some(pos) => element.designators[pos + 1..].to_vec(), + None => element.designators.clone(), + }; - // Handle nested initializer lists or scalar values - if let ExprKind::InitList { - elements: nested_elems, - } = &element.value.kind - { - // Nested array/struct initialization - recurse with accumulated offset - self.linearize_init_list_at_offset( - base_sym, - offset, - elem_type, - nested_elems, - ); - } else { - // Scalar value - let val = self.linearize_expr(&element.value); - let val_type = self.expr_type(&element.value); - let converted = self.emit_convert(val, val_type, elem_type); - let elem_size = self.types.size_bits(elem_type); - self.emit(Instruction::store( - converted, base_sym, offset, elem_type, elem_size, - )); + let entry = element_lists.entry(element_index).or_insert_with(|| { + element_indices.push(element_index); + Vec::new() + }); + + if remaining_designators.is_empty() { + if let ExprKind::InitList { + elements: nested_elements, + } = &element.value.kind + { + entry.extend(nested_elements.clone()); 
+ continue; + } } + + entry.push(InitElement { + designators: remaining_designators, + value: element.value.clone(), + }); + } + + element_indices.sort(); + for element_index in element_indices { + let Some(list) = element_lists.get(&element_index) else { + continue; + }; + let offset = base_offset + element_index * elem_size as i64; + if matches!( + self.types.kind(elem_type), + TypeKind::Array | TypeKind::Struct | TypeKind::Union + ) { + self.linearize_init_list_at_offset(base_sym, offset, elem_type, list); + continue; + } + let Some(last) = list.last() else { + continue; + }; + let val = self.linearize_expr(&last.value); + let val_type = self.expr_type(&last.value); + let converted = self.emit_convert(val, val_type, elem_type); + let elem_size = self.types.size_bits(elem_type); + self.emit(Instruction::store( + converted, base_sym, offset, elem_type, elem_size, + )); } } TypeKind::Struct | TypeKind::Union => { @@ -1968,109 +2308,106 @@ impl<'a> Linearizer<'a> { if let Some(composite) = self.types.get(resolved_typ).composite.as_ref() { // Clone members to avoid borrow issues let members: Vec<_> = composite.members.clone(); + let is_union = self.types.kind(resolved_typ) == TypeKind::Union; + let mut current_field_idx = 0; - for (idx, element) in elements.iter().enumerate() { - // Find the field (by designator or position) - // Use find_member to support anonymous struct/union members (C11 6.7.2.1p13) - let member_info = - if let Some(Designator::Field(name)) = element.designators.first() { - // Designated initializer: .field = value - // find_member handles anonymous struct/union members - self.types.find_member(resolved_typ, *name) - } else if idx < members.len() { - // Positional initializer - direct member only - let m = &members[idx]; - Some(crate::types::MemberInfo { - offset: m.offset, - typ: m.typ, - bit_offset: m.bit_offset, - bit_width: m.bit_width, - storage_unit_size: m.storage_unit_size, - }) - } else { - None // Too many initializers + for element in elements.iter() { + if element.designators.is_empty() { + let Some(member) = self.next_positional_member( + &members, + is_union, + &mut current_field_idx, + ) else { + continue; }; + let offset = base_offset + member.offset as i64; + let field_type = member.typ; + + // Check if this is a bitfield + if let (Some(bit_off), Some(bit_w), Some(storage_size)) = ( + member.bit_offset, + member.bit_width, + member.storage_unit_size, + ) { + let val = self.linearize_expr(&element.value); + let val_type = self.expr_type(&element.value); + let storage_type = match storage_size { + 1 => self.types.uchar_id, + 2 => self.types.ushort_id, + 4 => self.types.uint_id, + 8 => self.types.ulong_id, + _ => self.types.uint_id, + }; + let converted = self.emit_convert(val, val_type, storage_type); + self.emit_bitfield_store( + base_sym, + offset as usize, + bit_off, + bit_w, + storage_size, + converted, + ); + } else { + self.linearize_struct_field_init( + base_sym, + offset, + field_type, + &element.value, + ); + } + continue; + } - let Some(member) = member_info else { + let resolved = + self.resolve_designator_chain(resolved_typ, 0, &element.designators); + let Some(ResolvedDesignator { + offset, + typ: field_type, + bit_offset, + bit_width, + storage_unit_size, + }) = resolved + else { continue; }; + if let Some(Designator::Field(name)) = element.designators.first() { + if let Some(next_idx) = + self.member_index_for_designator(&members, *name) + { + current_field_idx = next_idx; + } + } + let offset = base_offset + offset as i64; - let offset = 
base_offset + member.offset as i64; - let field_type = member.typ; - - // Handle nested initializer lists or scalar values - if let ExprKind::InitList { - elements: nested_elems, - } = &element.value.kind + // Check if this is a bitfield + if let (Some(bit_off), Some(bit_w), Some(storage_size)) = + (bit_offset, bit_width, storage_unit_size) { - // Nested struct/array initialization - recurse with accumulated offset - self.linearize_init_list_at_offset( - base_sym, - offset, - field_type, - nested_elems, - ); - } else if let ExprKind::StringLit(s) = &element.value.kind { - // String literal initializing a char array field - if self.types.kind(field_type) == TypeKind::Array { - let elem_type = self - .types - .base_type(field_type) - .unwrap_or(self.types.char_id); - let elem_size = self.types.size_bits(elem_type); - - // Copy each byte from string literal to the array field - for (i, byte) in s.bytes().enumerate() { - let byte_val = self.emit_const(byte as i64, elem_type); - self.emit(Instruction::store( - byte_val, - base_sym, - offset + i as i64, - elem_type, - elem_size, - )); - } - // Store null terminator - let null_val = self.emit_const(0, elem_type); - self.emit(Instruction::store( - null_val, - base_sym, - offset + s.len() as i64, - elem_type, - elem_size, - )); - } else { - // Pointer initialized with string literal - store the address - let val = self.linearize_expr(&element.value); - let val_type = self.expr_type(&element.value); - let converted = self.emit_convert(val, val_type, field_type); - let size = self.types.size_bits(field_type); - self.emit(Instruction::store( - converted, base_sym, offset, field_type, size, - )); - } - } else { - // Scalar value - // If the field is an array and we're initializing with a scalar, - // initialize only the first element of the array (C99 6.7.8p14) - let (actual_type, actual_size) = - if self.types.kind(field_type) == TypeKind::Array { - let elem_type = - self.types.base_type(field_type).unwrap_or(field_type); - (elem_type, self.types.size_bits(elem_type)) - } else { - (field_type, self.types.size_bits(field_type)) - }; let val = self.linearize_expr(&element.value); let val_type = self.expr_type(&element.value); - let converted = self.emit_convert(val, val_type, actual_type); - self.emit(Instruction::store( + let storage_type = match storage_size { + 1 => self.types.uchar_id, + 2 => self.types.ushort_id, + 4 => self.types.uint_id, + 8 => self.types.ulong_id, + _ => self.types.uint_id, + }; + let converted = self.emit_convert(val, val_type, storage_type); + self.emit_bitfield_store( + base_sym, + offset as usize, + bit_off, + bit_w, + storage_size, converted, + ); + } else { + self.linearize_struct_field_init( base_sym, offset, - actual_type, - actual_size, - )); + field_type, + &element.value, + ); } } } @@ -2094,6 +2431,73 @@ impl<'a> Linearizer<'a> { } } + fn linearize_struct_field_init( + &mut self, + base_sym: PseudoId, + offset: i64, + field_type: TypeId, + value: &Expr, + ) { + if let ExprKind::InitList { + elements: nested_elems, + } = &value.kind + { + self.linearize_init_list_at_offset(base_sym, offset, field_type, nested_elems); + } else if let ExprKind::StringLit(s) = &value.kind { + if self.types.kind(field_type) == TypeKind::Array { + let elem_type = self + .types + .base_type(field_type) + .unwrap_or(self.types.char_id); + let elem_size = self.types.size_bits(elem_type); + + for (i, byte) in s.bytes().enumerate() { + let byte_val = self.emit_const(byte as i64, elem_type); + self.emit(Instruction::store( + byte_val, + 
base_sym, + offset + i as i64, + elem_type, + elem_size, + )); + } + let null_val = self.emit_const(0, elem_type); + self.emit(Instruction::store( + null_val, + base_sym, + offset + s.len() as i64, + elem_type, + elem_size, + )); + } else { + let val = self.linearize_expr(value); + let val_type = self.expr_type(value); + let converted = self.emit_convert(val, val_type, field_type); + let size = self.types.size_bits(field_type); + self.emit(Instruction::store( + converted, base_sym, offset, field_type, size, + )); + } + } else { + let (actual_type, actual_size) = if self.types.kind(field_type) == TypeKind::Array { + let elem_type = self.types.base_type(field_type).unwrap_or(field_type); + (elem_type, self.types.size_bits(elem_type)) + } else { + (field_type, self.types.size_bits(field_type)) + }; + let val = self.linearize_expr(value); + let val_type = self.expr_type(value); + let converted = self.emit_convert(val, val_type, actual_type); + self.emit(Instruction::store( + converted, + base_sym, + offset, + actual_type, + actual_size, + )); + } + } + fn linearize_if(&mut self, cond: &Expr, then_stmt: &Stmt, else_stmt: Option<&Stmt>) { let cond_val = self.linearize_expr(cond); @@ -2996,9 +3400,11 @@ impl<'a> Linearizer<'a> { self.is_pure_expr(left) && self.is_pure_expr(right) } - // Unary ops are pure if operand is pure, except for pre-inc/dec + // Unary ops are pure if operand is pure, except for pre-inc/dec and dereference. + // Dereference (*ptr) can cause UB/crash if the pointer is NULL or invalid, + // so we must not eagerly evaluate it in conditional expressions. ExprKind::Unary { op, operand, .. } => match op { - UnaryOp::PreInc | UnaryOp::PreDec => false, + UnaryOp::PreInc | UnaryOp::PreDec | UnaryOp::Deref => false, _ => self.is_pure_expr(operand), }, @@ -3019,13 +3425,16 @@ impl<'a> Linearizer<'a> { // Function calls are never pure (may have side effects) ExprKind::Call { .. } => false, - // Member access is pure if the base expression is pure - ExprKind::Member { expr, .. } | ExprKind::Arrow { expr, .. } => self.is_pure_expr(expr), + // Member access through struct value (.) is pure if the base is pure. + ExprKind::Member { expr, .. } => self.is_pure_expr(expr), - // Array indexing is pure if both parts are pure - ExprKind::Index { array, index } => { - self.is_pure_expr(array) && self.is_pure_expr(index) - } + // Arrow access (ptr->member) can cause UB/crash if ptr is NULL, + // so we must not eagerly evaluate it in conditional expressions. + ExprKind::Arrow { .. } => false, + + // Array indexing can cause UB/crash if the pointer is invalid, + // so we must not eagerly evaluate it in conditional expressions. + ExprKind::Index { .. } => false, // Casts are pure if the operand is pure ExprKind::Cast { expr, .. } => self.is_pure_expr(expr), @@ -3176,6 +3585,18 @@ impl<'a> Linearizer<'a> { unreachable!("static local sentinel without static_locals entry"); } } + // Check if this local holds a pointer to the actual data + // (e.g., va_list parameters due to array-to-pointer decay at call site). + // If so, load the pointer instead of taking the address. 
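The purity change in this hunk (treating Deref, Arrow, and Index as impure) guards against eager evaluation of trapping loads; a minimal C case that must not crash (illustrative only):

```c
#include <stdio.h>

struct node { int value; };

/* If p->value were considered pure and hoisted out of the conditional,
 * the p == NULL call would dereference a null pointer. Keeping Arrow
 * impure forces evaluation only on the taken branch. */
static int value_or_default(struct node *p) {
    return p ? p->value : -1;
}

int main(void) {
    struct node n = { 42 };
    printf("%d\n", value_or_default(&n)); /* 42 */
    printf("%d\n", value_or_default(0));  /* -1, no crash */
    return 0;
}
```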
+ if local.is_indirect { + // The local stores a pointer; load and return it + let ptr_type = self.types.pointer_to(local.typ); + let result = self.alloc_pseudo(); + let size = self.types.size_bits(ptr_type); + self.emit(Instruction::load(result, local.sym, 0, ptr_type, size)); + return result; + } + let result = self.alloc_pseudo(); self.emit(Instruction::sym_addr( result, @@ -3184,10 +3605,20 @@ impl<'a> Linearizer<'a> { )); result } else if let Some(¶m_pseudo) = self.var_map.get(&name_str) { - // Parameter whose address is taken - spill to local storage + // Parameter whose address is taken + let param_type = self.expr_type(expr); + let type_kind = self.types.kind(param_type); + + // va_list parameters are special: the parameter value IS already a pointer + // to the va_list structure (due to array-to-pointer decay at call site). + // Return the pointer value directly instead of spilling. + if type_kind == TypeKind::VaList { + return param_pseudo; + } + + // For other parameters, spill to local storage. // Parameters are pass-by-value in the IR (Arg pseudos), but if // their address is taken, we need to copy to a stack slot first. - let param_type = self.expr_type(expr); let size = self.types.size_bits(param_type); // Create a local variable to hold the parameter value @@ -3226,6 +3657,7 @@ impl<'a> Linearizer<'a> { vla_size_sym: None, vla_elem_type: None, vla_dim_syms: vec![], + is_indirect: false, }, ); @@ -3389,7 +3821,7 @@ impl<'a> Linearizer<'a> { } ExprKind::CompoundLiteral { typ, elements } => { // Compound literal as lvalue: create it and return its address - // This is used for &(struct S){...} + // This is used for &(struct S){...} and large struct assignment like *p = (struct S){...} let sym_id = self.alloc_pseudo(); let unique_name = format!(".compound_literal.{}", sym_id.0); let sym = Pseudo::sym(sym_id, unique_name.clone()); @@ -3405,6 +3837,18 @@ impl<'a> Linearizer<'a> { None, ); } + + // For compound literals with partial initialization, C99 6.7.8p21 requires + // zero-initialization of all subobjects not explicitly initialized. + // Zero the entire compound literal first, then initialize specific members. 
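The C99 6.7.8p21 requirement being implemented here, as a small example (illustrative only):

```c
#include <stdio.h>

struct point { int x, y, z; };

int main(void) {
    /* Subobjects without an explicit initializer are zero-initialized,
     * so the compound literal must be cleared before .y is stored. */
    struct point *p = &(struct point){ .y = 5 };
    printf("%d %d %d\n", p->x, p->y, p->z); /* 0 5 0 */
    return 0;
}
```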
+ let type_kind = self.types.kind(*typ); + if type_kind == TypeKind::Struct + || type_kind == TypeKind::Union + || type_kind == TypeKind::Array + { + self.emit_aggregate_zero(sym_id, *typ); + } + self.linearize_init_list(sym_id, *typ, elements); // Return address of the compound literal @@ -3701,16 +4145,37 @@ impl<'a> Linearizer<'a> { member_info.typ, ) } else { - let result = self.alloc_pseudo(); let size = self.types.size_bits(member_info.typ); - self.emit(Instruction::load( - result, - base, - member_info.offset as i64, - member_info.typ, - size, - )); - result + let member_kind = self.types.kind(member_info.typ); + + // Large structs (size > 64) can't be loaded into registers - return address + if (member_kind == TypeKind::Struct || member_kind == TypeKind::Union) && size > 64 { + if member_info.offset == 0 { + base + } else { + let result = self.alloc_pseudo(); + let offset_val = self.emit_const(member_info.offset as i64, self.types.long_id); + self.emit(Instruction::binop( + Opcode::Add, + result, + base, + offset_val, + self.types.long_id, + 64, + )); + result + } + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load( + result, + base, + member_info.offset as i64, + member_info.typ, + size, + )); + result + } } } @@ -3771,16 +4236,37 @@ impl<'a> Linearizer<'a> { member_info.typ, ) } else { - let result = self.alloc_pseudo(); let size = self.types.size_bits(member_info.typ); - self.emit(Instruction::load( - result, - ptr, - member_info.offset as i64, - member_info.typ, - size, - )); - result + let member_kind = self.types.kind(member_info.typ); + + // Large structs (size > 64) can't be loaded into registers - return address + if (member_kind == TypeKind::Struct || member_kind == TypeKind::Union) && size > 64 { + if member_info.offset == 0 { + ptr + } else { + let result = self.alloc_pseudo(); + let offset_val = self.emit_const(member_info.offset as i64, self.types.long_id); + self.emit(Instruction::binop( + Opcode::Add, + result, + ptr, + offset_val, + self.types.long_id, + 64, + )); + result + } + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load( + result, + ptr, + member_info.offset as i64, + member_info.typ, + size, + )); + result + } } } @@ -3913,13 +4399,20 @@ impl<'a> Linearizer<'a> { )); // If element type is an array, just return the address (arrays decay to pointers) - if self.types.kind(elem_type) == TypeKind::Array { + let elem_kind = self.types.kind(elem_type); + if elem_kind == TypeKind::Array { addr } else { - let result = self.alloc_pseudo(); let size = self.types.size_bits(elem_type); - self.emit(Instruction::load(result, addr, 0, elem_type, size)); - result + // Large structs/unions (> 64 bits) can't be loaded into registers - return address + // Assignment will handle the actual copy via emit_assign's large struct handling + if (elem_kind == TypeKind::Struct || elem_kind == TypeKind::Union) && size > 64 { + addr + } else { + let result = self.alloc_pseudo(); + self.emit(Instruction::load(result, addr, 0, elem_type, size)); + result + } } } @@ -4784,6 +5277,13 @@ impl<'a> Linearizer<'a> { let ptr_type = self.types.pointer_to(elem_type); self.emit(Instruction::sym_addr(result, sym_id, ptr_type)); return result; + } else if type_kind == TypeKind::VaList { + // va_list is defined as __va_list_tag[1] (an array type), so it decays to + // a pointer when used in expressions (C99 6.3.2.1, 7.15.1) + let result = self.alloc_pseudo(); + let ptr_type = self.types.pointer_to(typ); + self.emit(Instruction::sym_addr(result, sym_id, 
ptr_type)); + return result; } else if (type_kind == TypeKind::Struct || type_kind == TypeKind::Union) && size > 64 { @@ -4813,6 +5313,20 @@ impl<'a> Linearizer<'a> { let elem_type = self.types.base_type(local.typ).unwrap_or(self.types.int_id); let ptr_type = self.types.pointer_to(elem_type); self.emit(Instruction::sym_addr(result, local.sym, ptr_type)); + } else if type_kind == TypeKind::VaList { + // va_list is defined as __va_list_tag[1] (an array type), so it decays to + // a pointer when used in expressions (C99 6.3.2.1, 7.15.1) + if local.is_indirect { + // va_list parameter: local holds a pointer to the va_list struct + // Load the pointer value (array decay already happened at call site) + let ptr_type = self.types.pointer_to(local.typ); + let ptr_size = self.types.size_bits(ptr_type); + self.emit(Instruction::load(result, local.sym, 0, ptr_type, ptr_size)); + } else { + // Regular va_list local: take address (normal array decay) + let ptr_type = self.types.pointer_to(local.typ); + self.emit(Instruction::sym_addr(result, local.sym, ptr_type)); + } } else if (type_kind == TypeKind::Struct || type_kind == TypeKind::Union) && size > 64 { // Large structs can't be loaded into registers - return address let ptr_type = self.types.pointer_to(local.typ); @@ -4859,9 +5373,10 @@ impl<'a> Linearizer<'a> { self.emit(Instruction::sym_addr(result, sym_id, ptr_type)); result } - // Functions decay to function pointers, and large structs can't be loaded - // into registers - for both cases, return the address + // Functions decay to function pointers, va_list decays to pointer (C99 6.3.2.1, 7.15.1), + // and large structs can't be loaded into registers - for all cases, return the address else if type_kind == TypeKind::Function + || type_kind == TypeKind::VaList || ((type_kind == TypeKind::Struct || type_kind == TypeKind::Union) && size > 64) { let result = self.alloc_pseudo(); @@ -6030,6 +6545,57 @@ impl<'a> Linearizer<'a> { } } + /// Emit a block copy from src to dst using integer chunks. 
+ fn emit_block_copy(&mut self, dst: PseudoId, src: PseudoId, size_bytes: i64) { + let mut offset: i64 = 0; + + // Copy in 8-byte chunks + while offset + 8 <= size_bytes { + let tmp = self.alloc_pseudo(); + self.emit(Instruction::load(tmp, src, offset, self.types.ulong_id, 64)); + self.emit(Instruction::store( + tmp, + dst, + offset, + self.types.ulong_id, + 64, + )); + offset += 8; + } + + // Handle remaining bytes + let remaining = size_bytes - offset; + if remaining >= 4 { + let tmp = self.alloc_pseudo(); + self.emit(Instruction::load(tmp, src, offset, self.types.uint_id, 32)); + self.emit(Instruction::store(tmp, dst, offset, self.types.uint_id, 32)); + offset += 4; + } + if remaining % 4 >= 2 { + let tmp = self.alloc_pseudo(); + self.emit(Instruction::load( + tmp, + src, + offset, + self.types.ushort_id, + 16, + )); + self.emit(Instruction::store( + tmp, + dst, + offset, + self.types.ushort_id, + 16, + )); + offset += 2; + } + if remaining % 2 == 1 { + let tmp = self.alloc_pseudo(); + self.emit(Instruction::load(tmp, src, offset, self.types.uchar_id, 8)); + self.emit(Instruction::store(tmp, dst, offset, self.types.uchar_id, 8)); + } + } + /// Emit code to load a bitfield value /// Returns the loaded value as a PseudoId fn emit_bitfield_load( @@ -7491,6 +8057,24 @@ impl<'a> Linearizer<'a> { return real; // Return real part as the result value } + // For large struct/union assignment (> 64 bits), do a block copy + // Similar to complex type handling - get addresses and copy in chunks + let target_kind = self.types.kind(target_typ); + let target_size = self.types.size_bits(target_typ); + if (target_kind == TypeKind::Struct || target_kind == TypeKind::Union) + && target_size > 64 + && op == AssignOp::Assign + { + let target_addr = self.linearize_lvalue(target); + let value_addr = self.linearize_lvalue(value); + let target_size_bytes = target_size / 8; + + self.emit_block_copy(target_addr, value_addr, target_size_bytes as i64); + + // Return the target address as the result + return target_addr; + } + let rhs = self.linearize_expr(value); // Check for pointer compound assignment (p += n or p -= n) diff --git a/cc/ir/test_linearize.rs b/cc/ir/test_linearize.rs index f3b47cff8..4c4872ab1 100644 --- a/cc/ir/test_linearize.rs +++ b/cc/ir/test_linearize.rs @@ -15,13 +15,13 @@ use super::*; use crate::parse::ast::{ - AssignOp, BlockItem, Declaration, ExprKind, ExternalDecl, FunctionDef, InitDeclarator, - InitElement, Parameter, UnaryOp, + AssignOp, BinaryOp, BlockItem, Declaration, Designator, ExprKind, ExternalDecl, FunctionDef, + InitDeclarator, InitElement, Parameter, UnaryOp, }; use crate::strings::StringTable; use crate::symbol::Symbol; use crate::target::Target; -use crate::types::{CompositeType, StructMember, Type}; +use crate::types::{CompositeType, StructMember, Type, TypeTable}; /// Create a default position for test code fn test_pos() -> Position { @@ -4128,3 +4128,1582 @@ fn test_return_address_emits_opcode() { "__builtin_return_address should emit ReturnAddress opcode" ); } + +// ============================================================================ +// Mixed designated + positional initializer field tracking +// Regression test: positional fields after a designator must use the correct +// field index (one past the designated field), not the element's enumeration index. +// Bug: {.b = 20, 30, 40} stored 30 at offset 4 (b) instead of offset 8 (c). 
+// ============================================================================ + +#[test] +fn test_mixed_designated_positional_struct_init() { + // Test: struct S { int a; int b; int c; int d; }; + // struct S s = {.b = 20, 30, 40}; + // Expected stores: offset 4 = 20 (b), offset 8 = 30 (c), offset 12 = 40 (d) + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + + // Create field name StringIds + let a_id = ctx.str("a"); + let b_id = ctx.str("b"); + let c_id = ctx.str("c"); + let d_id = ctx.str("d"); + + // Create struct S { int a; int b; int c; int d; } + let struct_composite = CompositeType { + tag: None, + members: vec![ + StructMember { + name: a_id, + typ: int_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: b_id, + typ: int_type, + offset: 4, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: c_id, + typ: int_type, + offset: 8, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: d_id, + typ: int_type, + offset: 12, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ], + enum_constants: vec![], + size: 16, + align: 4, + is_complete: true, + }; + let struct_type = ctx.types.intern(Type::struct_type(struct_composite)); + let s_sym = ctx.var("s", struct_type); + + // Create init list: {.b = 20, 30, 40} + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Field(b_id)], + value: Box::new(Expr::int(20, &ctx.types)), + }, + InitElement { + designators: vec![], + value: Box::new(Expr::int(30, &ctx.types)), + }, + InitElement { + designators: vec![], + value: Box::new(Expr::int(40, &ctx.types)), + }, + ], + }, + struct_type, + ); + + // Function: int test() { struct S s = {.b = 20, 30, 40}; return 0; } + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: s_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // Should have 3 stores for the 3 initialized fields (b, c, d) + let store_count = ir.matches("store").count(); + assert!( + store_count >= 3, + "Expected at least 3 stores for .b=20, c=30, d=40, got {}: {}", + store_count, + ir + ); + + // Verify stores go to distinct offsets (not the same offset twice) + // The bug caused 20 and 30 to both be stored at offset +4 + // Extract all "+ N" store offsets from the IR + let store_lines: Vec<&str> = ir.lines().filter(|l| l.contains("store")).collect(); + + // Count unique offsets among store instructions + let mut offsets: Vec<&str> = store_lines + .iter() + .filter_map(|line| line.find("+ ").map(|pos| &line[pos..])) + .collect(); + offsets.sort(); + offsets.dedup(); + + // With the fix, we should have 3 distinct offsets (4, 8, 12) + // Without the 
fix, offset 4 appears twice and we'd only have 2 unique offsets + assert!( + offsets.len() >= 3, + "Expected 3 distinct store offsets for fields b(+4), c(+8), d(+12), \ + got {} unique offsets {:?}. IR:\n{}", + offsets.len(), + offsets, + ir + ); +} + +#[test] +fn test_mixed_designated_positional_array_init() { + // Test: int arr[5] = {[2] = 20, 30, 40}; + // Expected stores: index 2 = 20, index 3 = 30, index 4 = 40 + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + + let arr_type = ctx.types.intern(Type::array(int_type, 5)); + let arr_sym = ctx.var("arr", arr_type); + + // Create init list: {[2] = 20, 30, 40} + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Index(2)], + value: Box::new(Expr::int(20, &ctx.types)), + }, + InitElement { + designators: vec![], + value: Box::new(Expr::int(30, &ctx.types)), + }, + InitElement { + designators: vec![], + value: Box::new(Expr::int(40, &ctx.types)), + }, + ], + }, + arr_type, + ); + + // Function: int test() { int arr[5] = {[2] = 20, 30, 40}; return 0; } + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: arr_sym, + typ: arr_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // Should have 3 stores for the 3 initialized elements + let store_count = ir.matches("store").count(); + assert!( + store_count >= 3, + "Expected at least 3 stores for [2]=20, [3]=30, [4]=40, got {}: {}", + store_count, + ir + ); + + // Verify stores go to distinct offsets + let store_lines: Vec<&str> = ir.lines().filter(|l| l.contains("store")).collect(); + let mut offsets: Vec<&str> = store_lines + .iter() + .filter_map(|line| line.find("+ ").map(|pos| &line[pos..])) + .collect(); + offsets.sort(); + offsets.dedup(); + + // With the fix: 3 distinct offsets (8, 12, 16 for indices 2, 3, 4) + // Without the fix: offset 8 appears twice (once for the designated [2], once for element 40 via its raw enumeration index 2) + assert!( + offsets.len() >= 3, + "Expected 3 distinct store offsets for arr[2](+8), arr[3](+12), arr[4](+16), \ + got {} unique offsets {:?}. 
IR:\n{}", + offsets.len(), + offsets, + ir + ); +} + +#[test] +fn test_designator_chain_nested_struct_init() { + // struct { struct { int x; int y; } pt; int z; } s = { .pt.x = 10, .pt.y = 20, .z = 30 }; + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + + let x_id = ctx.str("x"); + let y_id = ctx.str("y"); + let pt_members = vec![ + StructMember { + name: x_id, + typ: ctx.int_type(), + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: y_id, + typ: ctx.int_type(), + offset: 4, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + let pt_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + members: pt_members, + enum_constants: vec![], + size: 8, + align: 4, + is_complete: true, + })); + + let pt_id = ctx.str("pt"); + let z_id = ctx.str("z"); + let outer_members = vec![ + StructMember { + name: pt_id, + typ: pt_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: z_id, + typ: ctx.int_type(), + offset: 8, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + let outer_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + members: outer_members, + enum_constants: vec![], + size: 12, + align: 4, + is_complete: true, + })); + let outer_sym = ctx.var("s", outer_type); + + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Field(pt_id), Designator::Field(x_id)], + value: Box::new(Expr::int(10, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(pt_id), Designator::Field(y_id)], + value: Box::new(Expr::int(20, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(z_id)], + value: Box::new(Expr::int(30, &ctx.types)), + }, + ], + }, + outer_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: outer_sym, + typ: outer_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + let store_count = ir.matches("store").count(); + assert!( + store_count >= 3, + "Expected stores for nested designators, got {}: {}", + store_count, + ir + ); +} + +#[test] +fn test_designator_chain_array_member_init() { + // struct { int arr[3]; } s = { .arr[1] = 42 }; + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + let arr_type = ctx.types.intern(Type::array(int_type, 3)); + let arr_id = ctx.str("arr"); + let members = vec![StructMember { + name: arr_id, + typ: arr_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }]; + let struct_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 12, + align: 4, + is_complete: true, + })); + let 
s_sym = ctx.var("s", struct_type); + + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![InitElement { + designators: vec![Designator::Field(arr_id), Designator::Index(1)], + value: Box::new(Expr::int(42, &ctx.types)), + }], + }, + struct_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: s_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + let store_count = ir.matches("store").count(); + assert!( + store_count >= 1, + "Expected store for array member designator, got {}: {}", + store_count, + ir + ); +} + +#[test] +fn test_repeated_designator_last_wins_array() { + // int arr[2] = {[0] = 1, [0] = 2}; should store only last value + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + let arr_type = ctx.types.intern(Type::array(int_type, 2)); + let arr_sym = ctx.var("arr", arr_type); + + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Index(0)], + value: Box::new(Expr::int(1, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Index(0)], + value: Box::new(Expr::int(2, &ctx.types)), + }, + ], + }, + arr_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: arr_sym, + typ: arr_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + let store_count = ir.matches("store").count(); + assert!( + (1..=2).contains(&store_count), + "Expected one store for repeated designator, got {}: {}", + store_count, + ir + ); +} + +#[test] +fn test_skip_unnamed_bitfield_positional_init() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + let a_id = ctx.str("a"); + let b_id = ctx.str("b"); + let members = vec![ + StructMember { + name: a_id, + typ: int_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: StringId::EMPTY, + typ: int_type, + offset: 4, + bit_offset: Some(0), + bit_width: Some(8), + storage_unit_size: Some(4), + explicit_align: None, + }, + StructMember { + name: b_id, + typ: int_type, + offset: 8, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + let struct_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + 
members, + enum_constants: vec![], + size: 12, + align: 4, + is_complete: true, + })); + let s_sym = ctx.var("s", struct_type); + + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![], + value: Box::new(Expr::int(10, &ctx.types)), + }, + InitElement { + designators: vec![], + value: Box::new(Expr::int(20, &ctx.types)), + }, + ], + }, + struct_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: s_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + let store_count = ir.matches("store").count(); + assert!( + store_count >= 2, + "Expected stores for named fields a/b, got {}: {}", + store_count, + ir + ); +} + +#[test] +fn test_union_first_named_member_positional_init() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + let a_id = ctx.str("a"); + let members = vec![ + StructMember { + name: StringId::EMPTY, + typ: int_type, + offset: 0, + bit_offset: Some(0), + bit_width: Some(16), + storage_unit_size: Some(4), + explicit_align: None, + }, + StructMember { + name: a_id, + typ: int_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + let union_type = ctx.types.intern(Type::union_type(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 4, + align: 4, + is_complete: true, + })); + let u_sym = ctx.var("u", union_type); + + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![InitElement { + designators: vec![], + value: Box::new(Expr::int(42, &ctx.types)), + }], + }, + union_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: u_sym, + typ: union_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + let store_count = ir.matches("store").count(); + assert!( + store_count >= 1, + "Expected store for union first named member, got {}: {}", + store_count, + ir + ); +} + +// ============================================================================ +// va_list parameter handling tests +// ============================================================================ + +#[test] +fn test_valist_parameter_stored_as_pointer() { + // Test: va_list
parameter is stored as 8-byte pointer (not 24-byte struct) + // because va_list decays to pointer at call site due to being an array type. + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + + // Create va_list type + let valist_type = ctx + .types + .intern(Type::basic(crate::types::TypeKind::VaList)); + + // Create symbol for parameter va + let va_sym = ctx.var("va", valist_type); + + // Function: void test(va_list va) { } + let func = FunctionDef { + return_type: ctx.types.void_id, + name: test_id, + params: vec![Parameter { + symbol: Some(va_sym), + typ: valist_type, + }], + body: Stmt::Block(vec![]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // The va_list parameter should be stored with .64 (pointer size), + // not .192 (full va_list struct size of 24 bytes = 192 bits) + assert!( + ir.contains("store.64"), + "va_list parameter should be stored as 64-bit pointer, got: {}", + ir + ); + assert!( + !ir.contains("store.192"), + "va_list parameter should NOT be stored as 192-bit struct, got: {}", + ir + ); +} + +#[test] +fn test_valist_local_not_indirect() { + // Test: va_list local declaration (not parameter) should be direct storage + // Only va_list parameters are indirect (holding a pointer). + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + + // Create va_list type + let valist_type = ctx + .types + .intern(Type::basic(crate::types::TypeKind::VaList)); + + // Create symbol for local va_list + let lva_sym = ctx.var("lva", valist_type); + + // Function: void test(void) { va_list lva; } + let lva_decl = Declaration { + declarators: vec![InitDeclarator { + symbol: lva_sym, + typ: valist_type, + storage_class: crate::types::TypeModifiers::empty(), + init: None, + vla_sizes: vec![], + explicit_align: None, + }], + }; + + let func = FunctionDef { + return_type: ctx.types.void_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![BlockItem::Declaration(lva_decl)]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + + // Check that the function has a local for lva (may have suffix like ".0") + let test_func = module.functions.iter().find(|f| f.name == "test"); + assert!(test_func.is_some(), "Should have function 'test'"); + + let func = test_func.unwrap(); + let has_lva_local = func.locals.keys().any(|k| k.starts_with("lva")); + assert!( + has_lva_local, + "Should have local starting with 'lva'. 
Locals: {:?}", + func.locals.keys().collect::>() + ); +} + +#[test] +fn test_valist_expression_decay() { + // Test: va_list used in expression context should decay to pointer + // This is similar to array decay - va_list is __va_list_tag[1] + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + + // Create va_list and pointer types + let valist_type = ctx + .types + .intern(Type::basic(crate::types::TypeKind::VaList)); + let uchar_ptr_type = ctx.types.pointer_to(ctx.types.uchar_id); + + // Create symbols + let va_sym = ctx.var("va", valist_type); + let ptr_sym = ctx.var("ptr", uchar_ptr_type); + + // va_list va (parameter) + // unsigned char *ptr = (unsigned char*)va; + let cast_expr = Expr::typed_unpositioned( + ExprKind::Cast { + cast_type: uchar_ptr_type, + expr: Box::new(Expr::var_typed(va_sym, valist_type)), + }, + uchar_ptr_type, + ); + + let ptr_decl = Declaration { + declarators: vec![InitDeclarator { + symbol: ptr_sym, + typ: uchar_ptr_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(cast_expr), + vla_sizes: vec![], + explicit_align: None, + }], + }; + + let func = FunctionDef { + return_type: ctx.types.void_id, + name: test_id, + params: vec![Parameter { + symbol: Some(va_sym), + typ: valist_type, + }], + body: Stmt::Block(vec![BlockItem::Declaration(ptr_decl)]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // The cast from va_list to pointer should involve loading the va pointer + // (since va is a parameter and is_indirect), not taking symaddr of va_list struct + assert!( + ir.contains("load.64"), + "Cast of va_list parameter should load the pointer value, got: {}", + ir + ); +} + +/// Test that multiple bitfields at the same offset are all initialized +/// when using designated initializers (static local case, which uses same path as globals). +/// This tests the fix for the bug where only the last bitfield was initialized +/// due to incorrect deduplication of fields sharing the same offset. 
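+///
+/// Shape of the C being exercised (a sketch mirroring this test's layout, not
+/// the exact CPython definition):
+///
+///   struct { int kind:3, compact:1, ascii:1, statically_allocated:1; } s =
+///       {.kind = 1, .compact = 1, .ascii = 1, .statically_allocated = 1};
+///   /* one shared storage unit; packed value = 1 | 1<<3 | 1<<4 | 1<<5 = 57 */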
+#[test] +fn test_bitfield_designated_init_multiple_same_offset() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + + // Create a struct with multiple bitfields sharing the same storage unit + // Similar to CPython's PyASCIIObject state field + let kind_id = ctx.str("kind"); + let compact_id = ctx.str("compact"); + let ascii_id = ctx.str("ascii"); + let static_alloc_id = ctx.str("statically_allocated"); + + let members = vec![ + StructMember { + name: kind_id, + typ: int_type, + offset: 0, + bit_offset: Some(0), + bit_width: Some(3), + storage_unit_size: Some(1), + explicit_align: None, + }, + StructMember { + name: compact_id, + typ: int_type, + offset: 0, + bit_offset: Some(3), + bit_width: Some(1), + storage_unit_size: Some(1), + explicit_align: None, + }, + StructMember { + name: ascii_id, + typ: int_type, + offset: 0, + bit_offset: Some(4), + bit_width: Some(1), + storage_unit_size: Some(1), + explicit_align: None, + }, + StructMember { + name: static_alloc_id, + typ: int_type, + offset: 0, + bit_offset: Some(5), + bit_width: Some(1), + storage_unit_size: Some(1), + explicit_align: None, + }, + ]; + + // Create a static struct type (type must have STATIC modifier for static locals) + let struct_type = ctx.types.intern(Type { + kind: crate::types::TypeKind::Struct, + modifiers: TypeModifiers::STATIC, + composite: Some(Box::new(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 1, + align: 1, + is_complete: true, + })), + base: None, + array_size: None, + params: None, + noreturn: false, + variadic: false, + }); + + let s_sym = ctx.var("s", struct_type); + + // Create init list: { .kind = 1, .compact = 1, .ascii = 1, .statically_allocated = 1 } + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Field(kind_id)], + value: Box::new(Expr::int(1, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(compact_id)], + value: Box::new(Expr::int(1, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(ascii_id)], + value: Box::new(Expr::int(1, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(static_alloc_id)], + value: Box::new(Expr::int(1, &ctx.types)), + }, + ], + }, + struct_type, + ); + + // Create a function with static local declaration + let decl = Declaration { + declarators: vec![InitDeclarator { + symbol: s_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }; + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(decl), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + + let module = ctx.linearize(&tu); + + // Check that the static local initializer has the correct packed value + // kind=1 (bits 0-2), compact=1 (bit 3), ascii=1 (bit 4), static_alloc=1 (bit 5) + // Expected: 1 | (1 << 3) | (1 << 4) | (1 << 5) = 1 + 8 + 16 + 32 = 57 + let global_names: Vec<_> = module.globals.iter().map(|g| &g.name).collect(); + let global = module + .globals + .iter() + .find(|g| g.name.contains("s")) + .unwrap_or_else(|| panic!("Should have static local 's', 
found: {:?}", global_names)); + + if let crate::ir::Initializer::Struct { fields, .. } = &global.init { + // Should have one field (the packed bitfield value) + assert_eq!(fields.len(), 1, "Should pack all bitfields into one field"); + let (offset, size, init) = &fields[0]; + assert_eq!(*offset, 0, "Packed field should be at offset 0"); + assert_eq!(*size, 1, "Storage unit size should be 1 byte"); + if let crate::ir::Initializer::Int(val) = init { + // All four bitfields set to 1 should give: 1 + 8 + 16 + 32 = 57 + assert_eq!( + *val, 57, + "Packed value should be 57 (all four bitfields set)" + ); + } else { + panic!("Expected Int initializer, got {:?}", init); + } + } else { + panic!("Expected Struct initializer, got {:?}", global.init); + } +} + +/// Test that multiple bitfields are correctly initialized with read-modify-write +/// when using designated initializers for local variables. +#[test] +fn test_bitfield_designated_init_local_var() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let int_type = ctx.int_type(); + + // Create a struct with multiple bitfields + let a_id = ctx.str("a"); + let b_id = ctx.str("b"); + let c_id = ctx.str("c"); + + let members = vec![ + StructMember { + name: a_id, + typ: int_type, + offset: 0, + bit_offset: Some(0), + bit_width: Some(4), + storage_unit_size: Some(1), + explicit_align: None, + }, + StructMember { + name: b_id, + typ: int_type, + offset: 0, + bit_offset: Some(4), + bit_width: Some(4), + storage_unit_size: Some(1), + explicit_align: None, + }, + StructMember { + name: c_id, + typ: int_type, + offset: 1, + bit_offset: Some(0), + bit_width: Some(8), + storage_unit_size: Some(1), + explicit_align: None, + }, + ]; + + let struct_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 2, + align: 1, + is_complete: true, + })); + + let s_sym = ctx.var("s", struct_type); + + // Create init list: { .a = 5, .b = 10, .c = 255 } + let init_list = Expr::typed_unpositioned( + ExprKind::InitList { + elements: vec![ + InitElement { + designators: vec![Designator::Field(a_id)], + value: Box::new(Expr::int(5, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(b_id)], + value: Box::new(Expr::int(10, &ctx.types)), + }, + InitElement { + designators: vec![Designator::Field(c_id)], + value: Box::new(Expr::int(255, &ctx.types)), + }, + ], + }, + struct_type, + ); + + let func = FunctionDef { + return_type: ctx.types.int_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: s_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(init_list), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(Some(Expr::int(0, &ctx.types)))), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // For local variables, each bitfield should use read-modify-write pattern + // So we should see multiple load and store pairs for the bitfield operations + // Count the number of load and store operations + let load_count = ir.matches("load.8").count(); + let store_count = ir.matches("store.8").count(); + + // We have 3 bitfields: a, b at offset 0, c at offset 1 + // Each bitfield init 
should do read-modify-write, so we expect: + // - At least 3 loads (one per bitfield) + // - At least 3 stores (one per bitfield) + // Plus the zero initialization stores + assert!( + load_count >= 3, + "Expected at least 3 loads for bitfield read-modify-write, got {}: {}", + load_count, + ir + ); + assert!( + store_count >= 3, + "Expected at least 3 stores for bitfield initialization, got {}: {}", + store_count, + ir + ); + + // Also verify the AND operations for masking (part of read-modify-write) + // In IR format it appears as "and.8" (with type suffix) + let and_count = ir.matches("and.8").count(); + assert!( + and_count >= 3, + "Expected at least 3 AND operations for bitfield masking, got {}: {}", + and_count, + ir + ); +} + +/// Test that large struct (> 64 bits) copy from array element works correctly. +/// This was a bug where copying a struct from an array would incorrectly +/// dereference the first field as a pointer instead of doing a proper memcpy-style copy. +/// Bug manifested when struct size > 64 bits (e.g., struct with two pointers = 128 bits). +#[test] +fn test_large_struct_copy_from_array() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let ptr_type = ctx.types.void_ptr_id; + + // Create a struct with two pointers (128 bits on 64-bit systems) + // struct pair { void *ptr; const char *str; }; + let ptr_id = ctx.str("ptr"); + let str_id = ctx.str("str"); + + let members = vec![ + StructMember { + name: ptr_id, + typ: ptr_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: str_id, + typ: ptr_type, + offset: 8, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + + let struct_type = ctx.types.intern(Type { + kind: crate::types::TypeKind::Struct, + modifiers: TypeModifiers::empty(), + composite: Some(Box::new(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 16, + align: 8, + is_complete: true, + })), + base: None, + array_size: None, + params: None, + noreturn: false, + variadic: false, + }); + + // Create array type: struct pair[2] + let array_type = ctx.types.intern(Type::array(struct_type, 2)); + + // Create global array variable + let arr_sym = ctx.var("arr", array_type); + + // Create local variable to copy into + let item_sym = ctx.var("item", struct_type); + + // Create function: void test(void) { struct pair item = arr[0]; } + let func = FunctionDef { + return_type: ctx.types.void_id, + name: test_id, + params: vec![], + body: Stmt::Block(vec![ + BlockItem::Declaration(Declaration { + declarators: vec![InitDeclarator { + symbol: item_sym, + typ: struct_type, + storage_class: crate::types::TypeModifiers::empty(), + init: Some(Expr::typed_unpositioned( + ExprKind::Index { + array: Box::new(Expr::typed_unpositioned( + ExprKind::Ident(arr_sym), + array_type, + )), + index: Box::new(Expr::int(0, &ctx.types)), + }, + struct_type, + )), + vla_sizes: vec![], + explicit_align: None, + }], + }), + BlockItem::Statement(Stmt::Return(None)), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // For large struct copy (128 bits), we should see: + // - Multiple 64-bit load/store pairs (at least 2 for a 128-bit struct) + // The loads should NOT be dereferencing the struct's 
first field as a pointer + // Instead they should be copying the struct content directly + + // Count 64-bit loads and stores + let load64_count = ir.matches("load.64").count(); + let store64_count = ir.matches("store.64").count(); + + // We need at least 2 loads and 2 stores for a 128-bit struct copy + assert!( + load64_count >= 2, + "Expected at least 2 64-bit loads for struct copy, got {}: {}", + load64_count, + ir + ); + assert!( + store64_count >= 2, + "Expected at least 2 64-bit stores for struct copy, got {}: {}", + store64_count, + ir + ); +} + +/// Test that compound literals with designated initializers zero-initialize +/// fields that are not explicitly set. +/// This is required by C99 6.7.8p21: "If there are fewer initializers in a +/// brace-enclosed list than there are elements or members of an aggregate, +/// ... the remainder of the aggregate shall be initialized implicitly the same +/// as objects that have static storage duration." +/// Bug: When using `*p = (struct S){.field1 = val}`, fields not mentioned in +/// the initializer would contain garbage instead of being zeroed. +#[test] +fn test_compound_literal_zero_init_lvalue() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let ptr_type = ctx.types.void_ptr_id; + + // Create a struct with three pointer fields (192 bits) + // struct { void *a, *b, *c; }; + let a_id = ctx.str("a"); + let b_id = ctx.str("b"); + let c_id = ctx.str("c"); + + let members = vec![ + StructMember { + name: a_id, + typ: ptr_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: b_id, + typ: ptr_type, + offset: 8, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + StructMember { + name: c_id, + typ: ptr_type, + offset: 16, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }, + ]; + + let struct_type = ctx.types.intern(Type { + kind: crate::types::TypeKind::Struct, + modifiers: TypeModifiers::empty(), + composite: Some(Box::new(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 24, + align: 8, + is_complete: true, + })), + base: None, + array_size: None, + params: None, + noreturn: false, + variadic: false, + }); + + let struct_ptr_type = ctx.types.pointer_to(struct_type); + + // Create parameter: struct S *p + let p_sym = ctx.var("p", struct_ptr_type); + + // Create compound literal expression: (struct S){.a = (void*)0x1234} + // Only .a is set, .b and .c should be zero-initialized + let compound_literal = Expr::typed_unpositioned( + ExprKind::CompoundLiteral { + typ: struct_type, + elements: vec![InitElement { + designators: vec![Designator::Field(a_id)], + value: Box::new(Expr::typed_unpositioned( + ExprKind::Cast { + cast_type: ptr_type, + expr: Box::new(Expr::int(0x1234, &ctx.types)), + }, + ptr_type, + )), + }], + }, + struct_type, + ); + + // Create assignment: *p = compound_literal + let assign = Expr::typed_unpositioned( + ExprKind::Assign { + op: AssignOp::Assign, + target: Box::new(Expr::typed_unpositioned( + ExprKind::Unary { + op: UnaryOp::Deref, + operand: Box::new(Expr::typed_unpositioned( + ExprKind::Ident(p_sym), + struct_ptr_type, + )), + }, + struct_type, + )), + value: Box::new(compound_literal), + }, + struct_type, + ); + + // Create function: void test(struct S *p) { *p = (struct S){.a = ...}; } + let func = FunctionDef { + return_type: ctx.types.void_id, + name: test_id, + params: vec![Parameter { + symbol: 
Some(p_sym), + typ: struct_ptr_type, + }], + body: Stmt::Block(vec![ + BlockItem::Statement(Stmt::Expr(assign)), + BlockItem::Statement(Stmt::Return(None)), + ]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // The compound literal must be zero-initialized first, then the designated + // fields are set. The code will: + // 1. Zero all 3 fields (24 bytes at offsets 0, 8, 16) + // 2. Store the designated value to .a (offset 0) + // 3. Copy the compound literal to *p + // + // The zero-init may reuse the same register for all zero stores (optimization) + // so we count stores to the compound literal at different offsets. + + // Verify that we have stores to the compound literal at offset +8 and +16 + // These are the fields (.b and .c) that are not explicitly initialized + // and must be zero-initialized per C99 6.7.8p21 + assert!( + ir.contains("+ 8") && ir.contains("+ 16"), + "Expected stores to offsets +8 and +16 for zero-init of fields b and c: {}", + ir + ); + + // Count store.64 operations - we need: + // - 3 stores for zero-init (offsets 0, 8, 16) + // - 1 store for designated init of .a (offset 0) + // - 3 stores for copying to *p (offsets 0, 8, 16) + // Plus some for parameter handling + let store64_count = ir.matches("store.64").count(); + assert!( + store64_count >= 7, + "Expected at least 7 store.64 (3 zero-init + 1 designated + 3 copy), got {}: {}", + store64_count, + ir + ); +} + +/// Test that ternary conditional expressions with pointer dereference use +/// short-circuit evaluation (control flow + phi) instead of Select instruction. +/// +/// Bug: `value = entry == NULL ? 0 : entry->x` would evaluate `entry->x` +/// unconditionally, causing a crash when `entry` is NULL. The is_pure_expr() +/// function incorrectly considered Arrow expressions as "pure" when they can +/// cause undefined behavior if the pointer is NULL. +#[test] +fn test_conditional_short_circuit_arrow() { + let mut ctx = TestContext::new(); + let test_id = ctx.str("test"); + let x_id = ctx.str("x"); + let int_type = ctx.types.int_id; + + // Create a struct with an int field: struct { int x; } + let members = vec![StructMember { + name: x_id, + typ: int_type, + offset: 0, + bit_offset: None, + bit_width: None, + storage_unit_size: None, + explicit_align: None, + }]; + let struct_type = ctx.types.intern(Type::struct_type(CompositeType { + tag: None, + members, + enum_constants: vec![], + size: 4, + align: 4, + is_complete: true, + })); + let struct_ptr_type = ctx.types.intern(Type::pointer(struct_type)); + + // Create symbol for the parameter + let entry_sym = ctx.var("entry", struct_ptr_type); + + // Create: entry == NULL ? 
0 : entry->x + let entry_expr = Expr::typed_unpositioned(ExprKind::Ident(entry_sym), struct_ptr_type); + let null_expr = Expr::typed_unpositioned(ExprKind::IntLit(0), int_type); + let zero_expr = Expr::typed_unpositioned(ExprKind::IntLit(0), int_type); + let arrow_expr = Expr::typed_unpositioned( + ExprKind::Arrow { + expr: Box::new(entry_expr.clone()), + member: x_id, + }, + int_type, + ); + let cond_eq = Expr::typed_unpositioned( + ExprKind::Binary { + left: Box::new(entry_expr), + op: BinaryOp::Eq, + right: Box::new(null_expr), + }, + int_type, + ); + let conditional = Expr::typed_unpositioned( + ExprKind::Conditional { + cond: Box::new(cond_eq), + then_expr: Box::new(zero_expr), + else_expr: Box::new(arrow_expr), + }, + int_type, + ); + + // Create function: int test(struct S *entry) { return entry == NULL ? 0 : entry->x; } + let func = FunctionDef { + return_type: int_type, + name: test_id, + params: vec![Parameter { + symbol: Some(entry_sym), + typ: struct_ptr_type, + }], + body: Stmt::Block(vec![BlockItem::Statement(Stmt::Return(Some(conditional)))]), + pos: test_pos(), + is_static: false, + is_inline: false, + calling_conv: crate::abi::CallingConv::default(), + }; + + let tu = TranslationUnit { + items: vec![ExternalDecl::FunctionDef(func)], + }; + let module = ctx.linearize(&tu); + let ir = format!("{}", module); + + // Verify that we use control flow (cbr + phi) instead of select instruction. + // Arrow expressions can cause UB if the pointer is NULL, so we must use + // short-circuit evaluation to avoid dereferencing NULL. + + // Should have conditional branch (cbr) for proper short-circuit evaluation + assert!( + ir.contains("cbr "), + "Expected conditional branch (cbr) for short-circuit evaluation: {}", + ir + ); + + // Should have phi instruction to merge results from both branches + assert!( + ir.contains("phi."), + "Expected phi instruction for merging conditional results: {}", + ir + ); + + // Should NOT have select instruction (would mean eager evaluation of both branches) + assert!( + !ir.contains("select."), + "Should NOT use select instruction with pointer dereference (causes UB): {}", + ir + ); +} diff --git a/cc/main.rs b/cc/main.rs index f7dd78076..949672eb4 100644 --- a/cc/main.rs +++ b/cc/main.rs @@ -146,6 +146,16 @@ struct Args { #[arg(long = "pcc-fpic", hide = true)] fpic: bool, + /// Generate position-independent executable code (PIE) + /// Set by preprocess_args() when -fPIE or -fpie is passed + #[arg(long = "pcc-fpie", action = clap::ArgAction::SetTrue, hide = true)] + fpie: bool, + + /// Disable PIE code generation (GCC compatibility) + /// Set by preprocess_args() when -fno-pie is passed + #[arg(long = "pcc-fno-pie", action = clap::ArgAction::SetTrue, hide = true)] + fno_pie: bool, + /// Produce a shared library #[arg(long = "shared", help = gettext("Produce a shared library"))] shared: bool, @@ -192,6 +202,14 @@ struct Args { /// Accepts -fno-builtin-FUNC format via preprocess_args #[arg(long = "pcc-fno-builtin-func", action = clap::ArgAction::Append, value_name = "func", hide = true)] fno_builtin_funcs: Vec, + + /// Extra flags to pass through to the linker (set by preprocess_args) + #[arg(long = "pcc-linker-flag", action = clap::ArgAction::Append, value_name = "flag", hide = true)] + linker_flags: Vec, + + /// Unsupported machine flags captured by preprocess_args + #[arg(long = "pcc-unsupported-mflag", action = clap::ArgAction::Append, value_name = "flag", hide = true)] + unsupported_mflags: Vec, } /// Print compilation statistics for capacity tuning @@ 
-422,7 +440,11 @@ fn process_file( // Generate assembly let emit_unwind_tables = !args.no_unwind_tables; - let mut codegen = arch::codegen::create_codegen(target.clone(), emit_unwind_tables, args.fpic); + let pie_mode = pie_enabled(args, target); + let pic_mode = args.fpic || args.shared || pie_mode; + let shared_mode = args.shared; + let mut codegen = + arch::codegen::create_codegen(target.clone(), emit_unwind_tables, pic_mode, shared_mode); let asm = codegen.generate(&module, &types); // Determine output file names @@ -506,6 +528,10 @@ fn process_file( // Pass -shared flag for shared library creation if args.shared { link_cmd.arg("-shared"); + } else if pie_mode { + link_cmd.arg("-pie"); + } else { + link_cmd.arg("-no-pie"); } link_cmd.args(["-o", &exe_file, &temp_obj]); // Add library search paths @@ -516,6 +542,10 @@ fn process_file( for lib in &args.libraries { link_cmd.arg(format!("-l{}", lib)); } + // Add extra linker flags + for flag in &args.linker_flags { + link_cmd.arg(flag); + } let status = link_cmd.status()?; let _ = std::fs::remove_file(&temp_obj); @@ -578,7 +608,7 @@ fn preprocess_args() -> Vec { seen_o = true; } i += 1; - } else if arg.starts_with("-W") && arg.len() > 2 { + } else if arg.starts_with("-W") && arg.len() > 2 && !arg.starts_with("-Wl,") { // -Wall → -W all, -Wextra → -W extra, etc. result.push("-W".to_string()); result.push(arg[2..].to_string()); @@ -603,6 +633,14 @@ fn preprocess_args() -> Vec { seen_fpic = true; } i += 1; + } else if arg == "-fPIE" || arg == "-fpie" { + // -fPIE / -fpie → --pcc-fpie (internal flag) + result.push("--pcc-fpie".to_string()); + i += 1; + } else if arg == "-fno-pie" { + // -fno-pie → --pcc-fno-pie (internal flag) + result.push("--pcc-fno-pie".to_string()); + i += 1; } else if arg == "-shared" { // -shared → --shared result.push("--shared".to_string()); @@ -624,9 +662,86 @@ fn preprocess_args() -> Vec { { // GCC optimization flags - silently ignore (pcc doesn't have these optimizations) i += 1; + } else if arg.starts_with("-m") && arg.len() > 2 { + // Machine flags - unsupported (SIMD/arch not supported yet) + result.push(format!("--pcc-unsupported-mflag={}", arg)); + i += 1; + } else if arg.starts_with("-fvisibility") + || arg == "-fno-semantic-interposition" + || arg.starts_with("-fstack-protector") + || arg == "-fno-reorder-blocks-and-partition" + || arg == "-fno-plt" + || arg == "-fno-common" + || arg == "-fexceptions" + || arg == "-fno-exceptions" + { + // GCC flags - silently ignore + i += 1; + } else if arg.starts_with("-fsanitize") { + // Sanitizer flags - silently ignore (pcc doesn't support sanitizers) + i += 1; + } else if arg.starts_with("-f") && !arg.starts_with("-fno-builtin") { + // Catch-all: silently ignore any other -f* flag we don't handle + i += 1; } else if arg == "-p" || arg == "-pg" { // Profiling flags - silently ignore (pcc doesn't support profiling) i += 1; + } else if arg == "-pipe" { + // Misc GCC flags - silently ignore + i += 1; + } else if arg == "-pie" { + // -pie enables PIE link mode + result.push("--pcc-fpie".to_string()); + result.push("--pcc-linker-flag=-pie".to_string()); + i += 1; + } else if arg == "-no-pie" { + // -no-pie disables PIE link mode + result.push("--pcc-fno-pie".to_string()); + result.push("--pcc-linker-flag=-no-pie".to_string()); + i += 1; + } else if let Some(wl_args) = arg.strip_prefix("-Wl,") { + // -Wl,flag1,flag2 -> pass each flag to linker + for flag in wl_args.split(',') { + result.push(format!("--pcc-linker-flag={}", flag)); + } + i += 1; + } else if arg == "-Xlinker" { 
+ // -Xlinker -> pass next arg to linker + if i + 1 < raw_args.len() { + result.push(format!("--pcc-linker-flag={}", raw_args[i + 1])); + i += 2; + } else { + i += 1; + } + } else if arg == "-pthread" { + // -pthread -> pass to linker and define _REENTRANT + result.push("--pcc-linker-flag=-pthread".to_string()); + result.push("-D".to_string()); + result.push("_REENTRANT".to_string()); + i += 1; + } else if arg == "-rdynamic" { + // -rdynamic -> pass to linker + result.push("--pcc-linker-flag=-rdynamic".to_string()); + i += 1; + } else if arg == "--print-multiarch" { + // GCC compatibility: print multiarch tuple and exit + let target = Target::host(); + match (target.arch, target.os) { + (target::Arch::X86_64, Os::Linux) => println!("x86_64-linux-gnu"), + (target::Arch::Aarch64, Os::Linux) => println!("aarch64-linux-gnu"), + _ => {} // Empty output for unsupported platforms + } + std::process::exit(0); + } else if let Some(prog) = arg.strip_prefix("-print-prog-name=") { + // GCC compatibility: print program path and exit + // Just echo back the program name (like gcc does when it doesn't have a special path) + println!("{}", prog); + std::process::exit(0); + } else if arg == "-v" || arg == "--version" || arg == "-qversion" || arg == "-version" { + // Version query - handled by clap, but -v is also our verbose flag + // Let it pass through to clap + result.push(arg.clone()); + i += 1; } else { result.push(arg.clone()); i += 1; @@ -656,6 +771,17 @@ fn is_object_file(path: &str) -> bool { || path.contains(".so.") // versioned .so files like libz.so.1.3.1 } +/// Determine whether PIE should be enabled for this compilation. +fn pie_enabled(args: &Args, target: &Target) -> bool { + if args.shared || args.fno_pie { + return false; + } + if args.fpie { + return true; + } + target.os == Os::Linux +} + fn main() -> Result<(), Box> { setlocale(LocaleCategory::LcAll, ""); textdomain("posixutils-rs")?; @@ -663,6 +789,14 @@ fn main() -> Result<(), Box> { let args = Args::parse_from(preprocess_args()); + // Handle unsupported machine flags early + if !args.unsupported_mflags.is_empty() { + for flag in &args.unsupported_mflags { + eprintln!("pcc: unsupported machine flag: {}", flag); + } + std::process::exit(1); + } + // Handle --print-targets if args.print_targets { println!(" Registered Targets:"); @@ -790,6 +924,10 @@ fn main() -> Result<(), Box> { // Pass -shared flag for shared library creation if args.shared { link_cmd.arg("-shared"); + } else if pie_enabled(&args, &target) { + link_cmd.arg("-pie"); + } else { + link_cmd.arg("-no-pie"); } link_cmd.args(["-o", &exe_file]); for obj in &all_objects { @@ -803,6 +941,10 @@ fn main() -> Result<(), Box> { for lib in &args.libraries { link_cmd.arg(format!("-l{}", lib)); } + // Add extra linker flags + for flag in &args.linker_flags { + link_cmd.arg(flag); + } let status = link_cmd.status()?; // Clean up temp assembly objects for obj in &asm_objects { @@ -913,34 +1055,121 @@ mod tests { // ======================================================================== fn run_preprocess(args: &[&str]) -> Vec { - // We can't easily test preprocess_args() without modifying env::args, - // so we'll test the logic directly by simulating what it does - let input: Vec = std::iter::once("pcc".to_string()) + // Simulate preprocess_args logic with all flag handling + let raw_args: Vec = std::iter::once("pcc".to_string()) .chain(args.iter().map(|s| s.to_string())) .collect(); - // Simulate preprocess_args logic - let mut result = vec![input[0].clone()]; - let mut i = 1; - 
while i < input.len() { - let arg = &input[i]; - if arg == "-fPIC" || arg == "-fpic" { - result.push("--pcc-fpic".to_string()); - } else if arg == "-shared" { - result.push("--shared".to_string()); + let mut result = Vec::with_capacity(raw_args.len()); + let mut i = 0; + let mut seen_o = false; + let mut seen_fpic = false; + + while i < raw_args.len() { + let arg = &raw_args[i]; + + if arg == "-O" { + if seen_o { + if i + 1 < raw_args.len() && is_valid_opt_level(&raw_args[i + 1]) { + i += 2; + } else { + i += 1; + } + continue; + } + seen_o = true; + if i + 1 < raw_args.len() && is_valid_opt_level(&raw_args[i + 1]) { + result.push(format!("-O{}", raw_args[i + 1])); + i += 2; + continue; + } + result.push("-O1".to_string()); + i += 1; + } else if arg.starts_with("-O") && arg.len() > 2 { + if !seen_o { + result.push(arg.clone()); + seen_o = true; + } + i += 1; + } else if arg.starts_with("-W") && arg.len() > 2 && !arg.starts_with("-Wl,") { + result.push("-W".to_string()); + result.push(arg[2..].to_string()); + i += 1; } else if arg.starts_with("-L") && arg.len() > 2 { result.push("-L".to_string()); result.push(arg[2..].to_string()); + i += 1; } else if arg.starts_with("-l") && arg.len() > 2 { result.push("-l".to_string()); result.push(arg[2..].to_string()); - } else if arg.starts_with("-W") && arg.len() > 2 { - result.push("-W".to_string()); - result.push(arg[2..].to_string()); + i += 1; + } else if arg.starts_with("-std=") { + i += 1; + } else if arg == "-fPIC" || arg == "-fpic" { + if !seen_fpic { + result.push("--pcc-fpic".to_string()); + seen_fpic = true; + } + i += 1; + } else if arg == "-fPIE" || arg == "-fpie" { + result.push("--pcc-fpie".to_string()); + i += 1; + } else if arg == "-fno-pie" { + result.push("--pcc-fno-pie".to_string()); + i += 1; + } else if arg == "-shared" { + result.push("--shared".to_string()); + i += 1; + } else if arg == "-fno-builtin" { + result.push("--fno-builtin".to_string()); + i += 1; + } else if let Some(func) = arg.strip_prefix("-fno-builtin-") { + result.push("--pcc-fno-builtin-func".to_string()); + result.push(func.to_string()); + i += 1; + } else if arg.starts_with("-m") && arg.len() > 2 { + // Machine flags - unsupported (SIMD/arch not supported yet) + result.push(format!("--pcc-unsupported-mflag={}", arg)); + i += 1; + } else if (arg.starts_with("-f") && !arg.starts_with("-fno-builtin")) + || arg == "-p" + || arg == "-pg" + || arg == "-pipe" + { + // Silently ignore -f* flags, profiling flags, -pipe + i += 1; + } else if arg == "-pie" { + result.push("--pcc-fpie".to_string()); + result.push("--pcc-linker-flag=-pie".to_string()); + i += 1; + } else if arg == "-no-pie" { + result.push("--pcc-fno-pie".to_string()); + result.push("--pcc-linker-flag=-no-pie".to_string()); + i += 1; + } else if let Some(wl_args) = arg.strip_prefix("-Wl,") { + for flag in wl_args.split(',') { + result.push(format!("--pcc-linker-flag={}", flag)); + } + i += 1; + } else if arg == "-Xlinker" { + if i + 1 < raw_args.len() { + result.push(format!("--pcc-linker-flag={}", raw_args[i + 1])); + i += 2; + } else { + i += 1; + } + } else if arg == "-pthread" { + result.push("--pcc-linker-flag=-pthread".to_string()); + result.push("-D".to_string()); + result.push("_REENTRANT".to_string()); + i += 1; + } else if arg == "-rdynamic" { + result.push("--pcc-linker-flag=-rdynamic".to_string()); + i += 1; } else { result.push(arg.clone()); + i += 1; } - i += 1; } result } @@ -996,4 +1225,163 @@ mod tests { assert!(result.contains(&"-L".to_string())); 
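+ // "-L" followed by "." is the split form preprocess_args emits for an
+ // -L<dir> argument (here presumably "-L.")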
assert!(result.contains(&".".to_string())); } + + // ======================================================================== + // Tests for silently-ignored flags + // ======================================================================== + + #[test] + fn test_preprocess_fvisibility_ignored() { + let result = run_preprocess(&["-fvisibility=hidden", "foo.c"]); + assert!(!result.contains(&"-fvisibility=hidden".to_string())); + assert!(result.contains(&"foo.c".to_string())); + } + + #[test] + fn test_preprocess_fstack_protector_ignored() { + for flag in &[ + "-fstack-protector", + "-fstack-protector-strong", + "-fstack-protector-all", + ] { + let result = run_preprocess(&[flag, "foo.c"]); + assert!( + !result.contains(&flag.to_string()), + "flag {} not ignored", + flag + ); + assert!(result.contains(&"foo.c".to_string())); + } + } + + #[test] + fn test_preprocess_misc_f_flags_ignored() { + for flag in &[ + "-fno-semantic-interposition", + "-fno-reorder-blocks-and-partition", + "-fno-plt", + "-fno-common", + ] { + let result = run_preprocess(&[flag, "foo.c"]); + assert!( + !result.contains(&flag.to_string()), + "flag {} not ignored", + flag + ); + } + } + + #[test] + fn test_preprocess_catchall_f_flags_ignored() { + // Unknown -f flags should be silently ignored + let result = run_preprocess(&["-funknown-flag", "foo.c"]); + assert!(!result.contains(&"-funknown-flag".to_string())); + assert!(result.contains(&"foo.c".to_string())); + } + + #[test] + fn test_preprocess_m_flags_unsupported() { + let result = run_preprocess(&["-msse2", "foo.c"]); + assert!(result.contains(&"--pcc-unsupported-mflag=-msse2".to_string())); + assert!(result.contains(&"foo.c".to_string())); + } + + #[test] + fn test_preprocess_pipe_ignored() { + let result = run_preprocess(&["-pipe", "foo.c"]); + assert!(!result.contains(&"-pipe".to_string())); + assert!(result.contains(&"foo.c".to_string())); + } + + #[test] + fn test_preprocess_fpie() { + let result = run_preprocess(&["-fPIE", "foo.c"]); + assert!(result.contains(&"--pcc-fpie".to_string())); + } + + #[test] + fn test_preprocess_fpie_lowercase() { + let result = run_preprocess(&["-fpie", "foo.c"]); + assert!(result.contains(&"--pcc-fpie".to_string())); + } + + #[test] + fn test_preprocess_fno_pie() { + let result = run_preprocess(&["-fno-pie", "foo.c"]); + assert!(result.contains(&"--pcc-fno-pie".to_string())); + } + + #[test] + fn test_preprocess_pie_linker_flag() { + let result = run_preprocess(&["-pie", "foo.c"]); + assert!(result.contains(&"--pcc-fpie".to_string())); + assert!(result.contains(&"--pcc-linker-flag=-pie".to_string())); + } + + #[test] + fn test_preprocess_no_pie_linker_flag() { + let result = run_preprocess(&["-no-pie", "foo.c"]); + assert!(result.contains(&"--pcc-fno-pie".to_string())); + assert!(result.contains(&"--pcc-linker-flag=-no-pie".to_string())); + } + + // ======================================================================== + // Tests for linker passthrough flags + // ======================================================================== + + #[test] + fn test_preprocess_wl_flags() { + let result = run_preprocess(&["-Wl,-z,now", "foo.c"]); + assert!(result.contains(&"--pcc-linker-flag=-z".to_string())); + assert!(result.contains(&"--pcc-linker-flag=now".to_string())); + assert!(!result.contains(&"-Wl,-z,now".to_string())); + } + + #[test] + fn test_preprocess_xlinker() { + let result = run_preprocess(&["-Xlinker", "--hash-style=gnu", "foo.c"]); + assert!(result.contains(&"--pcc-linker-flag=--hash-style=gnu".to_string())); + 
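+ // The "-Xlinker" token itself is consumed along with its argument, so it
+ // must not survive preprocessing: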
assert!(!result.contains(&"-Xlinker".to_string())); + } + + #[test] + fn test_preprocess_pthread() { + let result = run_preprocess(&["-pthread", "foo.c"]); + assert!(result.contains(&"--pcc-linker-flag=-pthread".to_string())); + // Should also define _REENTRANT + assert!(result.contains(&"-D".to_string())); + assert!(result.contains(&"_REENTRANT".to_string())); + } + + #[test] + fn test_preprocess_rdynamic() { + let result = run_preprocess(&["-rdynamic", "foo.c"]); + assert!(result.contains(&"--pcc-linker-flag=-rdynamic".to_string())); + } + + #[test] + fn test_preprocess_cpython_flags_combined() { + // Simulate a typical CPython configure compiler probe + let result = run_preprocess(&[ + "-fvisibility=hidden", + "-fno-semantic-interposition", + "-fstack-protector-strong", + "-fno-plt", + "-pipe", + "-pthread", + "-Wl,-z,now", + "-rdynamic", + "foo.c", + ]); + // Only foo.c and the passthrough flags should remain + assert!(result.contains(&"foo.c".to_string())); + // Silently-ignored flags should NOT appear + assert!(!result.contains(&"-fvisibility=hidden".to_string())); + assert!(!result.contains(&"-fno-semantic-interposition".to_string())); + assert!(!result.contains(&"-fstack-protector-strong".to_string())); + assert!(!result.contains(&"-fno-plt".to_string())); + assert!(!result.contains(&"-pipe".to_string())); + // Linker flags should be passed through + assert!(result.iter().any(|a| a.starts_with("--pcc-linker-flag="))); + } } diff --git a/cc/parse/expression.rs b/cc/parse/expression.rs index da00b08f2..d60328e3f 100644 --- a/cc/parse/expression.rs +++ b/cc/parse/expression.rs @@ -2745,10 +2745,10 @@ impl<'a> Parser<'a> { let final_typ = if self.types.kind(typ) == TypeKind::Array && self.types.get(typ).array_size == Some(0) { - // Array size should be determined from initializer element count let elem_type = self.types.base_type(typ).unwrap_or(self.types.int_id); - self.types.intern(Type::array(elem_type, elements.len())) + let array_size = self.array_size_from_elements(&elements); + self.types.intern(Type::array(elem_type, array_size)) } else { typ }; diff --git a/cc/parse/parser.rs b/cc/parse/parser.rs index 36858a8ac..c746f2654 100644 --- a/cc/parse/parser.rs +++ b/cc/parse/parser.rs @@ -11,8 +11,9 @@ // use super::ast::{ - AsmOperand, BinaryOp, BlockItem, Declaration, Expr, ExprKind, ExternalDecl, ForInit, - FunctionDef, InitDeclarator, OffsetOfPath, Parameter, Stmt, TranslationUnit, UnaryOp, + AsmOperand, BinaryOp, BlockItem, Declaration, Designator, Expr, ExprKind, ExternalDecl, + ForInit, FunctionDef, InitDeclarator, InitElement, OffsetOfPath, Parameter, Stmt, + TranslationUnit, UnaryOp, }; use crate::diag; use crate::strings::StringId; @@ -1406,7 +1407,7 @@ impl Parser<'_> { } let new_size = match &init.kind { - ExprKind::InitList { elements } => Some(elements.len()), + ExprKind::InitList { elements } => Some(self.array_size_from_elements(elements)), ExprKind::StringLit(s) => { // For char array initialized with string literal, // size is string length + 1 for null terminator @@ -1433,6 +1434,40 @@ impl Parser<'_> { } } + pub(crate) fn array_size_from_elements(&self, elements: &[InitElement]) -> usize { + let mut max_index: i64 = -1; + let mut current_index: i64 = 0; + + for element in elements { + let mut designator_index = None; + for designator in &element.designators { + if let Designator::Index(index) = designator { + designator_index = Some(*index); + break; + } + } + + let index = if let Some(explicit_index) = designator_index { + current_index = explicit_index + 1; + 
explicit_index + } else { + let idx = current_index; + current_index += 1; + idx + }; + + if index > max_index { + max_index = index; + } + } + + if max_index < 0 { + 0 + } else { + (max_index + 1) as usize + } + } + /// Parse a for-init declaration and bind variables to symbol table /// /// Same as `parse_declaration_and_bind()` but rejects storage class specifiers. @@ -1548,7 +1583,16 @@ impl Parser<'_> { // For incomplete array types, infer size from initializer if let Some(ref init_expr) = init { + let old_type = typ; typ = self.infer_array_size_from_init(typ, init_expr); + + // If the type changed (array size was inferred), update the symbol's type + // This is needed because the symbol was already added before parsing the initializer + if typ != old_type { + if let Some(sym_id) = symbol_id { + self.symbols.get_mut(sym_id).typ = typ; + } + } } // Bind typedef to symbol table (after parsing initializer, which @@ -3496,7 +3540,16 @@ impl Parser<'_> { // For incomplete array types, infer size from initializer if let Some(ref init_expr) = init { + let old_type = var_type_id; var_type_id = self.infer_array_size_from_init(var_type_id, init_expr); + + // If the type changed (array size was inferred), update the symbol's type + // This is needed because the symbol was already added before parsing the initializer + if var_type_id != old_type { + if let Some(sym_id) = symbol { + self.symbols.get_mut(sym_id).typ = var_type_id; + } + } } // Bind typedef to symbol table (after parsing initializer, which is forbidden anyway) @@ -3568,7 +3621,16 @@ impl Parser<'_> { // For incomplete array types, infer size from initializer if let Some(ref init_expr) = decl_init { + let old_type = decl_type; decl_type = self.infer_array_size_from_init(decl_type, init_expr); + + // If the type changed (array size was inferred), update the symbol's type + // This is needed because the symbol was already added before parsing the initializer + if decl_type != old_type { + if let Some(sym_id) = decl_symbol { + self.symbols.get_mut(sym_id).typ = decl_type; + } + } } // Bind typedef to symbol table (after parsing initializer, which is forbidden anyway) diff --git a/cc/parse/test_parser.rs b/cc/parse/test_parser.rs index 92a0f142f..eb3747156 100644 --- a/cc/parse/test_parser.rs +++ b/cc/parse/test_parser.rs @@ -3973,6 +3973,40 @@ fn test_incomplete_array_empty_string() { } } +#[test] +fn test_incomplete_array_designator_size() { + // int arr[] = {[10] = 1}; should infer size 11 + let (tu, types, _, _) = parse_tu("int arr[] = {[10] = 1};").unwrap(); + if let ExternalDecl::Declaration(decl) = &tu.items[0] { + let typ = decl.declarators[0].typ; + assert_eq!(types.kind(typ), TypeKind::Array); + assert_eq!( + types.get(typ).array_size, + Some(11), + "Array size should be 11 for {{[10] = 1}}" + ); + } else { + panic!("Expected Declaration"); + } +} + +#[test] +fn test_incomplete_array_designator_sequence_size() { + // int arr[] = {1, 2, [5] = 5, 6}; should infer size 7 + let (tu, types, _, _) = parse_tu("int arr[] = {1, 2, [5] = 5, 6};").unwrap(); + if let ExternalDecl::Declaration(decl) = &tu.items[0] { + let typ = decl.declarators[0].typ; + assert_eq!(types.kind(typ), TypeKind::Array); + assert_eq!( + types.get(typ).array_size, + Some(7), + "Array size should be 7 for {{1,2,[5]=5,6}}" + ); + } else { + panic!("Expected Declaration"); + } +} + // ======================================================================== // typeof operator tests (GCC extension) // 
======================================================================== diff --git a/cc/symbol.rs b/cc/symbol.rs index 0a03a70a2..ff09db00b 100644 --- a/cc/symbol.rs +++ b/cc/symbol.rs @@ -350,6 +350,11 @@ impl SymbolTable { &self.symbols[id.0 as usize] } + /// Get a mutable reference to a symbol by its ID + pub fn get_mut(&mut self, id: SymbolId) -> &mut Symbol { + &mut self.symbols[id.0 as usize] + } + /// Look up a tag (struct/union/enum) by name pub fn lookup_tag(&self, name: StringId) -> Option<&Symbol> { self.lookup(name, Namespace::Tag) diff --git a/cc/tests/builtins/has_feature.rs b/cc/tests/builtins/has_feature.rs index 6aa7615c3..137030b5b 100644 --- a/cc/tests/builtins/has_feature.rs +++ b/cc/tests/builtins/has_feature.rs @@ -151,6 +151,60 @@ int main(void) { #if __has_attribute(nonexistent_attribute_xyz) return 75; #endif + + // CPython-required attributes +#if !__has_attribute(noinline) + return 76; +#endif + +#if !__has_attribute(always_inline) + return 77; +#endif + +#if !__has_attribute(hot) + return 78; +#endif + +#if !__has_attribute(cold) + return 79; +#endif + +#if !__has_attribute(warn_unused_result) + return 80; +#endif + +#if !__has_attribute(format) + return 81; +#endif + +#if !__has_attribute(fallthrough) + return 82; +#endif + +#if !__has_attribute(malloc) + return 83; +#endif + +#if !__has_attribute(pure) + return 84; +#endif + +#if !__has_attribute(sentinel) + return 85; +#endif + +#if !__has_attribute(no_sanitize_address) + return 86; +#endif + + // Double-underscore variants +#if !__has_attribute(__noinline__) + return 87; +#endif + +#if !__has_attribute(__always_inline__) + return 88; +#endif } return 0; diff --git a/cc/tests/c89/functions.rs b/cc/tests/c89/functions.rs index afc4705ab..d7d3aa958 100644 --- a/cc/tests/c89/functions.rs +++ b/cc/tests/c89/functions.rs @@ -214,3 +214,85 @@ int main(void) { "#; assert_eq!(compile_and_run("c89_functions_mega", code, &[]), 0); } + +/// Test that return statements properly convert expression types to function return type +/// This tests a bug where `return -1` in a `long long` function returned 0xFFFFFFFF instead of -1 +#[test] +fn c89_functions_return_type_conversion() { + let code = r#" +typedef long long Py_ssize_t; +#define DKIX_EMPTY (-1) + +// Function returns 64-bit but expression is 32-bit int +static Py_ssize_t return_minus_one(void) { + return -1; +} + +// Function returns 64-bit via macro (still 32-bit int expression) +static Py_ssize_t return_dkix_empty(void) { + return DKIX_EMPTY; +} + +// More complex case - return from inside loop +static Py_ssize_t return_from_loop(int condition) { + Py_ssize_t ix; + for (;;) { + ix = -1; + if (ix < 0) { + return DKIX_EMPTY; + } + } + return -100; +} + +// Return smaller types that need sign extension +static long long return_signed_char(void) { + return (signed char)-1; // Should be -1, not 255 +} + +static long long return_short(void) { + return (short)-1; // Should be -1, not 65535 +} + +static long long return_int(void) { + return (int)-1; // Should be -1, not 0xFFFFFFFF +} + +// Test with unsigned promotion +static unsigned long long return_uint(void) { + return (unsigned int)0xFFFFFFFF; // Should stay 0xFFFFFFFF +} + +int main(void) { + // Test direct -1 return + Py_ssize_t r1 = return_minus_one(); + if (r1 != -1) return 1; + // Check upper bits are set (sign extended) + if ((unsigned long long)r1 != 0xFFFFFFFFFFFFFFFFULL) return 2; + + // Test macro return + Py_ssize_t r2 = return_dkix_empty(); + if (r2 != -1) return 3; + if ((unsigned long long)r2 != 
0xFFFFFFFFFFFFFFFFULL) return 4;
+
+    // Test return from loop
+    Py_ssize_t r3 = return_from_loop(1);
+    if (r3 != -1) return 5;
+    if ((unsigned long long)r3 != 0xFFFFFFFFFFFFFFFFULL) return 6;
+
+    // Test smaller signed types
+    if (return_signed_char() != -1) return 7;
+    if (return_short() != -1) return 8;
+    if (return_int() != -1) return 9;
+
+    // Test unsigned - should zero-extend, not sign-extend
+    if (return_uint() != 0xFFFFFFFFULL) return 10;
+
+    return 0;
+}
+"#;
+    assert_eq!(
+        compile_and_run("c89_functions_return_type_conversion", code, &[]),
+        0
+    );
+}
diff --git a/cc/tests/c99/features.rs b/cc/tests/c99/features.rs
index 872c73c13..b88dff603 100644
--- a/cc/tests/c99/features.rs
+++ b/cc/tests/c99/features.rs
@@ -128,6 +128,57 @@ int sum_twice(int count, ...) {
     return sum1 + sum2;
 }
 
+// va_arg with string pointers (tests 64-bit loads)
+// This test ensures va_arg correctly loads 64-bit pointers, not 32-bit
+#include <string.h>
+
+static int process_strings(va_list *p_va, int count) {
+    int total_len = 0;
+    for (int i = 0; i < count; i++) {
+        const char *str = va_arg(*p_va, const char *);
+        if (str == (const char*)0) return -1;
+        total_len += strlen(str);
+    }
+    return total_len;
+}
+
+int test_va_arg_strings(int count, ...) {
+    va_list va;
+    va_start(va, count);
+    int result = process_strings(&va, count);
+    va_end(va);
+    return result;
+}
+
+// va_list cast to pointer test (C99 6.3.2.1 - array decay)
+// va_list is defined as __va_list_tag[1] and should decay to a pointer
+int test_va_cast(int count, ...) {
+    va_list args;
+    va_start(args, count);
+
+    // Cast va_list to pointer - this tests array decay of va_list
+    unsigned char* ptr = (unsigned char*)args;
+
+    // Verify we got a valid pointer (non-null)
+    if (ptr == (unsigned char*)0) {
+        va_end(args);
+        return -1;
+    }
+
+    // Read a few bytes to verify memory access works
+    unsigned char first_byte = ptr[0];
+    (void)first_byte;  // Suppress unused warning
+
+    // Now consume the arguments normally to verify va_list still works
+    int sum = 0;
+    for (int i = 0; i < count; i++) {
+        sum += va_arg(args, int);
+    }
+
+    va_end(args);
+    return sum;
+}
+
 int main(void) {
     // ========== INLINE SECTION (returns 1-9) ==========
     {
@@ -245,6 +296,18 @@ int main(void) {
         // Varargs with different counts
         if (sum_varargs(0) != 0) return 67;  // No args
         if (sum_varargs(10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) != 55) return 68;
+
+        // va_list cast to pointer (tests array decay)
+        if (test_va_cast(3, 10, 20, 12) != 42) return 69;
+        if (test_va_cast(1, 42) != 42) return 70;
+
+        // va_arg with string pointers (tests 64-bit loads and stack spill)
+        // Tests: 1) va_arg correctly loads 64-bit pointers
+        //        2) va_list passed through a pointer works correctly
+        //        3) stack spill offset is calculated correctly
+        if (test_va_arg_strings(1, "hello") != 5) return 71;
+        if (test_va_arg_strings(2, "hi", "there") != 7) return 72;  // 2 + 5
+        if (test_va_arg_strings(4, "a", "bb", "ccc", "dddd") != 10) return 73;  // 1+2+3+4
     }
 
     // ========== C99 FOR LOOP DECLARATIONS (returns 80-89) ==========
diff --git a/cc/tests/c99/initializers.rs b/cc/tests/c99/initializers.rs
index 05e541ad9..413d2b7ac 100644
--- a/cc/tests/c99/initializers.rs
+++ b/cc/tests/c99/initializers.rs
@@ -107,3 +107,715 @@ int main(void) {
 "#;
     assert_eq!(compile_and_run("c99_initializers_mega", code, &[]), 0);
 }
+
+// ============================================================================
+// Complex initializers torture test
+// ============================================================================
+
+#[test]
+fn
c99_initializers_complex_mega() { + let code = r#" +#include + +// ===== Type definitions for all sections ===== + +// Section 1: Function pointer fields +typedef int (*int_func_t)(int); +typedef void (*void_func_t)(void); + +struct Callbacks { + int (*handler)(int); + void (*cleanup)(void); + int (*transform)(int); +}; + +int double_it(int x) { return x * 2; } +int triple_it(int x) { return x * 3; } +void do_nothing(void) { } + +// Section 3: Self-referencing structs +struct Node { + int val; + struct Node *next; +}; + +struct ListHead { + struct ListHead *next; + struct ListHead *prev; +}; + +// Section 4: Anonymous members +struct WithAnon { + int tag; + union { + int ival; + float fval; + }; + int after; +}; + +struct WithAnonStruct { + int before; + struct { + int x; + int y; + }; + int after; +}; + +// Section 5: Enums +enum Color { RED = 0, GREEN = 1, BLUE = 2 }; + +struct Tagged { + char *name; + enum Color color; + int size; +}; + +// Section 6: Large struct (~30 fields, CPython-scale) +struct BigType { + char *tp_name; + long tp_basicsize; + long tp_itemsize; + int (*tp_init)(int); + void (*tp_dealloc)(void); + int (*tp_compare)(int); + void *tp_as_number; + void *tp_as_sequence; + void *tp_as_mapping; + int (*tp_hash)(int); + int (*tp_call)(int); + char *tp_doc; + void *tp_methods; + void *tp_members; + void *tp_getset; + void *tp_base; + void *tp_dict; + int (*tp_descr_get)(int); + int (*tp_descr_set)(int); + long tp_dictoffset; + int (*tp_alloc)(int); + int (*tp_new)(int); + void (*tp_free)(void); + int tp_flags; + void *tp_subclasses; + void *tp_weaklist; + void *tp_del; + unsigned int tp_version_tag; + void *tp_finalize; + void *tp_vectorcall; + long tp_padding; +}; + +// Section 7: Array of structs +struct Entry { + int key; + int value; + char *label; +}; + +// Section 8: Deeply nested +struct Inner { + int a; + int b; +}; + +struct Middle { + struct Inner inner; + int c; +}; + +struct Outer { + struct Middle mid; + int d; +}; + + +// ===== Section 3 globals (self-referencing) ===== +struct Node self_ref = {42, &self_ref}; +struct ListHead self_list = {&self_list, &self_list}; +struct Node node_b; +struct Node node_a = {1, &node_b}; +struct Node node_b = {2, &node_a}; + +// ===== Section 6 globals (large struct) ===== +struct BigType my_type = { + .tp_name = "MyType", + .tp_basicsize = 64, + .tp_init = double_it, + .tp_flags = 0x1234, + .tp_doc = "A test type", + .tp_version_tag = 42, +}; + + +int main(void) { + + // ========== SECTION 1: Function pointer fields (returns 1-19) ========== + { + // Designated init with function pointers + struct Callbacks cb1 = {.handler = double_it, .cleanup = do_nothing, .transform = triple_it}; + if (cb1.handler(5) != 10) return 1; + if (cb1.transform(5) != 15) return 2; + + // NULL function pointer via cast + struct Callbacks cb2 = {.handler = double_it, .cleanup = (void(*)(void))0, .transform = 0}; + if (cb2.handler(3) != 6) return 3; + if (cb2.cleanup != 0) return 4; + if (cb2.transform != 0) return 5; + + // Positional init with function pointers + struct Callbacks cb3 = {double_it, do_nothing, triple_it}; + if (cb3.handler(7) != 14) return 6; + if (cb3.transform(4) != 12) return 7; + + // Partial init - missing fields should be NULL + struct Callbacks cb4 = {.handler = double_it}; + if (cb4.handler(10) != 20) return 8; + if (cb4.cleanup != 0) return 9; + if (cb4.transform != 0) return 10; + + // Function pointer via typedef + int_func_t fp = double_it; + struct Callbacks cb5 = {.handler = fp, .transform = fp}; + if (cb5.handler(3) != 
6) return 11; + if (cb5.transform(3) != 6) return 12; + } + + // ========== SECTION 2: Mixed designated + positional (returns 20-39) ========== + { + // Positional first, then designated + struct { int a; int b; int c; int d; } m1 = {10, 20, .d = 40}; + if (m1.a != 10) return 20; + if (m1.b != 20) return 21; + if (m1.c != 0) return 22; + if (m1.d != 40) return 23; + + // Designated first, then positional continues from after that field + struct { int a; int b; int c; int d; } m2 = {.b = 20, 30, 40}; + if (m2.a != 0) return 24; + if (m2.b != 20) return 25; + if (m2.c != 30) return 26; + if (m2.d != 40) return 27; + + // Gaps (implicit zero) between designated fields + struct { int a; int b; int c; int d; int e; } m3 = {.a = 1, .c = 3, .e = 5}; + if (m3.a != 1) return 28; + if (m3.b != 0) return 29; + if (m3.c != 3) return 30; + if (m3.d != 0) return 31; + if (m3.e != 5) return 32; + + // Interleaving: designated, positional, designated + struct { int a; int b; int c; int d; } m4 = {.a = 10, 20, .d = 40}; + if (m4.a != 10) return 33; + if (m4.b != 20) return 34; + if (m4.c != 0) return 35; + if (m4.d != 40) return 36; + + // Override: designated field overrides earlier positional + struct { int a; int b; int c; } m5 = {100, 200, 300, .a = 999}; + if (m5.a != 999) return 37; + if (m5.b != 200) return 38; + if (m5.c != 300) return 39; + } + + // ========== SECTION 3: Self-referencing structs (returns 40-59) ========== + { + // Global self-referencing node + if (self_ref.val != 42) return 40; + if (self_ref.next != &self_ref) return 41; + if (self_ref.next->val != 42) return 42; + + // Global self-referencing list head (both pointers to self) + if (self_list.next != &self_list) return 43; + if (self_list.prev != &self_list) return 44; + + // Mutual references + if (node_a.val != 1) return 45; + if (node_a.next != &node_b) return 46; + if (node_b.val != 2) return 47; + if (node_b.next != &node_a) return 48; + if (node_a.next->val != 2) return 49; + if (node_b.next->val != 1) return 50; + + // Array of structs where elements point to each other (local static) + static struct Node nodes[3] = { + {10, &nodes[1]}, + {20, &nodes[2]}, + {30, &nodes[0]}, + }; + if (nodes[0].val != 10) return 51; + if (nodes[0].next != &nodes[1]) return 52; + if (nodes[1].val != 20) return 53; + if (nodes[1].next->val != 30) return 54; + if (nodes[2].next->val != 10) return 55; + } + + // ========== SECTION 4: Anonymous struct/union members (returns 60-79) ========== + { + // Anonymous union inside struct + struct WithAnon wa1 = {.tag = 1, .ival = 42, .after = 99}; + if (wa1.tag != 1) return 60; + if (wa1.ival != 42) return 61; + if (wa1.after != 99) return 62; + + // Anonymous struct inside struct + struct WithAnonStruct was1 = {.before = 10, .x = 20, .y = 30, .after = 40}; + if (was1.before != 10) return 63; + if (was1.x != 20) return 64; + if (was1.y != 30) return 65; + if (was1.after != 40) return 66; + + // Positional init of anonymous members + struct WithAnon wa2 = {5, 50, 55}; + if (wa2.tag != 5) return 67; + if (wa2.ival != 50) return 68; + if (wa2.after != 55) return 69; + + // Partial init with anonymous members + struct WithAnon wa3 = {.ival = 77}; + if (wa3.tag != 0) return 70; + if (wa3.ival != 77) return 71; + if (wa3.after != 0) return 72; + } + + // ========== SECTION 5: String, enum, sizeof, cast (returns 80-99) ========== + { + // char* string field + struct Tagged t1 = {.name = "hello", .color = GREEN, .size = 5}; + if (t1.name[0] != 'h') return 80; + if (t1.name[4] != 'o') return 81; + if (t1.color 
!= GREEN) return 82; + if (t1.size != 5) return 83; + + // char array in struct (not pointer) + struct { char name[32]; int val; } sa1 = {.name = "world", .val = 42}; + if (sa1.name[0] != 'w') return 84; + if (sa1.name[4] != 'd') return 85; + if (sa1.name[5] != '\0') return 86; + if (sa1.val != 42) return 87; + + // Enum constant fields + struct Tagged t2 = {"blue", BLUE, 10}; + if (t2.color != 2) return 88; + + // sizeof in initializer + struct { int sz; int val; } sz1 = {sizeof(int), 42}; + if (sz1.sz != 4) return 89; + if (sz1.val != 42) return 90; + + struct { long sz; } sz2 = {.sz = sizeof(struct BigType)}; + if (sz2.sz == 0) return 91; + + // Cast expressions in initializer + struct { long lval; void *ptr; } c1 = {(long)42, (void*)0}; + if (c1.lval != 42) return 92; + if (c1.ptr != 0) return 93; + + // Negative initializer + struct { int a; int b; } neg1 = {-10, -20}; + if (neg1.a != -10) return 94; + if (neg1.b != -20) return 95; + } + + // ========== SECTION 6: Large struct (~30 fields, CPython-scale) (returns 100-119) ========== + { + // Check the global BigType + if (my_type.tp_name[0] != 'M') return 100; + if (my_type.tp_basicsize != 64) return 101; + if (my_type.tp_init(5) != 10) return 102; + if (my_type.tp_flags != 0x1234) return 103; + if (my_type.tp_doc[0] != 'A') return 104; + if (my_type.tp_version_tag != 42) return 105; + + // Uninitialized fields should be zero/NULL + if (my_type.tp_itemsize != 0) return 106; + if (my_type.tp_dealloc != 0) return 107; + if (my_type.tp_as_number != 0) return 108; + if (my_type.tp_dict != 0) return 109; + if (my_type.tp_padding != 0) return 110; + + // Local large struct with designated init + struct BigType local_type = { + .tp_name = "LocalType", + .tp_basicsize = 128, + .tp_hash = triple_it, + .tp_flags = 0xFF, + }; + if (local_type.tp_name[0] != 'L') return 111; + if (local_type.tp_basicsize != 128) return 112; + if (local_type.tp_hash(3) != 9) return 113; + if (local_type.tp_flags != 0xFF) return 114; + if (local_type.tp_init != 0) return 115; + if (local_type.tp_as_number != 0) return 116; + } + + // ========== SECTION 7: Array of structs with complex init (returns 120-139) ========== + { + // Static array of structs with mixed designated init + struct Entry table[] = { + {.key = 1, .value = 100, .label = "one"}, + {.key = 2, .value = 200, .label = "two"}, + {.key = 3, .value = 300, .label = "three"}, + }; + if (table[0].key != 1) return 120; + if (table[0].value != 100) return 121; + if (table[0].label[0] != 'o') return 122; + if (table[1].key != 2) return 123; + if (table[1].value != 200) return 124; + if (table[2].value != 300) return 125; + if (table[2].label[0] != 't') return 126; + + // Positional array of structs + struct Entry table2[] = { + {10, 1000, "ten"}, + {20, 2000, "twenty"}, + }; + if (table2[0].key != 10) return 127; + if (table2[0].value != 1000) return 128; + if (table2[1].key != 20) return 129; + if (table2[1].label[0] != 't') return 130; + + // Nested arrays of structs + struct { struct Entry entries[2]; int count; } group = { + .entries = { + {.key = 1, .value = 10, .label = "a"}, + {.key = 2, .value = 20, .label = "b"}, + }, + .count = 2, + }; + if (group.entries[0].key != 1) return 131; + if (group.entries[1].value != 20) return 132; + if (group.count != 2) return 133; + } + + // ========== SECTION 8: Deeply nested + compound literals (returns 140-159) ========== + { + // 3 levels of struct nesting + struct Outer o1 = { + .mid = { + .inner = {.a = 10, .b = 20}, + .c = 30, + }, + .d = 40, + }; + if 
(o1.mid.inner.a != 10) return 140; + if (o1.mid.inner.b != 20) return 141; + if (o1.mid.c != 30) return 142; + if (o1.d != 40) return 143; + + // Positional nested init + struct Outer o2 = {{{1, 2}, 3}, 4}; + if (o2.mid.inner.a != 1) return 144; + if (o2.mid.inner.b != 2) return 145; + if (o2.mid.c != 3) return 146; + if (o2.d != 4) return 147; + + // Compound literal as struct value + struct Middle m1 = {.inner = (struct Inner){100, 200}, .c = 300}; + if (m1.inner.a != 100) return 148; + if (m1.inner.b != 200) return 149; + if (m1.c != 300) return 150; + + // Address of compound literal in initializer + struct Inner *ip = &(struct Inner){55, 66}; + if (ip->a != 55) return 151; + if (ip->b != 66) return 152; + + // Nested compound literals + struct Outer *op = &(struct Outer){ + .mid = {.inner = {.a = 7, .b = 8}, .c = 9}, + .d = 10, + }; + if (op->mid.inner.a != 7) return 153; + if (op->mid.inner.b != 8) return 154; + if (op->mid.c != 9) return 155; + if (op->d != 10) return 156; + } + + return 0; +} +"#; + assert_eq!( + compile_and_run("c99_initializers_complex_mega", code, &[]), + 0 + ); +} + +#[test] +fn c99_initializers_cpython_llist_pattern() { + let code = r#" +struct llist_node { + struct llist_node *next; + struct llist_node *prev; +}; + +#define LLIST_INIT(head) { &head, &head } + +struct llist_node my_list = LLIST_INIT(my_list); + +int main(void) { + if (my_list.next != &my_list) return 1; + if (my_list.prev != &my_list) return 2; + return 0; +} +"#; + assert_eq!(compile_and_run("cpython_llist_init", code, &[]), 0); +} + +#[test] +fn c99_initializers_cpython_opcode_pattern() { + let code = r#" +struct uop { int op; int arg; int off; }; +struct expansion { int nuops; struct uop uops[4]; }; + +enum { OP_A = 5, OP_B = 10 }; + +struct expansion table[16] = { + [OP_A] = { .nuops = 2, .uops = { {1, 2, 3}, {3, 4, 5} } }, + [OP_B] = { .nuops = 1, .uops = { {5, 6, 7} } }, +}; + +int main(void) { + if (table[OP_A].nuops != 2) return 1; + if (table[OP_A].uops[0].op != 1) return 2; + if (table[OP_A].uops[1].arg != 4) return 3; + if (table[OP_B].nuops != 1) return 4; + if (table[OP_B].uops[0].op != 5) return 5; + if (table[0].nuops != 0) return 6; + return 0; +} +"#; + assert_eq!(compile_and_run("cpython_opcode_init", code, &[]), 0); +} + +#[test] +fn c99_initializers_cpython_pytypeobject_pattern() { + let code = r#" +typedef void (*func_t)(void); +struct PyTypeObject { + long ob_refcnt; + void *ob_type; + char *tp_name; + long tp_basicsize; + func_t tp_dealloc; + func_t tp_repr; + void *tp_as_number; + long tp_flags; + char *tp_doc; +}; + +void my_dealloc(void) {} + +struct PyTypeObject MyType = { + 1, + 0, + "MyType", + 64, + my_dealloc, + 0, + 0, + .tp_flags = 0x1234, + .tp_doc = "doc", +}; + +int main(void) { + if (MyType.ob_refcnt != 1) return 1; + if (MyType.tp_basicsize != 64) return 2; + if (MyType.tp_dealloc != my_dealloc) return 3; + if (MyType.tp_flags != 0x1234) return 4; + if (MyType.tp_doc[0] != 'd') return 5; + return 0; +} +"#; + assert_eq!(compile_and_run("cpython_pytypeobject_init", code, &[]), 0); +} + +#[test] +fn c99_initializers_nested_designated_pattern() { + let code = r#" +struct inner { int tag; int data; }; +struct outer { + void *type; + struct inner value; +}; + +struct outer obj = { + (void*)0x1234, + { .tag = 42, .data = 99 } +}; + +int main(void) { + if (obj.type != (void*)0x1234) return 1; + if (obj.value.tag != 42) return 2; + if (obj.value.data != 99) return 3; + return 0; +} +"#; + assert_eq!(compile_and_run("nested_designated_init", code, &[]), 0); +} + 
+/// Test that sizeof works correctly for arrays with size inferred from the initializer
+/// This tests the fix for a GitHub issue where sizeof(arr) returned 0 for arr[] = {...}
+#[test]
+fn c99_initializers_sizeof_inferred_array() {
+    let code = r#"
+// Global arrays with inferred size
+int global_arr[] = {1, 2, 3, 4, 5};
+static int static_arr[] = {10, 20, 30, 40, 50, 60};
+
+// Array of structs
+struct Pair { int x; int y; };
+static struct Pair pairs[] = {{1, 2}, {3, 4}, {5, 6}};
+
+// Pointer array
+static int *ptrs[] = {0, 0, 0};
+
+int main(void) {
+    // Test global array
+    if (sizeof(global_arr) != 20) return 1;  // 5 * 4 bytes
+    if (sizeof(global_arr) / sizeof(global_arr[0]) != 5) return 2;
+
+    // Test static array
+    if (sizeof(static_arr) != 24) return 10;  // 6 * 4 bytes
+    if (sizeof(static_arr) / sizeof(static_arr[0]) != 6) return 11;
+
+    // Test local array with inferred size
+    int local_arr[] = {100, 200, 300};
+    if (sizeof(local_arr) != 12) return 20;  // 3 * 4 bytes
+    if (sizeof(local_arr) / sizeof(local_arr[0]) != 3) return 21;
+
+    // Test array of structs
+    if (sizeof(pairs) != 24) return 30;  // 3 * (4 + 4) bytes
+    if (sizeof(pairs) / sizeof(pairs[0]) != 3) return 31;
+
+    // Test pointer array
+    if (sizeof(ptrs) != 24) return 40;  // 3 * 8 bytes on 64-bit
+    if (sizeof(ptrs) / sizeof(ptrs[0]) != 3) return 41;
+
+    // Test complex pattern like CPython's static_types[]
+    typedef void *PyTypeObject;
+    static PyTypeObject types[] = {
+        (void*)1,
+        (void*)2,
+        (void*)3,
+        (void*)4,
+    };
+    if (sizeof(types) / sizeof(types[0]) != 4) return 50;
+
+    return 0;
+}
+"#;
+    assert_eq!(compile_and_run("sizeof_inferred_array", code, &[]), 0);
+}
+
+/// Test designated initialization of multiple bitfields within the same storage unit
+/// This tests the fix for a bug where only the last bitfield was initialized
+/// (due to incorrect deduplication of fields at the same offset)
+#[test]
+fn c99_initializers_bitfield_designated() {
+    let code = r#"
+#include
+
+// Bitfields all packed into a single storage unit (like CPython's PyASCIIObject state)
+struct state {
+    unsigned int interned:2;
+    unsigned int kind:3;
+    unsigned int compact:1;
+    unsigned int ascii:1;
+    unsigned int statically_allocated:1;
+};
+
+struct obj {
+    void *ptr;
+    long length;
+    long hash;
+    struct state state;
+};
+
+// Test global designated initializer with multiple bitfields
+struct obj test_obj = {
+    .ptr = (void*)0x12345678,
+    .length = 8,
+    .hash = -1,
+    .state = {
+        .kind = 1,
+        .compact = 1,
+        .ascii = 1,
+        .statically_allocated = 1,
+    },
+};
+
+// Test that all bitfields within the same byte can be initialized
+struct flags {
+    unsigned int a:1;
+    unsigned int b:1;
+    unsigned int c:1;
+    unsigned int d:1;
+    unsigned int e:1;
+    unsigned int f:1;
+    unsigned int g:1;
+    unsigned int h:1;
+};
+
+struct flags all_flags = {
+    .a = 1, .b = 1, .c = 1, .d = 1,
+    .e = 1, .f = 1, .g = 1, .h = 1,
+};
+
+struct flags some_flags = {
+    .b = 1, .d = 1, .f = 1, .h = 1,
+};
+
+int main(void) {
+    // Verify global struct with nested bitfield struct
+    if (test_obj.ptr != (void*)0x12345678) return 1;
+    if (test_obj.length != 8) return 2;
+    if (test_obj.hash != -1) return 3;
+    if (test_obj.state.interned != 0) return 4;
+    if (test_obj.state.kind != 1) return 5;
+    if (test_obj.state.compact != 1) return 6;
+    if (test_obj.state.ascii != 1) return 7;
+    if (test_obj.state.statically_allocated != 1) return 8;
+
+    // Verify all flags set
+    if (all_flags.a != 1) return 10;
+    if (all_flags.b != 1) return 11;
+    if (all_flags.c != 1) return 12;
+    if (all_flags.d != 1) return 13;
+    if (all_flags.e != 1) return 14;
+    if (all_flags.f != 1) return 15;
+    if (all_flags.g != 1) return 16;
+    if (all_flags.h != 1) return 17;
+
+    // Verify alternating flags
+    if (some_flags.a != 0) return 20;
+    if (some_flags.b != 1) return 21;
+    if (some_flags.c != 0) return 22;
+    if (some_flags.d != 1) return 23;
+    if (some_flags.e != 0) return 24;
+    if (some_flags.f != 1) return 25;
+    if (some_flags.g != 0) return 26;
+    if (some_flags.h != 1) return 27;
+
+    // Local variable with bitfield designated init
+    struct obj local_obj = {
+        .state = {
+            .interned = 2,
+            .kind = 5,
+            .compact = 0,
+            .ascii = 1,
+        },
+    };
+    if (local_obj.state.interned != 2) return 30;
+    if (local_obj.state.kind != 5) return 31;
+    if (local_obj.state.compact != 0) return 32;
+    if (local_obj.state.ascii != 1) return 33;
+    if (local_obj.state.statically_allocated != 0) return 34;
+
+    return 0;
+}
+"#;
+    assert_eq!(compile_and_run("bitfield_designated_init", code, &[]), 0);
+}
diff --git a/cc/tests/codegen/misc.rs b/cc/tests/codegen/misc.rs
index f021a50e8..568f709a8 100644
--- a/cc/tests/codegen/misc.rs
+++ b/cc/tests/codegen/misc.rs
@@ -209,6 +209,52 @@ int main() {
     let _ = std::fs::remove_file(&obj_path);
 }
 
+/// Test that non-PIE code uses direct RIP-relative addressing for globals.
+/// With -fno-pie, local globals should use foo(%rip), not the GOT.
+#[cfg(target_arch = "x86_64")]
+#[test]
+fn codegen_global_accesses_use_rip_relative() {
+    let c_file = create_c_file(
+        "global_access_rip",
+        r#"
+int foo = 3;
+
+int main(void) {
+    return foo;
+}
+"#,
+    );
+    let c_path = c_file.path().to_path_buf();
+
+    let output = run_test_base(
+        "pcc",
+        &vec![
+            "-fno-pie".to_string(),
+            "-S".to_string(),
+            "-o".to_string(),
+            "-".to_string(),
+            c_path.to_string_lossy().to_string(),
+        ],
+        &[],
+    );
+
+    assert!(
+        output.status.success(),
+        "pcc -S -o - failed: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    let asm = String::from_utf8_lossy(&output.stdout);
+    assert!(
+        asm.contains("foo(%rip)"),
+        "expected RIP-relative access in asm output"
+    );
+    assert!(
+        !asm.contains("@GOTPCREL"),
+        "unexpected GOTPCREL usage for local globals"
+    );
+}
+
 #[test]
 fn codegen_cfi_directives() {
     let c_file = create_c_file(
@@ -677,3 +723,384 @@ int main(void) {
         exit_code
     );
 }
+
+// ============================================================================
+// Test: Large struct member copy from global variable
+// ============================================================================
+// Regression test for a bug where accessing a large struct member (size > 64
+// bits) of a global variable generated an extra dereference, treating the
+// struct's address as a pointer to load through rather than as the struct
+// itself.
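+//
+// Hypothetical sketch of the bad pattern (x86-64, not taken from the actual
+// codegen): for `a = g.member`, the compiler loaded the member's first 8
+// bytes and then copied *through* that value as if it were a source pointer,
+// instead of taking the member's address (lea) and block-copying from it.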
+#[test] +fn codegen_large_struct_member_copy() { + let code = r#" +typedef struct { + void *ctx; + void *malloc_fn; + void *calloc_fn; + void *realloc_fn; + void *free_fn; +} Allocator; + +typedef struct { + Allocator raw; + Allocator mem; + Allocator obj; +} StandardAllocators; + +typedef struct { + int use_hugepages; + StandardAllocators standard; +} AllAllocators; + +typedef struct { + char padding[928]; + AllAllocators allocators; +} RuntimeState; + +RuntimeState _MyRuntime; + +#define _MyMem_Raw (_MyRuntime.allocators.standard.raw) + +void get_allocator(Allocator *result) { + *result = _MyMem_Raw; +} + +int main(void) { + // Initialize the source struct + _MyMem_Raw.ctx = (void*)0x1234; + _MyMem_Raw.malloc_fn = (void*)0x5678; + _MyMem_Raw.calloc_fn = (void*)0x9abc; + _MyMem_Raw.realloc_fn = (void*)0xdef0; + _MyMem_Raw.free_fn = (void*)0x1111; + + // Copy via function + Allocator a; + get_allocator(&a); + + // Verify all fields were copied correctly + if (a.ctx != (void*)0x1234) return 1; + if (a.malloc_fn != (void*)0x5678) return 2; + if (a.calloc_fn != (void*)0x9abc) return 3; + if (a.realloc_fn != (void*)0xdef0) return 4; + if (a.free_fn != (void*)0x1111) return 5; + + // Test direct assignment in main (not via function) + Allocator b = _MyMem_Raw; + if (b.ctx != (void*)0x1234) return 6; + if (b.malloc_fn != (void*)0x5678) return 7; + + // Test arrow access (p->member.field pattern) + RuntimeState *rp = &_MyRuntime; + Allocator c = rp->allocators.standard.raw; + if (c.ctx != (void*)0x1234) return 8; + if (c.malloc_fn != (void*)0x5678) return 9; + + return 0; +} +"#; + + let exit_code = compile_and_run_optimized("struct_member_copy", code); + assert_eq!( + exit_code, 0, + "Large struct member copy test failed with exit code {}", + exit_code + ); +} + +// Test for atomic compare-and-swap register clobbering bug. +// The bug: when regalloc assigns CAS operands to registers R9/R10/R11/RAX, +// loading one operand can clobber another before it's used. +// This test uses inline functions to trigger the problematic register allocation. 
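+//
+// Illustrative failure mode (an assumption, not verified against the actual
+// allocation): x86-64 `cmpxchg` implicitly uses RAX for the expected value,
+// so if `desired` had already been allocated to RAX, loading `*expected`
+// into RAX clobbers `desired` before the instruction executes.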
+#[test]
+fn codegen_atomic_cas_register_clobbering() {
+    let code = r#"
+#include <stdatomic.h>
+#include <stdint.h>
+
+#define UNLOCKED 0
+#define LOCKED 1
+
+typedef struct { uintptr_t v; } RawMutex;
+
+__attribute__((always_inline))
+static inline int
+atomic_cas_uintptr(uintptr_t *obj, uintptr_t *expected, uintptr_t desired) {
+    return atomic_compare_exchange_strong((_Atomic(uintptr_t)*)obj, expected, desired);
+}
+
+__attribute__((always_inline))
+static inline int lock_mutex(RawMutex *m) {
+    uintptr_t unlocked = UNLOCKED;
+    return atomic_cas_uintptr(&m->v, &unlocked, LOCKED);
+}
+
+__attribute__((always_inline))
+static inline int unlock_mutex(RawMutex *m) {
+    uintptr_t locked = LOCKED;
+    return atomic_cas_uintptr(&m->v, &locked, UNLOCKED);
+}
+
+int main(void) {
+    RawMutex m = {0};
+
+    // Test 1: Lock should succeed (v: 0 -> 1)
+    if (m.v != 0) return 1;
+    if (!lock_mutex(&m)) return 2;   // Lock should succeed
+    if (m.v != 1) return 3;          // Value should be 1 after lock
+
+    // Test 2: Double-lock should fail (v is already 1)
+    if (lock_mutex(&m)) return 4;    // Second lock should fail
+    if (m.v != 1) return 5;          // Value should still be 1
+
+    // Test 3: Unlock should succeed (v: 1 -> 0)
+    if (!unlock_mutex(&m)) return 6; // Unlock should succeed
+    if (m.v != 0) return 7;          // Value should be 0 after unlock
+
+    // Test 4: Double-unlock should fail (v is already 0)
+    if (unlock_mutex(&m)) return 8;  // Second unlock should fail
+    if (m.v != 0) return 9;          // Value should still be 0
+
+    // Test 5: Can lock again after unlock
+    if (!lock_mutex(&m)) return 10;
+    if (m.v != 1) return 11;
+
+    return 0;
+}
+"#;
+
+    let exit_code = compile_and_run_optimized("atomic_cas_clobber", code);
+    assert_eq!(
+        exit_code, 0,
+        "Atomic CAS register clobbering test failed with exit code {}",
+        exit_code
+    );
+}
+
+// Regression test for a bug where copying a large struct (> 64 bits) from an
+// array element would incorrectly dereference the first field as a pointer
+// instead of doing a proper block copy.
+// Bug: `struct pair item = array[0];` would crash when the struct has 2+ pointers.
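+//
+// Concretely, with `pairs[0] = {(void*)0xDEADBEEF, "First"}` below, the buggy
+// code loaded 0xDEADBEEF and then read through it as the copy source instead
+// of copying 16 bytes starting at &pairs[0].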
+#[test]
+fn codegen_large_struct_array_copy() {
+    let code = r#"
+#include <stdio.h>
+
+struct pair {
+    void *ptr;
+    const char *str;
+};
+
+static struct pair pairs[] = {
+    {(void*)0xDEADBEEF, "First"},
+    {(void*)0xCAFEBABE, "Second"},
+};
+
+int main(void) {
+    // Test: copy struct from array to local variable
+    // This was crashing because the code tried to dereference
+    // the first field (0xDEADBEEF) as a pointer to copy from
+    struct pair item = pairs[0];
+
+    // Verify the copy was correct
+    if (item.ptr != (void*)0xDEADBEEF) {
+        printf("FAIL: item.ptr = %p, expected 0xDEADBEEF\n", item.ptr);
+        return 1;
+    }
+    if (item.str[0] != 'F' || item.str[1] != 'i') {
+        printf("FAIL: item.str = %s, expected First\n", item.str);
+        return 2;
+    }
+
+    // Test: copy second element
+    struct pair item2 = pairs[1];
+    if (item2.ptr != (void*)0xCAFEBABE) {
+        printf("FAIL: item2.ptr = %p, expected 0xCAFEBABE\n", item2.ptr);
+        return 3;
+    }
+    if (item2.str[0] != 'S') {
+        printf("FAIL: item2.str = %s, expected Second\n", item2.str);
+        return 4;
+    }
+
+    // Test: copy in a loop (dynamic index)
+    for (int i = 0; i < 2; i++) {
+        struct pair p = pairs[i];
+        if (i == 0 && p.ptr != (void*)0xDEADBEEF) return 5;
+        if (i == 1 && p.ptr != (void*)0xCAFEBABE) return 6;
+    }
+
+    printf("OK\n");
+    return 0;
+}
+"#;
+
+    let exit_code = compile_and_run_optimized("struct_array_copy", code);
+    assert_eq!(
+        exit_code, 0,
+        "Large struct array copy test failed with exit code {}",
+        exit_code
+    );
+}
+
+// ============================================================================
+// Compound literal zero-initialization test
+// ============================================================================
+// C99 6.7.8p21: Fields not explicitly initialized in a compound literal
+// must be zero-initialized. Bug: *p = (struct S){.a = val} left .b and .c
+// as garbage instead of zero.
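+//
+// Equivalently, `*p = (struct S){.a = v};` must behave like
+// `struct S tmp = {0}; tmp.a = v; *p = tmp;` - unnamed fields become zero,
+// not whatever *p previously held.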
+#[test]
+fn codegen_compound_literal_zero_init() {
+    let code = r#"
+typedef long int64_t;
+void *malloc(unsigned long);
+void free(void *);
+int printf(const char *, ...);
+#define NULL ((void*)0)
+
+typedef int (*func_ptr)(void);
+
+struct cached_m_dict {
+    void *copied;
+    int64_t extra;
+};
+
+typedef struct cached_m_dict *cached_m_dict_t;
+
+typedef enum {
+    ORIGIN_BUILTIN = 0,
+    ORIGIN_CORE = 1,
+    ORIGIN_DYNAMIC = 2
+} origin_t;
+
+struct extensions_cache_value {
+    void *def;                     // offset 0:  8 bytes
+    func_ptr m_init;               // offset 8:  8 bytes
+    int64_t m_index;               // offset 16: 8 bytes
+    cached_m_dict_t m_dict;        // offset 24: 8 bytes (pointer)
+    struct cached_m_dict _m_dict;  // offset 32: 16 bytes (embedded struct)
+    origin_t origin;               // offset 48: 4 bytes
+};
+
+int main(void) {
+    struct extensions_cache_value *v = malloc(sizeof(*v));
+
+    // Fill with known non-zero pattern to detect failure to zero-init
+    v->def = (void*)0xAAAA;
+    v->m_init = (func_ptr)0xBBBB;
+    v->m_index = 0xCCCC;
+    v->m_dict = (cached_m_dict_t)0xDDDD;
+    v->_m_dict.copied = (void*)0xEEEE;
+    v->_m_dict.extra = 0xFFFF;
+    v->origin = ORIGIN_DYNAMIC;
+
+    // Assign compound literal with partial initialization
+    // Only .def, .m_init, .m_index, and .origin are explicitly set
+    // .m_dict, ._m_dict.copied, ._m_dict.extra should become 0
+    *v = (struct extensions_cache_value){
+        .def = (void*)0x1234,
+        .m_init = NULL,
+        .m_index = 1,
+        .origin = ORIGIN_CORE,
+    };
+
+    // Check explicitly initialized fields
+    if (v->def != (void*)0x1234) {
+        printf("FAIL: v->def = %p, expected 0x1234\n", v->def);
+        return 1;
+    }
+    if (v->m_init != NULL) {
+        printf("FAIL: v->m_init = %p, expected NULL\n", (void*)v->m_init);
+        return 2;
+    }
+    if (v->m_index != 1) {
+        printf("FAIL: v->m_index = %ld, expected 1\n", (long)v->m_index);
+        return 3;
+    }
+    if (v->origin != ORIGIN_CORE) {
+        printf("FAIL: v->origin = %d, expected 1\n", v->origin);
+        return 4;
+    }
+
+    // Check implicitly zero-initialized fields (the bug was here!)
+    if (v->m_dict != NULL) {
+        printf("FAIL: v->m_dict = %p, expected NULL (should be zero-init)\n",
+               (void*)v->m_dict);
+        return 5;
+    }
+    if (v->_m_dict.copied != NULL) {
+        printf("FAIL: v->_m_dict.copied = %p, expected NULL (should be zero-init)\n",
+               v->_m_dict.copied);
+        return 6;
+    }
+    if (v->_m_dict.extra != 0) {
+        printf("FAIL: v->_m_dict.extra = %ld, expected 0 (should be zero-init)\n",
+               (long)v->_m_dict.extra);
+        return 7;
+    }
+
+    free(v);
+    printf("OK\n");
+    return 0;
+}
+"#;
+
+    let exit_code = compile_and_run_optimized("compound_literal_zero", code);
+    assert_eq!(
+        exit_code, 0,
+        "Compound literal zero-init test failed with exit code {}",
+        exit_code
+    );
+}
+
+// ============================================================================
+// Ternary conditional expressions with pointer dereference must use short-circuit
+// evaluation. Bug: `value = ptr == NULL ? 0 : ptr->x` would evaluate `ptr->x`
+// unconditionally, causing a crash when `ptr` is NULL because the compiler
+// incorrectly used a select instruction (cmov) instead of proper branching.
+#[test]
+fn codegen_conditional_short_circuit() {
+    let code = r#"
+#include <stdio.h>
+#include <stddef.h>
+
+struct foo {
+    int x;
+};
+
+// Test function that uses ternary with pointer dereference
+int get_value(struct foo *entry) {
+    // This MUST use short-circuit evaluation (branching)
+    // If implemented incorrectly with select/cmov, it will crash when entry is NULL
+    return entry == NULL ?
0 : entry->x; +} + +int main(void) { + struct foo f = { .x = 42 }; + + // Test 1: non-NULL pointer should return the value + int result1 = get_value(&f); + if (result1 != 42) { + printf("FAIL: get_value(&f) = %d, expected 42\n", result1); + return 1; + } + + // Test 2: NULL pointer should return 0 without crashing + // This will CRASH if the compiler eagerly evaluates entry->x + int result2 = get_value(NULL); + if (result2 != 0) { + printf("FAIL: get_value(NULL) = %d, expected 0\n", result2); + return 2; + } + + printf("OK\n"); + return 0; +} +"#; + + let exit_code = compile_and_run_optimized("conditional_short_circuit", code); + assert_eq!( + exit_code, 0, + "Conditional short-circuit test failed with exit code {} (likely crashed on NULL dereference)", + exit_code + ); +} diff --git a/cc/tests/codegen/pic.rs b/cc/tests/codegen/pic.rs index c5dbcc7da..4732e1129 100644 --- a/cc/tests/codegen/pic.rs +++ b/cc/tests/codegen/pic.rs @@ -176,3 +176,35 @@ int main(void) { 0 ); } + +/// Test PIE code generation (-fPIE / -fpie) +#[test] +fn codegen_pie_flags() { + let code = r#" +int main(void) { + return 0; +} +"#; + assert_eq!( + compile_and_run("pie_uppercase", code, &["-fPIE".to_string()]), + 0 + ); + assert_eq!( + compile_and_run("pie_lowercase", code, &["-fpie".to_string()]), + 0 + ); +} + +/// Test PIE disable flag (-fno-pie) +#[test] +fn codegen_pie_disable_flag() { + let code = r#" +int main(void) { + return 0; +} +"#; + assert_eq!( + compile_and_run("pie_disable", code, &["-fno-pie".to_string()]), + 0 + ); +} diff --git a/cc/tests/cpython/flags.rs b/cc/tests/cpython/flags.rs new file mode 100644 index 000000000..59bfa8fa1 --- /dev/null +++ b/cc/tests/cpython/flags.rs @@ -0,0 +1,66 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. 
+// SPDX-License-Identifier: MIT +// +// CPython flag compatibility tests +// + +use crate::common::compile_and_run; + +/// Test that pcc accepts CPython-style compiler flags without crashing +#[test] +fn cpython_flags_ignored_flags() { + let code = r#" +int main(void) { + return 0; +} +"#; + // Simulate flags that CPython's configure probes + let extra = vec![ + "-fvisibility=hidden".to_string(), + "-fno-semantic-interposition".to_string(), + "-fstack-protector-strong".to_string(), + "-fno-plt".to_string(), + "-pipe".to_string(), + "-fno-common".to_string(), + ]; + assert_eq!(compile_and_run("cpython_flags_ignored", code, &extra), 0); +} + +/// Test that -pthread defines _REENTRANT and links correctly +#[test] +fn cpython_flags_pthread() { + let code = r#" +#ifndef _REENTRANT +#error "_REENTRANT not defined" +#endif +int main(void) { + return 0; +} +"#; + let extra = vec!["-pthread".to_string()]; + assert_eq!(compile_and_run("cpython_flags_pthread", code, &extra), 0); +} + +#[test] +fn cpython_flags_m_flags_unsupported() { + let code = r#" +int main(void) { + return 0; +} +"#; + let extra = vec![ + "-msse".to_string(), + "-msse2".to_string(), + "-mavx2".to_string(), + "-march=x86-64".to_string(), + ]; + assert_ne!( + compile_and_run("cpython_flags_mflags_unsupported", code, &extra), + 0 + ); +} diff --git a/cc/tests/cpython/mod.rs b/cc/tests/cpython/mod.rs new file mode 100644 index 000000000..da335ab00 --- /dev/null +++ b/cc/tests/cpython/mod.rs @@ -0,0 +1,14 @@ +// +// Copyright (c) 2025-2026 Jeff Garzik +// +// This file is part of the posixutils-rs project covered under +// the MIT License. For the full license text, please see the LICENSE +// file in the root directory of this project. +// SPDX-License-Identifier: MIT +// +// CPython build compatibility tests for pcc +// +// Tests for features needed to build CPython with pcc. 
+// + +mod flags; diff --git a/cc/tests/integration.rs b/cc/tests/integration.rs index b24ed7cf9..0f1cb3cf1 100644 --- a/cc/tests/integration.rs +++ b/cc/tests/integration.rs @@ -25,6 +25,7 @@ mod c89; mod c99; mod codegen; mod common; +mod cpython; mod misc; mod preprocessor; mod tools; diff --git a/cc/token/preprocess.rs b/cc/token/preprocess.rs index 5c6b6839a..4e272cedb 100644 --- a/cc/token/preprocess.rs +++ b/cc/token/preprocess.rs @@ -368,6 +368,60 @@ pub struct Preprocessor<'a> { lexer_mode: LexerMode, } +/// Check if an attribute name is supported by pcc +fn is_supported_attribute(name: &str) -> bool { + matches!( + name, + "noreturn" + | "__noreturn__" + | "unused" + | "__unused__" + | "aligned" + | "__aligned__" + | "packed" + | "__packed__" + | "deprecated" + | "__deprecated__" + | "weak" + | "__weak__" + | "section" + | "__section__" + | "visibility" + | "__visibility__" + | "constructor" + | "__constructor__" + | "destructor" + | "__destructor__" + | "used" + | "__used__" + | "noinline" + | "__noinline__" + | "always_inline" + | "__always_inline__" + | "hot" + | "__hot__" + | "cold" + | "__cold__" + | "warn_unused_result" + | "__warn_unused_result__" + | "format" + | "__format__" + | "fallthrough" + | "__fallthrough__" + | "nonstring" + | "__nonstring__" + | "malloc" + | "__malloc__" + | "pure" + | "__pure__" + | "sentinel" + | "__sentinel__" + | "no_sanitize_memory" + | "no_sanitize_address" + | "no_sanitize_thread" + ) +} + impl<'a> Preprocessor<'a> { /// Format the current time as C99 __DATE__ and __TIME__ strings /// Returns (date_string, time_string) where: @@ -3006,34 +3060,7 @@ impl<'a> Preprocessor<'a> { }; match builtin { - BuiltinMacro::HasAttribute => { - // Return true for attributes we actually implement - matches!( - name.as_str(), - "noreturn" - | "__noreturn__" - | "unused" - | "__unused__" - | "aligned" - | "__aligned__" - | "packed" - | "__packed__" - | "deprecated" - | "__deprecated__" - | "weak" - | "__weak__" - | "section" - | "__section__" - | "visibility" - | "__visibility__" - | "constructor" - | "__constructor__" - | "destructor" - | "__destructor__" - | "used" - | "__used__" - ) - } + BuiltinMacro::HasAttribute => is_supported_attribute(&name), BuiltinMacro::HasBuiltin => { // Use centralized builtin registry crate::builtins::is_builtin(name.as_str()) @@ -3469,33 +3496,7 @@ impl<'a, 'b> ExprEvaluator<'a, 'b> { None => return 0, }; - // Return 1 for attributes we actually implement - let supported = matches!( - name.as_str(), - "noreturn" - | "__noreturn__" - | "unused" - | "__unused__" - | "aligned" - | "__aligned__" - | "packed" - | "__packed__" - | "deprecated" - | "__deprecated__" - | "weak" - | "__weak__" - | "section" - | "__section__" - | "visibility" - | "__visibility__" - | "constructor" - | "__constructor__" - | "destructor" - | "__destructor__" - | "used" - | "__used__" - ); - if supported { + if is_supported_attribute(&name) { 1 } else { 0 diff --git a/editors/vi/ui/terminal.rs b/editors/vi/ui/terminal.rs index ddea25319..fd846e3b4 100644 --- a/editors/vi/ui/terminal.rs +++ b/editors/vi/ui/terminal.rs @@ -284,6 +284,10 @@ impl Drop for Terminal { #[cfg(test)] mod tests { use super::*; + use std::sync::Mutex; + + // Env vars are process-global; serialize tests that modify COLUMNS/LINES + static ENV_LOCK: Mutex<()> = Mutex::new(()); #[test] fn test_terminal_size_default() { @@ -294,35 +298,33 @@ mod tests { #[test] fn test_columns_env_override() { - // Set COLUMNS environment variable + let _guard = ENV_LOCK.lock().unwrap(); 
std::env::set_var("COLUMNS", "60"); let term = Terminal::new(); std::env::remove_var("COLUMNS"); if let Ok(term) = term { let size = term.size(); - // Should use COLUMNS value if set assert_eq!(size.cols, 60); } } #[test] fn test_lines_env_override() { - // Set LINES environment variable + let _guard = ENV_LOCK.lock().unwrap(); std::env::set_var("LINES", "30"); let term = Terminal::new(); std::env::remove_var("LINES"); if let Ok(term) = term { let size = term.size(); - // Should use LINES value if set assert_eq!(size.rows, 30); } } #[test] fn test_both_env_override() { - // Set both environment variables + let _guard = ENV_LOCK.lock().unwrap(); std::env::set_var("COLUMNS", "100"); std::env::set_var("LINES", "40"); let term = Terminal::new(); @@ -338,7 +340,7 @@ mod tests { #[test] fn test_invalid_env_values_fallback() { - // Set invalid environment variables + let _guard = ENV_LOCK.lock().unwrap(); std::env::set_var("COLUMNS", "invalid"); std::env::set_var("LINES", "not-a-number"); let term = Terminal::new(); @@ -347,9 +349,8 @@ mod tests { if let Ok(term) = term { let size = term.size(); - // Should fall back to ioctl or defaults - assert!(size.cols >= 10); // At least minimum - assert!(size.rows >= 2); // At least minimum + assert!(size.cols >= 10); + assert!(size.rows >= 2); } } }