From 6c0a7d1580bf0a41f5b42c074e3c450b91ed75d9 Mon Sep 17 00:00:00 2001 From: Tm-C-mT <490534897@qq.com> Date: Tue, 28 Oct 2025 17:50:21 +0800 Subject: [PATCH 01/10] [libcpu-riscv]: [common64 virt64]:Fix compilation errors after enabling the SMP architecture. Currently, the bsp: qemu-virt64-riscv does not support the SMP architecture, and some necessary interfaces are not implemented. Solution: Add the interface declarations to make the compilation pass. Signed-off-by: Mengchen Teng --- libcpu/risc-v/common64/cpuport.c | 12 ++++++++++++ libcpu/risc-v/common64/interrupt_gcc.S | 16 ++++++++++++++++ libcpu/risc-v/common64/trap.c | 8 ++++++++ libcpu/risc-v/virt64/interrupt.c | 22 ++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c index 76ae7d38271..1e49fea0b12 100644 --- a/libcpu/risc-v/common64/cpuport.c +++ b/libcpu/risc-v/common64/cpuport.c @@ -137,3 +137,15 @@ void rt_hw_set_process_id(int pid) { // TODO } + +#ifdef RT_USING_SMP +void rt_hw_secondary_cpu_up(void) +{ + +} + +void secondary_cpu_entry(void) +{ + +} +#endif /* RT_USING_SMP */ \ No newline at end of file diff --git a/libcpu/risc-v/common64/interrupt_gcc.S b/libcpu/risc-v/common64/interrupt_gcc.S index 015900a16dd..c58a857c231 100644 --- a/libcpu/risc-v/common64/interrupt_gcc.S +++ b/libcpu/risc-v/common64/interrupt_gcc.S @@ -88,6 +88,7 @@ _resume_kernel: csrw sscratch, zero sret +#ifndef RT_USING_SMP .global rt_hw_interrupt_enable rt_hw_interrupt_enable: csrs sstatus, a0 /* restore to old csr */ @@ -97,3 +98,18 @@ rt_hw_interrupt_enable: rt_hw_interrupt_disable: csrrci a0, sstatus, 2 /* clear SIE */ jr ra +#else +.global rt_hw_local_irq_disable +rt_hw_local_irq_disable: + csrrci a0, sstatus, 2 + jr ra + +.global rt_hw_local_irq_enable +rt_hw_local_irq_enable: + csrs sstatus, a0 + jr ra + +.global rt_hw_secondary_cpu_idle_exec +rt_hw_secondary_cpu_idle_exec: + jr ra +#endif /* RT_USING_SMP */ \ No newline at end of file diff --git a/libcpu/risc-v/common64/trap.c b/libcpu/risc-v/common64/trap.c index 1b79b73950c..5e97e4e0387 100644 --- a/libcpu/risc-v/common64/trap.c +++ b/libcpu/risc-v/common64/trap.c @@ -136,6 +136,14 @@ static volatile int nested = 0; #define EXIT_TRAP nested -= 1 #define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \ if (nested != 1) handle_nested_trap_panic(cause, tval, epc, eframe) +#else +/* Add trap nesting detection under the SMP architecture. 
*/
+static volatile int nested[RT_CPUS_NR] = {0};
+#define ENTER_TRAP nested[rt_hw_cpu_id()] += 1
+#define EXIT_TRAP nested[rt_hw_cpu_id()] -= 1
+#define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \
+    if (nested[rt_hw_cpu_id()] != 1) \
+    handle_nested_trap_panic(cause, tval, epc, eframe)
 #endif /* RT_USING_SMP */
 
 static const char *get_exception_msg(int id)
diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c
index f627210dac2..14e682601ea 100644
--- a/libcpu/risc-v/virt64/interrupt.c
+++ b/libcpu/risc-v/virt64/interrupt.c
@@ -92,3 +92,25 @@ void rt_hw_interrupt_init()
 
     plic_set_threshold(0);
 }
+
+#ifdef RT_USING_SMP
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
+{
+
+}
+
+void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+{
+
+}
+
+void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+{
+
+}
+
+void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask)
+{
+
+}
+#endif /* RT_USING_SMP */
\ No newline at end of file
From 7948f500d46b4b14d08f93f93b2aa7baae1af27c Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Tue, 28 Oct 2025 18:25:20 +0800
Subject: [PATCH 02/10] [libcpu-riscv]: [common64 virt64]: Fix the normal startup of the SMP architecture.

Tested on the qemu-virt64-riscv BSP. Currently the command line cannot start
normally, for two reasons: first, the SMP scheduler requires its per-CPU
scheduling information to be updated on every switch; second, context
switching from within an interrupt is not yet supported.

Solution: in the two functions rt_hw_context_switch_to and rt_hw_context_switch
in context_gcc.S, add a call to rt_cpus_lock_status_restore to update the
scheduler information. For the second issue, when scheduling is triggered
inside an interrupt, pcpu->irq_switch_flag is set to 1; rt_scheduler_do_irq_switch
is therefore called in interrupt_gcc.S to decide whether a context switch must
be performed.
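
For reference, the SMP branch of _interrupt_exit behaves roughly like the
following C sketch (illustrative only: the real code is the assembly in
interrupt_gcc.S below, and interrupt_exit_smp is a made-up name for the flow):

    /* sketch of the SMP interrupt-exit path */
    static void interrupt_exit_smp(void *irq_context)
    {
        /* rt_scheduler_do_irq_switch() checks pcpu->irq_switch_flag; when
         * the flag is set it performs the context switch itself and only
         * returns here after this context is scheduled back in */
        rt_scheduler_do_irq_switch(irq_context);
        /* otherwise fall through and restore the interrupted context */
    }
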
Signed-off-by: Mengchen Teng --- libcpu/risc-v/common64/context_gcc.S | 31 ++++++++++++++++++++++++-- libcpu/risc-v/common64/cpuport.c | 12 ++++++++++ libcpu/risc-v/common64/interrupt_gcc.S | 7 ++++++ libcpu/risc-v/virt64/interrupt.c | 8 +++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/libcpu/risc-v/common64/context_gcc.S b/libcpu/risc-v/common64/context_gcc.S index ed216716b09..6d667362983 100644 --- a/libcpu/risc-v/common64/context_gcc.S +++ b/libcpu/risc-v/common64/context_gcc.S @@ -69,14 +69,27 @@ .endm /* + * #ifdef RT_USING_SMP + * void rt_hw_context_switch_to(rt_ubase_t to, stuct rt_thread *to_thread); + * #else * void rt_hw_context_switch_to(rt_ubase_t to); - * - * a0 --> to SP pointer + * #endif + * a0 --> to + * a1 --> to_thread */ .globl rt_hw_context_switch_to rt_hw_context_switch_to: LOAD sp, (a0) +#ifdef RT_USING_SMP + /* + * Pass the previous CPU lock status to + * rt_cpus_lock_status_restore for restoration + */ + mv a0, a1 + call rt_cpus_lock_status_restore +#endif + call rt_thread_self mv s1, a0 @@ -88,10 +101,15 @@ rt_hw_context_switch_to: sret /* + * #ifdef RT_USING_SMP + * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread); + * #else * void rt_hw_context_switch(rt_ubase_t from, rt_ubase_t to); + * #endif * * a0 --> from SP pointer * a1 --> to SP pointer + * a2 --> to_thread * * It should only be used on local interrupt disable */ @@ -103,6 +121,15 @@ rt_hw_context_switch: // restore to thread SP LOAD sp, (a1) +#ifdef RT_USING_SMP + /* + * Pass the previous CPU lock status to + * rt_cpus_lock_status_restore for restoration + */ + mv a0, a2 + call rt_cpus_lock_status_restore +#endif /*RT_USING_SMP*/ + // restore Address Space call rt_thread_self mv s1, a0 diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c index 1e49fea0b12..f6bac446c31 100644 --- a/libcpu/risc-v/common64/cpuport.c +++ b/libcpu/risc-v/common64/cpuport.c @@ -117,6 +117,18 @@ void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t return; } +#else +void rt_hw_context_switch_interrupt(void *context, rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread) +{ + /* Perform architecture-specific context switch. This call will + * restore the target thread context and should not return when a + * switch is performed. The caller (scheduler) invoked this function + * in a context where local IRQs are disabled. 
*/
+    rt_uint32_t level;
+    level = rt_hw_local_irq_disable();
+    rt_hw_context_switch((rt_ubase_t)from, (rt_ubase_t)to, to_thread);
+    rt_hw_local_irq_enable(level);
+}
 #endif /* end of RT_USING_SMP */
 
 /** shutdown CPU */
diff --git a/libcpu/risc-v/common64/interrupt_gcc.S b/libcpu/risc-v/common64/interrupt_gcc.S
index c58a857c231..5b1f2866688 100644
--- a/libcpu/risc-v/common64/interrupt_gcc.S
+++ b/libcpu/risc-v/common64/interrupt_gcc.S
@@ -60,10 +60,17 @@ _handle_interrupt_and_exception:
     call handle_trap
 
 _interrupt_exit:
+#ifndef RT_USING_SMP
     la s0, rt_thread_switch_interrupt_flag
     lw s2, 0(s0)
     beqz s2, _resume_execution
     sw zero, 0(s0)
+#else
+    mv a0, sp
+    call rt_scheduler_do_irq_switch
+    // if no switch was performed, jump to _resume_execution
+    j _resume_execution
+#endif /* RT_USING_SMP */
 
 _context_switch:
     la t0, rt_interrupt_from_thread
diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c
index 14e682601ea..143181fb9e5 100644
--- a/libcpu/risc-v/virt64/interrupt.c
+++ b/libcpu/risc-v/virt64/interrupt.c
@@ -94,6 +94,14 @@ void rt_hw_interrupt_init()
 }
 
 #ifdef RT_USING_SMP
+rt_bool_t rt_hw_interrupt_is_disabled(void)
+{
+    /* Determine the interrupt enable state */
+    rt_ubase_t sstatus;
+    __asm__ volatile("csrr %0, sstatus" : "=r"(sstatus));
+    return (sstatus & SSTATUS_SIE) == 0;
+}
+
 void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
 
From c7cbe2e4430f6a93b3f668aa147ab6a53f589fe7 Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Tue, 28 Oct 2025 18:45:28 +0800
Subject: [PATCH 03/10] [libcpu-riscv]: [common64 virt64]: Add the specific implementation of the spinlock.

A spinlock implementation is added in risc-v/virt64/interrupt.c (initially
commented out; it is enabled later in this series). Because it needs atomic
operations, a new file atomic_riscv.c (copied from the common directory) is
added under risc-v/common64.
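
For reference, a minimal sketch of how these AMO helpers are meant to be used
by the (still commented-out) ticket lock; take_ticket is a hypothetical name,
and the 16/16 bit split mirrors the slock layout described in the code:

    static volatile rt_atomic_t slock = 0;

    static rt_ubase_t take_ticket(void)
    {
        /* atomically bump the "next" field kept in the high 16 bits */
        rt_atomic_t prev = rt_hw_atomic_add(&slock, (rt_atomic_t)(1UL << 16));
        /* the previous "next" value is our ticket number */
        return (prev >> 16) & 0xffffUL;
    }
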
Signed-off-by: Mengchen Teng --- libcpu/risc-v/common64/atomic_riscv.c | 159 ++++++++++++++++++++++++++ libcpu/risc-v/virt64/interrupt.c | 42 ++++++- 2 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 libcpu/risc-v/common64/atomic_riscv.c diff --git a/libcpu/risc-v/common64/atomic_riscv.c b/libcpu/risc-v/common64/atomic_riscv.c new file mode 100644 index 00000000000..bc1561f2ee3 --- /dev/null +++ b/libcpu/risc-v/common64/atomic_riscv.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2006-2023, RT-Thread Development Team + * + * SPDX-License-Identifier: Apache-2.0 + * + * Change Logs: + * Date Author Notes + * 2023-03-14 WangShun first version + */ + +#include + +rt_atomic_t rt_hw_atomic_exchange(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_add(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_sub(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; + val = -val; +#if __riscv_xlen == 32 + asm volatile ("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_xor(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoxor.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoxor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_and(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoand.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoand.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_or(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoor.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif + return result; +} + +rt_atomic_t rt_hw_atomic_load(volatile rt_atomic_t *ptr) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoxor.w %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoxor.d %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); +#endif + return result; +} + +void rt_hw_atomic_store(volatile rt_atomic_t *ptr, rt_atomic_t val) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); +#endif +} + +rt_atomic_t 
rt_hw_atomic_flag_test_and_set(volatile rt_atomic_t *ptr) +{ + rt_atomic_t result = 0; + rt_atomic_t temp = 1; +#if __riscv_xlen == 32 + asm volatile ("amoor.w %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); +#endif + return result; +} + +void rt_hw_atomic_flag_clear(volatile rt_atomic_t *ptr) +{ + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile ("amoand.w %0, x0, (%1)" : "=r"(result) :"r"(ptr) : "memory"); +#elif __riscv_xlen == 64 + asm volatile ("amoand.d %0, x0, (%1)" : "=r"(result) :"r"(ptr) : "memory"); +#endif +} + +rt_atomic_t rt_hw_atomic_compare_exchange_strong(volatile rt_atomic_t *ptr, rt_atomic_t *old, rt_atomic_t desired) +{ + rt_atomic_t tmp = *old; + rt_atomic_t result = 0; +#if __riscv_xlen == 32 + asm volatile( + " fence iorw, ow\n" + "1: lr.w.aq %[result], (%[ptr])\n" + " bne %[result], %[tmp], 2f\n" + " sc.w.rl %[tmp], %[desired], (%[ptr])\n" + " bnez %[tmp], 1b\n" + " li %[result], 1\n" + " j 3f\n" + " 2:sw %[result], (%[old])\n" + " li %[result], 0\n" + " 3:\n" + : [result]"+r" (result), [tmp]"+r" (tmp), [ptr]"+r" (ptr) + : [desired]"r" (desired), [old]"r"(old) + : "memory"); +#elif __riscv_xlen == 64 + asm volatile( + " fence iorw, ow\n" + "1: lr.d.aq %[result], (%[ptr])\n" + " bne %[result], %[tmp], 2f\n" + " sc.d.rl %[tmp], %[desired], (%[ptr])\n" + " bnez %[tmp], 1b\n" + " li %[result], 1\n" + " j 3f\n" + " 2:sd %[result], (%[old])\n" + " li %[result], 0\n" + " 3:\n" + : [result]"+r" (result), [tmp]"+r" (tmp), [ptr]"+r" (ptr) + : [desired]"r" (desired), [old]"r"(old) + : "memory"); +#endif + return result; +} diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c index 143181fb9e5..044570b1797 100644 --- a/libcpu/risc-v/virt64/interrupt.c +++ b/libcpu/risc-v/virt64/interrupt.c @@ -104,17 +104,53 @@ rt_bool_t rt_hw_interrupt_is_disabled(void) void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock) { - + // union rt_hw_spinlock_t *lock = (void *)_lock; + // _lock->slock = 0; } void rt_hw_spin_lock(rt_hw_spinlock_t *lock) { - + // /* Use ticket lock implemented on top of the 32/64-bit atomic AMO ops. + // * The combined word layout (slock) maps two uint16_t fields: + // * low 16 bits: owner + // * high 16 bits: next (ticket allocator) + // * We atomically increment the "next" field by (1 << 16) and use the + // * returned old value to compute our ticket. Then wait until owner == ticket. 
+    //  */
+    // rt_atomic_t prev;
+    // rt_atomic_t ticket;
+    // rt_atomic_t owner;
+
+    // /* Allocate a ticket by adding (1 << 16) to slock, prev holds previous value */
+    // prev = rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)(1UL << 16));
+    // ticket = (prev >> 16) & 0xffffUL;
+
+    // /* Wait until owner equals our ticket */
+    // for (;;)
+    // {
+    //     owner = rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & 0xffffUL;
+    //     if (owner == ticket)
+    //         break;
+    //     /* TODO: low-power wait for interrupt while spinning */
+    //     // __asm__ volatile("wfi" ::: "memory");
+    // }
+
+    // /* Ensure all following memory accesses are ordered after acquiring the lock */
+    // __asm__ volatile("fence rw, rw" ::: "memory");
 }
 
 void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
 {
-
+    // /* Ensure memory operations before unlock are visible before owner increment */
+    // __asm__ volatile("fence rw, rw" ::: "memory");
+
+    // /* Increment owner (low 16 bits) to hand over lock to next ticket */
+    // rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1);
+
+    // // TODO: IPI interrupt to wake up other harts waiting for the lock
+
+    // /* Make the increment visible to other harts */
+    // __asm__ volatile("fence rw, rw" ::: "memory");
 }
 
 void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask)
From 402ec68bbb68acd05877fb9ff31e1919acbaf5a6 Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Tue, 28 Oct 2025 19:10:14 +0800
Subject: [PATCH 04/10] [libcpu-riscv]: [common64 virt64]: Add the dual-core feature under the SMP architecture.

Previously, the BSP could not run with two or more cores.

Solution: since the system runs in S-mode and cannot access the mhartid
register, the hartid is for now stored in the satp register (this register is
otherwise unused when the bsp qemu-virt64-riscv runs RT-Thread). Additionally,
logic for storing boot_hartid and multi-core initialization logic for the sp
pointer have been added in startup_gcc.S, and logic for secondary core
wake-up and entry has been added in cpuport.c.

Signed-off-by: Mengchen Teng 
---
 bsp/qemu-virt64-riscv/run.sh         |  2 +-
 libcpu/risc-v/common64/cpuport.c     | 37 ++++++++++++++++++++++++++--
 libcpu/risc-v/common64/startup_gcc.S | 32 ++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/bsp/qemu-virt64-riscv/run.sh b/bsp/qemu-virt64-riscv/run.sh
index dd53c95f612..c94600e8b9d 100755
--- a/bsp/qemu-virt64-riscv/run.sh
+++ b/bsp/qemu-virt64-riscv/run.sh
@@ -24,7 +24,7 @@ if [ ! -f $path_image ]; then
     exit
 fi
 
-qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin \
+qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -smp 2 \
     -drive if=none,file=$path_image,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \
     -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \
     -device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0
diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c
index f6bac446c31..e4bb2f3a6ad 100644
--- a/libcpu/risc-v/common64/cpuport.c
+++ b/libcpu/risc-v/common64/cpuport.c
@@ -64,7 +64,14 @@ void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus)
 
 int rt_hw_cpu_id(void)
 {
+#ifndef RT_USING_SMP
     return 0;
+#else
+    /* Currently, the hartid is stored in the satp register. */
+    uint32_t hart_id;
+    asm volatile ("csrr %0, satp" : "=r"(hart_id));
+    return hart_id;
+#endif /* RT_USING_SMP */
 }
 
 /**
@@ -151,13 +158,39 @@ void rt_hw_set_process_id(int pid)
 }
 
 #ifdef RT_USING_SMP
+extern void _start(void);
+extern int boot_hartid;
+/* Boot secondary harts using the SBI HSM hart_start call. */
 void rt_hw_secondary_cpu_up(void)
 {
-
+    rt_uint64_t entry_pa;
+    int hart, ret;
+
+    /* translate kernel virtual _start to physical address */
+    entry_pa = (rt_uint64_t)&_start;//(rt_uint64_t)rt_kmem_v2p((void *)&_start);
+
+    for (hart = 0; hart < RT_CPUS_NR; hart++)
+    {
+        if (hart == boot_hartid) continue;
+
+        ret = sbi_hsm_hart_start((unsigned long)hart,
+                                 (unsigned long)entry_pa,
+                                 0UL);
+        if (ret)
+        {
+            rt_kprintf("sbi_hsm_hart_start failed for hart %d: %d\n", hart, ret);
+        }
+    }
 }
 
 void secondary_cpu_entry(void)
 {
-
+    /* The PLIC peripheral interrupts are currently handled by the boot_hart. */
+    /* Enable the Supervisor-Timer bit in SIE */
+    rt_hw_tick_init();
+
+    rt_hw_spin_lock(&_cpus_lock);
+    /* invoke system scheduler start for secondary CPU */
+    rt_system_scheduler_start();
 }
 #endif /* RT_USING_SMP */
\ No newline at end of file
diff --git a/libcpu/risc-v/common64/startup_gcc.S b/libcpu/risc-v/common64/startup_gcc.S
index 184b48a1aeb..2a4153778b1 100644
--- a/libcpu/risc-v/common64/startup_gcc.S
+++ b/libcpu/risc-v/common64/startup_gcc.S
@@ -32,9 +32,19 @@ _start:
 
 1:
     /* save hartid */
     la t0, boot_hartid /* global varible rt_boot_hartid */
+#ifdef RT_USING_SMP
+    lw t2, (t0)
+    li t3, 0xdeadbeef
+    li t4, 0xffffffff
+    and t2, t2, t4 /* Extract the lower 32 bits. */
+    bne t2, t3, system_init /* If boot_hartid no longer holds 0xdeadbeef, it has already been set, so skip the assignment. */
+#endif
    mv t1, a0 /* get hartid in S-mode frome a0 register */
    sw t1, (t0) /* store t1 register low 4 bits in memory address which is stored in t0 */
 
+#ifdef RT_USING_SMP
+system_init:
+#endif
    /* clear Interrupt Registers */
    csrw sie, 0
    csrw sip, 0
@@ -51,7 +61,10 @@ _start:
    li x7, 0
    li x8, 0
    li x9, 0
+#ifndef RT_USING_SMP
+    /* In the SMP architecture, a0 will be used again later */
    li x10,0
+#endif
    li x11,0
    li x12,0
    li x13,0
@@ -85,10 +98,29 @@ _start:
    la gp, __global_pointer$
 .option pop
 
+#ifndef RT_USING_SMP /* removed SMP support here */
    la sp, __stack_start__
    li t0, __STACKSIZE__
    add sp, sp, t0
+#else
+    csrw satp, a0 /* Currently, the hartid is stored in the satp register. */
+    /* Initialize the sp pointer according to different hartids. */
+    mv t0, a0
+    /* calculate stack offset: hartid * __STACKSIZE__ */
+    li t1, __STACKSIZE__
+    mul t0, t0, t1 /* t0 = hartid * __STACKSIZE__ */
+
+    /* set stack pointer */
+    la sp, __stack_start__
+    add sp, sp, t0 /* sp = __stack_start__ + hartid * __STACKSIZE__ */
+    add sp, sp, t1 /* sp += __STACKSIZE__ (point to stack top) */
+
+    mv t0, a0
+    lw t1, boot_hartid
+    bne t0, t1, secondary_cpu_entry
+    li x10,0 /* Clear the a0 register. */
+#endif /* RT_USING_SMP */
 
 /**
  * sscratch is always zero on kernel mode
From 8386d2998b4dad9b5460c9bbdebd3ba9442a2b32 Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Tue, 28 Oct 2025 20:32:32 +0800
Subject: [PATCH 05/10] [libcpu-riscv]: [support SMP]: Add IPI handling.

Add IPI handling logic for the RISC-V architecture. We handle IPI-related
requests in the supervisor software interrupt. Up to this point, the RISC-V 64
architecture can support the 2-core SMP mode and has passed the SMP Utest.
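
For reference, the dispatch added in trap.c keys off the scause encoding: on
RV64, bit 63 of scause flags an interrupt, and exception code 1 is the
supervisor software interrupt. A sketch of the check (macro values assumed to
match the definitions used by the port):

    #define SCAUSE_INTERRUPT       (1UL << 63)
    #define SCAUSE_S_SOFTWARE_INTR 1UL

    /* true when the trap is the software interrupt used for IPIs */
    static int scause_is_ipi(unsigned long scause)
    {
        return scause == (SCAUSE_INTERRUPT | SCAUSE_S_SOFTWARE_INTR);
    }
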
Signed-off-by: Mengchen Teng --- bsp/qemu-virt64-riscv/driver/board.c | 5 + libcpu/risc-v/common64/cpuport.c | 7 +- libcpu/risc-v/common64/trap.c | 9 ++ libcpu/risc-v/virt64/interrupt.c | 140 ++++++++++++++++++++------- 4 files changed, 123 insertions(+), 38 deletions(-) diff --git a/bsp/qemu-virt64-riscv/driver/board.c b/bsp/qemu-virt64-riscv/driver/board.c index c5116aad0c5..7d08c06ef69 100644 --- a/bsp/qemu-virt64-riscv/driver/board.c +++ b/bsp/qemu-virt64-riscv/driver/board.c @@ -88,6 +88,11 @@ void rt_hw_board_init(void) #endif /* RT_USING_CONSOLE */ rt_hw_tick_init(); + +#ifdef RT_USING_SMP + /* ipi init */ + rt_hw_ipi_init(); +#endif /* RT_USING_SMP */ #ifdef RT_USING_COMPONENTS_INIT rt_components_board_init(); diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c index e4bb2f3a6ad..9c0b2d69a46 100644 --- a/libcpu/risc-v/common64/cpuport.c +++ b/libcpu/risc-v/common64/cpuport.c @@ -188,7 +188,12 @@ void secondary_cpu_entry(void) /* The PLIC peripheral interrupts are currently handled by the boot_hart. */ /* Enable the Supervisor-Timer bit in SIE */ rt_hw_tick_init(); - + +#ifdef RT_USING_SMP + /* ipi init */ + rt_hw_ipi_init(); +#endif /* RT_USING_SMP */ + rt_hw_spin_lock(&_cpus_lock); /* invoke system scheduler start for secondary CPU */ rt_system_scheduler_start(); diff --git a/libcpu/risc-v/common64/trap.c b/libcpu/risc-v/common64/trap.c index 5e97e4e0387..52e51ce5322 100644 --- a/libcpu/risc-v/common64/trap.c +++ b/libcpu/risc-v/common64/trap.c @@ -322,6 +322,15 @@ void handle_trap(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc, tick_isr(); rt_interrupt_leave(); } +#ifdef RT_USING_SMP + else if ((SCAUSE_INTERRUPT | SCAUSE_S_SOFTWARE_INTR) == scause) + { + /* supervisor software interrupt for ipi */ + rt_interrupt_enter(); + rt_hw_ipi_handler(); + rt_interrupt_leave(); + } +#endif /* RT_USING_SMP */ else { if (SCAUSE_INTERRUPT & scause) diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c index 044570b1797..30c99110e05 100644 --- a/libcpu/risc-v/virt64/interrupt.c +++ b/libcpu/risc-v/virt64/interrupt.c @@ -16,6 +16,10 @@ #include "interrupt.h" struct rt_irq_desc irq_desc[MAX_HANDLERS]; +#ifdef RT_USING_SMP +struct rt_irq_desc ipi_desc[RT_MAX_IPI]; +uint8_t ipi_vectors[RT_CPUS_NR] = {0}; +#endif static rt_isr_handler_t rt_hw_interrupt_handle(rt_uint32_t vector, void *param) { @@ -94,6 +98,16 @@ void rt_hw_interrupt_init() } #ifdef RT_USING_SMP +void rt_hw_interrupt_set_priority(int vector, unsigned int priority) +{ + plic_set_priority(vector, priority); +} + +unsigned int rt_hw_interrupt_get_priority(int vector) +{ + return (*(uint32_t *)PLIC_PRIORITY(vector)); +} + rt_bool_t rt_hw_interrupt_is_disabled(void) { /* Determine the interrupt enable state */ @@ -104,57 +118,109 @@ rt_bool_t rt_hw_interrupt_is_disabled(void) void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock) { - // union rt_hw_spinlock_t *lock = (void *)_lock; - // _lock->slock = 0; + union rt_hw_spinlock_t *lock = (void *)_lock; + _lock->slock = 0; } void rt_hw_spin_lock(rt_hw_spinlock_t *lock) { - // /* Use ticket lock implemented on top of the 32/64-bit atomic AMO ops. - // * The combined word layout (slock) maps two uint16_t fields: - // * low 16 bits: owner - // * high 16 bits: next (ticket allocator) - // * We atomically increment the "next" field by (1 << 16) and use the - // * returned old value to compute our ticket. Then wait until owner == ticket. 
- // */ - // rt_atomic_t prev; - // rt_atomic_t ticket; - // rt_atomic_t owner; - - // /* Allocate a ticket by adding (1 << 16) to slock, prev holds previous value */ - // prev = rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)(1UL << 16)); - // ticket = (prev >> 16) & 0xffffUL; - - // /* Wait until owner equals our ticket */ - // for (;;) - // { - // owner = rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & 0xffffUL; - // if (owner == ticket) - // break; - // /* TODO: low-power wait for interrupt while spinning */ - // // __asm__ volatile("wfi" ::: "memory"); - // } - - // /* Ensure all following memory accesses are ordered after acquiring the lock */ - // __asm__ volatile("fence rw, rw" ::: "memory"); + /* Use ticket lock implemented on top of the 32/64-bit atomic AMO ops. + * The combined word layout (slock) maps two uint16_t fields: + * low 16 bits: owner + * high 16 bits: next (ticket allocator) + * We atomically increment the "next" field by (1 << 16) and use the + * returned old value to compute our ticket. Then wait until owner == ticket. + */ + rt_atomic_t prev; + rt_atomic_t ticket; + rt_atomic_t owner; + + /* Allocate a ticket by adding (1 << 16) to slock, prev holds previous value */ + prev = rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)(1UL << 16)); + ticket = (prev >> 16) & 0xffffUL; + + /* Wait until owner equals our ticket */ + for (;;) + { + owner = rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & 0xffffUL; + if (owner == ticket) + break; + /* TODO: low-power wait for interrupt while spinning */ + // __asm__ volatile("wfi" ::: "memory"); + } + + /* Ensure all following memory accesses are ordered after acquiring the lock */ + __asm__ volatile("fence rw, rw" ::: "memory"); } void rt_hw_spin_unlock(rt_hw_spinlock_t *lock) { - // /* Ensure memory operations before unlock are visible before owner increment */ - // __asm__ volatile("fence rw, rw" ::: "memory"); + /* Ensure memory operations before unlock are visible before owner increment */ + __asm__ volatile("fence rw, rw" ::: "memory"); - // /* Increment owner (low 16 bits) to hand over lock to next ticket */ - // rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1); + /* Increment owner (low 16 bits) to hand over lock to next ticket */ + rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1); - // // TODO: IPI interrupt to wake up other harts waiting for the lock + // TODO: IPI interrupt to wake up other harts waiting for the lock - // /* Make the increment visible to other harts */ - // __asm__ volatile("fence rw, rw" ::: "memory"); + /* Make the increment visible to other harts */ + __asm__ volatile("fence rw, rw" ::: "memory"); } void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask) { - + int cpuid = cpu_mask & -cpu_mask; // get the lowest set bit + ipi_vectors[cpuid] |= (uint8_t)ipi_vector; + sbi_send_ipi((const unsigned long *)&cpu_mask); +} + +void rt_hw_ipi_init(void) +{ + int idx = 0, cpuid = rt_cpu_get_id(); + ipi_vectors[cpuid] = 0; + /* init exceptions table */ + for (idx = 0; idx < RT_MAX_IPI; idx++) + { + ipi_desc[idx].handler = RT_NULL; + ipi_desc[idx].param = RT_NULL; + #ifdef RT_USING_INTERRUPT_INFO + rt_snprintf(ipi_desc[idx].name, RT_NAME_MAX - 1, "default"); + ipi_desc[idx].counter = 0; + #endif + } + set_csr(sie, SIP_SSIP); +} + +void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler) +{ + if(ipi_vector < RT_MAX_IPI) + { + if (ipi_isr_handler != RT_NULL) + { + 
ipi_desc[ipi_vector].handler = (rt_isr_handler_t)ipi_isr_handler; + ipi_desc[ipi_vector].param = RT_NULL; + } + } +} + +void rt_hw_ipi_handler(void) +{ + rt_uint32_t ipi_vector; + + ipi_vector = ipi_vectors[rt_cpu_get_id()]; + while (ipi_vector) + { + int bitpos = __builtin_ctz(ipi_vector); + ipi_vector &= ~(1 << bitpos); + if (bitpos < RT_MAX_IPI && ipi_desc[bitpos].handler != RT_NULL) + { + /* call the irq service routine */ + ipi_desc[bitpos].handler(bitpos, ipi_desc[bitpos].param); + } + } + ipi_vectors[rt_cpu_get_id()] = 0; + + // clear software interrupt pending bit + clear_csr(sip, SIP_SSIP); } #endif /* RT_USING_SMP */ \ No newline at end of file From 7a770cedfe9a7cdc10164e27837c714908ea4cc9 Mon Sep 17 00:00:00 2001 From: Tm-C-mT <490534897@qq.com> Date: Tue, 28 Oct 2025 20:48:13 +0800 Subject: [PATCH 06/10] [libcpu-riscv]: [surpport SMP]: Add dynamic startup based on core configuration. Add dynamic startup based on core configuration. It should be noted that to pass the SMP Utest, the maximum priority needs to be configured to 256. Signed-off-by: Mengchen Teng --- bsp/qemu-virt64-riscv/SConstruct | 20 ++++++++++++++++++++ bsp/qemu-virt64-riscv/link.lds | 10 ++++------ bsp/qemu-virt64-riscv/link_cpus.lds | 1 + bsp/qemu-virt64-riscv/qemu-dbg.sh | 16 ++++++++++++++-- bsp/qemu-virt64-riscv/run.sh | 16 ++++++++++++++-- 5 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 bsp/qemu-virt64-riscv/link_cpus.lds diff --git a/bsp/qemu-virt64-riscv/SConstruct b/bsp/qemu-virt64-riscv/SConstruct index 3387d0e1490..ae0e3375e11 100644 --- a/bsp/qemu-virt64-riscv/SConstruct +++ b/bsp/qemu-virt64-riscv/SConstruct @@ -38,5 +38,25 @@ if GetDepend('__STACKSIZE__'): stack_size = GetDepend('__STACKSIZE__') stack_lds.write('__STACKSIZE__ = %d;\n' % stack_size) stack_lds.close() +# Obtain the number of harts from rtconfig.h and write +# it into link_cpus.lds for the linker script +try: + with open('rtconfig.h', 'r') as f: + rtconfig_content = f.readlines() +except FileNotFoundError: + cpus_nr = 1 +else: + cpus_nr = 1 # default value + for line in rtconfig_content: + line = line.strip() + if line.startswith('#define') and 'RT_CPUS_NR' in line: + parts = line.split() + if len(parts) >= 3 and parts[2].isdigit(): + cpus_nr = int(parts[2]) + break + +with open('link_cpus.lds', 'w') as cpus_lds: + cpus_lds.write(f'RT_CPUS_NR = {cpus_nr};\n') + # make a building DoBuilding(TARGET, objs) diff --git a/bsp/qemu-virt64-riscv/link.lds b/bsp/qemu-virt64-riscv/link.lds index a76fed4fa30..52010cdf1dc 100644 --- a/bsp/qemu-virt64-riscv/link.lds +++ b/bsp/qemu-virt64-riscv/link.lds @@ -9,6 +9,7 @@ */ INCLUDE "link_stacksize.lds" +INCLUDE "link_cpus.lds" OUTPUT_ARCH( "riscv" ) @@ -121,12 +122,9 @@ SECTIONS { . = ALIGN(64); __stack_start__ = .; - - . += __STACKSIZE__; - __stack_cpu0 = .; - - . += __STACKSIZE__; - __stack_cpu1 = .; + /* Dynamically allocate stack areas according to RT_CPUS_NR */ + . 
+= (__STACKSIZE__ * RT_CPUS_NR); + __stack_end__ = .; } > SRAM .sbss : diff --git a/bsp/qemu-virt64-riscv/link_cpus.lds b/bsp/qemu-virt64-riscv/link_cpus.lds new file mode 100644 index 00000000000..2659b2befb4 --- /dev/null +++ b/bsp/qemu-virt64-riscv/link_cpus.lds @@ -0,0 +1 @@ +RT_CPUS_NR = 8; diff --git a/bsp/qemu-virt64-riscv/qemu-dbg.sh b/bsp/qemu-virt64-riscv/qemu-dbg.sh index a7958ef8e88..69f62e7f6fb 100755 --- a/bsp/qemu-virt64-riscv/qemu-dbg.sh +++ b/bsp/qemu-virt64-riscv/qemu-dbg.sh @@ -1,4 +1,16 @@ -qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S \ +QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -s -S" + +if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then + hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}') + if [ -z "$hart_num" ]; then + hart_num=1 + fi + QEMU_CMD="$QEMU_CMD -smp $hart_num" +fi + +QEMU_CMD="$QEMU_CMD \ -drive if=none,file=sd.bin,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \ -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \ --device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0 +-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0" + +eval $QEMU_CMD \ No newline at end of file diff --git a/bsp/qemu-virt64-riscv/run.sh b/bsp/qemu-virt64-riscv/run.sh index c94600e8b9d..e723369fc7d 100755 --- a/bsp/qemu-virt64-riscv/run.sh +++ b/bsp/qemu-virt64-riscv/run.sh @@ -24,7 +24,19 @@ if [ ! -f $path_image ]; then exit fi -qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin -smp 2 \ +QEMU_CMD="qemu-system-riscv64 -nographic -machine virt -m 256M -kernel rtthread.bin" + +if grep -q "#define RT_USING_SMP" ./rtconfig.h 2>/dev/null; then + hart_num=$(grep "RT_CPUS_NR = [0-9]*;" ./link_cpus.lds | awk -F'[=;]' '{gsub(/ /, "", $2); print $2}') + if [ -z "$hart_num" ]; then + hart_num=1 + fi + QEMU_CMD="$QEMU_CMD -smp $hart_num" +fi + +QEMU_CMD="$QEMU_CMD \ -drive if=none,file=$path_image,format=raw,id=blk0 -device virtio-blk-device,drive=blk0,bus=virtio-mmio-bus.0 \ -netdev user,id=tap0 -device virtio-net-device,netdev=tap0,bus=virtio-mmio-bus.1 \ --device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0 +-device virtio-serial-device -chardev socket,host=127.0.0.1,port=4321,server=on,wait=off,telnet=on,id=console0 -device virtserialport,chardev=console0" + +eval $QEMU_CMD \ No newline at end of file From 9cdbc3641b6d5570f76b3f1a95f7a5f6e984afca Mon Sep 17 00:00:00 2001 From: Tm-C-mT <490534897@qq.com> Date: Mon, 3 Nov 2025 20:28:58 +0800 Subject: [PATCH 07/10] [libcpu-riscv]: [surpport SMP]: Fix issues with non-standard formatting Fix issues with non-standard formatting Signed-off-by: Mengchen Teng --- bsp/qemu-virt64-riscv/driver/board.c | 6 +- libcpu/risc-v/common64/atomic_riscv.c | 92 +++++++-------- libcpu/risc-v/common64/cpuport.c | 38 ++++--- libcpu/risc-v/common64/trap.c | 155 +++++++++++++------------- libcpu/risc-v/virt64/interrupt.c | 26 ++--- libcpu/risc-v/virt64/interrupt.h | 6 +- 6 files changed, 169 insertions(+), 154 deletions(-) diff --git a/bsp/qemu-virt64-riscv/driver/board.c b/bsp/qemu-virt64-riscv/driver/board.c index 7d08c06ef69..092244278ed 100644 --- 
a/bsp/qemu-virt64-riscv/driver/board.c +++ b/bsp/qemu-virt64-riscv/driver/board.c @@ -24,6 +24,10 @@ #include "plic.h" #include "stack.h" +#ifdef RT_USING_SMP +#include "interrupt.h" +#endif /* RT_USING_SMP */ + #ifdef RT_USING_SMART #include "riscv_mmu.h" #include "mmu.h" @@ -88,7 +92,7 @@ void rt_hw_board_init(void) #endif /* RT_USING_CONSOLE */ rt_hw_tick_init(); - + #ifdef RT_USING_SMP /* ipi init */ rt_hw_ipi_init(); diff --git a/libcpu/risc-v/common64/atomic_riscv.c b/libcpu/risc-v/common64/atomic_riscv.c index bc1561f2ee3..08af84bf5f5 100644 --- a/libcpu/risc-v/common64/atomic_riscv.c +++ b/libcpu/risc-v/common64/atomic_riscv.c @@ -14,9 +14,9 @@ rt_atomic_t rt_hw_atomic_exchange(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -25,9 +25,9 @@ rt_atomic_t rt_hw_atomic_add(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -37,9 +37,9 @@ rt_atomic_t rt_hw_atomic_sub(volatile rt_atomic_t *ptr, rt_atomic_t val) rt_atomic_t result = 0; val = -val; #if __riscv_xlen == 32 - asm volatile ("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoadd.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoadd.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -48,9 +48,9 @@ rt_atomic_t rt_hw_atomic_xor(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoxor.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoxor.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoxor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoxor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -59,9 +59,9 @@ rt_atomic_t rt_hw_atomic_and(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoand.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoand.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoand.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoand.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -70,9 +70,9 @@ rt_atomic_t rt_hw_atomic_or(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoor.w %0, %1, (%2)" 
: "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoor.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif return result; } @@ -81,9 +81,9 @@ rt_atomic_t rt_hw_atomic_load(volatile rt_atomic_t *ptr) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoxor.w %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); + asm volatile("amoxor.w %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoxor.d %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); + asm volatile("amoxor.d %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); #endif return result; } @@ -92,9 +92,9 @@ void rt_hw_atomic_store(volatile rt_atomic_t *ptr, rt_atomic_t val) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoswap.w %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); + asm volatile("amoswap.d %0, %1, (%2)" : "=r"(result) : "r"(val), "r"(ptr) : "memory"); #endif } @@ -103,9 +103,9 @@ rt_atomic_t rt_hw_atomic_flag_test_and_set(volatile rt_atomic_t *ptr) rt_atomic_t result = 0; rt_atomic_t temp = 1; #if __riscv_xlen == 32 - asm volatile ("amoor.w %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); + asm volatile("amoor.w %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); + asm volatile("amoor.d %0, %1, (%2)" : "=r"(result) : "r"(temp), "r"(ptr) : "memory"); #endif return result; } @@ -114,9 +114,9 @@ void rt_hw_atomic_flag_clear(volatile rt_atomic_t *ptr) { rt_atomic_t result = 0; #if __riscv_xlen == 32 - asm volatile ("amoand.w %0, x0, (%1)" : "=r"(result) :"r"(ptr) : "memory"); + asm volatile("amoand.w %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); #elif __riscv_xlen == 64 - asm volatile ("amoand.d %0, x0, (%1)" : "=r"(result) :"r"(ptr) : "memory"); + asm volatile("amoand.d %0, x0, (%1)" : "=r"(result) : "r"(ptr) : "memory"); #endif } @@ -126,34 +126,34 @@ rt_atomic_t rt_hw_atomic_compare_exchange_strong(volatile rt_atomic_t *ptr, rt_a rt_atomic_t result = 0; #if __riscv_xlen == 32 asm volatile( - " fence iorw, ow\n" - "1: lr.w.aq %[result], (%[ptr])\n" - " bne %[result], %[tmp], 2f\n" - " sc.w.rl %[tmp], %[desired], (%[ptr])\n" - " bnez %[tmp], 1b\n" - " li %[result], 1\n" - " j 3f\n" - " 2:sw %[result], (%[old])\n" - " li %[result], 0\n" - " 3:\n" - : [result]"+r" (result), [tmp]"+r" (tmp), [ptr]"+r" (ptr) - : [desired]"r" (desired), [old]"r"(old) - : "memory"); + " fence iorw, ow\n" + "1: lr.w.aq %[result], (%[ptr])\n" + " bne %[result], %[tmp], 2f\n" + " sc.w.rl %[tmp], %[desired], (%[ptr])\n" + " bnez %[tmp], 1b\n" + " li %[result], 1\n" + " j 3f\n" + " 2:sw %[result], (%[old])\n" + " li %[result], 0\n" + " 3:\n" + : [result] "+r"(result), [tmp] "+r"(tmp), [ptr] "+r"(ptr) + : [desired] "r"(desired), [old] "r"(old) + : "memory"); #elif __riscv_xlen == 64 asm volatile( - " fence iorw, ow\n" - "1: lr.d.aq %[result], (%[ptr])\n" - " bne %[result], %[tmp], 2f\n" - " sc.d.rl %[tmp], %[desired], (%[ptr])\n" - " bnez %[tmp], 1b\n" - " li %[result], 1\n" - " j 
3f\n" - " 2:sd %[result], (%[old])\n" - " li %[result], 0\n" - " 3:\n" - : [result]"+r" (result), [tmp]"+r" (tmp), [ptr]"+r" (ptr) - : [desired]"r" (desired), [old]"r"(old) - : "memory"); + " fence iorw, ow\n" + "1: lr.d.aq %[result], (%[ptr])\n" + " bne %[result], %[tmp], 2f\n" + " sc.d.rl %[tmp], %[desired], (%[ptr])\n" + " bnez %[tmp], 1b\n" + " li %[result], 1\n" + " j 3f\n" + " 2:sd %[result], (%[old])\n" + " li %[result], 0\n" + " 3:\n" + : [result] "+r"(result), [tmp] "+r"(tmp), [ptr] "+r"(ptr) + : [desired] "r"(desired), [old] "r"(old) + : "memory"); #endif return result; } diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c index 9c0b2d69a46..29cbe6e9cee 100644 --- a/libcpu/risc-v/common64/cpuport.c +++ b/libcpu/risc-v/common64/cpuport.c @@ -18,16 +18,21 @@ #include #include +#ifdef RT_USING_SMP +#include "tick.h" +#include "interrupt.h" +#endif /* RT_USING_SMP */ + #ifdef ARCH_RISCV_FPU - #define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS) +#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM | SSTATUS_FS) #else - #define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM) +#define K_SSTATUS_DEFAULT_BASE (SSTATUS_SPP | SSTATUS_SPIE | SSTATUS_SUM) #endif #ifdef ARCH_RISCV_VECTOR - #define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS) +#define K_SSTATUS_DEFAULT (K_SSTATUS_DEFAULT_BASE | SSTATUS_VS) #else - #define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE +#define K_SSTATUS_DEFAULT K_SSTATUS_DEFAULT_BASE #endif #ifdef RT_USING_SMART #include @@ -51,8 +56,7 @@ volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0; void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus) { - rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t) - ((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame)); + rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame)); rt_memset(frame, 0, sizeof(struct rt_hw_switch_frame)); @@ -68,8 +72,8 @@ int rt_hw_cpu_id(void) return 0; #else /* Currently, the hartid is stored in the satp register. */ - uint32_t hart_id; - asm volatile ("csrr %0, satp" : "=r"(hart_id)); + rt_ubase_t hart_id; + asm volatile("csrr %0, satp" : "=r"(hart_id)); return hart_id; #endif /* RT_USING_SMP */ } @@ -126,7 +130,7 @@ void rt_hw_context_switch_interrupt(rt_ubase_t from, rt_ubase_t to, rt_thread_t } #else void rt_hw_context_switch_interrupt(void *context, rt_ubase_t from, rt_ubase_t to, struct rt_thread *to_thread) -{ +{ /* Perform architecture-specific context switch. This call will * restore the target thread context and should not return when a * switch is performed. The caller (scheduler) invoked this function @@ -166,12 +170,16 @@ void rt_hw_secondary_cpu_up(void) rt_uint64_t entry_pa; int hart, ret; - /* translate kernel virtual _start to physical address */ - entry_pa = (rt_uint64_t)&_start;//(rt_uint64_t)rt_kmem_v2p((void *)&_start); + /* translate kernel virtual _start to physical address. + * TODO: Virtual-to-physical translation is not needed here + * because &_start is already a physical address on this platform. + */ + entry_pa = (rt_uint64_t)&_start; for (hart = 0; hart < RT_CPUS_NR; hart++) { - if (hart == boot_hartid) continue; + if (hart == boot_hartid) + continue; ret = sbi_hsm_hart_start((unsigned long)hart, (unsigned long)entry_pa, @@ -188,14 +196,12 @@ void secondary_cpu_entry(void) /* The PLIC peripheral interrupts are currently handled by the boot_hart. 
*/ /* Enable the Supervisor-Timer bit in SIE */ rt_hw_tick_init(); - -#ifdef RT_USING_SMP + /* ipi init */ rt_hw_ipi_init(); -#endif /* RT_USING_SMP */ rt_hw_spin_lock(&_cpus_lock); /* invoke system scheduler start for secondary CPU */ rt_system_scheduler_start(); } -#endif /* RT_USING_SMP */ \ No newline at end of file +#endif /* RT_USING_SMP */ diff --git a/libcpu/risc-v/common64/trap.c b/libcpu/risc-v/common64/trap.c index 52e51ce5322..4cfc9d82804 100644 --- a/libcpu/risc-v/common64/trap.c +++ b/libcpu/risc-v/common64/trap.c @@ -76,44 +76,44 @@ void dump_regs(struct rt_hw_stack_frame *regs) rt_kprintf("\tCurrent Page Table(Physical) = %p\n", __MASKVALUE(satp_v, __MASK(44)) << PAGE_OFFSET_BIT); rt_kprintf("\tCurrent ASID = %p\n", __MASKVALUE(satp_v >> 44, __MASK(16)) - << PAGE_OFFSET_BIT); + << PAGE_OFFSET_BIT); const char *mode_str = "Unknown Address Translation/Protection Mode"; switch (__MASKVALUE(satp_v >> 60, __MASK(4))) { - case 0: - mode_str = "No Address Translation/Protection Mode"; - break; + case 0: + mode_str = "No Address Translation/Protection Mode"; + break; - case 8: - mode_str = "Page-based 39-bit Virtual Addressing Mode"; - break; + case 8: + mode_str = "Page-based 39-bit Virtual Addressing Mode"; + break; - case 9: - mode_str = "Page-based 48-bit Virtual Addressing Mode"; - break; + case 9: + mode_str = "Page-based 48-bit Virtual Addressing Mode"; + break; } rt_kprintf("\tMode = %s\n", mode_str); rt_kprintf("-----------------Dump OK---------------------\n"); } -static const char *Exception_Name[] = {"Instruction Address Misaligned", - "Instruction Access Fault", - "Illegal Instruction", - "Breakpoint", - "Load Address Misaligned", - "Load Access Fault", - "Store/AMO Address Misaligned", - "Store/AMO Access Fault", - "Environment call from U-mode", - "Environment call from S-mode", - "Reserved-10", - "Reserved-11", - "Instruction Page Fault", - "Load Page Fault", - "Reserved-14", - "Store/AMO Page Fault"}; +static const char *Exception_Name[] = { "Instruction Address Misaligned", + "Instruction Access Fault", + "Illegal Instruction", + "Breakpoint", + "Load Address Misaligned", + "Load Access Fault", + "Store/AMO Address Misaligned", + "Store/AMO Access Fault", + "Environment call from U-mode", + "Environment call from S-mode", + "Reserved-10", + "Reserved-11", + "Instruction Page Fault", + "Load Page Fault", + "Reserved-14", + "Store/AMO Page Fault" }; static const char *Interrupt_Name[] = { "User Software Interrupt", @@ -135,15 +135,16 @@ static volatile int nested = 0; #define ENTER_TRAP nested += 1 #define EXIT_TRAP nested -= 1 #define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \ - if (nested != 1) handle_nested_trap_panic(cause, tval, epc, eframe) + if (nested != 1) \ + handle_nested_trap_panic(cause, tval, epc, eframe) #else /* Add trap nesting detection under the SMP architecture. 
*/ -static volatile int nested[RT_CPUS_NR] = {0}; +static volatile int nested[RT_CPUS_NR] = { 0 }; #define ENTER_TRAP nested[rt_hw_cpu_id()] += 1 -#define EXIT_TRAP nested[rt_hw_cpu_id()] -= 1 +#define EXIT_TRAP nested[rt_hw_cpu_id()] -= 1 #define CHECK_NESTED_PANIC(cause, tval, epc, eframe) \ - if (nested[rt_hw_cpu_id()] != 1) \ - handle_nested_trap_panic(cause, tval, epc, eframe) + if (nested[rt_hw_cpu_id()] != 1) \ + handle_nested_trap_panic(cause, tval, epc, eframe) #endif /* RT_USING_SMP */ static const char *get_exception_msg(int id) @@ -173,44 +174,44 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc, enum rt_mm_fault_type fault_type; switch (id) { - case EP_LOAD_PAGE_FAULT: - fault_op = MM_FAULT_OP_READ; - fault_type = MM_FAULT_TYPE_GENERIC_MMU; - break; - case EP_LOAD_ACCESS_FAULT: - fault_op = MM_FAULT_OP_READ; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - case EP_LOAD_ADDRESS_MISALIGNED: - fault_op = MM_FAULT_OP_READ; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - case EP_STORE_PAGE_FAULT: - fault_op = MM_FAULT_OP_WRITE; - fault_type = MM_FAULT_TYPE_GENERIC_MMU; - break; - case EP_STORE_ACCESS_FAULT: - fault_op = MM_FAULT_OP_WRITE; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - case EP_STORE_ADDRESS_MISALIGNED: - fault_op = MM_FAULT_OP_WRITE; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - case EP_INSTRUCTION_PAGE_FAULT: - fault_op = MM_FAULT_OP_EXECUTE; - fault_type = MM_FAULT_TYPE_GENERIC_MMU; - break; - case EP_INSTRUCTION_ACCESS_FAULT: - fault_op = MM_FAULT_OP_EXECUTE; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - case EP_INSTRUCTION_ADDRESS_MISALIGNED: - fault_op = MM_FAULT_OP_EXECUTE; - fault_type = MM_FAULT_TYPE_BUS_ERROR; - break; - default: - fault_op = 0; + case EP_LOAD_PAGE_FAULT: + fault_op = MM_FAULT_OP_READ; + fault_type = MM_FAULT_TYPE_GENERIC_MMU; + break; + case EP_LOAD_ACCESS_FAULT: + fault_op = MM_FAULT_OP_READ; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + case EP_LOAD_ADDRESS_MISALIGNED: + fault_op = MM_FAULT_OP_READ; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + case EP_STORE_PAGE_FAULT: + fault_op = MM_FAULT_OP_WRITE; + fault_type = MM_FAULT_TYPE_GENERIC_MMU; + break; + case EP_STORE_ACCESS_FAULT: + fault_op = MM_FAULT_OP_WRITE; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + case EP_STORE_ADDRESS_MISALIGNED: + fault_op = MM_FAULT_OP_WRITE; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + case EP_INSTRUCTION_PAGE_FAULT: + fault_op = MM_FAULT_OP_EXECUTE; + fault_type = MM_FAULT_TYPE_GENERIC_MMU; + break; + case EP_INSTRUCTION_ACCESS_FAULT: + fault_op = MM_FAULT_OP_EXECUTE; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + case EP_INSTRUCTION_ADDRESS_MISALIGNED: + fault_op = MM_FAULT_OP_EXECUTE; + fault_type = MM_FAULT_TYPE_BUS_ERROR; + break; + default: + fault_op = 0; } if (fault_op) @@ -236,7 +237,7 @@ void handle_user(rt_ubase_t scause, rt_ubase_t stval, rt_ubase_t sepc, dump_regs(sp); rt_thread_t cur_thr = rt_thread_self(); - struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc}; + struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc }; rt_kprintf("fp = %p\n", frame.fp); lwp_backtrace_frame(cur_thr, &frame); @@ -268,12 +269,12 @@ static int illegal_inst_recoverable(rt_ubase_t stval, switch (opcode) { - case 0x57: // V - case 0x27: // scalar FLOAT - case 0x07: - case 0x73: // CSR - flag = 1; - break; + case 0x57: // V + case 0x27: // scalar FLOAT + case 0x07: + case 0x73: // CSR + flag = 1; + break; } if (flag) @@ -381,7 +382,7 @@ void handle_trap(rt_ubase_t scause, 
rt_ubase_t stval, rt_ubase_t sepc, rt_kprintf("current thread: %s\n", cur_thr->parent.name); rt_kprintf("--------------Backtrace--------------\n"); - struct rt_hw_backtrace_frame frame = {.fp = sp->s0_fp, .pc = sepc}; + struct rt_hw_backtrace_frame frame = { .fp = sp->s0_fp, .pc = sepc }; #ifdef RT_USING_SMART if (!(sp->sstatus & 0x100)) diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c index 30c99110e05..d1fc22f3fdf 100644 --- a/libcpu/risc-v/virt64/interrupt.c +++ b/libcpu/risc-v/virt64/interrupt.c @@ -16,10 +16,12 @@ #include "interrupt.h" struct rt_irq_desc irq_desc[MAX_HANDLERS]; + #ifdef RT_USING_SMP +#include "sbi.h" struct rt_irq_desc ipi_desc[RT_MAX_IPI]; -uint8_t ipi_vectors[RT_CPUS_NR] = {0}; -#endif +uint8_t ipi_vectors[RT_CPUS_NR] = { 0 }; +#endif /* RT_USING_SMP */ static rt_isr_handler_t rt_hw_interrupt_handle(rt_uint32_t vector, void *param) { @@ -57,11 +59,11 @@ void rt_hw_interrupt_umask(int vector) * @param old_handler the old interrupt service routine */ rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler, - void *param, const char *name) + void *param, const char *name) { rt_isr_handler_t old_handler = RT_NULL; - if(vector < MAX_HANDLERS) + if (vector < MAX_HANDLERS) { old_handler = irq_desc[vector].handler; if (handler != RT_NULL) @@ -118,7 +120,6 @@ rt_bool_t rt_hw_interrupt_is_disabled(void) void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock) { - union rt_hw_spinlock_t *lock = (void *)_lock; _lock->slock = 0; } @@ -146,7 +147,6 @@ void rt_hw_spin_lock(rt_hw_spinlock_t *lock) if (owner == ticket) break; /* TODO: low-power wait for interrupt while spinning */ - // __asm__ volatile("wfi" ::: "memory"); } /* Ensure all following memory accesses are ordered after acquiring the lock */ @@ -169,7 +169,7 @@ void rt_hw_spin_unlock(rt_hw_spinlock_t *lock) void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask) { - int cpuid = cpu_mask & -cpu_mask; // get the lowest set bit + int cpuid = __builtin_ctz(cpu_mask); // get the bit position of the lowest set bit ipi_vectors[cpuid] |= (uint8_t)ipi_vector; sbi_send_ipi((const unsigned long *)&cpu_mask); } @@ -183,17 +183,17 @@ void rt_hw_ipi_init(void) { ipi_desc[idx].handler = RT_NULL; ipi_desc[idx].param = RT_NULL; - #ifdef RT_USING_INTERRUPT_INFO - rt_snprintf(ipi_desc[idx].name, RT_NAME_MAX - 1, "default"); - ipi_desc[idx].counter = 0; - #endif +#ifdef RT_USING_INTERRUPT_INFO + rt_snprintf(ipi_desc[idx].name, RT_NAME_MAX - 1, "default"); + ipi_desc[idx].counter = 0; +#endif } set_csr(sie, SIP_SSIP); } void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler) { - if(ipi_vector < RT_MAX_IPI) + if (ipi_vector < RT_MAX_IPI) { if (ipi_isr_handler != RT_NULL) { @@ -223,4 +223,4 @@ void rt_hw_ipi_handler(void) // clear software interrupt pending bit clear_csr(sip, SIP_SSIP); } -#endif /* RT_USING_SMP */ \ No newline at end of file +#endif /* RT_USING_SMP */ diff --git a/libcpu/risc-v/virt64/interrupt.h b/libcpu/risc-v/virt64/interrupt.h index 5b7ff57a476..9b0acf0cd9a 100644 --- a/libcpu/risc-v/virt64/interrupt.h +++ b/libcpu/risc-v/virt64/interrupt.h @@ -42,5 +42,9 @@ void rt_hw_interrupt_init(void); void rt_hw_interrupt_mask(int vector); rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler, void *param, const char *name); void handle_trap(rt_ubase_t xcause, rt_ubase_t xtval, rt_ubase_t xepc, struct rt_hw_stack_frame *sp); - +#ifdef RT_USING_SMP +void rt_hw_ipi_handler(void); +void rt_hw_ipi_init(void); +void rt_hw_ipi_send(int 
ipi_vector, unsigned int cpu_mask);
+#endif /* RT_USING_SMP */
 #endif
From bf917a15c3d4e24ad26495b45f44aef2926343bd Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Fri, 7 Nov 2025 09:21:53 +0800
Subject: [PATCH 08/10] [libcpu-riscv]: [support SMP]: Fix the overflow issue of the spinlock's owner field

The owner field is an unsigned short, so incrementing it past 0xffff wraps it
to 0 and carries into the ticket ("next") field, corrupting it.
Solution: when the owner field is 0xffff, clear it to 0 explicitly instead of
incrementing.

Signed-off-by: Mengchen Teng 
---
 libcpu/risc-v/virt64/interrupt.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/libcpu/risc-v/virt64/interrupt.c b/libcpu/risc-v/virt64/interrupt.c
index d1fc22f3fdf..6ada8f24f88 100644
--- a/libcpu/risc-v/virt64/interrupt.c
+++ b/libcpu/risc-v/virt64/interrupt.c
@@ -158,8 +158,20 @@ void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
     /* Ensure memory operations before unlock are visible before owner increment */
     __asm__ volatile("fence rw, rw" ::: "memory");
 
-    /* Increment owner (low 16 bits) to hand over lock to next ticket */
-    rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1);
+    /* Increment owner (low 16 bits) to hand over lock to next ticket.
+     * Use an atomic load of the combined slock word and compare the low
+     * 16-bit owner field. If the owner would overflow (0xffff), clear the owner
+     * field atomically by ANDing with 0xffff0000; otherwise increment owner by 1.
+     */
+    if ((rt_hw_atomic_load((volatile rt_atomic_t *)&lock->slock) & (rt_atomic_t)0xffffUL) == (rt_atomic_t)0xffffUL)
+    {
+        /* Atomically clear owner (low 16 bits) when it overflows. Keep the next ticket field. */
+        rt_hw_atomic_and((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)0xffff0000UL);
+    }
+    else
+    {
+        rt_hw_atomic_add((volatile rt_atomic_t *)&lock->slock, (rt_atomic_t)1);
+    }
From a59652d5a1444bddd5ab3483e363d63bd762784d Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Fri, 14 Nov 2025 21:20:56 +0800
Subject: [PATCH 09/10] [libcpu-riscv]: [support SMP]: Support the SMP feature under the Smart framework/environment

By defining a custom .percpu section and controlling the MMU page table
entries that map it, different cores access different physical memory through
the same virtual address (percpu_hartid), and each core stores its own hartid
there. When the MMU is not in use (so satp is free for other purposes), the
hartid is still kept in the satp register as before.
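
The idea, as a sketch (the declaration below is illustrative; the actual code
reaches the area through the percpu_hartid pointer set up in cpuport.c, and
my_hartid is a made-up name):

    /* one virtual address, RT_CPUS_NR physical copies: the linker script
     * reserves the clones and mmu.c maps each hart to its own copy */
    static rt_ubase_t my_hartid __attribute__((section(".percpu")));

    /* once the per-hart mapping is installed, a plain read yields the
     * local hart's id */
    static int cpu_id_sketch(void) { return (int)my_hartid; }
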
Signed-off-by: Mengchen Teng --- bsp/qemu-virt64-riscv/driver/board.h | 4 +- bsp/qemu-virt64-riscv/link.lds | 18 ++ bsp/qemu-virt64-riscv/link_smart.lds | 28 ++- libcpu/risc-v/common64/context_gcc.S | 10 +- libcpu/risc-v/common64/cpuport.c | 64 ++++- libcpu/risc-v/common64/cpuport.h | 4 + libcpu/risc-v/common64/encoding.h | 2 + libcpu/risc-v/common64/mmu.c | 363 ++++++++++++++++++++++----- libcpu/risc-v/common64/mmu.h | 9 + libcpu/risc-v/common64/startup_gcc.S | 77 +++++- libcpu/risc-v/virt64/interrupt.h | 3 + 11 files changed, 497 insertions(+), 85 deletions(-) diff --git a/bsp/qemu-virt64-riscv/driver/board.h b/bsp/qemu-virt64-riscv/driver/board.h index 433b1d8f53b..29b8716158b 100644 --- a/bsp/qemu-virt64-riscv/driver/board.h +++ b/bsp/qemu-virt64-riscv/driver/board.h @@ -15,14 +15,14 @@ extern unsigned int __bss_start; extern unsigned int __bss_end; - +extern unsigned int _end; #ifndef RT_USING_SMART #define KERNEL_VADDR_START 0x0 #endif #define VIRT64_SBI_MEMSZ (0x200000) -#define RT_HW_HEAP_BEGIN ((void *)&__bss_end) +#define RT_HW_HEAP_BEGIN ((void *)&_end) #define RT_HW_HEAP_END ((void *)(RT_HW_HEAP_BEGIN + 64 * 1024 * 1024)) #define RT_HW_PAGE_START RT_HW_HEAP_END #define RT_HW_PAGE_END ((void *)(KERNEL_VADDR_START + (256 * 1024 * 1024 - VIRT64_SBI_MEMSZ))) diff --git a/bsp/qemu-virt64-riscv/link.lds b/bsp/qemu-virt64-riscv/link.lds index 52010cdf1dc..3040665e648 100644 --- a/bsp/qemu-virt64-riscv/link.lds +++ b/bsp/qemu-virt64-riscv/link.lds @@ -145,6 +145,24 @@ SECTIONS __bss_end = .; } > SRAM + .percpu (NOLOAD) : + { + /* Align for MMU early map */ + . = ALIGN(1<<(12+9)); + PROVIDE(__percpu_start = .); + + *(.percpu) + + /* Align for MMU early map */ + . = ALIGN(1<<(12+9)); + + PROVIDE(__percpu_end = .); + + /* Clone the area */ + . = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1); + PROVIDE(__percpu_real_end = .); + } > SRAM + _end = .; /* Stabs debugging sections. */ diff --git a/bsp/qemu-virt64-riscv/link_smart.lds b/bsp/qemu-virt64-riscv/link_smart.lds index ddf596630ed..ff852ab2abb 100644 --- a/bsp/qemu-virt64-riscv/link_smart.lds +++ b/bsp/qemu-virt64-riscv/link_smart.lds @@ -9,6 +9,7 @@ */ INCLUDE "link_stacksize.lds" +INCLUDE "link_cpus.lds" OUTPUT_ARCH( "riscv" ) @@ -122,12 +123,9 @@ SECTIONS { . = ALIGN(64); __stack_start__ = .; - - . += __STACKSIZE__; - __stack_cpu0 = .; - - . += __STACKSIZE__; - __stack_cpu1 = .; + /* Dynamically allocate stack areas according to RT_CPUS_NR */ + . += (__STACKSIZE__ * RT_CPUS_NR); + __stack_end__ = .; } > SRAM .sbss : @@ -147,6 +145,24 @@ SECTIONS *(COMMON) __bss_end = .; } > SRAM + + .percpu (NOLOAD) : + { + /* Align for MMU early map */ + . = ALIGN(1<<(12+9)); + PROVIDE(__percpu_start = .); + + *(.percpu) + + /* Align for MMU early map */ + . = ALIGN(1<<(12+9)); + + PROVIDE(__percpu_end = .); + + /* Clone the area */ + . = __percpu_end + (__percpu_end - __percpu_start) * (RT_CPUS_NR - 1); + PROVIDE(__percpu_real_end = .); + } > SRAM _end = .; diff --git a/libcpu/risc-v/common64/context_gcc.S b/libcpu/risc-v/common64/context_gcc.S index 6d667362983..51cb79ee9ca 100644 --- a/libcpu/risc-v/common64/context_gcc.S +++ b/libcpu/risc-v/common64/context_gcc.S @@ -93,9 +93,12 @@ rt_hw_context_switch_to: call rt_thread_self mv s1, a0 +#ifndef RT_USING_SMP + //if enable RT_USING_SMP, it will finished by rt_cpus_lock_status_restore. 
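+    // (rt_cpus_lock_status_restore() updates the per-CPU scheduler state for
+    //  the incoming thread and, under RT_USING_SMART, performs the lwp
+    //  address-space switch itself, which is why lwp_aspace_switch is only
+    //  called in the !RT_USING_SMP build.)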
#ifdef RT_USING_SMART - call lwp_aspace_switch + call lwp_aspace_switch #endif +#endif RESTORE_CONTEXT sret @@ -134,9 +137,12 @@ rt_hw_context_switch: call rt_thread_self mv s1, a0 +#ifndef RT_USING_SMP + // if enable RT_USING_SMP, it will finished by rt_cpus_lock_status_restore. #ifdef RT_USING_SMART - call lwp_aspace_switch + call lwp_aspace_switch #endif +#endif RESTORE_CONTEXT sret diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c index 29cbe6e9cee..f9969cf7d8a 100644 --- a/libcpu/risc-v/common64/cpuport.c +++ b/libcpu/risc-v/common64/cpuport.c @@ -18,6 +18,10 @@ #include #include +#ifdef ARCH_MM_MMU +#include "mmu.h" +#endif + #ifdef RT_USING_SMP #include "tick.h" #include "interrupt.h" @@ -54,6 +58,10 @@ volatile rt_ubase_t rt_interrupt_to_thread = 0; */ volatile rt_ubase_t rt_thread_switch_interrupt_flag = 0; +#ifdef ARCH_MM_MMU +static rt_ubase_t *percpu_hartid; +#endif + void *_rt_hw_stack_init(rt_ubase_t *sp, rt_ubase_t ra, rt_ubase_t sstatus) { rt_hw_switch_frame_t frame = (rt_hw_switch_frame_t)((rt_ubase_t)sp - sizeof(struct rt_hw_switch_frame)); @@ -71,10 +79,19 @@ int rt_hw_cpu_id(void) #ifndef RT_USING_SMP return 0; #else - /* Currently, the hartid is stored in the satp register. */ - rt_ubase_t hart_id; - asm volatile("csrr %0, satp" : "=r"(hart_id)); - return hart_id; + if (rt_kmem_pvoff() != 0) + { + return *percpu_hartid; + } + else + { + // if not enable MMU or pvoff==0, read hartid from satp register + rt_ubase_t hartid; + asm volatile("csrr %0, satp" : "=r"(hartid)); + return hartid; + } + + #endif /* RT_USING_SMP */ } @@ -170,11 +187,19 @@ void rt_hw_secondary_cpu_up(void) rt_uint64_t entry_pa; int hart, ret; - /* translate kernel virtual _start to physical address. - * TODO: Virtual-to-physical translation is not needed here - * because &_start is already a physical address on this platform. - */ + /* translate kernel virtual _start to physical address. */ +#ifdef ARCH_MM_MMU + if (rt_kmem_pvoff() != 0) + { + entry_pa = (rt_uint64_t)rt_kmem_v2p(&_start); + } + else + { + entry_pa = (rt_uint64_t)&_start; + } +#else entry_pa = (rt_uint64_t)&_start; +#endif /* ARCH_MM_MMU */ for (hart = 0; hart < RT_CPUS_NR; hart++) { @@ -191,8 +216,31 @@ void rt_hw_secondary_cpu_up(void) } } +#ifdef ARCH_MM_MMU +void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid) +{ + rt_ubase_t *percpu_hartid_paddr; + rt_size_t percpu_size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start); + + percpu_hartid = percpu_ptr; + + // from virtual address to physical address + percpu_ptr = (rt_ubase_t *)((rt_ubase_t)percpu_ptr + (rt_ubase_t)rt_kmem_pvoff()); + percpu_hartid_paddr = percpu_ptr; + + + /* Save to the real area */ + *(rt_ubase_t *)((void *)percpu_hartid_paddr + hartid * percpu_size) = hartid; +} +#endif /* ARCH_MM_MMU */ + void secondary_cpu_entry(void) { + +#ifdef RT_USING_SMART + /* switch to kernel address space */ + rt_hw_aspace_switch(&rt_kernel_space); +#endif /* The PLIC peripheral interrupts are currently handled by the boot_hart. 
*/ /* Enable the Supervisor-Timer bit in SIE */ rt_hw_tick_init(); diff --git a/libcpu/risc-v/common64/cpuport.h b/libcpu/risc-v/common64/cpuport.h index ef241ae2600..9830b0dc266 100644 --- a/libcpu/risc-v/common64/cpuport.h +++ b/libcpu/risc-v/common64/cpuport.h @@ -43,6 +43,10 @@ rt_inline void rt_hw_isb(void) __asm__ volatile(OPC_FENCE_I:::"memory"); } +#ifdef ARCH_MM_MMU +void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid); +#endif + #endif #endif diff --git a/libcpu/risc-v/common64/encoding.h b/libcpu/risc-v/common64/encoding.h index 7e906fedd83..aed6bd8ca29 100644 --- a/libcpu/risc-v/common64/encoding.h +++ b/libcpu/risc-v/common64/encoding.h @@ -176,6 +176,8 @@ #define PTE_A 0x040 // Accessed #define PTE_D 0x080 // Dirty #define PTE_SOFT 0x300 // Reserved for Software +#define PTE_ATTR_RW (PTE_R | PTE_W) +#define PTE_ATTR_RWX (PTE_ATTR_RW | PTE_X) #define PTE_PPN_SHIFT 10 diff --git a/libcpu/risc-v/common64/mmu.c b/libcpu/risc-v/common64/mmu.c index 78f06be9be2..50b521dfc34 100644 --- a/libcpu/risc-v/common64/mmu.c +++ b/libcpu/risc-v/common64/mmu.c @@ -38,10 +38,21 @@ static size_t _unmap_area(struct rt_aspace *aspace, void *v_addr); +// Define the structure of early page table +struct page_table +{ + unsigned long page[ARCH_PAGE_SIZE / sizeof(unsigned long)]; +}; +static struct page_table *__init_page_array; + +#ifndef RT_USING_SMP static void *current_mmu_table = RT_NULL; +#else +static void *current_mmu_table[RT_CPUS_NR] = { RT_NULL }; +#endif /* RT_USING_SMP */ volatile __attribute__((aligned(4 * 1024))) -rt_ubase_t MMUTable[__SIZE(VPN2_BIT)]; +rt_ubase_t MMUTable[__SIZE(VPN2_BIT) * RT_CPUS_NR]; /** * @brief Switch the current address space to the specified one. @@ -69,8 +80,15 @@ void rt_hw_aspace_switch(rt_aspace_t aspace) #else /* !ARCH_USING_ASID */ void rt_hw_aspace_switch(rt_aspace_t aspace) { - uintptr_t page_table = (uintptr_t)rt_kmem_v2p(aspace->page_table); + // It is necessary to find the MMU page table specific to each core. + uint32_t hartid = rt_cpu_get_id(); + uintptr_t ptr = (uintptr_t)aspace->page_table + (uintptr_t)(hartid * ARCH_PAGE_SIZE); + uintptr_t page_table = (uintptr_t)rt_kmem_v2p((void *)ptr); +#ifndef RT_USING_SMP current_mmu_table = aspace->page_table; +#else + current_mmu_table[rt_hw_cpu_id()] = (void *)ptr; +#endif write_csr(satp, (((size_t)SATP_MODE) << SATP_MODE_OFFSET) | ((rt_ubase_t)page_table >> PAGE_OFFSET_BIT)); @@ -85,7 +103,11 @@ void rt_hw_asid_init(void) /* get current page table. */ void *rt_hw_mmu_tbl_get() { +#ifndef RT_USING_SMP return current_mmu_table; +#else + return current_mmu_table[rt_hw_cpu_id()]; +#endif /* RT_USING_SMP */ } /* Map a single virtual address page to a physical address page in the page table. 
*/ @@ -98,66 +120,153 @@ static int _map_one_page(struct rt_aspace *aspace, void *va, void *pa, l1_off = GET_L1((size_t)va); l2_off = GET_L2((size_t)va); l3_off = GET_L3((size_t)va); + // create map for each hart + for (int hartid = 0; hartid < RT_CPUS_NR; hartid++) + { + mmu_l1 = (rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(hartid * ARCH_PAGE_SIZE)) + l1_off; - mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off; + if (PTE_USED(*mmu_l1)) + { + mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET); + } + else + { + mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0); - if (PTE_USED(*mmu_l1)) - { - mmu_l2 = (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*mmu_l1), PV_OFFSET); - } - else - { - mmu_l2 = (rt_ubase_t *)rt_pages_alloc(0); + if (mmu_l2) + { + rt_memset(mmu_l2, 0, PAGE_SIZE); + rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE); + *mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET), + PAGE_DEFAULT_ATTR_NEXT); + rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1)); + } + else + { + return -1; + } + } - if (mmu_l2) + if (PTE_USED(*(mmu_l2 + l2_off))) { - rt_memset(mmu_l2, 0, PAGE_SIZE); - rt_hw_cpu_dcache_clean(mmu_l2, PAGE_SIZE); - *mmu_l1 = COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l2, PV_OFFSET), - PAGE_DEFAULT_ATTR_NEXT); - rt_hw_cpu_dcache_clean(mmu_l1, sizeof(*mmu_l1)); + RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off))); + mmu_l3 = + (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET); } else { - return -1; + mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0); + + if (mmu_l3) + { + rt_memset(mmu_l3, 0, PAGE_SIZE); + rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE); + *(mmu_l2 + l2_off) = + COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET), + PAGE_DEFAULT_ATTR_NEXT); + rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2)); + /* declares a reference to parent page table */ + rt_page_ref_inc((void *)mmu_l2, 0); + } + else + { + return -1; + } } - } - if (PTE_USED(*(mmu_l2 + l2_off))) - { - RT_ASSERT(!PAGE_IS_LEAF(*(mmu_l2 + l2_off))); - mmu_l3 = - (rt_ubase_t *)PPN_TO_VPN(GET_PADDR(*(mmu_l2 + l2_off)), PV_OFFSET); + RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off))); + /* declares a reference to parent page table */ + rt_page_ref_inc((void *)mmu_l3, 0); + *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr); + rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off))); } - else - { - mmu_l3 = (rt_ubase_t *)rt_pages_alloc(0); + + return 0; +} + +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) +static int _map_percpu_area(rt_ubase_t *table, void *va, void *pa, int cpu_id) +{ + unsigned long page; + rt_ubase_t off, level_shift; + + level_shift = PPN2_SHIFT; - if (mmu_l3) + // map pages - 4KB + for (int level = 0; level < 2; ++level) + { + off = ((rt_ubase_t)va >> level_shift) & VPN_MASK; + if (table[off] & PTE_V) { - rt_memset(mmu_l3, 0, PAGE_SIZE); - rt_hw_cpu_dcache_clean(mmu_l3, PAGE_SIZE); - *(mmu_l2 + l2_off) = - COMBINEPTE((rt_ubase_t)VPN_TO_PPN(mmu_l3, PV_OFFSET), - PAGE_DEFAULT_ATTR_NEXT); - rt_hw_cpu_dcache_clean(mmu_l2, sizeof(*mmu_l2)); - /* declares a reference to parent page table */ - rt_page_ref_inc((void *)mmu_l2, 0); + /* Step into the next level page table */ + table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT); + level_shift -= VPN_BITS; + continue; } - else + if (!(page = get_free_page())) { - return -1; + return MMU_MAP_ERROR_NOPAGE; } + rt_memset((void *)page, 0, ARCH_PAGE_SIZE); + table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_BITS) | PTE_V; + + rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *)); + + /* Step into the next level page 
table */
+        table = (unsigned long *)((table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT);
+
+        level_shift -= VPN_BITS;
     }
-    RT_ASSERT(!PTE_USED(*(mmu_l3 + l3_off)));
-    /* declares a reference to parent page table */
-    rt_page_ref_inc((void *)mmu_l3, 0);
-    *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr);
-    rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off)));
-    return 0;
+    off = ((rt_ubase_t)va >> level_shift) & VPN_MASK;
+    table[off] = (((rt_ubase_t)pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB;
+
+    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, table + off, sizeof(void *));
+
+    return ARCH_PAGE_SIZE;
 }
+// Ensure that the .percpu section is mapped to its per-core physical area on every hart.
+static void rt_hw_percpu_mmu_init_check(void)
+{
+    size_t mapped, size;
+    void *page_table, *vaddr, *paddr;
+    static rt_bool_t inited = RT_FALSE;
+
+    if (inited)
+    {
+        return;
+    }
+    inited = RT_TRUE;
+
+    page_table = rt_kernel_space.page_table;
+
+    for (int hartid = 0; hartid < RT_CPUS_NR; ++hartid)
+    {
+        vaddr = &__percpu_start;
+        paddr = vaddr + rt_kmem_pvoff();
+        size = (size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
+        /* Offset to per-CPU partition for current CPU */
+        paddr += size * hartid;
+
+        while (size > 0)
+        {
+            MM_PGTBL_LOCK(&rt_kernel_space);
+            mapped = _map_percpu_area(page_table, vaddr, paddr, hartid);
+            MM_PGTBL_UNLOCK(&rt_kernel_space);
+
+            RT_ASSERT(mapped > 0);
+
+            size -= mapped;
+            vaddr += mapped;
+            paddr += mapped;
+        }
+
+        page_table += ARCH_PAGE_SIZE;
+    }
+}
+#endif /* RT_USING_SMP && ARCH_MM_MMU */
+
 /**
  * @brief Maps a virtual address space to a physical address space.
  *
@@ -185,24 +294,35 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr,
     int ret = -1;
     void *unmap_va = v_addr;
     size_t npages = size >> ARCH_PAGE_SHIFT;
+#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU)
+    // Map the memory of the .percpu section separately for each core.
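+    // (rt_hw_percpu_mmu_init_check() runs once: it walks the RT_CPUS_NR root
+    //  tables of rt_kernel_space and wires VA [__percpu_start, __percpu_end)
+    //  to a hart-private physical window, so the loop below must skip and
+    //  never remap those pages.)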
+ rt_hw_percpu_mmu_init_check(); +#endif /* TODO trying with HUGEPAGE here */ while (npages--) { - MM_PGTBL_LOCK(aspace); - ret = _map_one_page(aspace, v_addr, p_addr, attr); - MM_PGTBL_UNLOCK(aspace); - if (ret != 0) +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) + // skip mapping .percpu section pages + if (v_addr < (void *)&__percpu_start || + v_addr >= (void *)&__percpu_end) +#endif { - /* error, undo map */ - while (unmap_va != v_addr) + MM_PGTBL_LOCK(aspace); + ret = _map_one_page(aspace, v_addr, p_addr, attr); + MM_PGTBL_UNLOCK(aspace); + if (ret != 0) { - MM_PGTBL_LOCK(aspace); - _unmap_area(aspace, unmap_va); - MM_PGTBL_UNLOCK(aspace); - unmap_va += ARCH_PAGE_SIZE; + /* error, undo map */ + while (unmap_va != v_addr) + { + MM_PGTBL_LOCK(aspace); + _unmap_area(aspace, unmap_va); + MM_PGTBL_UNLOCK(aspace); + unmap_va += ARCH_PAGE_SIZE; + } + break; } - break; } v_addr += ARCH_PAGE_SIZE; p_addr += ARCH_PAGE_SIZE; @@ -216,6 +336,98 @@ void *rt_hw_mmu_map(struct rt_aspace *aspace, void *v_addr, void *p_addr, return NULL; } +#ifdef ARCH_MM_MMU +void set_free_page(void *page_array) +{ + __init_page_array = page_array; +} + +// Early-stage page allocator +unsigned long get_free_page(void) +{ + static unsigned long page_off = 0UL; + + if (page_off < ARCH_PAGE_SIZE / sizeof(unsigned long)) + { + return (unsigned long)(__init_page_array[page_off++].page); + } + + return 0; +} + +#ifdef RT_USING_SMP +// Perform early mapping for the .percpu section +static int rt_hw_mmu_map_percpu_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa) +{ + unsigned long page; + rt_ubase_t off, level_shift; + + level_shift = PPN2_SHIFT; + + // page size 2MB + off = (va >> level_shift) & VPN_MASK; + // Step into the next level page table + tbl = (rt_ubase_t *)((tbl[off] >> PTE_BITS) << ARCH_PAGE_SHIFT); + level_shift -= VPN_BITS; + + off = (va >> level_shift) & VPN_MASK; + tbl[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | MMU_MAP_K_RWCB; + + asm volatile("sfence.vma x0, x0"); + return 0; +} +#endif /* RT_USING_SMP */ + +static int rt_hw_mmu_map_early(rt_ubase_t *tbl, rt_ubase_t va, rt_ubase_t pa, + rt_ubase_t attr) +{ + unsigned long page, *table; + rt_ubase_t off, level_shift; + + if ((va & (L2_PAGE_SIZE - 1)) || (pa & (L2_PAGE_SIZE - 1))) + { + return MMU_MAP_ERROR_VANOTALIGN; + } + + table = tbl; + level_shift = PPN2_SHIFT; + + // page size 2MB + for (int level = 0; level < 1; ++level) + { + off = (va >> level_shift) & VPN_MASK; + + if (!(table[off] & PTE_V)) + { + if (!(page = get_free_page())) + { + return MMU_MAP_ERROR_NOPAGE; + } + + rt_memset((void *)page, 0, ARCH_PAGE_SIZE); + table[off] = ((page >> ARCH_PAGE_SHIFT) << PTE_PPN_SHIFT) | PTE_V; + } + + if ((table[off] & PTE_ATTR_RWX) != 0) + { + /* No a page! 
*/ + return MMU_MAP_ERROR_CONFLICT; + } + + /* Step into the next level page table */ + page = (table[off] >> PTE_BITS) << ARCH_PAGE_SHIFT; + table = (unsigned long *)page; + + level_shift -= VPN_BITS; + } + + off = (va >> level_shift) & VPN_MASK; + table[off] = ((pa >> ARCH_PAGE_SHIFT) << PTE_BITS) | attr; + + return 0; +} +#endif + /* unmap page table entry */ static void _unmap_pte(rt_ubase_t *pentry, rt_ubase_t *lvl_entry[], int level) { @@ -456,7 +668,7 @@ static rt_ubase_t *_query(struct rt_aspace *aspace, void *vaddr, int *level) return RT_NULL; } - mmu_l1 = ((rt_ubase_t *)aspace->page_table) + l1_off; + mmu_l1 = ((rt_ubase_t *)((rt_ubase_t)aspace->page_table + (rt_ubase_t)(rt_hw_cpu_id() * ARCH_PAGE_SIZE))) + l1_off; if (PTE_USED(*mmu_l1)) { @@ -649,7 +861,9 @@ void rt_hw_mmu_setup(rt_aspace_t aspace, struct mem_desc *mdesc, int desc_nr) #define SATP_BASE ((rt_ubase_t)SATP_MODE << SATP_MODE_OFFSET) extern unsigned int __bss_end; - +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) +extern unsigned int __percpu_real_end; +#endif /** * @brief Early memory setup function for hardware initialization. * @@ -662,12 +876,12 @@ extern unsigned int __bss_end; * before the memory management system is fully operational. * Here the identity mapping is implemented by a 1-stage page table, whose page size is 1GB. */ -void rt_hw_mem_setup_early(void) +void rt_hw_mem_setup_early(void *pgtbl, rt_uint64_t hartid) { - rt_ubase_t pv_off; + rt_ubase_t pv_off, size; rt_ubase_t ps = 0x0; rt_ubase_t vs = 0x0; - rt_ubase_t *early_pgtbl = (rt_ubase_t *)(((size_t)&__bss_end + 4095) & ~0xfff); + rt_ubase_t *early_pgtbl = (rt_ubase_t *)(pgtbl + hartid * ARCH_PAGE_SIZE); /* calculate pv_offset */ void *symb_pc; @@ -705,14 +919,39 @@ void rt_hw_mem_setup_early(void) vs = ps - pv_off; /* relocate region */ - rt_ubase_t vs_idx = GET_L1(vs); - rt_ubase_t ve_idx = GET_L1(vs + 0x80000000); - for (size_t i = vs_idx; i < ve_idx; i++) + rt_ubase_t ve = vs + 0x80000000; +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) + while (vs < ve) + { + rt_hw_mmu_map_early(early_pgtbl, vs, ps, MMU_MAP_EARLY); + vs += L2_PAGE_SIZE; + ps += L2_PAGE_SIZE; + } +#else + for (int i = GET_L1(vs); i < GET_L1(ve); i++) { early_pgtbl[i] = COMBINEPTE(ps, MMU_MAP_EARLY); ps += L1_PAGE_SIZE; } +#endif +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) + // map .percpu section + ps = (rt_ubase_t)&__percpu_start; + vs = ps - rt_kmem_pvoff(); + size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start); + /* Offset to per-CPU partition for current CPU */ + ps += hartid * size; + ve = vs + size; + + while (vs < ve) + { + /* Map physical address per-CPU partition */ + rt_hw_mmu_map_percpu_early(early_pgtbl, vs, ps); + ps += L2_PAGE_SIZE; + vs += L2_PAGE_SIZE; + } +#endif /* apply new mapping */ asm volatile("sfence.vma x0, x0"); write_csr(satp, SATP_BASE | ((size_t)early_pgtbl >> PAGE_OFFSET_BIT)); diff --git a/libcpu/risc-v/common64/mmu.h b/libcpu/risc-v/common64/mmu.h index cb9ae45fa4a..eb4dd9a629c 100644 --- a/libcpu/risc-v/common64/mmu.h +++ b/libcpu/risc-v/common64/mmu.h @@ -56,6 +56,14 @@ struct mem_desc #define MMU_MAP_ERROR_NOPAGE -3 #define MMU_MAP_ERROR_CONFLICT -4 +#define VPN_MASK 0x1ffUL +#define PTE_BITS 10 +#define VPN_BITS 9 + +#if defined(RT_USING_SMP) && defined(ARCH_MM_MMU) +extern unsigned int __percpu_end, __percpu_start; +#endif /* RT_USING_SMP && ARCH_MM_MMU */ + void *rt_hw_mmu_tbl_get(void); int rt_hw_mmu_map_init(rt_aspace_t aspace, void *v_address, rt_ubase_t size, rt_ubase_t *vtable, rt_ubase_t pv_off); 
@@ -72,4 +80,5 @@ int rt_hw_mmu_control(struct rt_aspace *aspace, void *vaddr, size_t size, void *rt_hw_mmu_pgtbl_create(void); void rt_hw_mmu_pgtbl_delete(void *pgtbl); +unsigned long get_free_page(void); #endif diff --git a/libcpu/risc-v/common64/startup_gcc.S b/libcpu/risc-v/common64/startup_gcc.S index 2a4153778b1..f00ea56dc33 100644 --- a/libcpu/risc-v/common64/startup_gcc.S +++ b/libcpu/risc-v/common64/startup_gcc.S @@ -45,6 +45,12 @@ _start: #ifdef RT_USING_SMP system_init: #endif + + /* If ARCH_MM_MMU is not enabled or pvoff==0, save the hartid in the satp register; + * otherwise, save it in percpu_hartid. + */ + csrw satp, a0 + /* clear Interrupt Registers */ csrw sie, 0 csrw sip, 0 @@ -104,7 +110,6 @@ system_init: li t0, __STACKSIZE__ add sp, sp, t0 #else - csrw satp, a0 /* Currently, the hartid is stored in the satp register. */ /* Initialize the sp pointer according to different hartids. */ mv t0, a0 /* calculate stack offset: hartid * __STACKSIZE__ */ @@ -116,10 +121,10 @@ system_init: add sp, sp, t0 /* sp = __stack_start__ + hartid * __STACKSIZE__ */ add sp, sp, t1 /* sp += __STACKSIZE__ (point to stack top) */ - mv t0, a0 - lw t1, boot_hartid - bne t0, t1, secondary_cpu_entry - li x10,0 /* Clear the a0 register. */ + mv t0, a0 + lw t1, boot_hartid + mv tp, a0 + bne t0, t1, early_secondary_cpu_entry #endif /* RT_USING_SMP */ /** @@ -127,7 +132,14 @@ system_init: */ csrw sscratch, zero call init_bss +early_secondary_cpu_entry: #ifdef ARCH_MM_MMU + // Pages need to be managed manually in the early stage. + la a0, .early_page_array + call set_free_page + + la a0, .early_tbl_page + mv a1, tp call rt_hw_mem_setup_early call rt_kmem_pvoff /* a0 := pvoff */ @@ -138,13 +150,37 @@ system_init: sub x1, x1, a0 ret _after_pc_relocation: +#if defined(RT_USING_SMP) + /* If the MMU is enabled, save the hartid in percpu_hartid. + * -> .percpu_hartid (hartid_0) + * ...... align(2MB) + * -> (hartid_1) + * ...... + */ + la a0, .percpu_hartid + mv a1, tp + call rt_hw_percpu_hartid_init +#endif /* relocate gp */ sub gp, gp, a0 +#ifndef RT_USING_SMP /* relocate context: sp */ la sp, __stack_start__ li t0, __STACKSIZE__ add sp, sp, t0 +#else + /* Initialize the sp pointer according to different hartids. 
*/
+    mv t0, tp
+    /* calculate stack offset: hartid * __STACKSIZE__ */
+    li t1, __STACKSIZE__
+    mul t0, t0, t1 /* t0 = hartid * __STACKSIZE__ */
+
+    /* set stack pointer */
+    la sp, __stack_start__
+    add sp, sp, t0 /* sp = __stack_start__ + hartid * __STACKSIZE__ */
+    add sp, sp, t1 /* sp += __STACKSIZE__ (point to stack top) */
+#endif /* RT_USING_SMP */

     /* reset s0-fp */
     mv s0, zero
@@ -153,7 +189,12 @@ _after_pc_relocation:
     la t0, trap_entry
     csrw stvec, t0
 1:
+#ifdef RT_USING_SMP
+    mv t0, tp
+    lw t1, boot_hartid
+    bne t0, t1, secondary_cpu_entry
 #endif
+#endif /* ARCH_MM_MMU */
     call sbi_init
     call primary_cpu_entry

@@ -163,3 +204,29 @@ _never_return_here:
 .global _start_link_addr
 _start_link_addr:
     .dword __text_start
+
+#ifdef ARCH_MM_MMU
+/*
+ * Per-CPU data and early page-table storage
+ */
+    .section ".percpu"
+.percpu_hartid:
+    .space 16
+
+    .section ".bss"
+
+.equ page_size, 4096
+    .balign page_size
+.early_tbl_page:
+    .space 1 * page_size
+#if defined(RT_USING_SMP) && RT_CPUS_NR > 1
+    .space (RT_CPUS_NR - 1) * page_size
+#endif
+
+.early_page_array:
+    .space (8 + 8) * page_size
+#ifdef RT_USING_SMP
+    .space RT_CPUS_NR * 5 * page_size
+#endif
+
+#endif /* ARCH_MM_MMU */
\ No newline at end of file
diff --git a/libcpu/risc-v/virt64/interrupt.h b/libcpu/risc-v/virt64/interrupt.h
index 9b0acf0cd9a..a32cac10881 100644
--- a/libcpu/risc-v/virt64/interrupt.h
+++ b/libcpu/risc-v/virt64/interrupt.h
@@ -43,7 +43,10 @@ void rt_hw_interrupt_mask(int vector);
 rt_isr_handler_t rt_hw_interrupt_install(int vector, rt_isr_handler_t handler, void *param, const char *name);
 void handle_trap(rt_ubase_t xcause, rt_ubase_t xtval, rt_ubase_t xepc, struct rt_hw_stack_frame *sp);
 #ifdef RT_USING_SMP
+void rt_hw_interrupt_set_priority(int vector, unsigned int priority);
+unsigned int rt_hw_interrupt_get_priority(int vector);
 void rt_hw_ipi_handler(void);
+void rt_hw_ipi_handler_install(int ipi_vector, rt_isr_handler_t ipi_isr_handler);
 void rt_hw_ipi_init(void);
 void rt_hw_ipi_send(int ipi_vector, unsigned int cpu_mask);
 #endif /* RT_USING_SMP */

From 8a9066fa461954857f2ba257e309a8279c24a47d Mon Sep 17 00:00:00 2001
From: Tm-C-mT <490534897@qq.com>
Date: Mon, 17 Nov 2025 09:37:51 +0800
Subject: [PATCH 10/10] [libcpu-riscv]: [support SMP]: Fix issues with
 non-standard formatting

Remove the stray blank lines and trailing whitespace introduced by the
earlier SMP patches.

Signed-off-by: Mengchen Teng
---
 libcpu/risc-v/common64/cpuport.c | 7 ++-----
 libcpu/risc-v/common64/mmu.c     | 4 ++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/libcpu/risc-v/common64/cpuport.c b/libcpu/risc-v/common64/cpuport.c
index f9969cf7d8a..421b391c1e1 100644
--- a/libcpu/risc-v/common64/cpuport.c
+++ b/libcpu/risc-v/common64/cpuport.c
@@ -90,8 +90,6 @@ int rt_hw_cpu_id(void)
         asm volatile("csrr %0, satp" : "=r"(hartid));
         return hartid;
     }
-
-
 #endif /* RT_USING_SMP */
 }

@@ -221,13 +219,12 @@ void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid)
 {
     rt_ubase_t *percpu_hartid_paddr;
     rt_size_t percpu_size = (rt_size_t)((rt_ubase_t)&__percpu_end - (rt_ubase_t)&__percpu_start);
-    
+
     percpu_hartid = percpu_ptr;

     // from virtual address to physical address
     percpu_ptr = (rt_ubase_t *)((rt_ubase_t)percpu_ptr + (rt_ubase_t)rt_kmem_pvoff());
     percpu_hartid_paddr = percpu_ptr;
-

     /* Save to the real area */
     *(rt_ubase_t *)((void *)percpu_hartid_paddr + hartid * percpu_size) = hartid;
@@ -236,7 +233,7 @@ void rt_hw_percpu_hartid_init(rt_ubase_t *percpu_ptr, rt_ubase_t hartid)

 void secondary_cpu_entry(void)
 {
-    
+
 #ifdef RT_USING_SMART
     /* switch to kernel address space */
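+    /* Each hart installs its own copy of the kernel root page table
+     * (rt_kernel_space.page_table + hartid * ARCH_PAGE_SIZE, see
+     * rt_hw_aspace_switch); that per-hart root is what makes the .percpu
+     * window resolve to hart-private memory. */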
rt_hw_aspace_switch(&rt_kernel_space); diff --git a/libcpu/risc-v/common64/mmu.c b/libcpu/risc-v/common64/mmu.c index 50b521dfc34..1452de2a332 100644 --- a/libcpu/risc-v/common64/mmu.c +++ b/libcpu/risc-v/common64/mmu.c @@ -180,7 +180,7 @@ static int _map_one_page(struct rt_aspace *aspace, void *va, void *pa, *(mmu_l3 + l3_off) = COMBINEPTE((rt_ubase_t)pa, attr); rt_hw_cpu_dcache_clean(mmu_l3 + l3_off, sizeof(*(mmu_l3 + l3_off))); } - + return 0; } @@ -951,7 +951,7 @@ void rt_hw_mem_setup_early(void *pgtbl, rt_uint64_t hartid) ps += L2_PAGE_SIZE; vs += L2_PAGE_SIZE; } -#endif +#endif /* apply new mapping */ asm volatile("sfence.vma x0, x0"); write_csr(satp, SATP_BASE | ((size_t)early_pgtbl >> PAGE_OFFSET_BIT));
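Note: the wrap-around handling in the PATCH 08 unlock path can be
sanity-checked in isolation. A minimal single-threaded sketch in plain C,
with a uint32_t standing in for the slock word and ordinary operations
standing in for the rt_hw_atomic_* ops:

    #include <assert.h>
    #include <stdint.h>

    /* slock layout, as in rt_hw_spinlock_t: low 16 bits = owner,
     * high 16 bits = next ticket to hand out. */
    static uint32_t slock = (0xffffu << 16) | 0xffffu;

    static void unlock(void)
    {
        if ((slock & 0xffffu) == 0xffffu)
            slock &= 0xffff0000u; /* wrap owner to 0, keep the ticket field */
        else
            slock += 1;           /* normal hand-over to the next ticket */
    }

    int main(void)
    {
        uint32_t buggy = slock + 1;           /* the old plain increment...  */
        assert((buggy >> 16) == 0x0000u);     /* ...carries into the ticket  */

        unlock();
        assert((slock & 0xffffu) == 0x0000u); /* owner wrapped cleanly       */
        assert((slock >> 16) == 0xffffu);     /* ticket field left untouched */
        return 0;
    }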