From fa68a5b9a3aa0ce970985e63ebfaf9f6576165b7 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Tue, 9 Jul 2024 08:10:29 +0200 Subject: [PATCH 1/3] arcv: add scheduling information for the Synopsys RMX-100 CPU This commit introduces a new -mtune=arc-v-rmx-100-series tuning option together with relevant scheduler definitions. Instruction latencies and costs are based on the "RMX-100 Technical Reference Manual" document (revision 0.4, 13 September 2023) and are subject to change. The changes have been verified by running the Dhrystone and Coremark benchmarks and observing expected (small) improvements compared to the -mtune=generic results. Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 110 +++++++++++++++++++++++++++++++ gcc/config/riscv/riscv-cores.def | 1 + gcc/config/riscv/riscv-opts.h | 1 + gcc/config/riscv/riscv.cc | 24 +++++++ gcc/config/riscv/riscv.md | 3 +- gcc/doc/riscv-mtune.texi | 2 + 6 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 gcc/config/riscv/arcv-rmx100.md diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md new file mode 100644 index 000000000000..9194f510f9f8 --- /dev/null +++ b/gcc/config/riscv/arcv-rmx100.md @@ -0,0 +1,110 @@ +;; DFA scheduling description of the Synopsys RMX-100 cpu +;; for GNU C compiler +;; Copyright (C) 2023 Free Software Foundation, Inc. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. + +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +(define_automaton "arcv_rmx100") + +(define_cpu_unit "arcv_rmx100_ALU" "arcv_rmx100") +;(define_cpu_unit "arcv_rmx100_CSR" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_FPU" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_MPY" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DIV" "arcv_rmx100") +(define_cpu_unit "arcv_rmx100_DMP" "arcv_rmx100") + +;; Instruction reservation for arithmetic instructions. +(define_insn_reservation "arcv_rmx100_alu_arith" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "unknown, const, arith, shift, slt, multi, auipc, nop, + logical, move, atomic, mvpair, bitmanip, clz, ctz, cpop, + zicond, condmove, clmul, min, max, minu, maxu, rotate")) + "arcv_rmx100_ALU") + +(define_insn_reservation "arcv_rmx100_jmp_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "branch, jump, call, jalr, ret, trap")) + "arcv_rmx100_ALU") + +; DIV insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_div_insn" 35 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "idiv")) + "arcv_rmx100_DIV*35") + +; MPY insn: latency may be overridden by a define_bypass +(define_insn_reservation "arcv_rmx100_mpy32_insn" 9 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "imul")) + "arcv_rmx100_MPY") + +(define_insn_reservation "arcv_rmx100_load_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "load,fpload")) + "arcv_rmx100_DMP,nothing*2") + +(define_insn_reservation "arcv_rmx100_store_insn" 1 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "store,fpstore")) + "arcv_rmx100_DMP") + +(define_insn_reservation "arcv_rmx100_farith_insn" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fadd,fmul,fmadd,fcmp")) + "arcv_rmx100_FPU*2") + +(define_insn_reservation "arcv_rmx100_fdiv_insn" 17 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fdiv,fsqrt")) + "arcv_rmx100_FPU*17") + +(define_insn_reservation "arcv_rmx100_xfer" 2 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" 
"fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) + "arcv_rmx100_FPU*2") + +;;(define_insn_reservation "core" 1 +;; (eq_attr "type" "block, brk, dmb, flag, lr, sr, sync") +;; "arcv_rmx100_ALU0 + arcv_rmx100_ALU1 + arcv_rmx100_DMP + arcv_rmx100_MPY + arcv_rmx100_MPY64 + arcv_rmx100_DIV") + +(define_insn_reservation "arcv_rmx100_fmul_half" 5 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "HF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fmul_single" 5 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "SF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fmul_double" 7 + (and (eq_attr "tune" "arcv_rmx100") + (and (eq_attr "type" "fadd,fmul,fmadd") + (eq_attr "mode" "DF"))) + "arcv_rmx100_FPU") + +(define_insn_reservation "arcv_rmx100_fdiv" 20 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fdiv")) + "arcv_rmx100_FPU*20") + +(define_insn_reservation "arcv_rmx100_fsqrt" 25 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fsqrt")) + "arcv_rmx100_FPU*25") diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index cc9d5c03cb8c..d1708f3785b6 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -50,6 +50,7 @@ RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) +RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 4e4e9d8930e2..3feb211767cb 100644 --- a/gcc/config/riscv/riscv-opts.h +++ 
b/gcc/config/riscv/riscv-opts.h @@ -61,6 +61,7 @@ enum riscv_microarchitecture_type { generic_ooo, mips_p8700, tt_ascalon_d8, + arcv_rmx100, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e186c6a99e9d..dbb7cadf7fb4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -685,6 +685,30 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = { true, /* prefer-agnostic. */ }; +/* Costs to use when optimizing for Synopsys RMX-100. */ +static const struct riscv_tune_param arcv_rmx100_tune_info = { + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* int_div */ + 1, /* issue_rate */ + 4, /* branch_cost */ + 2, /* memory_cost */ + 4, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + true, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ +}; + /* Costs to use when optimizing for size. */ static const struct riscv_tune_param optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 640ca5f9b0ea..823f8dda8a30 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -672,7 +672,7 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. 
(define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. @@ -4966,3 +4966,4 @@ (include "generic-vector-ooo.md") (include "generic-ooo.md") (include "tt-ascalon-d8.md") +(include "arcv-rmx100.md") diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi index a2a4d3e77dbb..63a01db67726 100644 --- a/gcc/doc/riscv-mtune.texi +++ b/gcc/doc/riscv-mtune.texi @@ -50,6 +50,8 @@ particular CPU name. Permissible values for this option are: @samp{xiangshan-kunminghu}, +@samp{arc-v-rmx-100-series}, + @samp{generic-ooo}, @samp{size}, From a32098e22b465ab4ee203a92f7e391e3b52d6faa Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Fri, 5 Jul 2024 06:46:11 -0700 Subject: [PATCH 2/3] arcv: introduce and incorporate the --param=arcv-mpy-option flag This commit adds the new arcv-mpy-option compilation parameter with the valid (string) values of 1c, 2c, and 10c. These correspond to different versions of the MPY/DIV unit of the RMX-100 core, each of which has different latencies for imul/idiv instructions. Internally, this option is propagated to the pipeline description information in arcv-rmx100.md with the use of new helper functions defined in riscv.cc. 
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 11 ++++++----- gcc/config/riscv/riscv-opts.h | 7 +++++++ gcc/config/riscv/riscv-protos.h | 3 +++ gcc/config/riscv/riscv.cc | 24 ++++++++++++++++++++++++ gcc/config/riscv/riscv.opt | 17 +++++++++++++++++ 5 files changed, 57 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md index 9194f510f9f8..003bf9ff268e 100644 --- a/gcc/config/riscv/arcv-rmx100.md +++ b/gcc/config/riscv/arcv-rmx100.md @@ -67,11 +67,6 @@ (eq_attr "type" "fadd,fmul,fmadd,fcmp")) "arcv_rmx100_FPU*2") -(define_insn_reservation "arcv_rmx100_fdiv_insn" 17 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fdiv,fsqrt")) - "arcv_rmx100_FPU*17") - (define_insn_reservation "arcv_rmx100_xfer" 2 (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) @@ -108,3 +103,9 @@ (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fsqrt")) "arcv_rmx100_FPU*25") + +(define_bypass 1 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 2 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") + +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") +(define_bypass 9 "arcv_rmx100_div_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p") diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 3feb211767cb..7be10413b4d9 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -86,6 +86,13 @@ enum rvv_max_lmul_enum { RVV_DYNAMIC = 9 }; +/* ARC-V multiply option. */ +enum arcv_mpy_option_enum { + ARCV_MPY_OPTION_1C = 1, + ARCV_MPY_OPTION_2C = 2, + ARCV_MPY_OPTION_10C = 8, +}; + enum riscv_multilib_select_kind { /* Select multilib by builtin way. 
*/ select_by_builtin, diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 570acb14f585..5881cb9529ce 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -165,6 +165,9 @@ extern bool riscv_epilogue_uses (unsigned int); extern bool riscv_can_use_return_insn (void); extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode); extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_1c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_2c_bypass_p (rtx_insn *, rtx_insn *); +extern bool arcv_mpy_10c_bypass_p (rtx_insn *, rtx_insn *); extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); extern bool riscv_gpr_save_operation_p (rtx); extern void riscv_reinit (void); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index dbb7cadf7fb4..8719c2942b55 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -10490,6 +10490,30 @@ riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) return store_data_bypass_p (out_insn, in_insn); } +/* Implement one boolean function for each of the values of the + arcv_mpy_option enum, for the needs of arcv-rmx100.md. */ + +bool +arcv_mpy_1c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_1C; +} + +bool +arcv_mpy_2c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_2C; +} + +bool +arcv_mpy_10c_bypass_p (rtx_insn *out_insn ATTRIBUTE_UNUSED, + rtx_insn *in_insn ATTRIBUTE_UNUSED) +{ + return arcv_mpy_option == ARCV_MPY_OPTION_10C; +} + /* Implement TARGET_SECONDARY_MEMORY_NEEDED. 
When floating-point registers are wider than integer ones, moves between diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 6543fd1c4a72..663acf62dac4 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -396,3 +396,20 @@ Specifies whether the fence.tso instruction should be used. mautovec-segment Target Integer Var(riscv_mautovec_segment) Init(1) Enable (default) or disable generation of vector segment load/store instructions. + +Enum +Name(arcv_mpy_option) Type(enum arcv_mpy_option_enum) +Valid arguments to -param=arcv-mpy-option=: + +EnumValue +Enum(arcv_mpy_option) String(1c) Value(ARCV_MPY_OPTION_1C) + +EnumValue +Enum(arcv_mpy_option) String(2c) Value(ARCV_MPY_OPTION_2C) + +EnumValue +Enum(arcv_mpy_option) String(10c) Value(ARCV_MPY_OPTION_10C) + +-param=arcv-mpy-option= +Target RejectNegative Joined Enum(arcv_mpy_option) Var(arcv_mpy_option) Init(ARCV_MPY_OPTION_2C) +The type of MPY unit used by the RMX-100 core (to be used in combination with -mtune=arc-v-rmx-100-series) (default: 2c). From c8067d68e144833f046bd8ebfff82843e296ee94 Mon Sep 17 00:00:00 2001 From: Artemiy Volkov Date: Thu, 8 May 2025 01:36:17 -0700 Subject: [PATCH 3/3] arcv: add FPU insn latencies to the RMX-100 scheduling model This patch adds latencies related to FPU instructions to arcv-rmx100.md. The specific values used correspond to the 'fast' config, except fdiv where the latency was reduced to 10 cycles. In the future, FP latencies for RMX-100 should be made dependent on an external (-mfpu-like) option. 
Signed-off-by: Artemiy Volkov --- gcc/config/riscv/arcv-rmx100.md | 50 ++++++++++++++------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/gcc/config/riscv/arcv-rmx100.md b/gcc/config/riscv/arcv-rmx100.md index 003bf9ff268e..5a25dfb67cfc 100644 --- a/gcc/config/riscv/arcv-rmx100.md +++ b/gcc/config/riscv/arcv-rmx100.md @@ -54,7 +54,7 @@ (define_insn_reservation "arcv_rmx100_load_insn" 3 (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "load,fpload")) + (eq_attr "type" "load")) "arcv_rmx100_DMP,nothing*2") (define_insn_reservation "arcv_rmx100_store_insn" 1 @@ -62,47 +62,39 @@ (eq_attr "type" "store,fpstore")) "arcv_rmx100_DMP") +;; FPU scheduling. FIXME: This is based on the "fast" unit for now, the "slow" +;; option remains to be implemented later (together with the -mfpu flag). + +(define_insn_reservation "arcv_rmx100_fpload_insn" 3 + (and (eq_attr "tune" "arcv_rmx100") + (eq_attr "type" "fpload")) + "arcv_rmx100_DMP,nothing*2") + (define_insn_reservation "arcv_rmx100_farith_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fadd,fmul,fmadd,fcmp")) - "arcv_rmx100_FPU*2") + (eq_attr "type" "fadd,fcmp")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_xfer" 2 +(define_insn_reservation "arcv_rmx100_xfer" 1 (and (eq_attr "tune" "arcv_rmx100") (eq_attr "type" "fmove,mtc,mfc,fcvt,fcvt_f2i,fcvt_i2f")) - "arcv_rmx100_FPU*2") - -;;(define_insn_reservation "core" 1 -;; (eq_attr "type" "block, brk, dmb, flag, lr, sr, sync") -;; "arcv_rmx100_ALU0 + arcv_rmx100_ALU1 + arcv_rmx100_DMP + arcv_rmx100_MPY + arcv_rmx100_MPY64 + arcv_rmx100_DIV") + "arcv_rmx100_FPU") -(define_insn_reservation "arcv_rmx100_fmul_half" 5 +(define_insn_reservation "arcv_rmx100_fmul_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "HF"))) - "arcv_rmx100_FPU") + (eq_attr "type" "fmul")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_fmul_single" 5 
+(define_insn_reservation "arcv_rmx100_fmac_insn" 2 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "SF"))) - "arcv_rmx100_FPU") + (eq_attr "type" "fmadd")) + "arcv_rmx100_FPU,nothing") -(define_insn_reservation "arcv_rmx100_fmul_double" 7 +(define_insn_reservation "arcv_rmx100_fdiv_insn" 10 (and (eq_attr "tune" "arcv_rmx100") - (and (eq_attr "type" "fadd,fmul,fmadd") - (eq_attr "mode" "DF"))) + (eq_attr "type" "fdiv,fsqrt")) "arcv_rmx100_FPU") -(define_insn_reservation "arcv_rmx100_fdiv" 20 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fdiv")) - "arcv_rmx100_FPU*20") - -(define_insn_reservation "arcv_rmx100_fsqrt" 25 - (and (eq_attr "tune" "arcv_rmx100") - (eq_attr "type" "fsqrt")) - "arcv_rmx100_FPU*25") (define_bypass 1 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_1c_bypass_p") (define_bypass 2 "arcv_rmx100_mpy32_insn" "arcv_rmx100_*" "arcv_mpy_2c_bypass_p")