- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[AArch64] Allow FPRCVT Instructions to Run in Streaming Mode #165432
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This patch updates the SCVTF and UCVTF instructions to be used in streaming mode.
| @llvm/pr-subscribers-backend-aarch64 Author: Amina Chabane (Amichaxx) Changes: This patch updates the SCVTF and UCVTF instructions to be used in streaming mode. Full diff: https://github.com/llvm/llvm-project/pull/165432.diff 3 Files Affected: 
 diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b9e299ef37454..7139fa6f64022 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5329,7 +5329,7 @@ def : Pat<(i64 (any_llround f64:$Rn)),
 defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
 defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
 
-let Predicates = [HasNEON, HasFPRCVT] in {
+let Predicates = [HasNEONandIsStreamingSafe, HasFPRCVT] in {
   defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf", any_sint_to_fp>;
   defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf", any_uint_to_fp>;
 
diff --git a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
index 3ea1a01cfc977..5f04d0c8e0287 100644
--- a/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
+++ b/llvm/test/CodeGen/AArch64/fprcvt-cvtf.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s
 ; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT
+; RUN: llc -mattr=+sme,+neon,+fullfp16,+fprcvt -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -21,6 +22,14 @@ define half @scvtf_f16i32(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, s0
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 0
  %conv = sitofp i32 %extract to half
  ret half %conv
@@ -39,6 +48,15 @@ define half @scvtf_f16i32_neg(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, w8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i32_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    mov z0.s, z0.s[1]
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 1
  %conv = sitofp i32 %extract to half
  ret half %conv
@@ -57,6 +75,14 @@ define <1 x half> @scvtf_f16i32_simple(<1 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, s0
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i32_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = sitofp <1 x i32> %x to <1 x half>
  ret <1 x half> %conv
 }
@@ -72,6 +98,13 @@ define double @scvtf_f64i32(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    fmov w8, s0
 ; CHECK-NO-FPRCVT-NEXT:    scvtf d0, w8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f64i32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    fmov w8, s0
+; CHECK-STREAMING-NEXT:    scvtf d0, w8
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 0
  %conv = sitofp i32 %extract to double
  ret double %conv
@@ -89,6 +122,14 @@ define double @scvtf_f64i32_neg(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
 ; CHECK-NO-FPRCVT-NEXT:    scvtf d0, w8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f64i32_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    mov z0.s, z0.s[1]
+; CHECK-STREAMING-NEXT:    fmov w8, s0
+; CHECK-STREAMING-NEXT:    scvtf d0, w8
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 1
  %conv = sitofp i32 %extract to double
  ret double %conv
@@ -106,6 +147,17 @@ define <1 x double> @scvtf_f64i32_simple(<1 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f64i32_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d, vl2
+; CHECK-STREAMING-NEXT:    sunpklo z0.d, z0.s
+; CHECK-STREAMING-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    str q0, [sp, #-16]!
+; CHECK-STREAMING-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-STREAMING-NEXT:    ldr d0, [sp], #16
+; CHECK-STREAMING-NEXT:    ret
  %conv = sitofp <1 x i32> %x to <1 x double>
  ret <1 x double> %conv
 }
@@ -122,6 +174,14 @@ define half @scvtf_f16i64(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 0
  %conv = sitofp i64 %extract to half
  ret half %conv
@@ -140,6 +200,15 @@ define half @scvtf_f16i64_neg(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i64_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    mov z0.d, z0.d[1]
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 1
  %conv = sitofp i64 %extract to half
  ret half %conv
@@ -159,6 +228,14 @@ define <1 x half> @scvtf_f16i64_simple(<1 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f16i64_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = sitofp <1 x i64> %x to <1 x half>
  ret <1 x half> %conv
 }
@@ -174,6 +251,14 @@ define float @scvtf_f32i64(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f32i64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 0
  %conv = sitofp i64 %extract to float
  ret float %conv
@@ -191,6 +276,15 @@ define float @scvtf_f32i64_neg(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
 ; CHECK-NO-FPRCVT-NEXT:    scvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f32i64_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    mov z0.d, z0.d[1]
+; CHECK-STREAMING-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 1
  %conv = sitofp i64 %extract to float
  ret float %conv
@@ -219,6 +313,15 @@ define <1 x float> @scvtf_f32i64_simple(<1 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov v1.s[0], v0.s[0]
 ; CHECK-NO-FPRCVT-NEXT:    fmov d0, d1
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: scvtf_f32i64_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d, vl2
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = sitofp <1 x i64> %x to <1 x float>
  ret <1 x float> %conv
 }
@@ -236,6 +339,14 @@ define half @ucvtf_f16i32(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, s0
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 0
  %conv = uitofp i32 %extract to half
  ret half %conv
@@ -254,6 +365,15 @@ define half @ucvtf_f16i32_neg(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, w8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i32_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    mov z0.s, z0.s[1]
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 1
  %conv = uitofp i32 %extract to half
  ret half %conv
@@ -272,6 +392,14 @@ define <1 x half> @ucvtf_f16i32_simple(<1 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, s0
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i32_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.s
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = uitofp <1 x i32> %x to <1 x half>
  ret <1 x half> %conv
 }
@@ -287,6 +415,13 @@ define double @ucvtf_f64i32(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    fmov w8, s0
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf d0, w8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f64i32:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    fmov w8, s0
+; CHECK-STREAMING-NEXT:    ucvtf d0, w8
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 0
  %conv = uitofp i32 %extract to double
  ret double %conv
@@ -304,6 +439,14 @@ define double @ucvtf_f64i32_neg(<4 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov w8, v0.s[1]
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf d0, w8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f64i32_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    mov z0.s, z0.s[1]
+; CHECK-STREAMING-NEXT:    fmov w8, s0
+; CHECK-STREAMING-NEXT:    ucvtf d0, w8
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <4 x i32> %x, i64 1
  %conv = uitofp i32 %extract to double
  ret double %conv
@@ -321,6 +464,17 @@ define <1 x double> @ucvtf_f64i32_simple(<1 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f64i32_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d, vl2
+; CHECK-STREAMING-NEXT:    uunpklo z0.d, z0.s
+; CHECK-STREAMING-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    str q0, [sp, #-16]!
+; CHECK-STREAMING-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-STREAMING-NEXT:    ldr d0, [sp], #16
+; CHECK-STREAMING-NEXT:    ret
  %conv = uitofp <1 x i32> %x to <1 x double>
  ret <1 x double> %conv
 }
@@ -337,6 +491,14 @@ define half @ucvtf_f16i64(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 0
  %conv = uitofp i64 %extract to half
  ret half %conv
@@ -355,6 +517,15 @@ define half @ucvtf_f16i64_neg(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i64_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    mov z0.d, z0.d[1]
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 1
  %conv = uitofp i64 %extract to half
  ret half %conv
@@ -374,6 +545,14 @@ define <1 x half> @ucvtf_f16i64_simple(<1 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    fcvt h0, s0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f16i64_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $h0 killed $h0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = uitofp <1 x i64> %x to <1 x half>
  ret <1 x half> %conv
 }
@@ -389,6 +568,14 @@ define float @ucvtf_f32i64(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    fmov x8, d0
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f32i64:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 0
  %conv = uitofp i64 %extract to float
  ret float %conv
@@ -406,6 +593,15 @@ define float @ucvtf_f32i64_neg(<2 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov x8, v0.d[1]
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf s0, x8
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f32i64_neg:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d
+; CHECK-STREAMING-NEXT:    mov z0.d, z0.d[1]
+; CHECK-STREAMING-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    // kill: def $s0 killed $s0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %extract = extractelement <2 x i64> %x, i64 1
  %conv = uitofp i64 %extract to float
  ret float %conv
@@ -434,6 +630,15 @@ define <1 x float> @ucvtf_f32i64_simple(<1 x i64> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    mov v1.s[0], v0.s[0]
 ; CHECK-NO-FPRCVT-NEXT:    fmov d0, d1
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: ucvtf_f32i64_simple:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    ptrue p0.d, vl2
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-STREAMING-NEXT:    ret
  %conv = uitofp <1 x i64> %x to <1 x float>
  ret <1 x float> %conv
 }
@@ -452,6 +657,17 @@ define <1 x double> @uitofp_sext_v2i32_extract_lane0(<2 x i32> %x) {
 ; CHECK-NO-FPRCVT-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-NO-FPRCVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NO-FPRCVT-NEXT:    ret
+;
+; CHECK-STREAMING-LABEL: uitofp_sext_v2i32_extract_lane0:
+; CHECK-STREAMING:       // %bb.0:
+; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-STREAMING-NEXT:    ptrue p0.d, vl2
+; CHECK-STREAMING-NEXT:    sunpklo z0.d, z0.s
+; CHECK-STREAMING-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-STREAMING-NEXT:    str q0, [sp, #-16]!
+; CHECK-STREAMING-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-STREAMING-NEXT:    ldr d0, [sp], #16
+; CHECK-STREAMING-NEXT:    ret
   %wide  = sext <2 x i32> %x to <2 x i64>
   %fpv2  = uitofp <2 x i64> %wide to <2 x double>
   %lane0 = shufflevector <2 x double> %fpv2, <2 x double> poison, <1 x i32> zeroinitializer
diff --git a/llvm/test/MC/AArch64/SME/streaming-mode-fprcvt.s b/llvm/test/MC/AArch64/SME/streaming-mode-fprcvt.s
new file mode 100644
index 0000000000000..cc7b9e0988d0f
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME/streaming-mode-fprcvt.s
@@ -0,0 +1,24 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6
+// REQUIRES: aarch64-registered-target
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+neon,+fprcvt < %s \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme,+neon,+fprcvt < %s \
+// RUN:   | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:   | llvm-mc -triple=aarch64 -mattr=+sme,+neon,+fprcvt -disassemble -show-encoding \
+// RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING
+
+scvtf   d0, x0
+// CHECK-ENCODING: scvtf	d0, x0                          // encoding: [0x00,0x00,0x62,0x9e]
+// CHECK-ERROR:    error: instruction requires: neon
+
+ucvtf   d1, x1
+// CHECK-ENCODING: ucvtf	d1, x1                          // encoding: [0x21,0x00,0x63,0x9e]
+// CHECK-ERROR:    error: instruction requires: neon
+
+scvtf   s2, w2
+// CHECK-ENCODING: scvtf	s2, w2                          // encoding: [0x42,0x00,0x22,0x1e]
+// CHECK-ERROR:    error: instruction requires: neon
+
+ucvtf   s3, w3
+// CHECK-ENCODING: ucvtf	s3, w3                          // encoding: [0x63,0x00,0x23,0x1e]
+// CHECK-ERROR:    error: instruction requires: neon
 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The issue you are trying to solve here is not unique to the new CVTF instructions; as far as I can tell, it also affects the FCVT ones. Would it be possible to extend this PR to handle those as well? Or is there a follow-up patch that will address them?
| ; CHECK-STREAMING: // %bb.0: | ||
| ; CHECK-STREAMING-NEXT: ptrue p0.s | ||
| ; CHECK-STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0 | ||
| ; CHECK-STREAMING-NEXT: scvtf z0.h, p0/m, z0.s | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is incorrect assembly to emit. There is no reason to use SVE instructions here. It seems like we need to modify this piece of code in AArch64ISelLowering:
  if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
      useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
This gets triggered in this scenario even though it is perfectly fine to use the GPR-to-FPR variant of the SCVTF instruction or, because FPRCVT is enabled, even the FPR-to-FPR variant.
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -mattr=+neon,+fullfp16,+fprcvt -verify-machineinstrs %s -o - | FileCheck %s | ||
| ; RUN: llc -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-NO-FPRCVT | ||
| ; RUN: llc -mattr=+sme,+neon,+fullfp16,+fprcvt -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; RUN: llc -mattr=+sme,+neon,+fullfp16,+fprcvt -force-streaming < %s | FileCheck %s --check-prefix=CHECK-STREAMING | |
| ; RUN: llc -mattr=+sme,+neon,+fullfp16,+fprcvt -force-streaming < %s | FileCheck %s | 
I don't think the streaming variant needs a special prefix. There shouldn't be any difference in the generated assembly because of streaming mode.
| // RUN: | llvm-mc -triple=aarch64 -mattr=+sme,+neon,+fprcvt -disassemble -show-encoding \ | ||
| // RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING | ||
|  | ||
| scvtf d0, x0 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are not testing the new FPRCVT instructions here, but the old GPR ones. The new ones are present in /home/marluk01/review/llvm/test/MC/AArch64/armv9.6a-cvtf.s
This patch updates the SCVTF and UCVTF instructions to be used in streaming mode.