@@ -458,6 +458,16 @@ unittest
     assert(generateShuffles2!(4, 4) == [[0, 1, 2, 3], [4, 5, 6, 7]]);
 }

+unittest
+{
+    enum ai = [0, 1, 2, 3];
+    enum bi = [4, 5, 6, 7];
+    align(32)
+    double[4] a = [0, 1, 2, 3], b = [4, 5, 6, 7], c, d;
+    shuffle3!1(a, b, c, d);
+    assert([c, d] == [[0.0, 4, 1, 5], [2.0, 6, 3, 7]]);
+}
+
 unittest
 {
     enum ai = [0, 1, 2, 3];
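The added unittest above pins down the lane pattern produced by `shuffle3!1` on two aligned `double[4]` inputs. As a reading aid only (not part of the commit, and assuming nothing about `shuffle3` beyond what the assert states), the same interleaving can be written out in plain scalar D:

// Illustrative sketch: reproduces the pattern asserted above,
// c = [a0, b0, a1, b1] and d = [a2, b2, a3, b3].
unittest
{
    double[4] a = [0, 1, 2, 3], b = [4, 5, 6, 7], c, d;
    foreach (i; 0 .. 2)
    {
        c[2 * i]     = a[i];     // low halves of a and b interleave into c
        c[2 * i + 1] = b[i];
        d[2 * i]     = a[i + 2]; // high halves of a and b interleave into d
        d[2 * i + 1] = b[i + 2];
    }
    assert(c == [0.0, 4, 1, 5] && d == [2.0, 6, 3, 7]);
}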
@@ -490,78 +500,78 @@ import mir.internal.utility;

 auto vectorize(Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a0, ref F[N] b0, ref F[N] a1, ref F[N] b1, ref F[N][R] c)
 {
-    static if (LDC && F.mant_dig != 64)
-    {
-        alias V = __vector(F[N]); // @FUTURE@ vector support
-        *cast(V[R]*) c.ptr = kernel(
-            *cast(V*)a0.ptr, *cast(V*)b0.ptr,
-            *cast(V*)a1.ptr, *cast(V*)b1.ptr);
-    }
-    else
-    static if (F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof)
-    {
-        import mir.utility;
-        enum S = _avx ? 32u : 16u;
-        enum M = min(S, F[N].sizeof) / F.sizeof;
-        alias V = __vector(F[M]); // @FUTURE@ vector support
-        enum C = N / M;
-        foreach(i; Iota!C)
-        {
-            auto r = kernel(
-                *cast(V*)(a0.ptr + i * M), *cast(V*)(b0.ptr + i * M),
-                *cast(V*)(a1.ptr + i * M), *cast(V*)(b1.ptr + i * M),
-                );
-            static if (R == 1)
-                *cast(V*)(c[0].ptr + i * M) = r;
-            else
-            foreach(j; Iota!R)
-                *cast(V*)(c[j].ptr + i * M) = r[j];
-        }
-    }
-    else
-    {
+    // static if (LDC && F.mant_dig != 64)
+    // {
+    //     alias V = __vector(F[N]); // @FUTURE@ vector support
+    //     *cast(V[R]*) c.ptr = kernel(
+    //         *cast(V*)a0.ptr, *cast(V*)b0.ptr,
+    //         *cast(V*)a1.ptr, *cast(V*)b1.ptr);
+    // }
+    // else
+    // static if (F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof)
+    // {
+    //     import mir.utility;
+    //     enum S = _avx ? 32u : 16u;
+    //     enum M = min(S, F[N].sizeof) / F.sizeof;
+    //     alias V = __vector(F[M]); // @FUTURE@ vector support
+    //     enum C = N / M;
+    //     foreach(i; Iota!C)
+    //     {
+    //         auto r = kernel(
+    //             *cast(V*)(a0.ptr + i * M), *cast(V*)(b0.ptr + i * M),
+    //             *cast(V*)(a1.ptr + i * M), *cast(V*)(b1.ptr + i * M),
+    //             );
+    //         static if (R == 1)
+    //             *cast(V*)(c[0].ptr + i * M) = r;
+    //         else
+    //         foreach(j; Iota!R)
+    //             *cast(V*)(c[j].ptr + i * M) = r[j];
+    //     }
+    // }
+    // else
+    // {
         foreach(i; Iota!N)
         {
             auto r = kernel(a0[i], b0[i], a1[i], b1[i]);
             static if (R == 1)
-                return c[0] = r;
+                c[0][i] = r;
             else
             foreach(j; Iota!R)
                 c[j][i] = r[j];
         }
-    }
+    // }
 }

 auto vectorize(Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a, ref F[N] b, ref F[N][R] c)
 {
-    static if (LDC && F.mant_dig != 64 && is(__vector(F[N])))
-    {
-        alias V = __vector(F[N]); // @FUTURE@ vector support
-        *cast(V[R]*) c.ptr = kernel(*cast(V*)a.ptr, *cast(V*)b.ptr);
-    }
-    else
-    static if (F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof && x86_64)
-    {
-        import mir.utility;
-        enum S = _avx ? 32u : 16u;
-        enum M = min(S, F[N].sizeof) / F.sizeof;
-        alias V = __vector(F[M]); // @FUTURE@ vector support
-        enum C = N / M;
-        foreach(i; Iota!C)
-        {
-            auto r = kernel(
-                *cast(V*)(a.ptr + i * M),
-                *cast(V*)(b.ptr + i * M),
-                );
-            static if (R == 1)
-                *cast(V*)(c[0].ptr + i * M) = r;
-            else
-            foreach(j; Iota!R)
-                *cast(V*)(c[j].ptr + i * M) = r[j];
-        }
-    }
-    else
-    {
+    // static if (LDC && F.mant_dig != 64 && is(__vector(F[N])))
+    // {
+    //     alias V = __vector(F[N]); // @FUTURE@ vector support
+    //     *cast(V[R]*) c.ptr = kernel(*cast(V*)a.ptr, *cast(V*)b.ptr);
+    // }
+    // else
+    // static if (F.sizeof <= double.sizeof && F[N].sizeof >= (double[2]).sizeof && x86_64)
+    // {
+    //     import mir.utility;
+    //     enum S = _avx ? 32u : 16u;
+    //     enum M = min(S, F[N].sizeof) / F.sizeof;
+    //     alias V = __vector(F[M]); // @FUTURE@ vector support
+    //     enum C = N / M;
+    //     foreach(i; Iota!C)
+    //     {
+    //         auto r = kernel(
+    //             *cast(V*)(a.ptr + i * M),
+    //             *cast(V*)(b.ptr + i * M),
+    //             );
+    //         static if (R == 1)
+    //             *cast(V*)(c[0].ptr + i * M) = r;
+    //         else
+    //         foreach(j; Iota!R)
+    //             *cast(V*)(c[j].ptr + i * M) = r[j];
+    //     }
+    // }
+    // else
+    // {
         F[N][R] _c = void; // Temporary array in case "c" overlaps "a" and/or "b".
         foreach(i; Iota!N)
         {
@@ -573,7 +583,7 @@ auto vectorize(Kernel, F, size_t N, size_t R)(ref Kernel kernel, ref F[N] a, ref
                 _c[j][i] = r[j];
         }
         c = _c;
-    }
+    // }
 }

 // version(unittest)
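Two behavioural points in this diff are easy to miss: the scalar fallback is now the only active path (the `__vector` branches are commented out rather than selected by `static if`), and the `R == 1` branch of the four-input overload no longer returns from inside the loop but stores one lane per iteration (`c[0][i] = r;`). The following standalone sketch is for illustration only; the kernel, sizes, and values are made up, and nothing is assumed about the module beyond the fallback pattern visible in the diff:

// Minimal sketch of the scalar fallback shown above: apply the kernel
// element-wise and scatter each of its R results into its own output row,
// going through a temporary in case the output overlaps the inputs.
unittest
{
    // Hypothetical kernel producing two results per element (the R > 1 path).
    static double[2] kernel(double x, double y)
    {
        double[2] r = [x + y, x - y];
        return r;
    }

    enum N = 4, R = 2;
    double[N] a = [1, 2, 3, 4], b = [10, 20, 30, 40];
    double[N][R] c;

    double[N][R] _c = void; // mirrors the overlap guard in the two-input overload
    foreach (i; 0 .. N)
    {
        auto r = kernel(a[i], b[i]);
        foreach (j; 0 .. R)
            _c[j][i] = r[j];
    }
    c = _c;

    assert(c[0] == [11.0, 22, 33, 44]);    // element-wise sums
    assert(c[1] == [-9.0, -18, -27, -36]); // element-wise differences
}

In the actual module this loop is generated by `vectorize` itself; with the SIMD branches commented out it runs for every instantiation, so kernels only need to support the element-by-element call form.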