Skip to content

Commit cb623de

Browse files
authored
ggml : add missing AVX512 feature checks (#17270)
_mm512_cvtepu8_epi16 requires __AVX512BW__
_mm512_srli_epi16 requires __AVX512BW__
__builtin_ia32_inserti32x8 requires __AVX512DQ__

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
1 parent 7aaeedc commit cb623de

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml/src/ggml-cpu/arch/x86/repack.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -646,7 +646,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * GGML_RESTRICT s, size_t
646646
__m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
647647
int64_t xstart = 0;
648648
int anr = nr - nr%16; // Used to align nr with boundary of 16
649-
#ifdef __AVX512F__
649+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
650650
int anc = nc - nc%16; // Used to align nc with boundary of 16
651651
// Mask to mask out nibbles from packed bytes expanded to 512 bit length
652652
const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
@@ -1041,7 +1041,7 @@ static void gemm_q4_b32_8x8_q8_0_lut_avx(int n, float * GGML_RESTRICT s, size_t
10411041
xstart = anc/8;
10421042
y = 0;
10431043
}
1044-
#endif // __AVX512F__
1044+
#endif // __AVX512BW__ && __AVX512DQ__
10451045

10461046
// Take group of four block_q8_0x4 structures at each pass of the loop and perform dot product operation
10471047

@@ -1989,7 +1989,7 @@ void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
19891989
__m256i requiredOrder = _mm256_set_epi32(3, 2, 1, 0, 7, 6, 5, 4);
19901990
int64_t xstart = 0;
19911991
int anr = nr - nr % 16;; // Used to align nr with boundary of 16
1992-
#ifdef __AVX512F__
1992+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
19931993
int anc = nc - nc % 16; // Used to align nc with boundary of 16
19941994
// Mask to mask out nibbles from packed bytes expanded to 512 bit length
19951995
const __m512i m4bexpanded = _mm512_set1_epi8(0x0F);
@@ -2727,7 +2727,7 @@ void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
27272727
xstart = anc/8;
27282728
y = 0;
27292729
}
2730-
#endif //AVX512F
2730+
#endif // __AVX512BW__ && __AVX512DQ__
27312731

27322732
// Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
27332733
for (; y < anr / 4; y += 4) {
@@ -3467,7 +3467,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
34673467
__m256i scalesmask2 = _mm256_castsi128_si256(scalesmask2_sse);
34683468
scalesmask2 = _mm256_permute2f128_si256(scalesmask2, scalesmask2, 0);
34693469

3470-
#ifdef __AVX512F__
3470+
#if defined(__AVX512BW__) && defined(__AVX512DQ__)
34713471

34723472
int anc = nc - nc % 16; // Used to align nc with boundary of 16
34733473

@@ -4947,7 +4947,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
49474947
y = 0;
49484948
}
49494949

4950-
#endif //AVX512F
4950+
#endif // __AVX512BW__ && __AVX512DQ__
49514951

49524952
// Take group of four block_q8_Kx4 structures at each pass of the loop and perform dot product operation
49534953
for (; y < anr / 4; y += 4) {

0 commit comments

Comments
 (0)