diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 649998c4..8f43f9a8 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -113,14 +113,14 @@ jobs: repository: 'intel/intel-ipsec-mb' - name: Configure CMake - run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} && cat /proc/cpuinfo - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel - name: Test working-directory: ${{github.workspace}}/build - run: ctest -j 5 -C ${{env.BUILD_TYPE}} + run: ctest -j 5 -C ${{env.BUILD_TYPE}} --output-on-failure - name: Install working-directory: ${{github.workspace}}/build @@ -153,5 +153,5 @@ jobs: - name: Test working-directory: ${{github.workspace}}/build - run: ctest -j 5 -C ${{env.BUILD_TYPE}} + run: ctest -j 5 -C ${{env.BUILD_TYPE}} --output-on-failure diff --git a/lib/sse_t1/sha1_one_block_sse.asm b/lib/sse_t1/sha1_one_block_sse.asm index 1b20f3d5..7aaacc32 100644 --- a/lib/sse_t1/sha1_one_block_sse.asm +++ b/lib/sse_t1/sha1_one_block_sse.asm @@ -551,6 +551,8 @@ sha1_update_sse: mov r14, ARG3 +align 32 +process_block: ;; set up a-f based on h0-h4 mov a, [SZ*0 + CTX] mov b, [SZ*1 + CTX] @@ -558,15 +560,8 @@ sha1_update_sse: mov d, [SZ*3 + CTX] mov e, [SZ*4 + CTX] -align 32 -process_block: one_block - add INP, 64 - dec r14 - cmp r14, 0 - ja process_block - ;; update result digest h0-h4 add [SZ*0 + CTX], a add [SZ*1 + CTX], b @@ -574,6 +569,10 @@ process_block: add [SZ*3 + CTX], d add [SZ*4 + CTX], e + add INP, 64 + dec r14 + jnz process_block + %ifndef LINUX movdqa xmm8, [rsp + 2 * 16] movdqa xmm7, [rsp + 1 * 16] diff --git a/lib/sse_t2/sha1_ni_one_block_sse.asm b/lib/sse_t2/sha1_ni_one_block_sse.asm index a705b2a0..50ad37d8 100644 --- a/lib/sse_t2/sha1_ni_one_block_sse.asm +++ b/lib/sse_t2/sha1_ni_one_block_sse.asm @@ -322,18 +322,17 @@ sha1_ni_update_sse: movdqa SHUF_MASK, [rel PSHUFFLE_BYTE_FLIP_MASK] movdqa E_MASK, [rel UPPER_WORD_MASK] +align 32 +process_block: ;; Copy digests movdqa [rsp + frame.ABCD_SAVE], ABCD movdqa [rsp + frame.E_SAVE], E0 -align 32 -process_block: one_block_ni add INP, 64 dec ARG3 - cmp ARG3, 0 - ja process_block + jnz process_block ;; write out digests pshufd ABCD, ABCD, 0x1B