/*
 * This software is Copyright (c) 2025 magnum,
 * and it is hereby released to the general public under the following terms:
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * Copyright 2014-2023 The GmSSL Project. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 */
| 14 | +#ifndef OPENCL_SM3_H |
| 15 | +#define OPENCL_SM3_H |
| 16 | + |
| 17 | +#include "opencl_misc.h" |
| 18 | + |
| 19 | +#define SM3_BLOCK_SIZE 64 |
| 20 | +#define SM3_HASH_LENGTH 32 |
| 21 | + |
| 22 | +/* algorithm context */ |
| 23 | +typedef struct sm3_ctx { |
| 24 | + uint32_t hash[8]; /* 256-bit hash */ |
| 25 | + uchar block[SM3_BLOCK_SIZE]; /* 512-bit message block */ |
| 26 | + uint64_t num_blocks; /* processed number of blocks */ |
| 27 | + uint64_t num; /* index in the buffer of the last byte stored */ |
| 28 | +} sm3_ctx; |
| 29 | + |
| 30 | +__constant uint32_t K[64] = { |
| 31 | + 0x79cc4519U, 0xf3988a32U, 0xe7311465U, 0xce6228cbU, |
| 32 | + 0x9cc45197U, 0x3988a32fU, 0x7311465eU, 0xe6228cbcU, |
| 33 | + 0xcc451979U, 0x988a32f3U, 0x311465e7U, 0x6228cbceU, |
| 34 | + 0xc451979cU, 0x88a32f39U, 0x11465e73U, 0x228cbce6U, |
| 35 | + 0x9d8a7a87U, 0x3b14f50fU, 0x7629ea1eU, 0xec53d43cU, |
| 36 | + 0xd8a7a879U, 0xb14f50f3U, 0x629ea1e7U, 0xc53d43ceU, |
| 37 | + 0x8a7a879dU, 0x14f50f3bU, 0x29ea1e76U, 0x53d43cecU, |
| 38 | + 0xa7a879d8U, 0x4f50f3b1U, 0x9ea1e762U, 0x3d43cec5U, |
| 39 | + 0x7a879d8aU, 0xf50f3b14U, 0xea1e7629U, 0xd43cec53U, |
| 40 | + 0xa879d8a7U, 0x50f3b14fU, 0xa1e7629eU, 0x43cec53dU, |
| 41 | + 0x879d8a7aU, 0x0f3b14f5U, 0x1e7629eaU, 0x3cec53d4U, |
| 42 | + 0x79d8a7a8U, 0xf3b14f50U, 0xe7629ea1U, 0xcec53d43U, |
| 43 | + 0x9d8a7a87U, 0x3b14f50fU, 0x7629ea1eU, 0xec53d43cU, |
| 44 | + 0xd8a7a879U, 0xb14f50f3U, 0x629ea1e7U, 0xc53d43ceU, |
| 45 | + 0x8a7a879dU, 0x14f50f3bU, 0x29ea1e76U, 0x53d43cecU, |
| 46 | + 0xa7a879d8U, 0x4f50f3b1U, 0x9ea1e762U, 0x3d43cec5U, |
| 47 | +}; |
| 48 | + |
/*
 * Read a big-endian 32-bit word from the four bytes at x.
 * x is evaluated four times, so it must be free of side effects.
 */
#define GETU32(x) \
	(((uint32_t)(x)[0] << 24) | \
	 ((uint32_t)(x)[1] << 16) | \
	 ((uint32_t)(x)[2] <<  8) | \
	 ((uint32_t)(x)[3]))

/*
 * Store the low 32 bits of y as a big-endian word into the four bytes at x.
 * Both x and y are evaluated four times, so they must be free of side effects.
 */
#define PUTU32(x, y) \
	((x)[0] = (uchar)((y) >> 24), \
	 (x)[1] = (uchar)((y) >> 16), \
	 (x)[2] = (uchar)((y) >>  8), \
	 (x)[3] = (uchar)(y))
| 60 | + |
/* Rotate the 32-bit value a left by b bits, using the OpenCL rotate() built-in. */
#define rol32(a, b) rotate((a), (uint)(b))

/* Permutation applied to the new E value in every round. */
#define P0(x) ((x) ^ rol32((x),  9) ^ rol32((x), 17))
/* Permutation used by the message expansion. */
#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23))

/*
 * Use the three-input LUT primitive when HAVE_LUT3 is set.
 * NOTE(review): HAVE_LUT3 and lut3() are presumably provided by
 * opencl_misc.h, with the last lut3() argument being the truth table -- confirm.
 */
#define SM3_LUT3 HAVE_LUT3

/*
 * Boolean round functions.  The *00 variants (three-way XOR) are used for
 * rounds 0..15, the *16 variants (FF: majority, GG: x selects y or z) for
 * rounds 16..63.
 */
#if SM3_LUT3
#define FF00(x, y, z) lut3((x), (y), (z), 0x96)
#define FF16(x, y, z) lut3((x), (y), (z), 0xE8)
#define GG00(x, y, z) lut3((x), (y), (z), 0x96)
#define GG16(x, y, z) lut3((x), (y), (z), 0xCA)
#else
#define FF00(x, y, z) ((x) ^ (y) ^ (z))
#define FF16(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
#define GG00(x, y, z) ((x) ^ (y) ^ (z))
#define GG16(x, y, z) ((((y) ^ (z)) & (x)) ^ (z))
#endif
| 79 | + |
/*
 * The round macros operate on names that must exist in the calling scope:
 * the uint32_t temporaries SS0, SS1 and SS2, the message schedule W[] and
 * the constant table K[].  A..H name the eight working variables; callers
 * rotate the argument order between rounds instead of moving values around.
 */

/*
 * State update shared by every round.  FFn/GGn select the boolean functions.
 * After the step, D holds the new "A" value and H the new "E" value, while
 * B and F have been rotated in place.
 */
#define SM3_ROUND_STEP(FFn, GGn, j, A, B, C, D, E, F, G, H) \
	SS0 = rol32(A, 12); \
	SS1 = rol32(SS0 + E + K[(j)], 7); \
	SS2 = SS1 ^ SS0; \
	D += FFn(A, B, C) + SS2 + (W[(j)] ^ W[(j) + 4]); \
	SS1 += GGn(E, F, G) + H + W[(j)]; \
	B = rol32(B, 9); \
	H = P0(SS1); \
	F = rol32(F, 19)

/* Message expansion: derive W[j + 16] from earlier schedule words. */
#define SM3_EXPAND(j) \
	W[(j) + 16] = P1(W[(j)] ^ W[(j) + 7] ^ rol32(W[(j) + 13], 15)) ^ \
		rol32(W[(j) + 3], 7) ^ W[(j) + 10]

/* Rounds 0..15: XOR boolean functions, plus message expansion. */
#define SM3_ROUND_0(j, A, B, C, D, E, F, G, H) \
	do { \
		SM3_ROUND_STEP(FF00, GG00, j, A, B, C, D, E, F, G, H); \
		SM3_EXPAND(j); \
	} while (0)

/* Rounds 16..51: majority/choose boolean functions, plus message expansion. */
#define SM3_ROUND_1(j, A, B, C, D, E, F, G, H) \
	do { \
		SM3_ROUND_STEP(FF16, GG16, j, A, B, C, D, E, F, G, H); \
		SM3_EXPAND(j); \
	} while (0)

/* Rounds 52..63: as SM3_ROUND_1, but W[] is already complete (up to W[67]). */
#define SM3_ROUND_2(j, A, B, C, D, E, F, G, H) \
	do { \
		SM3_ROUND_STEP(FF16, GG16, j, A, B, C, D, E, F, G, H); \
	} while (0)

| 111 | + |
| 112 | + |
| 113 | +INLINE void sm3_compress_blocks(uint32_t *hash, const uchar *data, size_t blocks) |
| 114 | +{ |
| 115 | + uint32_t A, B, C, D, E, F, G, H; |
| 116 | + uint32_t W[68]; |
| 117 | + uint32_t SS0, SS1, SS2; |
| 118 | + int j; |
| 119 | + |
| 120 | + while (blocks--) { |
| 121 | + |
| 122 | + A = hash[0]; |
| 123 | + B = hash[1]; |
| 124 | + C = hash[2]; |
| 125 | + D = hash[3]; |
| 126 | + E = hash[4]; |
| 127 | + F = hash[5]; |
| 128 | + G = hash[6]; |
| 129 | + H = hash[7]; |
| 130 | + |
| 131 | + for (j = 0; j < 16; j++) { |
| 132 | + W[j] = GETU32(data + j * 4); |
| 133 | + } |
| 134 | + |
| 135 | + SM3_ROUND_0(0, A, B, C, D, E, F, G, H); |
| 136 | + SM3_ROUND_0(1, D, A, B, C, H, E, F, G); |
| 137 | + SM3_ROUND_0(2, C, D, A, B, G, H, E, F); |
| 138 | + SM3_ROUND_0(3, B, C, D, A, F, G, H, E); |
| 139 | + SM3_ROUND_0(4, A, B, C, D, E, F, G, H); |
| 140 | + SM3_ROUND_0(5, D, A, B, C, H, E, F, G); |
| 141 | + SM3_ROUND_0(6, C, D, A, B, G, H, E, F); |
| 142 | + SM3_ROUND_0(7, B, C, D, A, F, G, H, E); |
| 143 | + SM3_ROUND_0(8, A, B, C, D, E, F, G, H); |
| 144 | + SM3_ROUND_0(9, D, A, B, C, H, E, F, G); |
| 145 | + SM3_ROUND_0(10, C, D, A, B, G, H, E, F); |
| 146 | + SM3_ROUND_0(11, B, C, D, A, F, G, H, E); |
| 147 | + SM3_ROUND_0(12, A, B, C, D, E, F, G, H); |
| 148 | + SM3_ROUND_0(13, D, A, B, C, H, E, F, G); |
| 149 | + SM3_ROUND_0(14, C, D, A, B, G, H, E, F); |
| 150 | + SM3_ROUND_0(15, B, C, D, A, F, G, H, E); |
| 151 | + SM3_ROUND_1(16, A, B, C, D, E, F, G, H); |
| 152 | + SM3_ROUND_1(17, D, A, B, C, H, E, F, G); |
| 153 | + SM3_ROUND_1(18, C, D, A, B, G, H, E, F); |
| 154 | + SM3_ROUND_1(19, B, C, D, A, F, G, H, E); |
| 155 | + SM3_ROUND_1(20, A, B, C, D, E, F, G, H); |
| 156 | + SM3_ROUND_1(21, D, A, B, C, H, E, F, G); |
| 157 | + SM3_ROUND_1(22, C, D, A, B, G, H, E, F); |
| 158 | + SM3_ROUND_1(23, B, C, D, A, F, G, H, E); |
| 159 | + SM3_ROUND_1(24, A, B, C, D, E, F, G, H); |
| 160 | + SM3_ROUND_1(25, D, A, B, C, H, E, F, G); |
| 161 | + SM3_ROUND_1(26, C, D, A, B, G, H, E, F); |
| 162 | + SM3_ROUND_1(27, B, C, D, A, F, G, H, E); |
| 163 | + SM3_ROUND_1(28, A, B, C, D, E, F, G, H); |
| 164 | + SM3_ROUND_1(29, D, A, B, C, H, E, F, G); |
| 165 | + SM3_ROUND_1(30, C, D, A, B, G, H, E, F); |
| 166 | + SM3_ROUND_1(31, B, C, D, A, F, G, H, E); |
| 167 | + SM3_ROUND_1(32, A, B, C, D, E, F, G, H); |
| 168 | + SM3_ROUND_1(33, D, A, B, C, H, E, F, G); |
| 169 | + SM3_ROUND_1(34, C, D, A, B, G, H, E, F); |
| 170 | + SM3_ROUND_1(35, B, C, D, A, F, G, H, E); |
| 171 | + SM3_ROUND_1(36, A, B, C, D, E, F, G, H); |
| 172 | + SM3_ROUND_1(37, D, A, B, C, H, E, F, G); |
| 173 | + SM3_ROUND_1(38, C, D, A, B, G, H, E, F); |
| 174 | + SM3_ROUND_1(39, B, C, D, A, F, G, H, E); |
| 175 | + SM3_ROUND_1(40, A, B, C, D, E, F, G, H); |
| 176 | + SM3_ROUND_1(41, D, A, B, C, H, E, F, G); |
| 177 | + SM3_ROUND_1(42, C, D, A, B, G, H, E, F); |
| 178 | + SM3_ROUND_1(43, B, C, D, A, F, G, H, E); |
| 179 | + SM3_ROUND_1(44, A, B, C, D, E, F, G, H); |
| 180 | + SM3_ROUND_1(45, D, A, B, C, H, E, F, G); |
| 181 | + SM3_ROUND_1(46, C, D, A, B, G, H, E, F); |
| 182 | + SM3_ROUND_1(47, B, C, D, A, F, G, H, E); |
| 183 | + SM3_ROUND_1(48, A, B, C, D, E, F, G, H); |
| 184 | + SM3_ROUND_1(49, D, A, B, C, H, E, F, G); |
| 185 | + SM3_ROUND_1(50, C, D, A, B, G, H, E, F); |
| 186 | + SM3_ROUND_1(51, B, C, D, A, F, G, H, E); |
| 187 | + SM3_ROUND_2(52, A, B, C, D, E, F, G, H); |
| 188 | + SM3_ROUND_2(53, D, A, B, C, H, E, F, G); |
| 189 | + SM3_ROUND_2(54, C, D, A, B, G, H, E, F); |
| 190 | + SM3_ROUND_2(55, B, C, D, A, F, G, H, E); |
| 191 | + SM3_ROUND_2(56, A, B, C, D, E, F, G, H); |
| 192 | + SM3_ROUND_2(57, D, A, B, C, H, E, F, G); |
| 193 | + SM3_ROUND_2(58, C, D, A, B, G, H, E, F); |
| 194 | + SM3_ROUND_2(59, B, C, D, A, F, G, H, E); |
| 195 | + SM3_ROUND_2(60, A, B, C, D, E, F, G, H); |
| 196 | + SM3_ROUND_2(61, D, A, B, C, H, E, F, G); |
| 197 | + SM3_ROUND_2(62, C, D, A, B, G, H, E, F); |
| 198 | + SM3_ROUND_2(63, B, C, D, A, F, G, H, E); |
| 199 | + |
| 200 | + hash[0] ^= A; |
| 201 | + hash[1] ^= B; |
| 202 | + hash[2] ^= C; |
| 203 | + hash[3] ^= D; |
| 204 | + hash[4] ^= E; |
| 205 | + hash[5] ^= F; |
| 206 | + hash[6] ^= G; |
| 207 | + hash[7] ^= H; |
| 208 | + |
| 209 | + data += SM3_BLOCK_SIZE; |
| 210 | + } |
| 211 | +} |
| 212 | + |
| 213 | +INLINE void sm3_init(sm3_ctx *ctx) |
| 214 | +{ |
| 215 | + memset_p(ctx, 0, sizeof(sm3_ctx)); |
| 216 | + /* Set IV */ |
| 217 | + ctx->hash[0] = 0x7380166fU; |
| 218 | + ctx->hash[1] = 0x4914b2b9U; |
| 219 | + ctx->hash[2] = 0x172442d7U; |
| 220 | + ctx->hash[3] = 0xda8a0600U; |
| 221 | + ctx->hash[4] = 0xa96f30bcU; |
| 222 | + ctx->hash[5] = 0x163138aaU; |
| 223 | + ctx->hash[6] = 0xe38dee4dU; |
| 224 | + ctx->hash[7] = 0xb0fb0e4eU; |
| 225 | +} |
| 226 | + |
| 227 | +INLINE void sm3_update(sm3_ctx *ctx, const void *_data, size_t size) |
| 228 | +{ |
| 229 | + const uchar *data = _data; |
| 230 | + size_t blocks; |
| 231 | + |
| 232 | + ctx->num &= 0x3f; |
| 233 | + if (ctx->num) { |
| 234 | + size_t left = SM3_BLOCK_SIZE - ctx->num; |
| 235 | + |
| 236 | + if (size < left) { |
| 237 | + memcpy_pp(ctx->block + ctx->num, data, size); |
| 238 | + ctx->num += size; |
| 239 | + return; |
| 240 | + } else { |
| 241 | + memcpy_pp(ctx->block + ctx->num, data, left); |
| 242 | + sm3_compress_blocks(ctx->hash, ctx->block, 1); |
| 243 | + ctx->num_blocks++; |
| 244 | + data += left; |
| 245 | + size -= left; |
| 246 | + } |
| 247 | + } |
| 248 | + |
| 249 | + blocks = size / SM3_BLOCK_SIZE; |
| 250 | + if (blocks) { |
| 251 | + sm3_compress_blocks(ctx->hash, data, blocks); |
| 252 | + ctx->num_blocks += blocks; |
| 253 | + data += SM3_BLOCK_SIZE * blocks; |
| 254 | + size -= SM3_BLOCK_SIZE * blocks; |
| 255 | + } |
| 256 | + |
| 257 | + ctx->num = size; |
| 258 | + if (size) { |
| 259 | + memcpy_pp(ctx->block, data, size); |
| 260 | + } |
| 261 | +} |
| 262 | + |
| 263 | +INLINE void sm3_final(sm3_ctx *ctx, void *_result) |
| 264 | +{ |
| 265 | + uchar *result = _result; |
| 266 | + int i; |
| 267 | + |
| 268 | + ctx->num &= 0x3f; |
| 269 | + ctx->block[ctx->num] = 0x80; |
| 270 | + |
| 271 | + if (ctx->num <= SM3_BLOCK_SIZE - 9) { |
| 272 | + memset_p(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 9); |
| 273 | + } else { |
| 274 | + memset_p(ctx->block + ctx->num + 1, 0, SM3_BLOCK_SIZE - ctx->num - 1); |
| 275 | + sm3_compress_blocks(ctx->hash, ctx->block, 1); |
| 276 | + memset_p(ctx->block, 0, SM3_BLOCK_SIZE - 8); |
| 277 | + } |
| 278 | + |
| 279 | + PUTU32(ctx->block + 56, ctx->num_blocks >> 23); |
| 280 | + PUTU32(ctx->block + 60, (ctx->num_blocks << 9) + (ctx->num << 3)); |
| 281 | + sm3_compress_blocks(ctx->hash, ctx->block, 1); |
| 282 | + |
| 283 | + for (i = 0; i < 8; i++) { |
| 284 | + PUTU32(result + i * 4, ctx->hash[i]); |
| 285 | + } |
| 286 | +} |
| 287 | + |
#endif /* OPENCL_SM3_H */