From 27447f1b5f6d432384e2f076a7b4aa5fc63e2bdf Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 16 Mar 2014 22:33:17 -0400 Subject: [PATCH 1/8] Document RUN_SETUP_GENTLY and improve RUN_SETUP docs Signed-off-by: David Turner --- Documentation/technical/api-builtin.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Documentation/technical/api-builtin.txt b/Documentation/technical/api-builtin.txt index e3d6e7a79a6c21..b250c1a267d958 100644 --- a/Documentation/technical/api-builtin.txt +++ b/Documentation/technical/api-builtin.txt @@ -23,10 +23,15 @@ where options is the bitwise-or of: `RUN_SETUP`:: - Make sure there is a Git directory to work on, and if there is a - work tree, chdir to the top of it if the command was invoked - in a subdirectory. If there is no work tree, no chdir() is - done. + If there is not a Git directory to work on, abort. If there + is a work tree, chdir to the top of it if the command was + invoked in a subdirectory. If there is no work tree, no + chdir() is done. + +`RUN_SETUP_GENTLY`:: + + If there is a Git directory, chdir as per RUN_SETUP, otherwise, + don't chdir anywhere. `USE_PAGER`:: From 59fa6612a9b27292b6bef55be4c47c709673043d Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 4 May 2014 20:11:23 -0400 Subject: [PATCH 2/8] Break out code to write integrity-checked file The index is integrity-checked with SHA1. Break the code to write the index while computing the SHA1 out into a separate file, so that it's easier for other code to use. Signed-off-by: David Turner --- Makefile | 2 ++ block-sha1/sha1.h | 5 +++ hash-io.c | 79 +++++++++++++++++++++++++++++++++++++++++++ hash-io.h | 37 ++++++++++++++++++++ read-cache.c | 86 +++++++++-------------------------------------- 5 files changed, 138 insertions(+), 71 deletions(-) create mode 100644 hash-io.c create mode 100644 hash-io.h diff --git a/Makefile b/Makefile index a53f3a8326c2e6..e4f689cffced7c 100644 --- a/Makefile +++ b/Makefile @@ -673,6 +673,7 @@ LIB_H += git-compat-util.h LIB_H += gpg-interface.h LIB_H += graph.h LIB_H += grep.h +LIB_H += hash-io.h LIB_H += hashmap.h LIB_H += help.h LIB_H += http.h @@ -810,6 +811,7 @@ LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o +LIB_OBJS += hash-io.o LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o diff --git a/block-sha1/sha1.h b/block-sha1/sha1.h index b864df623e3b89..9235c26bf97200 100644 --- a/block-sha1/sha1.h +++ b/block-sha1/sha1.h @@ -1,3 +1,6 @@ +#ifndef BLOCK_SHA1_H +#define BLOCK_SHA1_H + /* * SHA1 routine optimized to do word accesses rather than byte accesses, * and to avoid unnecessary copies into the context array. @@ -20,3 +23,5 @@ void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx); #define git_SHA1_Init blk_SHA1_Init #define git_SHA1_Update blk_SHA1_Update #define git_SHA1_Final blk_SHA1_Final + +#endif diff --git a/hash-io.c b/hash-io.c new file mode 100644 index 00000000000000..40f27d33ce9e31 --- /dev/null +++ b/hash-io.c @@ -0,0 +1,79 @@ +#include +#include +#include + +#include "git-compat-util.h" +#include "hash-io.h" + +extern ssize_t write_in_full(int fd, const void *buf, size_t count); + +static int write_buf_with_hash(struct hash_context *ctx, int fd) +{ + unsigned int buffered = ctx->write_buffer_len; + git_SHA1_Update(ctx->sc, ctx->write_buffer, buffered); + + if (write_in_full(fd, ctx->write_buffer, buffered) != buffered) + return -1; + ctx->write_buffer_len = 0; + return 0; +} + +int write_with_hash(struct hash_context *ctx, int fd, const void *data, unsigned int len) +{ + while (len) { + unsigned int buffered = ctx->write_buffer_len; + unsigned int partial = HASH_IO_WRITE_BUFFER_SIZE - buffered; + if (partial > len) + partial = len; + memcpy(ctx->write_buffer + buffered, data, partial); + buffered += partial; + if (buffered == HASH_IO_WRITE_BUFFER_SIZE) { + ctx->write_buffer_len = buffered; + if (write_buf_with_hash(ctx, fd)) + return -1; + buffered = 0; + } + ctx->write_buffer_len = buffered; + len -= partial; + data = (char *) data + partial; + } + return 0; +} + +static int write_with_sha1_flush(struct hash_context *ctx, int fd) +{ + unsigned int left = ctx->write_buffer_len; + + if (left) { + git_SHA1_Update(ctx->sc, ctx->write_buffer, left); + } + + /* Flush first if not enough space for SHA1 signature */ + if (left + 20 > HASH_IO_WRITE_BUFFER_SIZE) { + if (write_in_full(fd, ctx->write_buffer, left) != left) + return -1; + left = 0; + } + + /* Append the SHA1 signature at the end */ + git_SHA1_Final(ctx->write_buffer + left, ctx->sc); + left += 20; + return (write_in_full(fd, ctx->write_buffer, left) != left) ? -1 : 0; +} + + +int write_with_hash_flush(struct hash_context *ctx, int fd) { + return write_with_sha1_flush(ctx, fd); +} + +void hash_context_init(struct hash_context *ctx) +{ + ctx->write_buffer_len = 0; + ctx->sc = xmalloc(sizeof *ctx->sc); + git_SHA1_Init(ctx->sc); +} + +void hash_context_release(struct hash_context *ctx) +{ + free(ctx->sc); +} diff --git a/hash-io.h b/hash-io.h new file mode 100644 index 00000000000000..d44a725eb108dd --- /dev/null +++ b/hash-io.h @@ -0,0 +1,37 @@ +#ifndef HASH_IO_H +#define HASH_IO_H + +#include "vmac.h" + +#include SHA1_HEADER +#ifndef git_SHA_CTX +#define git_SHA_CTX SHA_CTX +#define git_SHA1_Init SHA1_Init +#define git_SHA1_Update SHA1_Update +#define git_SHA1_Final SHA1_Final +#endif + +enum hash_io_type { + HASH_IO_VMAC, + HASH_IO_SHA1 +}; + + +//must be a multiple of VMAC_NHBYTES +#define HASH_IO_WRITE_BUFFER_SIZE 8192 + +struct hash_context { + git_SHA_CTX *sc; + unsigned long write_buffer_len; + unsigned char write_buffer[HASH_IO_WRITE_BUFFER_SIZE]; +}; + +const unsigned char *VMAC_KEY; + +void hash_context_init(struct hash_context *ctx); +void hash_context_release(struct hash_context *ctx); + +int write_with_hash(struct hash_context *context, int fd, const void *data, unsigned int len); +int write_with_hash_flush(struct hash_context *context, int fd); + +#endif diff --git a/read-cache.c b/read-cache.c index ba13353b377d4f..516b9f377c41df 100644 --- a/read-cache.c +++ b/read-cache.c @@ -14,6 +14,7 @@ #include "resolve-undo.h" #include "strbuf.h" #include "varint.h" +#include "hash-io.h" static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -1565,73 +1566,13 @@ int unmerged_index(const struct index_state *istate) return 0; } -#define WRITE_BUFFER_SIZE 8192 -static unsigned char write_buffer[WRITE_BUFFER_SIZE]; -static unsigned long write_buffer_len; - -static int ce_write_flush(git_SHA_CTX *context, int fd) -{ - unsigned int buffered = write_buffer_len; - if (buffered) { - git_SHA1_Update(context, write_buffer, buffered); - if (write_in_full(fd, write_buffer, buffered) != buffered) - return -1; - write_buffer_len = 0; - } - return 0; -} - -static int ce_write(git_SHA_CTX *context, int fd, void *data, unsigned int len) -{ - while (len) { - unsigned int buffered = write_buffer_len; - unsigned int partial = WRITE_BUFFER_SIZE - buffered; - if (partial > len) - partial = len; - memcpy(write_buffer + buffered, data, partial); - buffered += partial; - if (buffered == WRITE_BUFFER_SIZE) { - write_buffer_len = buffered; - if (ce_write_flush(context, fd)) - return -1; - buffered = 0; - } - write_buffer_len = buffered; - len -= partial; - data = (char *) data + partial; - } - return 0; -} - -static int write_index_ext_header(git_SHA_CTX *context, int fd, +static int write_index_ext_header(struct hash_context *context, int fd, unsigned int ext, unsigned int sz) { ext = htonl(ext); sz = htonl(sz); - return ((ce_write(context, fd, &ext, 4) < 0) || - (ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0; -} - -static int ce_flush(git_SHA_CTX *context, int fd) -{ - unsigned int left = write_buffer_len; - - if (left) { - write_buffer_len = 0; - git_SHA1_Update(context, write_buffer, left); - } - - /* Flush first if not enough space for SHA1 signature */ - if (left + 20 > WRITE_BUFFER_SIZE) { - if (write_in_full(fd, write_buffer, left) != left) - return -1; - left = 0; - } - - /* Append the SHA1 signature at the end */ - git_SHA1_Final(write_buffer + left, context); - left += 20; - return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0; + return ((write_with_hash(context, fd, &ext, 4) < 0) || + (write_with_hash(context, fd, &sz, 4) < 0)) ? -1 : 0; } static void ce_smudge_racily_clean_entry(struct cache_entry *ce) @@ -1715,7 +1656,7 @@ static char *copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk, } } -static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, +static int ce_write_entry(struct hash_context *c, int fd, struct cache_entry *ce, struct strbuf *previous_name) { int size; @@ -1755,7 +1696,7 @@ static int ce_write_entry(git_SHA_CTX *c, int fd, struct cache_entry *ce, ce->name + common, ce_namelen(ce) - common); } - result = ce_write(c, fd, ondisk, size); + result = write_with_hash(c, fd, ondisk, size); free(ondisk); return result; } @@ -1787,7 +1728,7 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile int write_index(struct index_state *istate, int newfd) { - git_SHA_CTX c; + struct hash_context c; struct cache_header hdr; int i, err, removed, extended, hdr_version; struct cache_entry **cache = istate->cache; @@ -1820,8 +1761,9 @@ int write_index(struct index_state *istate, int newfd) hdr.hdr_version = htonl(hdr_version); hdr.hdr_entries = htonl(entries - removed); - git_SHA1_Init(&c); - if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0) + hash_context_init(&c); + + if (write_with_hash(&c, newfd, &hdr, sizeof(hdr)) < 0) return -1; previous_name = (hdr_version == 4) ? &previous_name_buf : NULL; @@ -1853,7 +1795,7 @@ int write_index(struct index_state *istate, int newfd) cache_tree_write(&sb, istate->cache_tree); err = write_index_ext_header(&c, newfd, CACHE_EXT_TREE, sb.len) < 0 - || ce_write(&c, newfd, sb.buf, sb.len) < 0; + || write_with_hash(&c, newfd, sb.buf, sb.len) < 0; strbuf_release(&sb); if (err) return -1; @@ -1864,16 +1806,18 @@ int write_index(struct index_state *istate, int newfd) resolve_undo_write(&sb, istate->resolve_undo); err = write_index_ext_header(&c, newfd, CACHE_EXT_RESOLVE_UNDO, sb.len) < 0 - || ce_write(&c, newfd, sb.buf, sb.len) < 0; + || write_with_hash(&c, newfd, sb.buf, sb.len) < 0; strbuf_release(&sb); if (err) return -1; } - if (ce_flush(&c, newfd) || fstat(newfd, &st)) + if (write_with_hash_flush(&c, newfd) || fstat(newfd, &st)) return -1; istate->timestamp.sec = (unsigned int)st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); + hash_context_release(&c); + return 0; } From 6a1fcdd2093cc788b92fbd76aca249a7f697f2b1 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 17 Mar 2014 11:56:58 -0400 Subject: [PATCH 3/8] Optionally use VMAC for index integrity checking Using SHA-1 to check the index integrity takes up about 5% of the runtime of git status (depending on the repository). We can do better by using VMAC instead. VMAC isn't really intended for integrity checking, but it has the correct properties. It is also approximately 5 times faster than SHA-1 on my machine. Signed-off-by: David Turner --- Documentation/technical/index-format.txt | 6 +- Makefile | 4 + cache.h | 4 +- hash-io.c | 118 +- hash-io.h | 15 +- read-cache.c | 30 +- rijndael-alg-fst.c | 1400 ++++++++++++++++++++++ rijndael-alg-fst.h | 47 + vmac.c | 1215 +++++++++++++++++++ vmac.h | 176 +++ 10 files changed, 2991 insertions(+), 24 deletions(-) create mode 100644 rijndael-alg-fst.c create mode 100644 rijndael-alg-fst.h create mode 100644 vmac.c create mode 100644 vmac.h diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index f352a9b22e3138..491a637b8a515a 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -32,8 +32,10 @@ Git index format Extension data - - 160-bit SHA-1 over the content of the index file before this - checksum. + - For versions before 5, a 160-bit SHA-1 over the content of the + index file before this checksum. For version 5, a 128-bit VMAC + hash (keyed with "abcdefghijklmnop") over same, followed by 4 + bytes of zeros. == Index entry diff --git a/Makefile b/Makefile index e4f689cffced7c..4cd642d354f19e 100644 --- a/Makefile +++ b/Makefile @@ -712,6 +712,7 @@ LIB_H += remote.h LIB_H += rerere.h LIB_H += resolve-undo.h LIB_H += revision.h +LIB_H += rijndael-alg-fst.h LIB_H += run-command.h LIB_H += send-pack.h LIB_H += sequencer.h @@ -742,6 +743,7 @@ LIB_H += vcs-svn/repo_tree.h LIB_H += vcs-svn/sliding_window.h LIB_H += vcs-svn/svndiff.h LIB_H += vcs-svn/svndump.h +LIB_H += vmac.h LIB_H += walker.h LIB_H += wildmatch.h LIB_H += wt-status.h @@ -865,6 +867,7 @@ LIB_OBJS += replace_object.o LIB_OBJS += rerere.o LIB_OBJS += resolve-undo.o LIB_OBJS += revision.o +LIB_OBJS += rijndael-alg-fst.o LIB_OBJS += run-command.o LIB_OBJS += send-pack.o LIB_OBJS += sequencer.o @@ -898,6 +901,7 @@ LIB_OBJS += utf8.o LIB_OBJS += varint.o LIB_OBJS += version.o LIB_OBJS += versioncmp.o +LIB_OBJS += vmac.o LIB_OBJS += walker.o LIB_OBJS += wildmatch.o LIB_OBJS += wrapper.o diff --git a/cache.h b/cache.h index 107ac61b68f15b..c5917f478ee08c 100644 --- a/cache.h +++ b/cache.h @@ -107,8 +107,8 @@ struct cache_header { }; #define INDEX_FORMAT_LB 2 -#define INDEX_FORMAT_UB 4 - +#define INDEX_FORMAT_UB 5 +#define INDEX_FORMAT_VMAC_LB 5 /* * The "cache_time" is just the low 32 bits of the * time. It doesn't matter if it overflows - we only diff --git a/hash-io.c b/hash-io.c index 40f27d33ce9e31..907a3608aedf50 100644 --- a/hash-io.c +++ b/hash-io.c @@ -4,13 +4,25 @@ #include "git-compat-util.h" #include "hash-io.h" +#include "vmac.h" + +const unsigned char *VMAC_KEY = (const unsigned char*) "abcdefghijklmnop"; extern ssize_t write_in_full(int fd, const void *buf, size_t count); static int write_buf_with_hash(struct hash_context *ctx, int fd) { unsigned int buffered = ctx->write_buffer_len; - git_SHA1_Update(ctx->sc, ctx->write_buffer, buffered); + switch (ctx->ty) { + case HASH_IO_VMAC: + vhash_update(ctx->write_buffer, buffered, ctx->c.vc); + break; + case HASH_IO_SHA1: + git_SHA1_Update(ctx->c.sc, ctx->write_buffer, buffered); + break; + default: + error("Bad hash type"); + } if (write_in_full(fd, ctx->write_buffer, buffered) != buffered) return -1; @@ -40,12 +52,52 @@ int write_with_hash(struct hash_context *ctx, int fd, const void *data, unsigned return 0; } +void vmac_final(unsigned char *buf, vmac_ctx_t *ctx) +{ + uint64_t tagl; + uint64_t tagh = htonll(vhash(NULL, 0, &tagl, ctx)); + tagl = htonll(tagl); + + memcpy(buf, &tagl, 8); + memcpy(buf + 8, &tagh, 8); + memset(buf + 16, 0, 4); +} + +static int write_with_vmac_flush(struct hash_context *ctx, int fd) +{ + unsigned int left = ctx->write_buffer_len; + + if (left) { + int unaligned = left % VMAC_NHBYTES; + int bytes_to_hash = left; + ctx->write_buffer_len = 0; + if (unaligned) { + int zeros = VMAC_NHBYTES - unaligned; + memset(ctx->write_buffer + left, 0, zeros); + bytes_to_hash += zeros; + } + vhash_update(ctx->write_buffer, bytes_to_hash, ctx->c.vc); + } + + /* Flush first if not enough space for SHA1 signature */ + if (left + 20 > HASH_IO_WRITE_BUFFER_SIZE) { + if (write_in_full(fd, ctx->write_buffer, left) != left) + return -1; + left = 0; + } + + /* Append the VMAC signature at the end */ + vmac_final(ctx->write_buffer + left, ctx->c.vc); + left += 20; + return (write_in_full(fd, ctx->write_buffer, left) != left) ? -1 : 0; +} + static int write_with_sha1_flush(struct hash_context *ctx, int fd) { unsigned int left = ctx->write_buffer_len; if (left) { - git_SHA1_Update(ctx->sc, ctx->write_buffer, left); + git_SHA1_Update(ctx->c.sc, ctx->write_buffer, left); } /* Flush first if not enough space for SHA1 signature */ @@ -56,24 +108,74 @@ static int write_with_sha1_flush(struct hash_context *ctx, int fd) } /* Append the SHA1 signature at the end */ - git_SHA1_Final(ctx->write_buffer + left, ctx->sc); + git_SHA1_Final(ctx->write_buffer + left, ctx->c.sc); left += 20; return (write_in_full(fd, ctx->write_buffer, left) != left) ? -1 : 0; } int write_with_hash_flush(struct hash_context *ctx, int fd) { - return write_with_sha1_flush(ctx, fd); + switch (ctx->ty) { + case HASH_IO_VMAC: + return write_with_vmac_flush(ctx, fd); + break; + case HASH_IO_SHA1: + return write_with_sha1_flush(ctx, fd); + break; + default: + error("Bad hash type"); + return -1; + } + } -void hash_context_init(struct hash_context *ctx) +static unsigned char extra[VMAC_NHBYTES]; + +void vmac_update_unaligned(const void *buf, unsigned int len, vmac_ctx_t *ctx) { + size_t first_len = len; + size_t extra_bytes = first_len % VMAC_NHBYTES; + + if (first_len - extra_bytes) + vhash_update(buf, first_len - extra_bytes, ctx); + + if (extra_bytes) { + first_len -= extra_bytes; + memcpy(extra, (const unsigned char *) buf + first_len, + extra_bytes); + memset(extra + extra_bytes, 0, VMAC_NHBYTES - extra_bytes); + vhash_update(extra, VMAC_NHBYTES, ctx); + } +} + +void hash_context_init(struct hash_context *ctx, enum hash_io_type type) +{ + ctx->ty = type; ctx->write_buffer_len = 0; - ctx->sc = xmalloc(sizeof *ctx->sc); - git_SHA1_Init(ctx->sc); + switch (type) { + case HASH_IO_VMAC: + ctx->c.vc = xmalloc(sizeof *ctx->c.vc); + vmac_set_key(VMAC_KEY, ctx->c.vc); + break; + case HASH_IO_SHA1: + ctx->c.sc = xmalloc(sizeof *ctx->c.sc); + git_SHA1_Init(ctx->c.sc); + break; + default: + error("Bad hash type"); + } } void hash_context_release(struct hash_context *ctx) { - free(ctx->sc); + switch (ctx->ty) { + case HASH_IO_VMAC: + free(ctx->c.vc); + break; + case HASH_IO_SHA1: + free(ctx->c.sc); + break; + default: + error("Bad hash type"); + } } diff --git a/hash-io.h b/hash-io.h index d44a725eb108dd..fe746f7ef1660a 100644 --- a/hash-io.h +++ b/hash-io.h @@ -21,17 +21,28 @@ enum hash_io_type { #define HASH_IO_WRITE_BUFFER_SIZE 8192 struct hash_context { - git_SHA_CTX *sc; + enum hash_io_type ty; + union { + vmac_ctx_t *vc; + git_SHA_CTX *sc; + } c; unsigned long write_buffer_len; unsigned char write_buffer[HASH_IO_WRITE_BUFFER_SIZE]; }; const unsigned char *VMAC_KEY; -void hash_context_init(struct hash_context *ctx); +void hash_context_init(struct hash_context *ctx, enum hash_io_type ty); void hash_context_release(struct hash_context *ctx); int write_with_hash(struct hash_context *context, int fd, const void *data, unsigned int len); int write_with_hash_flush(struct hash_context *context, int fd); +/* These are some helper functions to make the vmac interface closer + * to the SHA interface. vmac_update_unaligned is necessary because + * vmac operates on 128-byte chunks. */ + +void vmac_update_unaligned(const void *buf, unsigned int len, vmac_ctx_t *context); +void vmac_final(unsigned char *buf, vmac_ctx_t *context); + #endif diff --git a/read-cache.c b/read-cache.c index 516b9f377c41df..a124e594b72f39 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1305,8 +1305,7 @@ struct ondisk_cache_entry_extended { static int verify_hdr(struct cache_header *hdr, unsigned long size) { - git_SHA_CTX c; - unsigned char sha1[20]; + unsigned char hash[20]; int hdr_version; if (hdr->hdr_signature != htonl(CACHE_SIGNATURE)) @@ -1314,11 +1313,19 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) hdr_version = ntohl(hdr->hdr_version); if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version) return error("bad index version %d", hdr_version); - git_SHA1_Init(&c); - git_SHA1_Update(&c, hdr, size - 20); - git_SHA1_Final(sha1, &c); - if (hashcmp(sha1, (unsigned char *)hdr + size - 20)) - return error("bad index file sha1 signature"); + if (hdr_version >= INDEX_FORMAT_VMAC_LB) { + vmac_ctx_t c; + vmac_set_key(VMAC_KEY, &c); + vmac_update_unaligned(hdr, size - 20, &c); + vmac_final(hash, &c); + } else { + git_SHA_CTX c; + git_SHA1_Init(&c); + git_SHA1_Update(&c, hdr, size - 20); + git_SHA1_Final(hash, &c); + } + if (hashcmp(hash, (unsigned char *)hdr + size - 20)) + return error("bad index file signature"); return 0; } @@ -1484,7 +1491,7 @@ int read_index_from(struct index_state *istate, const char *path) istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache)); istate->initialized = 1; - if (istate->version == 4) + if (istate->version >= 4) previous_name = &previous_name_buf; else previous_name = NULL; @@ -1761,12 +1768,15 @@ int write_index(struct index_state *istate, int newfd) hdr.hdr_version = htonl(hdr_version); hdr.hdr_entries = htonl(entries - removed); - hash_context_init(&c); + if (istate->version >= INDEX_FORMAT_VMAC_LB) + hash_context_init(&c, HASH_IO_VMAC); + else + hash_context_init(&c, HASH_IO_SHA1); if (write_with_hash(&c, newfd, &hdr, sizeof(hdr)) < 0) return -1; - previous_name = (hdr_version == 4) ? &previous_name_buf : NULL; + previous_name = (hdr_version >= 4) ? &previous_name_buf : NULL; for (i = 0; i < entries; i++) { struct cache_entry *ce = cache[i]; if (ce->ce_flags & CE_REMOVE) diff --git a/rijndael-alg-fst.c b/rijndael-alg-fst.c new file mode 100644 index 00000000000000..8a100404dcf828 --- /dev/null +++ b/rijndael-alg-fst.c @@ -0,0 +1,1400 @@ +/** + * rijndael-alg-fst.c + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include "rijndael-alg-fst.h" + +/* +Te0[x] = S [x].[02, 01, 01, 03]; +Te1[x] = S [x].[03, 02, 01, 01]; +Te2[x] = S [x].[01, 03, 02, 01]; +Te3[x] = S [x].[01, 01, 03, 02]; +Te4[x] = S [x].[01, 01, 01, 01]; + +Td0[x] = Si[x].[0e, 09, 0d, 0b]; +Td1[x] = Si[x].[0b, 0e, 09, 0d]; +Td2[x] = Si[x].[0d, 0b, 0e, 09]; +Td3[x] = Si[x].[09, 0d, 0b, 0e]; +Td4[x] = Si[x].[01, 01, 01, 01]; +*/ + +static const u32 Te0[256] = { + 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, + 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, + 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, + 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, + 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, + 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, + 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, + 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, + 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, + 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, + 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, + 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, + 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, + 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, + 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, + 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, + 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, + 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, + 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, + 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, + 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, + 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, + 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, + 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, + 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, + 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, + 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, + 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, + 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, + 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, + 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, + 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, + 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, + 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U, + 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, + 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, + 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, + 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, + 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, + 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, + 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, + 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, + 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, + 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, + 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, + 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, + 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, + 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, + 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, + 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, + 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, + 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, + 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, + 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, + 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, + 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, + 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, + 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, + 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, + 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, + 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, + 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, + 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, + 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, +}; +static const u32 Te1[256] = { + 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, + 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, + 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, + 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, + 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, + 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, + 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, + 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, + 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, + 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, + 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, + 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, + 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, + 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, + 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, + 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, + 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, + 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, + 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, + 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, + 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, + 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, + 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, + 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, + 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, + 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, + 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, + 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, + 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, + 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, + 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, + 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, + 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, + 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, + 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, + 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, + 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, + 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, + 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, + 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, + 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, + 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, + 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, + 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, + 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, + 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, + 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, + 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, + 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, + 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, + 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, + 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, + 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, + 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, + 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, + 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, + 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, + 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, + 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, + 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, + 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, + 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, + 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, + 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, +}; +static const u32 Te2[256] = { + 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, + 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, + 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, + 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, + 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, + 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, + 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, + 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, + 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, + 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, + 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, + 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, + 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, + 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, + 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, + 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, + 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, + 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, + 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U, + 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, + 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, + 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, + 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, + 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, + 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, + 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, + 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, + 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, + 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, + 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, + 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, + 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, + 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, + 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, + 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, + 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, + 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, + 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, + 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, + 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, + 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, + 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, + 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, + 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, + 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, + 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, + 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, + 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, + 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, + 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, + 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, + 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, + 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, + 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, + 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, + 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, + 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, + 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, + 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, + 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, + 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, + 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, + 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, + 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, +}; +static const u32 Te3[256] = { + + 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, + 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, + 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, + 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, + 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, + 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, + 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, + 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, + 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, + 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, + 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, + 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, + 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, + 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, + 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, + 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, + 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, + 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, + 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, + 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, + 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, + 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, + 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, + 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, + 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, + 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, + 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, + 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, + 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, + 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, + 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U, + 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, + 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, + 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, + 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, + 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, + 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, + 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, + 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, + 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, + 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, + 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, + 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, + 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, + 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, + 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, + 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, + 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, + 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, + 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, + 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, + 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, + 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, + 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, + 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, + 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, + 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, + 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, + 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, + 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, + 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, + 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, + 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, + 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, +}; +static const u32 Te4[256] = { + 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, + 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, + 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, + 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, + 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, + 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, + 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, + 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, + 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, + 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, + 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, + 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, + 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, + 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, + 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, + 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, + 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, + 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, + 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, + 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, + 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, + 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, + 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, + 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, + 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, + 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, + 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, + 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, + 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, + 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, + 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, + 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, + 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, + 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, + 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, + 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, + 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, + 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, + 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, + 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, + 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, + 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, + 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, + 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, + 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, + 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, + 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, + 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, + 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, + 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, + 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, + 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, + 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, + 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, + 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, + 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, + 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, + 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, + 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, + 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, + 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, + 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, + 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, + 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, +}; +static const u32 Td0[256] = { + 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, + 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, + 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, + 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, + 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, + 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, + 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, + 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, + 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, + 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, + 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, + 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, + 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, + 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, + 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, + 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU, + 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, + 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, + 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, + 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, + 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, + 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, + 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, + 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, + 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, + 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, + 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, + 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, + 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, + 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, + 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, + 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, + 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, + 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, + 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, + 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, + 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, + 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, + 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, + 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, + 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, + 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, + 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, + 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, + 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, + 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, + 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, + 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, + 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, + 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, + 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, + 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, + 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, + 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, + 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, + 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, + 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, + 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, + 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, + 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, + 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, + 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, + 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, + 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, +}; +static const u32 Td1[256] = { + 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, + 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, + 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, + 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, + 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, + 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, + 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, + 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, + 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, + 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, + 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, + 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, + 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, + 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, + 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, + 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, + 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, + 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, + 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, + 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, + 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, + 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, + 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, + 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, + 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, + 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, + 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, + 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU, + 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, + 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, + 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, + 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, + 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, + 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, + 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, + 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, + 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, + 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, + 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, + 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, + 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, + 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, + 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, + 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, + 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, + 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, + 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, + 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, + 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, + 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, + 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, + 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, + 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, + 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, + 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, + 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, + 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, + 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, + 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, + 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, + 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, + 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, + 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, + 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, +}; +static const u32 Td2[256] = { + 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, + 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, + 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, + 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, + 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, + 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, + 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, + 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, + 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, + 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, + 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, + 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, + 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, + 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, + 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, + 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, + 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, + 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, + 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, + 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, + + 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, + 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, + 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, + 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, + 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, + 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, + 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, + 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, + 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, + 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, + 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, + 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, + 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, + 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, + 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, + 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, + 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, + 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, + 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, + 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, + 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, + 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, + 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, + 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, + 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, + 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, + 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, + 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, + 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, + 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, + 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, + 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, + 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, + 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, + 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, + 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, + 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, + 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, + 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, + 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, + 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, + 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, + 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, + 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, +}; +static const u32 Td3[256] = { + 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, + 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, + 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, + 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, + 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, + 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, + 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, + 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, + 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, + 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, + 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, + 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, + 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U, + 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, + 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, + 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, + 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, + 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, + 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, + 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, + 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, + 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, + 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, + 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, + 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, + 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, + 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, + 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, + 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, + 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, + 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, + 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, + 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, + 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, + 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, + 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, + 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, + 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, + 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, + 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, + 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, + 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, + 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, + 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, + 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, + 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, + 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, + 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, + 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, + 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, + 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, + 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, + 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, + 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, + 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, + 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, + 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, + 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, + 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, + 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, + 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, + 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, + 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, + 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, +}; +static const u32 Td4[256] = { + 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, + 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, + 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, + 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, + 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, + 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, + 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, + 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, + 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, + 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, + 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, + 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, + 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, + 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, + 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, + 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, + 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, + 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, + 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, + 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, + 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, + 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, + 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, + 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, + 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, + 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, + 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, + 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, + 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, + 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, + 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, + 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, + 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, + 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, + 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, + 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, + 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, + 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, + 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, + 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, + 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, + 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, + 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, + 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, + 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, + 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, + 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, + 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, + 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, + 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, + 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, + 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, + 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, + 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, + 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, + 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, + 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, + 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, + 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, + 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, + 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, + 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, + 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, + 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, +}; +static const u32 rcon[] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +}; + +#define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) + +#ifdef _MSC_VER +#define GETU32(p) SWAP(*((u32 *)(p))) +#define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); } +#else +#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3])) +#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); } +#endif + +/** + * Expand the cipher key into the encryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int i = 0; + u32 temp; + + rk[0] = GETU32(cipherKey ); + rk[1] = GETU32(cipherKey + 4); + rk[2] = GETU32(cipherKey + 8); + rk[3] = GETU32(cipherKey + 12); + if (keyBits == 128) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + return 10; + } + rk += 4; + } + } + rk[4] = GETU32(cipherKey + 16); + rk[5] = GETU32(cipherKey + 20); + if (keyBits == 192) { + for (;;) { + temp = rk[ 5]; + rk[ 6] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + return 12; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + rk[6] = GETU32(cipherKey + 24); + rk[7] = GETU32(cipherKey + 28); + if (keyBits == 256) { + for (;;) { + temp = rk[ 7]; + rk[ 8] = rk[ 0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + return 14; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + + rk += 8; + } + } + return 0; +} + +/** + * Expand the cipher key into the decryption key schedule. + * + * @return the number of rounds for the given cipher key size. + */ +int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) { + int Nr, i, j; + u32 temp; + + /* expand the cipher key: */ + Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits); + /* invert the order of the round keys: */ + for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; + temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; + temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; + temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; + } + /* apply the inverse MixColumn transform to all round keys but the first and the last: */ + for (i = 1; i < Nr; i++) { + rk += 4; + rk[0] = + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + rk[1] = + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + rk[2] = + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + rk[3] = + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + } + return Nr; +} + +void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(pt ) ^ rk[0]; + s1 = GETU32(pt + 4) ^ rk[1]; + s2 = GETU32(pt + 8) ^ rk[2]; + s3 = GETU32(pt + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[4]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[5]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[6]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[(t0 >> 24) ] ^ + Te1[(t1 >> 16) & 0xff] ^ + Te2[(t2 >> 8) & 0xff] ^ + Te3[(t3 ) & 0xff] ^ + rk[0]; + s1 = + Te0[(t1 >> 24) ] ^ + Te1[(t2 >> 16) & 0xff] ^ + Te2[(t3 >> 8) & 0xff] ^ + Te3[(t0 ) & 0xff] ^ + rk[1]; + s2 = + Te0[(t2 >> 24) ] ^ + Te1[(t3 >> 16) & 0xff] ^ + Te2[(t0 >> 8) & 0xff] ^ + Te3[(t1 ) & 0xff] ^ + rk[2]; + s3 = + Te0[(t3 >> 24) ] ^ + Te1[(t0 >> 16) & 0xff] ^ + Te2[(t1 >> 8) & 0xff] ^ + Te3[(t2 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(ct , s0); + s1 = + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(ct + 4, s1); + s2 = + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(ct + 8, s2); + s3 = + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(ct + 12, s3); +} + +void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) { + u32 s0, s1, s2, s3, t0, t1, t2, t3; +#ifndef FULL_UNROLL + int r; +#endif /* ?FULL_UNROLL */ + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(ct ) ^ rk[0]; + s1 = GETU32(ct + 4) ^ rk[1]; + s2 = GETU32(ct + 8) ^ rk[2]; + s3 = GETU32(ct + 12) ^ rk[3]; +#ifdef FULL_UNROLL + /* round 1: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + /* round 2: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + /* round 3: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + /* round 4: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + /* round 5: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + /* round 6: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + /* round 7: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + /* round 8: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + /* round 9: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + if (Nr > 10) { + /* round 10: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + /* round 11: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + if (Nr > 12) { + /* round 12: */ + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + /* round 13: */ + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + } + } + rk += Nr << 2; +#else /* !FULL_UNROLL */ + /* + * Nr - 1 full rounds: + */ + r = Nr >> 1; + for (;;) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[4]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[5]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[6]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Td0[(t0 >> 24) ] ^ + Td1[(t3 >> 16) & 0xff] ^ + Td2[(t2 >> 8) & 0xff] ^ + Td3[(t1 ) & 0xff] ^ + rk[0]; + s1 = + Td0[(t1 >> 24) ] ^ + Td1[(t0 >> 16) & 0xff] ^ + Td2[(t3 >> 8) & 0xff] ^ + Td3[(t2 ) & 0xff] ^ + rk[1]; + s2 = + Td0[(t2 >> 24) ] ^ + Td1[(t1 >> 16) & 0xff] ^ + Td2[(t0 >> 8) & 0xff] ^ + Td3[(t3 ) & 0xff] ^ + rk[2]; + s3 = + Td0[(t3 >> 24) ] ^ + Td1[(t2 >> 16) & 0xff] ^ + Td2[(t1 >> 8) & 0xff] ^ + Td3[(t0 ) & 0xff] ^ + rk[3]; + } +#endif /* ?FULL_UNROLL */ + /* + * apply last round and + * map cipher state to byte array block: + */ + s0 = + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; + PUTU32(pt , s0); + s1 = + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; + PUTU32(pt + 4, s1); + s2 = + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; + PUTU32(pt + 8, s2); + s3 = + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + PUTU32(pt + 12, s3); +} + +#ifdef INTERMEDIATE_VALUE_KAT + +void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) { + t0 = + Te0[(s0 >> 24) ] ^ + Te1[(s1 >> 16) & 0xff] ^ + Te2[(s2 >> 8) & 0xff] ^ + Te3[(s3 ) & 0xff] ^ + rk[0]; + t1 = + Te0[(s1 >> 24) ] ^ + Te1[(s2 >> 16) & 0xff] ^ + Te2[(s3 >> 8) & 0xff] ^ + Te3[(s0 ) & 0xff] ^ + rk[1]; + t2 = + Te0[(s2 >> 24) ] ^ + Te1[(s3 >> 16) & 0xff] ^ + Te2[(s0 >> 8) & 0xff] ^ + Te3[(s1 ) & 0xff] ^ + rk[2]; + t3 = + Te0[(s3 >> 24) ] ^ + Te1[(s0 >> 16) & 0xff] ^ + Te2[(s1 >> 8) & 0xff] ^ + Te3[(s2 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * apply last round and + * map cipher state to byte array block: + */ + if (rounds == Nr) { + t0 = + (Te4[(s0 >> 24) ] & 0xff000000) ^ + (Te4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; + t1 = + (Te4[(s1 >> 24) ] & 0xff000000) ^ + (Te4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; + t2 = + (Te4[(s2 >> 24) ] & 0xff000000) ^ + (Te4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; + t3 = + (Te4[(s3 >> 24) ] & 0xff000000) ^ + (Te4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + } + + PUTU32(block , s0); + PUTU32(block + 4, s1); + PUTU32(block + 8, s2); + PUTU32(block + 12, s3); +} + +void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) { + int r; + u32 s0, s1, s2, s3, t0, t1, t2, t3; + + /* + * map byte array block to cipher state + * and add initial round key: + */ + s0 = GETU32(block ) ^ rk[0]; + s1 = GETU32(block + 4) ^ rk[1]; + s2 = GETU32(block + 8) ^ rk[2]; + s3 = GETU32(block + 12) ^ rk[3]; + rk += 4; + + /* + * Nr - 1 full rounds: + */ + for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) { + t0 = + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; + t1 = + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; + t2 = + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; + t3 = + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; + + s0 = t0; + s1 = t1; + s2 = t2; + s3 = t3; + rk += 4; + + } + + /* + * complete the last round and + * map cipher state to byte array block: + */ + t0 = + (Td4[(s0 >> 24) ] & 0xff000000) ^ + (Td4[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s1 ) & 0xff] & 0x000000ff); + t1 = + (Td4[(s1 >> 24) ] & 0xff000000) ^ + (Td4[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s2 ) & 0xff] & 0x000000ff); + t2 = + (Td4[(s2 >> 24) ] & 0xff000000) ^ + (Td4[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s3 ) & 0xff] & 0x000000ff); + t3 = + (Td4[(s3 >> 24) ] & 0xff000000) ^ + (Td4[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(s0 ) & 0xff] & 0x000000ff); + + if (rounds == Nr) { + t0 ^= rk[0]; + t1 ^= rk[1]; + t2 ^= rk[2]; + t3 ^= rk[3]; + } + + PUTU32(block , t0); + PUTU32(block + 4, t1); + PUTU32(block + 8, t2); + PUTU32(block + 12, t3); +} + +#endif /* INTERMEDIATE_VALUE_KAT */ diff --git a/rijndael-alg-fst.h b/rijndael-alg-fst.h new file mode 100644 index 00000000000000..f093e5b4777f03 --- /dev/null +++ b/rijndael-alg-fst.h @@ -0,0 +1,47 @@ +/** + * rijndael-alg-fst.h + * + * @version 3.0 (December 2000) + * + * Optimised ANSI C code for the Rijndael cipher (now AES) + * + * @author Vincent Rijmen + * @author Antoon Bosselaers + * @author Paulo Barreto + * + * This code is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __RIJNDAEL_ALG_FST_H +#define __RIJNDAEL_ALG_FST_H + +#define MAXKC (256/32) +#define MAXKB (256/8) +#define MAXNR 14 + +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; + +int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); +int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits); +void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]); +void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]); + +#ifdef INTERMEDIATE_VALUE_KAT +void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); +void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds); +#endif /* INTERMEDIATE_VALUE_KAT */ + +#endif /* __RIJNDAEL_ALG_FST_H */ diff --git a/vmac.c b/vmac.c new file mode 100644 index 00000000000000..413148beec0a69 --- /dev/null +++ b/vmac.c @@ -0,0 +1,1215 @@ +/* -------------------------------------------------------------------------- + * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. + * This implementation is herby placed in the public domain. + * The authors offers no warranty. Use at your own risk. + * Please send bug reports to the authors. + * Last modified: 17 APR 08, 1700 PDT + * ----------------------------------------------------------------------- */ + +#include "vmac.h" +#include +#include + +/* Enable code tuned for 64-bit registers; otherwise tuned for 32-bit */ +#ifndef VMAC_ARCH_64 +#define VMAC_ARCH_64 (__x86_64__ || __ppc64__ || _M_X64) +#endif + +/* Enable code tuned for Intel SSE2 instruction set */ +#if ((__SSE2__ || (_M_IX86_FP >= 2)) && ( ! VMAC_ARCH_64)) +#define VMAC_USE_SSE2 1 +#include +#endif + +/* Native word reads. Update (or define via compiler) if incorrect */ +#ifndef VMAC_ARCH_BIG_ENDIAN /* Assume big-endian unless on the list */ +#define VMAC_ARCH_BIG_ENDIAN \ + (!(__x86_64__ || __i386__ || _M_IX86 || \ + _M_X64 || __ARMEL__ || __MIPSEL__)) +#endif + +/* ----------------------------------------------------------------------- */ +/* Constants and masks */ + +const uint64_t p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */ +const uint64_t m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */ +const uint64_t m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ +const uint64_t m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ +const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ + +/* ----------------------------------------------------------------------- * + * The following routines are used in this implementation. They are + * written via macros to simulate zero-overhead call-by-reference. + * All have default implemantations for when they are not defined in an + * architecture-specific manner. + * + * MUL64: 64x64->128-bit multiplication + * PMUL64: assumes top bits cleared on inputs + * ADD128: 128x128->128-bit addition + * GET_REVERSED_64: load and byte-reverse 64-bit word + * ----------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------- */ +#if (__GNUC__ && (__x86_64__ || __amd64__)) +/* ----------------------------------------------------------------------- */ + +#define ADD128(rh,rl,ih,il) \ + asm ("addq %3, %1 \n\t" \ + "adcq %2, %0" \ + : "+r"(rh),"+r"(rl) \ + : "r"(ih),"r"(il) : "cc"); + +#define MUL64(rh,rl,i1,i2) \ + asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "r"(i2) : "cc") + +#define PMUL64 MUL64 + +#define GET_REVERSED_64(p) \ + ({uint64_t x; \ + asm ("bswapq %0" : "=r" (x) : "0"(*(uint64_t *)(p))); x;}) + +/* ----------------------------------------------------------------------- */ +#elif (__GNUC__ && __i386__) +/* ----------------------------------------------------------------------- */ + +#define GET_REVERSED_64(p) \ + ({ uint64_t x; \ + uint32_t *tp = (uint32_t *)(p); \ + asm ("bswap %%edx\n\t" \ + "bswap %%eax" \ + : "=A"(x) \ + : "a"(tp[1]), "d"(tp[0])); \ + x; }) + +/* ----------------------------------------------------------------------- */ +#elif (__GNUC__ && __ppc64__) +/* ----------------------------------------------------------------------- */ + +#define ADD128(rh,rl,ih,il) \ + asm volatile ( "addc %1, %1, %3 \n\t" \ + "adde %0, %0, %2" \ + : "+r"(rh),"+r"(rl) \ + : "r"(ih),"r"(il)); + +#define MUL64(rh,rl,i1,i2) \ +{ uint64_t _i1 = (i1), _i2 = (i2); \ + rl = _i1 * _i2; \ + asm volatile ("mulhdu %0, %1, %2" : "=r" (rh) : "r" (_i1), "r" (_i2));\ +} + +#define PMUL64 MUL64 + +#define GET_REVERSED_64(p) \ + ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \ + asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \ + asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \ + ((uint64_t)hi << 32) | (uint64_t)lo; } ) + +/* ----------------------------------------------------------------------- */ +#elif (__GNUC__ && (__ppc__ || __PPC__)) +/* ----------------------------------------------------------------------- */ + +#define GET_REVERSED_64(p) \ + ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \ + asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \ + asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \ + ((uint64_t)hi << 32) | (uint64_t)lo; } ) + +/* ----------------------------------------------------------------------- */ +#elif (__GNUC__ && (__ARMEL__ || __ARM__)) +/* ----------------------------------------------------------------------- */ + +#define bswap32(v) \ +({ uint32_t tmp,out; \ + asm volatile( \ + "eor %1, %2, %2, ror #16\n" \ + "bic %1, %1, #0x00ff0000\n" \ + "mov %0, %2, ror #8\n" \ + "eor %0, %0, %1, lsr #8" \ + : "=r" (out), "=&r" (tmp) \ + : "r" (v)); \ + out;}) + +/* ----------------------------------------------------------------------- */ +#elif _MSC_VER +/* ----------------------------------------------------------------------- */ + +#include + +#if (_M_IA64 || _M_X64) && \ + (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000) +#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh)); +#pragma intrinsic(_umul128) +#define PMUL64 MUL64 +#endif + +/* MSVC uses add, adc in this version */ +#define ADD128(rh,rl,ih,il) \ + { uint64_t _il = (il); \ + (rl) += (_il); \ + (rh) += (ih) + ((rl) < (_il)); \ + } + +#if _MSC_VER >= 1300 +#define GET_REVERSED_64(p) _byteswap_uint64(*(uint64_t *)(p)) +#pragma intrinsic(_byteswap_uint64) +#endif + +#if _MSC_VER >= 1400 && \ + (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000) +#define MUL32(i1,i2) (__emulu((uint32_t)(i1),(uint32_t)(i2))) +#pragma intrinsic(__emulu) +#endif + +/* ----------------------------------------------------------------------- */ +#endif +/* ----------------------------------------------------------------------- */ + +#if __GNUC__ +#define ALIGN(n) __attribute__ ((aligned(n))) +#define NOINLINE __attribute__ ((noinline)) +#define FASTCALL +#elif _MSC_VER +#define ALIGN(n) __declspec(align(n)) +#define NOINLINE __declspec(noinline) +#define FASTCALL __fastcall +#else +#define ALIGN(n) +#define NOINLINE +#define FASTCALL +#endif + +/* ----------------------------------------------------------------------- */ +/* Default implementations, if not defined above */ +/* ----------------------------------------------------------------------- */ + +#ifndef ADD128 +#define ADD128(rh,rl,ih,il) \ + { uint64_t _il = (il); \ + (rl) += (_il); \ + if ((rl) < (_il)) (rh)++; \ + (rh) += (ih); \ + } +#endif + +#ifndef MUL32 +#define MUL32(i1,i2) ((uint64_t)(uint32_t)(i1)*(uint32_t)(i2)) +#endif + +#ifndef PMUL64 /* rh may not be same as i1 or i2 */ +#define PMUL64(rh,rl,i1,i2) /* Assumes m doesn't overflow */ \ + { uint64_t _i1 = (i1), _i2 = (i2); \ + uint64_t m = MUL32(_i1,_i2>>32) + MUL32(_i1>>32,_i2); \ + rh = MUL32(_i1>>32,_i2>>32); \ + rl = MUL32(_i1,_i2); \ + ADD128(rh,rl,(m >> 32),(m << 32)); \ + } +#endif + +#ifndef MUL64 +#define MUL64(rh,rl,i1,i2) \ + { uint64_t _i1 = (i1), _i2 = (i2); \ + uint64_t m1= MUL32(_i1,_i2>>32); \ + uint64_t m2= MUL32(_i1>>32,_i2); \ + rh = MUL32(_i1>>32,_i2>>32); \ + rl = MUL32(_i1,_i2); \ + ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \ + ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \ + } +#endif + +#ifndef GET_REVERSED_64 +#ifndef bswap64 +#ifndef bswap32 +#define bswap32(x) \ + ({ uint32_t bsx = (x); \ + ((((bsx) & 0xff000000u) >> 24) | (((bsx) & 0x00ff0000u) >> 8) | \ + (((bsx) & 0x0000ff00u) << 8) | (((bsx) & 0x000000ffu) << 24)); }) +#endif +#define bswap64(x) \ + ({ union { uint64_t ll; uint32_t l[2]; } w, r; \ + w.ll = (x); \ + r.l[0] = bswap32 (w.l[1]); \ + r.l[1] = bswap32 (w.l[0]); \ + r.ll; }) +#endif +#define GET_REVERSED_64(p) bswap64(*(uint64_t *)(p)) +#endif + +/* ----------------------------------------------------------------------- */ + +#if (VMAC_PREFER_BIG_ENDIAN) +# define get64PE get64BE +#else +# define get64PE get64LE +#endif + +#if (VMAC_ARCH_BIG_ENDIAN) +# define get64BE(ptr) (*(uint64_t *)(ptr)) +# define get64LE(ptr) GET_REVERSED_64(ptr) +#else /* assume little-endian */ +# define get64BE(ptr) GET_REVERSED_64(ptr) +# define get64LE(ptr) (*(uint64_t *)(ptr)) +#endif + + +/* --------------------------------------------------------------------- * + * For highest performance the L1 NH and L2 polynomial hashes should be + * carefully implemented to take advantage of one's target architechture. + * Here these two hash functions are defined multiple time; once for + * 64-bit architectures, once for 32-bit SSE2 architectures, and once + * for the rest (32-bit) architectures. + * For each, nh_16 *must* be defined (works on multiples of 16 bytes). + * Optionally, nh_vmac_nhbytes can be defined (for multiples of + * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two + * NH computations at once). + * --------------------------------------------------------------------- */ + +/* ----------------------------------------------------------------------- */ +#if VMAC_ARCH_64 +/* ----------------------------------------------------------------------- */ + +#define nh_16(mp, kp, nw, rh, rl) \ +{ int nh16_i; uint64_t th, tl; \ + rh = rl = 0; \ + for (nh16_i = 0; nh16_i < nw; nh16_i+= 2) { \ + MUL64(th,tl,get64PE((mp)nh16_+i)+(kp)[nh16_i],get64PE((mp)+nh16_i+1)+(kp)[nh16_i+1]);\ + ADD128(rh,rl,th,tl); \ + } \ +} +#define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1) \ +{ int nh162_i; uint64_t th, tl; \ + rh1 = rl1 = rh = rl = 0; \ + for (nh162_i = 0; nh162_i < nw; nh162_i+= 2) { \ + MUL64(th,tl,get64PE((mp)+nh162_i)+(kp)[nh162_i],get64PE((mp)+nh162_i+1)+(kp)[nh162_i+1]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nh162_i)+(kp)[nh162_i+2],get64PE((mp)+nh162_i+1)+(kp)[nh162_i+3]);\ + ADD128(rh1,rl1,th,tl); \ + } \ +} + +#if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */ +#define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ +{ int nvn_i; uint64_t th, tl; \ + rh = rl = 0; \ + for (nvn_i = 0; nvn_i < nw; nvn_i+= 8) { \ + MUL64(th,tl,get64PE((mp)+nvn_i )+(kp)[nvn_i ],get64PE((mp)+nvn_i+1)+(kp)[nvn_i+1]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+2)+(kp)[nvn_i+2],get64PE((mp)+nvn_i+3)+(kp)[nvn_i+3]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+4)+(kp)[nvn_i+4],get64PE((mp)+nvn_i+5)+(kp)[nvn_i+5]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+6)+(kp)[nvn_i+6],get64PE((mp)+nvn_i+7)+(kp)[nvn_i+7]);\ + ADD128(rh,rl,th,tl); \ + } \ +} +#define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1) \ +{ int nvn_i; uint64_t th, tl; \ + rh1 = rl1 = rh = rl = 0; \ + for (nvn_i = 0; nvn_i < nw; nvn_i+= 8) { \ + MUL64(th,tl,get64PE((mp)+nvn_i )+(kp)[nvn_i ],get64PE((mp)+nvn_i+1)+(kp)[nvn_i+1]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i )+(kp)[nvn_i+2],get64PE((mp)+nvn_i+1)+(kp)[nvn_i+3]);\ + ADD128(rh1,rl1,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+2)+(kp)[nvn_i+2],get64PE((mp)+nvn_i+3)+(kp)[nvn_i+3]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+2)+(kp)[nvn_i+4],get64PE((mp)+nvn_i+3)+(kp)[nvn_i+5]);\ + ADD128(rh1,rl1,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+4)+(kp)[nvn_i+4],get64PE((mp)+nvn_i+5)+(kp)[nvn_i+5]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+4)+(kp)[nvn_i+6],get64PE((mp)+nvn_i+5)+(kp)[nvn_i+7]);\ + ADD128(rh1,rl1,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+6)+(kp)[nvn_i+6],get64PE((mp)+nvn_i+7)+(kp)[nvn_i+7]);\ + ADD128(rh,rl,th,tl); \ + MUL64(th,tl,get64PE((mp)+nvn_i+6)+(kp)[nvn_i+8],get64PE((mp)+nvn_i+7)+(kp)[nvn_i+9]);\ + ADD128(rh1,rl1,th,tl); \ + } \ +} +#endif + +#define poly_step(ah, al, kh, kl, mh, ml) \ +{ uint64_t t1h, t1l, t2h, t2l, t3h, t3l, z=0; \ + /* compute ab*cd, put bd into result registers */ \ + PMUL64(t3h,t3l,al,kh); \ + PMUL64(t2h,t2l,ah,kl); \ + PMUL64(t1h,t1l,ah,2*kh); \ + PMUL64(ah,al,al,kl); \ + /* add 2 * ac to result */ \ + ADD128(ah,al,t1h,t1l); \ + /* add together ad + bc */ \ + ADD128(t2h,t2l,t3h,t3l); \ + /* now (ah,al), (t2l,2*t2h) need summing */ \ + /* first add the high registers, carrying into t2h */ \ + ADD128(t2h,ah,z,t2l); \ + /* double t2h and add top bit of ah */ \ + t2h = 2 * t2h + (ah >> 63); \ + ah &= m63; \ + /* now add the low registers */ \ + ADD128(ah,al,mh,ml); \ + ADD128(ah,al,z,t2h); \ +} + +/* ----------------------------------------------------------------------- */ +#elif VMAC_USE_SSE2 +/* ----------------------------------------------------------------------- */ + +// macros from Crypto++ for sharing inline assembly code between MSVC and GNU C +#if defined(__GNUC__) + // define these in two steps to allow arguments to be expanded + #define GNU_AS2(x, y) #x ", " #y ";" + #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" + #define GNU_ASL(x) "\n" #x ":" + #define GNU_ASJ(x, y, z) #x " " #y #z ";" + #define AS2(x, y) GNU_AS2(x, y) + #define AS3(x, y, z) GNU_AS3(x, y, z) + #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" + #define ASL(x) GNU_ASL(x) + #define ASJ(x, y, z) GNU_ASJ(x, y, z) +#else + #define AS2(x, y) __asm {x, y} + #define AS3(x, y, z) __asm {x, y, z} + #define ASS(x, y, a, b, c, d) __asm {x, y, _MM_SHUFFLE(a, b, c, d)} + #define ASL(x) __asm {label##x:} + #define ASJ(x, y, z) __asm {x label##y} +#endif + +static void NOINLINE nh_16_func(const uint64_t *mp, const uint64_t *kp, size_t nw, uint64_t *rh, uint64_t *rl) +{ + // This assembly version, using MMX registers, is just as fast as the + // intrinsics version (which uses XMM registers) on the Intel Core 2, + // but is much faster on the Pentium 4. In order to schedule multiplies + // as early as possible, the loop interleaves operations for the current + // block and the next block. To mask out high 32-bits, we use "movd" + // to move the lower 32-bits to the stack and then back. Surprisingly, + // this is faster than any other method. +#ifdef __GNUC__ + __asm__ __volatile__ + ( + ".intel_syntax noprefix;" +#else + AS2( mov esi, mp) + AS2( mov edi, kp) + AS2( mov ecx, nw) + AS2( mov eax, rl) + AS2( mov edx, rh) +#endif + AS2( sub esp, 12) + AS2( movq mm6, [esi]) + AS2( paddq mm6, [edi]) + AS2( movq mm5, [esi+8]) + AS2( paddq mm5, [edi+8]) + AS2( add esi, 16) + AS2( add edi, 16) + AS2( movq mm4, mm6) + ASS( pshufw mm2, mm6, 1, 0, 3, 2) + AS2( pmuludq mm6, mm5) + ASS( pshufw mm3, mm5, 1, 0, 3, 2) + AS2( pmuludq mm5, mm2) + AS2( pmuludq mm2, mm3) + AS2( pmuludq mm3, mm4) + AS2( pxor mm7, mm7) + AS2( movd [esp], mm6) + AS2( psrlq mm6, 32) + AS2( movd [esp+4], mm5) + AS2( psrlq mm5, 32) + AS2( sub ecx, 2) + ASJ( jz, 1, f) + ASL(0) + AS2( movq mm0, [esi]) + AS2( paddq mm0, [edi]) + AS2( movq mm1, [esi+8]) + AS2( paddq mm1, [edi+8]) + AS2( add esi, 16) + AS2( add edi, 16) + AS2( movq mm4, mm0) + AS2( paddq mm5, mm2) + ASS( pshufw mm2, mm0, 1, 0, 3, 2) + AS2( pmuludq mm0, mm1) + AS2( movd [esp+8], mm3) + AS2( psrlq mm3, 32) + AS2( paddq mm5, mm3) + ASS( pshufw mm3, mm1, 1, 0, 3, 2) + AS2( pmuludq mm1, mm2) + AS2( pmuludq mm2, mm3) + AS2( pmuludq mm3, mm4) + AS2( movd mm4, [esp]) + AS2( paddq mm7, mm4) + AS2( movd mm4, [esp+4]) + AS2( paddq mm6, mm4) + AS2( movd mm4, [esp+8]) + AS2( paddq mm6, mm4) + AS2( movd [esp], mm0) + AS2( psrlq mm0, 32) + AS2( paddq mm6, mm0) + AS2( movd [esp+4], mm1) + AS2( psrlq mm1, 32) + AS2( paddq mm5, mm1) + AS2( sub ecx, 2) + ASJ( jnz, 0, b) + ASL(1) + AS2( paddq mm5, mm2) + AS2( movd [esp+8], mm3) + AS2( psrlq mm3, 32) + AS2( paddq mm5, mm3) + AS2( movd mm4, [esp]) + AS2( paddq mm7, mm4) + AS2( movd mm4, [esp+4]) + AS2( paddq mm6, mm4) + AS2( movd mm4, [esp+8]) + AS2( paddq mm6, mm4) + + ASS( pshufw mm0, mm7, 3, 2, 1, 0) + AS2( psrlq mm7, 32) + AS2( paddq mm6, mm7) + AS2( punpckldq mm0, mm6) + AS2( psrlq mm6, 32) + AS2( paddq mm5, mm6) + AS2( movq [eax], mm0) + AS2( movq [edx], mm5) + AS2( add esp, 12) +#ifdef __GNUC__ + ".att_syntax prefix;" + : + : "S" (mp), "D" (kp), "c" (nw), "a" (rl), "d" (rh) + : "memory", "cc" + ); +#endif +} +#define nh_16(mp, kp, nw, rh, rl) nh_16_func(mp, kp, nw, &(rh), &(rl)); + +static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh, + const uint64_t *kl, const uint64_t *mh, const uint64_t *ml) +{ + // This code tries to schedule the multiplies as early as possible to overcome + // the long latencies on the Pentium 4. It also minimizes "movq" instructions + // which are very expensive on the P4. + +#define a0 [eax+0] +#define a1 [eax+4] +#define a2 [ebx+0] +#define a3 [ebx+4] +#define k0 [ecx+0] +#define k1 [ecx+4] +#define k2 [edx+0] +#define k3 [edx+4] + +#ifdef __GNUC__ + uint32_t temp; + __asm__ __volatile__ + ( + "mov %%ebx, %0;" + "mov %1, %%ebx;" + ".intel_syntax noprefix;" +#else + AS2( mov ebx, ahi) + AS2( mov edx, kh) + AS2( mov eax, alo) + AS2( mov ecx, kl) + AS2( mov esi, mh) + AS2( mov edi, ml) +#endif + + AS2( movd mm0, a3) + AS2( movq mm4, mm0) + AS2( pmuludq mm0, k3) // a3*k3 + AS2( movd mm1, a0) + AS2( pmuludq mm1, k2) // a0*k2 + AS2( movd mm2, a1) + AS2( movd mm6, k1) + AS2( pmuludq mm2, mm6) // a1*k1 + AS2( movd mm3, a2) + AS2( movq mm5, mm3) + AS2( movd mm7, k0) + AS2( pmuludq mm3, mm7) // a2*k0 + AS2( pmuludq mm4, mm7) // a3*k0 + AS2( pmuludq mm5, mm6) // a2*k1 + AS2( psllq mm0, 1) + AS2( paddq mm0, [esi]) + AS2( paddq mm0, mm1) + AS2( movd mm1, a1) + AS2( paddq mm4, mm5) + AS2( movq mm5, mm1) + AS2( pmuludq mm1, k2) // a1*k2 + AS2( paddq mm0, mm2) + AS2( movd mm2, a0) + AS2( paddq mm0, mm3) + AS2( movq mm3, mm2) + AS2( pmuludq mm2, k3) // a0*k3 + AS2( pmuludq mm3, mm7) // a0*k0 + AS2( movd esi, mm0) + AS2( psrlq mm0, 32) + AS2( pmuludq mm7, mm5) // a1*k0 + AS2( pmuludq mm5, k3) // a1*k3 + AS2( paddq mm0, mm1) + AS2( movd mm1, a2) + AS2( pmuludq mm1, k2) // a2*k2 + AS2( paddq mm0, mm2) + AS2( paddq mm0, mm4) + AS2( movq mm4, mm0) + AS2( movd mm2, a3) + AS2( pmuludq mm2, mm6) // a3*k1 + AS2( pmuludq mm6, a0) // a0*k1 + AS2( psrlq mm0, 31) + AS2( paddq mm0, mm3) + AS2( movd mm3, [edi]) + AS2( paddq mm0, mm3) + AS2( movd mm3, a2) + AS2( pmuludq mm3, k3) // a2*k3 + AS2( paddq mm5, mm1) + AS2( movd mm1, a3) + AS2( pmuludq mm1, k2) // a3*k2 + AS2( paddq mm5, mm2) + AS2( movd mm2, [edi+4]) + AS2( psllq mm5, 1) + AS2( paddq mm0, mm5) + AS2( movq mm5, mm0) + AS2( psllq mm4, 33) + AS2( psrlq mm0, 32) + AS2( paddq mm6, mm7) + AS2( movd mm7, esi) + AS2( paddq mm0, mm6) + AS2( paddq mm0, mm2) + AS2( paddq mm3, mm1) + AS2( psllq mm3, 1) + AS2( paddq mm0, mm3) + AS2( psrlq mm4, 1) + AS2( punpckldq mm5, mm0) + AS2( psrlq mm0, 32) + AS2( por mm4, mm7) + AS2( paddq mm0, mm4) + AS2( movq a0, mm5) + AS2( movq a2, mm0) +#ifdef __GNUC__ + ".att_syntax prefix;" + "mov %0, %%ebx;" + : "=m" (temp) + : "m" (ahi), "D" (ml), "d" (kh), "a" (alo), "S" (mh), "c" (kl) + : "memory", "cc" + ); +#endif + + +#undef a0 +#undef a1 +#undef a2 +#undef a3 +#undef k0 +#undef k1 +#undef k2 +#undef k3 +} + +#define poly_step(ah, al, kh, kl, mh, ml) \ + poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml)) + +/* ----------------------------------------------------------------------- */ +#else /* not VMAC_ARCH_64 and not SSE2 */ +/* ----------------------------------------------------------------------- */ + +#ifndef nh_16 +#define nh_16(mp, kp, nw, rh, rl) \ +{ uint64_t t1,t2,m1,m2,t; \ + int nh16_i; \ + rh = rl = t = 0; \ + for (nh16_i = 0; nh16_i < nw; nh16_i+=2) { \ + t1 = get64PE(mp+nh16_i) + kp[nh16_i]; \ + t2 = get64PE(mp+nh16_i+1) + kp[nh16_i+1]; \ + m2 = MUL32(t1 >> 32, t2); \ + m1 = MUL32(t1, t2 >> 32); \ + ADD128(rh,rl,MUL32(t1 >> 32,t2 >> 32),MUL32(t1,t2)); \ + rh += (uint64_t)(uint32_t)(m1 >> 32) + (uint32_t)(m2 >> 32); \ + t += (uint64_t)(uint32_t)m1 + (uint32_t)m2; \ + } \ + ADD128(rh,rl,(t >> 32),(t << 32)); \ +} +#endif + +static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh, + const uint64_t *kl, const uint64_t *mh, const uint64_t *ml) +{ + +#if VMAC_ARCH_BIG_ENDIAN +#define INDEX_HIGH 0 +#define INDEX_LOW 1 +#else +#define INDEX_HIGH 1 +#define INDEX_LOW 0 +#endif + +#define a0 *(((uint32_t*)alo)+INDEX_LOW) +#define a1 *(((uint32_t*)alo)+INDEX_HIGH) +#define a2 *(((uint32_t*)ahi)+INDEX_LOW) +#define a3 *(((uint32_t*)ahi)+INDEX_HIGH) +#define k0 *(((uint32_t*)kl)+INDEX_LOW) +#define k1 *(((uint32_t*)kl)+INDEX_HIGH) +#define k2 *(((uint32_t*)kh)+INDEX_LOW) +#define k3 *(((uint32_t*)kh)+INDEX_HIGH) + + uint64_t p, q, t; + uint32_t t2; + + p = MUL32(a3, k3); + p += p; + p += *(uint64_t *)mh; + p += MUL32(a0, k2); + p += MUL32(a1, k1); + p += MUL32(a2, k0); + t = (uint32_t)(p); + p >>= 32; + p += MUL32(a0, k3); + p += MUL32(a1, k2); + p += MUL32(a2, k1); + p += MUL32(a3, k0); + t |= ((uint64_t)((uint32_t)p & 0x7fffffff)) << 32; + p >>= 31; + p += (uint64_t)(((uint32_t*)ml)[INDEX_LOW]); + p += MUL32(a0, k0); + q = MUL32(a1, k3); + q += MUL32(a2, k2); + q += MUL32(a3, k1); + q += q; + p += q; + t2 = (uint32_t)(p); + p >>= 32; + p += (uint64_t)(((uint32_t*)ml)[INDEX_HIGH]); + p += MUL32(a0, k1); + p += MUL32(a1, k0); + q = MUL32(a2, k3); + q += MUL32(a3, k2); + q += q; + p += q; + *(uint64_t *)(alo) = (p << 32) | t2; + p >>= 32; + *(uint64_t *)(ahi) = p + t; + +#undef a0 +#undef a1 +#undef a2 +#undef a3 +#undef k0 +#undef k1 +#undef k2 +#undef k3 +} + +#define poly_step(ah, al, kh, kl, mh, ml) \ + poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml)) + +/* ----------------------------------------------------------------------- */ +#endif /* end of specialized NH and poly definitions */ +/* ----------------------------------------------------------------------- */ + +/* At least nh_16 is defined. Defined others as needed here */ +#ifndef nh_16_2 +#define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2) \ + nh_16(mp, kp, nw, rh, rl); \ + nh_16(mp, ((kp)+2), nw, rh2, rl2); +#endif +#ifndef nh_vmac_nhbytes +#define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \ + nh_16(mp, kp, nw, rh, rl) +#endif +#ifndef nh_vmac_nhbytes_2 +#define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2) \ + nh_vmac_nhbytes(mp, kp, nw, rh, rl); \ + nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2); +#endif + +/* ----------------------------------------------------------------------- */ + +void vhash_abort(vmac_ctx_t *ctx) +{ + ctx->polytmp[0] = ctx->polykey[0] ; + ctx->polytmp[1] = ctx->polykey[1] ; + #if (VMAC_TAG_LEN == 128) + ctx->polytmp[2] = ctx->polykey[2] ; + ctx->polytmp[3] = ctx->polykey[3] ; + #endif + ctx->first_block_processed = 0; +} + +/* ----------------------------------------------------------------------- */ +static uint64_t l3hash(uint64_t p1, uint64_t p2, + uint64_t k1, uint64_t k2, uint64_t len) +{ + uint64_t rh, rl, t, z=0; + + /* fully reduce (p1,p2)+(len,0) mod p127 */ + t = p1 >> 63; + p1 &= m63; + ADD128(p1, p2, len, t); + /* At this point, (p1,p2) is at most 2^127+(len<<64) */ + t = (p1 > m63) + ((p1 == m63) && (p2 == m64)); + ADD128(p1, p2, z, t); + p1 &= m63; + + /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */ + t = p1 + (p2 >> 32); + t += (t >> 32); + t += (uint32_t)t > 0xfffffffeu; + p1 += (t >> 32); + p2 += (p1 << 32); + + /* compute (p1+k1)%p64 and (p2+k2)%p64 */ + p1 += k1; + p1 += (0 - (p1 < k1)) & 257; + p2 += k2; + p2 += (0 - (p2 < k2)) & 257; + + /* compute (p1+k1)*(p2+k2)%p64 */ + MUL64(rh, rl, p1, p2); + t = rh >> 56; + ADD128(t, rl, z, rh); + rh <<= 8; + ADD128(t, rl, z, rh); + t += t << 8; + rl += t; + rl += (0 - (rl < t)) & 257; + rl += (0 - (rl > p64-1)) & 257; + return rl; +} + +/* ----------------------------------------------------------------------- */ + +void vhash_update(const unsigned char *m, + unsigned int mbytes, /* Pos multiple of VMAC_NHBYTES */ + vmac_ctx_t *ctx) +{ + uint64_t rh, rl, *mptr; + const uint64_t *kptr = (uint64_t *)ctx->nhkey; + int i; + uint64_t ch, cl; + uint64_t pkh = ctx->polykey[0]; + uint64_t pkl = ctx->polykey[1]; + #if (VMAC_TAG_LEN == 128) + uint64_t ch2, cl2, rh2, rl2; + uint64_t pkh2 = ctx->polykey[2]; + uint64_t pkl2 = ctx->polykey[3]; + #endif + + mptr = (uint64_t *)m; + i = mbytes / VMAC_NHBYTES; /* Must be non-zero */ + + ch = ctx->polytmp[0]; + cl = ctx->polytmp[1]; + #if (VMAC_TAG_LEN == 128) + ch2 = ctx->polytmp[2]; + cl2 = ctx->polytmp[3]; + #endif + + if ( ! ctx->first_block_processed) { + ctx->first_block_processed = 1; + #if (VMAC_TAG_LEN == 64) + nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl); + #else + nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2); + rh2 &= m62; + ADD128(ch2,cl2,rh2,rl2); + #endif + rh &= m62; + ADD128(ch,cl,rh,rl); + mptr += (VMAC_NHBYTES/sizeof(uint64_t)); + i--; + } + + while (i--) { + #if (VMAC_TAG_LEN == 64) + nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl); + #else + nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2); + rh2 &= m62; + poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2); + #endif + rh &= m62; + poly_step(ch,cl,pkh,pkl,rh,rl); + mptr += (VMAC_NHBYTES/sizeof(uint64_t)); + } + + ctx->polytmp[0] = ch; + ctx->polytmp[1] = cl; + #if (VMAC_TAG_LEN == 128) + ctx->polytmp[2] = ch2; + ctx->polytmp[3] = cl2; + #endif + #if VMAC_USE_SSE2 + _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */ + #endif +} + +/* ----------------------------------------------------------------------- */ + +uint64_t xvhash(const unsigned char m[], + unsigned int mbytes, + uint64_t *tagl, + vmac_ctx_t *ctx) +{ + uint64_t ch, cl, rh, rl, *mptr; + #if (VMAC_TAG_LEN == 128) + uint64_t ch2, cl2, rh2, rl2; + #endif + const uint64_t *kptr = (uint64_t *)ctx->nhkey; + int i, remaining; + + remaining = mbytes % VMAC_NHBYTES; + i = mbytes-remaining; + mptr = (uint64_t *)(m+i); + if (i) vhash_update(m,i,ctx); + + ch = ctx->polytmp[0]; + cl = ctx->polytmp[1]; + #if (VMAC_TAG_LEN == 128) + ch2 = ctx->polytmp[2]; + cl2 = ctx->polytmp[3]; + #endif + + if (remaining) { + #if (VMAC_TAG_LEN == 128) + nh_16_2(mptr,kptr,2*((remaining+15)/16),rh,rl,rh2,rl2); + rh2 &= m62; + #else + nh_16(mptr,kptr,2*((remaining+15)/16),rh,rl); + #endif + rh &= m62; + if (i) { + poly_step(ch,cl,ctx->polykey[0],ctx->polykey[1],rh,rl); + #if (VMAC_TAG_LEN == 128) + poly_step(ch2,cl2,ctx->polykey[2],ctx->polykey[3],rh2,rl2); + #endif + } else { + ADD128(ch,cl,rh,rl); + #if (VMAC_TAG_LEN == 128) + ADD128(ch2,cl2,rh2,rl2); + #endif + } + } + + #if VMAC_USE_SSE2 + _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */ + #endif + vhash_abort(ctx); + remaining *= 8; +#if (VMAC_TAG_LEN == 128) + *tagl = l3hash(ch2, cl2, ctx->l3key[2], ctx->l3key[3],remaining); +#endif + return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1],remaining); +} + +uint64_t vhash(const unsigned char m[], + unsigned int mbytes, + uint64_t *tagl, + vmac_ctx_t *ctx) +{ + uint64_t rh, rl, *mptr; + const uint64_t *kptr = (uint64_t *)ctx->nhkey; + int i, remaining; + uint64_t ch, cl; + uint64_t pkh = ctx->polykey[0]; + uint64_t pkl = ctx->polykey[1]; + #if (VMAC_TAG_LEN == 128) + uint64_t ch2, cl2, rh2, rl2; + uint64_t pkh2 = ctx->polykey[2]; + uint64_t pkl2 = ctx->polykey[3]; + #endif + + mptr = (uint64_t *)m; + i = mbytes / VMAC_NHBYTES; + remaining = mbytes % VMAC_NHBYTES; + + if (ctx->first_block_processed) + { + ch = ctx->polytmp[0]; + cl = ctx->polytmp[1]; + #if (VMAC_TAG_LEN == 128) + ch2 = ctx->polytmp[2]; + cl2 = ctx->polytmp[3]; + #endif + } + else if (i) + { + #if (VMAC_TAG_LEN == 64) + nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,ch,cl); + #else + nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,ch,cl,ch2,cl2); + ch2 &= m62; + ADD128(ch2,cl2,pkh2,pkl2); + #endif + ch &= m62; + ADD128(ch,cl,pkh,pkl); + mptr += (VMAC_NHBYTES/sizeof(uint64_t)); + i--; + } + else if (remaining) + { + #if (VMAC_TAG_LEN == 64) + nh_16(mptr,kptr,2*((remaining+15)/16),ch,cl); + #else + nh_16_2(mptr,kptr,2*((remaining+15)/16),ch,cl,ch2,cl2); + ch2 &= m62; + ADD128(ch2,cl2,pkh2,pkl2); + #endif + ch &= m62; + ADD128(ch,cl,pkh,pkl); + mptr += (VMAC_NHBYTES/sizeof(uint64_t)); + goto do_l3; + } + else /* Empty String */ + { + ch = pkh; cl = pkl; + #if (VMAC_TAG_LEN == 128) + ch2 = pkh2; cl2 = pkl2; + #endif + goto do_l3; + } + + while (i--) { + #if (VMAC_TAG_LEN == 64) + nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl); + #else + nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2); + rh2 &= m62; + poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2); + #endif + rh &= m62; + poly_step(ch,cl,pkh,pkl,rh,rl); + mptr += (VMAC_NHBYTES/sizeof(uint64_t)); + } + if (remaining) { + #if (VMAC_TAG_LEN == 64) + nh_16(mptr,kptr,2*((remaining+15)/16),rh,rl); + #else + nh_16_2(mptr,kptr,2*((remaining+15)/16),rh,rl,rh2,rl2); + rh2 &= m62; + poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2); + #endif + rh &= m62; + poly_step(ch,cl,pkh,pkl,rh,rl); + } + +do_l3: + #if VMAC_USE_SSE2 + _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */ + #endif + vhash_abort(ctx); + remaining *= 8; +#if (VMAC_TAG_LEN == 128) + *tagl = l3hash(ch2, cl2, ctx->l3key[2], ctx->l3key[3],remaining); +#endif + return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1],remaining); +} + +/* ----------------------------------------------------------------------- */ + +uint64_t vmac(const unsigned char m[], + unsigned int mbytes, + unsigned char n[16], + uint64_t *tagl, + vmac_ctx_t *ctx) +{ +#if (VMAC_TAG_LEN == 64) + uint64_t *in_n, *out_p; + uint64_t p, h; + int i; + + #if VMAC_CACHE_NONCES + in_n = ctx->cached_nonce; + out_p = ctx->cached_aes; + #else + uint64_t tmp[2]; + in_n = out_p = tmp; + #endif + + i = n[15] & 1; + #if VMAC_CACHE_NONCES + if ((*(uint64_t *)(n+8) != in_n[1]) || + (*(uint64_t *)(n ) != in_n[0])) { + #endif + + in_n[0] = *(uint64_t *)(n ); + in_n[1] = *(uint64_t *)(n+8); + ((unsigned char *)in_n)[15] &= 0xFE; + aes_encryption(in_n, out_p, &ctx->cipher_key); + + #if VMAC_CACHE_NONCES + ((unsigned char *)in_n)[15] |= (unsigned char)(1-i); + } + #endif + p = get64BE(out_p + i); + h = vhash(m, mbytes, (uint64_t *)0, ctx); + return p + h; +#else + uint64_t tmp[2]; + uint64_t th,tl; + aes_encryption(n, (unsigned char *)tmp, &ctx->cipher_key); + th = vhash(m, mbytes, &tl, ctx); + th += get64BE(tmp); + *tagl = tl + get64BE(tmp+1); + return th; +#endif +} + +/* ----------------------------------------------------------------------- */ + +void vmac_set_key(const unsigned char user_key[], vmac_ctx_t *ctx) +{ + uint64_t in[2] = {0}, out[2]; + unsigned i; + aes_key_setup(user_key, &ctx->cipher_key); + + /* Fill nh key */ + ((unsigned char *)in)[0] = 0x80; + for (i = 0; i < sizeof(ctx->nhkey)/8; i+=2) { + aes_encryption((unsigned char *)in, (unsigned char *)out, + &ctx->cipher_key); + ctx->nhkey[i ] = get64BE(out); + ctx->nhkey[i+1] = get64BE(out+1); + ((unsigned char *)in)[15] += 1; + } + + /* Fill poly key */ + ((unsigned char *)in)[0] = 0xC0; + in[1] = 0; + for (i = 0; i < sizeof(ctx->polykey)/8; i+=2) { + aes_encryption((unsigned char *)in, (unsigned char *)out, + &ctx->cipher_key); + ctx->polytmp[i ] = ctx->polykey[i ] = get64BE(out) & mpoly; + ctx->polytmp[i+1] = ctx->polykey[i+1] = get64BE(out+1) & mpoly; + ((unsigned char *)in)[15] += 1; + } + + /* Fill ip key */ + ((unsigned char *)in)[0] = 0xE0; + in[1] = 0; + for (i = 0; i < sizeof(ctx->l3key)/8; i+=2) { + do { + aes_encryption((unsigned char *)in, (unsigned char *)out, + &ctx->cipher_key); + ctx->l3key[i ] = get64BE(out); + ctx->l3key[i+1] = get64BE(out+1); + ((unsigned char *)in)[15] += 1; + } while (ctx->l3key[i] >= p64 || ctx->l3key[i+1] >= p64); + } + + /* Invalidate nonce/aes cache and reset other elements */ + #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES) + ctx->cached_nonce[0] = (uint64_t)-1; /* Ensure illegal nonce */ + ctx->cached_nonce[1] = (uint64_t)0; /* Ensure illegal nonce */ + #endif + ctx->first_block_processed = 0; +} + +/* ----------------------------------------------------------------------- */ + + +#if VMAC_RUN_TESTS + +#include +#include +#include +#include + +unsigned prime(void) /* Wake variable speed cpu, get rough speed estimate */ +{ + volatile uint64_t i; + volatile uint64_t j=1; + unsigned cnt=0; + volatile clock_t ticks = clock(); + do { + for (i = 0; i < 500000; i++) { + uint64_t x = get64PE(&j); + j = x * x + (uint64_t)ticks; + } + cnt++; + } while (clock() - ticks < (CLOCKS_PER_SEC/2)); + return cnt; /* cnt is millions of iterations per second */ +} + +int main(void) +{ + ALIGN(16) vmac_ctx_t ctx, ctx_aio, ctx_inc1, ctx_inc2; + uint64_t res, tagl; + void *p; + unsigned char *m; + ALIGN(4) unsigned char key[] = "abcdefghijklmnop"; + ALIGN(4) unsigned char nonce[] = "\0\0\0\0\0\0\0\0bcdefghi"; + unsigned int vector_lengths[] = {0,3,48,300,3000000}; + #if (VMAC_TAG_LEN == 64) + ALIGN(4) char *should_be[] = {"2576BE1C56D8B81B","2D376CF5B1813CE5", + "E8421F61D573D298","4492DF6C5CAC1BBE", + "09BA597DD7601113"}; + #else + ALIGN(4) char *should_be[] = {"472766C70F74ED23481D6D7DE4E80DAC", + "4EE815A06A1D71EDD36FC75D51188A42", + "09F2C80C8E1007A0C12FAE19FE4504AE", + "66438817154850C61D8A412164803BCB", + "2B6B02288FFC461B75485DE893C629DC"}; + #endif + unsigned speed_lengths[] = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; + unsigned i, j, *speed_iters; + clock_t ticks; + double cpb; + const unsigned int buf_len = 3 * (1 << 20); + + j = prime(); + i = sizeof(speed_lengths)/sizeof(speed_lengths[0]); + speed_iters = (unsigned *)malloc(i*sizeof(speed_iters[0])); + speed_iters[i-1] = j * (1 << 12); + while (--i) speed_iters[i-1] = (unsigned)(1.3 * speed_iters[i]); + + /* Initialize context and message buffer, all 16-byte aligned */ + p = malloc(buf_len + 32); + m = (unsigned char *)(((size_t)p + 16) & ~((size_t)15)); + memset(m, 0, buf_len + 16); + vmac_set_key(key, &ctx); + + /* Test incremental and all-in-one interfaces for correctness */ + vmac_set_key(key, &ctx_aio); + vmac_set_key(key, &ctx_inc1); + vmac_set_key(key, &ctx_inc2); + + + /* + for (i = 0; i <= 512; i++) { + vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1); + tagh = vmac(m+(i/VMAC_NHBYTES)*VMAC_NHBYTES, i%VMAC_NHBYTES, + nonce, &tagl, &ctx); + vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1); + for (j = 0; j < vector_lengths[i]; j++) + m[j] = (unsigned char)('a'+j%3); + + } + */ + + /* Generate vectors */ + for (i = 0; i < sizeof(vector_lengths)/sizeof(unsigned int); i++) { + for (j = 0; j < vector_lengths[i]; j++) + m[j] = (unsigned char)('a'+j%3); + res = vmac(m, vector_lengths[i], nonce, &tagl, &ctx); + #if (VMAC_TAG_LEN == 64) + printf("\'abc\' * %7u: %016llX Should be: %s\n", + vector_lengths[i]/3,res,should_be[i]); + #else + printf("\'abc\' * %7u: %016llX%016llX\nShould be : %s\n", + vector_lengths[i]/3,res,tagl,should_be[i]); + #endif + } + + /* Speed test */ + for (i = 0; i < sizeof(speed_lengths)/sizeof(unsigned int); i++) { + ticks = clock(); + for (j = 0; j < speed_iters[i]; j++) { + #if HASH_ONLY + res = vhash(m, speed_lengths[i], &tagl, &ctx); + #else + res = vmac(m, speed_lengths[i], nonce, &tagl, &ctx); + nonce[7]++; + #endif + } + ticks = clock() - ticks; + cpb = ((ticks*VMAC_HZ)/ + ((double)CLOCKS_PER_SEC*speed_lengths[i]*speed_iters[i])); + printf("%4u bytes, %2.2f cpb\n", speed_lengths[i], cpb); + } + return 1; +} + +#endif diff --git a/vmac.h b/vmac.h new file mode 100644 index 00000000000000..777ba17da6aced --- /dev/null +++ b/vmac.h @@ -0,0 +1,176 @@ +#ifndef HEADER_VMAC_H +#define HEADER_VMAC_H + +/* -------------------------------------------------------------------------- + * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai. + * This implementation is herby placed in the public domain. + * The authors offers no warranty. Use at your own risk. + * Please send bug reports to the authors. + * Last modified: 17 APR 08, 1700 PDT + * ----------------------------------------------------------------------- */ + +/* -------------------------------------------------------------------------- + * User definable settings. + * ----------------------------------------------------------------------- */ +#define VMAC_TAG_LEN 128 /* Must be 64 or 128 - 64 sufficient for most */ +#define VMAC_KEY_LEN 128 /* Must be 128, 192 or 256 */ +#define VMAC_NHBYTES 128 /* Must 2^i for any 3 < i < 13. Standard = 128 */ +#define VMAC_PREFER_BIG_ENDIAN 0 /* Prefer non-x86 */ + +#define VMAC_USE_OPENSSL 0 /* Set to non-zero to use OpenSSL's AES */ +#define VMAC_CACHE_NONCES 1 /* Set to non-zero to cause caching */ + /* of consecutive nonces on 64-bit tags */ + +#define VMAC_RUN_TESTS 0 /* Set to non-zero to check vectors and speed */ +#define VMAC_HZ (448e6) /* Set to hz of host machine to get speed */ +#define VMAC_HASH_ONLY 0 /* Set to non-zero to time hash only (not-mac) */ +/* Speeds of cpus I have access to +#define hz (2400e6) glyme Core 2 "Conroe" +#define hz (2000e6) jupiter G5 +#define hz (1592e6) titan +#define hz (2793e6) athena/gaia +#define hz (1250e6) isis G4 +#define hz (2160e6) imac Core 2 "Merom" +#define hz (266e6) ppc/arm +#define hz (400e6) mips +*/ + +/* -------------------------------------------------------------------------- + * This implementation uses uint32_t and uint64_t as names for unsigned 32- + * and 64-bit integer types. These are defined in C99 stdint.h. The + * following may need adaptation if you are not running a C99 or + * Microsoft C environment. + * ----------------------------------------------------------------------- */ +#define VMAC_USE_STDINT 1 /* Set to zero if system has no stdint.h */ + +#if VMAC_USE_STDINT && !_MSC_VER /* Try stdint.h if non-Microsoft */ +#ifdef __cplusplus +#define __STDC_CONSTANT_MACROS +#endif +#include +#elif (_MSC_VER) /* Microsoft C does not have stdint.h */ +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#define UINT64_C(v) v ## UI64 +#else /* Guess sensibly - may need adaptation */ +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +#define UINT64_C(v) v ## ULL +#endif + +/* -------------------------------------------------------------------------- + * This implementation supports two free AES implementations: OpenSSL's and + * Paulo Barreto's. To use OpenSSL's, you will need to include the OpenSSL + * crypto library (eg, gcc -lcrypto foo.c). For Barreto's, you will need + * to compile rijndael-alg-fst.c, last seen at http://www.iaik.tu-graz.ac.at/ + * research/krypto/AES/old/~rijmen/rijndael/rijndael-fst-3.0.zip and + * http://homes.esat.kuleuven.be/~rijmen/rijndael/rijndael-fst-3.0.zip. + * To use a different implementation, use these definitions as a model. + * ----------------------------------------------------------------------- */ +#if VMAC_USE_OPENSSL + +#include +typedef AES_KEY aes_int_key; + +#define aes_encryption(in,out,int_key) \ + AES_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key)) +#define aes_key_setup(key,int_key) \ + AES_set_encrypt_key((key),VMAC_KEY_LEN,(int_key)) + +#else + +#include "rijndael-alg-fst.h" +typedef u32 aes_int_key[4*(VMAC_KEY_LEN/32+7)]; + +#define aes_encryption(in,out,int_key) \ + rijndaelEncrypt((u32 *)(int_key), \ + ((VMAC_KEY_LEN/32)+6), \ + (u8 *)(in), (u8 *)(out)) +#define aes_key_setup(user_key,int_key) \ + rijndaelKeySetupEnc((u32 *)(int_key), \ + (u8 *)(user_key), \ + VMAC_KEY_LEN) +#endif + +/* --------------------------------------------------------------------- */ + +typedef struct { + uint64_t nhkey [(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; + uint64_t polykey[2*VMAC_TAG_LEN/64]; + uint64_t l3key [2*VMAC_TAG_LEN/64]; + uint64_t polytmp[2*VMAC_TAG_LEN/64]; + aes_int_key cipher_key; + #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES) + uint64_t cached_nonce[2]; + uint64_t cached_aes[2]; + #endif + int first_block_processed; +} vmac_ctx_t; + +/* --------------------------------------------------------------------- */ +#ifdef __cplusplus +extern "C" { +#endif +/* -------------------------------------------------------------------------- + * <<<<< USAGE NOTES >>>>> + * + * Given msg m (mbytes in length) and nonce buffer n + * this function returns a tag as its output. The tag is returned as + * a number. When VMAC_TAG_LEN == 64, the 'return'ed integer is the tag, + * and *tagl is meaningless. When VMAC_TAG_LEN == 128 the tag is the + * number y * 2^64 + *tagl where y is the function's return value. + * If you want to consider tags to be strings, then you must do so with + * an agreed upon endian orientation for interoperability, and convert + * the results appropriately. VHASH hashes m without creating any tag. + * Consecutive substrings forming a prefix of a message may be passed + * to vhash_update, with vhash or vmac being called with the remainder + * to produce the output. + * + * Requirements: + * - On 32-bit architectures with SSE2 instructions, ctx and m MUST be + * begin on 16-byte memory boundaries. + * - m MUST be your message followed by zeroes to the nearest 16-byte + * boundary. If m is a length multiple of 16 bytes, then it is already + * at a 16-byte boundary and needs no padding. mbytes should be your + * message length without any padding. + * - The first bit of the nonce buffer n must be 0. An i byte nonce, is made + * as the first 16-i bytes of n being zero, and the final i the nonce. + * - vhash_update MUST have mbytes be a positive multiple of VMAC_NHBYTES + * ----------------------------------------------------------------------- */ + +#define vmac_update vhash_update + +void vhash_update(const unsigned char m[], + unsigned int mbytes, + vmac_ctx_t *ctx); + +uint64_t vmac(const unsigned char m[], + unsigned int mbytes, + unsigned char n[16], + uint64_t *tagl, + vmac_ctx_t *ctx); + +uint64_t vhash(const unsigned char m[], + unsigned int mbytes, + uint64_t *tagl, + vmac_ctx_t *ctx); + +/* -------------------------------------------------------------------------- + * When passed a VMAC_KEY_LEN bit user_key, this function initialazies ctx. + * ----------------------------------------------------------------------- */ + +void vmac_set_key(const unsigned char user_key[], vmac_ctx_t *ctx); + +/* -------------------------------------------------------------------------- + * This function aborts current hash and resets ctx, ready for a new message. + * ----------------------------------------------------------------------- */ + +void vhash_abort(vmac_ctx_t *ctx); + +/* --------------------------------------------------------------------- */ + +#ifdef __cplusplus +} +#endif + +#endif /* HEADER_AES_H */ From f4794962cd4fa14352dc4899da8577bf772bad09 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 17 Mar 2014 12:54:47 -0400 Subject: [PATCH 4/8] Add filesystem view caching Cache a cache for git's view of the working tree. This can be used to reduce the number of syscalls to opendir, readdir, and lstat. Signed-off-by: David Turner --- Makefile | 2 + cache.h | 2 + diff-lib.c | 65 ++- dir.c | 204 +++++++--- dir.h | 16 +- environment.c | 16 +- fs_cache.c | 651 +++++++++++++++++++++++++++++++ fs_cache.h | 137 +++++++ read-cache.c | 29 +- t/t1012-read-tree-df.sh | 2 +- t/t2201-add-update-typechange.sh | 5 +- t/t2204-add-ignored.sh | 6 +- t/t6001-rev-list-graft.sh | 2 +- 13 files changed, 1050 insertions(+), 87 deletions(-) create mode 100644 fs_cache.c create mode 100644 fs_cache.h diff --git a/Makefile b/Makefile index 4cd642d354f19e..58db237855d039 100644 --- a/Makefile +++ b/Makefile @@ -667,6 +667,7 @@ LIB_H += ewah/ewok.h LIB_H += ewah/ewok_rlw.h LIB_H += fetch-pack.h LIB_H += fmt-merge-msg.h +LIB_H += fs_cache.h LIB_H += fsck.h LIB_H += gettext.h LIB_H += git-compat-util.h @@ -808,6 +809,7 @@ LIB_OBJS += ewah/ewah_io.o LIB_OBJS += ewah/ewah_rlw.o LIB_OBJS += exec_cmd.o LIB_OBJS += fetch-pack.o +LIB_OBJS += fs_cache.o LIB_OBJS += fsck.o LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o diff --git a/cache.h b/cache.h index c5917f478ee08c..ed1eb235d84295 100644 --- a/cache.h +++ b/cache.h @@ -272,6 +272,7 @@ struct index_state { struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; + struct fs_cache *fs_cache; struct string_list *resolve_undo; struct cache_tree *cache_tree; struct cache_time timestamp; @@ -352,6 +353,7 @@ static inline enum object_type object_type(unsigned int mode) #define DEFAULT_GIT_DIR_ENVIRONMENT ".git" #define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY" #define INDEX_ENVIRONMENT "GIT_INDEX_FILE" +#define FS_CACHE_ENVIRONMENT "GIT_FS_CACHE_FILE" #define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE" #define GIT_SHALLOW_FILE_ENVIRONMENT "GIT_SHALLOW_FILE" #define TEMPLATE_DIR_ENVIRONMENT "GIT_TEMPLATE_DIR" diff --git a/diff-lib.c b/diff-lib.c index 044872935c30d8..59eb972cfc73e1 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -2,6 +2,7 @@ * Copyright (C) 2005 Junio C Hamano */ #include "cache.h" +#include "fs_cache.h" #include "quote.h" #include "commit.h" #include "diff.h" @@ -17,24 +18,8 @@ * diff-files */ -/* - * Has the work tree entity been removed? - * - * Return 1 if it was removed from the work tree, 0 if an entity to be - * compared with the cache entry ce still exists (the latter includes - * the case where a directory that is not a submodule repository - * exists for ce that is a submodule -- it is a submodule that is not - * checked out). Return negative for an error. - */ -static int check_removed(const struct cache_entry *ce, struct stat *st) +static int check_gitlink(const struct cache_entry *ce, struct stat *st) { - if (lstat(ce->name, st) < 0) { - if (errno != ENOENT && errno != ENOTDIR) - return -1; - return 1; - } - if (has_symlink_leading_path(ce->name, ce_namelen(ce))) - return 1; if (S_ISDIR(st->st_mode)) { unsigned char sub[20]; @@ -56,6 +41,52 @@ static int check_removed(const struct cache_entry *ce, struct stat *st) return 0; } +static int fs_cache_check_removed(const struct fs_cache *fs_cache, const struct cache_entry *ce, struct stat *st) +{ + struct fsc_entry *fe; + + fe = fs_cache_file_exists(fs_cache, ce->name, ce_namelen(ce)); + if (!fe) { + return 1; + } + if (fe_deleted(fe)) { + return 1; + } + + fe_to_stat(fe, st); + + if (check_gitlink(ce, st)) + return 1; + + return 0; +} + +/* + * Has the work tree entity been removed? + * + * Return 1 if it was removed from the work tree, 0 if an entity to be + * compared with the cache entry ce still exists (the latter includes + * the case where a directory that is not a submodule repository + * exists for ce that is a submodule -- it is a submodule that is not + * checked out). Return negative for an error. + */ +static int check_removed(const struct cache_entry *ce, struct stat *st) +{ + if (the_index.fs_cache) + return fs_cache_check_removed(the_index.fs_cache, ce, st); + + if (lstat(ce->name, st) < 0) { + if (errno != ENOENT && errno != ENOTDIR) + return -1; + return 1; + } + if (has_symlink_leading_path(ce->name, ce_namelen(ce))) + return 1; + if (check_gitlink(ce, st)) + return 1; + return 0; +} + /* * Has a file changed or has a submodule new commits or a dirty work tree? * diff --git a/dir.c b/dir.c index eb6f581270f81a..3d311f7af41611 100644 --- a/dir.c +++ b/dir.c @@ -8,6 +8,7 @@ * Junio Hamano, 2005-2006 */ #include "cache.h" +#include "fs_cache.h" #include "dir.h" #include "refs.h" #include "wildmatch.h" @@ -33,8 +34,8 @@ enum path_treatment { static enum path_treatment read_directory_recursive(struct dir_struct *dir, const char *path, int len, - int check_only, const struct path_simplify *simplify); -static int get_dtype(struct dirent *de, const char *path, int len); + int check_only, const struct path_simplify *simplify, + struct fsc_entry *fe); /* helper string functions with support for the ignore_case flag */ int strcmp_icase(const char *a, const char *b) @@ -536,7 +537,7 @@ int add_excludes_from_file_to_list(const char *fname, size_t size = 0; char *buf, *entry; - fd = open(fname, O_RDONLY); + fd = fs_cache_open(the_index.fs_cache, fname, O_RDONLY); if (fd < 0 || fstat(fd, &st) < 0) { if (errno != ENOENT) warn_on_inaccessible(fname); @@ -597,6 +598,8 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir, el = &group->el[group->nr++]; memset(el, 0, sizeof(*el)); el->src = src; + if (group_type == EXC_FILE) + dir->flags &= ~DIR_STD_EXCLUDES; return el; } @@ -609,6 +612,7 @@ void add_excludes_from_file(struct dir_struct *dir, const char *fname) el = add_exclude_list(dir, EXC_FILE, fname); if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0) die("cannot use %s as an exclude file", fname); + dir->flags &= ~DIR_STD_EXCLUDES; } int match_basename(const char *basename, int basenamelen, @@ -763,7 +767,9 @@ static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir, int i, j; struct exclude_list_group *group; struct exclude *exclude; - for (i = EXC_CMDL; i <= EXC_FILE; i++) { + int last = dir->flags & DIR_EXCLUDE_CMDL_ONLY ? EXC_CMDL : EXC_FILE; + + for (i = EXC_CMDL; i <= last; i++) { group = &dir->exclude_list_group[i]; for (j = group->nr - 1; j >= 0; j--) { exclude = last_exclude_matching_from_list( @@ -852,6 +858,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen) /* Try to read per-directory file unless path is too long */ if (dir->exclude_per_dir && + !(dir->flags & DIR_EXCLUDE_CMDL_ONLY) && stk->baselen + strlen(dir->exclude_per_dir) < PATH_MAX) { strcpy(dir->basebuf + stk->baselen, dir->exclude_per_dir); @@ -910,6 +917,17 @@ int is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) return 0; } +static int fs_cache_is_excluded(struct dir_struct *dir, const char *pathname, int *dtype_p, struct fsc_entry *fe) +{ + struct exclude *exclude; + exclude = last_exclude_matching(dir, pathname, dtype_p); + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + if (dir->flags & DIR_STD_EXCLUDES && fe) + return fe_excluded(fe); + return 0; +} + static struct dir_entry *dir_entry_new(const char *pathname, int len) { struct dir_entry *ent; @@ -1047,7 +1065,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) */ static enum path_treatment treat_directory(struct dir_struct *dir, const char *dirname, int len, int exclude, - const struct path_simplify *simplify) + const struct path_simplify *simplify, struct fsc_entry *fe) { /* The "len-1" is to strip the final '/' */ switch (directory_exists_in_index(dirname, len-1)) { @@ -1073,7 +1091,7 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return exclude ? path_excluded : path_untracked; - return read_directory_recursive(dir, dirname, len, 1, simplify); + return read_directory_recursive(dir, dirname, len, 1, simplify, fe); } /* @@ -1167,7 +1185,7 @@ static int get_index_dtype(const char *path, int len) return DT_UNKNOWN; } -static int get_dtype(struct dirent *de, const char *path, int len) +int get_dtype(struct dirent *de, const char *path, int len) { int dtype = de ? DTYPE(de) : DT_UNKNOWN; struct stat st; @@ -1191,7 +1209,8 @@ static int get_dtype(struct dirent *de, const char *path, int len) static enum path_treatment treat_one_path(struct dir_struct *dir, struct strbuf *path, const struct path_simplify *simplify, - int dtype, struct dirent *de) + int dtype, struct dirent *de, + struct fsc_entry *fe) { int exclude; int has_path_in_index = !!cache_file_exists(path->buf, path->len, ignore_case); @@ -1227,7 +1246,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, (directory_exists_in_index(path->buf, path->len) == index_nonexistent)) return path_none; - exclude = is_excluded(dir, path->buf, &dtype); + exclude = fs_cache_is_excluded(dir, path->buf, &dtype, fe); /* * Excluded? If we don't explicitly want to show @@ -1242,7 +1261,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, case DT_DIR: strbuf_addch(path, '/'); return treat_directory(dir, path->buf, path->len, exclude, - simplify); + simplify, fe); case DT_REG: case DT_LNK: return exclude ? path_excluded : path_untracked; @@ -1253,7 +1272,8 @@ static enum path_treatment treat_path(struct dir_struct *dir, struct dirent *de, struct strbuf *path, int baselen, - const struct path_simplify *simplify) + const struct path_simplify *simplify, + struct fsc_entry *fe) { int dtype; @@ -1265,7 +1285,59 @@ static enum path_treatment treat_path(struct dir_struct *dir, return path_none; dtype = DTYPE(de); - return treat_one_path(dir, path, simplify, dtype, de); + return treat_one_path(dir, path, simplify, dtype, de, fe); +} + +static int handle(struct dir_struct *dir, const char *base, int baselen, + int check_only, const struct path_simplify *simplify, + struct dirent *de, enum path_treatment *dir_state, + struct strbuf *path, struct fsc_entry *fe) +{ + enum path_treatment state, subdir_state; + + /* check how the file or directory should be treated */ + state = treat_path(dir, de, path, baselen, simplify, fe); + + if (state > *dir_state) + *dir_state = state; + + /* recurse into subdir if instructed by treat_path */ + if (state == path_recurse) { + subdir_state = read_directory_recursive(dir, path->buf, + path->len, check_only, simplify, fe); + if (subdir_state > *dir_state) + *dir_state = subdir_state; + } + + if (check_only) { + /* abort early if maximum state has been reached */ + if (*dir_state == path_untracked) + return 1; + /* skip the dir_add_* part */ + return 0; + } + + /* add the path to the appropriate result list */ + switch (state) { + case path_excluded: + if (dir->flags & DIR_SHOW_IGNORED) + dir_add_name(dir, path->buf, path->len); + else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || + ((dir->flags & DIR_COLLECT_IGNORED) && + exclude_matches_pathspec(path->buf, path->len, + simplify))) + dir_add_ignored(dir, path->buf, path->len); + break; + + case path_untracked: + if (!(dir->flags & DIR_SHOW_IGNORED)) + dir_add_name(dir, path->buf, path->len); + break; + + default: + break; + } + return 0; } /* @@ -1282,63 +1354,44 @@ static enum path_treatment treat_path(struct dir_struct *dir, static enum path_treatment read_directory_recursive(struct dir_struct *dir, const char *base, int baselen, int check_only, - const struct path_simplify *simplify) + const struct path_simplify *simplify, + struct fsc_entry *parent) { DIR *fdir; - enum path_treatment state, subdir_state, dir_state = path_none; - struct dirent *de; + enum path_treatment dir_state = path_none; struct strbuf path = STRBUF_INIT; strbuf_add(&path, base, baselen); - fdir = opendir(path.len ? path.buf : "."); - if (!fdir) - goto out; - - while ((de = readdir(fdir)) != NULL) { - /* check how the file or directory should be treated */ - state = treat_path(dir, de, &path, baselen, simplify); - if (state > dir_state) - dir_state = state; - - /* recurse into subdir if instructed by treat_path */ - if (state == path_recurse) { - subdir_state = read_directory_recursive(dir, path.buf, - path.len, check_only, simplify); - if (subdir_state > dir_state) - dir_state = subdir_state; - } + if (the_index.fs_cache) { + struct fsc_entry *fe; - if (check_only) { - /* abort early if maximum state has been reached */ - if (dir_state == path_untracked) + if (!parent) + goto out; + + for (fe = parent->first_child; fe; fe = fe->next_sibling) { + struct dirent de; + if (fe_deleted(fe)) + continue; + de.d_ino = -1; + de.d_type = fe_dtype(fe); + strcpy(de.d_name, basename(fe->path)); + if (handle(dir, base, baselen, check_only, simplify, &de, &dir_state, &path, fe)) break; - /* skip the dir_add_* part */ - continue; } + } else { + struct dirent *de; + fdir = opendir(path.len ? path.buf : "."); + if (!fdir) + goto out; - /* add the path to the appropriate result list */ - switch (state) { - case path_excluded: - if (dir->flags & DIR_SHOW_IGNORED) - dir_add_name(dir, path.buf, path.len); - else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || - ((dir->flags & DIR_COLLECT_IGNORED) && - exclude_matches_pathspec(path.buf, path.len, - simplify))) - dir_add_ignored(dir, path.buf, path.len); - break; - - case path_untracked: - if (!(dir->flags & DIR_SHOW_IGNORED)) - dir_add_name(dir, path.buf, path.len); - break; - - default: - break; + while ((de = readdir(fdir)) != NULL) { + if (handle(dir, base, baselen, check_only, simplify, de, &dir_state, &path, NULL)) + break; } + closedir(fdir); } - closedir(fdir); + out: strbuf_release(&path); @@ -1383,7 +1436,8 @@ static void free_simplify(struct path_simplify *simplify) static int treat_leading_path(struct dir_struct *dir, const char *path, int len, - const struct path_simplify *simplify) + const struct path_simplify *simplify, + struct fsc_entry *fe) { struct strbuf sb = STRBUF_INIT; int baselen, rc = 0; @@ -1410,7 +1464,7 @@ static int treat_leading_path(struct dir_struct *dir, if (simplify_away(sb.buf, sb.len, simplify)) break; if (treat_one_path(dir, &sb, simplify, - DT_DIR, NULL) == path_none) + DT_DIR, NULL, fe) == path_none) break; /* do not recurse into it */ if (len <= baselen) { rc = 1; @@ -1422,9 +1476,12 @@ static int treat_leading_path(struct dir_struct *dir, return rc; } + int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) { struct path_simplify *simplify; + int saved_flags = dir->flags; + struct fsc_entry *fe = NULL; /* * Check out create_simplify() @@ -1448,11 +1505,32 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru * create_simplify(). */ simplify = create_simplify(pathspec ? pathspec->_raw : NULL); - if (!len || treat_leading_path(dir, path, len, simplify)) - read_directory_recursive(dir, path, len, 0, simplify); + + /* + Check for standard excludes. + Standard excludes means: exclude_per_dir + */ + if (!dir->exclude_per_dir || strcmp(dir->exclude_per_dir, ".gitignore")) + dir->flags &= ~DIR_STD_EXCLUDES; + + if (the_index.fs_cache && dir->flags & DIR_STD_EXCLUDES) { + dir->flags |= DIR_EXCLUDE_CMDL_ONLY; + } + + if (the_index.fs_cache) { + int len_no_slash = len; + if (len && path[len - 1] == '/') + len_no_slash --; + fe = fs_cache_file_exists(the_index.fs_cache, path, len_no_slash); + } + + if (!len || treat_leading_path(dir, path, len, simplify, fe)) { + read_directory_recursive(dir, path, len, 0, simplify, fe); + } free_simplify(simplify); qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name); qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name); + dir->flags = saved_flags; return dir->nr; } @@ -1608,6 +1686,7 @@ void setup_standard_excludes(struct dir_struct *dir) { const char *path; char *xdg_path; + int previously_empty; dir->exclude_per_dir = ".gitignore"; path = git_path("info/exclude"); @@ -1615,10 +1694,13 @@ void setup_standard_excludes(struct dir_struct *dir) home_config_paths(NULL, &xdg_path, "ignore"); excludes_file = xdg_path; } + previously_empty = dir->exclude_list_group[EXC_FILE].nr == 0; if (!access_or_warn(path, R_OK, 0)) add_excludes_from_file(dir, path); if (excludes_file && !access_or_warn(excludes_file, R_OK, 0)) add_excludes_from_file(dir, excludes_file); + if (previously_empty) + dir->flags |= DIR_STD_EXCLUDES; } int remove_path(const char *name) diff --git a/dir.h b/dir.h index 55e53456afab4c..a7437c149892df 100644 --- a/dir.h +++ b/dir.h @@ -81,7 +81,18 @@ struct dir_struct { DIR_NO_GITLINKS = 1<<3, DIR_COLLECT_IGNORED = 1<<4, DIR_SHOW_IGNORED_TOO = 1<<5, - DIR_COLLECT_KILLED_ONLY = 1<<6 + DIR_COLLECT_KILLED_ONLY = 1<<6, + /* + * Whether the standard excludes are the only file + * excludes which have been set up (if so, we can use + * the fs_cache to optimize is_excluded). + */ + DIR_STD_EXCLUDES = 1<<7, + /* + * Excludes should only check the command-line (for + * use with fs_cache) + */ + DIR_EXCLUDE_CMDL_ONLY = 1<<8 } flags; struct dir_entry **entries; struct dir_entry **ignored; @@ -223,4 +234,7 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +int get_dtype(struct dirent *de, const char *path, int len); + + #endif diff --git a/environment.c b/environment.c index 5c4815dbe132fc..946f300ec3323d 100644 --- a/environment.c +++ b/environment.c @@ -81,7 +81,7 @@ static const char *namespace; static size_t namespace_len; static const char *git_dir; -static char *git_object_dir, *git_index_file, *git_graft_file; +static char *git_object_dir, *git_index_file, *git_graft_file, *git_fs_cache_file; /* * Repository-local GIT_* environment variables; see cache.h for details. @@ -143,6 +143,13 @@ static void setup_git_env(void) git_index_file = xmalloc(strlen(git_dir) + 7); sprintf(git_index_file, "%s/index", git_dir); } + git_fs_cache_file = getenv(FS_CACHE_ENVIRONMENT); + if (!git_fs_cache_file) { + char *fs_cache_file = xmalloc(strlen(git_dir) + 10); + sprintf(fs_cache_file, "%s/fs_cache", git_dir); + git_fs_cache_file = xstrdup(real_path(fs_cache_file)); + free(fs_cache_file); + } git_graft_file = getenv(GRAFT_ENVIRONMENT); if (!git_graft_file) git_graft_file = git_pathdup("info/grafts"); @@ -266,6 +273,13 @@ char *get_graft_file(void) return git_graft_file; } +char *get_fs_cache_file(void) +{ + if (!git_fs_cache_file) + setup_git_env(); + return git_fs_cache_file; +} + int set_git_dir(const char *path) { if (setenv(GIT_DIR_ENVIRONMENT, path, 1)) diff --git a/fs_cache.c b/fs_cache.c new file mode 100644 index 00000000000000..a573cf231e034d --- /dev/null +++ b/fs_cache.c @@ -0,0 +1,651 @@ +#include "cache.h" +#include "fs_cache.h" +#include "strbuf.h" +#include "hashmap.h" +#include "hash-io.h" + +#define FS_CACHE_SIGNATURE 0x4D4F4443 /* "MODC" */ + +static int fe_entry_cmp(const struct fsc_entry *f1, + const struct fsc_entry *f2, + const char *name) +{ + if (f1->pathlen != f2->pathlen) + return 1; + name = name ? name : f2->path; + return ignore_case ? strncasecmp(f1->path, name, f1->pathlen) : + strncmp(f1->path, name, f1->pathlen); + +} + +unsigned char fe_dtype(struct fsc_entry *file) +{ + if (fe_is_reg(file)) { + return DT_REG; + } + if (fe_is_dir(file)) { + return DT_DIR; + } + if (fe_is_lnk(file)) { + return DT_LNK; + } + return DT_UNKNOWN; +} + +#define FS_CACHE_FORMAT_LB 1 +#define FS_CACHE_FORMAT_UB 2 + +static int verify_hdr(struct fs_cache_header *hdr, unsigned long size) +{ + vmac_ctx_t c; + unsigned char sha1[20]; + int hdr_version; + + if (hdr->hdr_signature != htonl(FS_CACHE_SIGNATURE)) { + warning("bad fs_cache signature"); + return -1; + } + hdr_version = ntohl(hdr->hdr_version); + if (hdr_version < FS_CACHE_FORMAT_LB || FS_CACHE_FORMAT_UB < hdr_version) { + warning("bad fs_cache version %d", hdr_version); + return -1; + } + + unsigned char *key = (unsigned char *)"abcdefghijklmnop"; + vmac_set_key(key, &c); + vmac_update_unaligned(hdr, size - 20, &c); + vmac_final(sha1, &c); + if (hashcmp(sha1, (unsigned char *)hdr + size - 20)) { + warning("bad fs_cache file vmac signature"); + return -1; + } + + return 0; +} + +static struct fsc_entry *create_from_disk(struct fs_cache *fs_cache, struct ondisk_fsc_entry *disk_fe, unsigned long *consumed) +{ + struct fsc_entry *fe; + int pathlen = strlen(disk_fe->path); + + fe = obstack_alloc(&fs_cache->obs, sizeof(*fe) + pathlen + 1); + + fe->size = ntohl(disk_fe->size); + fe->mode = ntohl(disk_fe->mode); + fe->flags = ntohl(disk_fe->flags); + + fe->ctime.sec = ntohl(disk_fe->ctime.sec); + fe->mtime.sec = ntohl(disk_fe->mtime.sec); + fe->ctime.nsec = ntohl(disk_fe->ctime.nsec); + fe->mtime.nsec = ntohl(disk_fe->mtime.nsec); + + fe->ino = ntohl(disk_fe->ino); + fe->dev = ntohl(disk_fe->dev); + + fe->uid = ntohl(disk_fe->uid); + fe->gid = ntohl(disk_fe->gid); + + fe->parent = NULL; + fe->first_child = NULL; + fe->next_sibling = NULL; + memcpy(fe->path, disk_fe->path, pathlen + 1); + fe->pathlen = pathlen; + + hashmap_entry_init(fe, memihash(fe->path, pathlen)); + *consumed = sizeof(*disk_fe) + pathlen + 1; + return fe; +} + +static void copy_fs_cache_entry_to_ondisk( + struct ondisk_fsc_entry *ondisk, + struct fsc_entry *fe) +{ + + ondisk->size = htonl(fe->size); + ondisk->mode = htonl(fe->mode); + ondisk->flags = htonl(fe->flags & ~FE_NEW); + + ondisk->ctime.sec = htonl(fe->ctime.sec); + ondisk->mtime.sec = htonl(fe->mtime.sec); + ondisk->ctime.nsec = htonl(fe->ctime.nsec); + ondisk->mtime.nsec = htonl(fe->mtime.nsec); + + ondisk->ino = htonl(fe->ino); + ondisk->dev = htonl(fe->dev); + + ondisk->uid = htonl(fe->uid); + ondisk->gid = htonl(fe->gid); + + memcpy(ondisk->path, fe->path, fe->pathlen + 1); + +} + +static int fe_write_entry(struct fsc_entry *fe, int fd, struct hash_context *context) +{ + int result; + static struct ondisk_fsc_entry *ondisk = NULL; + static size_t max_size = sizeof(*ondisk) + 1 + PATH_MAX; + size_t size; + + size = sizeof(*ondisk) + fe->pathlen + 1; + if (size > max_size) { + max_size = size; + if (ondisk) { + ondisk = xrealloc(ondisk, max_size); + memset(ondisk, 0, max_size); + } + } + + + if (!ondisk) + ondisk = xcalloc(1, max_size); + + copy_fs_cache_entry_to_ondisk(ondisk, fe); + + result = write_with_hash(context, fd, ondisk, size); + + return result ? -1 : 0; +} + +static int fe_write_entry_recursive(struct fsc_entry *fe, int fd, struct hash_context *c) +{ + if (fe_write_entry(fe, fd, c)) + return error("failed to write some file of fs_cache"); + fe = fe->first_child; + while (fe) { + fe_write_entry_recursive(fe, fd, c); + fe = fe->next_sibling; + } + + return 0; +} + +static char *xstrcpy(char *dest, const char *src) +{ + while ((*dest++ = *src++)) { + } + + return dest; +} + +int write_fs_cache(struct fs_cache *fs_cache) +{ + struct hash_context c; + struct fs_cache_header *hdr; + int hdr_size; + struct stat st; + int fd; + const char *path; + char *cur; + int string_size; + + path = get_fs_cache_file(); + + fd = open(path, O_WRONLY|O_TRUNC|O_CREAT, 0666); + if (fd < 0) + die_errno("failed to open fs_cache file %s", path); + + string_size = strlen(fs_cache->last_update) + + strlen(fs_cache->repo_path) + + strlen(fs_cache->excludes_file) + 3; + + hdr_size = sizeof(*hdr) + string_size; + hdr = xmalloc(hdr_size); + hdr->hdr_signature = htonl(FS_CACHE_SIGNATURE); + hdr->hdr_version = htonl(fs_cache->version); + hdr->hdr_entries = htonl(fs_cache->nr); + hdr->flags = htonl(fs_cache->flags); + hashcpy(hdr->git_excludes_sha1, fs_cache->git_excludes_sha1); + hashcpy(hdr->user_excludes_sha1, fs_cache->user_excludes_sha1); + cur = xstrcpy(hdr->strings, fs_cache->last_update); + cur = xstrcpy(cur, fs_cache->repo_path); + cur = xstrcpy(cur, fs_cache->excludes_file); + + hash_context_init(&c, HASH_IO_VMAC); + + if (write_with_hash(&c, fd, hdr, hdr_size) < 0) + die_errno("failed to write header of fs_cache"); + + fe_write_entry_recursive(fs_cache_file_exists(fs_cache, "", 0), fd, &c); + if (write_with_hash_flush(&c, fd) || fstat(fd, &st)) + return error("Failed to flush/fstat fs_cache file"); + + hash_context_release(&c); + free(hdr); + return 0; +} + +void *mmap_fs_cache(size_t *mmap_size) +{ + struct stat st; + void *mmap; + const char *path = get_fs_cache_file(); + int fd = open(path, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) + return NULL; + die_errno("fs_cache file open failed"); + } + + if (fstat(fd, &st)) + die_errno("cannot stat the open fs_cache"); + + *mmap_size = xsize_t(st.st_size); + if (*mmap_size < sizeof(struct fs_cache_header) + 20) { + warning("fs_cache file smaller than expected"); + return NULL; + } + + mmap = xmmap(NULL, *mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (mmap == MAP_FAILED) + die_errno("unable to map fs_cache file"); + close(fd); + return mmap; +} + +/* Loading the fs_cache can take some time, and we might want to thread + * it with other loads; we need the last-update time and the repo path + * to check whether this is a good idea, so this function will preload + * it. Note that the caller must free the returned strings. + */ +void fs_cache_preload_metadata(char **last_update, char **repo_path) +{ + size_t mmap_size; + void *mmap; + struct fs_cache_header *hdr; + int version; + + mmap = mmap_fs_cache(&mmap_size); + if (!mmap) { + *last_update = *repo_path = NULL; + return; + } + hdr = mmap; + version = ntohl(hdr->hdr_version); + if (version < FS_CACHE_FORMAT_LB || FS_CACHE_FORMAT_UB < version) { + warning("bad fs_cache version %d", version); + goto unmap; + } + + *last_update = xstrdup(hdr->strings); + *repo_path = xstrdup(hdr->strings + strlen(*last_update) + 1); + +unmap: + munmap(mmap, mmap_size); +} + +static void write_fs_cache_if_necessary(void) +{ + struct fs_cache *fs_cache = the_index.fs_cache; + if (fs_cache && fs_cache->needs_write && fs_cache->fully_loaded) { + write_fs_cache(fs_cache); + the_index.fs_cache = 0; + } +} + +static void fe_set_parent(struct fsc_entry *fe, struct fsc_entry *parent) +{ + fe->parent = parent; + fe->next_sibling = fe->parent->first_child; + fe->parent->first_child = fe; +} + +void set_up_parent(struct fs_cache *fs_cache, struct fsc_entry *fe) +{ + char *last_slash; + int parent_len; + struct fsc_entry *parent; + if (fe->pathlen == 0) + return; + + last_slash = strrchr(fe->path, '/'); + + if (last_slash) { + parent_len = last_slash - fe->path; + } else { + parent_len = 0; + } + + parent = fs_cache_file_exists(fs_cache, fe->path, parent_len); + if (!parent) { + die("Missing parent directory for %s", fe->path); + } + fe_set_parent(fe, parent); +} + +static char *read_string(char **out, char *in) +{ + int len = strlen(in); + *out = xstrdup(in); + return in + len + 1; +} + +/* Load the modified file cache from disk. If the cache is corrupt, + * prints a warning and returns NULL; we can safely recreate it. If + * the cache is missing, also returns NULL. If there is some other + * problem reading the cache (say it's read-only, or we get an io + * error), die with an error message. */ +struct fs_cache *read_fs_cache(void) +{ + struct fs_cache *fs_cache; + struct fs_cache_header *hdr; + int i; + unsigned int nr; + void *mmap; + void *mmap_cur; + size_t mmap_size; + + mmap = mmap_fs_cache(&mmap_size); + if (!mmap) { + return NULL; + } + + hdr = mmap; + if (verify_hdr(hdr, mmap_size) < 0) + goto unmap; + + fs_cache = xcalloc(1, sizeof(*fs_cache)); + obstack_init(&fs_cache->obs); + nr = ntohl(hdr->hdr_entries); + fs_cache->flags = ntohl(hdr->flags); + fs_cache->version = ntohl(hdr->hdr_version); + hashmap_init(&fs_cache->paths, (hashmap_cmp_fn) fe_entry_cmp, nr); + fs_cache->nr = 0; + hashcpy(fs_cache->git_excludes_sha1, hdr->git_excludes_sha1); + hashcpy(fs_cache->user_excludes_sha1, hdr->user_excludes_sha1); + + mmap_cur = hdr->strings; + mmap_cur = read_string(&fs_cache->last_update, mmap_cur); + mmap_cur = read_string(&fs_cache->repo_path, mmap_cur); + mmap_cur = read_string(&fs_cache->excludes_file, mmap_cur); + + struct fsc_entry *parent_stack[PATH_MAX]; + int parent_top = -1; + + for (i = 0; i < nr; i++) { + struct ondisk_fsc_entry *disk_fe; + struct fsc_entry *fe; + unsigned long consumed; + + disk_fe = (struct ondisk_fsc_entry *) mmap_cur; + fe = create_from_disk(fs_cache, disk_fe, &consumed); + /* + * We eliminate deleted cache entries on read because + * otherwise we have to count them in advance to fill + * in nr, and that would be expensive. + */ + if (!fe_deleted(fe)) { + fs_cache_insert(fs_cache, fe); + if (parent_top == -1) { + parent_top = 0; + parent_stack[0] = fe; + } else { + char *p = parent_stack[parent_top]->path; + char *c = fe->path; + parent_top = 1; + for (; *p && *c; ++p, ++c) { + if (*p != *c) + break; + if (*p == '/') + parent_top ++; + } + if (*p == 0 && *c == '/') + parent_top ++; + parent_stack[parent_top] = fe; + fe_set_parent(fe, parent_stack[parent_top - 1]); + } + } + mmap_cur += consumed; + } + + fs_cache->fully_loaded = 1; + + munmap(mmap, mmap_size); + + atexit(write_fs_cache_if_necessary); + return fs_cache; + +unmap: + munmap(mmap, mmap_size); + return NULL; +} + +struct fs_cache *empty_fs_cache(void) +{ + struct fs_cache *fs_cache = xcalloc(1, sizeof(*fs_cache)); + fs_cache->version = 1; + fs_cache->needs_write = 1; + fs_cache->fully_loaded = 1; + hashmap_init(&fs_cache->paths, (hashmap_cmp_fn) fe_entry_cmp, 1); + atexit(write_fs_cache_if_necessary); + return fs_cache; +} + +struct fsc_entry *fs_cache_file_exists(const struct fs_cache *fs_cache, + const char *name, int namelen) +{ + return fs_cache_file_exists_prehash(fs_cache, name, namelen, + memihash(name, namelen)); +} + +struct fsc_entry *fs_cache_file_exists_prehash(const struct fs_cache *fs_cache, const char *path, int pathlen, unsigned int hash) +{ + struct fsc_entry key; + + hashmap_entry_init(&key, hash); + key.pathlen = pathlen; + return hashmap_get(&fs_cache->paths, &key, path); +} + +struct fsc_entry *make_fs_cache_entry(const char *path) +{ + return make_fs_cache_entry_len(path, strlen(path)); +} + +struct fsc_entry *make_fs_cache_entry_len(const char *path, int len) +{ + struct fsc_entry *fe = xcalloc(1, sizeof(*fe) + len + 1); + fe_set_new(fe); + memcpy(fe->path, path, len); + fe->pathlen = len; + hashmap_entry_init(fe, memihash(fe->path, fe->pathlen)); + return fe; +} + +void fs_cache_insert(struct fs_cache *fs_cache, struct fsc_entry *fe) +{ + hashmap_add(&fs_cache->paths, fe); + fs_cache->nr ++; +} + +static void fs_cache_remove_recursive(struct fs_cache *fs_cache, + struct fsc_entry *fe) +{ + struct fsc_entry *cur, *next; + for (cur = fe->first_child; cur; cur = next) { + fs_cache_remove_recursive(fs_cache, cur); + next = cur->next_sibling; + cur->next_sibling = NULL; + cur->parent = NULL; + cur->first_child = NULL; + } + + hashmap_remove(&fs_cache->paths, fe, fe); + fs_cache->nr --; +} + +static void fe_remove_from_parent(struct fsc_entry *fe) +{ + struct fsc_entry *prev, *cur; + if (fe->parent) { + prev = NULL; + for (cur = fe->parent->first_child; cur; cur = cur->next_sibling) { + if (cur == fe) { + if (prev) + prev->next_sibling = fe->next_sibling; + else + fe->parent->first_child = fe->next_sibling; + break; + } + prev = cur; + } + } +} + +void fe_delete_children(struct fsc_entry *fe) +{ + for (fe = fe->first_child; fe; fe = fe->next_sibling) { + fe_set_deleted(fe); + } +} + +void fe_clear_children(struct fs_cache *fs_cache, struct fsc_entry *fe) +{ + for (fe = fe->first_child; fe; fe = fe->next_sibling) { + fs_cache_remove(fs_cache, fe); + } + +} + +void fe_set_deleted(struct fsc_entry *fe) +{ + fe->flags |= FE_DELETED; + fe_delete_children(fe); +} + +void fs_cache_remove_nonrecursive(struct fs_cache *fs_cache, + struct fsc_entry *fe) +{ + + hashmap_remove(&fs_cache->paths, fe, fe); + fs_cache->nr --; + + fe_remove_from_parent(fe); +} + +void fs_cache_remove(struct fs_cache *fs_cache, + struct fsc_entry *fe) +{ + + fs_cache_remove_recursive(fs_cache, fe); + + fe_remove_from_parent(fe); +} + +void free_fs_cache(struct fs_cache *fs_cache) +{ + obstack_free(&fs_cache->obs, NULL); + free(fs_cache->last_update); + free(fs_cache->repo_path); + free(fs_cache->excludes_file); +} + +void fe_to_stat(struct fsc_entry *fe, struct stat *st) +{ + st->st_mtime = fe->mtime.sec; + st->st_ctime = fe->ctime.sec; +#ifndef NO_NSEC +#ifdef USE_ST_TIMESPEC + st->st_mtimespec.tv_nsec = fe->mtime.nsec; + st->st_ctimespec.tv_nsec = fe->ctime.nsec; +#else + st->st_mtim.tv_nsec = fe->mtime.nsec; + st->st_ctim.tv_nsec = fe->ctime.nsec; +#endif +#endif + st->st_mode = fe->mode; + st->st_ino = fe->ino; + st->st_dev = fe->dev; + st->st_uid = fe->uid; + st->st_gid = fe->gid; + st->st_size = fe->size; +} + +int is_in_dot_git(const char *name) +{ + char *evil = ".git"; + char *cur = evil; + while (*name) { + if (*name == *cur++) { + name++; + if (*cur == 0) { + if (*name == 0 || *name == '/') { + return 1; + } + } + } else { + if (*name == '/') { + cur = evil; + } else { + cur = ""; + } + name++; + } + } + return 0; +} + +static int is_path_prefix(const char *putative_parent, const char *fname) +{ + const char* c; + for (c = putative_parent; *c && *fname; ++c, ++fname) { + if (*c != *fname) { + return 0; + } + } + return *c == 0 && (*fname == 0 || *fname == '/'); +} + +int fs_cache_open(struct fs_cache *fs_cache, const char *fname, int flags) +{ + if (fs_cache && fname[0] != '/' && !is_path_prefix(get_git_dir(), fname)) { + struct fsc_entry *fe = fs_cache_file_exists(fs_cache, fname, strlen(fname)); + if (!fe || fe_deleted(fe)) { + errno = ENOENT; + return -1; + } + } + return open(fname, flags); +} + +static const int topological_rank[256] = { + 0, /* slash moved here */ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 1 /* slash is special */, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, + 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, + 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, + 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, + 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255 +}; + +/* + * Compare fsc_entry structs topologically -- that is, so that parent + * directories come before their children. + */ +int cmp_fsc_entry(const void *a, const void *b) +{ + struct fsc_entry* const * sa = a; + struct fsc_entry* const * sb = b; + const unsigned char* pa = (unsigned char *)(*sa)->path; + const unsigned char* pb = (unsigned char *)(*sb)->path; + while (*pa && *pb) { + int ca = topological_rank[*pa++]; + int cb = topological_rank[*pb++]; + int diff = ca - cb; + if (diff) + return diff; + } + return topological_rank[*pa] - topological_rank[*pb]; +} diff --git a/fs_cache.h b/fs_cache.h new file mode 100644 index 00000000000000..a80856eb3b94d0 --- /dev/null +++ b/fs_cache.h @@ -0,0 +1,137 @@ +#ifndef FS_CACHE_H +#define FS_CACHE_H + +#include +#include +#include +#include "compat/obstack.h" + +#include "git-compat-util.h" +#include "strbuf.h" +#include "hashmap.h" + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + +/* The filesystem cache (fs_cache) stores the state of every file + * inside the root directory (excluding those in .git). The state + * includes whether or not the file exists, as well as most of what + * lstat returns. + */ + +#define fe_is_reg(fe) (S_ISREG((fe)->mode)) +#define fe_is_dir(fe) (S_ISDIR((fe)->mode)) +#define fe_is_lnk(fe) (S_ISLNK((fe)->mode)) + +/* Directories get very different treatment generally; the normal bits + * don't apply to them, since they have no independent existence in + * git. Also, they are subject to spooky action at a distance -- if a + * file called x/a/b/c is created (and added to the index), then x + * suddenly must get added to the index. + */ + +struct fsc_entry { + struct hashmap_entry ent; + unsigned int mode; + off_t size; + unsigned int flags; + struct cache_time ctime; + struct cache_time mtime; + ino_t ino; + dev_t dev; + uid_t uid; + gid_t gid; + struct fsc_entry *parent; + struct fsc_entry *first_child; + struct fsc_entry *next_sibling; + int pathlen; + char path[FLEX_ARRAY]; +}; + +#define FE_DELETED (1 << 0) + +/* Excluded by the standard set of gitexcludes */ +#define FE_EXCLUDED (1 << 8) + +/* Not yet saved to disk */ +#define FE_NEW (1 << 10) + +void fe_set_deleted(struct fsc_entry *fe); +#define fe_clear_deleted(fe) ((fe)->flags &= ~FE_DELETED) +#define fe_deleted(fe) ((fe)->flags & FE_DELETED) + +#define fe_excluded(fe) ((fe)->flags & FE_EXCLUDED) +#define fe_set_excluded(fe) ((fe)->flags |= FE_EXCLUDED) +#define fe_clear_excluded(fe) ((fe)->flags &= ~FE_EXCLUDED) + +#define fe_new(fe) ((fe)->flags & FE_NEW) +#define fe_set_new(fe) ((fe)->flags |= FE_NEW) +#define fe_clear_new(fe) ((fe)->flags &= ~FE_NEW) + +struct fs_cache { + char *last_update; + char *repo_path; + char *excludes_file; + unsigned char git_excludes_sha1[20]; /* for .git/info/exclude */ + unsigned char user_excludes_sha1[20]; /* for core.excludesfile */ + unsigned int version; + struct hashmap paths; + int nr; + unsigned invalid : 1; /* A commit hook might have made fs + * changes, necessitating a reload. */ + unsigned needs_write : 1; + unsigned fully_loaded : 1; + uint32_t flags; + struct obstack obs; +}; + +struct fs_cache_header { + uint32_t hdr_signature; + uint32_t hdr_version; + uint32_t hdr_entries; + uint32_t flags; + unsigned char git_excludes_sha1[20]; + unsigned char user_excludes_sha1[20]; + char strings[FLEX_ARRAY]; +}; + +struct ondisk_fsc_entry { + uint64_t ino; + uint64_t dev; + struct cache_time ctime; + struct cache_time mtime; + uint32_t mode; + uint32_t size; + uint32_t flags; + uint32_t uid; + uint32_t gid; + char path[FLEX_ARRAY]; +}; + +extern char *get_fs_cache_file(void); + +unsigned char fe_dtype(struct fsc_entry *file); +void fe_to_stat(struct fsc_entry *fe, struct stat *st); +void fe_delete_children(struct fsc_entry *fe); +void fe_clear_children(struct fs_cache *fs_cache, struct fsc_entry *fe); + +struct fs_cache *read_fs_cache(void); +int fs_cache_open(struct fs_cache *fs_cache, const char *fname, int flags); +int write_fs_cache(struct fs_cache *fs_cache); +struct fs_cache *empty_fs_cache(void); +struct fsc_entry *fs_cache_file_exists(const struct fs_cache *fs_cache, const char *name, int namelen); +struct fsc_entry *fs_cache_file_exists_prehash(const struct fs_cache *fs_cache, const char *name, int namelen, unsigned int hash); +struct fsc_entry *make_fs_cache_entry(const char *path); +struct fsc_entry *make_fs_cache_entry_len(const char *path, int len); +void fs_cache_preload_metadata(char **last_update, char **repo_path); + +void fs_cache_remove(struct fs_cache *fs_cache, struct fsc_entry *fe); +void fs_cache_insert(struct fs_cache *fs_cache, struct fsc_entry *fe); +void free_fs_cache(struct fs_cache *fs_cache); +void set_up_parent(struct fs_cache *fs_cache, struct fsc_entry *fe); + +int is_in_dot_git(const char *name); + +int cmp_fsc_entry(const void *a, const void *b); + +#endif /* FS_CACHE_H */ diff --git a/read-cache.c b/read-cache.c index a124e594b72f39..4e12a7e0344784 100644 --- a/read-cache.c +++ b/read-cache.c @@ -5,6 +5,7 @@ */ #define NO_THE_INDEX_COMPATIBILITY_MACROS #include "cache.h" +#include "fs_cache.h" #include "cache-tree.h" #include "refs.h" #include "dir.h" @@ -1004,6 +1005,30 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti return 0; } +static int fs_cache_lstat(struct fs_cache *fs_cache, + const char *name, int len, struct stat *st) +{ + + struct fsc_entry *fe; + if (!fs_cache) + return lstat(name, st); + + fe = fs_cache_file_exists(fs_cache, name, len); + if (!fe) { + /* This is necessary because children of symlinks are not + * included in the fs_cache. */ + return lstat(name, st); + } + + if (fe_deleted(fe)) { + errno = ENOENT; + return -1; + } else { + fe_to_stat(fe, st); + } + return 0; +} + /* * "refresh" does not calculate a new sha1 file or bring the * cache up-to-date for mode/content changes. But what it @@ -1045,7 +1070,7 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate, return ce; } - if (lstat(ce->name, &st) < 0) { + if (fs_cache_lstat(the_index.fs_cache, ce->name, ce_namelen(ce), &st) < 0) { if (ignore_missing && errno == ENOENT) return ce; if (err) @@ -1560,6 +1585,8 @@ int discard_index(struct index_state *istate) free(istate->cache); istate->cache = NULL; istate->cache_alloc = 0; + if (istate->fs_cache) + istate->fs_cache->invalid = 1; return 0; } diff --git a/t/t1012-read-tree-df.sh b/t/t1012-read-tree-df.sh index a6a04b6b90d290..a677053fc18355 100755 --- a/t/t1012-read-tree-df.sh +++ b/t/t1012-read-tree-df.sh @@ -23,7 +23,7 @@ maketree () { } settree () { - rm -f .git/index .git/index.lock && + rm -f .git/index .git/index.lock .git/fs_cache && git clean -d -f -f -q -x && git read-tree "$1" && git checkout-index -f -q -u -a && diff --git a/t/t2201-add-update-typechange.sh b/t/t2201-add-update-typechange.sh index 954fc51e5b560a..ad4953558f2330 100755 --- a/t/t2201-add-update-typechange.sh +++ b/t/t2201-add-update-typechange.sh @@ -124,7 +124,9 @@ test_expect_success diff-index ' test_expect_success 'add -u' ' rm -f ".git/saved-index" && + rm -f ".git/saved-fs_cache" && cp -p ".git/index" ".git/saved-index" && + (test ! -f .git/fs_cache || cp -p ".git/fs_cache" ".git/saved-fs_cache") && git add -u && git ls-files -s >actual && test_cmp expect-final actual @@ -134,7 +136,8 @@ test_expect_success 'commit -a' ' if test -f ".git/saved-index" then rm -f ".git/index" && - mv ".git/saved-index" ".git/index" + mv ".git/saved-index" ".git/index" && + (test ! -f .git/saved-fs_cache || mv ".git/saved-fs_cache" ".git/fs_cache") fi && git commit -m "second" -a && git ls-files -s >actual && diff --git a/t/t2204-add-ignored.sh b/t/t2204-add-ignored.sh index 8340ac2f073446..6653aa8d80bb5b 100755 --- a/t/t2204-add-ignored.sh +++ b/t/t2204-add-ignored.sh @@ -18,7 +18,7 @@ test_expect_success setup ' for i in file dir/file dir 'd*' do test_expect_success "no complaints for unignored $i" ' - rm -f .git/index && + rm -f .git/index .git/fs_cache && git add "$i" && git ls-files "$i" >out && test -s out @@ -28,7 +28,7 @@ done for i in ign dir/ign dir/sub dir/sub/*ign sub/file sub sub/* do test_expect_success "complaints for ignored $i" ' - rm -f .git/index && + rm -f .git/index .git/fs_cache && test_must_fail git add "$i" 2>err && git ls-files "$i" >out && ! test -s out @@ -39,7 +39,7 @@ do ' test_expect_success "complaints for ignored $i with unignored file" ' - rm -f .git/index && + rm -f .git/index .git/fs_cache && test_must_fail git add "$i" file 2>err && git ls-files "$i" >out && ! test -s out diff --git a/t/t6001-rev-list-graft.sh b/t/t6001-rev-list-graft.sh index 8efcd130795890..56f8ac04a2ea28 100755 --- a/t/t6001-rev-list-graft.sh +++ b/t/t6001-rev-list-graft.sh @@ -20,7 +20,7 @@ test_expect_success setup ' git commit -a -m "Third in one history." && A2=`git rev-parse --verify HEAD` && - rm -f .git/refs/heads/master .git/index && + rm -f .git/refs/heads/master .git/index .git/fs_cache && echo >fileA fileA again && echo >subdir/fileB fileB again && From 3fe93aeaee9719ee171a253c49af5126a057c513 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 7 May 2014 12:54:29 -0700 Subject: [PATCH 5/8] fs_cache: Make the fs_cache path absolute on first query When we first call get_fs_cache_file, set git_fs_cache_file to an absolute path. This means that writing the fs_cache in atexit will work despite intervening chdirs. Signed-off-by: David Turner --- environment.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/environment.c b/environment.c index 946f300ec3323d..9d8b2ce4c27065 100644 --- a/environment.c +++ b/environment.c @@ -81,6 +81,7 @@ static const char *namespace; static size_t namespace_len; static const char *git_dir; +static int fs_cache_file_relative; static char *git_object_dir, *git_index_file, *git_graft_file, *git_fs_cache_file; /* @@ -145,10 +146,9 @@ static void setup_git_env(void) } git_fs_cache_file = getenv(FS_CACHE_ENVIRONMENT); if (!git_fs_cache_file) { - char *fs_cache_file = xmalloc(strlen(git_dir) + 10); - sprintf(fs_cache_file, "%s/fs_cache", git_dir); - git_fs_cache_file = xstrdup(real_path(fs_cache_file)); - free(fs_cache_file); + git_fs_cache_file = xmalloc(strlen(git_dir) + 10); + sprintf(git_fs_cache_file, "%s/fs_cache", git_dir); + fs_cache_file_relative = 1; } git_graft_file = getenv(GRAFT_ENVIRONMENT); if (!git_graft_file) @@ -277,6 +277,12 @@ char *get_fs_cache_file(void) { if (!git_fs_cache_file) setup_git_env(); + if (fs_cache_file_relative) { + char *abs_fs_cache_file = strdup(real_path(git_fs_cache_file)); + free(git_fs_cache_file); + git_fs_cache_file = abs_fs_cache_file; + fs_cache_file_relative = 0; + } return git_fs_cache_file; } From d3986bfd7a9c1086b06523eb8e3b789ffed80d3e Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 17 Mar 2014 12:54:47 -0400 Subject: [PATCH 6/8] Watchman support Add support for filesystem view caching and updating via Facebook's Watchman daemon. Signed-off-by: David Turner --- Documentation/watchman.txt | 27 ++ Makefile | 9 + cache.h | 2 + config.c | 10 + configure.ac | 6 + environment.c | 4 + read-cache.c | 32 +- t/t7900-watchman.sh | 249 +++++++++++++++ t/test-lib.sh | 1 + watchman-support.c | 640 +++++++++++++++++++++++++++++++++++++ watchman-support.h | 10 + 11 files changed, 989 insertions(+), 1 deletion(-) create mode 100644 Documentation/watchman.txt create mode 100755 t/t7900-watchman.sh create mode 100644 watchman-support.c create mode 100644 watchman-support.h diff --git a/Documentation/watchman.txt b/Documentation/watchman.txt new file mode 100644 index 00000000000000..b849e98eaab3d3 --- /dev/null +++ b/Documentation/watchman.txt @@ -0,0 +1,27 @@ +How git uses watchman +--------------------- + +Git status (and some other commands) have to determine which files +have changed between the working copy and the index. Ordinarily, this +requires checking every file in the working directory. But if you +have watchman (https://github.com/facebook/watchman) installed, git +can cache the state of the working directory and use watchman to track +file changes, making commands like git status faster. + +set core.usewatchman = true to use watchman. +You can also set +core.watchmansynctimeout = [number of milliseconds] +to change watchman's sync timeout; see the watchman docs for details +on this. You should only change this if you see watchman timeout +error messages. + +Internals +--------- + +The filesystem cache stores information about every file in the +working tree. In almost every case where git calls lstat or +opendir/readdir, the modified file cache can be consulted instead. + +The file system cache is stored on disk in .git/fs_cache. It is +stored very similarly to the index, except without path prefix +compression. diff --git a/Makefile b/Makefile index 58db237855d039..997879c33eb7cc 100644 --- a/Makefile +++ b/Makefile @@ -405,6 +405,7 @@ TCLTK_PATH = wish XGETTEXT = xgettext MSGFMT = msgfmt PTHREAD_LIBS = -lpthread +WATCHMAN_LIBS = -lwatchman PTHREAD_CFLAGS = GCOV = gcov @@ -1447,6 +1448,13 @@ ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif +ifdef USE_WATCHMAN + LIB_H += watchman-support.h + LIB_OBJS += watchman-support.o + EXTLIBS += $(WATCHMAN_LIBS) + BASIC_CFLAGS += -DUSE_WATCHMAN +endif + ifdef NO_PTHREADS BASIC_CFLAGS += -DNO_PTHREADS else @@ -2207,6 +2215,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@ + @echo USE_WATCHMAN=\''$(subst ','\'',$(subst ','\'',$(USE_WATCHMAN)))'\' >>$@ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@ endif diff --git a/cache.h b/cache.h index ed1eb235d84295..023f9c942112d3 100644 --- a/cache.h +++ b/cache.h @@ -596,6 +596,8 @@ extern int check_replace_refs; extern int fsync_object_files; extern int core_preload_index; +extern int core_use_watchman; +extern int core_watchman_sync_timeout; extern int core_apply_sparse_checkout; extern int precomposed_unicode; diff --git a/config.c b/config.c index a30cb5c07db18a..6bbdac411698d5 100644 --- a/config.c +++ b/config.c @@ -854,6 +854,16 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.usewatchman")) { + core_use_watchman = git_config_bool(var, value); + return 0; + } + + if (!strcmp(var, "core.watchmansynctimeout")) { + core_watchman_sync_timeout = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "core.createobject")) { if (!strcmp(value, "rename")) object_creation_mode = OBJECT_CREATION_USES_RENAMES; diff --git a/configure.ac b/configure.ac index b7112542b4b62f..b6e603200b3857 100644 --- a/configure.ac +++ b/configure.ac @@ -962,6 +962,12 @@ GIT_CONF_SUBST([NO_INITGROUPS]) # # Define NO_ICONV if your libc does not properly support iconv. +# Check for watchman client library + +AC_CHECK_LIB([watchman], [watchman_connect], + [USE_WATCHMAN=YesPlease], + [USE_WATCHMAN=]) +GIT_CONF_SUBST([USE_WATCHMAN]) ## Other checks. # Define USE_PIC if you need the main git objects to be built with -fPIC diff --git a/environment.c b/environment.c index 9d8b2ce4c27065..a4613db777bdd3 100644 --- a/environment.c +++ b/environment.c @@ -73,6 +73,10 @@ char comment_line_char = '#'; /* Parallel index stat data preload? */ int core_preload_index = 0; +/* Use Watchman for faster status queries */ +int core_use_watchman = 0; +int core_watchman_sync_timeout = 300; + /* This is set by setup_git_dir_gently() and/or git_default_config() */ char *git_work_tree_cfg; static char *work_tree; diff --git a/read-cache.c b/read-cache.c index 4e12a7e0344784..5b1ca78ae22f0e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -17,6 +17,10 @@ #include "varint.h" #include "hash-io.h" +#ifdef USE_WATCHMAN +#include "watchman-support.h" +#endif + static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, unsigned int options); @@ -1471,6 +1475,25 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk, return ce; } +static void do_load_fs_cache(struct index_state *istate, int force) +{ +#ifdef USE_WATCHMAN + if (core_use_watchman && (istate->initialized || force)) { + if (istate->fs_cache) { + if (istate->fs_cache->invalid) + watchman_reload_fs_cache(istate); + } else { + if (watchman_load_fs_cache(istate)) { + if (istate->fs_cache) { + istate->fs_cache->needs_write = 0; + istate->fs_cache = NULL; + } + } + } + } +#endif +} + /* remember to discard_cache() before reading a different cache! */ int read_index_from(struct index_state *istate, const char *path) { @@ -1482,6 +1505,8 @@ int read_index_from(struct index_state *istate, const char *path) size_t mmap_size; struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; + do_load_fs_cache(istate, 0); + if (istate->initialized) return istate->cache_nr; @@ -1489,8 +1514,10 @@ int read_index_from(struct index_state *istate, const char *path) istate->timestamp.nsec = 0; fd = open(path, O_RDONLY); if (fd < 0) { - if (errno == ENOENT) + if (errno == ENOENT) { + do_load_fs_cache(istate, 1); return 0; + } die_errno("index file open failed"); } @@ -1555,6 +1582,9 @@ int read_index_from(struct index_state *istate, const char *path) src_offset += 8; src_offset += extsize; } + + do_load_fs_cache(istate, 0); + munmap(mmap, mmap_size); return istate->cache_nr; diff --git a/t/t7900-watchman.sh b/t/t7900-watchman.sh new file mode 100755 index 00000000000000..bea61801e708c3 --- /dev/null +++ b/t/t7900-watchman.sh @@ -0,0 +1,249 @@ +#!/bin/sh +# +# Copyright (c) 2014 Twitter, Inc +# + +test_description='Watchman' + +. ./test-lib.sh + +if ! test_have_prereq WATCHMAN +then + skip_all='skipping watchman tests - no watchman' + test_done +fi + +xpgrep () { + result=$(ps xopid,comm | grep " $1\$" | awk '{ print $1 }') + echo $result + test "x$result" != "x" +} + +kill_watchman() { + #stop watchman + xpgrep watchman | xargs kill +} + +#make sure that watchman is not running, but that it is runnable +test_expect_success setup ' + git config core.usewatchman true && + #watchman is maybe running + xpgrep watchman > running-watchman + grep . running-watchman > /dev/null && kill $(cat running-watchman) + rm running-watchman && + sleep 0.25 && + #watchman is stopped + ! xpgrep watchman > /dev/null && + #watchman is startable + watchman && + kill_watchman +' + +cat >expect <<\EOF +?? expect +?? morx +?? output +EOF + +test_expect_success 'watchman sees new files' ' + touch morx && + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? expect +?? output +EOF + +test_expect_success 'watchman sees file deletion' ' + rm morx && + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +?? bramp +EOF + +test_expect_success 'watchman understands .gitignore' ' + touch bramp && + cat >.gitignore <<-EOF && + expect* + output* +EOF + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +EOF + +test_expect_success 'watchman notices changes to .gitignore' ' + cat >.gitignore <<-EOF && + expect* + output* + bramp +EOF + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +EOF + +test_expect_success 'watchman understands .git/info/exclude' ' + touch fleem && + cat >.git/info/exclude <<-EOF && + fleem +EOF + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +?? fleem +EOF + +test_expect_success 'watchman notices changes to .git/info/exclude' ' + touch crubbins && + cat >.git/info/exclude <<-EOF && + crubbins +EOF + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +?? crubbins +?? fleem +EOF + +test_expect_success 'watchman notices removal of .git/info/exclude' ' + rm .git/info/exclude && + git status -s > output && + test_cmp expect output && + rm crubbins bramp fleem +' + + +cat >expect <<\EOF +?? .gitignore +?? fleem +?? myignore +EOF + +test_expect_success 'watchman notices changes to file configured by core.excludesfile' ' + touch fleem && + touch crubbins && + cat >myignore <<-EOF && + crubbins +EOF + git config core.excludesfile myignore && + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +?? crubbins +?? myignore +?? myignore2 +EOF + +test_expect_success 'watchman notices changes to config variable core.excludesfile' ' + touch fleem && + touch crubbins && + cat >myignore2 <<-EOF && + fleem +EOF + git config core.excludesfile myignore2 && + git status -s > output && + test_cmp expect output +' + +cat >expect <<\EOF +?? .gitignore +?? crubbins +?? fleem +?? myignore +EOF + +test_expect_success 'watchman notices removal of file referred to by' ' + rm myignore2 && + git status -s > output && + test_cmp expect output && + rm crubbins fleem myignore +' + + +cat >expect.nothing <<\EOF +EOF + +cat >expect.2 <<\EOF +EOF + +test_expect_success 'git diff still works' ' + echo 1 > diffy && + git add diffy .gitignore && + git commit -m initial && + git status -s > output && + test_cmp expect.nothing output && + echo 2 >> diffy && + test_cmp expect.2 output +' + +cat >expect <<\EOF + D diffy +EOF + +test_expect_success 'file to directory changes still work' ' + rm diffy && + mkdir diffy && + touch diffy/a && + git status -s > output && + test_cmp expect output && + git add diffy/a && + git commit -m two && + git status -s > output.nothing +' + +cat >expect <<\EOF + D diffy/a +?? diffy +EOF + +test_expect_success 'directory to file changes still work' ' + rm -r diffy && + touch diffy && + git status -s > output && + test_cmp expect output && + rm diffy && + git rm diffy/a && + git commit -m "remove diffy" +' + +cat >expect <<\EOF +?? dead +EOF + +test_expect_success 'changes while watchman is not running are detected' ' + kill_watchman && + sleep 0.25 && + ! xpgrep watchman > /dev/null && + touch dead && + git status -s > output && + test_cmp expect output +' + +test_expect_success 'Restore default test environment' ' + git config --unset core.usewatchman && + kill_watchman +' + +test_done diff --git a/t/test-lib.sh b/t/test-lib.sh index c081668dfe16c8..19d1ec55c31cf1 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -781,6 +781,7 @@ test -z "$NO_PERL" && test_set_prereq PERL test -z "$NO_PYTHON" && test_set_prereq PYTHON test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE test -z "$NO_GETTEXT" && test_set_prereq GETTEXT +test -n "$USE_WATCHMAN" && test_set_prereq WATCHMAN # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON" diff --git a/watchman-support.c b/watchman-support.c new file mode 100644 index 00000000000000..9c5a87dd394cee --- /dev/null +++ b/watchman-support.c @@ -0,0 +1,640 @@ +#include + +#include "git-compat-util.h" +#include "cache.h" +#include "dir.h" +#include "fs_cache.h" +#include "strbuf.h" +#include "pathspec.h" +#include "watchman-support.h" + +#define NS_PER_SEC 1000000000L + +#define SET_TIME_FROM_NS(time, ns) \ + do { \ + (time).sec = (ns) / NS_PER_SEC; \ + (time).nsec = (ns) % NS_PER_SEC; \ + } while(0) + +static inline unsigned int create_fe_mode(unsigned int mode) +{ + if (S_ISLNK(mode)) + return S_IFLNK; + if (S_ISDIR(mode)) + return S_IFDIR; + return S_IFREG | ce_permissions(mode); +} + +static void copy_wm_stat_to_fe(struct watchman_stat *wm, struct fsc_entry *fe) +{ + if (!wm->exists) { + fe_set_deleted(fe); + return; + } else + fe_clear_deleted(fe); + fe->size = wm->size; + fe->mode = create_fe_mode(wm->mode); + fe->ino = wm->ino; + fe->dev = wm->dev; + fe->uid = wm->uid; + fe->gid = wm->gid; + SET_TIME_FROM_NS(fe->mtime, wm->mtime_ns); + SET_TIME_FROM_NS(fe->ctime, wm->ctime_ns); + return; +} + +static struct fsc_entry *wm_stat_to_fe(struct watchman_stat *wm) +{ + struct fsc_entry *fe = make_fs_cache_entry(wm->name); + fe_set_new(fe); + copy_wm_stat_to_fe(wm, fe); + return fe; +} + +static void update_exclude(struct dir_struct *dir, struct fsc_entry *fe) +{ + int dtype = fe_dtype(fe); + if (is_excluded(dir, fe->path, &dtype)) { + fe_set_excluded(fe); + } else { + fe_clear_excluded(fe); + } + for (fe = fe->first_child; fe; fe = fe->next_sibling) { + update_exclude(dir, fe); + } +} + +static struct fsc_entry *fs_cache_file_deleted(struct fs_cache *fs_cache, + struct watchman_stat *wm) +{ + int namelen = strlen(wm->name); + struct fsc_entry *fe; + + fe = fs_cache_file_exists(fs_cache, wm->name, namelen); + + if (fe) { + fe_set_deleted(fe); + fe_clear_children(fs_cache, fe); + } + + return fe; +} + +static struct fsc_entry *fs_cache_file_modified(struct fs_cache *fs_cache, + struct watchman_stat *wm) +{ + int namelen = strlen(wm->name); + struct fsc_entry *fe; + fe = fs_cache_file_exists(fs_cache, wm->name, namelen); + if (!fe) { + fe = wm_stat_to_fe(wm); + fs_cache_insert(fs_cache, fe); + set_up_parent(fs_cache, fe); + } else { + int was_dir = fe_is_dir(fe); + if (fe_deleted(fe)) + fe_set_new(fe); + copy_wm_stat_to_fe(wm, fe); + if (was_dir && !fe_is_dir(fe)) { + fe_clear_children(fs_cache, fe); + } + } + return fe; +} + +static struct watchman_expression *make_expression() +{ + struct watchman_expression *types[3]; + types[0] = watchman_type_expression('f'); + types[1] = watchman_type_expression('d'); + types[2] = watchman_type_expression('l'); + struct watchman_expression *expr = watchman_anyof_expression(3, types); + + return expr; +} + +struct watchman_query *make_query(const char *last_update) +{ + struct watchman_query *query = watchman_query(); + + watchman_query_set_fields(query, + WATCHMAN_FIELD_NAME | + WATCHMAN_FIELD_MTIME_NS | + WATCHMAN_FIELD_CTIME_NS | + WATCHMAN_FIELD_INO | + WATCHMAN_FIELD_DEV | + WATCHMAN_FIELD_UID | + WATCHMAN_FIELD_GID | + WATCHMAN_FIELD_EXISTS | + WATCHMAN_FIELD_MODE | + WATCHMAN_FIELD_SIZE); + watchman_query_set_empty_on_fresh(query, 1); + + query->sync_timeout = core_watchman_sync_timeout; + + if (last_update) { + watchman_query_set_since_oclock(query, last_update); + } + return query; +} + +enum path_treatment { + path_recurse, + path_file +}; + +void fe_from_stat(struct fsc_entry *fe, struct stat *st) +{ + fe->mode = create_fe_mode(st->st_mode); + fe->size = st->st_size; + fe->ino = st->st_ino; + fe->dev = st->st_dev; + fe->ctime.sec = st->st_ctime; + fe->ctime.nsec = ST_CTIME_NSEC(*st); + fe->mtime.sec = st->st_mtime; + fe->mtime.nsec = ST_MTIME_NSEC(*st); + fe->uid = st->st_uid; + fe->gid = st->st_gid; +} + +static void update_all_excludes(struct fs_cache *fs_cache) +{ + struct fsc_entry *root = fs_cache_file_exists(fs_cache, "", 0); + struct dir_struct dir; + char original_path[PATH_MAX + 1]; + const char *fs_path = get_git_work_tree(); + + if (!getcwd(original_path, PATH_MAX + 1)) + die_errno("failed to get working directory\n"); + if (chdir(fs_path)) + die_errno("failed to chdir to git work tree\n"); + + assert (root); + + memset(&dir, 0, sizeof(dir)); + setup_standard_excludes(&dir); + update_exclude(&dir, root); + clear_directory(&dir); + + if (chdir(original_path)) + die_errno("failed to chdir back to original path\n"); +} + +static enum path_treatment watchman_handle(struct index_state *istate, struct strbuf *path, struct dirent *de, int rootlen, struct fsc_entry **out) +{ + struct fs_cache *fs_cache = istate->fs_cache; + struct fsc_entry *fe; + struct stat st; + int dtype; + + fe = make_fs_cache_entry(path->buf + rootlen); + *out = fe; + fs_cache_insert(fs_cache, fe); + set_up_parent(fs_cache, fe); + lstat(path->buf, &st); + fe_from_stat(fe, &st); + + dtype = DTYPE(de); + if (dtype == DT_UNKNOWN) { + /* this involves an extra stat call, but only on + * Cygwin, which watchman doesn't support anyway. */ + dtype = get_dtype(de, path->buf, path->len); + } + if (dtype == DT_DIR) { + return path_recurse; + } + + return path_file; +} + +static void path_set_last_component(struct strbuf *path, int baselen, const char *add) +{ + strbuf_setlen(path, baselen); + if (baselen) { + strbuf_addch(path, '/'); + } + strbuf_addstr(path, add); +} + +static int preload_wt_recursive(struct index_state *istate, struct strbuf *path, int rootlen) +{ + DIR *fdir; + struct dirent *de; + int baselen = path->len; + + fdir = opendir(path->buf); + if (!fdir) { + return error("Failed to open %s", path->buf); + } + + while ((de = readdir(fdir)) != NULL) { + struct fsc_entry *fe; + if (is_dot_or_dotdot(de->d_name) || is_in_dot_git(de->d_name)) + continue; + + path_set_last_component(path, baselen, de->d_name); + + /* recurse into subdir if necessary */ + if (watchman_handle(istate, path, de, rootlen, &fe) == path_recurse) { + int result = preload_wt_recursive(istate, path, rootlen); + if (result) { + closedir(fdir); + return result; + } + } + } + + closedir(fdir); + return 0; +} + +static void init_excludes_config() +{ + char *xdg_path; + if (!excludes_file) { + home_config_paths(NULL, &xdg_path, "ignore"); + excludes_file = xdg_path; + } +} + +static void compute_sha1(const char *path, unsigned char *sha1) +{ + struct stat st; + if (stat(path, &st)) { + memset(sha1, 0, 20); + } else { + if (index_path(sha1, path, &st, 0)) { + memset(sha1, 0, 20); + } + } +} + +static void init_excludes_files(struct fs_cache *fs_cache) +{ + init_excludes_config(); + if (fs_cache->excludes_file) { + free(fs_cache->excludes_file); + } + if (excludes_file) { + fs_cache->excludes_file = xstrdup(excludes_file); + compute_sha1(excludes_file, fs_cache->user_excludes_sha1); + } else { + fs_cache->excludes_file = xstrdup(""); + memset(fs_cache->user_excludes_sha1, 0, 20); + } + compute_sha1(git_path("info/excludes"), fs_cache->git_excludes_sha1); +} + +static int git_excludes_file_changed(struct fs_cache *fs_cache) +{ + unsigned char sha1[20]; + + compute_sha1(git_path("info/exclude"), sha1); + if (!hashcmp(fs_cache->git_excludes_sha1, sha1)) + return 0; + hashcpy(fs_cache->git_excludes_sha1, sha1); + return 1; +} + +static int user_excludes_file_changed(struct fs_cache *fs_cache) +{ + unsigned char sha1[20] = {0}; + struct stat st; + + init_excludes_config(); + + if (!excludes_file) { + if (strlen(fs_cache->excludes_file) == 0) { + return 0; + } + + fs_cache->excludes_file[0] = 0; + if (is_null_sha1(fs_cache->user_excludes_sha1)) + return 0; + + memset(fs_cache->user_excludes_sha1, 0, 20); + return 1; + } + + /* A change in exclude filename forces an exclude reload */ + if (strcmp(fs_cache->excludes_file, excludes_file)) { + init_excludes_files(fs_cache); + return 1; + } + + if (!strlen(fs_cache->excludes_file)) { + return 0; + } + + if (stat(excludes_file, &st)) { + /* There is a problem reading the excludes file; this + * could be a persistent condition, so we need to + * check if the file is presently marked as invalid */ + if (is_null_sha1(fs_cache->user_excludes_sha1)) + return 0; + else { + memset(fs_cache->user_excludes_sha1, 0, 20); + return 1; + } + } + + if (index_path(sha1, excludes_file, &st, 0)) { + if (is_null_sha1(fs_cache->user_excludes_sha1)) { + return 0; + } else { + memset(fs_cache->user_excludes_sha1, 0, 20); + return 1; + } + } else { + if (!hashcmp(fs_cache->user_excludes_sha1, sha1)) + return 0; + hashcpy(fs_cache->user_excludes_sha1, sha1); + return 1; + } +} + +static void create_fs_cache(struct index_state *istate) +{ + struct strbuf buf = STRBUF_INIT; + const char *fs_path = get_git_work_tree(); + struct fsc_entry *root; + + strbuf_addstr(&buf, fs_path); + istate->fs_cache = empty_fs_cache(); + root = make_fs_cache_entry(""); + root->mode = 040644; + fs_cache_insert(istate->fs_cache, root); + preload_wt_recursive(istate, &buf, buf.len + 1); + strbuf_release(&buf); + + init_excludes_files(istate->fs_cache); + update_all_excludes(istate->fs_cache); +} + +static void load_fs_cache(struct index_state *istate) +{ + if (istate->fs_cache) + return; + istate->fs_cache = read_fs_cache(); + if (!istate->fs_cache) { + create_fs_cache(istate); + } +} + +static struct watchman_query_result *watchman_fs_cache_query(struct watchman_connection *connection, const char *fs_path, const char *last_update) +{ + struct watchman_error wm_error; + struct watchman_expression *expr; + struct watchman_query *query; + struct watchman_query_result *result = NULL; + struct stat st; + int fs_path_len = strlen(fs_path); + char *git_path; + + expr = make_expression(); + query = make_query(last_update); + if (lstat(fs_path, &st)) { + return NULL; + } + + git_path = xmalloc(fs_path_len + 6); + strcpy(git_path, fs_path); + strcpy(git_path + fs_path_len, "/.git"); + + if (lstat(git_path, &st)) { + /* Watchman gets confused if we delete the .git + * directory out from under it, since that's where it + * stores its cookies. So we'll need to delete the + * watch and then recreate it. It's OK for this to + * fail, as the watch might have already been + * deleted. */ + watchman_watch_del(connection, fs_path, &wm_error); + + if (watchman_watch(connection, fs_path, &wm_error)) { + warning("Watchman watch error: %s", wm_error.message); + goto out; + } + } + result = watchman_do_query(connection, fs_path, query, expr, &wm_error); + if (!result) { + warning("Watchman query error: %s (at %s)", wm_error.message, last_update); + goto out; + } + watchman_free_expression(expr); + watchman_free_query(query); + +out: + free(git_path); + return result; +} + +static int cmp_stat(const void *a, const void *b) +{ + const struct watchman_stat* sa = a; + const struct watchman_stat* sb = b; + return strcmp(sa->name, sb->name); +} + +static void append(struct fsc_entry ***list, int* cap, int* len, struct fsc_entry *entry) +{ + if (*len >= *cap) { + int sz; + *cap = *cap ? *cap * 2 : 10; + sz = *cap * sizeof(**list); + *list = xrealloc(*list, sz); + } + (*list)[(*len)++] = entry; +} + +static int is_child_of(struct fsc_entry *putative_child, struct fsc_entry *parent) +{ + while (putative_child) { + putative_child = putative_child->parent; + if (putative_child == parent) { + return 1; + } + } + return 0; +} + +static void update_fs_cache(struct index_state *istate, struct watchman_query_result *result) +{ + struct fs_cache *fs_cache = istate->fs_cache; + struct fsc_entry *fe; + int i; + struct fsc_entry **exclude_dirty = NULL; + int cap = 0, len = 0, all_dirty = 0; + /* note that we always want to call both of these functions, + * since they update the fs_cache's view of files which are + * not watched by watchman */ + int user_changed = user_excludes_file_changed(fs_cache); + int git_changed = git_excludes_file_changed(fs_cache); + + all_dirty = user_changed || git_changed; + + qsort(result->stats, result->nr, sizeof(*result->stats), cmp_stat); + + for (i = 0; i < result->nr; ++i) { + /*for each result in the changed set, we need to check + it against the index and HEAD */ + + struct watchman_stat *wm = result->stats + i; + + if (is_in_dot_git(wm->name)) { + continue; + } + fs_cache->needs_write = 1; + if (wm->exists) { + fe = fs_cache_file_modified(fs_cache, wm); + } else { + fe = fs_cache_file_deleted(fs_cache, wm); + } + if (fe && !all_dirty) { + if (ends_with(wm->name, "/.gitignore") || + !strcmp(wm->name, ".gitignore")) { + append(&exclude_dirty, &cap, &len, fe->parent); + } else if (fe_new(fe)) { + append(&exclude_dirty, &cap, &len, fe); + } + } + } + + if (exclude_dirty) { + struct dir_struct dir; + struct fsc_entry *last = NULL; + char original_path[PATH_MAX + 1]; + qsort(exclude_dirty, len, sizeof(*exclude_dirty), cmp_fsc_entry); + + if (!getcwd(original_path, PATH_MAX + 1)) + die_errno("failed to get working directory\n"); + if (chdir(get_git_work_tree())) + die_errno("failed to chdir to git work tree\n"); + + memset(&dir, 0, sizeof(dir)); + setup_standard_excludes(&dir); + + for (i = 0; i < len; i++) { + struct fsc_entry *fe = exclude_dirty[i]; + + if (i == 0 || !is_child_of(fe, last)) { + update_exclude(&dir, fe); + last = fe; + } + } + clear_directory(&dir); + free(exclude_dirty); + if (chdir(original_path)) + die_errno("failed to chdir back to original path\n"); + } else if (all_dirty) { + update_all_excludes(fs_cache); + } + +} + +int watchman_reload_fs_cache(struct index_state *istate) +{ + struct watchman_error wm_error; + struct watchman_query_result *result; + struct watchman_connection *connection; + int ret = -1; + const char *fs_path; + const char *last_update = istate->fs_cache->last_update; + + fs_path = get_git_work_tree(); + if (!fs_path) + return -1; + + connection = watchman_connect(&wm_error); + + if (!connection) { + warning("Watchman watch error: %s", wm_error.message); + return -1; + } + + result = watchman_fs_cache_query(connection, fs_path, last_update); + if (!result) { + goto done; + } + istate->fs_cache->last_update = xstrdup(result->clock); + + update_fs_cache(istate, result); + watchman_free_query_result(result); + ret = 0; +done: + watchman_connection_close(connection); + return ret; +} + +int watchman_load_fs_cache(struct index_state *istate) +{ + struct watchman_error wm_error; + int ret = -1; + const char *fs_path; + char *last_update = NULL; + char *stored_repo_path = NULL; + struct watchman_query_result *result; + struct watchman_connection *connection; + + fs_path = get_git_work_tree(); + if (!fs_path) + return -1; + + connection = watchman_connect(&wm_error); + + if (!connection) { + warning("Watchman watch error: %s", wm_error.message); + return -1; + } + + if (watchman_watch(connection, fs_path, &wm_error)) { + warning("Watchman watch error: %s", wm_error.message); + goto done; + } + + fs_cache_preload_metadata(&last_update, &stored_repo_path); + if (!last_update || strcmp(stored_repo_path, fs_path)) { + if (istate->fs_cache) { + free_fs_cache(istate->fs_cache); + istate->fs_cache = NULL; + } + /* fs_cache is corrupt, or refers to another repo path; + * let's try recreating it. */ + if (last_update) + free(last_update); + last_update = NULL; + /* now we continue, because we need to get the + * a last-update time from watchman. */ + } + free(stored_repo_path); + + result = watchman_fs_cache_query(connection, fs_path, last_update); + if (last_update) { + free(last_update); + last_update = NULL; + } + if (!result) { + goto done; + } + + if (result->is_fresh_instance) { + if (istate->fs_cache) { + free_fs_cache(istate->fs_cache); + istate->fs_cache = NULL; + } + create_fs_cache(istate); + istate->fs_cache->repo_path = xstrdup(fs_path); + } else { + load_fs_cache(istate); + update_fs_cache(istate, result); + } + + istate->fs_cache->last_update = xstrdup(result->clock); + + watchman_free_query_result(result); + ret = 0; + +done: + watchman_connection_close(connection); + return ret; + +} diff --git a/watchman-support.h b/watchman-support.h new file mode 100644 index 00000000000000..1ab865f1e4743f --- /dev/null +++ b/watchman-support.h @@ -0,0 +1,10 @@ +#ifndef WATCHMAN_SUPPORT_H +#define WATCHMAN_SUPPORT_H + +#include "cache.h" +#include + +int watchman_load_fs_cache(struct index_state *index); +int watchman_reload_fs_cache(struct index_state *index); + +#endif /* WATCHMAN_SUPPORT_H */ From baf7018eef7684498ea60c495148393ad012945e Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 17 Mar 2014 02:29:34 -0400 Subject: [PATCH 7/8] hashmap: Remove unused functions Remove str(i?)hash functions from hashmap.c as they are unused Signed-off-by: David Turner --- hashmap.c | 19 ------------------- hashmap.h | 2 -- t/t0011-hashmap.sh | 8 ++++---- test-hashmap.c | 17 ++++++++--------- 4 files changed, 12 insertions(+), 34 deletions(-) diff --git a/hashmap.c b/hashmap.c index d1b8056d8d53c3..4f01e6905d6210 100644 --- a/hashmap.c +++ b/hashmap.c @@ -7,25 +7,6 @@ #define FNV32_BASE ((unsigned int) 0x811c9dc5) #define FNV32_PRIME ((unsigned int) 0x01000193) -unsigned int strhash(const char *str) -{ - unsigned int c, hash = FNV32_BASE; - while ((c = (unsigned char) *str++)) - hash = (hash * FNV32_PRIME) ^ c; - return hash; -} - -unsigned int strihash(const char *str) -{ - unsigned int c, hash = FNV32_BASE; - while ((c = (unsigned char) *str++)) { - if (c >= 'a' && c <= 'z') - c -= 'a' - 'A'; - hash = (hash * FNV32_PRIME) ^ c; - } - return hash; -} - unsigned int memhash(const void *buf, size_t len) { unsigned int hash = FNV32_BASE; diff --git a/hashmap.h b/hashmap.h index a816ad47b14d2d..fc2c19ee3ccf69 100644 --- a/hashmap.h +++ b/hashmap.h @@ -8,8 +8,6 @@ /* FNV-1 functions */ -extern unsigned int strhash(const char *buf); -extern unsigned int strihash(const char *buf); extern unsigned int memhash(const void *buf, size_t len); extern unsigned int memihash(const void *buf, size_t len); diff --git a/t/t0011-hashmap.sh b/t/t0011-hashmap.sh index 391e2b64927d7d..2de1ee99268f52 100755 --- a/t/t0011-hashmap.sh +++ b/t/t0011-hashmap.sh @@ -11,10 +11,10 @@ test_hashmap() { test_expect_success 'hash functions' ' -test_hashmap "hash key1" "2215982743 2215982743 116372151 116372151" && -test_hashmap "hash key2" "2215982740 2215982740 116372148 116372148" && -test_hashmap "hash fooBarFrotz" "1383912807 1383912807 3189766727 3189766727" && -test_hashmap "hash foobarfrotz" "2862305959 2862305959 3189766727 3189766727" +test_hashmap "hash key1" "2215982743 116372151" && +test_hashmap "hash key2" "2215982740 116372148" && +test_hashmap "hash fooBarFrotz" "1383912807 3189766727" && +test_hashmap "hash foobarfrotz" "2862305959 3189766727" ' diff --git a/test-hashmap.c b/test-hashmap.c index f5183fb9e82575..77dff166aab233 100644 --- a/test-hashmap.c +++ b/test-hashmap.c @@ -45,13 +45,13 @@ static struct test_entry *alloc_test_entry(int hash, char *key, int klen, #define TEST_ADD 16 #define TEST_SIZE 100000 -static unsigned int hash(unsigned int method, unsigned int i, const char *key) +static unsigned int hash(unsigned int method, unsigned int i, const char *key, int len) { unsigned int hash; switch (method & 3) { case HASH_METHOD_FNV: - hash = strhash(key); + hash = memhash(key, len); break; case HASH_METHOD_I: hash = i; @@ -84,9 +84,9 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) entries = malloc(TEST_SIZE * sizeof(struct test_entry *)); hashes = malloc(TEST_SIZE * sizeof(int)); for (i = 0; i < TEST_SIZE; i++) { - snprintf(buf, sizeof(buf), "%i", i); - entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); - hashes[i] = hash(method, i, entries[i]->key); + int len = snprintf(buf, sizeof(buf), "%i", i); + entries[i] = alloc_test_entry(0, buf, len, "", 0); + hashes[i] = hash(method, i, entries[i]->key, len); } if (method & TEST_ADD) { @@ -130,7 +130,7 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) /* * Read stdin line by line and print result of commands to stdout: * - * hash key -> strhash(key) memhash(key) strihash(key) memihash(key) + * hash key -> memhash(key) memihash(key) * put key value -> NULL / old value * get key -> NULL / value * remove key -> NULL / old value @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) p1 = strtok(NULL, DELIM); if (p1) { l1 = strlen(p1); - hash = icase ? strihash(p1) : strhash(p1); + hash = icase ? memihash(p1, l1) : memhash(p1, l1); p2 = strtok(NULL, DELIM); if (p2) l2 = strlen(p2); @@ -174,8 +174,7 @@ int main(int argc, char *argv[]) if (!strcmp("hash", cmd) && l1) { /* print results of different hash functions */ - printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1), - strihash(p1), memihash(p1, l1)); + printf("%u %u\n", memhash(p1, l1), memihash(p1, l1)); } else if (!strcmp("add", cmd) && l1 && l2) { From f5a6a46ddfa47c4f9eb7300acfdcfced796b5624 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 4 May 2014 19:53:26 -0400 Subject: [PATCH 8/8] hashmap: Use SSE to speed up hashmap The original FNV hashing algorithm processes one byte at a time. SSE lets us process more. This is a lackadaisically tuned implementation; in my tests, it was the fastest by a hair. It is about twice as fast as the original FNV, for about a 5-10% speedup on git status (with watchman). Note that you are unlikely to see speed improvements in test-hashmap, since it uses very short, very similar strings. Signed-off-by: David Turner --- Makefile | 6 +++ configure.ac | 6 +++ hashmap.c | 108 +++++++++++++++++++++++++++++++++++++++++++++ t/t0011-hashmap.sh | 37 ++++++++++++++-- t/test-lib.sh | 1 + 5 files changed, 154 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 997879c33eb7cc..4210b635728610 100644 --- a/Makefile +++ b/Makefile @@ -1335,6 +1335,11 @@ else COMPAT_OBJS += compat/win32mmap.o endif endif +ifdef NO_SSE + BASIC_CFLAGS += -DNO_SSE +else + BASIC_CFLAGS += -msse4 +endif ifdef OBJECT_CREATION_USES_RENAMES COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1 endif @@ -2216,6 +2221,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@ @echo USE_WATCHMAN=\''$(subst ','\'',$(subst ','\'',$(USE_WATCHMAN)))'\' >>$@ + @echo NO_SSE=\''$(subst ','\'',$(subst ','\'',$(NO_SSE)))'\' >>$@ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@ endif diff --git a/configure.ac b/configure.ac index b6e603200b3857..eda4a335b475f7 100644 --- a/configure.ac +++ b/configure.ac @@ -382,6 +382,11 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]), GIT_PARSE_WITH(tcltk)) # +# Declare the with-sse/without-sse options. +AC_ARG_WITH(sse, +AS_HELP_STRING([--with-sse],[use SSE instructions (default is YES)]), +GIT_PARSE_WITH(sse)) + ## Checks for programs. AC_MSG_NOTICE([CHECKS for programs]) @@ -449,6 +454,7 @@ else fi fi GIT_CONF_SUBST([TCLTK_PATH]) +GIT_CONF_SUBST([NO_SSE]) AC_CHECK_PROGS(ASCIIDOC, [asciidoc]) if test -n "$ASCIIDOC"; then AC_MSG_CHECKING([for asciidoc version]) diff --git a/hashmap.c b/hashmap.c index 4f01e6905d6210..50ea3ef8e482f8 100644 --- a/hashmap.c +++ b/hashmap.c @@ -7,6 +7,113 @@ #define FNV32_BASE ((unsigned int) 0x811c9dc5) #define FNV32_PRIME ((unsigned int) 0x01000193) +#ifndef NO_SSE +#include + +#define BLOCK_SIZE 16 + +#ifdef __clang__ +typedef int v4si __attribute__((ext_vector_type(4))); +#define __v4si v4si +#endif + +static const __v4si FNV32_BASE_4 = {FNV32_BASE, FNV32_BASE, FNV32_BASE, FNV32_BASE}; +static const __v4si FNV32_PRIME_4 = {FNV32_PRIME, FNV32_PRIME, FNV32_PRIME, FNV32_PRIME}; +static const __v4si CASE_MASK = {~0x20202020,~0x20202020,~0x20202020,~0x20202020}; + +static inline __v4si hash_one_block(__v4si hash, __v4si data) +{ + __v4si data_lshift, data_rshift; + data_lshift = (__v4si)_mm_slli_epi32((__m128i)data, 16); + data_rshift = (__v4si)_mm_srai_epi32((__m128i)data, 16); + hash = (hash * FNV32_PRIME_4) ^ data; + data = data_lshift | data_rshift; + hash = (hash * FNV32_PRIME_4) ^ data; + + return hash; +} + +__v4si read_partial_block(const char *buf, int remainder) { + __v4si data = {0}; + uint32_t bottom = 0; + int i = 0; + + if (remainder & 8) { + data = (__v4si) _mm_loadl_epi64((__v2di*)(buf)); + i += 8; + _mm_slli_si128((__m128i) data, 8); + } + if (remainder & 4) { + uint32_t dword = (*(uint32_t *)(buf + i)); + data = (__v4si)_mm_insert_epi32((__m128i)data, dword, 2); + i += 4; + } + if (remainder & 2) { + bottom = ((uint32_t)(*(uint16_t *)(buf + i))) << 8; + i += 2; + } + if (remainder & 1) { + bottom |= buf[i]; + } + return (__v4si)_mm_insert_epi32((__m128i)data, bottom, 3); +} + +unsigned int memhash(const void *bufp, size_t len) +{ + const char *buf = bufp; + __v4si hash = FNV32_BASE_4; + __v4si data; + int i; + int full_blocks = (len / BLOCK_SIZE); + int block_aligned_size = full_blocks * BLOCK_SIZE; + int remainder = len - block_aligned_size; + + for (i = 0; i < block_aligned_size; i += BLOCK_SIZE) { + data = (__v4si)_mm_lddqu_si128((__m128i *)(buf + i)); + hash = hash_one_block(hash, data); + } + + if (remainder) { + data = read_partial_block(buf + i, remainder); + hash = hash_one_block(hash, data); + } + + return _mm_extract_epi32((__m128i)hash, 0) ^ + _mm_extract_epi32((__m128i)hash, 1) ^ + _mm_extract_epi32((__m128i)hash, 2) ^ + _mm_extract_epi32((__m128i)hash, 3); +} + +unsigned int memihash(const void *bufp, size_t len) +{ + const char *buf = bufp; + __v4si hash = FNV32_BASE_4; + __v4si data; + int i; + int full_blocks = (len / BLOCK_SIZE); + int block_aligned_size = full_blocks * BLOCK_SIZE; + int remainder = len - block_aligned_size; + + for (i = 0; i < block_aligned_size; i += BLOCK_SIZE) { + data = (__v4si)_mm_lddqu_si128((__m128i *)(buf + i)); + data &= CASE_MASK; + hash = hash_one_block(hash, data); + } + + if (remainder) { + data = read_partial_block(buf + i, remainder); + data &= CASE_MASK; + hash = hash_one_block(hash, data); + } + + return _mm_extract_epi32((__m128i)hash, 0) ^ + _mm_extract_epi32((__m128i)hash, 1) ^ + _mm_extract_epi32((__m128i)hash, 2) ^ + _mm_extract_epi32((__m128i)hash, 3); +} + +#else + unsigned int memhash(const void *buf, size_t len) { unsigned int hash = FNV32_BASE; @@ -30,6 +137,7 @@ unsigned int memihash(const void *buf, size_t len) } return hash; } +#endif #define HASHMAP_INITIAL_SIZE 64 /* grow / shrink by 2^2 */ diff --git a/t/t0011-hashmap.sh b/t/t0011-hashmap.sh index 2de1ee99268f52..1a5bfbbad05241 100755 --- a/t/t0011-hashmap.sh +++ b/t/t0011-hashmap.sh @@ -9,6 +9,18 @@ test_hashmap() { test_cmp expect actual } + +if test_have_prereq SSE +then +test_expect_success 'hash functions' ' + +test_hashmap "hash key1" "2090498088 29936296" && +test_hashmap "hash key2" "3046800168 1523108264" && +test_hashmap "hash fooBarFrotz" "4292625486 2911672302" && +test_hashmap "hash foobarfrotz" "574383214 2911672302" + +' +else test_expect_success 'hash functions' ' test_hashmap "hash key1" "2215982743 116372151" && @@ -17,6 +29,7 @@ test_hashmap "hash fooBarFrotz" "1383912807 3189766727" && test_hashmap "hash foobarfrotz" "2862305959 3189766727" ' +fi test_expect_success 'put' ' @@ -177,7 +190,22 @@ NULL 64 1" ignorecase ' +if test_have_prereq SSE +then +test_expect_success 'iterate' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +iterate" "NULL +NULL +NULL +fooBarFrotz value3 +key2 value2 +key1 value1" +' +else test_expect_success 'iterate' ' test_hashmap "put key1 value1 @@ -186,11 +214,13 @@ put fooBarFrotz value3 iterate" "NULL NULL NULL +fooBarFrotz value3 key2 value2 key1 value1 -fooBarFrotz value3" +" ' +fi test_expect_success 'iterate (case insensitive)' ' @@ -200,10 +230,9 @@ put fooBarFrotz value3 iterate" "NULL NULL NULL -fooBarFrotz value3 key2 value2 -key1 value1" ignorecase - +key1 value1 +fooBarFrotz value3" ignorecase ' test_expect_success 'grow / shrink' ' diff --git a/t/test-lib.sh b/t/test-lib.sh index 19d1ec55c31cf1..81c95bc2b93797 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -782,6 +782,7 @@ test -z "$NO_PYTHON" && test_set_prereq PYTHON test -n "$USE_LIBPCRE" && test_set_prereq LIBPCRE test -z "$NO_GETTEXT" && test_set_prereq GETTEXT test -n "$USE_WATCHMAN" && test_set_prereq WATCHMAN +test -z "$NO_SSE" && test_set_prereq SSE # Can we rely on git's output in the C locale? if test -n "$GETTEXT_POISON"