diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..b6f9e9c --- /dev/null +++ b/.clang-format @@ -0,0 +1,147 @@ +--- +BasedOnStyle: Google +Standard: Latest +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveBitFields: true +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: false +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: false +BinPackParameters: false +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeComma +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 120 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true 
+ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + Priority: 2 + SortPriority: 1 + - Regex: '^(<|"(gtest|gmock|isl|json)/)' + Priority: 3 + SortPriority: 2 + - Regex: '.*' + Priority: 1 + SortPriority: 3 +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: false +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentWidth: 2 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Auto +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInConditionalStatement: false 
+SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE +... diff --git a/bench/fio/fastfs_fio.cpp b/bench/fio/fastfs_fio.cpp index 7b61b37..dcbf005 100644 --- a/bench/fio/fastfs_fio.cpp +++ b/bench/fio/fastfs_fio.cpp @@ -4,105 +4,104 @@ */ #include "core/FastFS.h" -#include "spdk/stdinc.h" +#include "fio.h" +#include "spdk/accel.h" #include "spdk/bdev.h" #include "spdk/bdev_zone.h" -#include "spdk/accel.h" #include "spdk/env.h" #include "spdk/file.h" #include "spdk/init.h" -#include "spdk/thread.h" #include "spdk/log.h" -#include "spdk/string.h" #include "spdk/queue.h" -#include "spdk/util.h" #include "spdk/rpc.h" +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/thread.h" +#include "spdk/util.h" #include "spdk_internal/event.h" -#include "fio.h" extern "C" { #include "optgroup.h" void get_ioengine(struct ioengine_ops **ioengine_ptr); } -static const char* g_config_file = nullptr; +static const char *g_config_file = nullptr; static void *g_json_data; static size_t g_config_file_size; -static const char* g_bdev_name = nullptr; +static const char *g_bdev_name = nullptr; static bool g_do_format = true; -static const char* g_test_file = "/test.txt"; +static const char *g_test_file = "/test.txt"; static uint64_t g_test_file_size = 0; struct fastfs_fio_options { int __pad; - char* conf = NULL; - char* bdev = NULL; - char* file = NULL; - char* format = NULL; + char *conf = NULL; + char *bdev = NULL; + char *file = NULL; + char *format = NULL; }; // fio-3.39 static struct fio_option options[] = { - { - "spdk_conf", - "SPDK configuration file", - NULL, - FIO_OPT_STR_STORE, - offsetof(struct fastfs_fio_options, conf), - }, - { - "spdk_bdev", - "SPDK bdev name", - NULL, - 
FIO_OPT_STR_STORE, - offsetof(struct fastfs_fio_options, bdev), - }, - { - "test_file", - "target test file", - NULL, - FIO_OPT_STR_STORE, - offsetof(struct fastfs_fio_options, file), - }, - { - "do_format", - "format fastfs before mount", - NULL, - FIO_OPT_STR_STORE, - offsetof(struct fastfs_fio_options, format), - }, - { - NULL, // end flag - }, + { + "spdk_conf", + "SPDK configuration file", + NULL, + FIO_OPT_STR_STORE, + offsetof(struct fastfs_fio_options, conf), + }, + { + "spdk_bdev", + "SPDK bdev name", + NULL, + FIO_OPT_STR_STORE, + offsetof(struct fastfs_fio_options, bdev), + }, + { + "test_file", + "target test file", + NULL, + FIO_OPT_STR_STORE, + offsetof(struct fastfs_fio_options, file), + }, + { + "do_format", + "format fastfs before mount", + NULL, + FIO_OPT_STR_STORE, + offsetof(struct fastfs_fio_options, format), + }, + { + NULL, // end flag + }, }; struct fastfs_fio_thread { - FastFS* fastfs = nullptr; - ByteBuffer* buff = nullptr; - struct thread_data* td; - struct spdk_thread* thread; - struct io_u** iocq; - ByteBuffer** buffers; + FastFS *fastfs = nullptr; + ByteBuffer *buff = nullptr; + struct thread_data *td; + struct spdk_thread *thread; + struct io_u **iocq; + ByteBuffer **buffers; int reqs = 0; int count = 0; bool writing = false; }; static void bdev_fini_done(void *cb_arg) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(cb_arg); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(cb_arg); fio_thread->fastfs->ready = false; } -static void write_file_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - WriteContext* writeCtx = reinterpret_cast(opCtx->private_data); +static void write_file_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + WriteContext *writeCtx = reinterpret_cast(opCtx->private_data); if (code != 0) { SPDK_ERRLOG("write failed: %d\n", code); exit(code); } - FastFS* fastfs = opCtx->fastfs; - FastFile& file = 
(*fastfs->files)[writeCtx->fd]; + FastFS *fastfs = opCtx->fastfs; + FastFile &file = (*fastfs->files)[writeCtx->fd]; if (file.pos_ < g_test_file_size) { writeCtx->direct_buff->clear(); fastfs->write(*opCtx); @@ -114,12 +113,11 @@ static void write_file_complete(void* cb_args, int code) { } } -static void create_file_complete(void* cb_args, int code) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast( - spdk_thread_get_ctx(spdk_get_thread())); - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; +static void create_file_complete(void *cb_args, int code) { + struct fastfs_fio_thread *fio_thread = + reinterpret_cast(spdk_thread_get_ctx(spdk_get_thread())); + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); if (code != 0) { printf("create test file failed : %d\n", code); @@ -128,7 +126,7 @@ static void create_file_complete(void* cb_args, int code) { if (fio_thread->td->o.td_ddir & TD_DDIR_READ) { // mock file's data opCtx = fastfs->allocFsOp(); - WriteContext* writeCtx = new (opCtx->private_data) WriteContext(); + WriteContext *writeCtx = new (opCtx->private_data) WriteContext(); writeCtx->fd = fio_thread->fastfs->open(g_test_file, F_MULTI_WRITE); writeCtx->count = FastFS::fs_context.extentSize; writeCtx->append = true; @@ -144,19 +142,18 @@ static void create_file_complete(void* cb_args, int code) { } } -static void mount_complete(FastFS* fastfs, int code) { +static void mount_complete(FastFS *fastfs, int code) { if (code != 0) { printf("mount fastfs failed: %d\n", code); return; } - struct fastfs_fio_thread* fio_thread = - reinterpret_cast( - spdk_thread_get_ctx(spdk_get_thread())); - FastInode* inode = fastfs->status(g_test_file); - fs_op_context* opCtx = fastfs->allocFsOp(); + struct fastfs_fio_thread *fio_thread = + reinterpret_cast(spdk_thread_get_ctx(spdk_get_thread())); + FastInode *inode = fastfs->status(g_test_file); + fs_op_context *opCtx = 
fastfs->allocFsOp(); if (!inode) { // create test file - CreateContext* createCtx = new (opCtx->private_data) CreateContext(); + CreateContext *createCtx = new (opCtx->private_data) CreateContext(); createCtx->parentId = 0; createCtx->name = g_test_file + 1; createCtx->mode = 493; @@ -166,7 +163,7 @@ static void mount_complete(FastFS* fastfs, int code) { fastfs->create(*opCtx); } else { // file already exist if (fio_thread->td->o.td_ddir & TD_DDIR_WRITE) { - TruncateContext* truncateCtx = new (opCtx->private_data) TruncateContext(); + TruncateContext *truncateCtx = new (opCtx->private_data) TruncateContext(); truncateCtx->ino = inode->ino_; truncateCtx->size = 0; opCtx->callback = create_file_complete; @@ -178,7 +175,7 @@ static void mount_complete(FastFS* fastfs, int code) { } } -static void format_complete(FastFS* fastfs, int code) { +static void format_complete(FastFS *fastfs, int code) { if (code != 0) { printf("format fastfs failed: %d\n", code); exit(code); @@ -186,8 +183,7 @@ static void format_complete(FastFS* fastfs, int code) { fastfs->mount(mount_complete, 128, 128); } -static void fsbench_event_cb( - enum spdk_bdev_event_type type, struct spdk_bdev* bdev, void* ctx) { +static void fsbench_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) { printf("Unsupported bdev event: type %d\n", type); } @@ -196,14 +192,12 @@ static void bdev_init_done(int rc, void *cb_arg) { SPDK_ERRLOG("RUNTIME RPCs failed\n"); exit(-1); } - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(cb_arg); - FastFS* fastfs = fio_thread->fastfs; - fs_context_t* fs_context = &FastFS::fs_context; + struct fastfs_fio_thread *fio_thread = reinterpret_cast(cb_arg); + FastFS *fastfs = fio_thread->fastfs; + fs_context_t *fs_context = &FastFS::fs_context; fs_context->bdev = NULL; fs_context->bdev_desc = NULL; - rc = spdk_bdev_open_ext(fs_context->bdev_name, true, fsbench_event_cb, NULL, - &fs_context->bdev_desc); + rc = spdk_bdev_open_ext(fs_context->bdev_name, 
true, fsbench_event_cb, NULL, &fs_context->bdev_desc); if (rc) { printf("Could not open bdev: %s\n", fs_context->bdev_name); exit(-1); @@ -228,8 +222,7 @@ static void bdev_subsystem_init_done(int rc, void *cb_arg) { exit(-1); } spdk_rpc_set_state(SPDK_RPC_RUNTIME); - spdk_subsystem_load_config( - g_json_data, g_config_file_size, bdev_init_done, cb_arg, true); + spdk_subsystem_load_config(g_json_data, g_config_file_size, bdev_init_done, cb_arg, true); } static void bdev_startup_done(int rc, void *cb_arg) { @@ -241,13 +234,11 @@ static void bdev_startup_done(int rc, void *cb_arg) { } static void bdev_init_start(void *arg) { - g_json_data = spdk_posix_file_load_from_name( - g_config_file, &g_config_file_size); - spdk_subsystem_load_config( - g_json_data, g_config_file_size, bdev_startup_done, arg, true); + g_json_data = spdk_posix_file_load_from_name(g_config_file, &g_config_file_size); + spdk_subsystem_load_config(g_json_data, g_config_file_size, bdev_startup_done, arg, true); } -static int start_reactor(thread_data* td) { +static int start_reactor(thread_data *td) { struct spdk_env_opts opts; spdk_env_opts_init(&opts); opts.name = "fastfs-fio"; @@ -257,26 +248,23 @@ static int start_reactor(thread_data* td) { return -1; } spdk_thread_lib_init(NULL, sizeof(struct fastfs_fio_thread)); - struct spdk_thread* thread = spdk_thread_create("fio_thread", NULL); + struct spdk_thread *thread = spdk_thread_create("fio_thread", NULL); if (!thread) { printf("failed to allocate thread\n"); return -2; } - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(spdk_thread_get_ctx(thread)); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(spdk_thread_get_ctx(thread)); fio_thread->td = td; fio_thread->thread = thread; g_test_file_size = td->o.size; - fio_thread->iocq = - (struct io_u**) calloc(td->o.iodepth, sizeof(struct io_u*)); - fio_thread->buffers = - (struct ByteBuffer**) calloc(td->o.iodepth, sizeof(struct ByteBuffer*)); + fio_thread->iocq = (struct io_u 
**)calloc(td->o.iodepth, sizeof(struct io_u *)); + fio_thread->buffers = (struct ByteBuffer **)calloc(td->o.iodepth, sizeof(struct ByteBuffer *)); fio_thread->count = 0; td->io_ops_data = fio_thread; spdk_set_thread(thread); - FastFS* fastfs = new FastFS(g_bdev_name); + FastFS *fastfs = new FastFS(g_bdev_name); fio_thread->fastfs = fastfs; spdk_thread_send_msg(fio_thread->thread, bdev_init_start, fio_thread); @@ -289,26 +277,26 @@ static int start_reactor(thread_data* td) { } static int fastfs_init(struct thread_data *td) { - struct fastfs_fio_options* fio_options = - reinterpret_cast(td->eo); + struct fastfs_fio_options *fio_options = reinterpret_cast(td->eo); g_config_file = fio_options->conf; g_bdev_name = fio_options->bdev; g_test_file = fio_options->file; - if (fio_options->format != NULL && - strcmp(fio_options->format, "true") == 0) { + if (fio_options->format != NULL && strcmp(fio_options->format, "true") == 0) { g_do_format = true; } else { g_do_format = false; } printf("spark conf file %s, bdev name %s, test file %s, do format %s\n", - g_config_file, g_bdev_name, g_test_file, fio_options->format); + g_config_file, + g_bdev_name, + g_test_file, + fio_options->format); return start_reactor(td); } static void fastfs_cleanup(struct thread_data *td) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); if (FastFS::fs_context.bdev_io_channel) { spdk_put_io_channel(FastFS::fs_context.bdev_io_channel); } @@ -323,13 +311,12 @@ static void fastfs_cleanup(struct thread_data *td) { spdk_env_fini(); } -static int fastfs_invalidate(struct thread_data*, struct fio_file*) { +static int fastfs_invalidate(struct thread_data *, struct fio_file *) { return 0; // nothing to do } static int fastfs_open(struct thread_data *td, struct fio_file *f) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); + struct fastfs_fio_thread *fio_thread = 
reinterpret_cast(td->io_ops_data); int fd = fio_thread->fastfs->open(g_test_file, 0); if (fd < 0) { printf("Failed to open file\n"); @@ -339,18 +326,17 @@ static int fastfs_open(struct thread_data *td, struct fio_file *f) { return 0; } -static void fsync_complete(void* cb_args, int) { - int* rc = reinterpret_cast(cb_args); +static void fsync_complete(void *cb_args, int) { + int *rc = reinterpret_cast(cb_args); *rc = 1; } static int fastfs_close(struct thread_data *td, struct fio_file *f) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); int rc = 0; // do fsync first - fs_op_context* opCtx = fio_thread->fastfs->allocFsOp(); - FSyncContext* fsyncCtx = new (opCtx->private_data) FSyncContext(); + fs_op_context *opCtx = fio_thread->fastfs->allocFsOp(); + FSyncContext *fsyncCtx = new (opCtx->private_data) FSyncContext(); fsyncCtx->fd = f->fd; opCtx->callback = fsync_complete; opCtx->cb_args = &rc; @@ -365,11 +351,10 @@ static int fastfs_close(struct thread_data *td, struct fio_file *f) { return rc; } -static void read_write_complete(void* arg, int code) { - struct io_u* io_u = reinterpret_cast(arg); - struct fastfs_fio_thread* fio_thread = - reinterpret_cast( - spdk_thread_get_ctx(spdk_get_thread())); +static void read_write_complete(void *arg, int code) { + struct io_u *io_u = reinterpret_cast(arg); + struct fastfs_fio_thread *fio_thread = + reinterpret_cast(spdk_thread_get_ctx(spdk_get_thread())); if (code != 0) { if (io_u->ddir == DDIR_WRITE) { printf("write file failed : %d\n", code); @@ -382,11 +367,10 @@ static void read_write_complete(void* arg, int code) { } static void write_file(struct thread_data *td, struct io_u *io_u) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); - FastFS* fastfs = fio_thread->fastfs; - fs_op_context* opCtx = reinterpret_cast(io_u->engine_data); - WriteContext* writeCtx = new (opCtx->private_data) 
WriteContext(); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); + FastFS *fastfs = fio_thread->fastfs; + fs_op_context *opCtx = reinterpret_cast(io_u->engine_data); + WriteContext *writeCtx = new (opCtx->private_data) WriteContext(); writeCtx->fd = io_u->file->fd; writeCtx->pwrite = true; @@ -402,11 +386,10 @@ static void write_file(struct thread_data *td, struct io_u *io_u) { } static void read_file(struct thread_data *td, struct io_u *io_u) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); - FastFS* fastfs = fio_thread->fastfs; - fs_op_context* opCtx = reinterpret_cast(io_u->engine_data); - ReadContext* readCtx = new (opCtx->private_data) ReadContext(); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); + FastFS *fastfs = fio_thread->fastfs; + fs_op_context *opCtx = reinterpret_cast(io_u->engine_data); + ReadContext *readCtx = new (opCtx->private_data) ReadContext(); readCtx->fd = io_u->file->fd; readCtx->pread = true; @@ -421,8 +404,7 @@ static void read_file(struct thread_data *td, struct io_u *io_u) { } static enum fio_q_status fastfs_queue(struct thread_data *td, struct io_u *io_u) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); if (fio_thread->writing) { return FIO_Q_BUSY; } @@ -441,10 +423,8 @@ static enum fio_q_status fastfs_queue(struct thread_data *td, struct io_u *io_u) } } -static int fastfs_getevents(struct thread_data *td, unsigned int min, - unsigned int max, const struct timespec *t) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); +static int fastfs_getevents(struct thread_data *td, unsigned int min, unsigned int max, const struct timespec *t) { + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); while (fio_thread->count < fio_thread->reqs) { spdk_thread_poll(fio_thread->thread, 0, 0); } @@ -455,23 +435,20 
@@ static int fastfs_getevents(struct thread_data *td, unsigned int min, return res; } -static struct io_u* fastfs_event(struct thread_data *td, int event) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); +static struct io_u *fastfs_event(struct thread_data *td, int event) { + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); return fio_thread->iocq[event]; } -static int fastfs_io_u_init(struct thread_data* td, struct io_u *io_u) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); +static int fastfs_io_u_init(struct thread_data *td, struct io_u *io_u) { + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); io_u->engine_data = fio_thread->fastfs->allocFsOp(); return 0; } static void fastfs_io_u_free(struct thread_data *td, struct io_u *io_u) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); - fs_op_context* opCtx = reinterpret_cast(io_u->engine_data); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); + fs_op_context *opCtx = reinterpret_cast(io_u->engine_data); if (opCtx) { fio_thread->fastfs->freeFsOp(opCtx); io_u->engine_data = NULL; @@ -479,10 +456,8 @@ static void fastfs_io_u_free(struct thread_data *td, struct io_u *io_u) { } static int fastfs_iomem_alloc(struct thread_data *td, size_t total_mem) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); - fio_thread->buff = new ByteBuffer( - total_mem, true, FastFS::fs_context.localNuma, 4096/*page_size*/); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); + fio_thread->buff = new ByteBuffer(total_mem, true, FastFS::fs_context.localNuma, 4096 /*page_size*/); if (!fio_thread->buff) { printf("failed to allocate ByteBuffer\n"); exit(-1); @@ -490,15 +465,13 @@ static int fastfs_iomem_alloc(struct thread_data *td, size_t total_mem) { td->orig_buffer = fio_thread->buff->getBuffer(); uint32_t max_bs = 
td_max_bs(td); for (uint32_t i = 0; i < td->o.iodepth; i++) { - fio_thread->buffers[i] = new ByteBuffer( - fio_thread->buff->p_buffer_ + max_bs * i, max_bs); + fio_thread->buffers[i] = new ByteBuffer(fio_thread->buff->p_buffer_ + max_bs * i, max_bs); } return 0; } static void fastfs_iomem_free(struct thread_data *td) { - struct fastfs_fio_thread* fio_thread = - reinterpret_cast(td->io_ops_data); + struct fastfs_fio_thread *fio_thread = reinterpret_cast(td->io_ops_data); if (fio_thread->buff) { delete fio_thread->buff; fio_thread->buff = nullptr; @@ -514,24 +487,23 @@ static int fastfs_setup(struct thread_data *td) { extern "C" { static struct ioengine_ops ioengine; void get_ioengine(struct ioengine_ops **ioengine_ptr) { - *ioengine_ptr = &ioengine; - ioengine.name = "fastfs", - ioengine.version = FIO_IOOPS_VERSION; - ioengine.flags = FIO_NODISKUTIL; - ioengine.setup = fastfs_setup; - ioengine.init = fastfs_init; - ioengine.invalidate = fastfs_invalidate; - ioengine.open_file = fastfs_open; - ioengine.queue = fastfs_queue; - ioengine.getevents = fastfs_getevents; - ioengine.event = fastfs_event; - ioengine.close_file = fastfs_close; - ioengine.cleanup = fastfs_cleanup; - ioengine.io_u_init = fastfs_io_u_init; - ioengine.io_u_free = fastfs_io_u_free; - ioengine.iomem_alloc = fastfs_iomem_alloc; - ioengine.iomem_free = fastfs_iomem_free; - ioengine.option_struct_size = sizeof(struct fastfs_fio_options); - ioengine.options = options; + *ioengine_ptr = &ioengine; + ioengine.name = "fastfs", ioengine.version = FIO_IOOPS_VERSION; + ioengine.flags = FIO_NODISKUTIL; + ioengine.setup = fastfs_setup; + ioengine.init = fastfs_init; + ioengine.invalidate = fastfs_invalidate; + ioengine.open_file = fastfs_open; + ioengine.queue = fastfs_queue; + ioengine.getevents = fastfs_getevents; + ioengine.event = fastfs_event; + ioengine.close_file = fastfs_close; + ioengine.cleanup = fastfs_cleanup; + ioengine.io_u_init = fastfs_io_u_init; + ioengine.io_u_free = fastfs_io_u_free; + 
ioengine.iomem_alloc = fastfs_iomem_alloc; + ioengine.iomem_free = fastfs_iomem_free; + ioengine.option_struct_size = sizeof(struct fastfs_fio_options); + ioengine.options = options; } } diff --git a/bench/mdtest/fs_bench.cpp b/bench/mdtest/fs_bench.cpp index ce4a1a3..5057a72 100644 --- a/bench/mdtest/fs_bench.cpp +++ b/bench/mdtest/fs_bench.cpp @@ -4,16 +4,16 @@ */ #include "core/FastFS.h" -#include "spdk/stdinc.h" -#include "spdk/thread.h" #include "spdk/bdev.h" +#include "spdk/bdev_zone.h" #include "spdk/env.h" #include "spdk/event.h" #include "spdk/log.h" +#include "spdk/stdinc.h" #include "spdk/string.h" -#include "spdk/bdev_zone.h" +#include "spdk/thread.h" -static const char* bdevName = "Malloc0"; +static const char *bdevName = "Malloc0"; static bool verify = false; static bool format = true; static bool direct = false; @@ -24,7 +24,7 @@ static int subdirs = 1024; static int files_per_dir = 64; struct fs_bench_context { - struct spdk_poller* poller; + struct spdk_poller *poller; struct timespec time_start; std::string path; int subdir_index; @@ -56,33 +56,35 @@ static void print_time_cost(int stage) { std::string msg; long counts = 0; switch (stage) { - case 0 : + case 0: msg = "[MKDIR] create %ld dirs use %ld ms, ops is %s.\n"; counts = subdirs; break; - case 1 : + case 1: msg = "[Create] create %ld files use %ld ms, ops is %s.\n"; counts = subdirs * files_per_dir; break; - case 2 : + case 2: msg = "[Stats] stats %ld files use %ld ms, ops is %s.\n"; counts = subdirs * files_per_dir; break; - case 3 : + case 3: msg = "[Write] write %ld files use %ld ms, ops is %s.\n"; counts = subdirs * files_per_dir; break; - case 4 : + case 4: msg = "[Read] read %ld files use %ld ms, ops is %s.\n"; counts = subdirs * files_per_dir; break; - case 5 : + case 5: msg = "[Remove] delete %ld files use %ld ms, ops is %s.\n"; counts = subdirs * files_per_dir; break; } - printf(msg.c_str(), counts, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(counts / ((double) 
d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + printf(msg.c_str(), + counts, + (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), + std::to_string(counts / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } static void fsbench_usage(void) { @@ -109,19 +111,19 @@ static int fsbench_parse_arg(int ch, char *arg) { format = false; break; case 'S': - extentSize = (int) std::stoi(arg); + extentSize = (int)std::stoi(arg); break; case 'P': - parallelism = (int) std::stoi(arg); + parallelism = (int)std::stoi(arg); break; case 'D': direct = true; break; case 'N': - subdirs = (int) std::stoi(arg); + subdirs = (int)std::stoi(arg); break; case 'F': - files_per_dir = (int) std::stoi(arg); + files_per_dir = (int)std::stoi(arg); break; case 'w': readWrite = true; @@ -132,18 +134,18 @@ static int fsbench_parse_arg(int ch, char *arg) { return 0; } -static void delete_complete(void* cb_args, int code) { +static void delete_complete(void *cb_args, int code) { spdk_poller_unregister(&ctx.poller); - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); fastfs->unmount(); spdk_app_stop(code); } -static void delete_file_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; +static void delete_file_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); if (code != 0) { SPDK_ERRLOG("delete file failed: %d\n", code); @@ -154,12 +156,12 @@ static void delete_file_complete(void* cb_args, int code) { ctx.inflights--; } -static void delete_file(FastFS* fastfs) { +static void delete_file(FastFS *fastfs) { ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index); - FastInode* parent = fastfs->status(ctx.path); + FastInode *parent = fastfs->status(ctx.path); ctx.path = "file." 
+ std::to_string(ctx.subfile_index); - fs_op_context* opCtx = fastfs->allocFsOp(); - DeleteContext* delCtx = new (opCtx->private_data) DeleteContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + DeleteContext *delCtx = new (opCtx->private_data) DeleteContext(); delCtx->parentId = parent->ino_; delCtx->name = ctx.path.c_str(); opCtx->callback = delete_file_complete; @@ -168,10 +170,10 @@ static void delete_file(FastFS* fastfs) { ctx.subfile_index++; } -static void read_file_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - ReadContext* readCtx = reinterpret_cast(opCtx->private_data); - FastFS* fastfs = opCtx->fastfs; +static void read_file_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + ReadContext *readCtx = reinterpret_cast(opCtx->private_data); + FastFS *fastfs = opCtx->fastfs; if (verify) { if (direct) { readCtx->direct_buff->position(readCtx->direct_cursor); @@ -195,13 +197,12 @@ static void read_file_complete(void* cb_args, int code) { ctx.inflights--; } -static void read_file(FastFS* fastfs) { - ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index) - + "/file." + std::to_string(ctx.subfile_index); +static void read_file(FastFS *fastfs) { + ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index) + "/file." 
+ std::to_string(ctx.subfile_index); int fd = fastfs->open(ctx.path, O_RDONLY); if (readWrite) { - fs_op_context* opCtx = fastfs->allocFsOp(); - ReadContext* readCtx = new (opCtx->private_data) ReadContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + ReadContext *readCtx = new (opCtx->private_data) ReadContext(); if (direct) { readCtx->dirctRead(fastfs, fd, 0, ctx.size); } else { @@ -221,10 +222,10 @@ static void read_file(FastFS* fastfs) { ctx.subfile_index++; } -static void write_file_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - WriteContext* writeCtx = reinterpret_cast(opCtx->private_data); - FastFS* fastfs = opCtx->fastfs; +static void write_file_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + WriteContext *writeCtx = reinterpret_cast(opCtx->private_data); + FastFS *fastfs = opCtx->fastfs; fastfs->close(writeCtx->fd); if (direct) { fastfs->freeBuffer(writeCtx->direct_buff); @@ -239,13 +240,12 @@ static void write_file_complete(void* cb_args, int code) { ctx.inflights--; } -static void write_file(FastFS* fastfs) { - ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index) - + "/file." + std::to_string(ctx.subfile_index); +static void write_file(FastFS *fastfs) { + ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index) + "/file." + std::to_string(ctx.subfile_index); int fd = fastfs->open(ctx.path, O_RDWR); // F_MULTI_WRITE if (readWrite) { - fs_op_context* opCtx = fastfs->allocFsOp(); - WriteContext* writeCtx = new (opCtx->private_data) WriteContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + WriteContext *writeCtx = new (opCtx->private_data) WriteContext(); if (direct) { writeCtx->dirctWrite(fastfs, fd, 0, ctx.size, ctx.data); } else { @@ -264,10 +264,9 @@ static void write_file(FastFS* fastfs) { ctx.subfile_index++; } -static void stat_file(FastFS* fastfs) { - ctx.path = "/mdtest/mdtest_tree." 
+ std::to_string(ctx.subdir_index) - + "/file." + std::to_string(ctx.subfile_index); - FastInode* inode = fastfs->status(ctx.path); +static void stat_file(FastFS *fastfs) { + ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index) + "/file." + std::to_string(ctx.subfile_index); + FastInode *inode = fastfs->status(ctx.path); if (!inode) { SPDK_ERRLOG("stat file failed: %s\n", ctx.path.c_str()); spdk_poller_unregister(&ctx.poller); @@ -278,9 +277,9 @@ static void stat_file(FastFS* fastfs) { ctx.subfile_index++; } -static void create_file_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; +static void create_file_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); if (code != 0) { SPDK_ERRLOG("create file failed: %d\n", code); @@ -291,12 +290,12 @@ static void create_file_complete(void* cb_args, int code) { ctx.inflights--; } -static void create_file(FastFS* fastfs) { +static void create_file(FastFS *fastfs) { ctx.path = "/mdtest/mdtest_tree." + std::to_string(ctx.subdir_index); - FastInode* parent = fastfs->status(ctx.path); + FastInode *parent = fastfs->status(ctx.path); ctx.path = "file." 
+ std::to_string(ctx.subfile_index); - fs_op_context* opCtx = fastfs->allocFsOp(); - CreateContext* createCtx = new (opCtx->private_data) CreateContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + CreateContext *createCtx = new (opCtx->private_data) CreateContext(); createCtx->parentId = parent->ino_; createCtx->name = ctx.path.c_str(); createCtx->mode = 493; @@ -307,9 +306,9 @@ static void create_file(FastFS* fastfs) { ctx.subfile_index++; } -static void create_subdir_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; +static void create_subdir_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); if (code != 0) { SPDK_ERRLOG("create sub dir failed: %d\n", code); @@ -320,12 +319,12 @@ static void create_subdir_complete(void* cb_args, int code) { ctx.inflights--; } -static void create_subdir(FastFS* fastfs) { +static void create_subdir(FastFS *fastfs) { ctx.path = "/mdtest"; - FastInode* parent = fastfs->status(ctx.path); + FastInode *parent = fastfs->status(ctx.path); ctx.path = "mdtest_tree." 
+ std::to_string(ctx.subdir_index); - fs_op_context* opCtx = fastfs->allocFsOp(); - CreateContext* createCtx = new (opCtx->private_data) CreateContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + CreateContext *createCtx = new (opCtx->private_data) CreateContext(); createCtx->parentId = parent->ino_; createCtx->name = ctx.path.c_str(); createCtx->mode = 493; @@ -369,72 +368,72 @@ static int do_bench(void *arg) { if (ctx.inflights >= parallelism) { return SPDK_POLLER_IDLE; } - FastFS* fastfs = reinterpret_cast(arg); + FastFS *fastfs = reinterpret_cast(arg); switch (ctx.stage) { - case 0 : { // mkdir - if (ctx.subdir_index == subdirs) { - ctx.wait = true; - return SPDK_POLLER_IDLE; - } - ctx.inflights++; - create_subdir(fastfs); - return SPDK_POLLER_BUSY; - } - case 1 : { // create - if (!advanceProgress()) { - return SPDK_POLLER_IDLE; - } - create_file(fastfs); - return SPDK_POLLER_BUSY; + case 0: { // mkdir + if (ctx.subdir_index == subdirs) { + ctx.wait = true; + return SPDK_POLLER_IDLE; } - case 2 : { // stats - if (!advanceProgress()) { - return SPDK_POLLER_IDLE; - } - stat_file(fastfs); - return SPDK_POLLER_BUSY; + ctx.inflights++; + create_subdir(fastfs); + return SPDK_POLLER_BUSY; + } + case 1: { // create + if (!advanceProgress()) { + return SPDK_POLLER_IDLE; } - case 3 : { // write - if (!advanceProgress()) { - return SPDK_POLLER_IDLE; - } - write_file(fastfs); - return SPDK_POLLER_BUSY; + create_file(fastfs); + return SPDK_POLLER_BUSY; + } + case 2: { // stats + if (!advanceProgress()) { + return SPDK_POLLER_IDLE; } - case 4 : { // read - if (!advanceProgress()) { - return SPDK_POLLER_IDLE; - } - read_file(fastfs); - return SPDK_POLLER_BUSY; + stat_file(fastfs); + return SPDK_POLLER_BUSY; + } + case 3: { // write + if (!advanceProgress()) { + return SPDK_POLLER_IDLE; } - case 5 : { // read - if (!advanceProgress()) { - return SPDK_POLLER_IDLE; - } - delete_file(fastfs); - return SPDK_POLLER_BUSY; + write_file(fastfs); + return SPDK_POLLER_BUSY; + } + 
case 4: { // read + if (!advanceProgress()) { + return SPDK_POLLER_IDLE; } - case 6 : { // remove test dir - ctx.path = "mdtest"; - fs_op_context* opCtx = fastfs->allocFsOp(); - DeleteContext* delCtx = new (opCtx->private_data) DeleteContext(); - delCtx->parentId = 0; - delCtx->name = ctx.path.c_str(); - delCtx->recursive = true; - opCtx->callback = delete_complete; - opCtx->cb_args = opCtx; - fastfs->remove(*opCtx); - ctx.stage++; - return SPDK_POLLER_BUSY; + read_file(fastfs); + return SPDK_POLLER_BUSY; + } + case 5: { // read + if (!advanceProgress()) { + return SPDK_POLLER_IDLE; } + delete_file(fastfs); + return SPDK_POLLER_BUSY; + } + case 6: { // remove test dir + ctx.path = "mdtest"; + fs_op_context *opCtx = fastfs->allocFsOp(); + DeleteContext *delCtx = new (opCtx->private_data) DeleteContext(); + delCtx->parentId = 0; + delCtx->name = ctx.path.c_str(); + delCtx->recursive = true; + opCtx->callback = delete_complete; + opCtx->cb_args = opCtx; + fastfs->remove(*opCtx); + ctx.stage++; + return SPDK_POLLER_BUSY; + } } return SPDK_POLLER_IDLE; } -static void create_dir_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - FastFS* fastfs = opCtx->fastfs; +static void create_dir_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + FastFS *fastfs = opCtx->fastfs; fastfs->freeFsOp(opCtx); if (code != 0) { SPDK_ERRLOG("create test dir failed: %d\n", code); @@ -451,15 +450,14 @@ static void create_dir_complete(void* cb_args, int code) { ctx.poller = SPDK_POLLER_REGISTER(do_bench, fastfs, 0); } - -static void mount_complete(FastFS* fastfs, int code) { +static void mount_complete(FastFS *fastfs, int code) { if (code != 0) { SPDK_ERRLOG("mount fastfs failed: %d\n", code); return; } ctx.path = "mdtest"; - fs_op_context* opCtx = fastfs->allocFsOp(); - CreateContext* createCtx = new (opCtx->private_data) CreateContext(); + fs_op_context *opCtx = fastfs->allocFsOp(); + CreateContext *createCtx = new 
(opCtx->private_data) CreateContext(); createCtx->parentId = 0; // root createCtx->name = ctx.path.c_str(); createCtx->mode = 493; @@ -470,7 +468,7 @@ static void mount_complete(FastFS* fastfs, int code) { fastfs->create(*opCtx); } -static void format_complete(FastFS* fastfs, int code) { +static void format_complete(FastFS *fastfs, int code) { if (code != 0) { SPDK_ERRLOG("format fastfs failed: %d\n", code); return; @@ -479,23 +477,21 @@ static void format_complete(FastFS* fastfs, int code) { fastfs->mount(mount_complete); } -static void fsbench_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, - void *event_ctx) { +static void fsbench_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) { SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); } static void fsbench_start(void *arg) { SPDK_NOTICELOG("Successfully started the application\n"); - FastFS* fastfs = (FastFS*) arg; - fs_context_t* fs_context = &FastFS::fs_context; + FastFS *fastfs = (FastFS *)arg; + fs_context_t *fs_context = &FastFS::fs_context; int rc = 0; fs_context->bdev = NULL; fs_context->bdev_desc = NULL; SPDK_NOTICELOG("Opening the bdev %s\n", fs_context->bdev_name); - rc = spdk_bdev_open_ext(fs_context->bdev_name, true, fsbench_event_cb, NULL, - &fs_context->bdev_desc); + rc = spdk_bdev_open_ext(fs_context->bdev_name, true, fsbench_event_cb, NULL, &fs_context->bdev_desc); if (rc) { SPDK_ERRLOG("Could not open bdev: %s\n", fs_context->bdev_name); spdk_app_stop(-1); @@ -522,8 +518,7 @@ static void fsbench_start(void *arg) { int main(int argc, char **argv) { struct spdk_app_opts opts = {}; spdk_app_opts_init(&opts, sizeof(opts)); - int rc = spdk_app_parse_args( - argc, argv, &opts, "b:VMS:P:DN:F:w", NULL, fsbench_parse_arg, fsbench_usage); + int rc = spdk_app_parse_args(argc, argv, &opts, "b:VMS:P:DN:F:w", NULL, fsbench_parse_arg, fsbench_usage); if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) { exit(rc); } diff --git a/bench/mdtest/kernel_bench.cpp 
b/bench/mdtest/kernel_bench.cpp index 2e78811..ba58ec0 100644 --- a/bench/mdtest/kernel_bench.cpp +++ b/bench/mdtest/kernel_bench.cpp @@ -2,13 +2,13 @@ * Copyright (C) 2025 chenxu14 * SPDX-License-Identifier: BSD-3-Clause */ -#include #include -#include -#include #include +#include +#include +#include -#define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH +#define FILEMODE S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH static int subdirs = 1024; static int files_per_dir = 128; @@ -21,7 +21,7 @@ struct timespec time_end; struct fs_bench_context { uint32_t size = 4096; - void* data = nullptr; + void *data = nullptr; bool verify = false; int writeFlags = 0; fs_bench_context() { @@ -67,8 +67,9 @@ static void remove() { auto d = timespec_diff(time_start, time_end); long inodes = subdirs * files_per_dir + subdirs; printf("[Remove] delete %ld inodes use %ld ms, ops is %s.\n", - inodes, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(inodes / ((double) d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + inodes, + (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), + std::to_string(inodes / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } static void read() { @@ -78,7 +79,7 @@ static void read() { std::string path = PATH_PREFIX + std::to_string(i) + "/file." 
+ std::to_string(j); int fd = open(path.c_str(), O_RDONLY, FILEMODE); if (readWrite) { - void* read_buff = nullptr; + void *read_buff = nullptr; if (DIRECT_IO) { if (posix_memalign(&read_buff, getpagesize(), ctx.size) != 0) { printf("alloc read buffer failed.\n"); @@ -87,8 +88,7 @@ static void read() { } else { read_buff = malloc(ctx.size); } - if (read(fd, read_buff, ctx.size) < 0 || - (ctx.verify && memcmp(read_buff, ctx.data, ctx.size) != 0)) { + if (read(fd, read_buff, ctx.size) < 0 || (ctx.verify && memcmp(read_buff, ctx.data, ctx.size) != 0)) { printf("read file failed"); exit(-1); } @@ -103,8 +103,9 @@ static void read() { auto d = timespec_diff(time_start, time_end); long filesCnt = subdirs * files_per_dir; printf("[Read] read %ld files use %ld ms, ops is %s.\n", - filesCnt, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(filesCnt / ((double) d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + filesCnt, + (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), + std::to_string(filesCnt / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } static void write() { @@ -126,8 +127,9 @@ static void write() { auto d = timespec_diff(time_start, time_end); long filesCnt = subdirs * files_per_dir; printf("[Write] write %ld files use %ld ms, ops is %s.\n", - filesCnt, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(filesCnt / ((double) d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + filesCnt, + (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), + std::to_string(filesCnt / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } static void stat() { @@ -146,8 +148,9 @@ static void stat() { auto d = timespec_diff(time_start, time_end); long filesCnt = subdirs * files_per_dir; printf("[Stats] stats %ld files use %ld ms, ops is %s.\n", - filesCnt, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(filesCnt / ((double) d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + filesCnt, + (long)(d.tv_sec * 1000 + d.tv_nsec / 
1000000.0), + std::to_string(filesCnt / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } static void create() { @@ -171,8 +174,9 @@ static void create() { auto d = timespec_diff(time_start, time_end); long files = subdirs * files_per_dir; printf("[Creation] create %ld inodes use %ld ms, ops is %s.\n", - files, (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), - std::to_string(files / ((double) d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); + files, + (long)(d.tv_sec * 1000 + d.tv_nsec / 1000000.0), + std::to_string(files / ((double)d.tv_sec + d.tv_nsec / 1000000000.0)).c_str()); } int main(int argc, char **argv) { diff --git a/core/Allocator.h b/core/Allocator.h index cf6f802..dfe4bc2 100644 --- a/core/Allocator.h +++ b/core/Allocator.h @@ -6,109 +6,79 @@ #ifndef FASTFS_ALLOCATOR_H_ #define FASTFS_ALLOCATOR_H_ -#include "spdk/bit_pool.h" #include + +#include "spdk/bit_pool.h" #define EXTENT_SIZE 1048576 class BitsAllocator { - private: - struct spdk_bit_pool* pool; + private: + struct spdk_bit_pool *pool; uint32_t capacity_; - public: - BitsAllocator(uint32_t capacity) : capacity_(capacity) { - pool = spdk_bit_pool_create(capacity); - } + public: + BitsAllocator(uint32_t capacity) : capacity_(capacity) { pool = spdk_bit_pool_create(capacity); } - ~BitsAllocator() { - spdk_bit_pool_free(&pool); - } + ~BitsAllocator() { spdk_bit_pool_free(&pool); } - uint32_t allocate() { - return spdk_bit_pool_allocate_bit(pool); - } + uint32_t allocate() { return spdk_bit_pool_allocate_bit(pool); } void reserve(uint32_t index) { pool->lowest_free_bit = index; spdk_bit_pool_allocate_bit(pool); } - void release(uint32_t index) { - spdk_bit_pool_free_bit(pool, index); - } + void release(uint32_t index) { spdk_bit_pool_free_bit(pool, index); } - uint32_t getFree() const { - return spdk_bit_pool_count_free(pool); - } + uint32_t getFree() const { return spdk_bit_pool_count_free(pool); } - uint32_t getAllocated() const { - return spdk_bit_pool_count_allocated(pool); - } + 
uint32_t getAllocated() const { return spdk_bit_pool_count_allocated(pool); } }; class BlockAllocator { - private: - struct spdk_bit_pool* pool; + private: + struct spdk_bit_pool *pool; uint32_t capacity_; uint32_t blockSize_; uint32_t extentSize_; uint32_t extentBlocks_; - public: - BlockAllocator( - uint64_t blocks, uint32_t blockSize, uint32_t extentSize) - : blockSize_(blockSize), extentSize_(extentSize) { + public: + BlockAllocator(uint64_t blocks, uint32_t blockSize, uint32_t extentSize) + : blockSize_(blockSize), extentSize_(extentSize) { capacity_ = blocks * blockSize / extentSize; extentBlocks_ = extentSize / blockSize; pool = spdk_bit_pool_create(capacity_); reserve(0); // reserve first extent } - ~BlockAllocator() { - spdk_bit_pool_free(&pool); - } + ~BlockAllocator() { spdk_bit_pool_free(&pool); } - uint32_t allocate() { - return spdk_bit_pool_allocate_bit(pool); - } + uint32_t allocate() { return spdk_bit_pool_allocate_bit(pool); } void reserve(uint32_t index) { pool->lowest_free_bit = index; spdk_bit_pool_allocate_bit(pool); } - void release(uint32_t index) { - spdk_bit_pool_free_bit(pool, index); - } + void release(uint32_t index) { spdk_bit_pool_free_bit(pool, index); } - uint32_t getBlockSize() const { - return blockSize_; - } + uint32_t getBlockSize() const { return blockSize_; } - uint32_t getExtentSize() const { - return extentSize_; - } + uint32_t getExtentSize() const { return extentSize_; } - uint32_t getCapacity() const { - return capacity_; - } + uint32_t getCapacity() const { return capacity_; } - uint32_t getExtentBlocks() const { - return extentBlocks_; - } + uint32_t getExtentBlocks() const { return extentBlocks_; } - uint32_t getFree() const { - return spdk_bit_pool_count_free(pool); - } + uint32_t getFree() const { return spdk_bit_pool_count_free(pool); } - uint32_t getLowestFreeIndex() const { - return pool->lowest_free_bit; - } + uint32_t getLowestFreeIndex() const { return pool->lowest_free_bit; } }; template class MemAllocator { 
-public: + public: using value_type = T; template @@ -116,33 +86,25 @@ class MemAllocator { using other = MemAllocator; }; - explicit MemAllocator(uint32_t numa, uint32_t align) - : numa_id(numa), align_(align) {} + explicit MemAllocator(uint32_t numa, uint32_t align) : numa_id(numa), align_(align) {} - T* allocate(size_t n) { + T *allocate(size_t n) { size_t size = n * sizeof(T); - void* ptr = spdk_dma_zmalloc_socket(size, align_, NULL, numa_id); + void *ptr = spdk_dma_zmalloc_socket(size, align_, NULL, numa_id); if (!ptr) { throw std::bad_alloc(); } - return static_cast(ptr); + return static_cast(ptr); } - void deallocate(T* ptr, size_t n) noexcept { - spdk_dma_free(ptr); - } + void deallocate(T *ptr, size_t n) noexcept { spdk_dma_free(ptr); } template - MemAllocator(const MemAllocator& other) - : numa_id(other.numa_id), align_(other.align_) {} + MemAllocator(const MemAllocator &other) : numa_id(other.numa_id), align_(other.align_) {} - bool operator==(const MemAllocator& other) const { - return (numa_id == other.numa_id) && (align_ == other.align_); - } + bool operator==(const MemAllocator &other) const { return (numa_id == other.numa_id) && (align_ == other.align_); } - bool operator!=(const MemAllocator& other) const { - return !(*this == other); - } + bool operator!=(const MemAllocator &other) const { return !(*this == other); } uint32_t numa_id; uint32_t align_; diff --git a/core/ByteBuffer.h b/core/ByteBuffer.h index 8b71843..fa62626 100644 --- a/core/ByteBuffer.h +++ b/core/ByteBuffer.h @@ -6,27 +6,25 @@ #define FAST_FS_BYTE_BUFFER_H_ #include -#include #include -#include #include +#include +#include + #include "spdk/env.h" #define DEFAULT_BUFFER_SIZE 2048 class ByteBuffer { -public: - ByteBuffer(uint32_t capacity = DEFAULT_BUFFER_SIZE, - bool alloc = true, int32_t numa = 0, int32_t align = 1) - : mark_(0), limit_(capacity), position_(0), capacity_(capacity), alloc_(alloc) { + public: + ByteBuffer(uint32_t capacity = DEFAULT_BUFFER_SIZE, bool alloc = true, 
int32_t numa = 0, int32_t align = 1) + : mark_(0), limit_(capacity), position_(0), capacity_(capacity), alloc_(alloc) { if (alloc_) { - p_buffer_ = static_cast(spdk_dma_zmalloc_socket(capacity_, align, NULL, numa)); + p_buffer_ = static_cast(spdk_dma_zmalloc_socket(capacity_, align, NULL, numa)); } } - ByteBuffer(char* buffer, uint32_t size) : ByteBuffer(size, false) { - p_buffer_ = buffer; - } + ByteBuffer(char *buffer, uint32_t size) : ByteBuffer(size, false) { p_buffer_ = buffer; } ~ByteBuffer() { if (alloc_ && p_buffer_) { @@ -35,7 +33,7 @@ class ByteBuffer { p_buffer_ = nullptr; } - ByteBuffer& limit(uint32_t newLimit) { + ByteBuffer &limit(uint32_t newLimit) { if (position_ > newLimit) { position_ = newLimit; } @@ -43,25 +41,25 @@ class ByteBuffer { return *this; } - ByteBuffer& position(uint32_t newPosition) { + ByteBuffer &position(uint32_t newPosition) { position_ = newPosition; return *this; } - ByteBuffer& skip(uint32_t len) { + ByteBuffer &skip(uint32_t len) { position_ += len; return *this; } - ByteBuffer* duplicate() { - ByteBuffer* newBuffer = new ByteBuffer(capacity_, false); + ByteBuffer *duplicate() { + ByteBuffer *newBuffer = new ByteBuffer(capacity_, false); newBuffer->p_buffer_ = p_buffer_; newBuffer->limit(limit_); newBuffer->position(position_); return newBuffer; } - ByteBuffer* slice() { + ByteBuffer *slice() { ByteBuffer *newBuffer = new ByteBuffer(remaining(), false); newBuffer->p_buffer_ = p_buffer_ + position_; newBuffer->limit(remaining()); @@ -69,31 +67,31 @@ class ByteBuffer { return newBuffer; } - ByteBuffer& clear() { + ByteBuffer &clear() { position_ = 0; mark_ = 0; limit_ = capacity_; return *this; } - ByteBuffer& flip() { + ByteBuffer &flip() { limit_ = position_; position_ = 0; mark_ = 0; return *this; } - ByteBuffer& mark() { + ByteBuffer &mark() { mark_ = position_; return *this; } - ByteBuffer& reset() { + ByteBuffer &reset() { position_ = mark_; return *this; } - bool putBytes(const char* buf, uint32_t len) { + bool 
putBytes(const char *buf, uint32_t len) { if (!p_buffer_ || position_ + len > capacity_) { return false; } @@ -102,7 +100,7 @@ class ByteBuffer { return true; } - bool putBytes(uint32_t index, const char* buf, uint32_t len) { + bool putBytes(uint32_t index, const char *buf, uint32_t len) { if (!p_buffer_ || index + len > capacity_) { return false; } @@ -126,7 +124,7 @@ class ByteBuffer { return true; } - template + template bool write(T data) { uint32_t len = sizeof(data); if (!p_buffer_ || position_ + len > capacity_) { @@ -137,7 +135,7 @@ class ByteBuffer { return true; } - template + template bool pwrite(uint32_t index, T data) { uint32_t len = sizeof(data); if (!p_buffer_ || index + len > capacity_) { @@ -147,7 +145,7 @@ class ByteBuffer { return true; } - bool getBytes(char* buf, uint32_t len) { + bool getBytes(char *buf, uint32_t len) { if (!p_buffer_ || position_ + len > limit_) { return false; } @@ -156,7 +154,7 @@ class ByteBuffer { return true; } - bool getBytes(uint32_t index, char* buf, uint32_t len) const { + bool getBytes(uint32_t index, char *buf, uint32_t len) const { if (!p_buffer_ || index + len > limit_) { return false; } @@ -164,7 +162,7 @@ class ByteBuffer { return true; } - bool getByte(char& val) { + bool getByte(char &val) { if (!p_buffer_ || position_ >= limit_) { return false; } @@ -172,7 +170,7 @@ class ByteBuffer { return true; } - bool getByte(uint32_t index, char& val) const { + bool getByte(uint32_t index, char &val) const { if (!p_buffer_ || index >= limit_) { return false; } @@ -180,61 +178,49 @@ class ByteBuffer { return true; } - template - bool read(T& val) { + template + bool read(T &val) { uint32_t len = sizeof(val); if (!p_buffer_ || position_ + len > limit_) { return false; } - val = *((T *) &p_buffer_[position_]); + val = *((T *)&p_buffer_[position_]); position_ += len; return true; } - template - bool pread(uint32_t index, T& val) const { + template + bool pread(uint32_t index, T &val) const { uint32_t len = sizeof(val); if 
(!p_buffer_ || index + len > limit_) { return false; } - val = *((T *) &p_buffer_[index]); + val = *((T *)&p_buffer_[index]); return true; } - uint32_t remaining() const { - return position_ < limit_ ? limit_ - position_ : 0; - } + uint32_t remaining() const { return position_ < limit_ ? limit_ - position_ : 0; } - bool writable(int size) { - return position_ + size < limit_; - } + bool writable(int size) { return position_ + size < limit_; } - uint32_t capacity() const { - return capacity_; - } + uint32_t capacity() const { return capacity_; } - uint32_t position() const { - return position_; - } + uint32_t position() const { return position_; } - uint32_t limit() const { - return limit_; - } + uint32_t limit() const { return limit_; } - char* getBuffer() { - return p_buffer_ + position_; - } + char *getBuffer() { return p_buffer_ + position_; } - public: + public: uint64_t mark_; uint32_t limit_; uint32_t position_; uint32_t capacity_; - char* p_buffer_ = nullptr; - ByteBuffer* next = nullptr; - void* private_data = nullptr; + char *p_buffer_ = nullptr; + ByteBuffer *next = nullptr; + void *private_data = nullptr; bool alloc_; char padding[8]; // align cache line }; -#endif /* FAST_FS_BYTE_BUFFER_H_ */ +#endif /* FAST_FS_BYTE_BUFFER_H_ */ diff --git a/core/FastCkpt.cpp b/core/FastCkpt.cpp index 45e0c52..ff458f0 100644 --- a/core/FastCkpt.cpp +++ b/core/FastCkpt.cpp @@ -6,12 +6,12 @@ #include "FastFS.h" static constexpr int32_t kMinExtentSize = 21; -static void ckptINode(ByteBuffer* extentBuf); -static void ckptDentryAndExtents(ByteBuffer* extentBuf); +static void ckptINode(ByteBuffer *extentBuf); +static void ckptDentryAndExtents(ByteBuffer *extentBuf); -static void loadSuccess(ByteBuffer* extentBuf) { - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - fs_context_t& ctx = FastFS::fs_context; +static void loadSuccess(ByteBuffer *extentBuf) { + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + fs_context_t &ctx = FastFS::fs_context; 
fastfs->freeBuffer(extentBuf); SPDK_NOTICELOG("load checkpoint success, do replay journal now.\n"); // replay journal @@ -19,8 +19,7 @@ static void loadSuccess(ByteBuffer* extentBuf) { fastfs->journal->logReplay(); } -bool FastCkpt::parseExtent( - ByteBuffer* extentBuf, uint32_t& nextExtent, INodeCache& inodes) { +bool FastCkpt::parseExtent(ByteBuffer *extentBuf, uint32_t &nextExtent, INodeCache &inodes) { uint32_t numOps = 0; extentBuf->read(nextExtent); extentBuf->read(numOps); @@ -49,8 +48,7 @@ bool FastCkpt::parseExtent( if (extentBuf->read(ino) && ino < fs_context.maxInodes) { extentBuf->read(childCount); for (uint32_t j = 0; j < childCount; j++) { - if (extentBuf->read(childIno) && - childIno < fs_context.maxInodes) { + if (extentBuf->read(childIno) && childIno < fs_context.maxInodes) { inodes[ino].children_->insert(childIno); } else { SPDK_WARNLOG("child's inodeId overflow\n"); @@ -69,24 +67,22 @@ bool FastCkpt::parseExtent( return true; } -static void loadDentryAndExtents( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void loadDentryAndExtents(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + fs_context_t &ctx = FastFS::fs_context; if (!success) { fastfs->freeBuffer(extentBuf); - ctx.callback(fastfs, -7/*load dentry failed*/); + ctx.callback(fastfs, -7 /*load dentry failed*/); return; } uint32_t nextExtent = 0; if (!fastfs->checkpoint->parseExtent(extentBuf, nextExtent, *fastfs->inodes)) { - SPDK_WARNLOG("extent %d's OP record can't parse successfully!\n", - fastfs->checkpoint->curExtent); + SPDK_WARNLOG("extent %d's OP record can't parse successfully!\n", fastfs->checkpoint->curExtent); 
fastfs->freeBuffer(extentBuf); FastFS::fs_context.callback(fastfs, -8); return; @@ -97,26 +93,29 @@ static void loadDentryAndExtents( fastfs->checkpoint->extents_.push_front(nextExtent); uint64_t bdevOffset = static_cast(nextExtent) << ctx.extentBits; extentBuf->clear(); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, - extentBuf->p_buffer_, bdevOffset, ctx.extentSize, - loadDentryAndExtents, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + bdevOffset, + ctx.extentSize, + loadDentryAndExtents, + extentBuf); return; } loadSuccess(extentBuf); } -static void loadInodes( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void loadInodes(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + fs_context_t &ctx = FastFS::fs_context; if (!success) { fastfs->freeBuffer(extentBuf); - ctx.callback(fastfs, -3/*read extent failed*/); + ctx.callback(fastfs, -3 /*read extent failed*/); return; } @@ -130,7 +129,7 @@ static void loadInodes( bool head = false; for (uint32_t i = 0; i < numOps; i++) { extentBuf->read(opSize); - auto& inodeProto = fastfs->checkpoint->inodeProto; + auto &inodeProto = fastfs->checkpoint->inodeProto; if (inodeProto.deserialize(extentBuf)) { if (inodeProto.ino >= FastFS::fs_context.maxInodes) { SPDK_WARNLOG("No available inodes"); @@ -148,14 +147,12 @@ static void loadInodes( } FastFS::fs_context.inodeAllocator->reserve(inodeProto.ino); - FastInode& inode = (*fastfs->inodes)[inodeProto.ino]; - inode.create(inodeProto.ino, inodeProto.parent_id, - inodeProto.name, inodeProto.type); + FastInode &inode = (*fastfs->inodes)[inodeProto.ino]; + inode.create(inodeProto.ino, 
inodeProto.parent_id, inodeProto.name, inodeProto.type); inode.size_ = inodeProto.size; inode.mode_ = inodeProto.mode; } else { - SPDK_WARNLOG("extent %d's OP record can't parse successfully!\n", - fastfs->checkpoint->curExtent); + SPDK_WARNLOG("extent %d's OP record can't parse successfully!\n", fastfs->checkpoint->curExtent); fastfs->freeBuffer(extentBuf); ctx.callback(fastfs, -6); return; @@ -167,9 +164,8 @@ static void loadInodes( fastfs->checkpoint->extents_.push_front(nextExtent); uint64_t bdevOffset = static_cast(nextExtent) << ctx.extentBits; extentBuf->clear(); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, - extentBuf->p_buffer_, bdevOffset, ctx.extentSize, - loadInodes, extentBuf); + spdk_bdev_read( + ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, bdevOffset, ctx.extentSize, loadInodes, extentBuf); return; } @@ -178,50 +174,57 @@ static void loadInodes( ctx.allocator->reserve(dentryLocation); fastfs->checkpoint->dentryLocation = dentryLocation; fastfs->checkpoint->extents_.push_front(dentryLocation); - uint64_t bdevOffset = - static_cast(dentryLocation) << ctx.extentBits; + uint64_t bdevOffset = static_cast(dentryLocation) << ctx.extentBits; extentBuf->clear(); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, - extentBuf->p_buffer_, bdevOffset, ctx.extentSize, - loadDentryAndExtents, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + bdevOffset, + ctx.extentSize, + loadDentryAndExtents, + extentBuf); } else { loadSuccess(extentBuf); } } void FastCkpt::loadImage() { - ByteBuffer* extentBuf = fs_context.fastfs->allocBuffer(); + ByteBuffer *extentBuf = fs_context.fastfs->allocBuffer(); extentBuf->private_data = fs_context.fastfs; inodesLocation = fs_context.superBlock.ckptInodesLoc; if (inodesLocation > 0) { fs_context.allocator->reserve(inodesLocation); extents_.push_front(inodesLocation); - uint64_t bdevOffset = - static_cast(inodesLocation) << fs_context.extentBits; - 
spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - extentBuf->p_buffer_, bdevOffset, fs_context.extentSize, - loadInodes, extentBuf); + uint64_t bdevOffset = static_cast(inodesLocation) << fs_context.extentBits; + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + extentBuf->p_buffer_, + bdevOffset, + fs_context.extentSize, + loadInodes, + extentBuf); } else { loadSuccess(extentBuf); } } -static void writeSuperBlockComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void writeSuperBlockComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; if (success) { SPDK_NOTICELOG("checkpoint finished, inodes begin at %d, " - "dentry begin at %d, inodes extents %d, dentry extents %d\n", - ckpt.inodesLocation, ckpt.dentryLocation, - ckpt.inodeExtents, ckpt.dentryExtents); + "dentry begin at %d, inodes extents %d, dentry extents %d\n", + ckpt.inodesLocation, + ckpt.dentryLocation, + ckpt.inodeExtents, + ckpt.dentryExtents); // release Journal's old extents - auto& journalExtents = fastfs->journal->extents_; + auto &journalExtents = fastfs->journal->extents_; int count = 0; for (auto it = fastfs->journal->cusor; it != journalExtents.end(); it++) { count++; @@ -238,10 +241,10 @@ static void writeSuperBlockComplete( ckpt.ckpt_cb(fastfs, success ? 
0 : -4); } -static void writeSuperBlock(ByteBuffer* buffer) { - FastFS* fastfs = reinterpret_cast(buffer->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; - fs_context_t& ctx = FastFS::fs_context; +static void writeSuperBlock(ByteBuffer *buffer) { + FastFS *fastfs = reinterpret_cast(buffer->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; + fs_context_t &ctx = FastFS::fs_context; ctx.superBlock.ckptInodesLoc = ckpt.inodesLocation; ctx.superBlock.ckptDentryLoc = ckpt.dentryLocation; ctx.superBlock.lastTxid = fastfs->journal->txid; @@ -251,18 +254,17 @@ static void writeSuperBlock(ByteBuffer* buffer) { ctx.superBlock.journalSkipOps = fastfs->journal->num_ops; buffer->clear(); ctx.superBlock.serialize(buffer); - spdk_bdev_write(ctx.bdev_desc, ctx.bdev_io_channel, - buffer->p_buffer_, 0, ctx.blockSize, writeSuperBlockComplete, buffer); + spdk_bdev_write( + ctx.bdev_desc, ctx.bdev_io_channel, buffer->p_buffer_, 0, ctx.blockSize, writeSuperBlockComplete, buffer); } -static void ckptDentryComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void ckptDentryComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; if (success) { ckpt.curExtent = ckpt.nextExtent; @@ -275,9 +277,9 @@ static void ckptDentryComplete( } } -static void ckptDentryAndExtents(ByteBuffer* extentBuf) { - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; +static void ckptDentryAndExtents(ByteBuffer *extentBuf) { + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; // reset extent buffer extentBuf->clear(); @@ 
-286,9 +288,8 @@ static void ckptDentryAndExtents(ByteBuffer* extentBuf) { ckpt.nextExtent = 0; int count = 0; while (ckpt.cusor != fastfs->inodes->end()) { - auto& inode = *ckpt.cusor; - if (inode.status_ == 1 && inode.type_ == FASTFS_REGULAR_FILE && - inode.extents_->size() > 0) { + auto &inode = *ckpt.cusor; + if (inode.status_ == 1 && inode.type_ == FASTFS_REGULAR_FILE && inode.extents_->size() > 0) { if (ckpt.extentEnd) { ckpt.extentCusor = inode.extents_->begin(); ckpt.extentEnd = false; @@ -322,8 +323,7 @@ static void ckptDentryAndExtents(ByteBuffer* extentBuf) { } extentBuf->pwrite(extentBuf->mark_, childCount); ckpt.extentEnd = true; - } else if (inode.status_ == 1 && inode.type_ == FASTFS_DIR && - inode.children_->size() > 0) { + } else if (inode.status_ == 1 && inode.type_ == FASTFS_DIR && inode.children_->size() > 0) { if (ckpt.dentryEnd) { ckpt.dentryCusor = inode.children_->begin(); ckpt.dentryEnd = false; @@ -367,25 +367,24 @@ static void ckptDentryAndExtents(ByteBuffer* extentBuf) { } endLoop: - fs_context_t& ctx = FastFS::fs_context; + fs_context_t &ctx = FastFS::fs_context; if (count > 0) { extentBuf->pwrite(4, count); // update count uint64_t offset = static_cast(ckpt.curExtent) << ctx.extentBits; - spdk_bdev_write(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - offset, ctx.extentSize, ckptDentryComplete, extentBuf); + spdk_bdev_write( + ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, offset, ctx.extentSize, ckptDentryComplete, extentBuf); } else { writeSuperBlock(extentBuf); } } -static void ckptINodeComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void ckptINodeComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + FastFS *fastfs 
= reinterpret_cast(extentBuf->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; if (success) { ckpt.curExtent = ckpt.nextExtent; ckptINode(extentBuf); @@ -397,9 +396,9 @@ static void ckptINodeComplete( } } -static void ckptINode(ByteBuffer* extentBuf) { - FastFS* fastfs = reinterpret_cast(extentBuf->private_data); - FastCkpt& ckpt = *fastfs->checkpoint; +static void ckptINode(ByteBuffer *extentBuf) { + FastFS *fastfs = reinterpret_cast(extentBuf->private_data); + FastCkpt &ckpt = *fastfs->checkpoint; // reset extent buffer extentBuf->clear(); extentBuf->write(0); // next extentId @@ -409,7 +408,7 @@ static void ckptINode(ByteBuffer* extentBuf) { // serialize INodes int count = 0; while (ckpt.cusor != fastfs->inodes->end()) { - auto& inode = *ckpt.cusor; + auto &inode = *ckpt.cusor; if (inode.status_ != 1) { ckpt.cusor++; continue; @@ -422,7 +421,7 @@ static void ckptINode(ByteBuffer* extentBuf) { ckpt.inodeProto.mode = inode.mode_; int size = INodeFile::kFixSize + ckpt.inodeProto.name.size(); - if (extentBuf->writable(size + 5/*1 type and 4 size*/)) { + if (extentBuf->writable(size + 5 /*1 type and 4 size*/)) { extentBuf->write(size); ckpt.inodeProto.serialize(extentBuf); } else { // extents full @@ -443,10 +442,10 @@ static void ckptINode(ByteBuffer* extentBuf) { } extentBuf->pwrite(4, count); // update count // write extent - fs_context_t& ctx = FastFS::fs_context; + fs_context_t &ctx = FastFS::fs_context; uint64_t offset = static_cast(ckpt.curExtent) << ctx.extentBits; - spdk_bdev_write(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - offset, ctx.extentSize, ckptINodeComplete, extentBuf); + spdk_bdev_write( + ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, offset, ctx.extentSize, ckptINodeComplete, extentBuf); } else { ckpt.cusor = fastfs->inodes->begin(); ckpt.dentryLocation = FastFS::fs_context.allocator->allocate(); @@ -460,7 +459,7 @@ void FastCkpt::checkpoint(fs_cb callback) { uint32_t count = 
fs_context.inodeAllocator->getAllocated(); SPDK_NOTICELOG("do checkpoint now, total inodes %d\n", count); ckpt_cb = callback; - ByteBuffer* extentBuf = fs_context.fastfs->allocBuffer(); + ByteBuffer *extentBuf = fs_context.fastfs->allocBuffer(); extentBuf->private_data = fs_context.fastfs; // notify FastJournal first fs_context.fastfs->journal->startCheckpoint(); diff --git a/core/FastFS.cpp b/core/FastFS.cpp index 2c993e2..a8f2f79 100644 --- a/core/FastFS.cpp +++ b/core/FastFS.cpp @@ -7,16 +7,14 @@ fs_context_t FastFS::fs_context; -FastFS::FastFS(const char* bdev) { - fs_context.bdev_name = bdev; -} +FastFS::FastFS(const char *bdev) { fs_context.bdev_name = bdev; } -void FSyncContext::serialize(ByteBuffer* buf) { +void FSyncContext::serialize(ByteBuffer *buf) { buf->write(file->inode_->ino_); buf->write(file->inode_->size_); if (dirtyExtents) { buf->write(dirtyExtents->size()); - for (auto& [index, extentInfo] : *dirtyExtents) { + for (auto &[index, extentInfo] : *dirtyExtents) { buf->write(index); buf->write(extentInfo.second); } @@ -25,29 +23,28 @@ void FSyncContext::serialize(ByteBuffer* buf) { } } -static void writeSuperBlockComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void writeSuperBlockComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(buffer->private_data); - auto& fs_context = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(buffer->private_data); + auto &fs_context = FastFS::fs_context; spdk_dma_free(buffer->p_buffer_); delete buffer; SPDK_NOTICELOG("format FastFS successfully, current epoch %d, extentSize %d\n", - fs_context.superBlock.epoch, fs_context.superBlock.extentSize); + fs_context.superBlock.epoch, + fs_context.superBlock.extentSize); fs_context.callback(fastfs, success ? 
0 : -1); } -static void writeSuperBlock( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void writeSuperBlock(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_context_t &ctx = FastFS::fs_context; if (success && ctx.superBlock.deserialize(buffer)) { ctx.superBlock.epoch += 1; } else { @@ -64,8 +61,8 @@ static void writeSuperBlock( ctx.superBlock.version = 0; buffer->clear(); ctx.superBlock.serialize(buffer); - spdk_bdev_write(ctx.bdev_desc, ctx.bdev_io_channel, - buffer->p_buffer_, 0, ctx.blockSize, writeSuperBlockComplete, buffer); + spdk_bdev_write( + ctx.bdev_desc, ctx.bdev_io_channel, buffer->p_buffer_, 0, ctx.blockSize, writeSuperBlockComplete, buffer); } void FastFS::format(uint32_t extentSize, fs_cb callback, bool skipJournal) { @@ -84,22 +81,25 @@ void FastFS::format(uint32_t extentSize, fs_cb callback, bool skipJournal) { fs_context.localCore = spdk_env_get_current_core(); fs_context.localNuma = spdk_env_get_numa_id(fs_context.localCore); - char* addr = (char*) spdk_dma_zmalloc_socket( - fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); - ByteBuffer* buffer = new ByteBuffer(addr, fs_context.blockSize); + char *addr = (char *)spdk_dma_zmalloc_socket(fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); + ByteBuffer *buffer = new ByteBuffer(addr, fs_context.blockSize); buffer->private_data = this; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, 0, fs_context.blockSize, writeSuperBlock, buffer); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + 0, + fs_context.blockSize, + writeSuperBlock, + buffer); } -static void loadCheckpoint( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void 
loadCheckpoint(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(buffer->private_data); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(buffer->private_data); + fs_context_t &ctx = FastFS::fs_context; success = success && ctx.superBlock.deserialize(buffer); spdk_dma_free(buffer->p_buffer_); delete buffer; @@ -142,40 +142,42 @@ void FastFS::mount(fs_cb callback, uint32_t maxInodes, uint32_t maxFiles) { fs_context.fdAllocator->reserve(0); // stdin fs_context.fdAllocator->reserve(1); // stdout fs_context.fdAllocator->reserve(2); // stderr - files = new FileCache( - fs_context.maxFiles, MemAllocator(fs_context.localNuma, 64)); + files = new FileCache(fs_context.maxFiles, MemAllocator(fs_context.localNuma, 64)); fs_context.inodeAllocator = new BitsAllocator(fs_context.maxInodes); fs_context.inodeAllocator->reserve(0); // root - slots = new HashSlots( - fs_context.inodesMask + 1, MemAllocator(fs_context.localNuma, 64)); - inodes = new INodeCache( - fs_context.maxInodes, MemAllocator(fs_context.localNuma, 64)); + slots = new HashSlots(fs_context.inodesMask + 1, MemAllocator(fs_context.localNuma, 64)); + inodes = new INodeCache(fs_context.maxInodes, MemAllocator(fs_context.localNuma, 64)); root = &(*inodes)[0]; root->create(0, 0, "root", FASTFS_DIR); // read super block - char* addr = (char*) spdk_dma_zmalloc_socket( - fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); - ByteBuffer* buffer = new ByteBuffer(addr, fs_context.blockSize); + char *addr = (char *)spdk_dma_zmalloc_socket(fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); + ByteBuffer *buffer = new ByteBuffer(addr, fs_context.blockSize); buffer->private_data = this; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, 0, 
fs_context.blockSize, loadCheckpoint, buffer); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + 0, + fs_context.blockSize, + loadCheckpoint, + buffer); } void FastFS::dumpInfo() { journal->dumpInfo(); SPDK_NOTICELOG("inodes %d, free extents %d, first slot %d\n", - fs_context.inodeAllocator->getAllocated(), - fs_context.allocator->getFree(), fs_context.allocator->getLowestFreeIndex()); + fs_context.inodeAllocator->getAllocated(), + fs_context.allocator->getFree(), + fs_context.allocator->getLowestFreeIndex()); } void FastFS::unmount() { dumpInfo(); - for (auto& file : *files) { + for (auto &file : *files) { file.close(); } - for (auto& inode : *inodes) { + for (auto &inode : *inodes) { inode.unlink(); } @@ -214,15 +216,14 @@ void FastFS::unmount() { } void FastFS::initObjPool(int poolSize) { - fs_ops = new std::vector>( - MemAllocator(fs_context.localNuma, 64)); - buffers = new std::vector>( - MemAllocator(fs_context.localNuma, 64)); + fs_ops = + new std::vector>(MemAllocator(fs_context.localNuma, 64)); + buffers = new std::vector>(MemAllocator(fs_context.localNuma, 64)); fs_ops->reserve(poolSize); buffers->reserve(poolSize); for (int i = 0; i < poolSize; i++) { - char* buffer = (char*) spdk_dma_zmalloc_socket( - fs_context.extentSize, fs_context.bufAlign, NULL, fs_context.localNuma); + char *buffer = + (char *)spdk_dma_zmalloc_socket(fs_context.extentSize, fs_context.bufAlign, NULL, fs_context.localNuma); buffers->emplace_back(buffer, fs_context.extentSize); } for (int i = 0; i < poolSize - 1; i++) { @@ -237,36 +238,35 @@ void FastFS::initObjPool(int poolSize) { buf_head = &(*buffers)[0]; } -fs_op_context* FastFS::allocFsOp() { - fs_op_context* res = op_head; +fs_op_context *FastFS::allocFsOp() { + fs_op_context *res = op_head; if (op_head) { op_head = op_head->next; } return res; } -void FastFS::freeFsOp(fs_op_context* fs_op) { +void FastFS::freeFsOp(fs_op_context *fs_op) { fs_op->next = op_head; op_head = fs_op; } // 
TODO chenxu14 consider NULL -ByteBuffer* FastFS::allocBuffer() { - ByteBuffer* res = buf_head; +ByteBuffer *FastFS::allocBuffer() { + ByteBuffer *res = buf_head; if (buf_head) { buf_head = buf_head->next; } return res; } -ByteBuffer* FastFS::allocReadBuffer(uint64_t offset, uint32_t len) { +ByteBuffer *FastFS::allocReadBuffer(uint64_t offset, uint32_t len) { uint64_t extentOffset = offset & fs_context.extentMask; uint64_t blockOffset = extentOffset & fs_context.blockMask; uint64_t startOffset = extentOffset - blockOffset; - uint64_t endOffset = - (extentOffset + len + fs_context.blockMask) & ~(fs_context.blockMask); + uint64_t endOffset = (extentOffset + len + fs_context.blockMask) & ~(fs_context.blockMask); uint32_t capacity = endOffset - startOffset; - ByteBuffer* buff = nullptr; + ByteBuffer *buff = nullptr; if (capacity <= fs_context.extentSize) { buff = allocBuffer(); } else { @@ -276,9 +276,8 @@ ByteBuffer* FastFS::allocReadBuffer(uint64_t offset, uint32_t len) { return buff; } -ByteBuffer* FastFS::allocWriteBuffer(uint32_t len) { - uint32_t capacity = - ((len + fs_context.blockMask) & ~(fs_context.blockMask)); +ByteBuffer *FastFS::allocWriteBuffer(uint32_t len) { + uint32_t capacity = ((len + fs_context.blockMask) & ~(fs_context.blockMask)); if (capacity <= fs_context.extentSize) { return allocBuffer(); } else { @@ -286,7 +285,7 @@ ByteBuffer* FastFS::allocWriteBuffer(uint32_t len) { } } -void FastFS::freeBuffer(ByteBuffer* buffer) { +void FastFS::freeBuffer(ByteBuffer *buffer) { if (buffer->alloc_) { delete buffer; return; diff --git a/core/FastFS.h b/core/FastFS.h index 1f6d710..9de63b2 100644 --- a/core/FastFS.h +++ b/core/FastFS.h @@ -26,14 +26,14 @@ using INodeCache = std::vector>; using ExtentMap = std::unordered_map>; struct fs_context_t { - const char* bdev_name; - struct spdk_bdev* bdev = nullptr; - struct spdk_bdev_desc* bdev_desc = nullptr; - struct spdk_io_channel* bdev_io_channel = nullptr; - BlockAllocator* allocator = nullptr; - BitsAllocator* 
fdAllocator = nullptr; - BitsAllocator* inodeAllocator = nullptr; - FastFS* fastfs; + const char *bdev_name; + struct spdk_bdev *bdev = nullptr; + struct spdk_bdev_desc *bdev_desc = nullptr; + struct spdk_io_channel *bdev_io_channel = nullptr; + BlockAllocator *allocator = nullptr; + BitsAllocator *fdAllocator = nullptr; + BitsAllocator *inodeAllocator = nullptr; + FastFS *fastfs; uint64_t blocks; uint32_t blockSize; uint32_t blockBits; @@ -53,15 +53,15 @@ struct fs_context_t { }; struct fs_op_context { - fs_op_context* next; + fs_op_context *next; op_cb callback; - void* cb_args; - FastFS* fastfs; + void *cb_args; + FastFS *fastfs; char private_data[96]; // align 128 bytes }; class WriteExtent { - public : + public: uint64_t offset; // bdev offset uint32_t len; uint32_t bufOff; @@ -70,11 +70,11 @@ class WriteExtent { uint32_t extentId; uint32_t extentOff; uint32_t newId; - fs_op_context* op_ctx; + fs_op_context *op_ctx; }; class WriteContext { - public : + public: uint32_t fd; uint32_t count; uint64_t offset; @@ -82,71 +82,68 @@ class WriteContext { bool append; bool direct; bool success; - FastFile* file; - ByteBuffer* direct_buff; - const char* write_buff; + FastFile *file; + ByteBuffer *direct_buff; + const char *write_buff; // internal use uint32_t writingSize; uint32_t writedExtents; // clear carefully to avoid memory leak std::list writeExtents; - public : + + public: WriteContext() : pwrite(false), append(false), direct(false), success(true) {} - void reset(FastFile* f, uint64_t off); - void dirctWrite( - FastFS* fs, int handle, uint64_t off, uint32_t len, const char* data); - uint32_t remainingSize() { - return count - writingSize; - } + void reset(FastFile *f, uint64_t off); + void dirctWrite(FastFS *fs, int handle, uint64_t off, uint32_t len, const char *data); + uint32_t remainingSize() { return count - writingSize; } }; class FSyncContext { - public : + public: uint32_t fd; - FastFile* file; - ExtentMap* dirtyExtents; - public : - void 
serialize(ByteBuffer* buf); + FastFile *file; + ExtentMap *dirtyExtents; + + public: + void serialize(ByteBuffer *buf); }; class ReadContext { - public : + public: uint32_t fd; uint32_t count; uint64_t offset; bool pread; bool direct; bool success; - FastFile* file; - char* read_buff; - ByteBuffer* direct_buff; + FastFile *file; + char *read_buff; + ByteBuffer *direct_buff; uint32_t direct_cursor; // internal use uint32_t readingSize; uint32_t extentsToRead; uint32_t extentsReaded; - public : - void reset(FastFile* f); - void dirctRead(FastFS* fs, int handle, uint64_t off, uint32_t len); - uint32_t remainingSize() { - return count - readingSize; - } + public: + void reset(FastFile *f); + void dirctRead(FastFS *fs, int handle, uint64_t off, uint32_t len); + uint32_t remainingSize() { return count - readingSize; } }; class FastFile { - public: + public: uint32_t flags_; uint64_t pos_; - FastInode* inode_; - ByteBuffer* tail_block; + FastInode *inode_; + ByteBuffer *tail_block; - public: + public: FastFile() : flags_(0), pos_(0), inode_(nullptr), tail_block(nullptr) {} - void open(uint32_t flags, FastInode* inode); + void open(uint32_t flags, FastInode *inode); void close(); - ByteBuffer* getTailBlock(); + ByteBuffer *getTailBlock(); void clearTailBlock() { if (tail_block) { @@ -156,82 +153,81 @@ class FastFile { }; class FastFS { - public: + public: static fs_context_t fs_context; bool ready = false; - HashSlots* slots = nullptr; - INodeCache* inodes = nullptr; - FileCache* files = nullptr; - std::vector>* fs_ops = nullptr; - std::vector>* buffers = nullptr; - fs_op_context* op_head = nullptr; - ByteBuffer* buf_head = nullptr; - FastJournal* journal = nullptr; - FastCkpt* checkpoint = nullptr; - FastInode* root = nullptr; - - public: - FastFS(const char* bdev); + HashSlots *slots = nullptr; + INodeCache *inodes = nullptr; + FileCache *files = nullptr; + std::vector> *fs_ops = nullptr; + std::vector> *buffers = nullptr; + fs_op_context *op_head = nullptr; + ByteBuffer 
*buf_head = nullptr; + FastJournal *journal = nullptr; + FastCkpt *checkpoint = nullptr; + FastInode *root = nullptr; + + public: + FastFS(const char *bdev); void format(uint32_t extentSize, fs_cb callback, bool skipJournal = false); void mount(fs_cb callback, uint32_t maxInodes = 2097152, uint32_t maxFiles = 65536); void unmount(); - FastInode* lookup(uint32_t parentId, std::string_view name) const; + FastInode *lookup(uint32_t parentId, std::string_view name) const; /** * lookup for write * pre, target inode's previous PTR, when hash conflict happen * ino, target inode's ino * return true when pre is HEAD */ - bool lookup(uint32_t parentId, std::string_view name, - uint32_t& pre, uint32_t& ino) const; - FastInode* status(const std::string& path) const; + bool lookup(uint32_t parentId, std::string_view name, uint32_t &pre, uint32_t &ino) const; + FastInode *status(const std::string &path) const; int open(uint32_t ino, uint32_t flags); - int open(const std::string& path, uint32_t flags); + int open(const std::string &path, uint32_t flags); int close(uint32_t fd); - int applyCreate(CreateContext* createCtx); - void create(fs_op_context& ctx); + int applyCreate(CreateContext *createCtx); + void create(fs_op_context &ctx); // mkdir -p - void createRecursive(const std::string& path, op_cb callback, void* args); + void createRecursive(const std::string &path, op_cb callback, void *args); - int applyTruncate(TruncateContext* truncateCtx); - void truncate(fs_op_context& ctx); + int applyTruncate(TruncateContext *truncateCtx); + void truncate(fs_op_context &ctx); - int applyRemove(DeleteContext* delCtx); - void remove(fs_op_context& ctx); + int applyRemove(DeleteContext *delCtx); + void remove(fs_op_context &ctx); - int applyRename(RenameContext* renameCtx); - void rename(fs_op_context& ctx); + int applyRename(RenameContext *renameCtx); + void rename(fs_op_context &ctx); - void writeComplete(fs_op_context* ctx); - void write(fs_op_context& ctx); - void fsync(fs_op_context& 
ctx); + void writeComplete(fs_op_context *ctx); + void write(fs_op_context &ctx); + void fsync(fs_op_context &ctx); int64_t seek(uint32_t fd, uint64_t offset, int whence); - void read(fs_op_context& ctx); + void read(fs_op_context &ctx); void initObjPool(int poolSize); - fs_op_context* allocFsOp(); - void freeFsOp(fs_op_context* fs_op); + fs_op_context *allocFsOp(); + void freeFsOp(fs_op_context *fs_op); - ByteBuffer* allocBuffer(); - ByteBuffer* allocReadBuffer(uint64_t offset, uint32_t len); - ByteBuffer* allocWriteBuffer(uint32_t len); - void freeBuffer(ByteBuffer* buffer); + ByteBuffer *allocBuffer(); + ByteBuffer *allocReadBuffer(uint64_t offset, uint32_t len); + ByteBuffer *allocWriteBuffer(uint32_t len); + void freeBuffer(ByteBuffer *buffer); void dumpInfo(); - private: - void removeRecursive(FastInode* target); + private: + void removeRecursive(FastInode *target); inline uint32_t hashSlot(uint32_t parentId, std::string_view name) const; }; class FastInode { - public: + public: uint32_t parentId_; uint32_t next_; uint32_t ino_; @@ -241,25 +237,25 @@ class FastInode { FileType type_; uint64_t size_; union { - std::vector* extents_; - std::set* children_; + std::vector *extents_; + std::set *children_; }; - ExtentMap* dirtyExtents; + ExtentMap *dirtyExtents; std::string name_; - public: + + public: FastInode() : next_(0), refCnts_(0), status_(0), dirtyExtents(nullptr) {} - void create( - uint32_t ino, uint32_t parentId, std::string_view name, FileType type); + void create(uint32_t ino, uint32_t parentId, std::string_view name, FileType type); void unlink(); - bool getExtent(uint64_t offset, uint32_t& index, uint32_t& extentId); + bool getExtent(uint64_t offset, uint32_t &index, uint32_t &extentId); }; class FastCkpt { - public: - const fs_context_t& fs_context; + public: + const fs_context_t &fs_context; INodeCache::const_iterator cusor; std::set::const_iterator dentryCusor; std::vector::const_iterator extentCusor; @@ -275,16 +271,15 @@ class FastCkpt { 
INodeFile inodeProto; fs_cb ckpt_cb = nullptr; - public: - FastCkpt(const fs_context_t& context) : fs_context(context) {} + public: + FastCkpt(const fs_context_t &context) : fs_context(context) {} ~FastCkpt() {} void loadImage(); void checkpoint(fs_cb callback); - bool parseExtent( - ByteBuffer* extentBuf, uint32_t& nextExtent, INodeCache& inodes); + bool parseExtent(ByteBuffer *extentBuf, uint32_t &nextExtent, INodeCache &inodes); inline int releaseExtents() { int count = 0; - for (auto& extentId : extents_) { + for (auto &extentId : extents_) { count++; fs_context.allocator->release(extentId); } diff --git a/core/FastInode.cpp b/core/FastInode.cpp index 7a15850..14c963c 100644 --- a/core/FastInode.cpp +++ b/core/FastInode.cpp @@ -6,7 +6,7 @@ #include "FastFS.h" #include "xxh3.h" -void FastFile::open(uint32_t flags, FastInode* inode) { +void FastFile::open(uint32_t flags, FastInode *inode) { this->flags_ = flags; this->inode_ = inode; this->tail_block = nullptr; @@ -26,8 +26,7 @@ void FastFile::close() { } } -void FastInode::create(uint32_t ino, uint32_t parentId, - std::string_view name, FileType type) { +void FastInode::create(uint32_t ino, uint32_t parentId, std::string_view name, FileType type) { this->ino_ = ino; this->parentId_ = parentId; this->next_ = 0; @@ -46,7 +45,7 @@ void FastInode::create(uint32_t ino, uint32_t parentId, void FastInode::unlink() { if (--refCnts_ == 0) { if (type_ == FASTFS_REGULAR_FILE) { - for (auto& extentId : *extents_) { + for (auto &extentId : *extents_) { if (extentId != UINT32_MAX) { FastFS::fs_context.allocator->release(extentId); } @@ -55,7 +54,7 @@ void FastInode::unlink() { if (dirtyExtents) { // clear dirty extents - for (auto& [index, extentInfo] : *dirtyExtents) { + for (auto &[index, extentInfo] : *dirtyExtents) { if (extentInfo.first != UINT32_MAX) { FastFS::fs_context.allocator->release(extentInfo.first); } @@ -71,7 +70,7 @@ void FastInode::unlink() { } } -bool FastInode::getExtent(uint64_t offset, uint32_t& index, 
uint32_t& extentId) { +bool FastInode::getExtent(uint64_t offset, uint32_t &index, uint32_t &extentId) { index = static_cast(offset >> FastFS::fs_context.extentBits); if (index < extents_->size()) { extentId = (*extents_)[index]; @@ -87,10 +86,10 @@ uint32_t FastFS::hashSlot(uint32_t parentId, std::string_view name) const { return (h1 ^ (h2 << 1)) & fs_context.inodesMask; } -FastInode* FastFS::lookup(uint32_t parentId, std::string_view name) const { +FastInode *FastFS::lookup(uint32_t parentId, std::string_view name) const { uint32_t ino = (*slots)[hashSlot(parentId, name)]; while (ino) { - FastInode& inode = (*inodes)[ino]; + FastInode &inode = (*inodes)[ino]; if ((parentId == inode.parentId_) && (name == inode.name_)) { return &inode; } @@ -99,12 +98,11 @@ FastInode* FastFS::lookup(uint32_t parentId, std::string_view name) const { return nullptr; } -bool FastFS::lookup(uint32_t parentId, std::string_view name, - uint32_t& pre, uint32_t& ino) const { +bool FastFS::lookup(uint32_t parentId, std::string_view name, uint32_t &pre, uint32_t &ino) const { bool head = true; pre = hashSlot(parentId, name); uint32_t inodeId = (*slots)[pre]; - FastInode* inode = nullptr; + FastInode *inode = nullptr; while (inodeId) { inode = &(*inodes)[inodeId]; if ((parentId == inode->parentId_) && (name == inode->name_)) { @@ -118,8 +116,8 @@ bool FastFS::lookup(uint32_t parentId, std::string_view name, return head; } -FastInode* FastFS::status(const std::string& path) const { - FastInode* cusor = root; +FastInode *FastFS::status(const std::string &path) const { + FastInode *cusor = root; size_t start = 1; size_t i = 1; for (; i < path.size(); i++) { @@ -149,8 +147,8 @@ int FastFS::open(uint32_t ino, uint32_t flags) { return fd; } -int FastFS::open(const std::string& path, uint32_t flags) { - FastInode* inode = status(path); +int FastFS::open(const std::string &path, uint32_t flags) { + FastInode *inode = status(path); if (!inode) { return -1; } @@ -176,7 +174,7 @@ int 
FastFS::close(uint32_t fd) { int64_t FastFS::seek(uint32_t fd, uint64_t offset, int whence) { if (fd < fs_context.maxFiles) { - FastFile& file = (*files)[fd]; + FastFile &file = (*files)[fd]; uint64_t pos = offset; // SEEK_SET if (whence == SEEK_CUR) { pos = file.pos_ + offset; @@ -189,20 +187,20 @@ int64_t FastFS::seek(uint32_t fd, uint64_t offset, int whence) { return -1; } -static void writeJournalComplete(void* cb_args, int code) { - fs_op_context* ctx = reinterpret_cast(cb_args); +static void writeJournalComplete(void *cb_args, int code) { + fs_op_context *ctx = reinterpret_cast(cb_args); ctx->fastfs->journal->freeEditOp(); ctx->callback(ctx->cb_args, code); } -int FastFS::applyCreate(CreateContext* createCtx) { +int FastFS::applyCreate(CreateContext *createCtx) { if (!fs_context.inodeAllocator->getFree()) { SPDK_WARNLOG("no free INode.\n"); return -1; } uint32_t parentId = createCtx->parentId; - FastInode& parent = (*inodes)[parentId]; + FastInode &parent = (*inodes)[parentId]; if (parent.status_ != 1) { return -2; // parent not exist } @@ -227,7 +225,7 @@ int FastFS::applyCreate(CreateContext* createCtx) { return -4; // no available inodes } - FastInode& target = (*inodes)[ino]; + FastInode &target = (*inodes)[ino]; target.create(ino, parentId, createCtx->name, createCtx->type); target.mode_ = createCtx->mode; @@ -240,8 +238,8 @@ int FastFS::applyCreate(CreateContext* createCtx) { return 0; } -void FastFS::create(fs_op_context& ctx) { - CreateContext* createCtx = reinterpret_cast(ctx.private_data); +void FastFS::create(fs_op_context &ctx) { + CreateContext *createCtx = reinterpret_cast(ctx.private_data); int code = applyCreate(createCtx); if (code != 0) { return ctx.callback(ctx.cb_args, code); @@ -250,7 +248,7 @@ void FastFS::create(fs_op_context& ctx) { return ctx.callback(ctx.cb_args, 0); } - EditOp* editOp = journal->allocEditOp(); + EditOp *editOp = journal->allocEditOp(); editOp->opctx = createCtx; editOp->type = 0; editOp->size = 14 + 
createCtx->name.size(); @@ -259,9 +257,9 @@ void FastFS::create(fs_op_context& ctx) { editOp->phrase = !editOp->phrase; } -static void createPath(void* cb_args, int code) { - fs_op_context* ctx = reinterpret_cast(cb_args); - CreateContext* createCtx = reinterpret_cast(ctx->private_data); +static void createPath(void *cb_args, int code) { + fs_op_context *ctx = reinterpret_cast(cb_args); + CreateContext *createCtx = reinterpret_cast(ctx->private_data); if (code != 0) { createCtx->callback(createCtx->args, code); ctx->fastfs->freeFsOp(ctx); @@ -276,10 +274,9 @@ static void createPath(void* cb_args, int code) { size_t i = createCtx->pos; for (; i < createCtx->path.size(); i++) { if (createCtx->path[i] == kDelimiter) { - createCtx->name = std::string_view( - createCtx->path.data() + createCtx->pos, i - createCtx->pos); + createCtx->name = std::string_view(createCtx->path.data() + createCtx->pos, i - createCtx->pos); createCtx->pos = i + 1; - FastInode* inode = ctx->fastfs->lookup(createCtx->parentId, createCtx->name); + FastInode *inode = ctx->fastfs->lookup(createCtx->parentId, createCtx->name); if (!inode) { return ctx->fastfs->create(*ctx); } else { @@ -288,10 +285,9 @@ static void createPath(void* cb_args, int code) { } } if (createCtx->pos != i) { // path not end with '/' - createCtx->name = std::string_view( - createCtx->path.data() + createCtx->pos, i - createCtx->pos); + createCtx->name = std::string_view(createCtx->path.data() + createCtx->pos, i - createCtx->pos); createCtx->pos = i; // make position reach end - FastInode* inode = ctx->fastfs->lookup(createCtx->parentId, createCtx->name); + FastInode *inode = ctx->fastfs->lookup(createCtx->parentId, createCtx->name); if (!inode) { // create last return ctx->fastfs->create(*ctx); } @@ -300,9 +296,9 @@ static void createPath(void* cb_args, int code) { ctx->fastfs->freeFsOp(ctx); } -void FastFS::createRecursive(const std::string& path, op_cb callback, void* args) { - fs_op_context* ctx = allocFsOp(); - 
CreateContext* createCtx = new (ctx->private_data) CreateContext(); +void FastFS::createRecursive(const std::string &path, op_cb callback, void *args) { + fs_op_context *ctx = allocFsOp(); + CreateContext *createCtx = new (ctx->private_data) CreateContext(); createCtx->parentId = 0; createCtx->mode = 493; createCtx->type = FASTFS_DIR; @@ -315,8 +311,8 @@ void FastFS::createRecursive(const std::string& path, op_cb callback, void* args return createPath(ctx, 0); } -int FastFS::applyTruncate(TruncateContext* truncateCtx) { - FastInode& inode = (*inodes)[truncateCtx->ino]; +int FastFS::applyTruncate(TruncateContext *truncateCtx) { + FastInode &inode = (*inodes)[truncateCtx->ino]; if (inode.status_ != 1) { return -1; // file not find } @@ -342,9 +338,8 @@ int FastFS::applyTruncate(TruncateContext* truncateCtx) { return 0; } -void FastFS::truncate(fs_op_context& ctx) { - TruncateContext* truncateCtx = - reinterpret_cast(ctx.private_data); +void FastFS::truncate(fs_op_context &ctx) { + TruncateContext *truncateCtx = reinterpret_cast(ctx.private_data); int code = applyTruncate(truncateCtx); if (code != 0) { return ctx.callback(ctx.cb_args, code); @@ -353,7 +348,7 @@ void FastFS::truncate(fs_op_context& ctx) { return ctx.callback(ctx.cb_args, 0); } - EditOp* editOp = journal->allocEditOp(); + EditOp *editOp = journal->allocEditOp(); editOp->opctx = ctx.private_data; editOp->type = 1; editOp->size = 12; /*ino(4) + size(8)*/ @@ -362,11 +357,11 @@ void FastFS::truncate(fs_op_context& ctx) { editOp->phrase = !editOp->phrase; } -void FastFS::removeRecursive(FastInode* dir) { +void FastFS::removeRecursive(FastInode *dir) { uint32_t pre = 0; uint32_t ino = 0; - for (auto& inodeId : *(dir->children_)) { - FastInode& inode = (*inodes)[inodeId]; + for (auto &inodeId : *(dir->children_)) { + FastInode &inode = (*inodes)[inodeId]; bool head = lookup(inode.parentId_, inode.name_, pre, ino); if (!ino) { SPDK_WARNLOG("inode lookup failed, this should not happen!\n"); @@ -380,25 +375,25 @@ 
void FastFS::removeRecursive(FastInode* dir) { inode.next_ = 0; switch (inode.type_) { - case FASTFS_REGULAR_FILE: { - inode.status_ = 2; - inode.unlink(); - break; - } - case FASTFS_DIR: { - removeRecursive(&inode); - break; - } - default: - break; + case FASTFS_REGULAR_FILE: { + inode.status_ = 2; + inode.unlink(); + break; + } + case FASTFS_DIR: { + removeRecursive(&inode); + break; + } + default: + break; } } dir->status_ = 2; dir->unlink(); } -int FastFS::applyRemove(DeleteContext* delCtx) { - FastInode& parent = (*inodes)[delCtx->parentId]; +int FastFS::applyRemove(DeleteContext *delCtx) { + FastInode &parent = (*inodes)[delCtx->parentId]; if (parent.status_ != 1) { return -1; // parent not exist } @@ -410,9 +405,8 @@ int FastFS::applyRemove(DeleteContext* delCtx) { return -2; // file not exist } - FastInode& target = (*inodes)[ino]; - if (target.type_ == FASTFS_DIR && !delCtx->recursive - && target.children_->size() > 0) { + FastInode &target = (*inodes)[ino]; + if (target.type_ == FASTFS_DIR && !delCtx->recursive && target.children_->size() > 0) { return -3; // dir not empty } @@ -426,23 +420,23 @@ int FastFS::applyRemove(DeleteContext* delCtx) { target.next_ = 0; switch (target.type_) { - case FASTFS_REGULAR_FILE: { - target.status_ = 2/*delete*/; - target.unlink(); - break; - } - case FASTFS_DIR: { - removeRecursive(&target); - break; - } - default: - break; + case FASTFS_REGULAR_FILE: { + target.status_ = 2 /*delete*/; + target.unlink(); + break; + } + case FASTFS_DIR: { + removeRecursive(&target); + break; + } + default: + break; } return 0; } -void FastFS::remove(fs_op_context& ctx) { - DeleteContext* delCtx = reinterpret_cast(ctx.private_data); +void FastFS::remove(fs_op_context &ctx) { + DeleteContext *delCtx = reinterpret_cast(ctx.private_data); int code = applyRemove(delCtx); if (code != 0) { return ctx.callback(ctx.cb_args, code); @@ -451,7 +445,7 @@ void FastFS::remove(fs_op_context& ctx) { return ctx.callback(ctx.cb_args, 0); } - EditOp* editOp 
= journal->allocEditOp(); + EditOp *editOp = journal->allocEditOp(); editOp->opctx = delCtx; editOp->type = 2; editOp->size = 6 + delCtx->name.size(); @@ -460,9 +454,9 @@ void FastFS::remove(fs_op_context& ctx) { editOp->phrase = !editOp->phrase; } -int FastFS::applyRename(RenameContext* renameCtx) { - FastInode& parentOld = (*inodes)[renameCtx->olddir]; - FastInode& parentNew = (*inodes)[renameCtx->newdir]; +int FastFS::applyRename(RenameContext *renameCtx) { + FastInode &parentOld = (*inodes)[renameCtx->olddir]; + FastInode &parentNew = (*inodes)[renameCtx->newdir]; if (parentOld.status_ != 1 || parentNew.status_ != 1) { return -1; } @@ -477,8 +471,8 @@ int FastFS::applyRename(RenameContext* renameCtx) { uint32_t tgtPre = 0; uint32_t tgtIno = 0; bool tgtHead = lookup(renameCtx->newdir, renameCtx->newname, tgtPre, tgtIno); - FastInode& source = (*inodes)[srcIno]; - FastInode& target = (*inodes)[tgtIno]; + FastInode &source = (*inodes)[srcIno]; + FastInode &target = (*inodes)[tgtIno]; if (tgtIno && (target.type_ != source.type_)) { return -3; } @@ -517,8 +511,8 @@ int FastFS::applyRename(RenameContext* renameCtx) { return 0; } -void FastFS::rename(fs_op_context& ctx) { - RenameContext* renameCtx = reinterpret_cast(ctx.private_data); +void FastFS::rename(fs_op_context &ctx) { + RenameContext *renameCtx = reinterpret_cast(ctx.private_data); int code = applyRename(renameCtx); if (code != 0) { return ctx.callback(ctx.cb_args, code); @@ -530,7 +524,7 @@ void FastFS::rename(fs_op_context& ctx) { int32_t size = 10; /*olddir(4) + newdir(4) + oldnameLen(1) + newnameLen(1)*/ size += renameCtx->oldname.size(); size += renameCtx->newname.size(); - EditOp* editOp = journal->allocEditOp(); + EditOp *editOp = journal->allocEditOp(); editOp->opctx = renameCtx; editOp->type = 5; editOp->size = size; diff --git a/core/FastJournal.cpp b/core/FastJournal.cpp index 8a89429..a604cb1 100644 --- a/core/FastJournal.cpp +++ b/core/FastJournal.cpp @@ -5,19 +5,18 @@ #include "FastFS.h" 
-static void replayNextExtent( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg); +static void replayNextExtent(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); static int journalPollEditOp(void *arg) { - FastJournal* journal = reinterpret_cast(arg); + FastJournal *journal = reinterpret_cast(arg); return journal->pollEditOp(); } -FastJournal::FastJournal(const fs_context_t& context) : fs_context(context) { +FastJournal::FastJournal(const fs_context_t &context) : fs_context(context) { extentBlocks = fs_context.allocator->getExtentBlocks(); epoch = fs_context.superBlock.epoch; - tail_block_buffer = (char*) spdk_dma_zmalloc_socket( - fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); + tail_block_buffer = + (char *)spdk_dma_zmalloc_socket(fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); tail_block = new ByteBuffer(tail_block_buffer, fs_context.blockSize); resetTailBlock(); initObjPool(); @@ -38,10 +37,10 @@ FastJournal::~FastJournal() { } } -static void parseExtent(ByteBuffer* buffer, uint32_t startOff, uint32_t skipOps) { - FastJournal* journal = reinterpret_cast(buffer->private_data); - auto& fs_context = journal->fs_context; - FastFS* fastfs = fs_context.fastfs; +static void parseExtent(ByteBuffer *buffer, uint32_t startOff, uint32_t skipOps) { + FastJournal *journal = reinterpret_cast(buffer->private_data); + auto &fs_context = journal->fs_context; + FastFS *fastfs = fs_context.fastfs; char flag = 1; uint32_t offset = startOff; @@ -49,24 +48,25 @@ static void parseExtent(ByteBuffer* buffer, uint32_t startOff, uint32_t skipOps) while (flag == 1 && offset < buffer->limit()) { buffer->position(offset).getByte(flag); if (flag != 0 && flag != 1) { - SPDK_WARNLOG("block %ld's next flag incorrect!\n", - (journal->offset + offset) >> fs_context.blockBits); + SPDK_WARNLOG("block %ld's next flag incorrect!\n", (journal->offset + offset) >> fs_context.blockBits); blockCorect = false; break; } uint32_t epochNum = 
0; if (!buffer->read(epochNum) || epochNum != journal->epoch) { SPDK_WARNLOG("block %ld's epoch is %d, expect %d\n", - (journal->offset + offset) >> fs_context.blockBits, - epochNum, journal->epoch); + (journal->offset + offset) >> fs_context.blockBits, + epochNum, + journal->epoch); blockCorect = false; break; } uint64_t txid = 0; if (!buffer->read(txid) || txid != journal->txid) { SPDK_WARNLOG("block %ld's txid is %ld, but expect %ld\n", - (journal->offset + offset) >> fs_context.blockBits, - txid, journal->txid); + (journal->offset + offset) >> fs_context.blockBits, + txid, + journal->txid); blockCorect = false; break; } @@ -86,78 +86,78 @@ static void parseExtent(ByteBuffer* buffer, uint32_t startOff, uint32_t skipOps) } // TODO chenxu14 consider disk data corruption switch (opType) { - case 0 : { // createOp - journal->createCtx.deserialize(buffer); - if (fastfs->applyCreate(&journal->createCtx) != 0) { - parsed = false; - } - break; + case 0: { // createOp + journal->createCtx.deserialize(buffer); + if (fastfs->applyCreate(&journal->createCtx) != 0) { + parsed = false; } - case 1 : { // truncateOp - journal->truncateCtx.deserialize(buffer); - if (fastfs->applyTruncate(&journal->truncateCtx) != 0) { - parsed = false; - } - break; + break; + } + case 1: { // truncateOp + journal->truncateCtx.deserialize(buffer); + if (fastfs->applyTruncate(&journal->truncateCtx) != 0) { + parsed = false; + } + break; + } + case 2: { // deleteOp + journal->delCtx.deserialize(buffer); + if (fastfs->applyRemove(&journal->delCtx) != 0) { + parsed = false; } - case 2 : { // deleteOp - journal->delCtx.deserialize(buffer); - if (fastfs->applyRemove(&journal->delCtx) != 0) { + break; + } + case 3: { // allocOp + buffer->read(journal->nextExtentId); + fs_context.allocator->reserve(journal->nextExtentId); + journal->extents_.push_front(journal->nextExtentId); + break; + } + case 4: { // fsyncOp + uint32_t ino = 0; + buffer->read(ino); + FastInode &inode = (*fastfs->inodes)[ino]; + 
uint64_t size = 0; + buffer->read(size); + inode.size_ = size; + uint16_t counts = 0; + buffer->read(counts); + auto &extents = *inode.extents_; + for (int i = 0; i < counts; i++) { + uint32_t index = 0; + uint32_t extentId = 0; + if (!buffer->read(index) || !buffer->read(extentId) || + (size >> fs_context.extentBits) < index) { parsed = false; + break; } - break; - } - case 3 : { // allocOp - buffer->read(journal->nextExtentId); - fs_context.allocator->reserve(journal->nextExtentId); - journal->extents_.push_front(journal->nextExtentId); - break; - } - case 4 : { // fsyncOp - uint32_t ino = 0; - buffer->read(ino); - FastInode& inode = (*fastfs->inodes)[ino]; - uint64_t size = 0; - buffer->read(size); - inode.size_ = size; - uint16_t counts = 0; - buffer->read(counts); - auto& extents = *inode.extents_; - for (int i = 0; i < counts; i++) { - uint32_t index = 0; - uint32_t extentId = 0; - if (!buffer->read(index) || !buffer->read(extentId) - || (size >> fs_context.extentBits) < index) { - parsed = false; - break; - } - for (uint32_t i = extents.size(); i <= index; i++) { - extents.emplace_back(UINT32_MAX); - } - if (extents[index] != UINT32_MAX) { - fs_context.allocator->release(extents[index]); - } - extents[index] = extentId; - fs_context.allocator->reserve(extentId); + for (uint32_t i = extents.size(); i <= index; i++) { + extents.emplace_back(UINT32_MAX); } - break; - } - case 5 : { // renameOp - journal->renameCtx.deserialize(buffer); - if (fastfs->applyRename(&journal->renameCtx) != 0) { - parsed = false; + if (extents[index] != UINT32_MAX) { + fs_context.allocator->release(extents[index]); } - break; + extents[index] = extentId; + fs_context.allocator->reserve(extentId); } - default: { + break; + } + case 5: { // renameOp + journal->renameCtx.deserialize(buffer); + if (fastfs->applyRename(&journal->renameCtx) != 0) { parsed = false; - SPDK_WARNLOG("Unknown operation type %d\n", opType); - break; } + break; + } + default: { + parsed = false; + 
SPDK_WARNLOG("Unknown operation type %d\n", opType); + break; + } } if (!parsed) { SPDK_WARNLOG("block %ld's OP record can't parse successfully!\n", - (journal->offset + offset) >> fs_context.blockBits); + (journal->offset + offset) >> fs_context.blockBits); } journal->replayOps++; } @@ -171,12 +171,15 @@ static void parseExtent(ByteBuffer* buffer, uint32_t startOff, uint32_t skipOps) // read next extent if (flag == 1 && journal->nextExtentId > 0) { buffer->clear(); - journal->offset = - static_cast(journal->nextExtentId) << fs_context.extentBits; + journal->offset = static_cast(journal->nextExtentId) << fs_context.extentBits; journal->curBlock = 0; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, journal->offset, fs_context.extentSize, - replayNextExtent, buffer); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + journal->offset, + fs_context.extentSize, + replayNextExtent, + buffer); return; } @@ -184,46 +187,41 @@ static void parseExtent(ByteBuffer* buffer, uint32_t startOff, uint32_t skipOps) journal->curBlock = (offset >> fs_context.blockBits); if (blockCorect) { // reset tail block - journal->tail_block->putBytes( - 0, buffer->p_buffer_ + offset, fs_context.blockSize); + journal->tail_block->putBytes(0, buffer->p_buffer_ + offset, fs_context.blockSize); journal->tail_block->position(buffer->position() & fs_context.blockMask); } else if (journal->offset != fs_context.extentSize) { // exclude format case - SPDK_WARNLOG( - "block %d's data not as expected, FastFS does't stop gracefully?\n", - journal->curBlock >> fs_context.blockBits); + SPDK_WARNLOG("block %d's data not as expected, FastFS does't stop gracefully?\n", + journal->curBlock >> fs_context.blockBits); } fastfs->freeBuffer(buffer); - SPDK_NOTICELOG( - "replay journal finished, total replay %ld OPs\n", journal->replayOps); + SPDK_NOTICELOG("replay journal finished, total replay %ld OPs\n", journal->replayOps); 
fastfs->dumpInfo(); fs_context.callback(fastfs, 0); } -static void replayNextExtent( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void replayNextExtent(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastJournal* journal = reinterpret_cast(buffer->private_data); - auto& fs_context = journal->fs_context; - FastFS* fastfs = fs_context.fastfs; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastJournal *journal = reinterpret_cast(buffer->private_data); + auto &fs_context = journal->fs_context; + FastFS *fastfs = fs_context.fastfs; journal->nextExtentId = 0; // clear next if (!success) { - fs_context.callback(fastfs, -8/*read extent failed*/); + fs_context.callback(fastfs, -8 /*read extent failed*/); return; } parseExtent(buffer, 0, 0); } -static void replayFirstExtent( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void replayFirstExtent(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastJournal* journal = reinterpret_cast(buffer->private_data); - auto& fs_context = journal->fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastJournal *journal = reinterpret_cast(buffer->private_data); + auto &fs_context = journal->fs_context; journal->txid = fs_context.superBlock.lastTxid; uint32_t skipBlocks = fs_context.superBlock.journalSkipBlocks; uint32_t skipOps = fs_context.superBlock.journalSkipOps; @@ -236,12 +234,16 @@ void FastJournal::logReplay() { // reserve journal's start extent fs_context.allocator->reserve(extentId); extents_.push_front(extentId); - ByteBuffer* extentBuf = fs_context.fastfs->allocBuffer(); + ByteBuffer *extentBuf = fs_context.fastfs->allocBuffer(); extentBuf->private_data = this; replayOps = 0; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - 
extentBuf->p_buffer_, offset, fs_context.extentSize, - replayFirstExtent, extentBuf); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + extentBuf->p_buffer_, + offset, + fs_context.extentSize, + replayFirstExtent, + extentBuf); } void FastJournal::startCheckpoint() { @@ -288,9 +290,8 @@ void FastJournal::writeComplete(int code) { } } -static void journalWriteComplete( - struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { - FastJournal* journal = reinterpret_cast(cb_arg); +static void journalWriteComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + FastJournal *journal = reinterpret_cast(cb_arg); spdk_bdev_free_io(bdev_io); journal->writeComplete(success ? 0 : -1); // TODO chenxu14 consider exit when journal write failed @@ -309,43 +310,43 @@ int FastJournal::pollEditOp() { if (inflights > 0 || tail_block_full) { return status; // previous transaction has not been commit } - EditOp* editOp = &(*editObjs)[commitIdx]; + EditOp *editOp = &(*editObjs)[commitIdx]; // iterate committed OP while (editOp->phrase ^ phrase) { int32_t size = editOp->size; - if (tail_block->writable(size + 5/*opcode(1) and size(4)*/)) { + if (tail_block->writable(size + 5 /*opcode(1) and size(4)*/)) { tail_block->pwrite(kNumOpsIndex, ++num_ops); tail_block->putByte(editOp->type); tail_block->write(size); switch (editOp->type) { - case 0 : { - CreateContext* createCtx = reinterpret_cast(editOp->opctx); - createCtx->serialize(tail_block); - break; - } - case 1 : { - TruncateContext* truncateCtx = reinterpret_cast(editOp->opctx); - truncateCtx->serialize(tail_block); - break; - } - case 2 : { - DeleteContext* delCtx = reinterpret_cast(editOp->opctx); - delCtx->serialize(tail_block); - break; - } - case 4 : { - FSyncContext* syncCtx = reinterpret_cast(editOp->opctx); - syncCtx->serialize(tail_block); - break; - } - case 5 : { - RenameContext* renameCtx = reinterpret_cast(editOp->opctx); - renameCtx->serialize(tail_block); - break; - } - default : { - 
break; - } + case 0: { + CreateContext *createCtx = reinterpret_cast(editOp->opctx); + createCtx->serialize(tail_block); + break; + } + case 1: { + TruncateContext *truncateCtx = reinterpret_cast(editOp->opctx); + truncateCtx->serialize(tail_block); + break; + } + case 2: { + DeleteContext *delCtx = reinterpret_cast(editOp->opctx); + delCtx->serialize(tail_block); + break; + } + case 4: { + FSyncContext *syncCtx = reinterpret_cast(editOp->opctx); + syncCtx->serialize(tail_block); + break; + } + case 5: { + RenameContext *renameCtx = reinterpret_cast(editOp->opctx); + renameCtx->serialize(tail_block); + break; + } + default: { + break; + } } // advance commitIdx @@ -362,9 +363,13 @@ int FastJournal::pollEditOp() { if (inflights > 0) { status = SPDK_POLLER_BUSY; } - int rc = spdk_bdev_write( - fs_context.bdev_desc, fs_context.bdev_io_channel, tail_block_buffer, - offset, fs_context.blockSize, journalWriteComplete, this); + int rc = spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + tail_block_buffer, + offset, + fs_context.blockSize, + journalWriteComplete, + this); if (rc == -ENOMEM) { // TODO chenxu14 do back off } @@ -374,9 +379,13 @@ int FastJournal::pollEditOp() { if (inflights > 0) { status = SPDK_POLLER_BUSY; - int rc = spdk_bdev_write( - fs_context.bdev_desc, fs_context.bdev_io_channel, tail_block_buffer, - offset, fs_context.blockSize, journalWriteComplete, this); + int rc = spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + tail_block_buffer, + offset, + fs_context.blockSize, + journalWriteComplete, + this); if (rc == -ENOMEM) { // TODO chenxu14 do back off } @@ -389,8 +398,7 @@ void FastJournal::initObjPool() { poolSize = DEFAULT_POOL_SIZE; limitIdx = poolSize - 1; poolMask = limitIdx; - editObjs = new std::vector>( - MemAllocator(fs_context.localNuma, 64)); + editObjs = new std::vector>(MemAllocator(fs_context.localNuma, 64)); editObjs->reserve(poolSize); for (int i = 0; i < poolSize; i++) { (*editObjs)[i].phrase = 
false; @@ -405,7 +413,11 @@ void FastJournal::dumpInfo() { if (nextExtentId > 0 && nextExtentId != extentId) { nextExtent = std::to_string(nextExtentId); } - SPDK_NOTICELOG( - "epoch %d, txid %ld, extentId %ld, nextExtent %s, blockId %d, num_ops %d\n", - epoch, txid, extentId, nextExtent.c_str(), curBlock, num_ops); + SPDK_NOTICELOG("epoch %d, txid %ld, extentId %ld, nextExtent %s, blockId %d, num_ops %d\n", + epoch, + txid, + extentId, + nextExtent.c_str(), + curBlock, + num_ops); } diff --git a/core/FastJournal.h b/core/FastJournal.h index e85bdad..88528ea 100644 --- a/core/FastJournal.h +++ b/core/FastJournal.h @@ -6,21 +6,22 @@ #ifndef FAST_JOURNAL_H_ #define FAST_JOURNAL_H_ -#include "ByteBuffer.h" +#include +#include +#include +#include +#include +#include + #include "Allocator.h" +#include "ByteBuffer.h" #include "Serialization.h" #include "spdk/bdev.h" #include "spdk/env.h" +#include "spdk/event.h" #include "spdk/log.h" #include "spdk/thread.h" -#include "spdk/event.h" #include "spdk/util.h" -#include -#include -#include -#include -#include -#include #define DEFAULT_POOL_SIZE 256 static constexpr int32_t kNumOpsIndex = 13; @@ -31,12 +32,12 @@ struct fs_context_t; class FastJournal; class FastFS; -typedef void (*fs_cb)(FastFS* fastfs, int code); +typedef void (*fs_cb)(FastFS *fastfs, int code); struct EditOp { op_cb callback; - void* cb_args; - void* opctx; + void *cb_args; + void *opctx; int32_t size; char type; bool phrase; @@ -44,9 +45,9 @@ struct EditOp { }; class FastJournal { - private: - struct spdk_poller* op_poller = nullptr; - char* tail_block_buffer = nullptr; + private: + struct spdk_poller *op_poller = nullptr; + char *tail_block_buffer = nullptr; bool tail_block_full = false; uint32_t extentBlocks; int allocIdx; @@ -54,20 +55,20 @@ class FastJournal { int poolSize; int poolMask; int commitIdx; - std::vector>* editObjs = nullptr; + std::vector> *editObjs = nullptr; int inflights = 0; bool phrase = false; bool waiting = false; - public: - const 
fs_context_t& fs_context; + public: + const fs_context_t &fs_context; DeleteContext delCtx; CreateContext createCtx; TruncateContext truncateCtx; RenameContext renameCtx; std::forward_list extents_; std::forward_list::const_iterator cusor; - ByteBuffer* tail_block = nullptr; + ByteBuffer *tail_block = nullptr; uint32_t nextExtentId = 0; uint64_t replayOps = 0; uint64_t offset = 0; @@ -76,8 +77,8 @@ class FastJournal { uint32_t num_ops = 0; uint32_t epoch; - public: - FastJournal(const fs_context_t& context); + public: + FastJournal(const fs_context_t &context); ~FastJournal(); int pollEditOp(); @@ -91,8 +92,8 @@ class FastJournal { void startCheckpoint(); - EditOp* allocEditOp() { - EditOp* res = nullptr; + EditOp *allocEditOp() { + EditOp *res = nullptr; if (allocIdx != limitIdx) { res = &(*editObjs)[allocIdx++]; allocIdx &= poolMask; @@ -112,7 +113,7 @@ class FastJournal { void dumpInfo(); -private: + private: inline void initObjPool(); /** @@ -121,8 +122,8 @@ class FastJournal { void allocNewExtent(uint32_t extentId) { nextExtentId = extentId; tail_block->pwrite(kNumOpsIndex, ++num_ops); - tail_block->putByte(3/*opType*/); - tail_block->write(4/*opSize*/); + tail_block->putByte(3 /*opType*/); + tail_block->write(4 /*opSize*/); tail_block->write(nextExtentId); extents_.push_front(extentId); } diff --git a/core/FastRead.cpp b/core/FastRead.cpp index c0da8b2..1a9a11b 100644 --- a/core/FastRead.cpp +++ b/core/FastRead.cpp @@ -5,7 +5,7 @@ #include "FastFS.h" -void ReadContext::reset(FastFile* f) { +void ReadContext::reset(FastFile *f) { file = f; extentsToRead = 0; extentsReaded = 0; @@ -13,8 +13,7 @@ void ReadContext::reset(FastFile* f) { success = true; } -void ReadContext::dirctRead( - FastFS* fs, int handle, uint64_t off, uint32_t len) { +void ReadContext::dirctRead(FastFS *fs, int handle, uint64_t off, uint32_t len) { fd = handle; pread = true; direct = true; @@ -24,21 +23,20 @@ void ReadContext::dirctRead( direct_cursor = direct_buff->position(); } -static void 
readExtentComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void readExtentComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_op_context* ctx = reinterpret_cast(buffer->private_data); - ReadContext* readCtx = reinterpret_cast(ctx->private_data); + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_op_context *ctx = reinterpret_cast(buffer->private_data); + ReadContext *readCtx = reinterpret_cast(ctx->private_data); readCtx->extentsReaded++; readCtx->success = (readCtx->success & success); if (!readCtx->direct) { if (success) { // use mark_ to decision target cursor - char* buf = readCtx->read_buff + buffer->mark_; + char *buf = readCtx->read_buff + buffer->mark_; memcpy(buf, buffer->getBuffer(), buffer->remaining()); } // [NOTICE] can't do this before memcpy @@ -46,25 +44,24 @@ static void readExtentComplete( } // all extent commit and complete - if ((readCtx->readingSize == readCtx->count) && - (readCtx->extentsReaded == readCtx->extentsToRead)) { + if ((readCtx->readingSize == readCtx->count) && (readCtx->extentsReaded == readCtx->extentsToRead)) { if (!readCtx->pread) { readCtx->file->pos_ += readCtx->count; } - int code = readCtx->success ? 0 : -4/*ReadDataFail*/; + int code = readCtx->success ? 0 : -4 /*ReadDataFail*/; ctx->callback(ctx->cb_args, code); } } -void FastFS::read(fs_op_context& ctx) { - ReadContext* readCtx = reinterpret_cast(ctx.private_data); +void FastFS::read(fs_op_context &ctx) { + ReadContext *readCtx = reinterpret_cast(ctx.private_data); if (readCtx->fd > fs_context.maxFiles) { return ctx.callback(ctx.cb_args, -1); } - FastFile& file = (*files)[readCtx->fd]; + FastFile &file = (*files)[readCtx->fd]; uint64_t offset = readCtx->pread ? 
readCtx->offset : file.pos_; if (offset > file.inode_->size_) { - return ctx.callback(ctx.cb_args, -2/*EOF*/); + return ctx.callback(ctx.cb_args, -2 /*EOF*/); } readCtx->reset(&file); @@ -90,7 +87,7 @@ void FastFS::read(fs_op_context& ctx) { extentOffset -= blockOffset; readCtx->extentsToRead++; - ByteBuffer* extentBuf = nullptr; + ByteBuffer *extentBuf = nullptr; if (readCtx->direct) { extentBuf = readCtx->direct_buff; } else { @@ -102,8 +99,7 @@ void FastFS::read(fs_op_context& ctx) { // most cases, one extent is enough readCtx->readingSize = readCtx->count; extentBuf->limit(blockOffset + readCtx->count); - nbytes = (extentBuf->limit() + fs_context.blockMask) - & ~(fs_context.blockMask); + nbytes = (extentBuf->limit() + fs_context.blockMask) & ~(fs_context.blockMask); } else { nbytes = fs_context.extentSize - extentOffset; extentBuf->limit(nbytes); @@ -114,10 +110,14 @@ void FastFS::read(fs_op_context& ctx) { readExtentComplete(nullptr, true, extentBuf); } else { // read extent data - bdevOffset = (static_cast(extentId) << fs_context.extentBits) - + extentOffset; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - extentBuf->p_buffer_, bdevOffset, nbytes, readExtentComplete, extentBuf); + bdevOffset = (static_cast(extentId) << fs_context.extentBits) + extentOffset; + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + extentBuf->p_buffer_, + bdevOffset, + nbytes, + readExtentComplete, + extentBuf); } index++; } @@ -125,7 +125,7 @@ void FastFS::read(fs_op_context& ctx) { // read other extents while (readCtx->readingSize < readCtx->count) { readCtx->extentsToRead++; - ByteBuffer* extentBuf = nullptr; + ByteBuffer *extentBuf = nullptr; if (readCtx->direct) { extentBuf = readCtx->direct_buff; if (readCtx->extentsToRead > 1) { @@ -158,8 +158,13 @@ void FastFS::read(fs_op_context& ctx) { readExtentComplete(nullptr, true, extentBuf); } else { bdevOffset = (static_cast(extentId) << fs_context.extentBits); - 
spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - extentBuf->getBuffer(), bdevOffset, nbytes, readExtentComplete, extentBuf); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + extentBuf->getBuffer(), + bdevOffset, + nbytes, + readExtentComplete, + extentBuf); } index++; } diff --git a/core/FastWrite.cpp b/core/FastWrite.cpp index 63cc3e9..1584784 100644 --- a/core/FastWrite.cpp +++ b/core/FastWrite.cpp @@ -5,7 +5,7 @@ #include "FastFS.h" -void WriteContext::reset(FastFile* f, uint64_t off) { +void WriteContext::reset(FastFile *f, uint64_t off) { offset = off; writingSize = 0; writedExtents = 0; @@ -20,8 +20,7 @@ void WriteContext::reset(FastFile* f, uint64_t off) { } } -void WriteContext::dirctWrite( - FastFS* fs, int handle, uint64_t off, uint32_t len, const char* data) { +void WriteContext::dirctWrite(FastFS *fs, int handle, uint64_t off, uint32_t len, const char *data) { fd = handle; pwrite = true; direct = true; @@ -32,20 +31,20 @@ void WriteContext::dirctWrite( write_buff = direct_buff->p_buffer_; } -ByteBuffer* FastFile::getTailBlock() { +ByteBuffer *FastFile::getTailBlock() { if (!tail_block) { - fs_context_t& fs_context = FastFS::fs_context; - char* tail_block_buffer = (char*) spdk_dma_zmalloc_socket( - fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); + fs_context_t &fs_context = FastFS::fs_context; + char *tail_block_buffer = + (char *)spdk_dma_zmalloc_socket(fs_context.blockSize, fs_context.bufAlign, NULL, fs_context.localNuma); tail_block = new ByteBuffer(tail_block_buffer, fs_context.blockSize); } return tail_block; } -void FastFS::writeComplete(fs_op_context* ctx) { - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); - FastFile* file = writeCtx->file; - FastInode* inode = file->inode_; +void FastFS::writeComplete(fs_op_context *ctx) { + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); + FastFile *file = writeCtx->file; + FastInode *inode = file->inode_; // if inode 
has been deleted, no need to update if (inode->status_ == 1) { uint64_t size = writeCtx->offset + writeCtx->writingSize; @@ -61,11 +60,11 @@ void FastFS::writeComplete(fs_op_context* ctx) { uint64_t endoff = writeCtx->offset + writeCtx->count; uint32_t tailSize = endoff & fs_context.blockMask; if (tailSize > 0) { - ByteBuffer* tailBlock = file->getTailBlock(); + ByteBuffer *tailBlock = file->getTailBlock(); tailBlock->mark_ = inode->size_; if (tailSize < writeCtx->count) { // write span blocks - const char* buf = writeCtx->write_buff + (writeCtx->count - tailSize); + const char *buf = writeCtx->write_buff + (writeCtx->count - tailSize); tailBlock->clear().putBytes(buf, tailSize); } else if ((writeCtx->offset & fs_context.blockMask) == 0) { // align small write case @@ -74,8 +73,8 @@ void FastFS::writeComplete(fs_op_context* ctx) { } } // modify extents info - ExtentMap* dirtyExtents = file->inode_->dirtyExtents; - for (auto& extent : writeCtx->writeExtents) { + ExtentMap *dirtyExtents = file->inode_->dirtyExtents; + for (auto &extent : writeCtx->writeExtents) { if (extent.newId != 0) { // newAdd or copyOnWrite if (extent.index >= inode->extents_->size()) { // append case or sparse file @@ -93,10 +92,9 @@ void FastFS::writeComplete(fs_op_context* ctx) { if (extent.newId != extent.extentId) { original = extent.extentId; } - dirtyExtents->emplace(extent.index, - std::pair(original, extent.newId)); + dirtyExtents->emplace(extent.index, std::pair(original, extent.newId)); } else { // extent written multi times after fsync - auto& extentInfo = iter->second; + auto &extentInfo = iter->second; fs_context.allocator->release(extentInfo.second); extentInfo.second = extent.newId; } @@ -106,13 +104,13 @@ void FastFS::writeComplete(fs_op_context* ctx) { if (file->flags_ & O_SYNC) { writeCtx->writeExtents.clear(); int fd = writeCtx->fd; - FSyncContext* fsyncCtx = new (ctx->private_data) FSyncContext(); + FSyncContext *fsyncCtx = new (ctx->private_data) FSyncContext(); 
fsyncCtx->fd = fd; return fsync(*ctx); } } else { // free allocated extents - for (WriteExtent& extentInfo : writeCtx->writeExtents) { + for (WriteExtent &extentInfo : writeCtx->writeExtents) { if (extentInfo.newId) { fs_context.allocator->release(extentInfo.newId); } @@ -122,23 +120,21 @@ void FastFS::writeComplete(fs_op_context* ctx) { ctx->callback(ctx->cb_args, 0); } -static void writeExtentComplete( - struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { +static void writeExtentComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - fs_op_context* ctx; - WriteContext* writeCtx; + fs_op_context *ctx; + WriteContext *writeCtx; if (!success && !bdev_io) { // read tail block or extent fail - ctx = reinterpret_cast(cb_arg); - writeCtx = reinterpret_cast(ctx->private_data); + ctx = reinterpret_cast(cb_arg); + writeCtx = reinterpret_cast(ctx->private_data); } else { - ByteBuffer* buffer = reinterpret_cast(cb_arg); - ctx = reinterpret_cast(buffer->private_data); - writeCtx = reinterpret_cast(ctx->private_data); - if (!writeCtx->direct || - buffer->p_buffer_ != writeCtx->direct_buff->p_buffer_) { + ByteBuffer *buffer = reinterpret_cast(cb_arg); + ctx = reinterpret_cast(buffer->private_data); + writeCtx = reinterpret_cast(ctx->private_data); + if (!writeCtx->direct || buffer->p_buffer_ != writeCtx->direct_buff->p_buffer_) { ctx->fastfs->freeBuffer(buffer); } } @@ -146,39 +142,38 @@ static void writeExtentComplete( writeCtx->success = (writeCtx->success & success); writeCtx->writedExtents++; // all WriteExtent commit and complete - if ((writeCtx->writingSize == writeCtx->count) && - (writeCtx->writedExtents == writeCtx->writeExtents.size())) { + if ((writeCtx->writingSize == writeCtx->count) && (writeCtx->writedExtents == writeCtx->writeExtents.size())) { if (writeCtx->success) { ctx->fastfs->writeComplete(ctx); } else { // remove tail block since it maybe dirty writeCtx->file->clearTailBlock(); // free 
new allocated extents - for (auto& extent : writeCtx->writeExtents) { + for (auto &extent : writeCtx->writeExtents) { if (extent.newId) { FastFS::fs_context.allocator->release(extent.newId); } } writeCtx->writeExtents.clear(); - ctx->callback(ctx->cb_args, -5/*WriteDataFail*/); + ctx->callback(ctx->cb_args, -5 /*WriteDataFail*/); } } } -static void copyOnWrite(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) { +static void copyOnWrite(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { spdk_bdev_free_io(bdev_io); } - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - WriteExtent* extent = reinterpret_cast(extentBuf->private_data); - fs_op_context* ctx = extent->op_ctx; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + WriteExtent *extent = reinterpret_cast(extentBuf->private_data); + fs_op_context *ctx = extent->op_ctx; if (!success) { // read extent fail ctx->fastfs->freeBuffer(extentBuf); return writeExtentComplete(nullptr, false, ctx); } - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); - char* data = writeCtx->direct_buff->p_buffer_ + extent->bufOff; + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); + char *data = writeCtx->direct_buff->p_buffer_ + extent->bufOff; extentBuf->putBytes(extent->extentOff, data, extent->bufLen); extentBuf->private_data = ctx; // allocate new extent for write @@ -188,36 +183,39 @@ static void copyOnWrite(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg ctx->fastfs->freeBuffer(extentBuf); return writeExtentComplete(nullptr, false, ctx); } - uint64_t bdevOff = - static_cast(extent->newId) << FastFS::fs_context.extentBits; - spdk_bdev_write(FastFS::fs_context.bdev_desc, FastFS::fs_context.bdev_io_channel, - extentBuf->p_buffer_, bdevOff, FastFS::fs_context.extentSize, - writeExtentComplete, extentBuf); + uint64_t bdevOff = static_cast(extent->newId) << FastFS::fs_context.extentBits; + spdk_bdev_write(FastFS::fs_context.bdev_desc, + 
FastFS::fs_context.bdev_io_channel, + extentBuf->p_buffer_, + bdevOff, + FastFS::fs_context.extentSize, + writeExtentComplete, + extentBuf); } -static void writeExtent(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) { +static void writeExtent(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { if (bdev_io) { // bdev_io used for read tail block spdk_bdev_free_io(bdev_io); } - WriteExtent* extent = reinterpret_cast(cb_arg); - fs_op_context* ctx = extent->op_ctx; + WriteExtent *extent = reinterpret_cast(cb_arg); + fs_op_context *ctx = extent->op_ctx; if (!success) { // read tail block fail return writeExtentComplete(nullptr, false, ctx); } - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); if (writeCtx->direct) { // skip non-required data extent->bufOff += writeCtx->direct_buff->position_; } - auto& fs_context = FastFS::fs_context; + auto &fs_context = FastFS::fs_context; if (writeCtx->append) { struct iovec iov[2]; int iovcnt = 0; // write tail block if (!extent->newId && (writeCtx->offset & fs_context.blockMask)) { - ByteBuffer* tailBlock = writeCtx->file->tail_block; + ByteBuffer *tailBlock = writeCtx->file->tail_block; uint32_t tailSize = std::min(tailBlock->remaining(), writeCtx->count); tailBlock->putBytes(writeCtx->write_buff, tailSize); extent->bufOff += tailSize; @@ -227,42 +225,57 @@ static void writeExtent(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg iovcnt++; } // write remaining - ByteBuffer* buffer = writeCtx->direct ? writeCtx->direct_buff : ctx->fastfs->allocBuffer(); + ByteBuffer *buffer = writeCtx->direct ? 
writeCtx->direct_buff : ctx->fastfs->allocBuffer(); buffer->private_data = ctx; if (extent->bufLen > 0) { if (writeCtx->direct) { iov[iovcnt].iov_base = buffer->p_buffer_ + extent->bufOff; } else { - const char* targetBuf = writeCtx->write_buff + extent->bufOff; + const char *targetBuf = writeCtx->write_buff + extent->bufOff; buffer->putBytes(targetBuf, extent->bufLen); iov[iovcnt].iov_base = buffer->p_buffer_; } - iov[iovcnt].iov_len = - ((extent->bufLen + fs_context.blockMask) & ~(fs_context.blockMask)); + iov[iovcnt].iov_len = ((extent->bufLen + fs_context.blockMask) & ~(fs_context.blockMask)); iovcnt++; - spdk_bdev_writev(fs_context.bdev_desc, fs_context.bdev_io_channel, - iov, iovcnt, extent->offset, extent->len, writeExtentComplete, buffer); + spdk_bdev_writev(fs_context.bdev_desc, + fs_context.bdev_io_channel, + iov, + iovcnt, + extent->offset, + extent->len, + writeExtentComplete, + buffer); } else { // small write case, write size less than one block - spdk_bdev_write(fs_context.bdev_desc, fs_context.bdev_io_channel, - iov[0].iov_base, extent->offset, extent->len, writeExtentComplete, buffer); + spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + iov[0].iov_base, + extent->offset, + extent->len, + writeExtentComplete, + buffer); } } else if (extent->newId) { // sparse file - ByteBuffer* buffer = ctx->fastfs->allocBuffer(); + ByteBuffer *buffer = ctx->fastfs->allocBuffer(); memset(buffer->p_buffer_, 0, fs_context.extentSize); // TODO chenxu14 consider no direct case - char* data = writeCtx->direct_buff->p_buffer_ + extent->bufOff; + char *data = writeCtx->direct_buff->p_buffer_ + extent->bufOff; buffer->putBytes(extent->extentOff, data, extent->bufLen); buffer->private_data = ctx; - spdk_bdev_write(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, extent->offset, extent->len, writeExtentComplete, buffer); + spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + extent->offset, + 
extent->len, + writeExtentComplete, + buffer); } else { // random write case if (extent->bufLen == fs_context.extentSize) { // whole extent write, no need to copy first - ByteBuffer* buffer = writeCtx->direct_buff; + ByteBuffer *buffer = writeCtx->direct_buff; // TODO chenxu14 consider no direct case - char* addr = buffer->p_buffer_ + extent->bufOff; + char *addr = buffer->p_buffer_ + extent->bufOff; buffer->private_data = ctx; // allocate new extent for write extent->newId = fs_context.allocator->allocate(); @@ -270,37 +283,46 @@ static void writeExtent(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg SPDK_WARNLOG("no free extents.\n"); return writeExtentComplete(nullptr, false, ctx); } - uint64_t bdevOff = - static_cast(extent->newId) << fs_context.extentBits; - spdk_bdev_write(fs_context.bdev_desc, fs_context.bdev_io_channel, - addr, bdevOff, fs_context.extentSize, writeExtentComplete, buffer); + uint64_t bdevOff = static_cast(extent->newId) << fs_context.extentBits; + spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + addr, + bdevOff, + fs_context.extentSize, + writeExtentComplete, + buffer); } else { // should do copy on write - ByteBuffer* buffer = ctx->fastfs->allocBuffer(); + ByteBuffer *buffer = ctx->fastfs->allocBuffer(); buffer->private_data = extent; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, extent->offset, extent->len, copyOnWrite, buffer); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + extent->offset, + extent->len, + copyOnWrite, + buffer); } } } -static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) { - fs_op_context* ctx = nullptr; +static void writeRange(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { + fs_op_context *ctx = nullptr; if (bdev_io) { // used by paddingZero spdk_bdev_free_io(bdev_io); - ByteBuffer* buffer = reinterpret_cast(cb_arg); - ctx = reinterpret_cast(buffer->private_data); 
+ ByteBuffer *buffer = reinterpret_cast(cb_arg); + ctx = reinterpret_cast(buffer->private_data); ctx->fastfs->freeBuffer(buffer); if (!success) { - return ctx->callback(ctx->cb_args, -2/*PaddingZeroFailed*/); + return ctx->callback(ctx->cb_args, -2 /*PaddingZeroFailed*/); } } else { - ctx = reinterpret_cast(cb_arg); + ctx = reinterpret_cast(cb_arg); } - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); - auto& fs_context = FastFS::fs_context; - auto& file = *(writeCtx->file); + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); + auto &fs_context = FastFS::fs_context; + auto &file = *(writeCtx->file); uint64_t offset = writeCtx->offset; WriteExtent extent; uint64_t extentOffset = offset & fs_context.extentMask; @@ -310,7 +332,7 @@ static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) extent.extentId = fs_context.allocator->allocate(); if (extent.extentId == UINT32_MAX) { SPDK_WARNLOG("no free extents.\n"); - return ctx->callback(ctx->cb_args, -3/*NoFreeExtents*/); + return ctx->callback(ctx->cb_args, -3 /*NoFreeExtents*/); } extent.newId = extent.extentId; } @@ -319,18 +341,15 @@ static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) // compute bdev's offset & length extent.bufOff = 0; extent.extentOff = extentOffset; - WriteExtent* target = nullptr; + WriteExtent *target = nullptr; if (writeCtx->append) { // align extentOffset with blockSize extentOffset -= blockOffset; - extent.offset = - (static_cast(extent.extentId) << fs_context.extentBits) - + extentOffset; + extent.offset = (static_cast(extent.extentId) << fs_context.extentBits) + extentOffset; // most cases, one extent is enough if (writeCtx->writingSize >= writeCtx->count) { writeCtx->writingSize = writeCtx->count; - extent.len = (writeCtx->count + blockOffset + fs_context.blockMask) - & ~(fs_context.blockMask); + extent.len = (writeCtx->count + blockOffset + fs_context.blockMask) & ~(fs_context.blockMask); } else { extent.len = 
fs_context.extentSize - extentOffset; } @@ -338,20 +357,22 @@ static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) target = &writeCtx->writeExtents.emplace_back(extent); target->op_ctx = ctx; // tail block's data maybe need in append case - if (blockOffset != 0 && - (!file.tail_block || file.tail_block->position() == 0)) { - ByteBuffer* tailBlockBuf = file.getTailBlock(); + if (blockOffset != 0 && (!file.tail_block || file.tail_block->position() == 0)) { + ByteBuffer *tailBlockBuf = file.getTailBlock(); // advance position to block's write cursor tailBlockBuf->position(blockOffset); - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - tailBlockBuf->p_buffer_, extent.offset, fs_context.blockSize, - writeExtent, target); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + tailBlockBuf->p_buffer_, + extent.offset, + fs_context.blockSize, + writeExtent, + target); } else { writeExtent(nullptr, true, target); } } else { // random write - extent.offset = - static_cast(extent.extentId) << fs_context.extentBits; + extent.offset = static_cast(extent.extentId) << fs_context.extentBits; extent.len = fs_context.extentSize; if (writeCtx->writingSize >= writeCtx->count) { writeCtx->writingSize = writeCtx->count; @@ -367,24 +388,22 @@ static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) while (writeCtx->writingSize < writeCtx->count) { extent.extentOff = 0; extent.newId = 0; - if (!file.inode_->getExtent( - offset + writeCtx->writingSize, extent.index, extent.extentId)) { + if (!file.inode_->getExtent(offset + writeCtx->writingSize, extent.index, extent.extentId)) { extent.extentId = fs_context.allocator->allocate(); if (extent.extentId == UINT32_MAX) { SPDK_WARNLOG("no free extents.\n"); // free allocated extents - for (auto& e : writeCtx->writeExtents) { + for (auto &e : writeCtx->writeExtents) { if (e.newId) { fs_context.allocator->release(e.newId); } } writeCtx->writeExtents.clear(); - 
return ctx->callback(ctx->cb_args, -4/*NoFreeExtents*/); + return ctx->callback(ctx->cb_args, -4 /*NoFreeExtents*/); } extent.newId = extent.extentId; } - extent.offset = - static_cast(extent.extentId) << fs_context.extentBits; + extent.offset = static_cast(extent.extentId) << fs_context.extentBits; extent.bufOff = writeCtx->writingSize; uint32_t remaining = writeCtx->remainingSize(); if (remaining < fs_context.extentSize) { // the last extent @@ -400,36 +419,40 @@ static void writeRange(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) extent.bufLen = fs_context.extentSize; writeCtx->writingSize += fs_context.extentSize; } - auto& target = writeCtx->writeExtents.emplace_back(extent); + auto &target = writeCtx->writeExtents.emplace_back(extent); target.op_ctx = ctx; writeExtent(nullptr, true, &target); } } -static void paddingZero(struct spdk_bdev_io* bdev_io, bool success, void* cb_arg) { +static void paddingZero(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { spdk_bdev_free_io(bdev_io); - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_op_context* ctx = reinterpret_cast(buffer->private_data); - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); - auto& fs_context = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_op_context *ctx = reinterpret_cast(buffer->private_data); + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); + auto &fs_context = FastFS::fs_context; if (!success) { ctx->fastfs->freeBuffer(buffer); - return ctx->callback(ctx->cb_args, -2/*PaddingZeroFailed*/); + return ctx->callback(ctx->cb_args, -2 /*PaddingZeroFailed*/); } - uint64_t extentOffset = - writeCtx->file->inode_->size_ & fs_context.extentMask; + uint64_t extentOffset = writeCtx->file->inode_->size_ & fs_context.extentMask; uint64_t len = fs_context.extentSize - extentOffset; memset(buffer->p_buffer_ + extentOffset, 0, len); - spdk_bdev_write(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, 
buffer->mark_, fs_context.extentSize, writeRange, buffer); + spdk_bdev_write(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + buffer->mark_, + fs_context.extentSize, + writeRange, + buffer); } -void FastFS::write(fs_op_context& ctx) { - WriteContext* writeCtx = reinterpret_cast(ctx.private_data); +void FastFS::write(fs_op_context &ctx) { + WriteContext *writeCtx = reinterpret_cast(ctx.private_data); if (writeCtx->fd > fs_context.maxFiles) { return ctx.callback(ctx.cb_args, -1); } - FastFile& file = (*files)[writeCtx->fd]; + FastFile &file = (*files)[writeCtx->fd]; // determine write offset uint64_t offset = file.pos_; @@ -441,18 +464,22 @@ void FastFS::write(fs_op_context& ctx) { writeCtx->reset(&file, offset); if (offset > file.inode_->size_) { - auto& extents = *(file.inode_->extents_); + auto &extents = *(file.inode_->extents_); if (extents.size() > 0) { // padding zero with last extent uint32_t extentId = extents[extents.size() - 1]; if (extentId != UINT32_MAX) { // truncate case - uint64_t offset = - static_cast(extentId) << fs_context.extentBits; - ByteBuffer* buffer = allocBuffer(); + uint64_t offset = static_cast(extentId) << fs_context.extentBits; + ByteBuffer *buffer = allocBuffer(); buffer->private_data = &ctx; buffer->mark_ = offset; - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, offset, fs_context.extentSize, paddingZero, buffer); + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + offset, + fs_context.extentSize, + paddingZero, + buffer); return; } } @@ -460,12 +487,12 @@ void FastFS::write(fs_op_context& ctx) { writeRange(nullptr, true, &ctx); } -static void fsyncComplete(void* cb_args, int code) { - fs_op_context* ctx = reinterpret_cast(cb_args); - FSyncContext* syncCtx = reinterpret_cast(ctx->private_data); +static void fsyncComplete(void *cb_args, int code) { + fs_op_context *ctx = reinterpret_cast(cb_args); + FSyncContext *syncCtx = 
reinterpret_cast(ctx->private_data); // release dirty extents if (syncCtx->dirtyExtents) { - for (auto& [index, extentInfo] : *syncCtx->dirtyExtents) { + for (auto &[index, extentInfo] : *syncCtx->dirtyExtents) { if (extentInfo.first != UINT32_MAX) { FastFS::fs_context.allocator->release(extentInfo.first); } @@ -476,14 +503,14 @@ static void fsyncComplete(void* cb_args, int code) { ctx->callback(ctx->cb_args, code); } -void FastFS::fsync(fs_op_context& ctx) { - FSyncContext* syncCtx = reinterpret_cast(ctx.private_data); +void FastFS::fsync(fs_op_context &ctx) { + FSyncContext *syncCtx = reinterpret_cast(ctx.private_data); if (syncCtx->fd > fs_context.maxFiles) { return ctx.callback(ctx.cb_args, -1); } int32_t size = 14; /*ino(4) + size(8) + extentsCnt(2)*/ - FastFile& file = (*files)[syncCtx->fd]; + FastFile &file = (*files)[syncCtx->fd]; syncCtx->file = &file; syncCtx->dirtyExtents = nullptr; if (file.inode_->dirtyExtents && file.inode_->dirtyExtents->size() > 0) { @@ -496,7 +523,7 @@ void FastFS::fsync(fs_op_context& ctx) { return fsyncComplete(&ctx, 0); } - EditOp* editOp = journal->allocEditOp(); + EditOp *editOp = journal->allocEditOp(); editOp->opctx = syncCtx; editOp->type = 4; editOp->size = size; diff --git a/core/Serialization.h b/core/Serialization.h index 2535cc2..dc9d83b 100644 --- a/core/Serialization.h +++ b/core/Serialization.h @@ -6,9 +6,10 @@ #ifndef FASTFS_SERDE_H_ #define FASTFS_SERDE_H_ -#include "ByteBuffer.h" #include +#include "ByteBuffer.h" + class FastInode; enum FileType : int { @@ -17,10 +18,10 @@ enum FileType : int { FASTFS_SYMBAL_LINK = 2 }; -typedef void (*op_cb)(void* cb_args, int code); +typedef void (*op_cb)(void *cb_args, int code); class SuperBlock { - public: + public: uint64_t ckptInodesLoc; uint64_t ckptDentryLoc; uint64_t lastTxid; @@ -32,7 +33,7 @@ class SuperBlock { uint32_t flags; uint32_t version; - void serialize(ByteBuffer* buf) { + void serialize(ByteBuffer *buf) { buf->putBytes("FAsT", 4); buf->write(ckptInodesLoc); 
buf->write(ckptDentryLoc); @@ -46,8 +47,8 @@ class SuperBlock { buf->write(version); } - bool deserialize(ByteBuffer* buf) { - char magicWord[4] {0}; + bool deserialize(ByteBuffer *buf) { + char magicWord[4]{0}; buf->getBytes(magicWord, 4); if (memcmp(magicWord, "FAsT", 4) != 0) { return false; @@ -67,7 +68,7 @@ class SuperBlock { }; class INodeFile { - public: + public: static constexpr int32_t kFixSize = 22; uint32_t ino; uint32_t parent_id; @@ -76,7 +77,7 @@ class INodeFile { std::string_view name; FileType type; - void serialize(ByteBuffer* buf) { + void serialize(ByteBuffer *buf) { buf->write(ino); buf->write(parent_id); buf->write(mode); @@ -87,7 +88,7 @@ class INodeFile { buf->putByte((type == FASTFS_DIR ? 1 : 0)); } - bool deserialize(ByteBuffer* buf) { + bool deserialize(ByteBuffer *buf) { bool res = true; buf->read(ino); buf->read(parent_id); @@ -105,7 +106,7 @@ class INodeFile { }; class CreateContext { - public: + public: uint32_t parentId; std::string_view name; uint32_t ino; // used by journal replay case @@ -115,11 +116,11 @@ class CreateContext { std::string_view path; // should start with '/' uint32_t pos; op_cb callback; - void* args; + void *args; CreateContext() : ino(UINT32_MAX) {} - void serialize(ByteBuffer* buf) { // TODO chenxu14 add magic header + void serialize(ByteBuffer *buf) { // TODO chenxu14 add magic header buf->write(parentId); uint8_t nameLen = name.size(); buf->write(nameLen); @@ -129,7 +130,7 @@ class CreateContext { buf->putByte((type == FASTFS_DIR ? 
1 : 0)); } - void deserialize(ByteBuffer* buf) { + void deserialize(ByteBuffer *buf) { buf->read(parentId); uint8_t nameLen = 0; buf->read(nameLen); @@ -144,12 +145,12 @@ class CreateContext { }; class DeleteContext { - public: + public: uint32_t parentId; std::string name; bool recursive = false; - void serialize(ByteBuffer* buf) { + void serialize(ByteBuffer *buf) { buf->write(parentId); uint8_t nameLen = name.size(); buf->write(nameLen); @@ -158,7 +159,7 @@ class DeleteContext { buf->putByte(val); } - void deserialize(ByteBuffer* buf) { + void deserialize(ByteBuffer *buf) { buf->read(parentId); uint8_t nameLen = 0; buf->read(nameLen); @@ -171,29 +172,29 @@ class DeleteContext { }; class TruncateContext { - public: + public: uint32_t ino; uint64_t size; - void serialize(ByteBuffer* buf) { + void serialize(ByteBuffer *buf) { buf->write(ino); buf->write(size); } - void deserialize(ByteBuffer* buf) { + void deserialize(ByteBuffer *buf) { buf->read(ino); buf->read(size); } }; class RenameContext { - public: + public: uint32_t olddir; uint32_t newdir; std::string oldname; std::string newname; - void serialize(ByteBuffer* buf) { + void serialize(ByteBuffer *buf) { buf->write(olddir); buf->write(newdir); uint8_t nameLen = oldname.size(); @@ -204,7 +205,7 @@ class RenameContext { buf->putBytes(newname.data(), nameLen); } - void deserialize(ByteBuffer* buf) { + void deserialize(ByteBuffer *buf) { buf->read(olddir); buf->read(newdir); uint8_t nameLen = 0; diff --git a/core/xxhash.h b/core/xxhash.h index 78fc2e8..cc7716c 100644 --- a/core/xxhash.h +++ b/core/xxhash.h @@ -241,7 +241,7 @@ * xxHash prototypes and implementation */ -#if defined (__cplusplus) +#if defined(__cplusplus) extern "C" { #endif @@ -265,7 +265,7 @@ extern "C" { * #include "xxhash.h" * @endcode */ -# define XXH_STATIC_LINKING_ONLY +#define XXH_STATIC_LINKING_ONLY /* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */ /*! 
@@ -278,7 +278,7 @@ extern "C" { * #include "xxhash.h" * @endcode */ -# define XXH_IMPLEMENTATION +#define XXH_IMPLEMENTATION /* Do not undef XXH_IMPLEMENTATION for Doxygen */ /*! @@ -299,13 +299,13 @@ extern "C" { * @endcode * Do not compile and link xxhash.o as a separate object, as it is not useful. */ -# define XXH_INLINE_ALL -# undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#undef XXH_INLINE_ALL /*! * @brief Exposes the implementation without marking functions as inline. */ -# define XXH_PRIVATE_API -# undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_PRIVATE_API /*! * @brief Emulate a namespace by transparently prefixing all symbols. * @@ -319,122 +319,121 @@ extern "C" { * includes `xxhash.h`: Regular symbol names will be automatically translated * by this header. */ -# define XXH_NAMESPACE /* YOUR NAME HERE */ -# undef XXH_NAMESPACE -#endif - -#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ - && !defined(XXH_INLINE_ALL_31684351384) - /* this section should be traversed only once */ -# define XXH_INLINE_ALL_31684351384 - /* give access to the advanced API, required to compile implementations */ -# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ -# define XXH_STATIC_LINKING_ONLY - /* make all functions private */ -# undef XXH_PUBLIC_API -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((__unused__)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else - /* note: this version may generate warnings for unused static functions */ -# define XXH_PUBLIC_API static -# endif - - /* - * This part deals with the special case where a unit wants to inline xxHash, - * but "xxhash.h" has previously been included without XXH_INLINE_ALL, - * such as part of some previously included *.h header file. 
- * Without further action, the new include would just be ignored, - * and functions would effectively _not_ be inlined (silent failure). - * The following macros solve this situation by prefixing all inlined names, - * avoiding naming collision with previous inclusions. - */ - /* Before that, we unconditionally #undef all symbols, - * in case they were already defined with XXH_NAMESPACE. - * They will then be redefined for XXH_INLINE_ALL - */ -# undef XXH_versionNumber - /* XXH32 */ -# undef XXH32 -# undef XXH32_createState -# undef XXH32_freeState -# undef XXH32_reset -# undef XXH32_update -# undef XXH32_digest -# undef XXH32_copyState -# undef XXH32_canonicalFromHash -# undef XXH32_hashFromCanonical - /* XXH64 */ -# undef XXH64 -# undef XXH64_createState -# undef XXH64_freeState -# undef XXH64_reset -# undef XXH64_update -# undef XXH64_digest -# undef XXH64_copyState -# undef XXH64_canonicalFromHash -# undef XXH64_hashFromCanonical - /* XXH3_64bits */ -# undef XXH3_64bits -# undef XXH3_64bits_withSecret -# undef XXH3_64bits_withSeed -# undef XXH3_64bits_withSecretandSeed -# undef XXH3_createState -# undef XXH3_freeState -# undef XXH3_copyState -# undef XXH3_64bits_reset -# undef XXH3_64bits_reset_withSeed -# undef XXH3_64bits_reset_withSecret -# undef XXH3_64bits_update -# undef XXH3_64bits_digest -# undef XXH3_generateSecret - /* XXH3_128bits */ -# undef XXH128 -# undef XXH3_128bits -# undef XXH3_128bits_withSeed -# undef XXH3_128bits_withSecret -# undef XXH3_128bits_reset -# undef XXH3_128bits_reset_withSeed -# undef XXH3_128bits_reset_withSecret -# undef XXH3_128bits_reset_withSecretandSeed -# undef XXH3_128bits_update -# undef XXH3_128bits_digest -# undef XXH128_isEqual -# undef XXH128_cmp -# undef XXH128_canonicalFromHash -# undef XXH128_hashFromCanonical - /* Finally, free the namespace itself */ -# undef XXH_NAMESPACE - - /* employ the namespace for XXH_INLINE_ALL */ -# define XXH_NAMESPACE XXH_INLINE_ - /* - * Some identifiers (enums, type names) are not 
symbols, - * but they must nonetheless be renamed to avoid redeclaration. - * Alternative solution: do not redeclare them. - * However, this requires some #ifdefs, and has a more dispersed impact. - * Meanwhile, renaming can be achieved in a single place. - */ -# define XXH_IPREF(Id) XXH_NAMESPACE ## Id -# define XXH_OK XXH_IPREF(XXH_OK) -# define XXH_ERROR XXH_IPREF(XXH_ERROR) -# define XXH_errorcode XXH_IPREF(XXH_errorcode) -# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) -# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) -# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) -# define XXH32_state_s XXH_IPREF(XXH32_state_s) -# define XXH32_state_t XXH_IPREF(XXH32_state_t) -# define XXH64_state_s XXH_IPREF(XXH64_state_s) -# define XXH64_state_t XXH_IPREF(XXH64_state_t) -# define XXH3_state_s XXH_IPREF(XXH3_state_s) -# define XXH3_state_t XXH_IPREF(XXH3_state_t) -# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) - /* Ensure the header is parsed again, even if it was previously included */ -# undef XXHASH_H_5627135585666179 -# undef XXHASH_H_STATIC_13879238742 +#define XXH_NAMESPACE /* YOUR NAME HERE */ +#undef XXH_NAMESPACE +#endif + +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) && !defined(XXH_INLINE_ALL_31684351384) +/* this section should be traversed only once */ +#define XXH_INLINE_ALL_31684351384 +/* give access to the advanced API, required to compile implementations */ +#undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +#define XXH_STATIC_LINKING_ONLY +/* make all functions private */ +#undef XXH_PUBLIC_API +#if defined(__GNUC__) +#define XXH_PUBLIC_API static __inline __attribute__((__unused__)) +#elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#define XXH_PUBLIC_API static inline +#elif defined(_MSC_VER) +#define XXH_PUBLIC_API static __inline +#else +/* note: this version may generate warnings for unused static functions */ +#define XXH_PUBLIC_API static +#endif + +/* 
+ * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, + * such as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ +/* Before that, we unconditionally #undef all symbols, + * in case they were already defined with XXH_NAMESPACE. + * They will then be redefined for XXH_INLINE_ALL + */ +#undef XXH_versionNumber +/* XXH32 */ +#undef XXH32 +#undef XXH32_createState +#undef XXH32_freeState +#undef XXH32_reset +#undef XXH32_update +#undef XXH32_digest +#undef XXH32_copyState +#undef XXH32_canonicalFromHash +#undef XXH32_hashFromCanonical +/* XXH64 */ +#undef XXH64 +#undef XXH64_createState +#undef XXH64_freeState +#undef XXH64_reset +#undef XXH64_update +#undef XXH64_digest +#undef XXH64_copyState +#undef XXH64_canonicalFromHash +#undef XXH64_hashFromCanonical +/* XXH3_64bits */ +#undef XXH3_64bits +#undef XXH3_64bits_withSecret +#undef XXH3_64bits_withSeed +#undef XXH3_64bits_withSecretandSeed +#undef XXH3_createState +#undef XXH3_freeState +#undef XXH3_copyState +#undef XXH3_64bits_reset +#undef XXH3_64bits_reset_withSeed +#undef XXH3_64bits_reset_withSecret +#undef XXH3_64bits_update +#undef XXH3_64bits_digest +#undef XXH3_generateSecret +/* XXH3_128bits */ +#undef XXH128 +#undef XXH3_128bits +#undef XXH3_128bits_withSeed +#undef XXH3_128bits_withSecret +#undef XXH3_128bits_reset +#undef XXH3_128bits_reset_withSeed +#undef XXH3_128bits_reset_withSecret +#undef XXH3_128bits_reset_withSecretandSeed +#undef XXH3_128bits_update +#undef XXH3_128bits_digest +#undef XXH128_isEqual +#undef XXH128_cmp +#undef XXH128_canonicalFromHash +#undef XXH128_hashFromCanonical +/* Finally, free the namespace itself */ 
+#undef XXH_NAMESPACE + +/* employ the namespace for XXH_INLINE_ALL */ +#define XXH_NAMESPACE XXH_INLINE_ +/* + * Some identifiers (enums, type names) are not symbols, + * but they must nonetheless be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and has a more dispersed impact. + * Meanwhile, renaming can be achieved in a single place. + */ +#define XXH_IPREF(Id) XXH_NAMESPACE##Id +#define XXH_OK XXH_IPREF(XXH_OK) +#define XXH_ERROR XXH_IPREF(XXH_ERROR) +#define XXH_errorcode XXH_IPREF(XXH_errorcode) +#define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +#define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +#define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +#define XXH32_state_s XXH_IPREF(XXH32_state_s) +#define XXH32_state_t XXH_IPREF(XXH32_state_t) +#define XXH64_state_s XXH_IPREF(XXH64_state_s) +#define XXH64_state_t XXH_IPREF(XXH64_state_t) +#define XXH3_state_s XXH_IPREF(XXH3_state_s) +#define XXH3_state_t XXH_IPREF(XXH3_state_t) +#define XXH128_hash_t XXH_IPREF(XXH128_hash_t) +/* Ensure the header is parsed again, even if it was previously included */ +#undef XXHASH_H_5627135585666179 +#undef XXHASH_H_STATIC_13879238742 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ /* **************************************************************** @@ -445,111 +444,110 @@ extern "C" { /*! @brief Marks a global symbol. 
*/ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif +#if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +#ifdef XXH_EXPORT +#define XXH_PUBLIC_API __declspec(dllexport) +#elif XXH_IMPORT +#define XXH_PUBLIC_API __declspec(dllimport) +#endif +#else +#define XXH_PUBLIC_API /* do nothing */ +#endif #endif #ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +#define XXH_CAT(A, B) A##B +#define XXH_NAME2(A, B) XXH_CAT(A, B) +#define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) /* XXH32 */ -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +#define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +#define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +#define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +#define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +#define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +#define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +#define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) 
+#define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +#define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) /* XXH64 */ -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +#define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +#define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +#define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +#define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +#define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +#define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +#define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +#define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) /* XXH3_64bits */ -# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) -# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) -# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) -# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) -# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) -# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) -# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) -# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) -# 
define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) -# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) -# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed) -# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) -# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) -# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) -# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed) +#define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) +#define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) +#define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) +#define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) +#define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) +#define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) +#define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) +#define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) +#define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed) +#define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret) +#define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed) +#define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update) +#define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest) +#define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret) +#define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed) /* XXH3_128bits */ -# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) -# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) -# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, 
XXH3_128bits_withSeed) -# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) -# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed) -# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) -# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) -# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) -# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed) -# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) -# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) -# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) -# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) -# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) -# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) +#define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) +#define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits) +#define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed) +#define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret) +#define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed) +#define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset) +#define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed) +#define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret) +#define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed) +#define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update) +#define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest) +#define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) +#define XXH128_cmp 
XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) +#define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) +#define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) #endif - /* ************************************* -* Compiler specifics -***************************************/ + * Compiler specifics + ***************************************/ /* specific declaration modes for Windows */ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) -# if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) -# ifdef XXH_EXPORT -# define XXH_PUBLIC_API __declspec(dllexport) -# elif XXH_IMPORT -# define XXH_PUBLIC_API __declspec(dllimport) -# endif -# else -# define XXH_PUBLIC_API /* do nothing */ -# endif -#endif - -#if defined (__GNUC__) -# define XXH_CONSTF __attribute__((__const__)) -# define XXH_PUREF __attribute__((__pure__)) -# define XXH_MALLOCF __attribute__((__malloc__)) +#if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +#ifdef XXH_EXPORT +#define XXH_PUBLIC_API __declspec(dllexport) +#elif XXH_IMPORT +#define XXH_PUBLIC_API __declspec(dllimport) +#endif +#else +#define XXH_PUBLIC_API /* do nothing */ +#endif +#endif + +#if defined(__GNUC__) +#define XXH_CONSTF __attribute__((__const__)) +#define XXH_PUREF __attribute__((__pure__)) +#define XXH_MALLOCF __attribute__((__malloc__)) #else -# define XXH_CONSTF /* disable */ -# define XXH_PUREF -# define XXH_MALLOCF +#define XXH_CONSTF /* disable */ +#define XXH_PUREF +#define XXH_MALLOCF #endif /* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 8 -#define XXH_VERSION_RELEASE 3 + * Version + ***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 8 +#define XXH_VERSION_RELEASE 3 /*! 
@brief Version number, encoded as two digits each */ -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE) /*! * @brief Obtains the xxHash version. @@ -559,25 +557,23 @@ extern "C" { * * @return @ref XXH_VERSION_NUMBER of the invoked library. */ -XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void); - +XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber(void); /* **************************** -* Common basic types -******************************/ -#include /* size_t */ + * Common basic types + ******************************/ +#include /* size_t */ /*! * @brief Exit code for the streaming API. */ typedef enum { - XXH_OK = 0, /*!< OK */ - XXH_ERROR /*!< Error */ + XXH_OK = 0, /*!< OK */ + XXH_ERROR /*!< Error */ } XXH_errorcode; - /*-********************************************************************** -* 32-bit hash -************************************************************************/ + * 32-bit hash + ************************************************************************/ #if defined(XXH_DOXYGEN) /* Don't show include */ /*! * @brief An unsigned 32-bit integer. 
@@ -586,25 +582,24 @@ typedef enum { */ typedef uint32_t XXH32_hash_t; -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# ifdef _AIX -# include -# else -# include -# endif - typedef uint32_t XXH32_hash_t; +#elif !defined(__VMS) && \ + (defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) +#ifdef _AIX +#include +#else +#include +#endif +typedef uint32_t XXH32_hash_t; #else -# include -# if UINT_MAX == 0xFFFFFFFFUL - typedef unsigned int XXH32_hash_t; -# elif ULONG_MAX == 0xFFFFFFFFUL - typedef unsigned long XXH32_hash_t; -# else -# error "unsupported platform: need a 32-bit type" -# endif +#include +#if UINT_MAX == 0xFFFFFFFFUL +typedef unsigned int XXH32_hash_t; +#elif ULONG_MAX == 0xFFFFFFFFUL +typedef unsigned long XXH32_hash_t; +#else +#error "unsupported platform: need a 32-bit type" +#endif #endif /*! @@ -640,7 +635,7 @@ typedef uint32_t XXH32_hash_t; * * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); +XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32(const void *input, size_t length, XXH32_hash_t seed); #ifndef XXH_NO_STREAM /*! @@ -662,7 +657,7 @@ typedef struct XXH32_state_s XXH32_state_t; * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t *XXH32_createState(void); /*! * @brief Frees an @ref XXH32_state_t. * @@ -675,7 +670,7 @@ XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void); * @see @ref streaming_example "Streaming Example" * */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr); /*! * @brief Copies one @ref XXH32_state_t to another. 
* @@ -684,7 +679,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); * @pre * @p dst_state and @p src_state must not be `NULL` and must not overlap. */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t *dst_state, const XXH32_state_t *src_state); /*! * @brief Resets an @ref XXH32_state_t to begin a new hash. @@ -702,7 +697,7 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_ * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, XXH32_hash_t seed); /*! * @brief Consumes a block of @p input to an @ref XXH32_state_t. @@ -725,7 +720,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *statePtr, const void *input, size_t length); /*! * @brief Returns the calculated hash value from an @ref XXH32_state_t. @@ -743,7 +738,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); +XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest(const XXH32_state_t *statePtr); #endif /* !XXH_NO_STREAM */ /******* Canonical representation *******/ @@ -752,7 +747,7 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePt * @brief Canonical (big endian) representation of @ref XXH32_hash_t. */ typedef struct { - unsigned char digest[4]; /*!< Hash bytes, big endian */ + unsigned char digest[4]; /*!< Hash bytes, big endian */ } XXH32_canonical_t; /*! 
@@ -766,7 +761,7 @@ typedef struct { * * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst, XXH32_hash_t hash); /*! * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t. @@ -780,14 +775,13 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t * * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); - +XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t *src); /*! @cond Doxygen ignores this part */ #ifdef __has_attribute -# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) +#define XXH_HAS_ATTRIBUTE(x) __has_attribute(x) #else -# define XXH_HAS_ATTRIBUTE(x) 0 +#define XXH_HAS_ATTRIBUTE(x) 0 #endif /*! @endcond */ @@ -803,17 +797,17 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni /*! @cond Doxygen ignores this part */ /* C-language Attributes are added in C23. */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute) -# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) +#define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x) #else -# define XXH_HAS_C_ATTRIBUTE(x) 0 +#define XXH_HAS_C_ATTRIBUTE(x) 0 #endif /*! @endcond */ /*! @cond Doxygen ignores this part */ #if defined(__cplusplus) && defined(__has_cpp_attribute) -# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) #else -# define XXH_HAS_CPP_ATTRIBUTE(x) 0 +#define XXH_HAS_CPP_ATTRIBUTE(x) 0 #endif /*! 
@endcond */ @@ -825,11 +819,11 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough */ #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough) -# define XXH_FALLTHROUGH [[fallthrough]] +#define XXH_FALLTHROUGH [[fallthrough]] #elif XXH_HAS_ATTRIBUTE(__fallthrough__) -# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__)) +#define XXH_FALLTHROUGH __attribute__((__fallthrough__)) #else -# define XXH_FALLTHROUGH /* fallthrough */ +#define XXH_FALLTHROUGH /* fallthrough */ #endif /*! @endcond */ @@ -840,13 +834,12 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni * As of writing this, only supported by clang. */ #if XXH_HAS_ATTRIBUTE(noescape) -# define XXH_NOESCAPE __attribute__((__noescape__)) +#define XXH_NOESCAPE __attribute__((__noescape__)) #else -# define XXH_NOESCAPE +#define XXH_NOESCAPE #endif /*! @endcond */ - /*! * @} * @ingroup public @@ -855,8 +848,8 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni #ifndef XXH_NO_LONG_LONG /*-********************************************************************** -* 64-bit hash -************************************************************************/ + * 64-bit hash + ************************************************************************/ #if defined(XXH_DOXYGEN) /* don't include */ /*! * @brief An unsigned 64-bit integer. @@ -864,24 +857,23 @@ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canoni * Not necessarily defined to `uint64_t` but functionally equivalent. 
*/ typedef uint64_t XXH64_hash_t; -#elif !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# ifdef _AIX -# include -# else -# include -# endif - typedef uint64_t XXH64_hash_t; +#elif !defined(__VMS) && \ + (defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)) +#ifdef _AIX +#include +#else +#include +#endif +typedef uint64_t XXH64_hash_t; #else -# include -# if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL - /* LP64 ABI says uint64_t is unsigned long */ - typedef unsigned long XXH64_hash_t; -# else - /* the following type must have a width of 64-bit */ - typedef unsigned long long XXH64_hash_t; -# endif +#include +#if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL +/* LP64 ABI says uint64_t is unsigned long */ +typedef unsigned long XXH64_hash_t; +#else +/* the following type must have a width of 64-bit */ +typedef unsigned long long XXH64_hash_t; +#endif #endif /*! @@ -914,7 +906,7 @@ typedef uint64_t XXH64_hash_t; * * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void *input, size_t length, XXH64_hash_t seed); /******* Streaming *******/ #ifndef XXH_NO_STREAM @@ -924,7 +916,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size * @see XXH64_state_s for details. * @see @ref streaming_example "Streaming Example" */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ /*! * @brief Allocates an @ref XXH64_state_t. 
@@ -936,7 +928,7 @@ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t *XXH64_createState(void); /*! * @brief Frees an @ref XXH64_state_t. @@ -949,7 +941,7 @@ XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr); /*! * @brief Copies one @ref XXH64_state_t to another. @@ -959,7 +951,7 @@ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); * @pre * @p dst_state and @p src_state must not be `NULL` and must not overlap. */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t *dst_state, const XXH64_state_t *src_state); /*! * @brief Resets an @ref XXH64_state_t to begin a new hash. @@ -977,7 +969,7 @@ XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t *statePtr, XXH64_hash_t seed); /*! * @brief Consumes a block of @p input to an @ref XXH64_state_t. @@ -1000,7 +992,9 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH_NOESCAPE XXH64_state_t *statePtr, + XXH_NOESCAPE const void *input, + size_t length); /*! 
* @brief Returns the calculated hash value from an @ref XXH64_state_t. @@ -1018,14 +1012,16 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t *statePtr); #endif /* !XXH_NO_STREAM */ /******* Canonical representation *******/ /*! * @brief Canonical (big endian) representation of @ref XXH64_hash_t. */ -typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; +typedef struct { + unsigned char digest[sizeof(XXH64_hash_t)]; +} XXH64_canonical_t; /*! * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t. @@ -1038,7 +1034,7 @@ typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t * * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t *dst, XXH64_hash_t hash); /*! * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t. @@ -1052,7 +1048,7 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, * * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t *src); #ifndef XXH_NO_XXH3 @@ -1117,19 +1113,18 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const * * Unless set explicitly, determined automatically. 
*/ -# define XXH_SCALAR 0 /*!< Portable scalar version */ -# define XXH_SSE2 1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. */ -# define XXH_AVX2 2 /*!< AVX2 for Haswell and Bulldozer */ -# define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */ -# define XXH_NEON 4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */ -# define XXH_VSX 5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */ -# define XXH_SVE 6 /*!< SVE for some ARMv8-A and ARMv9-A */ -# define XXH_LSX 7 /*!< LSX (128-bit SIMD) for LoongArch64 */ - +#define XXH_SCALAR 0 /*!< Portable scalar version */ +#define XXH_SSE2 1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. */ +#define XXH_AVX2 2 /*!< AVX2 for Haswell and Bulldozer */ +#define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */ +#define XXH_NEON 4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */ +#define XXH_VSX 5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */ +#define XXH_SVE 6 /*!< SVE for some ARMv8-A and ARMv9-A */ +#define XXH_LSX 7 /*!< LSX (128-bit SIMD) for LoongArch64 */ /*-********************************************************************** -* XXH3 64-bit variant -************************************************************************/ + * XXH3 64-bit variant + ************************************************************************/ /*! * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input. @@ -1153,7 +1148,7 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void *input, size_t length); /*! * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input. 
@@ -1179,7 +1174,9 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input * * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void *input, + size_t length, + XXH64_hash_t seed); /*! * The bare minimum size for a custom secret. @@ -1222,8 +1219,10 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const vo * * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); - +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void *data, + size_t len, + XXH_NOESCAPE const void *secret, + size_t secretSize); /******* Streaming *******/ #ifndef XXH_NO_STREAM @@ -1241,8 +1240,8 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const * @see @ref streaming_example "Streaming Example" */ typedef struct XXH3_state_s XXH3_state_t; -XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t *XXH3_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr); /*! * @brief Copies one @ref XXH3_state_t to another. @@ -1252,7 +1251,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr); * @pre * @p dst_state and @p src_state must not be `NULL` and must not overlap. */ -XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state); +XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t *dst_state, XXH_NOESCAPE const XXH3_state_t *src_state); /*! 
* @brief Resets an @ref XXH3_state_t to begin a new hash. @@ -1273,7 +1272,7 @@ XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOE * @see @ref streaming_example "Streaming Example" * */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr); /*! * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash. @@ -1295,7 +1294,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* stateP * @see @ref streaming_example "Streaming Example" * */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed); /*! * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash. @@ -1321,7 +1320,9 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_ * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize); /*! * @brief Consumes a block of @p input to an @ref XXH3_state_t. @@ -1344,7 +1345,9 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_stat * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *input, + size_t length); /*! * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t. 
@@ -1362,16 +1365,15 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* stat * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t *statePtr); #endif /* !XXH_NO_STREAM */ /* note : canonical representation of XXH3 is the same as XXH64 * since they both produce XXH64_hash_t values */ - /*-********************************************************************** -* XXH3 128-bit variant -************************************************************************/ + * XXH3 128-bit variant + ************************************************************************/ /*! * @brief The return value from 128-bit hashes. @@ -1380,8 +1382,8 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XX * endianness. */ typedef struct { - XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ - XXH64_hash_t high64; /*!< `value >> 64` */ + XXH64_hash_t low64; /*!< `value & 0xFFFFFFFFFFFFFFFF` */ + XXH64_hash_t high64; /*!< `value >> 64` */ } XXH128_hash_t; /*! @@ -1402,7 +1404,7 @@ typedef struct { * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len); +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void *data, size_t len); /*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data. * * @param data The block of data to be hashed, at least @p length bytes in size. @@ -1422,7 +1424,9 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* dat * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants * @see @ref single_shot_example "Single Shot Example" for an example. 
*/ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void *data, + size_t len, + XXH64_hash_t seed); /*! * @brief Calculates 128-bit variant of XXH3 with a custom "secret". * @@ -1450,7 +1454,10 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const * * @see @ref single_shot_example "Single Shot Example" for an example. */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize); +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void *data, + size_t len, + XXH_NOESCAPE const void *secret, + size_t secretSize); /******* Streaming *******/ #ifndef XXH_NO_STREAM @@ -1484,7 +1491,7 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE cons * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr); /*! * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash. @@ -1505,7 +1512,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* state * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed); /*! * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash. 
* @@ -1528,7 +1535,9 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize); /*! * @brief Consumes a block of @p input to an @ref XXH3_state_t. @@ -1551,7 +1560,9 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_sta * `NULL`. In C++, this also must be *TriviallyCopyable*. * */ -XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *input, + size_t length); /*! * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t. @@ -1568,7 +1579,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* sta * digest, and update again. * */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr); +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t *statePtr); #endif /* !XXH_NO_STREAM */ /* Following helper functions make it possible to compare XXH128_hast_t values. 
@@ -1598,12 +1609,12 @@ XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); * @return =0 if @p h128_1 == @p h128_2 * @return <0 if @p h128_1 < @p h128_2 */ -XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2); - +XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void *h128_1, XXH_NOESCAPE const void *h128_2); /******* Canonical representation *******/ -typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t; - +typedef struct { + unsigned char digest[sizeof(XXH128_hash_t)]; +} XXH128_canonical_t; /*! * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t. @@ -1615,7 +1626,7 @@ typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical * @p dst must not be `NULL`. * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash); +XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t *dst, XXH128_hash_t hash); /*! * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t. @@ -1628,19 +1639,16 @@ XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* ds * @return The converted hash. * @see @ref canonical_representation_example "Canonical Representation Example" */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src); - +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t *src); -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ +#endif /* !XXH_NO_XXH3 */ +#endif /* XXH_NO_LONG_LONG */ /*! 
* @} */ #endif /* XXHASH_H_5627135585666179 */ - - #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) #define XXHASH_H_STATIC_13879238742 /* **************************************************************************** @@ -1670,16 +1678,15 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE con * @see XXH64_state_s, XXH3_state_s */ struct XXH32_state_s { - XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ - XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ - XXH32_hash_t acc[4]; /*!< Accumulator lanes */ - unsigned char buffer[16]; /*!< Internal buffer for partial reads. */ - XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */ - XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ -}; /* typedef'd to XXH32_state_t */ + XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */ + XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */ + XXH32_hash_t acc[4]; /*!< Accumulator lanes */ + unsigned char buffer[16]; /*!< Internal buffer for partial reads. */ + XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */ + XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */ +}; /* typedef'd to XXH32_state_t */ - -#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ +#ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */ /*! * @internal @@ -1694,36 +1701,36 @@ struct XXH32_state_s { * @see XXH32_state_s, XXH3_state_s */ struct XXH64_state_s { - XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ - XXH64_hash_t acc[4]; /*!< Accumulator lanes */ - unsigned char buffer[32]; /*!< Internal buffer for partial reads.. 
*/ - XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */ - XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ - XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */ -}; /* typedef'd to XXH64_state_t */ + XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */ + XXH64_hash_t acc[4]; /*!< Accumulator lanes */ + unsigned char buffer[32]; /*!< Internal buffer for partial reads.. */ + XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */ + XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/ + XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */ +}; /* typedef'd to XXH64_state_t */ #ifndef XXH_NO_XXH3 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */ -# define XXH_ALIGN(n) _Alignas(n) +#define XXH_ALIGN(n) _Alignas(n) #elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */ /* In C++ alignas() is a keyword */ -# define XXH_ALIGN(n) alignas(n) +#define XXH_ALIGN(n) alignas(n) #elif defined(__GNUC__) -# define XXH_ALIGN(n) __attribute__ ((aligned(n))) +#define XXH_ALIGN(n) __attribute__((aligned(n))) #elif defined(_MSC_VER) -# define XXH_ALIGN(n) __declspec(align(n)) +#define XXH_ALIGN(n) __declspec(align(n)) #else -# define XXH_ALIGN(n) /* disabled */ +#define XXH_ALIGN(n) /* disabled */ #endif /* Old GCC versions only accept the attribute after the type in structures. */ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ - && ! 
(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ - && defined(__GNUC__) -# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \ + && !(defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \ + && defined(__GNUC__) +#define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align) #else -# define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type +#define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type #endif /*! @@ -1768,32 +1775,32 @@ struct XXH64_state_s { * @see XXH32_state_s, XXH64_state_s */ struct XXH3_state_s { - XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); - /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */ - XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); - /*!< Used to store a custom secret generated from a seed. */ - XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); - /*!< The internal buffer. @see XXH32_state_s::mem32 */ - XXH32_hash_t bufferedSize; - /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */ - XXH32_hash_t useSeed; - /*!< Reserved field. Needed for padding on 64-bit. */ - size_t nbStripesSoFar; - /*!< Number or stripes processed. */ - XXH64_hash_t totalLen; - /*!< Total length hashed. 64-bit even on 32-bit targets. */ - size_t nbStripesPerBlock; - /*!< Number of stripes per block. */ - size_t secretLimit; - /*!< Size of @ref customSecret or @ref extSecret */ - XXH64_hash_t seed; - /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */ - XXH64_hash_t reserved64; - /*!< Reserved field. */ - const unsigned char* extSecret; - /*!< Reference to an external secret for the _withSecret variants, NULL - * for other variants. */ - /* note: there may be some padding at the end due to alignment on 64 bytes */ + XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]); + /*!< The 8 accumulators. 
See @ref XXH32_state_s::v and @ref XXH64_state_s::v */ + XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]); + /*!< Used to store a custom secret generated from a seed. */ + XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]); + /*!< The internal buffer. @see XXH32_state_s::mem32 */ + XXH32_hash_t bufferedSize; + /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */ + XXH32_hash_t useSeed; + /*!< Reserved field. Needed for padding on 64-bit. */ + size_t nbStripesSoFar; + /*!< Number or stripes processed. */ + XXH64_hash_t totalLen; + /*!< Total length hashed. 64-bit even on 32-bit targets. */ + size_t nbStripesPerBlock; + /*!< Number of stripes per block. */ + size_t secretLimit; + /*!< Size of @ref customSecret or @ref extSecret */ + XXH64_hash_t seed; + /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */ + XXH64_hash_t reserved64; + /*!< Reserved field. */ + const unsigned char *extSecret; + /*!< Reference to an external secret for the _withSecret variants, NULL + * for other variants. */ + /* note: there may be some padding at the end due to alignment on 64 bytes */ }; /* typedef'd to XXH3_state_t */ #undef XXH_ALIGN_MEMBER @@ -1809,13 +1816,12 @@ struct XXH3_state_s { * Note that this doesn't prepare the state for a streaming operation, * it's still necessary to use XXH3_NNbits_reset*() afterwards. */ -#define XXH3_INITSTATE(XXH3_state_ptr) \ - do { \ - XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \ - tmp_xxh3_state_ptr->seed = 0; \ - tmp_xxh3_state_ptr->extSecret = NULL; \ - } while(0) - +#define XXH3_INITSTATE(XXH3_state_ptr) \ + do { \ + XXH3_state_t *tmp_xxh3_state_ptr = (XXH3_state_ptr); \ + tmp_xxh3_state_ptr->seed = 0; \ + tmp_xxh3_state_ptr->extSecret = NULL; \ + } while (0) /*! * @brief Calculates the 128-bit hash of @p data using XXH3. @@ -1833,8 +1839,7 @@ struct XXH3_state_s { * * @see @ref single_shot_example "Single Shot Example" for an example. 
*/ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed); - +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void *data, size_t len, XXH64_hash_t seed); /* === Experimental API === */ /* Symbols defined below must be considered tied to a specific library version. */ @@ -1897,7 +1902,10 @@ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, siz * } * @endcode */ -XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize); +XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void *secretBuffer, + size_t secretSize, + XXH_NOESCAPE const void *customSeed, + size_t customSeedSize); /*! * @brief Generate the same secret as the _withSeed() variants. @@ -1937,7 +1945,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer * }; * @endcode */ -XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed); +XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void *secretBuffer, XXH64_hash_t seed); /*! * @brief Maximum size of "short" key in bytes. @@ -1978,10 +1986,8 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer * This is not guaranteed when using the secret directly in "small data" scenarios, * because only portions of the secret are employed for small data. */ -XXH_PUBLIC_API XXH_PUREF XXH64_hash_t -XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed); +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecretandSeed( + XXH_NOESCAPE const void *data, size_t len, XXH_NOESCAPE const void *secret, size_t secretSize, XXH64_hash_t seed); /*! * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data. 
@@ -1997,10 +2003,11 @@ XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len, * * @see XXH3_64bits_withSecretandSeed(): contract is the same. */ -XXH_PUBLIC_API XXH_PUREF XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); +XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void *input, + size_t length, + XXH_NOESCAPE const void *secret, + size_t secretSize, + XXH64_hash_t seed64); #ifndef XXH_NO_STREAM /*! @@ -2016,10 +2023,10 @@ XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, * * @see XXH3_64bits_withSecretandSeed(). Contract is identical. */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize, + XXH64_hash_t seed64); /*! * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash. @@ -2042,27 +2049,25 @@ XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * the same as XXH3_128bits_withSeed() when @p length <= XXH3_MIDSIZE_MAX. * Results generated by this older version are wrong, hence not comparable. 
*/ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, - XXH_NOESCAPE const void* secret, size_t secretSize, - XXH64_hash_t seed64); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize, + XXH64_hash_t seed64); #endif /* !XXH_NO_STREAM */ -#endif /* !XXH_NO_XXH3 */ -#endif /* XXH_NO_LONG_LONG */ +#endif /* !XXH_NO_XXH3 */ +#endif /* XXH_NO_LONG_LONG */ #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) -# define XXH_IMPLEMENTATION +#define XXH_IMPLEMENTATION #endif -#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ - +#endif /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */ /* ======================================================================== */ /* ======================================================================== */ /* ======================================================================== */ - /*-********************************************************************** * xxHash implementation *-********************************************************************** @@ -2085,13 +2090,13 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * which can then be linked into the final binary. ************************************************************************/ -#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ - || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) -# define XXH_IMPLEM_13a8737387 +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) || defined(XXH_IMPLEMENTATION)) && \ + !defined(XXH_IMPLEM_13a8737387) +#define XXH_IMPLEM_13a8737387 /* ************************************* -* Tuning parameters -***************************************/ + * Tuning parameters + ***************************************/ /*! 
* @defgroup tuning Tuning parameters @@ -2105,8 +2110,8 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * * Useful if only using the @ref XXH32_family and you have a strict C90 compiler. */ -# define XXH_NO_LONG_LONG -# undef XXH_NO_LONG_LONG /* don't actually */ +#define XXH_NO_LONG_LONG +#undef XXH_NO_LONG_LONG /* don't actually */ /*! * @brief Controls how unaligned memory is accessed. * @@ -2157,7 +2162,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * * Prefer these methods in priority order (0 > 3 > 1 > 2) */ -# define XXH_FORCE_MEMORY_ACCESS 0 +#define XXH_FORCE_MEMORY_ACCESS 0 /*! * @def XXH_SIZE_OPT @@ -2185,7 +2190,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * Performance may cry. For example, the single shot functions just use the * streaming API. */ -# define XXH_SIZE_OPT 0 +#define XXH_SIZE_OPT 0 /*! * @def XXH_FORCE_ALIGN_CHECK @@ -2215,7 +2220,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * * This option does not affect XXH3 (only XXH32 and XXH64). */ -# define XXH_FORCE_ALIGN_CHECK 0 +#define XXH_FORCE_ALIGN_CHECK 0 /*! * @def XXH_NO_INLINE_HINTS @@ -2237,7 +2242,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if * @ref XXH_SIZE_OPT >= 1, this will automatically be defined. */ -# define XXH_NO_INLINE_HINTS 0 +#define XXH_NO_INLINE_HINTS 0 /*! * @def XXH3_INLINE_SECRET @@ -2254,7 +2259,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * that are *sometimes* force inline on -Og, and it is impossible to automatically * detect this optimization level. */ -# define XXH3_INLINE_SECRET 0 +#define XXH3_INLINE_SECRET 0 /*! 
* @def XXH32_ENDJMP @@ -2266,7 +2271,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * * This setting is only possibly making a difference for very small inputs. */ -# define XXH32_ENDJMP 0 +#define XXH32_ENDJMP 0 /*! * @internal @@ -2275,8 +2280,8 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * For compatibility with code that uses xxHash's internals before the names * were changed to improve namespacing. There is no other reason to use this. */ -# define XXH_OLD_NAMES -# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ +#define XXH_OLD_NAMES +#undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ /*! * @def XXH_NO_STREAM @@ -2286,62 +2291,61 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * the streaming functions can improve code size significantly, especially with * the @ref XXH3_family which tends to make constant folded copies of itself. */ -# define XXH_NO_STREAM -# undef XXH_NO_STREAM /* don't actually */ -#endif /* XXH_DOXYGEN */ +#define XXH_NO_STREAM +#undef XXH_NO_STREAM /* don't actually */ +#endif /* XXH_DOXYGEN */ /*! * @} */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ - /* prefer __packed__ structures (method 1) for GCC - * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy - * which for some reason does unaligned loads. */ -# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ + /* prefer __packed__ structures (method 1) for GCC + * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy + * which for some reason does unaligned loads. 
*/ +#if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +#define XXH_FORCE_MEMORY_ACCESS 1 +#endif #endif #ifndef XXH_SIZE_OPT - /* default to 1 for -Os or -Oz */ -# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__) -# define XXH_SIZE_OPT 1 -# else -# define XXH_SIZE_OPT 0 -# endif +/* default to 1 for -Os or -Oz */ +#if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__) +#define XXH_SIZE_OPT 1 +#else +#define XXH_SIZE_OPT 0 +#endif #endif -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ - /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */ -# if XXH_SIZE_OPT >= 1 || \ - defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \ - || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */ -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ + /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */ +#if XXH_SIZE_OPT >= 1 || defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || \ + defined(__ARM_FEATURE_UNALIGNED) || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || \ + defined(_M_ARM) /* visual */ +#define XXH_FORCE_ALIGN_CHECK 0 +#else +#define XXH_FORCE_ALIGN_CHECK 1 +#endif #endif #ifndef XXH_NO_INLINE_HINTS -# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */ -# define XXH_NO_INLINE_HINTS 1 -# else -# define XXH_NO_INLINE_HINTS 0 -# endif +#if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */ +#define XXH_NO_INLINE_HINTS 1 +#else +#define XXH_NO_INLINE_HINTS 0 +#endif #endif #ifndef XXH3_INLINE_SECRET -# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \ - || !defined(XXH_INLINE_ALL) -# define XXH3_INLINE_SECRET 0 -# else -# define XXH3_INLINE_SECRET 1 
-# endif +#if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) || !defined(XXH_INLINE_ALL) +#define XXH3_INLINE_SECRET 0 +#else +#define XXH3_INLINE_SECRET 1 +#endif #endif #ifndef XXH32_ENDJMP /* generally preferable for performance */ -# define XXH32_ENDJMP 0 +#define XXH32_ENDJMP 0 #endif /*! @@ -2349,10 +2353,9 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * @{ */ - /* ************************************* -* Includes & Memory related functions -***************************************/ + * Includes & Memory related functions + ***************************************/ #if defined(XXH_NO_STREAM) /* nothing */ #elif defined(XXH_NO_STDLIB) @@ -2366,8 +2369,11 @@ XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, * without access to dynamic allocation. */ -static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; } -static void XXH_free(void* p) { (void)p; } +static XXH_CONSTF void *XXH_malloc(size_t s) { + (void)s; + return NULL; +} +static void XXH_free(void *p) { (void)p; } #else @@ -2381,15 +2387,15 @@ static void XXH_free(void* p) { (void)p; } * @internal * @brief Modify this function to use a different routine than malloc(). */ -static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); } +static XXH_MALLOCF void *XXH_malloc(size_t s) { return malloc(s); } /*! * @internal * @brief Modify this function to use a different routine than free(). */ -static void XXH_free(void* p) { free(p); } +static void XXH_free(void *p) { free(p); } -#endif /* XXH_NO_STDLIB */ +#endif /* XXH_NO_STDLIB */ #include @@ -2397,76 +2403,70 @@ static void XXH_free(void* p) { free(p); } * @internal * @brief Modify this function to use a different routine than memcpy(). 
*/ -static void* XXH_memcpy(void* dest, const void* src, size_t size) -{ - return memcpy(dest,src,size); -} - -#include /* ULLONG_MAX */ +static void *XXH_memcpy(void *dest, const void *src, size_t size) { return memcpy(dest, src, size); } +#include /* ULLONG_MAX */ /* ************************************* -* Compiler Specific Options -***************************************/ -#ifdef _MSC_VER /* Visual Studio warning fix */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - -#if XXH_NO_INLINE_HINTS /* disable inlining hints */ -# if defined(__GNUC__) || defined(__clang__) -# define XXH_FORCE_INLINE static __attribute__((__unused__)) -# else -# define XXH_FORCE_INLINE static -# endif -# define XXH_NO_INLINE static + * Compiler Specific Options + ***************************************/ +#ifdef _MSC_VER /* Visual Studio warning fix */ +#pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + +#if XXH_NO_INLINE_HINTS /* disable inlining hints */ +#if defined(__GNUC__) || defined(__clang__) +#define XXH_FORCE_INLINE static __attribute__((__unused__)) +#else +#define XXH_FORCE_INLINE static +#endif +#define XXH_NO_INLINE static /* enable inlining hints */ #elif defined(__GNUC__) || defined(__clang__) -# define XXH_FORCE_INLINE static __inline__ __attribute__((__always_inline__, __unused__)) -# define XXH_NO_INLINE static __attribute__((__noinline__)) -#elif defined(_MSC_VER) /* Visual Studio */ -# define XXH_FORCE_INLINE static __forceinline -# define XXH_NO_INLINE static __declspec(noinline) -#elif defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ -# define XXH_FORCE_INLINE static inline -# define XXH_NO_INLINE static +#define XXH_FORCE_INLINE static __inline__ __attribute__((__always_inline__, __unused__)) +#define XXH_NO_INLINE static __attribute__((__noinline__)) +#elif defined(_MSC_VER) /* Visual Studio */ +#define 
XXH_FORCE_INLINE static __forceinline +#define XXH_NO_INLINE static __declspec(noinline) +#elif defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */ +#define XXH_FORCE_INLINE static inline +#define XXH_NO_INLINE static #else -# define XXH_FORCE_INLINE static -# define XXH_NO_INLINE static +#define XXH_FORCE_INLINE static +#define XXH_NO_INLINE static #endif #if defined(XXH_INLINE_ALL) -# define XXH_STATIC XXH_FORCE_INLINE +#define XXH_STATIC XXH_FORCE_INLINE #else -# define XXH_STATIC static +#define XXH_STATIC static #endif #if XXH3_INLINE_SECRET -# define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE +#define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE #else -# define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE +#define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE #endif -#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ -# define XXH_RESTRICT /* disable */ -#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ -# define XXH_RESTRICT restrict -#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \ - || (defined (__clang__)) \ - || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \ - || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)) +#if ((defined(sun) || defined(__sun)) && \ + __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +#define XXH_RESTRICT /* disable */ +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ +#define XXH_RESTRICT restrict +#elif (defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) || (defined(__clang__)) || \ + (defined(_MSC_VER) && (_MSC_VER >= 1400)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300)) /* * There are a LOT more compilers that recognize __restrict but this * covers the major ones. 
*/ -# define XXH_RESTRICT __restrict +#define XXH_RESTRICT __restrict #else -# define XXH_RESTRICT /* disable */ +#define XXH_RESTRICT /* disable */ #endif /* ************************************* -* Debug -***************************************/ + * Debug + ***************************************/ /*! * @ingroup tuning * @def XXH_DEBUGLEVEL @@ -2476,34 +2476,45 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) * compiler's command line options. The value must be a number. */ #ifndef XXH_DEBUGLEVEL -# ifdef DEBUGLEVEL /* backwards compat */ -# define XXH_DEBUGLEVEL DEBUGLEVEL -# else -# define XXH_DEBUGLEVEL 0 -# endif +#ifdef DEBUGLEVEL /* backwards compat */ +#define XXH_DEBUGLEVEL DEBUGLEVEL +#else +#define XXH_DEBUGLEVEL 0 +#endif #endif -#if (XXH_DEBUGLEVEL>=1) -# include /* note: can still be disabled with NDEBUG */ -# define XXH_ASSERT(c) assert(c) +#if (XXH_DEBUGLEVEL >= 1) +#include /* note: can still be disabled with NDEBUG */ +#define XXH_ASSERT(c) assert(c) +#else +#if defined(__INTEL_COMPILER) +#define XXH_ASSERT(c) XXH_ASSUME((unsigned char)(c)) #else -# if defined(__INTEL_COMPILER) -# define XXH_ASSERT(c) XXH_ASSUME((unsigned char) (c)) -# else -# define XXH_ASSERT(c) XXH_ASSUME(c) -# endif +#define XXH_ASSERT(c) XXH_ASSUME(c) +#endif #endif /* note: use after variable declarations */ #ifndef XXH_STATIC_ASSERT -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0) -# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */ -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0) -# else -# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 
1 : -1]; }; } while(0) -# endif -# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c) +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */ +#define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + _Static_assert((c), m); \ + } while (0) +#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */ +#define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + static_assert((c), m); \ + } while (0) +#else +#define XXH_STATIC_ASSERT_WITH_MESSAGE(c, m) \ + do { \ + struct xxh_sa { \ + char x[(c) ? 1 : -1]; \ + }; \ + } while (0) +#endif +#define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c), #c) #endif /*! @@ -2523,41 +2534,41 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) * XXH3_initCustomSecret_scalar(). */ #if defined(__GNUC__) || defined(__clang__) -# define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var)) +#define XXH_COMPILER_GUARD(var) __asm__("" : "+r"(var)) #else -# define XXH_COMPILER_GUARD(var) ((void)0) +#define XXH_COMPILER_GUARD(var) ((void)0) #endif /* Specifically for NEON vectors which use the "w" constraint, on * Clang. 
*/ #if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__) -# define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var)) +#define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w"(var)) #else -# define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0) +#define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0) #endif /* ************************************* -* Basic Types -***************************************/ -#if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# ifdef _AIX -# include -# else -# include -# endif - typedef uint8_t xxh_u8; + * Basic Types + ***************************************/ +#if !defined(__VMS) && (defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 \ + */)) +#ifdef _AIX +#include +#else +#include +#endif +typedef uint8_t xxh_u8; #else - typedef unsigned char xxh_u8; +typedef unsigned char xxh_u8; #endif typedef XXH32_hash_t xxh_u32; #ifdef XXH_OLD_NAMES -# warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly" -# define BYTE xxh_u8 -# define U8 xxh_u8 -# define U32 xxh_u32 +#warning \ + "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly" +#define BYTE xxh_u8 +#define U8 xxh_u8 +#define U32 xxh_u32 #endif /* *** Memory access *** */ @@ -2612,20 +2623,20 @@ typedef XXH32_hash_t xxh_u32; * @return The 32-bit little endian integer from the bytes at @p ptr. */ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) /* * Manual byteshift. Best for old compilers which don't inline memcpy. * We actually directly use XXH_readLE32 and XXH_readBE32. 
*/ -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) /* * Force direct memory access. Only works on CPU which support unaligned memory * access in hardware. */ -static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } +static xxh_u32 XXH_read32(const void *memPtr) { return *(const xxh_u32 *)memPtr; } -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) /* * __attribute__((aligned(1))) is supported by gcc and clang. Originally the @@ -2635,12 +2646,13 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; * https://gcc.godbolt.org/z/xYez1j67Y. */ #ifdef XXH_OLD_NAMES -typedef union { xxh_u32 u32; } __attribute__((__packed__)) unalign; +typedef union { + xxh_u32 u32; +} __attribute__((__packed__)) unalign; #endif -static xxh_u32 XXH_read32(const void* ptr) -{ - typedef __attribute__((__aligned__(1))) xxh_u32 xxh_unalign32; - return *((const xxh_unalign32*)ptr); +static xxh_u32 XXH_read32(const void *ptr) { + typedef __attribute__((__aligned__(1))) xxh_u32 xxh_unalign32; + return *((const xxh_unalign32 *)ptr); } #else @@ -2649,15 +2661,13 @@ static xxh_u32 XXH_read32(const void* ptr) * Portable and safe solution. Generally efficient. 
* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html */ -static xxh_u32 XXH_read32(const void* memPtr) -{ - xxh_u32 val; - XXH_memcpy(&val, memPtr, sizeof(val)); - return val; +static xxh_u32 XXH_read32(const void *memPtr) { + xxh_u32 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; } -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* *** Endianness *** */ @@ -2682,49 +2692,44 @@ static xxh_u32 XXH_read32(const void* memPtr) * Try to detect endianness automatically, to avoid the nonstandard behavior * in `XXH_isLittleEndian()` */ -# if defined(_WIN32) /* Windows is always little endian */ \ - || defined(__LITTLE_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -# define XXH_CPU_LITTLE_ENDIAN 1 -# elif defined(__BIG_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define XXH_CPU_LITTLE_ENDIAN 0 -# else +#if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define XXH_CPU_LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define XXH_CPU_LITTLE_ENDIAN 0 +#else /*! * @internal * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. * * Most compilers will constant fold this. */ -static int XXH_isLittleEndian(void) -{ - /* - * Portable and well-defined behavior. - * Don't use static: it is detrimental to performance. - */ - const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 }; - return one.c[0]; -} -# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() -# endif +static int XXH_isLittleEndian(void) { + /* + * Portable and well-defined behavior. + * Don't use static: it is detrimental to performance. 
+ */ + const union { + xxh_u32 u; + xxh_u8 c[4]; + } one = {1}; + return one.c[0]; +} +#define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +#endif #endif - - - /* **************************************** -* Compiler-specific Functions and Macros -******************************************/ + * Compiler-specific Functions and Macros + ******************************************/ #define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) #ifdef __has_builtin -# define XXH_HAS_BUILTIN(x) __has_builtin(x) +#define XXH_HAS_BUILTIN(x) __has_builtin(x) #else -# define XXH_HAS_BUILTIN(x) 0 +#define XXH_HAS_BUILTIN(x) 0 #endif - - /* * C23 and future versions have standard "unreachable()". * Once it has been implemented reliably we can add it as an @@ -2753,19 +2758,22 @@ static int XXH_isLittleEndian(void) */ #if XXH_HAS_BUILTIN(__builtin_unreachable) -# define XXH_UNREACHABLE() __builtin_unreachable() +#define XXH_UNREACHABLE() __builtin_unreachable() #elif defined(_MSC_VER) -# define XXH_UNREACHABLE() __assume(0) +#define XXH_UNREACHABLE() __assume(0) #else -# define XXH_UNREACHABLE() +#define XXH_UNREACHABLE() #endif #if XXH_HAS_BUILTIN(__builtin_assume) -# define XXH_ASSUME(c) __builtin_assume(c) +#define XXH_ASSUME(c) __builtin_assume(c) #else -# define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); } +#define XXH_ASSUME(c) \ + if (!(c)) { \ + XXH_UNREACHABLE(); \ + } #endif /*! @@ -2781,20 +2789,19 @@ static int XXH_isLittleEndian(void) * @p x and @p r may be evaluated multiple times. * @return The rotated result. 
*/ -#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \ - && XXH_HAS_BUILTIN(__builtin_rotateleft64) -# define XXH_rotl32 __builtin_rotateleft32 -# define XXH_rotl64 __builtin_rotateleft64 +#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) && XXH_HAS_BUILTIN(__builtin_rotateleft64) +#define XXH_rotl32 __builtin_rotateleft32 +#define XXH_rotl64 __builtin_rotateleft64 #elif XXH_HAS_BUILTIN(__builtin_stdc_rotate_left) -# define XXH_rotl32 __builtin_stdc_rotate_left -# define XXH_rotl64 __builtin_stdc_rotate_left +#define XXH_rotl32 __builtin_stdc_rotate_left +#define XXH_rotl64 __builtin_stdc_rotate_left /* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */ #elif defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) +#define XXH_rotl32(x, r) _rotl(x, r) +#define XXH_rotl64(x, r) _rotl64(x, r) #else -# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r)))) +#define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r)))) #endif /*! @@ -2805,32 +2812,27 @@ static int XXH_isLittleEndian(void) * @param x The 32-bit integer to byteswap. * @return @p x, byteswapped. 
*/ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong +#if defined(_MSC_VER) /* Visual Studio */ +#define XXH_swap32 _byteswap_ulong #elif XXH_GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 +#define XXH_swap32 __builtin_bswap32 #else -static xxh_u32 XXH_swap32 (xxh_u32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); +static xxh_u32 XXH_swap32(xxh_u32 x) { + return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); } #endif - /* *************************** -* Memory reads -*****************************/ + * Memory reads + *****************************/ /*! * @internal * @brief Enum to indicate whether a pointer is aligned. */ typedef enum { - XXH_aligned, /*!< Aligned */ - XXH_unaligned /*!< Possibly unaligned */ + XXH_aligned, /*!< Aligned */ + XXH_unaligned /*!< Possibly unaligned */ } XXH_alignment; /* @@ -2838,59 +2840,45 @@ typedef enum { * * This is ideal for older compilers which don't inline memcpy. 
*/ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[0] - | ((xxh_u32)bytePtr[1] << 8) - | ((xxh_u32)bytePtr[2] << 16) - | ((xxh_u32)bytePtr[3] << 24); +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void *memPtr) { + const xxh_u8 *bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] | ((xxh_u32)bytePtr[1] << 8) | ((xxh_u32)bytePtr[2] << 16) | ((xxh_u32)bytePtr[3] << 24); } -XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[3] - | ((xxh_u32)bytePtr[2] << 8) - | ((xxh_u32)bytePtr[1] << 16) - | ((xxh_u32)bytePtr[0] << 24); +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void *memPtr) { + const xxh_u8 *bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[3] | ((xxh_u32)bytePtr[2] << 8) | ((xxh_u32)bytePtr[1] << 16) | ((xxh_u32)bytePtr[0] << 24); } #else -XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void *ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); } -static xxh_u32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +static xxh_u32 XXH_readBE32(const void *ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } #endif -XXH_FORCE_INLINE xxh_u32 -XXH_readLE32_align(const void* ptr, XXH_alignment align) -{ - if (align==XXH_unaligned) { - return XXH_readLE32(ptr); - } else { - return XXH_CPU_LITTLE_ENDIAN ? 
*(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); - } +XXH_FORCE_INLINE xxh_u32 XXH_readLE32_align(const void *ptr, XXH_alignment align) { + if (align == XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32 *)ptr : XXH_swap32(*(const xxh_u32 *)ptr); + } } - /* ************************************* -* Misc -***************************************/ + * Misc + ***************************************/ /*! @ingroup public */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - +XXH_PUBLIC_API unsigned XXH_versionNumber(void) { return XXH_VERSION_NUMBER; } /* ******************************************************************* -* 32-bit hash functions -*********************************************************************/ + * 32-bit hash functions + *********************************************************************/ /*! * @} * @defgroup XXH32_impl XXH32 implementation @@ -2899,19 +2887,19 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } * Details on the XXH32 implementation. 
* @{ */ - /* #define instead of static const, to be used as initializers */ -#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ -#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ -#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ -#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ -#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ +/* #define instead of static const, to be used as initializers */ +#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ +#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ +#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ +#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ +#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ #ifdef XXH_OLD_NAMES -# define PRIME32_1 XXH_PRIME32_1 -# define PRIME32_2 XXH_PRIME32_2 -# define PRIME32_3 XXH_PRIME32_3 -# define PRIME32_4 XXH_PRIME32_4 -# define PRIME32_5 XXH_PRIME32_5 +#define PRIME32_1 XXH_PRIME32_1 +#define PRIME32_2 XXH_PRIME32_2 +#define PRIME32_3 XXH_PRIME32_3 +#define PRIME32_4 XXH_PRIME32_4 +#define PRIME32_5 XXH_PRIME32_5 #endif /*! @@ -2925,51 +2913,50 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } * @param input The stripe of input to mix. * @return The mixed accumulator lane. 
*/ -static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) -{ - acc += input * XXH_PRIME32_2; - acc = XXH_rotl32(acc, 13); - acc *= XXH_PRIME32_1; +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) { + acc += input * XXH_PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= XXH_PRIME32_1; #if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) - /* - * UGLY HACK: - * A compiler fence is used to prevent GCC and Clang from - * autovectorizing the XXH32 loop (pragmas and attributes don't work for some - * reason) without globally disabling SSE4.1. - * - * The reason we want to avoid vectorization is because despite working on - * 4 integers at a time, there are multiple factors slowing XXH32 down on - * SSE4: - * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on - * newer chips!) making it slightly slower to multiply four integers at - * once compared to four integers independently. Even when pmulld was - * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE - * just to multiply unless doing a long operation. - * - * - Four instructions are required to rotate, - * movqda tmp, v // not required with VEX encoding - * pslld tmp, 13 // tmp <<= 13 - * psrld v, 19 // x >>= 19 - * por v, tmp // x |= tmp - * compared to one for scalar: - * roll v, 13 // reliably fast across the board - * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason - * - * - Instruction level parallelism is actually more beneficial here because - * the SIMD actually serializes this operation: While v1 is rotating, v2 - * can load data, while v3 can multiply. SSE forces them to operate - * together. - * - * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing - * the loop. NEON is only faster on the A53, and with the newer cores, it is less - * than half the speed. 
- * - * Additionally, this is used on WASM SIMD128 because it JITs to the same - * SIMD instructions and has the same issue. - */ - XXH_COMPILER_GUARD(acc); + /* + * UGLY HACK: + * A compiler fence is used to prevent GCC and Clang from + * autovectorizing the XXH32 loop (pragmas and attributes don't work for some + * reason) without globally disabling SSE4.1. + * + * The reason we want to avoid vectorization is because despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on + * newer chips!) making it slightly slower to multiply four integers at + * once compared to four integers independently. Even when pmulld was + * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE + * just to multiply unless doing a long operation. + * + * - Four instructions are required to rotate, + * movqda tmp, v // not required with VEX encoding + * pslld tmp, 13 // tmp <<= 13 + * psrld v, 19 // x >>= 19 + * por v, tmp // x |= tmp + * compared to one for scalar: + * roll v, 13 // reliably fast across the board + * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason + * + * - Instruction level parallelism is actually more beneficial here because + * the SIMD actually serializes this operation: While v1 is rotating, v2 + * can load data, while v3 can multiply. SSE forces them to operate + * together. + * + * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing + * the loop. NEON is only faster on the A53, and with the newer cores, it is less + * than half the speed. + * + * Additionally, this is used on WASM SIMD128 because it JITs to the same + * SIMD instructions and has the same issue. + */ + XXH_COMPILER_GUARD(acc); #endif - return acc; + return acc; } /*! @@ -2982,14 +2969,13 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) * @param hash The hash to avalanche. * @return The avalanched hash. 
*/ -static xxh_u32 XXH32_avalanche(xxh_u32 hash) -{ - hash ^= hash >> 15; - hash *= XXH_PRIME32_2; - hash ^= hash >> 13; - hash *= XXH_PRIME32_3; - hash ^= hash >> 16; - return hash; +static xxh_u32 XXH32_avalanche(xxh_u32 hash) { + hash ^= hash >> 15; + hash *= XXH_PRIME32_2; + hash ^= hash >> 13; + hash *= XXH_PRIME32_3; + hash ^= hash >> 16; + return hash; } #define XXH_get32bits(p) XXH_readLE32_align(p, align) @@ -2998,14 +2984,12 @@ static xxh_u32 XXH32_avalanche(xxh_u32 hash) * @internal * @brief Sets up the initial accumulator state for XXH32(). */ -XXH_FORCE_INLINE void -XXH32_initAccs(xxh_u32 *acc, xxh_u32 seed) -{ - XXH_ASSERT(acc != NULL); - acc[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; - acc[1] = seed + XXH_PRIME32_2; - acc[2] = seed + 0; - acc[3] = seed - XXH_PRIME32_1; +XXH_FORCE_INLINE void XXH32_initAccs(xxh_u32 *acc, xxh_u32 seed) { + XXH_ASSERT(acc != NULL); + acc[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + acc[1] = seed + XXH_PRIME32_2; + acc[2] = seed + 0; + acc[3] = seed - XXH_PRIME32_1; } /*! @@ -3015,38 +2999,33 @@ XXH32_initAccs(xxh_u32 *acc, xxh_u32 seed) * @return the end input pointer. 
*/ XXH_FORCE_INLINE const xxh_u8 * -XXH32_consumeLong( - xxh_u32 *XXH_RESTRICT acc, - xxh_u8 const *XXH_RESTRICT input, - size_t len, - XXH_alignment align -) -{ - const xxh_u8* const bEnd = input + len; - const xxh_u8* const limit = bEnd - 15; - XXH_ASSERT(acc != NULL); - XXH_ASSERT(input != NULL); - XXH_ASSERT(len >= 16); - do { - acc[0] = XXH32_round(acc[0], XXH_get32bits(input)); input += 4; - acc[1] = XXH32_round(acc[1], XXH_get32bits(input)); input += 4; - acc[2] = XXH32_round(acc[2], XXH_get32bits(input)); input += 4; - acc[3] = XXH32_round(acc[3], XXH_get32bits(input)); input += 4; - } while (input < limit); - - return input; +XXH32_consumeLong(xxh_u32 *XXH_RESTRICT acc, xxh_u8 const *XXH_RESTRICT input, size_t len, XXH_alignment align) { + const xxh_u8 *const bEnd = input + len; + const xxh_u8 *const limit = bEnd - 15; + XXH_ASSERT(acc != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(len >= 16); + do { + acc[0] = XXH32_round(acc[0], XXH_get32bits(input)); + input += 4; + acc[1] = XXH32_round(acc[1], XXH_get32bits(input)); + input += 4; + acc[2] = XXH32_round(acc[2], XXH_get32bits(input)); + input += 4; + acc[3] = XXH32_round(acc[3], XXH_get32bits(input)); + input += 4; + } while (input < limit); + + return input; } /*! * @internal * @brief Merges the accumulator lanes together for XXH32() */ -XXH_FORCE_INLINE XXH_PUREF xxh_u32 -XXH32_mergeAccs(const xxh_u32 *acc) -{ - XXH_ASSERT(acc != NULL); - return XXH_rotl32(acc[0], 1) + XXH_rotl32(acc[1], 7) - + XXH_rotl32(acc[2], 12) + XXH_rotl32(acc[3], 18); +XXH_FORCE_INLINE XXH_PUREF xxh_u32 XXH32_mergeAccs(const xxh_u32 *acc) { + XXH_ASSERT(acc != NULL); + return XXH_rotl32(acc[0], 1) + XXH_rotl32(acc[1], 7) + XXH_rotl32(acc[2], 12) + XXH_rotl32(acc[3], 18); } /*! @@ -3064,85 +3043,102 @@ XXH32_mergeAccs(const xxh_u32 *acc) * @return The finalized hash. * @see XXH64_finalize(). 
*/ -static XXH_PUREF xxh_u32 -XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) -{ -#define XXH_PROCESS1 do { \ - hash += (*ptr++) * XXH_PRIME32_5; \ - hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ -} while (0) - -#define XXH_PROCESS4 do { \ - hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ - ptr += 4; \ - hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ -} while (0) - - if (ptr==NULL) XXH_ASSERT(len == 0); - - /* Compact rerolled version; generally faster */ - if (!XXH32_ENDJMP) { - len &= 15; - while (len >= 4) { - XXH_PROCESS4; - len -= 4; - } - while (len > 0) { - XXH_PROCESS1; - --len; - } - return XXH32_avalanche(hash); - } else { - switch(len&15) /* or switch(bEnd - p) */ { - case 12: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 8: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 4: XXH_PROCESS4; - return XXH32_avalanche(hash); - - case 13: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 9: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 5: XXH_PROCESS4; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 14: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 10: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 6: XXH_PROCESS4; - XXH_PROCESS1; - XXH_PROCESS1; - return XXH32_avalanche(hash); - - case 15: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 11: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 7: XXH_PROCESS4; - XXH_FALLTHROUGH; /* fallthrough */ - case 3: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 2: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 1: XXH_PROCESS1; - XXH_FALLTHROUGH; /* fallthrough */ - case 0: return XXH32_avalanche(hash); - } - XXH_ASSERT(0); - return hash; /* reaching this point is deemed impossible */ +static XXH_PUREF xxh_u32 XXH32_finalize(xxh_u32 hash, const xxh_u8 *ptr, size_t len, XXH_alignment align) { +#define XXH_PROCESS1 \ + do { \ + hash += (*ptr++) * XXH_PRIME32_5; \ + hash = 
XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ + } while (0) + +#define XXH_PROCESS4 \ + do { \ + hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ + } while (0) + + if (ptr == NULL) + XXH_ASSERT(len == 0); + + /* Compact rerolled version; generally faster */ + if (!XXH32_ENDJMP) { + len &= 15; + while (len >= 4) { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(hash); + } else { + switch (len & 15) /* or switch(bEnd - p) */ { + case 12: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 8: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 4: + XXH_PROCESS4; + return XXH32_avalanche(hash); + + case 13: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 9: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 5: + XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 14: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 10: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 6: + XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 15: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 11: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 7: + XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 3: + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 2: + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 1: + XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 0: + return XXH32_avalanche(hash); } + XXH_ASSERT(0); + return hash; /* reaching this point is deemed impossible */ + } } #ifdef XXH_OLD_NAMES -# define PROCESS1 XXH_PROCESS1 -# define PROCESS4 XXH_PROCESS4 +#define PROCESS1 XXH_PROCESS1 +#define PROCESS4 XXH_PROCESS4 #else -# undef XXH_PROCESS1 -# undef XXH_PROCESS4 +#undef XXH_PROCESS1 +#undef XXH_PROCESS4 #endif /*! 
@@ -3153,165 +3149,151 @@ XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) * @param align Whether @p input is aligned. * @return The calculated hash. */ -XXH_FORCE_INLINE XXH_PUREF xxh_u32 -XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) -{ - xxh_u32 h32; +XXH_FORCE_INLINE XXH_PUREF xxh_u32 XXH32_endian_align(const xxh_u8 *input, + size_t len, + xxh_u32 seed, + XXH_alignment align) { + xxh_u32 h32; - if (input==NULL) XXH_ASSERT(len == 0); + if (input == NULL) + XXH_ASSERT(len == 0); - if (len>=16) { - xxh_u32 acc[4]; - XXH32_initAccs(acc, seed); + if (len >= 16) { + xxh_u32 acc[4]; + XXH32_initAccs(acc, seed); - input = XXH32_consumeLong(acc, input, len, align); + input = XXH32_consumeLong(acc, input, len, align); - h32 = XXH32_mergeAccs(acc); - } else { - h32 = seed + XXH_PRIME32_5; - } + h32 = XXH32_mergeAccs(acc); + } else { + h32 = seed + XXH_PRIME32_5; + } - h32 += (xxh_u32)len; + h32 += (xxh_u32)len; - return XXH32_finalize(h32, input, len&15, align); + return XXH32_finalize(h32, input, len & 15, align); } /*! 
@ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) -{ +XXH_PUBLIC_API XXH32_hash_t XXH32(const void *input, size_t len, XXH32_hash_t seed) { #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_state_t state; - XXH32_reset(&state, seed); - XXH32_update(&state, (const xxh_u8*)input, len); - return XXH32_digest(&state); + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8 *)input, len); + return XXH32_digest(&state); #else - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); - } } + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8 *)input, len, seed, XXH_aligned); + } + } - return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + return XXH32_endian_align((const xxh_u8 *)input, len, seed, XXH_unaligned); #endif } - - /******* Hash streaming *******/ #ifndef XXH_NO_STREAM /*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} +XXH_PUBLIC_API XXH32_state_t *XXH32_createState(void) { return (XXH32_state_t *)XXH_malloc(sizeof(XXH32_state_t)); } /*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t *statePtr) { + XXH_free(statePtr); + return XXH_OK; } /*! 
@ingroup XXH32_family */ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t *dstState, const XXH32_state_t *srcState) { + XXH_memcpy(dstState, srcState, sizeof(*dstState)); } /*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - XXH32_initAccs(statePtr->acc, seed); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t *statePtr, XXH32_hash_t seed) { + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + XXH32_initAccs(statePtr->acc, seed); + return XXH_OK; } - /*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH_errorcode -XXH32_update(XXH32_state_t* state, const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } +XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t *state, const void *input, size_t len) { + if (input == NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } - state->total_len_32 += (XXH32_hash_t)len; - state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len >= 16) | (state->total_len_32 >= 16)); - XXH_ASSERT(state->bufferedSize < sizeof(state->buffer)); - if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */ - XXH_memcpy(state->buffer + state->bufferedSize, input, len); - state->bufferedSize += (XXH32_hash_t)len; - return XXH_OK; + XXH_ASSERT(state->bufferedSize < sizeof(state->buffer)); + if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */ + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + + { + const xxh_u8 *xinput = (const xxh_u8 *)input; + 
const xxh_u8 *const bEnd = xinput + len; + + if (state->bufferedSize) { /* non-empty buffer: complete first */ + XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize); + xinput += sizeof(state->buffer) - state->bufferedSize; + /* then process one round */ + (void)XXH32_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned); + state->bufferedSize = 0; } - { const xxh_u8* xinput = (const xxh_u8*)input; - const xxh_u8* const bEnd = xinput + len; - - if (state->bufferedSize) { /* non-empty buffer: complete first */ - XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize); - xinput += sizeof(state->buffer) - state->bufferedSize; - /* then process one round */ - (void)XXH32_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned); - state->bufferedSize = 0; - } - - XXH_ASSERT(xinput <= bEnd); - if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) { - /* Process the remaining data */ - xinput = XXH32_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned); - } + XXH_ASSERT(xinput <= bEnd); + if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) { + /* Process the remaining data */ + xinput = XXH32_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned); + } - if (xinput < bEnd) { - /* Copy the leftover to the tmp buffer */ - XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput)); - state->bufferedSize = (unsigned)(bEnd-xinput); - } + if (xinput < bEnd) { + /* Copy the leftover to the tmp buffer */ + XXH_memcpy(state->buffer, xinput, (size_t)(bEnd - xinput)); + state->bufferedSize = (unsigned)(bEnd - xinput); } + } - return XXH_OK; + return XXH_OK; } - /*! 
@ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) -{ - xxh_u32 h32; +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t *state) { + xxh_u32 h32; - if (state->large_len) { - h32 = XXH32_mergeAccs(state->acc); - } else { - h32 = state->acc[2] /* == seed */ + XXH_PRIME32_5; - } + if (state->large_len) { + h32 = XXH32_mergeAccs(state->acc); + } else { + h32 = state->acc[2] /* == seed */ + XXH_PRIME32_5; + } - h32 += state->total_len_32; + h32 += state->total_len_32; - return XXH32_finalize(h32, state->buffer, state->bufferedSize, XXH_aligned); + return XXH32_finalize(h32, state->buffer, state->bufferedSize, XXH_aligned); } #endif /* !XXH_NO_STREAM */ /******* Canonical representation *******/ /*! @ingroup XXH32_family */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t *dst, XXH32_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) + hash = XXH_swap32(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); } /*! @ingroup XXH32_family */ -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t *src) { return XXH_readBE32(src); } #ifndef XXH_NO_LONG_LONG /* ******************************************************************* -* 64-bit hash functions -*********************************************************************/ + * 64-bit hash functions + *********************************************************************/ /*! 
* @} * @ingroup impl @@ -3322,23 +3304,20 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src typedef XXH64_hash_t xxh_u64; #ifdef XXH_OLD_NAMES -# define U64 xxh_u64 +#define U64 xxh_u64 #endif -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) /* * Manual byteshift. Best for old compilers which don't inline memcpy. * We actually directly use XXH_readLE64 and XXH_readBE64. */ -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static xxh_u64 XXH_read64(const void* memPtr) -{ - return *(const xxh_u64*) memPtr; -} +static xxh_u64 XXH_read64(const void *memPtr) { return *(const xxh_u64 *)memPtr; } -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1)) /* * __attribute__((aligned(1))) is supported by gcc and clang. Originally the @@ -3348,12 +3327,14 @@ static xxh_u64 XXH_read64(const void* memPtr) * https://gcc.godbolt.org/z/xYez1j67Y. */ #ifdef XXH_OLD_NAMES -typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((__packed__)) unalign64; +typedef union { + xxh_u32 u32; + xxh_u64 u64; +} __attribute__((__packed__)) unalign64; #endif -static xxh_u64 XXH_read64(const void* ptr) -{ - typedef __attribute__((__aligned__(1))) xxh_u64 xxh_unalign64; - return *((const xxh_unalign64*)ptr); +static xxh_u64 XXH_read64(const void *ptr) { + typedef __attribute__((__aligned__(1))) xxh_u64 xxh_unalign64; + return *((const xxh_unalign64 *)ptr); } #else @@ -3362,85 +3343,61 @@ static xxh_u64 XXH_read64(const void* ptr) * Portable and safe solution. Generally efficient. 
* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html */ -static xxh_u64 XXH_read64(const void* memPtr) -{ - xxh_u64 val; - XXH_memcpy(&val, memPtr, sizeof(val)); - return val; +static xxh_u64 XXH_read64(const void *memPtr) { + xxh_u64 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; } -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap64 _byteswap_uint64 +#if defined(_MSC_VER) /* Visual Studio */ +#define XXH_swap64 _byteswap_uint64 #elif XXH_GCC_VERSION >= 403 -# define XXH_swap64 __builtin_bswap64 +#define XXH_swap64 __builtin_bswap64 #else -static xxh_u64 XXH_swap64(xxh_u64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); +static xxh_u64 XXH_swap64(xxh_u64 x) { + return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); } #endif - /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. 
*/ -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 3)) -XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[0] - | ((xxh_u64)bytePtr[1] << 8) - | ((xxh_u64)bytePtr[2] << 16) - | ((xxh_u64)bytePtr[3] << 24) - | ((xxh_u64)bytePtr[4] << 32) - | ((xxh_u64)bytePtr[5] << 40) - | ((xxh_u64)bytePtr[6] << 48) - | ((xxh_u64)bytePtr[7] << 56); -} - -XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) -{ - const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; - return bytePtr[7] - | ((xxh_u64)bytePtr[6] << 8) - | ((xxh_u64)bytePtr[5] << 16) - | ((xxh_u64)bytePtr[4] << 24) - | ((xxh_u64)bytePtr[3] << 32) - | ((xxh_u64)bytePtr[2] << 40) - | ((xxh_u64)bytePtr[1] << 48) - | ((xxh_u64)bytePtr[0] << 56); +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void *memPtr) { + const xxh_u8 *bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] | ((xxh_u64)bytePtr[1] << 8) | ((xxh_u64)bytePtr[2] << 16) | ((xxh_u64)bytePtr[3] << 24) | + ((xxh_u64)bytePtr[4] << 32) | ((xxh_u64)bytePtr[5] << 40) | ((xxh_u64)bytePtr[6] << 48) | + ((xxh_u64)bytePtr[7] << 56); +} + +XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void *memPtr) { + const xxh_u8 *bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[7] | ((xxh_u64)bytePtr[6] << 8) | ((xxh_u64)bytePtr[5] << 16) | ((xxh_u64)bytePtr[4] << 24) | + ((xxh_u64)bytePtr[3] << 32) | ((xxh_u64)bytePtr[2] << 40) | ((xxh_u64)bytePtr[1] << 48) | + ((xxh_u64)bytePtr[0] << 56); } #else -XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void *ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); } -static xxh_u64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +static xxh_u64 XXH_readBE64(const void *ptr) { + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); } #endif -XXH_FORCE_INLINE xxh_u64 -XXH_readLE64_align(const void* ptr, XXH_alignment align) -{ - if (align==XXH_unaligned) - return XXH_readLE64(ptr); - else - return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); +XXH_FORCE_INLINE xxh_u64 XXH_readLE64_align(const void *ptr, XXH_alignment align) { + if (align == XXH_unaligned) + return XXH_readLE64(ptr); + else + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64 *)ptr : XXH_swap64(*(const xxh_u64 *)ptr); } - /******* xxh64 *******/ /*! * @} @@ -3451,80 +3408,74 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align) * @{ */ /* #define rather that static const, to be used as initializers */ -#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ -#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ -#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ -#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ -#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ +#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ +#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ +#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ +#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ +#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 
0b0010011111010100111010110010111100010110010101100110011111000101 */ #ifdef XXH_OLD_NAMES -# define PRIME64_1 XXH_PRIME64_1 -# define PRIME64_2 XXH_PRIME64_2 -# define PRIME64_3 XXH_PRIME64_3 -# define PRIME64_4 XXH_PRIME64_4 -# define PRIME64_5 XXH_PRIME64_5 +#define PRIME64_1 XXH_PRIME64_1 +#define PRIME64_2 XXH_PRIME64_2 +#define PRIME64_3 XXH_PRIME64_3 +#define PRIME64_4 XXH_PRIME64_4 +#define PRIME64_5 XXH_PRIME64_5 #endif /*! @copydoc XXH32_round */ -static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) -{ - acc += input * XXH_PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= XXH_PRIME64_1; +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) { + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; #if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) - /* - * DISABLE AUTOVECTORIZATION: - * A compiler fence is used to prevent GCC and Clang from - * autovectorizing the XXH64 loop (pragmas and attributes don't work for some - * reason) without globally disabling AVX512. - * - * Autovectorization of XXH64 tends to be detrimental, - * though the exact outcome may change depending on exact cpu and compiler version. - * For information, it has been reported as detrimental for Skylake-X, - * but possibly beneficial for Zen4. - * - * The default is to disable auto-vectorization, - * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable. - */ - XXH_COMPILER_GUARD(acc); + /* + * DISABLE AUTOVECTORIZATION: + * A compiler fence is used to prevent GCC and Clang from + * autovectorizing the XXH64 loop (pragmas and attributes don't work for some + * reason) without globally disabling AVX512. + * + * Autovectorization of XXH64 tends to be detrimental, + * though the exact outcome may change depending on exact cpu and compiler version. + * For information, it has been reported as detrimental for Skylake-X, + * but possibly beneficial for Zen4. 
+ * + * The default is to disable auto-vectorization, + * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable. + */ + XXH_COMPILER_GUARD(acc); #endif - return acc; + return acc; } -static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; - return acc; +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) { + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; } /*! @copydoc XXH32_avalanche */ -static xxh_u64 XXH64_avalanche(xxh_u64 hash) -{ - hash ^= hash >> 33; - hash *= XXH_PRIME64_2; - hash ^= hash >> 29; - hash *= XXH_PRIME64_3; - hash ^= hash >> 32; - return hash; +static xxh_u64 XXH64_avalanche(xxh_u64 hash) { + hash ^= hash >> 33; + hash *= XXH_PRIME64_2; + hash ^= hash >> 29; + hash *= XXH_PRIME64_3; + hash ^= hash >> 32; + return hash; } - #define XXH_get64bits(p) XXH_readLE64_align(p, align) /*! * @internal * @brief Sets up the initial accumulator state for XXH64(). */ -XXH_FORCE_INLINE void -XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed) -{ - XXH_ASSERT(acc != NULL); - acc[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; - acc[1] = seed + XXH_PRIME64_2; - acc[2] = seed + 0; - acc[3] = seed - XXH_PRIME64_1; +XXH_FORCE_INLINE void XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed) { + XXH_ASSERT(acc != NULL); + acc[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + acc[1] = seed + XXH_PRIME64_2; + acc[2] = seed + 0; + acc[3] = seed - XXH_PRIME64_1; } /*! @@ -3534,62 +3485,57 @@ XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed) * @return the end input pointer. 
*/ XXH_FORCE_INLINE const xxh_u8 * -XXH64_consumeLong( - xxh_u64 *XXH_RESTRICT acc, - xxh_u8 const *XXH_RESTRICT input, - size_t len, - XXH_alignment align -) -{ - const xxh_u8* const bEnd = input + len; - const xxh_u8* const limit = bEnd - 31; - XXH_ASSERT(acc != NULL); - XXH_ASSERT(input != NULL); - XXH_ASSERT(len >= 32); - do { - /* reroll on 32-bit */ - if (sizeof(void *) < sizeof(xxh_u64)) { - size_t i; - for (i = 0; i < 4; i++) { - acc[i] = XXH64_round(acc[i], XXH_get64bits(input)); - input += 8; - } - } else { - acc[0] = XXH64_round(acc[0], XXH_get64bits(input)); input += 8; - acc[1] = XXH64_round(acc[1], XXH_get64bits(input)); input += 8; - acc[2] = XXH64_round(acc[2], XXH_get64bits(input)); input += 8; - acc[3] = XXH64_round(acc[3], XXH_get64bits(input)); input += 8; - } - } while (input < limit); +XXH64_consumeLong(xxh_u64 *XXH_RESTRICT acc, xxh_u8 const *XXH_RESTRICT input, size_t len, XXH_alignment align) { + const xxh_u8 *const bEnd = input + len; + const xxh_u8 *const limit = bEnd - 31; + XXH_ASSERT(acc != NULL); + XXH_ASSERT(input != NULL); + XXH_ASSERT(len >= 32); + do { + /* reroll on 32-bit */ + if (sizeof(void *) < sizeof(xxh_u64)) { + size_t i; + for (i = 0; i < 4; i++) { + acc[i] = XXH64_round(acc[i], XXH_get64bits(input)); + input += 8; + } + } else { + acc[0] = XXH64_round(acc[0], XXH_get64bits(input)); + input += 8; + acc[1] = XXH64_round(acc[1], XXH_get64bits(input)); + input += 8; + acc[2] = XXH64_round(acc[2], XXH_get64bits(input)); + input += 8; + acc[3] = XXH64_round(acc[3], XXH_get64bits(input)); + input += 8; + } + } while (input < limit); - return input; + return input; } /*! 
* @internal * @brief Merges the accumulator lanes together for XXH64() */ -XXH_FORCE_INLINE XXH_PUREF xxh_u64 -XXH64_mergeAccs(const xxh_u64 *acc) -{ - XXH_ASSERT(acc != NULL); - { - xxh_u64 h64 = XXH_rotl64(acc[0], 1) + XXH_rotl64(acc[1], 7) - + XXH_rotl64(acc[2], 12) + XXH_rotl64(acc[3], 18); - /* reroll on 32-bit */ - if (sizeof(void *) < sizeof(xxh_u64)) { - size_t i; - for (i = 0; i < 4; i++) { - h64 = XXH64_mergeRound(h64, acc[i]); - } - } else { - h64 = XXH64_mergeRound(h64, acc[0]); - h64 = XXH64_mergeRound(h64, acc[1]); - h64 = XXH64_mergeRound(h64, acc[2]); - h64 = XXH64_mergeRound(h64, acc[3]); - } - return h64; +XXH_FORCE_INLINE XXH_PUREF xxh_u64 XXH64_mergeAccs(const xxh_u64 *acc) { + XXH_ASSERT(acc != NULL); + { + xxh_u64 h64 = XXH_rotl64(acc[0], 1) + XXH_rotl64(acc[1], 7) + XXH_rotl64(acc[2], 12) + XXH_rotl64(acc[3], 18); + /* reroll on 32-bit */ + if (sizeof(void *) < sizeof(xxh_u64)) { + size_t i; + for (i = 0; i < 4; i++) { + h64 = XXH64_mergeRound(h64, acc[i]); + } + } else { + h64 = XXH64_mergeRound(h64, acc[0]); + h64 = XXH64_mergeRound(h64, acc[1]); + h64 = XXH64_mergeRound(h64, acc[2]); + h64 = XXH64_mergeRound(h64, acc[3]); } + return h64; + } } /*! @@ -3607,40 +3553,39 @@ XXH64_mergeAccs(const xxh_u64 *acc) * @return The finalized hash * @see XXH32_finalize(). 
*/ -XXH_STATIC XXH_PUREF xxh_u64 -XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) -{ - if (ptr==NULL) XXH_ASSERT(len == 0); - len &= 31; - while (len >= 8) { - xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); - ptr += 8; - hash ^= k1; - hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4; - len -= 8; - } - if (len >= 4) { - hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; - ptr += 4; - hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; - len -= 4; - } - while (len > 0) { - hash ^= (*ptr++) * XXH_PRIME64_5; - hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; - --len; - } - return XXH64_avalanche(hash); +XXH_STATIC XXH_PUREF xxh_u64 XXH64_finalize(xxh_u64 hash, const xxh_u8 *ptr, size_t len, XXH_alignment align) { + if (ptr == NULL) + XXH_ASSERT(len == 0); + len &= 31; + while (len >= 8) { + xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr)); + ptr += 8; + hash ^= k1; + hash = XXH_rotl64(hash, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; + } + if (len >= 4) { + hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1; + ptr += 4; + hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; + } + while (len > 0) { + hash ^= (*ptr++) * XXH_PRIME64_5; + hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1; + --len; + } + return XXH64_avalanche(hash); } #ifdef XXH_OLD_NAMES -# define PROCESS1_64 XXH_PROCESS1_64 -# define PROCESS4_64 XXH_PROCESS4_64 -# define PROCESS8_64 XXH_PROCESS8_64 +#define PROCESS1_64 XXH_PROCESS1_64 +#define PROCESS4_64 XXH_PROCESS4_64 +#define PROCESS8_64 XXH_PROCESS8_64 #else -# undef XXH_PROCESS1_64 -# undef XXH_PROCESS4_64 -# undef XXH_PROCESS8_64 +#undef XXH_PROCESS1_64 +#undef XXH_PROCESS4_64 +#undef XXH_PROCESS8_64 #endif /*! @@ -3651,45 +3596,46 @@ XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) * @param align Whether @p input is aligned. * @return The calculated hash. 
*/ -XXH_FORCE_INLINE XXH_PUREF xxh_u64 -XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) -{ - xxh_u64 h64; - if (input==NULL) XXH_ASSERT(len == 0); +XXH_FORCE_INLINE XXH_PUREF xxh_u64 XXH64_endian_align(const xxh_u8 *input, + size_t len, + xxh_u64 seed, + XXH_alignment align) { + xxh_u64 h64; + if (input == NULL) + XXH_ASSERT(len == 0); - if (len>=32) { /* Process a large block of data */ - xxh_u64 acc[4]; - XXH64_initAccs(acc, seed); + if (len >= 32) { /* Process a large block of data */ + xxh_u64 acc[4]; + XXH64_initAccs(acc, seed); - input = XXH64_consumeLong(acc, input, len, align); + input = XXH64_consumeLong(acc, input, len, align); - h64 = XXH64_mergeAccs(acc); - } else { - h64 = seed + XXH_PRIME64_5; - } + h64 = XXH64_mergeAccs(acc); + } else { + h64 = seed + XXH_PRIME64_5; + } - h64 += (xxh_u64) len; + h64 += (xxh_u64)len; - return XXH64_finalize(h64, input, len, align); + return XXH64_finalize(h64, input, len, align); } - /*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ +XXH_PUBLIC_API XXH64_hash_t XXH64(XXH_NOESCAPE const void *input, size_t len, XXH64_hash_t seed) { #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_state_t state; - XXH64_reset(&state, seed); - XXH64_update(&state, (const xxh_u8*)input, len); - return XXH64_digest(&state); + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, (const xxh_u8 *)input, len); + return XXH64_digest(&state); #else - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); - } } + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7) == 0) { /* 
Input is aligned, let's leverage the speed advantage */ + return XXH64_endian_align((const xxh_u8 *)input, len, seed, XXH_aligned); + } + } - return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); + return XXH64_endian_align((const xxh_u8 *)input, len, seed, XXH_unaligned); #endif } @@ -3697,117 +3643,109 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, X /******* Hash Streaming *******/ #ifndef XXH_NO_STREAM /*! @ingroup XXH64_family*/ -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} +XXH_PUBLIC_API XXH64_state_t *XXH64_createState(void) { return (XXH64_state_t *)XXH_malloc(sizeof(XXH64_state_t)); } /*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t *statePtr) { + XXH_free(statePtr); + return XXH_OK; } /*! @ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState) -{ - XXH_memcpy(dstState, srcState, sizeof(*dstState)); +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t *dstState, const XXH64_state_t *srcState) { + XXH_memcpy(dstState, srcState, sizeof(*dstState)); } /*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) -{ - XXH_ASSERT(statePtr != NULL); - memset(statePtr, 0, sizeof(*statePtr)); - XXH64_initAccs(statePtr->acc, seed); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t *statePtr, XXH64_hash_t seed) { + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + XXH64_initAccs(statePtr->acc, seed); + return XXH_OK; } /*! 
@ingroup XXH64_family */ -XXH_PUBLIC_API XXH_errorcode -XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } +XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH_NOESCAPE XXH64_state_t *state, + XXH_NOESCAPE const void *input, + size_t len) { + if (input == NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } - state->total_len += len; + state->total_len += len; - XXH_ASSERT(state->bufferedSize <= sizeof(state->buffer)); - if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */ - XXH_memcpy(state->buffer + state->bufferedSize, input, len); - state->bufferedSize += (XXH32_hash_t)len; - return XXH_OK; + XXH_ASSERT(state->bufferedSize <= sizeof(state->buffer)); + if (len < sizeof(state->buffer) - state->bufferedSize) { /* fill in tmp buffer */ + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + + { + const xxh_u8 *xinput = (const xxh_u8 *)input; + const xxh_u8 *const bEnd = xinput + len; + + if (state->bufferedSize) { /* non-empty buffer => complete first */ + XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize); + xinput += sizeof(state->buffer) - state->bufferedSize; + /* and process one round */ + (void)XXH64_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned); + state->bufferedSize = 0; } - { const xxh_u8* xinput = (const xxh_u8*)input; - const xxh_u8* const bEnd = xinput + len; - - if (state->bufferedSize) { /* non-empty buffer => complete first */ - XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize); - xinput += sizeof(state->buffer) - state->bufferedSize; - /* and process one round */ - (void)XXH64_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned); - state->bufferedSize = 0; - } - - XXH_ASSERT(xinput <= bEnd); - if 
((size_t)(bEnd - xinput) >= sizeof(state->buffer)) { - /* Process the remaining data */ - xinput = XXH64_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned); - } + XXH_ASSERT(xinput <= bEnd); + if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) { + /* Process the remaining data */ + xinput = XXH64_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned); + } - if (xinput < bEnd) { - /* Copy the leftover to the tmp buffer */ - XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput)); - state->bufferedSize = (unsigned)(bEnd-xinput); - } + if (xinput < bEnd) { + /* Copy the leftover to the tmp buffer */ + XXH_memcpy(state->buffer, xinput, (size_t)(bEnd - xinput)); + state->bufferedSize = (unsigned)(bEnd - xinput); } + } - return XXH_OK; + return XXH_OK; } - /*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) -{ - xxh_u64 h64; +XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t *state) { + xxh_u64 h64; - if (state->total_len >= 32) { - h64 = XXH64_mergeAccs(state->acc); - } else { - h64 = state->acc[2] /*seed*/ + XXH_PRIME64_5; - } + if (state->total_len >= 32) { + h64 = XXH64_mergeAccs(state->acc); + } else { + h64 = state->acc[2] /*seed*/ + XXH_PRIME64_5; + } - h64 += (xxh_u64) state->total_len; + h64 += (xxh_u64)state->total_len; - return XXH64_finalize(h64, state->buffer, (size_t)state->total_len, XXH_aligned); + return XXH64_finalize(h64, state->buffer, (size_t)state->total_len, XXH_aligned); } #endif /* !XXH_NO_STREAM */ /******* Canonical representation *******/ /*! 
@ingroup XXH64_family */ -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - XXH_memcpy(dst, &hash, sizeof(*dst)); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t *dst, XXH64_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) + hash = XXH_swap64(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); } /*! @ingroup XXH64_family */ -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t *src) { + return XXH_readBE64(src); } #ifndef XXH_NO_XXH3 /* ********************************************************************* -* XXH3 -* New generation hash designed for speed on small keys and vectorization -************************************************************************ */ + * XXH3 + * New generation hash designed for speed on small keys and vectorization + ************************************************************************ */ /*! 
* @} * @defgroup XXH3_impl XXH3 implementation @@ -3817,51 +3755,47 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can /* === Compiler specifics === */ - -#if (defined(__GNUC__) && (__GNUC__ >= 3)) \ - || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \ - || defined(__clang__) -# define XXH_likely(x) __builtin_expect(x, 1) -# define XXH_unlikely(x) __builtin_expect(x, 0) +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \ + defined(__clang__) +#define XXH_likely(x) __builtin_expect(x, 1) +#define XXH_unlikely(x) __builtin_expect(x, 0) #else -# define XXH_likely(x) (x) -# define XXH_unlikely(x) (x) +#define XXH_likely(x) (x) +#define XXH_unlikely(x) (x) #endif #ifndef XXH_HAS_INCLUDE -# ifdef __has_include +#ifdef __has_include /* * Not defined as XXH_HAS_INCLUDE(x) (function-like) because * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion) */ -# define XXH_HAS_INCLUDE __has_include -# else -# define XXH_HAS_INCLUDE(x) 0 -# endif +#define XXH_HAS_INCLUDE __has_include +#else +#define XXH_HAS_INCLUDE(x) 0 +#endif #endif #if defined(__GNUC__) || defined(__clang__) -# if defined(__ARM_FEATURE_SVE) -# include -# endif -# if defined(__ARM_NEON__) || defined(__ARM_NEON) \ - || (defined(_M_ARM) && _M_ARM >= 7) \ - || defined(_M_ARM64) || defined(_M_ARM64EC) \ - || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* WASM SIMD128 via SIMDe */ -# define inline __inline__ /* circumvent a clang bug */ -# include -# undef inline -# elif defined(__AVX2__) -# include -# elif defined(__SSE2__) -# include -# elif defined(__loongarch_sx) -# include -# endif +#if defined(__ARM_FEATURE_SVE) +#include +#endif +#if defined(__ARM_NEON__) || defined(__ARM_NEON) || (defined(_M_ARM) && _M_ARM >= 7) || defined(_M_ARM64) || \ + defined(_M_ARM64EC) || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* WASM SIMD128 via SIMDe */ +#define inline __inline__ /* circumvent a clang bug 
*/ +#include +#undef inline +#elif defined(__AVX2__) +#include +#elif defined(__SSE2__) +#include +#elif defined(__loongarch_sx) +#include +#endif #endif #if defined(_MSC_VER) -# include +#include #endif /* @@ -3934,7 +3868,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * have been contributed by @easyaspi314 */ #if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) -# warning "XXH3 is highly inefficient without ARM or Thumb-2." +#warning "XXH3 is highly inefficient without ARM or Thumb-2." #endif /* ========================================== @@ -3952,7 +3886,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * If this is not defined, it uses predefined macros to determine the best * implementation. */ -# define XXH_VECTOR XXH_SCALAR +#define XXH_VECTOR XXH_SCALAR /*! * @ingroup tuning * @brief Selects the minimum alignment for XXH3's accumulators. @@ -3962,51 +3896,48 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * * Default: Auto detected. 
*/ -# define XXH_ACC_ALIGN 8 +#define XXH_ACC_ALIGN 8 #endif /* Actual definition */ #ifndef XXH_DOXYGEN #endif -#ifndef XXH_VECTOR /* can be defined on command line */ -# if defined(__ARM_FEATURE_SVE) -# define XXH_VECTOR XXH_SVE -# elif ( \ - defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \ - || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \ - || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* wasm simd128 via SIMDe */ \ - ) && ( \ - defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \ - ) -# define XXH_VECTOR XXH_NEON -# elif defined(__AVX512F__) -# define XXH_VECTOR XXH_AVX512 -# elif defined(__AVX2__) -# define XXH_VECTOR XXH_AVX2 -# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) -# define XXH_VECTOR XXH_SSE2 -# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \ - || (defined(__s390x__) && defined(__VEC__)) \ - && defined(__GNUC__) /* TODO: IBM XL */ -# define XXH_VECTOR XXH_VSX -# elif defined(__loongarch_sx) -# define XXH_VECTOR XXH_LSX -# else -# define XXH_VECTOR XXH_SCALAR -# endif +#ifndef XXH_VECTOR /* can be defined on command line */ +#if defined(__ARM_FEATURE_SVE) +#define XXH_VECTOR XXH_SVE +#elif (defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \ + || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \ + || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE()) /* wasm simd128 via SIMDe */ \ + ) && \ + (defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +#define XXH_VECTOR XXH_NEON +#elif defined(__AVX512F__) +#define XXH_VECTOR XXH_AVX512 +#elif defined(__AVX2__) +#define XXH_VECTOR XXH_AVX2 +#elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) +#define XXH_VECTOR 
XXH_SSE2 +#elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) || \ + (defined(__s390x__) && defined(__VEC__)) && defined(__GNUC__) /* TODO: IBM XL */ +#define XXH_VECTOR XXH_VSX +#elif defined(__loongarch_sx) +#define XXH_VECTOR XXH_LSX +#else +#define XXH_VECTOR XXH_SCALAR +#endif #endif /* __ARM_FEATURE_SVE is only supported by GCC & Clang. */ #if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE) -# ifdef _MSC_VER -# pragma warning(once : 4606) -# else -# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead." -# endif -# undef XXH_VECTOR -# define XXH_VECTOR XXH_SCALAR +#ifdef _MSC_VER +#pragma warning(once : 4606) +#else +#warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead." +#endif +#undef XXH_VECTOR +#define XXH_VECTOR XXH_SCALAR #endif /* @@ -4014,40 +3945,39 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * for compatibility with aligned vector loads, which are usually faster. */ #ifndef XXH_ACC_ALIGN -# if defined(XXH_X86DISPATCH) -# define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ -# elif XXH_VECTOR == XXH_SCALAR /* scalar */ -# define XXH_ACC_ALIGN 8 -# elif XXH_VECTOR == XXH_SSE2 /* sse2 */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_AVX2 /* avx2 */ -# define XXH_ACC_ALIGN 32 -# elif XXH_VECTOR == XXH_NEON /* neon */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_VSX /* vsx */ -# define XXH_ACC_ALIGN 16 -# elif XXH_VECTOR == XXH_AVX512 /* avx512 */ -# define XXH_ACC_ALIGN 64 -# elif XXH_VECTOR == XXH_SVE /* sve */ -# define XXH_ACC_ALIGN 64 -# elif XXH_VECTOR == XXH_LSX /* lsx */ -# define XXH_ACC_ALIGN 64 -# endif -#endif - -#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \ - || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 -# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#if defined(XXH_X86DISPATCH) +#define XXH_ACC_ALIGN 64 /* for compatibility with avx512 */ +#elif XXH_VECTOR == XXH_SCALAR /* scalar */ +#define XXH_ACC_ALIGN 8 +#elif XXH_VECTOR == XXH_SSE2 
/* sse2 */ +#define XXH_ACC_ALIGN 16 +#elif XXH_VECTOR == XXH_AVX2 /* avx2 */ +#define XXH_ACC_ALIGN 32 +#elif XXH_VECTOR == XXH_NEON /* neon */ +#define XXH_ACC_ALIGN 16 +#elif XXH_VECTOR == XXH_VSX /* vsx */ +#define XXH_ACC_ALIGN 16 +#elif XXH_VECTOR == XXH_AVX512 /* avx512 */ +#define XXH_ACC_ALIGN 64 +#elif XXH_VECTOR == XXH_SVE /* sve */ +#define XXH_ACC_ALIGN 64 +#elif XXH_VECTOR == XXH_LSX /* lsx */ +#define XXH_ACC_ALIGN 64 +#endif +#endif + +#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512 +#define XXH_SEC_ALIGN XXH_ACC_ALIGN #elif XXH_VECTOR == XXH_SVE -# define XXH_SEC_ALIGN XXH_ACC_ALIGN +#define XXH_SEC_ALIGN XXH_ACC_ALIGN #else -# define XXH_SEC_ALIGN 8 +#define XXH_SEC_ALIGN 8 #endif #if defined(__GNUC__) || defined(__clang__) -# define XXH_ALIASING __attribute__((__may_alias__)) +#define XXH_ALIASING __attribute__((__may_alias__)) #else -# define XXH_ALIASING /* nothing */ +#define XXH_ALIASING /* nothing */ #endif /* @@ -4071,11 +4001,11 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * -O2, but the other one we can't control without "failed to inline always * inline function due to target mismatch" warnings. */ -#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ - && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ -# pragma GCC push_options -# pragma GCC optimize("-O2") +#pragma GCC push_options +#pragma GCC optimize("-O2") #endif #if XXH_VECTOR == XXH_NEON @@ -4103,14 +4033,13 @@ typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING; * unaligned load. 
*/ #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) -XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */ +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const *ptr) /* silence -Wcast-align */ { - return *(xxh_aliasing_uint64x2_t const *)ptr; + return *(xxh_aliasing_uint64x2_t const *)ptr; } #else -XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) -{ - return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const *ptr) { + return vreinterpretq_u64_u8(vld1q_u8((uint8_t const *)ptr)); } #endif @@ -4123,32 +4052,24 @@ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) * with `vmlal_u32`. */ #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - /* Inline assembly is the only way */ - __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs)); - return acc; +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + /* Inline assembly is the only way */ + __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w"(acc) : "w"(lhs), "w"(rhs)); + return acc; } -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - /* This intrinsic works as expected */ - return vmlal_high_u32(acc, lhs, rhs); +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + /* This intrinsic works as expected */ + return vmlal_high_u32(acc, lhs, rhs); } #else /* Portable intrinsic versions */ -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); } /*! 
@copydoc XXH_vmlal_low_u32 * Assume the compiler converts this to vmlal_high_u32 on aarch64 */ -XXH_FORCE_INLINE uint64x2_t -XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) -{ - return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); +XXH_FORCE_INLINE uint64x2_t XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) { + return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); } #endif @@ -4190,15 +4111,15 @@ XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) * * @see XXH3_accumulate_512_neon() */ -# ifndef XXH3_NEON_LANES -# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \ - && !defined(__APPLE__) && XXH_SIZE_OPT <= 0 -# define XXH3_NEON_LANES 6 -# else -# define XXH3_NEON_LANES XXH_ACC_NB -# endif -# endif -#endif /* XXH_VECTOR == XXH_NEON */ +#ifndef XXH3_NEON_LANES +#if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(__APPLE__) && \ + XXH_SIZE_OPT <= 0 +#define XXH3_NEON_LANES 6 +#else +#define XXH3_NEON_LANES XXH_ACC_NB +#endif +#endif +#endif /* XXH_VECTOR == XXH_NEON */ /* * VSX and Z Vector helpers. * @@ -4217,24 +4138,24 @@ XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) * after including the header. * * We use pragma push_macro/pop_macro to keep the namespace clean. */ -# pragma push_macro("bool") -# pragma push_macro("vector") -# pragma push_macro("pixel") +#pragma push_macro("bool") +#pragma push_macro("vector") +#pragma push_macro("pixel") /* silence potential macro redefined warnings */ -# undef bool -# undef vector -# undef pixel - -# if defined(__s390x__) -# include <s390intrin.h> -# else -# include <altivec.h> -# endif +#undef bool +#undef vector +#undef pixel + +#if defined(__s390x__) +#include <s390intrin.h> +#else +#include <altivec.h> +#endif /* Restore the original macro values, if applicable. 
*/ -# pragma pop_macro("pixel") -# pragma pop_macro("vector") -# pragma pop_macro("bool") +#pragma pop_macro("pixel") +#pragma pop_macro("vector") +#pragma pop_macro("bool") typedef __vector unsigned long long xxh_u64x2; typedef __vector unsigned char xxh_u8x16; @@ -4245,45 +4166,42 @@ typedef __vector unsigned xxh_u32x4; */ typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING; -# ifndef XXH_VSX_BE -# if defined(__BIG_ENDIAN__) \ - || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -# define XXH_VSX_BE 1 -# elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ -# warning "-maltivec=be is not recommended. Please use native endianness." -# define XXH_VSX_BE 1 -# else -# define XXH_VSX_BE 0 -# endif -# endif /* !defined(XXH_VSX_BE) */ - -# if XXH_VSX_BE -# if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) -# define XXH_vec_revb vec_revb -# else +#ifndef XXH_VSX_BE +#if defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define XXH_VSX_BE 1 +#elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__ +#warning "-maltivec=be is not recommended. Please use native endianness." +#define XXH_VSX_BE 1 +#else +#define XXH_VSX_BE 0 +#endif +#endif /* !defined(XXH_VSX_BE) */ + +#if XXH_VSX_BE +#if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) +#define XXH_vec_revb vec_revb +#else /*! * A polyfill for POWER9's vec_revb(). 
*/ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) -{ - xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, - 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - return vec_perm(val, val, vByteSwap); +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) { + xxh_u8x16 const vByteSwap = { + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08}; + return vec_perm(val, val, vByteSwap); } -# endif -# endif /* XXH_VSX_BE */ +#endif +#endif /* XXH_VSX_BE */ /*! * Performs an unaligned vector load and byte swaps it on big endian. */ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) -{ - xxh_u64x2 ret; - XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2)); -# if XXH_VSX_BE - ret = XXH_vec_revb(ret); -# endif - return ret; +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) { + xxh_u64x2 ret; + XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2)); +#if XXH_VSX_BE + ret = XXH_vec_revb(ret); +#endif + return ret; } /* @@ -4292,96 +4210,95 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) * These intrinsics weren't added until GCC 8, despite existing for a while, * and they are endian dependent. Also, their meaning swap depending on version. * */ -# if defined(__s390x__) - /* s390x is always big endian, no issue on this platform */ -# define XXH_vec_mulo vec_mulo -# define XXH_vec_mule vec_mule -# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__) +#if defined(__s390x__) +/* s390x is always big endian, no issue on this platform */ +#define XXH_vec_mulo vec_mulo +#define XXH_vec_mule vec_mule +#elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__) /* Clang has a better way to control this, we can just use the builtin which doesn't swap. 
*/ - /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */ -# define XXH_vec_mulo __builtin_altivec_vmulouw -# define XXH_vec_mule __builtin_altivec_vmuleuw -# else +/* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */ +#define XXH_vec_mulo __builtin_altivec_vmulouw +#define XXH_vec_mule __builtin_altivec_vmuleuw +#else /* gcc needs inline assembly */ /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */ -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) -{ - xxh_u64x2 result; - __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); - return result; +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) { + xxh_u64x2 result; + __asm__("vmulouw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b)); + return result; } -XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) -{ - xxh_u64x2 result; - __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); - return result; +XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) { + xxh_u64x2 result; + __asm__("vmuleuw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b)); + return result; } -# endif /* XXH_vec_mulo, XXH_vec_mule */ +#endif /* XXH_vec_mulo, XXH_vec_mule */ #endif /* XXH_VECTOR == XXH_VSX */ #if XXH_VECTOR == XXH_SVE -#define ACCRND(acc, offset) \ -do { \ - svuint64_t input_vec = svld1_u64(mask, xinput + offset); \ - svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \ - svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \ - svuint64_t swapped = svtbl_u64(input_vec, kSwap); \ - svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \ - svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \ - svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \ - acc = svadd_u64_x(mask, acc, mul); \ -} while (0) +#define ACCRND(acc, offset) \ + do { \ + svuint64_t input_vec = svld1_u64(mask, xinput + offset); \ + svuint64_t secret_vec = 
svld1_u64(mask, xsecret + offset); \ + svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \ + svuint64_t swapped = svtbl_u64(input_vec, kSwap); \ + svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \ + svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \ + svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \ + acc = svadd_u64_x(mask, acc, mul); \ + } while (0) #endif /* XXH_VECTOR == XXH_SVE */ /* prefetch * can be disabled, by declaring XXH_NO_PREFETCH build macro */ #if defined(XXH_NO_PREFETCH) -# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ #else -# if XXH_SIZE_OPT >= 1 -# define XXH_PREFETCH(ptr) (void)(ptr) -# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ -# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# else -# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* XXH_NO_PREFETCH */ - +#if XXH_SIZE_OPT >= 1 +#define XXH_PREFETCH(ptr) (void)(ptr) +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */ +#include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +#define XXH_PREFETCH(ptr) _mm_prefetch((const char *)(ptr), _MM_HINT_T0) +#elif defined(__GNUC__) && ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))) +#define XXH_PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +#else +#define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ +#endif +#endif /* XXH_NO_PREFETCH */ /* ========================================== * XXH3 default settings * 
========================================== */ -#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ +#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */ #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN) -# error "default keyset is not large enough" +#error "default keyset is not large enough" #endif /*! Pseudorandom secret taken directly from FARSH. */ -XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +XXH_ALIGN(64) +static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, + 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, + 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 
0xcc, 0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, + 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, + 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, + 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, + 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, + 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, + 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f, + 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, }; -static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ -static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ +static const xxh_u64 PRIME_MX1 = + 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ +static const xxh_u64 PRIME_MX2 = + 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ #ifdef XXH_OLD_NAMES -# define kSecret XXH3_kSecret +#define kSecret XXH3_kSecret #endif #ifdef XXH_DOXYGEN @@ -4401,13 +4318,9 @@ static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010 * @param x, y Numbers to be multiplied * @return 64-bit product of the low 32 bits of @p x and @p y. 
*/ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64(xxh_u64 x, xxh_u64 y) -{ - return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); -} +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64(xxh_u64 x, xxh_u64 y) { return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); } #elif defined(_MSC_VER) && defined(_M_IX86) -# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) #else /* * Downcast + upcast is usually better than masking on older compilers like @@ -4416,7 +4329,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y) * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands * and perform a full 64x64 multiply -- entirely redundant on 32-bit. */ -# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) #endif /*! @@ -4428,127 +4341,124 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y) * @param lhs , rhs The 64-bit integers to be multiplied * @return The 128-bit result represented in an @ref XXH128_hash_t. */ -static XXH128_hash_t -XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) -{ - /* - * GCC/Clang __uint128_t method. - * - * On most 64-bit targets, GCC and Clang define a __uint128_t type. - * This is usually the best way as it usually uses a native long 64-bit - * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. - * - * Usually. - * - * Despite being a 32-bit platform, Clang (and emscripten) define this type - * despite not having the arithmetic for it. This results in a laggy - * compiler builtin call which calculates a full 128-bit multiply. - * In that case it is best to use the portable one. 
- * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 - */ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \ - && defined(__SIZEOF_INT128__) \ - || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - - __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; - XXH128_hash_t r128; - r128.low64 = (xxh_u64)(product); - r128.high64 = (xxh_u64)(product >> 64); - return r128; - - /* - * MSVC for x64's _umul128 method. - * - * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); - * - * This compiles to single operand MUL on x64. - */ +static XXH128_hash_t XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) { + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. + * This is usually the best way as it usually uses a native long 64-bit + * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. + * + * Usually. + * + * Despite being a 32-bit platform, Clang (and emscripten) define this type + * despite not having the arithmetic for it. This results in a laggy + * compiler builtin call which calculates a full 128-bit multiply. + * In that case it is best to use the portable one. + * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 + */ +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) && defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + + __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs; + XXH128_hash_t r128; + r128.low64 = (xxh_u64)(product); + r128.high64 = (xxh_u64)(product >> 64); + return r128; + + /* + * MSVC for x64's _umul128 method. + * + * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct); + * + * This compiles to single operand MUL on x64. 
+ */ #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC) #ifndef _MSC_VER -# pragma intrinsic(_umul128) +#pragma intrinsic(_umul128) #endif - xxh_u64 product_high; - xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); - XXH128_hash_t r128; - r128.low64 = product_low; - r128.high64 = product_high; - return r128; - - /* - * MSVC for ARM64's __umulh method. - * - * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. - */ + xxh_u64 product_high; + xxh_u64 const product_low = _umul128(lhs, rhs, &product_high); + XXH128_hash_t r128; + r128.low64 = product_low; + r128.high64 = product_high; + return r128; + + /* + * MSVC for ARM64's __umulh method. + * + * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method. + */ #elif defined(_M_ARM64) || defined(_M_ARM64EC) #ifndef _MSC_VER -# pragma intrinsic(__umulh) +#pragma intrinsic(__umulh) #endif - XXH128_hash_t r128; - r128.low64 = lhs * rhs; - r128.high64 = __umulh(lhs, rhs); - return r128; + XXH128_hash_t r128; + r128.low64 = lhs * rhs; + r128.high64 = __umulh(lhs, rhs); + return r128; #else - /* - * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. - * - * This is a fast and simple grade school multiply, which is shown below - * with base 10 arithmetic instead of base 0x100000000. - * - * 9 3 // D2 lhs = 93 - * x 7 5 // D2 rhs = 75 - * ---------- - * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 - * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 - * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 - * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 - * --------- - * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 - * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 - * --------- - * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 - * - * The reasons for adding the products like this are: - * 1. It avoids manual carry tracking. Just like how - * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. 
- * This avoids a lot of complexity. - * - * 2. It hints for, and on Clang, compiles to, the powerful UMAAL - * instruction available in ARM's Digital Signal Processing extension - * in 32-bit ARMv6 and later, which is shown below: - * - * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) - * { - * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; - * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); - * *RdHi = (xxh_u32)(product >> 32); - * } - * - * This instruction was designed for efficient long multiplication, and - * allows this to be calculated in only 4 instructions at speeds - * comparable to some 64-bit ALUs. - * - * 3. It isn't terrible on other platforms. Usually this will be a couple - * of 32-bit ADD/ADCs. - */ - - /* First calculate all of the cross products. */ - xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); - - /* Now add the products together. These will never overflow. */ - xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); - - XXH128_hash_t r128; - r128.low64 = lower; - r128.high64 = upper; - return r128; + /* + * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. + * + * This is a fast and simple grade school multiply, which is shown below + * with base 10 arithmetic instead of base 0x100000000. 
+ * + * 9 3 // D2 lhs = 93 + * x 7 5 // D2 rhs = 75 + * ---------- + * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 + * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 + * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 + * + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 + * --------- + * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 + * + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67 + * --------- + * 6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975 + * + * The reasons for adding the products like this are: + * 1. It avoids manual carry tracking. Just like how + * (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX. + * This avoids a lot of complexity. + * + * 2. It hints for, and on Clang, compiles to, the powerful UMAAL + * instruction available in ARM's Digital Signal Processing extension + * in 32-bit ARMv6 and later, which is shown below: + * + * void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm) + * { + * xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm; + * *RdLo = (xxh_u32)(product & 0xFFFFFFFF); + * *RdHi = (xxh_u32)(product >> 32); + * } + * + * This instruction was designed for efficient long multiplication, and + * allows this to be calculated in only 4 instructions at speeds + * comparable to some 64-bit ALUs. + * + * 3. It isn't terrible on other platforms. Usually this will be a couple + * of 32-bit ADD/ADCs. + */ + + /* First calculate all of the cross products. */ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. 
*/ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; #endif } @@ -4562,30 +4472,26 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) * @return The low 64 bits of the product XOR'd by the high 64 bits. * @see XXH_mult64to128() */ -static xxh_u64 -XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) -{ - XXH128_hash_t product = XXH_mult64to128(lhs, rhs); - return product.low64 ^ product.high64; +static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) { + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; } /*! Seems to produce slightly better code on GCC for some reason. */ -XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) -{ - XXH_ASSERT(0 <= shift && shift < 64); - return v64 ^ (v64 >> shift); +XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) { + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); } /* * This is a fast avalanche stage, * suitable when input bits are already partially mixed */ -static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) -{ - h64 = XXH_xorshift64(h64, 37); - h64 *= PRIME_MX1; - h64 = XXH_xorshift64(h64, 32); - return h64; +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) { + h64 = XXH_xorshift64(h64, 37); + h64 *= PRIME_MX1; + h64 = XXH_xorshift64(h64, 32); + return h64; } /* @@ -4593,17 +4499,15 @@ static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) * inspired by Pelle Evensen's rrmxmx * preferable when input has not been previously mixed */ -static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) -{ - /* this mix is inspired by Pelle Evensen's rrmxmx */ - h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); - h64 *= PRIME_MX2; - h64 ^= (h64 >> 35) + len ; - h64 *= PRIME_MX2; - return XXH_xorshift64(h64, 28); +static 
XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) { + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= PRIME_MX2; + h64 ^= (h64 >> 35) + len; + h64 *= PRIME_MX2; + return XXH_xorshift64(h64, 28); } - /* ========================================== * Short keys * ========================================== @@ -4637,70 +4541,78 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) * * This adds an extra layer of strength for custom secrets. */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); - /* - * len = 1: combined = { input[0], 0x01, input[0], input[0] } - * len = 2: combined = { input[1], 0x02, input[0], input[1] } - * len = 3: combined = { input[2], 0x03, input[0], input[1] } - */ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; - return XXH64_avalanche(keyed); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input1 = XXH_readLE32(input); - xxh_u32 const input2 = XXH_readLE32(input + len - 4); - xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; - xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); - xxh_u64 const keyed = input64 ^ bitflip; - return XXH3_rrmxmx(keyed, len); - } -} - -XXH_FORCE_INLINE XXH_PUREF 
XXH64_hash_t -XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; - xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; - xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; - xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; - xxh_u64 const acc = len - + XXH_swap64(input_lo) + input_hi - + XXH3_mul128_fold64(input_lo, input_hi); - return XXH3_avalanche(acc); - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); - if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); - if (len) return XXH3_len_1to3_64b(input, len, secret, seed); - return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); - } +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_1to3_64b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { + xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret + 4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH_PUREF 
XXH64_hash_t XXH3_len_4to8_64b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { + xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret + 8) ^ XXH_readLE64(secret + 16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_9to16_64b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { + xxh_u64 const bitflip1 = (XXH_readLE64(secret + 24) ^ XXH_readLE64(secret + 32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret + 40) ^ XXH_readLE64(secret + 48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + XXH_swap64(input_lo) + input_hi + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_0to16_64b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(len <= 16); + { + if (XXH_likely(len > 8)) + return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) + return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) + return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret + 56) ^ XXH_readLE64(secret + 64))); + } } /* @@ -4729,157 +4641,159 @@ XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_ * by this, although it is always a good idea to use a proper seed if you care * about strength. 
*/ -XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64) -{ -#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ +XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8 *XXH_RESTRICT input, + const xxh_u8 *XXH_RESTRICT secret, + xxh_u64 seed64) { +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ \ && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable like XXH32 hack */ - /* - * UGLY HACK: - * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in - * slower code. - * - * By forcing seed64 into a register, we disrupt the cost model and - * cause it to scalarize. See `XXH32_round()` - * - * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, - * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on - * GCC 9.2, despite both emitting scalar code. - * - * GCC generates much better scalar code than Clang for the rest of XXH3, - * which is why finding a more optimal codepath is an interest. - */ - XXH_COMPILER_GUARD(seed64); -#endif - { xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 const input_hi = XXH_readLE64(input+8); - return XXH3_mul128_fold64( - input_lo ^ (XXH_readLE64(secret) + seed64), - input_hi ^ (XXH_readLE64(secret+8) - seed64) - ); - } + /* + * UGLY HACK: + * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in + * slower code. + * + * By forcing seed64 into a register, we disrupt the cost model and + * cause it to scalarize. See `XXH32_round()` + * + * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, + * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on + * GCC 9.2, despite both emitting scalar code. 
+ * + * GCC generates much better scalar code than Clang for the rest of XXH3, + * which is why finding a more optimal codepath is an interest. + */ + XXH_COMPILER_GUARD(seed64); +#endif + { + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 const input_hi = XXH_readLE64(input + 8); + return XXH3_mul128_fold64(input_lo ^ (XXH_readLE64(secret) + seed64), + input_hi ^ (XXH_readLE64(secret + 8) - seed64)); + } } /* For mid range keys, XXH3 uses a Mum-hash variant. */ -XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { xxh_u64 acc = len * XXH_PRIME64_1; +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t XXH3_len_17to128_64b(const xxh_u8 *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { + xxh_u64 acc = len * XXH_PRIME64_1; #if XXH_SIZE_OPT >= 1 - /* Smaller and cleaner, but slightly slower. */ - unsigned int i = (unsigned int)(len - 1) / 32; - do { - acc += XXH3_mix16B(input+16 * i, secret+32*i, seed); - acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed); - } while (i-- != 0); + /* Smaller and cleaner, but slightly slower. 
*/ + unsigned int i = (unsigned int)(len - 1) / 32; + do { + acc += XXH3_mix16B(input + 16 * i, secret + 32 * i, seed); + acc += XXH3_mix16B(input + len - 16 * (i + 1), secret + 32 * i + 16, seed); + } while (i-- != 0); #else - if (len > 32) { - if (len > 64) { - if (len > 96) { - acc += XXH3_mix16B(input+48, secret+96, seed); - acc += XXH3_mix16B(input+len-64, secret+112, seed); - } - acc += XXH3_mix16B(input+32, secret+64, seed); - acc += XXH3_mix16B(input+len-48, secret+80, seed); - } - acc += XXH3_mix16B(input+16, secret+32, seed); - acc += XXH3_mix16B(input+len-32, secret+48, seed); + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc += XXH3_mix16B(input + 48, secret + 96, seed); + acc += XXH3_mix16B(input + len - 64, secret + 112, seed); } - acc += XXH3_mix16B(input+0, secret+0, seed); - acc += XXH3_mix16B(input+len-16, secret+16, seed); -#endif - return XXH3_avalanche(acc); + acc += XXH3_mix16B(input + 32, secret + 64, seed); + acc += XXH3_mix16B(input + len - 48, secret + 80, seed); + } + acc += XXH3_mix16B(input + 16, secret + 32, seed); + acc += XXH3_mix16B(input + len - 32, secret + 48, seed); } -} - -XXH_NO_INLINE XXH_PUREF XXH64_hash_t -XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + acc += XXH3_mix16B(input + 0, secret + 0, seed); + acc += XXH3_mix16B(input + len - 16, secret + 16, seed); +#endif + return XXH3_avalanche(acc); + } +} + +XXH_NO_INLINE XXH_PUREF XXH64_hash_t XXH3_len_129to240_64b(const xxh_u8 *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + +#define XXH3_MIDSIZE_STARTOFFSET 3 +#define XXH3_MIDSIZE_LASTOFFSET 17 + + { + xxh_u64 acc = len * XXH_PRIME64_1; + xxh_u64 acc_end; + 
unsigned int const nbRounds = (unsigned int)len / 16; + unsigned int i; XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - - #define XXH3_MIDSIZE_STARTOFFSET 3 - #define XXH3_MIDSIZE_LASTOFFSET 17 - - { xxh_u64 acc = len * XXH_PRIME64_1; - xxh_u64 acc_end; - unsigned int const nbRounds = (unsigned int)len / 16; - unsigned int i; - XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); - for (i=0; i<8; i++) { - acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); - } - /* last bytes */ - acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); - XXH_ASSERT(nbRounds >= 8); - acc = XXH3_avalanche(acc); -#if defined(__clang__) /* Clang */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ - /* - * UGLY HACK: - * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. - * In everywhere else, it uses scalar code. - * - * For 64->128-bit multiplies, even if the NEON was 100% optimal, it - * would still be slower than UMAAL (see XXH_mult64to128). - * - * Unfortunately, Clang doesn't handle the long multiplies properly and - * converts them to the nonexistent "vmulq_u64" intrinsic, which is then - * scalarized into an ugly mess of VMOV.32 instructions. - * - * This mess is difficult to avoid without turning autovectorization - * off completely, but they are usually relatively minor and/or not - * worth it to fix. - * - * This loop is the easiest to fix, as unlike XXH32, this pragma - * _actually works_ because it is a loop vectorization instead of an - * SLP vectorization. - */ - #pragma clang loop vectorize(disable) -#endif - for (i=8 ; i < nbRounds; i++) { - /* - * Prevents clang for unrolling the acc loop and interleaving with this one. 
- */ - XXH_COMPILER_GUARD(acc); - acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); - } - return XXH3_avalanche(acc + acc_end); + for (i = 0; i < 8; i++) { + acc += XXH3_mix16B(input + (16 * i), secret + (16 * i), seed); } + /* last bytes */ + acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); + XXH_ASSERT(nbRounds >= 8); + acc = XXH3_avalanche(acc); +#if defined(__clang__) /* Clang */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ +/* + * UGLY HACK: + * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. + * In everywhere else, it uses scalar code. + * + * For 64->128-bit multiplies, even if the NEON was 100% optimal, it + * would still be slower than UMAAL (see XXH_mult64to128). + * + * Unfortunately, Clang doesn't handle the long multiplies properly and + * converts them to the nonexistent "vmulq_u64" intrinsic, which is then + * scalarized into an ugly mess of VMOV.32 instructions. + * + * This mess is difficult to avoid without turning autovectorization + * off completely, but they are usually relatively minor and/or not + * worth it to fix. + * + * This loop is the easiest to fix, as unlike XXH32, this pragma + * _actually works_ because it is a loop vectorization instead of an + * SLP vectorization. + */ +#pragma clang loop vectorize(disable) +#endif + for (i = 8; i < nbRounds; i++) { + /* + * Prevents clang for unrolling the acc loop and interleaving with this one. 
+ */ + XXH_COMPILER_GUARD(acc); + acc_end += XXH3_mix16B(input + (16 * i), secret + (16 * (i - 8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + } + return XXH3_avalanche(acc + acc_end); + } } - /* ======= Long Keys ======= */ #define XXH_STRIPE_LEN 64 -#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ +#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */ #define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64)) #ifdef XXH_OLD_NAMES -# define STRIPE_LEN XXH_STRIPE_LEN -# define ACC_NB XXH_ACC_NB +#define STRIPE_LEN XXH_STRIPE_LEN +#define ACC_NB XXH_ACC_NB #endif #ifndef XXH_PREFETCH_DIST -# ifdef __clang__ -# define XXH_PREFETCH_DIST 320 -# else -# if (XXH_VECTOR == XXH_AVX512) -# define XXH_PREFETCH_DIST 512 -# else -# define XXH_PREFETCH_DIST 384 -# endif -# endif /* __clang__ */ -#endif /* XXH_PREFETCH_DIST */ +#ifdef __clang__ +#define XXH_PREFETCH_DIST 320 +#else +#if (XXH_VECTOR == XXH_AVX512) +#define XXH_PREFETCH_DIST 512 +#else +#define XXH_PREFETCH_DIST 384 +#endif +#endif /* __clang__ */ +#endif /* XXH_PREFETCH_DIST */ /* * These macros are to generate an XXH3_accumulate() function. @@ -4891,29 +4805,23 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, * It may be useful to hand implement this function if the compiler fails to * optimize the inline function. 
*/ -#define XXH3_ACCUMULATE_TEMPLATE(name) \ -void \ -XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \ - const xxh_u8* XXH_RESTRICT input, \ - const xxh_u8* XXH_RESTRICT secret, \ - size_t nbStripes) \ -{ \ - size_t n; \ - for (n = 0; n < nbStripes; n++ ) { \ - const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \ - XXH_PREFETCH(in + XXH_PREFETCH_DIST); \ - XXH3_accumulate_512_##name( \ - acc, \ - in, \ - secret + n*XXH_SECRET_CONSUME_RATE); \ - } \ -} - - -XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) -{ - if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); - XXH_memcpy(dst, &v64, sizeof(v64)); +#define XXH3_ACCUMULATE_TEMPLATE(name) \ + void XXH3_accumulate_##name(xxh_u64 *XXH_RESTRICT acc, \ + const xxh_u8 *XXH_RESTRICT input, \ + const xxh_u8 *XXH_RESTRICT secret, \ + size_t nbStripes) { \ + size_t n; \ + for (n = 0; n < nbStripes; n++) { \ + const xxh_u8 *const in = input + n * XXH_STRIPE_LEN; \ + XXH_PREFETCH(in + XXH_PREFETCH_DIST); \ + XXH3_accumulate_512_##name(acc, in, secret + n * XXH_SECRET_CONSUME_RATE); \ + } \ + } + +XXH_FORCE_INLINE void XXH_writeLE64(void *dst, xxh_u64 v64) { + if (!XXH_CPU_LITTLE_ENDIAN) + v64 = XXH_swap64(v64); + XXH_memcpy(dst, &v64, sizeof(v64)); } /* Several intrinsic functions below are supposed to accept __int64 as argument, @@ -4921,16 +4829,14 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) * However, several environments do not define __int64 type, * requiring a workaround. 
*/ -#if !defined (__VMS) \ - && (defined (__cplusplus) \ - || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) - typedef int64_t xxh_i64; +#if !defined(__VMS) && (defined(__cplusplus) || (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 \ + */)) +typedef int64_t xxh_i64; #else - /* the following type must have a width of 64-bit */ - typedef long long xxh_i64; +/* the following type must have a width of 64-bit */ +typedef long long xxh_i64; #endif - /* * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. * @@ -4954,211 +4860,206 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) * Both XXH3_64bits and XXH3_128bits use this subroutine. */ -#if (XXH_VECTOR == XXH_AVX512) \ - || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) +#if (XXH_VECTOR == XXH_AVX512) || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) #ifndef XXH_TARGET_AVX512 -# define XXH_TARGET_AVX512 /* disable attribute target */ +#define XXH_TARGET_AVX512 /* disable attribute target */ #endif XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - __m512i* const xacc = (__m512i *) acc; - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - - { - /* data_vec = input[0]; */ - __m512i const data_vec = _mm512_loadu_si512 (input); - /* key_vec = secret[0]; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - /* data_key = data_vec ^ key_vec; */ - __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); - /* xacc[0] += swap(data_vec); */ - __m512i const data_swap = _mm512_shuffle_epi32(data_vec, 
(_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); - __m512i const sum = _mm512_add_epi64(*xacc, data_swap); - /* xacc[0] += product; */ - *xacc = _mm512_add_epi64(product, sum); - } +XXH3_accumulate_512_avx512(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + __m512i *const xacc = (__m512i *)acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512(input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512(secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_srli_epi64(data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32(data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } } XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) -/* - * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. - * - * Multiplication isn't perfect, as explained by Google in HighwayHash: - * - * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to - * // varying degrees. In descending order of goodness, bytes - * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. - * // As expected, the upper and lower bytes are much worse. - * - * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 - * - * Since our algorithm uses a pseudorandom secret to add some variance into the - * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. 
- * - * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid - * extraction. - * - * Both XXH3_64bits and XXH3_128bits use this subroutine. - */ - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 63) == 0); - XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); - { __m512i* const xacc = (__m512i*) acc; - const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); - - /* xacc[0] ^= (xacc[0] >> 47) */ - __m512i const acc_vec = *xacc; - __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); - /* xacc[0] ^= secret; */ - __m512i const key_vec = _mm512_loadu_si512 (secret); - __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); - - /* xacc[0] *= XXH_PRIME32_1; */ - __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32); - __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); - __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); - *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + /* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. + * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. 
+ */ + + XXH_FORCE_INLINE XXH_TARGET_AVX512 + void XXH3_scrambleAcc_avx512(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { + __m512i *const xacc = (__m512i *)acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64(acc_vec, 47); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512(secret); + __m512i const data_key = + _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_srli_epi64(data_key, 32); + __m512i const prod_lo = _mm512_mul_epu32(data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32(data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void XXH3_initCustomSecret_avx512(void *XXH_RESTRICT customSecret, xxh_u64 seed64) { + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { + int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); + __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); + + const __m512i *const src = (const __m512i *)((const void *)XXH3_kSecret); + __m512i *const dest = (__m512i *)customSecret; + int i; + XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 63) == 0); + for (i = 0; i < nbRounds; ++i) { + dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); } -} - -XXH_FORCE_INLINE XXH_TARGET_AVX512 void -XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - 
XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); - XXH_ASSERT(((size_t)customSecret & 63) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); - __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); - __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); - - const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); - __m512i* const dest = ( __m512i*) customSecret; - int i; - XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 63) == 0); - for (i=0; i < nbRounds; ++i) { - dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); - } } + } } #endif -#if (XXH_VECTOR == XXH_AVX2) \ - || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) +#if (XXH_VECTOR == XXH_AVX2) || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) #ifndef XXH_TARGET_AVX2 -# define XXH_TARGET_AVX2 /* disable attribute target */ +#define XXH_TARGET_AVX2 /* disable attribute target */ #endif XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xinput = (const __m256i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ - const __m256i* const xsecret = (const __m256i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* data_vec = xinput[i]; */ - __m256i const data_vec = _mm256_loadu_si256 (xinput+i); - /* key_vec = xsecret[i]; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm256_add_epi64(product, sum); - } } +XXH3_accumulate_512_avx2(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 31) == 0); + { + __m256i *const xacc = (__m256i *)acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i *const xinput = (const __m256i *)input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i *const xsecret = (const __m256i *)secret; + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) { + /* data_vec = xinput[i]; */ + __m256i const data_vec = _mm256_loadu_si256(xinput + i); + /* key_vec = xsecret[i]; */ + __m256i const key_vec = _mm256_loadu_si256(xsecret + i); + /* data_key = data_vec ^ key_vec; */ + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m256i const data_key_lo = _mm256_srli_epi64(data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m256i const product = _mm256_mul_epu32(data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm256_add_epi64(product, sum); + } + } } XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2) -XXH_FORCE_INLINE XXH_TARGET_AVX2 void -XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 31) == 0); - { __m256i* const xacc = (__m256i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ - const __m256i* const xsecret = (const __m256i *) secret; - const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m256i const acc_vec = xacc[i]; - __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); - __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); - /* xacc[i] ^= xsecret; */ - __m256i const key_vec = _mm256_loadu_si256 (xsecret+i); - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32); - __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); - __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); - } + XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_scrambleAcc_avx2(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 31) == 0); + { + __m256i *const xacc = (__m256i *)acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ + const __m256i *const xsecret = (const __m256i *)secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64(acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256(acc_vec, shifted); + /* xacc[i] ^= xsecret; */ + __m256i const key_vec = _mm256_loadu_si256(xsecret + i); + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m256i const data_key_hi = _mm256_srli_epi64(data_key, 32); + __m256i const prod_lo = _mm256_mul_epu32(data_key, prime32); + __m256i const prod_hi = _mm256_mul_epu32(data_key_hi, prime32); + xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); } + } } -XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); - XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); - (void)(&XXH_writeLE64); - XXH_PREFETCH(customSecret); - { __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); - - const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); - __m256i* dest = ( __m256i*) customSecret; - -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dest); -# endif - XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dest & 31) == 0); - - /* GCC -O2 need unroll loop manually */ - dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed); - dest[1] = 
_mm256_add_epi64(_mm256_load_si256(src+1), seed); - dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed); - dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed); - dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed); - dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed); - } +XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void *XXH_RESTRICT customSecret, xxh_u64 seed64) { + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0); + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64); + (void)(&XXH_writeLE64); + XXH_PREFETCH(customSecret); + { + __m256i const seed = + _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64); + + const __m256i *const src = (const __m256i *)((const void *)XXH3_kSecret); + __m256i *dest = (__m256i *)customSecret; + +#if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + XXH_COMPILER_GUARD(dest); +#endif + XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 31) == 0); + + /* GCC -O2 need unroll loop manually */ + dest[0] = _mm256_add_epi64(_mm256_load_si256(src + 0), seed); + dest[1] = _mm256_add_epi64(_mm256_load_si256(src + 1), seed); + dest[2] = _mm256_add_epi64(_mm256_load_si256(src + 2), seed); + dest[3] = _mm256_add_epi64(_mm256_load_si256(src + 3), seed); + dest[4] = _mm256_add_epi64(_mm256_load_si256(src + 4), seed); + dest[5] = _mm256_add_epi64(_mm256_load_si256(src + 5), seed); + } } #endif @@ -5167,105 +5068,104 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR #if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) #ifndef XXH_TARGET_SSE2 -# define XXH_TARGET_SSE2 /* disable attribute target */ 
+#define XXH_TARGET_SSE2 /* disable attribute target */ #endif XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* SSE2 is just a half-scale version of the AVX2 version. */ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xinput = (const __m128i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* data_vec = xinput[i]; */ - __m128i const data_vec = _mm_loadu_si128 (xinput+i); - /* key_vec = xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - /* data_key = data_vec ^ key_vec; */ - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); - __m128i const sum = _mm_add_epi64(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = _mm_add_epi64(product, sum); - } } +XXH3_accumulate_512_sse2(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + __m128i *const xacc = (__m128i *)acc; + /* Unaligned. 
This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i *const xinput = (const __m128i *)input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i *const xsecret = (const __m128i *)secret; + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128(xinput + i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128(xsecret + i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32(data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } + } } XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) -XXH_FORCE_INLINE XXH_TARGET_SSE2 void -XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { __m128i* const xacc = (__m128i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ - const __m128i* const xsecret = (const __m128i *) secret; - const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); - - size_t i; - for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m128i const acc_vec = xacc[i]; - __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); - __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); - /* xacc[i] ^= xsecret[i]; */ - __m128i const key_vec = _mm_loadu_si128 (xsecret+i); - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); - __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); - xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); - } + XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_scrambleAcc_sse2(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + __m128i *const xacc = (__m128i *)acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/ + const __m128i *const xsecret = (const __m128i *)secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); + + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64(acc_vec, 47); + __m128i const data_vec = _mm_xor_si128(acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128(xsecret + i); + __m128i const data_key = _mm_xor_si128(data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32(data_key, prime32); + __m128i const prod_hi = _mm_mul_epu32(data_key_hi, prime32); + xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); } + } } -XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); - (void)(&XXH_writeLE64); - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); - -# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 - /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ - XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; - __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); -# else - __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); -# endif - int i; - - const void* const src16 = XXH3_kSecret; - __m128i* dst16 = (__m128i*) customSecret; -# if defined(__GNUC__) || defined(__clang__) - /* - * On GCC & Clang, marking 'dest' as modified will cause the compiler: - * - do not extract the secret from sse registers in the internal loop - * - use less common registers, and avoid pushing these reg into stack - */ - XXH_COMPILER_GUARD(dst16); -# endif - XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ - XXH_ASSERT(((size_t)dst16 
& 15) == 0); - - for (i=0; i < nbRounds; ++i) { - dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed); - } } +XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void *XXH_RESTRICT customSecret, xxh_u64 seed64) { + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); + (void)(&XXH_writeLE64); + { + int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i); + +#if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 + /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ + XXH_ALIGN(16) const xxh_i64 seed64x2[2] = {(xxh_i64)seed64, (xxh_i64)(0U - seed64)}; + __m128i const seed = _mm_load_si128((__m128i const *)seed64x2); +#else + __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); +#endif + int i; + + const void *const src16 = XXH3_kSecret; + __m128i *dst16 = (__m128i *)customSecret; +#if defined(__GNUC__) || defined(__clang__) + /* + * On GCC & Clang, marking 'dest' as modified will cause the compiler: + * - do not extract the secret from sse registers in the internal loop + * - use less common registers, and avoid pushing these reg into stack + */ + XXH_COMPILER_GUARD(dst16); +#endif + XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dst16 & 15) == 0); + + for (i = 0; i < nbRounds; ++i) { + dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16 + i), seed); + } + } } #endif @@ -5274,12 +5174,9 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR /* forward declarations for the scalar routines */ XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, size_t lane); +XXH3_scalarRound(void *XXH_RESTRICT acc, void const *XXH_RESTRICT input, void const *XXH_RESTRICT secret, size_t lane); -XXH_FORCE_INLINE void -XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT secret, size_t lane); +XXH_FORCE_INLINE void 
XXH3_scalarScrambleRound(void *XXH_RESTRICT acc, void const *XXH_RESTRICT secret, size_t lane); /*! * @internal @@ -5306,256 +5203,246 @@ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, */ XXH_FORCE_INLINE void -XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); - { /* GCC for darwin arm64 does not like aliasing here */ - xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc; - /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ - uint8_t const* xinput = (const uint8_t *) input; - uint8_t const* xsecret = (const uint8_t *) secret; - - size_t i; +XXH3_accumulate_512_neon(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 15) == 0); + XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); + { /* GCC for darwin arm64 does not like aliasing here */ + xxh_aliasing_uint64x2_t *const xacc = (xxh_aliasing_uint64x2_t *)acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ + uint8_t const *xinput = (const uint8_t *)input; + uint8_t const *xsecret = (const uint8_t *)secret; + + size_t i; #ifdef __wasm_simd128__ - /* - * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret - * is constant propagated, which results in it converting it to this - * inside the loop: - * - * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) - * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) - * ... - * - * This requires a full 32-bit address immediate (and therefore a 6 byte - * instruction) as well as an add for each offset. 
- * - * Putting an asm guard prevents it from folding (at the cost of losing - * the alignment hint), and uses the free offset in `v128.load` instead - * of adding secret_offset each time which overall reduces code size by - * about a kilobyte and improves performance. - */ - XXH_COMPILER_GUARD(xsecret); -#endif - /* Scalar lanes use the normal scalarRound routine */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } - i = 0; - /* 4 NEON lanes at a time. */ - for (; i+1 < XXH3_NEON_LANES / 2; i+=2) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); - uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16)); - /* data_swap = swap(data_vec) */ - uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); - uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); - uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); - - /* - * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a - * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to - * get one vector with the low 32 bits of each lane, and one vector - * with the high 32 bits of each lane. - * - * The intrinsic returns a double vector because the original ARMv7-a - * instruction modified both arguments in place. AArch64 and SIMD128 emit - * two instructions from this intrinsic. 
- * - * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] - * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] - */ - uint32x4x2_t unzipped = vuzpq_u32( - vreinterpretq_u32_u64(data_key_1), - vreinterpretq_u32_u64(data_key_2) - ); - /* data_key_lo = data_key & 0xFFFFFFFF */ - uint32x4_t data_key_lo = unzipped.val[0]; - /* data_key_hi = data_key >> 32 */ - uint32x4_t data_key_hi = unzipped.val[1]; - /* - * Then, we can split the vectors horizontally and multiply which, as for most - * widening intrinsics, have a variant that works on both high half vectors - * for free on AArch64. A similar instruction is available on SIMD128. - * - * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi - */ - uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); - uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); - /* - * Clang reorders - * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s - * c += a; // add acc.2d, acc.2d, swap.2d - * to - * c += a; // add acc.2d, acc.2d, swap.2d - * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s - * - * While it would make sense in theory since the addition is faster, - * for reasons likely related to umlal being limited to certain NEON - * pipelines, this is worse. A compiler guard fixes this. - */ - XXH_COMPILER_GUARD_CLANG_NEON(sum_1); - XXH_COMPILER_GUARD_CLANG_NEON(sum_2); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64(xacc[i], sum_1); - xacc[i+1] = vaddq_u64(xacc[i+1], sum_2); - } - /* Operate on the remaining NEON lanes 2 at a time. 
*/ - for (; i < XXH3_NEON_LANES / 2; i++) { - /* data_vec = xinput[i]; */ - uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); - /* key_vec = xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - /* acc_vec_2 = swap(data_vec) */ - uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); - /* data_key = data_vec ^ key_vec; */ - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* For two lanes, just use VMOVN and VSHRN. */ - /* data_key_lo = data_key & 0xFFFFFFFF; */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* data_key_hi = data_key >> 32; */ - uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); - /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ - uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); - /* Same Clang workaround as before */ - XXH_COMPILER_GUARD_CLANG_NEON(sum); - /* xacc[i] = acc_vec + sum; */ - xacc[i] = vaddq_u64 (xacc[i], sum); - } + /* + * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret + * is constant propagated, which results in it converting it to this + * inside the loop: + * + * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) + * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) + * ... + * + * This requires a full 32-bit address immediate (and therefore a 6 byte + * instruction) as well as an add for each offset. + * + * Putting an asm guard prevents it from folding (at the cost of losing + * the alignment hint), and uses the free offset in `v128.load` instead + * of adding secret_offset each time which overall reduces code size by + * about a kilobyte and improves performance. + */ + XXH_COMPILER_GUARD(xsecret); +#endif + /* Scalar lanes use the normal scalarRound routine */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } + i = 0; + /* 4 NEON lanes at a time. 
*/ + for (; i + 1 < XXH3_NEON_LANES / 2; i += 2) { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); + uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i + 1) * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i + 1) * 16)); + /* data_swap = swap(data_vec) */ + uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); + uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); + uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); + + /* + * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a + * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to + * get one vector with the low 32 bits of each lane, and one vector + * with the high 32 bits of each lane. + * + * The intrinsic returns a double vector because the original ARMv7-a + * instruction modified both arguments in place. AArch64 and SIMD128 emit + * two instructions from this intrinsic. + * + * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] + * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] + */ + uint32x4x2_t unzipped = vuzpq_u32(vreinterpretq_u32_u64(data_key_1), vreinterpretq_u32_u64(data_key_2)); + /* data_key_lo = data_key & 0xFFFFFFFF */ + uint32x4_t data_key_lo = unzipped.val[0]; + /* data_key_hi = data_key >> 32 */ + uint32x4_t data_key_hi = unzipped.val[1]; + /* + * Then, we can split the vectors horizontally and multiply which, as for most + * widening intrinsics, have a variant that works on both high half vectors + * for free on AArch64. A similar instruction is available on SIMD128. 
+ * + * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi + */ + uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); + uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); + /* + * Clang reorders + * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s + * c += a; // add acc.2d, acc.2d, swap.2d + * to + * c += a; // add acc.2d, acc.2d, swap.2d + * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s + * + * While it would make sense in theory since the addition is faster, + * for reasons likely related to umlal being limited to certain NEON + * pipelines, this is worse. A compiler guard fixes this. + */ + XXH_COMPILER_GUARD_CLANG_NEON(sum_1); + XXH_COMPILER_GUARD_CLANG_NEON(sum_2); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64(xacc[i], sum_1); + xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2); } + /* Operate on the remaining NEON lanes 2 at a time. */ + for (; i < XXH3_NEON_LANES / 2; i++) { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* For two lanes, just use VMOVN and VSHRN. 
*/ + /* data_key_lo = data_key & 0xFFFFFFFF; */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* data_key_hi = data_key >> 32; */ + uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); + /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ + uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); + /* Same Clang workaround as before */ + XXH_COMPILER_GUARD_CLANG_NEON(sum); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64(xacc[i], sum); + } + } } XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) -XXH_FORCE_INLINE void -XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); + XXH_FORCE_INLINE void XXH3_scrambleAcc_neon(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 15) == 0); - { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; - uint8_t const* xsecret = (uint8_t const*) secret; + { + xxh_aliasing_uint64x2_t *xacc = (xxh_aliasing_uint64x2_t *)acc; + uint8_t const *xsecret = (uint8_t const *)secret; - size_t i; - /* WASM uses operator overloads and doesn't need these. */ + size_t i; + /* WASM uses operator overloads and doesn't need these. 
*/ #ifndef __wasm_simd128__ - /* { prime32_1, prime32_1 } */ - uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); - /* { 0, prime32_1, 0, prime32_1 } */ - uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32)); + /* { prime32_1, prime32_1 } */ + uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); + /* { 0, prime32_1, 0, prime32_1 } */ + uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32)); #endif - /* AArch64 uses both scalar and neon at the same time */ - for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { - XXH3_scalarScrambleRound(acc, secret, i); - } - for (i=0; i < XXH3_NEON_LANES / 2; i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - uint64x2_t acc_vec = xacc[i]; - uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); - uint64x2_t data_vec = veorq_u64(acc_vec, shifted); - - /* xacc[i] ^= xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); - uint64x2_t data_key = veorq_u64(data_vec, key_vec); - /* xacc[i] *= XXH_PRIME32_1 */ + /* AArch64 uses both scalar and neon at the same time */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + XXH3_scalarScrambleRound(acc, secret, i); + } + for (i = 0; i < XXH3_NEON_LANES / 2; i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); + uint64x2_t data_vec = veorq_u64(acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* xacc[i] *= XXH_PRIME32_1 */ #ifdef __wasm_simd128__ - /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ - xacc[i] = data_key * XXH_PRIME32_1; + /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ + xacc[i] = data_key * XXH_PRIME32_1; #else - /* - * Expanded version with portable NEON intrinsics - * - * lo(x) * lo(y) + (hi(x) * lo(y) << 32) - * - * prod_hi = hi(data_key) * 
lo(prime) << 32 - * - * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector - * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits - * and avoid the shift. - */ - uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi); - /* Extract low bits for vmlal_u32 */ - uint32x2_t data_key_lo = vmovn_u64(data_key); - /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ - xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); + /* + * Expanded version with portable NEON intrinsics + * + * lo(x) * lo(y) + (hi(x) * lo(y) << 32) + * + * prod_hi = hi(data_key) * lo(prime) << 32 + * + * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector + * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits + * and avoid the shift. + */ + uint32x4_t prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), kPrimeHi); + /* Extract low bits for vmlal_u32 */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); #endif - } } + } } #endif #if (XXH_VECTOR == XXH_VSX) XXH_FORCE_INLINE void -XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - /* presumed aligned */ - xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */ - xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */ - xxh_u64x2 const v32 = { 32, 32 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { - /* data_vec = xinput[i]; */ - xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i); - /* key_vec = xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ 
key_vec; - /* shuffled = (data_key << 32) | (data_key >> 32); */ - xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); - /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ - xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); - /* acc_vec = xacc[i]; */ - xxh_u64x2 acc_vec = xacc[i]; - acc_vec += product; - - /* swap high and low halves */ +XXH3_accumulate_512_vsx(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + /* presumed aligned */ + xxh_aliasing_u64x2 *const xacc = (xxh_aliasing_u64x2 *)acc; + xxh_u8 const *const xinput = (xxh_u8 const *)input; /* no alignment restriction */ + xxh_u8 const *const xsecret = (xxh_u8 const *)secret; /* no alignment restriction */ + xxh_u64x2 const v32 = {32, 32}; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16 * i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16 * i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + /* acc_vec = xacc[i]; */ + xxh_u64x2 acc_vec = xacc[i]; + acc_vec += product; + + /* swap high and low halves */ #ifdef __s390x__ - acc_vec += vec_permi(data_vec, data_vec, 2); + acc_vec += vec_permi(data_vec, data_vec, 2); #else - acc_vec += vec_xxpermdi(data_vec, data_vec, 2); + acc_vec += vec_xxpermdi(data_vec, data_vec, 2); #endif - xacc[i] = acc_vec; - } + xacc[i] = acc_vec; + } } XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) -XXH_FORCE_INLINE void -XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - - { 
xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; - const xxh_u8* const xsecret = (const xxh_u8*) secret; - /* constants */ - xxh_u64x2 const v32 = { 32, 32 }; - xxh_u64x2 const v47 = { 47, 47 }; - xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; - size_t i; - for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { - /* xacc[i] ^= (xacc[i] >> 47); */ - xxh_u64x2 const acc_vec = xacc[i]; - xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + XXH_FORCE_INLINE void XXH3_scrambleAcc_vsx(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 15) == 0); - /* xacc[i] ^= xsecret[i]; */ - xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); - xxh_u64x2 const data_key = data_vec ^ key_vec; - - /* xacc[i] *= XXH_PRIME32_1 */ - /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ - xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); - /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ - xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); - xacc[i] = prod_odd + (prod_even << v32); - } } + { + xxh_aliasing_u64x2 *const xacc = (xxh_aliasing_u64x2 *)acc; + const xxh_u8 *const xsecret = (const xxh_u8 *)secret; + /* constants */ + xxh_u64x2 const v32 = {32, 32}; + xxh_u64x2 const v47 = {47, 47}; + xxh_u32x4 const prime = {XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1}; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16 * i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = 
XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } + } } #endif @@ -5563,108 +5450,103 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) #if (XXH_VECTOR == XXH_SVE) XXH_FORCE_INLINE void -XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ +XXH3_accumulate_512_sve(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + uint64_t *xacc = (uint64_t *)acc; + const uint64_t *xinput = (const uint64_t *)(const void *)input; + const uint64_t *xsecret = (const uint64_t *)(const void *)secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc); + ACCRND(vacc, 0); + svst1_u64(mask, xacc, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } else { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } +} + +XXH_FORCE_INLINE void XXH3_accumulate_sve(xxh_u64 *XXH_RESTRICT acc, + const xxh_u8 *XXH_RESTRICT input, + const xxh_u8 *XXH_RESTRICT secret, + size_t 
nbStripes) { + if (nbStripes != 0) { uint64_t *xacc = (uint64_t *)acc; const uint64_t *xinput = (const uint64_t *)(const void *)input; const uint64_t *xsecret = (const uint64_t *)(const void *)secret; svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); uint64_t element_count = svcntd(); if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc); + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc + 0); + do { + /* svprfd(svbool_t, void *, enum svfprop); */ + svprfd(mask, xinput + 128, SV_PLDL1STRM); ACCRND(vacc, 0); - svst1_u64(mask, xacc, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); ACCRND(acc0, 0); ACCRND(acc1, 2); ACCRND(acc2, 4); ACCRND(acc3, 6); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); } else { - svbool_t mask = svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); + svbool_t mask = svptrue_pat_b64(SV_VL4); + 
svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); ACCRND(acc0, 0); ACCRND(acc1, 4); - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } -} + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); -XXH_FORCE_INLINE void -XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, - const xxh_u8* XXH_RESTRICT secret, - size_t nbStripes) -{ - if (nbStripes != 0) { - uint64_t *xacc = (uint64_t *)acc; - const uint64_t *xinput = (const uint64_t *)(const void *)input; - const uint64_t *xsecret = (const uint64_t *)(const void *)secret; - svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); - uint64_t element_count = svcntd(); - if (element_count >= 8) { - svbool_t mask = svptrue_pat_b64(SV_VL8); - svuint64_t vacc = svld1_u64(mask, xacc + 0); - do { - /* svprfd(svbool_t, void *, enum svfprop); */ - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(vacc, 0); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, vacc); - } else if (element_count == 2) { /* sve128 */ - svbool_t mask = svptrue_pat_b64(SV_VL2); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 2); - svuint64_t acc2 = svld1_u64(mask, xacc + 4); - svuint64_t acc3 = svld1_u64(mask, xacc + 6); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - ACCRND(acc1, 2); - ACCRND(acc2, 4); - ACCRND(acc3, 6); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 2, acc1); - svst1_u64(mask, xacc + 4, acc2); - svst1_u64(mask, xacc + 6, acc3); - } else { - svbool_t mask = svptrue_pat_b64(SV_VL4); - svuint64_t acc0 = svld1_u64(mask, xacc + 0); - svuint64_t acc1 = svld1_u64(mask, xacc + 4); - do { - svprfd(mask, xinput + 128, SV_PLDL1STRM); - ACCRND(acc0, 0); - 
ACCRND(acc1, 4); - xinput += 8; - xsecret += 1; - nbStripes--; - } while (nbStripes != 0); - - svst1_u64(mask, xacc + 0, acc0); - svst1_u64(mask, xacc + 4, acc1); - } + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); } + } } #endif @@ -5673,63 +5555,58 @@ XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, #define _LSX_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) XXH_FORCE_INLINE void -XXH3_accumulate_512_lsx( void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { - __m128i* const xacc = (__m128i *) acc; - const __m128i* const xinput = (const __m128i *) input; - const __m128i* const xsecret = (const __m128i *) secret; - - for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { - /* data_vec = xinput[i]; */ - __m128i const data_vec = __lsx_vld(xinput + i, 0); - /* key_vec = xsecret[i]; */ - __m128i const key_vec = __lsx_vld(xsecret + i, 0); - /* data_key = data_vec ^ key_vec; */ - __m128i const data_key = __lsx_vxor_v(data_vec, key_vec); - /* data_key_lo = data_key >> 32; */ - __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32); - // __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32); - /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m128i const product = __lsx_vmulwev_d_wu(data_key, data_key_lo); - /* xacc[i] += swap(data_vec); */ - __m128i const data_swap = __lsx_vshuf4i_w(data_vec, _LSX_SHUFFLE(1, 0, 3, 2)); - __m128i const sum = __lsx_vadd_d(xacc[i], data_swap); - /* xacc[i] += product; */ - xacc[i] = __lsx_vadd_d(product, sum); - } +XXH3_accumulate_512_lsx(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + XXH_ASSERT((((size_t)acc) & 15) == 0); + { + __m128i *const xacc = (__m128i *)acc; + const __m128i *const xinput = (const __m128i *)input; + const __m128i *const xsecret = (const __m128i *)secret; + + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); 
i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = __lsx_vld(xinput + i, 0); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = __lsx_vld(xsecret + i, 0); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = __lsx_vxor_v(data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32); + // __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = __lsx_vmulwev_d_wu(data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = __lsx_vshuf4i_w(data_vec, _LSX_SHUFFLE(1, 0, 3, 2)); + __m128i const sum = __lsx_vadd_d(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = __lsx_vadd_d(product, sum); } + } } XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(lsx) -XXH_FORCE_INLINE void -XXH3_scrambleAcc_lsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - XXH_ASSERT((((size_t)acc) & 15) == 0); - { - __m128i* const xacc = (__m128i*) acc; - const __m128i* const xsecret = (const __m128i *) secret; - const __m128i prime32 = __lsx_vreplgr2vr_w((int)XXH_PRIME32_1); - - for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { - /* xacc[i] ^= (xacc[i] >> 47) */ - __m128i const acc_vec = xacc[i]; - __m128i const shifted = __lsx_vsrli_d(acc_vec, 47); - __m128i const data_vec = __lsx_vxor_v(acc_vec, shifted); - /* xacc[i] ^= xsecret[i]; */ - __m128i const key_vec = __lsx_vld(xsecret + i, 0); - __m128i const data_key = __lsx_vxor_v(data_vec, key_vec); - - /* xacc[i] *= XXH_PRIME32_1; */ - __m128i const data_key_hi = __lsx_vsrli_d(data_key, 32); - __m128i const prod_lo = __lsx_vmulwev_d_wu(data_key, prime32); - __m128i const prod_hi = __lsx_vmulwev_d_wu(data_key_hi, prime32); - xacc[i] = __lsx_vadd_d(prod_lo, __lsx_vslli_d(prod_hi, 32)); - } + XXH_FORCE_INLINE void XXH3_scrambleAcc_lsx(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + 
XXH_ASSERT((((size_t)acc) & 15) == 0); + { + __m128i *const xacc = (__m128i *)acc; + const __m128i *const xsecret = (const __m128i *)secret; + const __m128i prime32 = __lsx_vreplgr2vr_w((int)XXH_PRIME32_1); + + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { + /* xacc[i] ^= (xacc[i] >> 47) */ + __m128i const acc_vec = xacc[i]; + __m128i const shifted = __lsx_vsrli_d(acc_vec, 47); + __m128i const data_vec = __lsx_vxor_v(acc_vec, shifted); + /* xacc[i] ^= xsecret[i]; */ + __m128i const key_vec = __lsx_vld(xsecret + i, 0); + __m128i const data_key = __lsx_vxor_v(data_vec, key_vec); + + /* xacc[i] *= XXH_PRIME32_1; */ + __m128i const data_key_hi = __lsx_vsrli_d(data_key, 32); + __m128i const prod_lo = __lsx_vmulwev_d_wu(data_key, prime32); + __m128i const prod_hi = __lsx_vmulwev_d_wu(data_key_hi, prime32); + xacc[i] = __lsx_vadd_d(prod_lo, __lsx_vslli_d(prod_hi, 32)); } + } } #endif @@ -5751,19 +5628,15 @@ XXH3_scrambleAcc_lsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does * not have this penalty and does the mask automatically. 
*/ -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - xxh_u64 ret; - /* note: %x = 64-bit register, %w = 32-bit register */ - __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc)); - return ret; +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) { + xxh_u64 ret; + /* note: %x = 64-bit register, %w = 32-bit register */ + __asm__("umaddl %x0, %w1, %w2, %x3" : "=r"(ret) : "r"(lhs), "r"(rhs), "r"(acc)); + return ret; } #else -XXH_FORCE_INLINE xxh_u64 -XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) -{ - return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; +XXH_FORCE_INLINE xxh_u64 XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) { + return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; } #endif @@ -5775,22 +5648,18 @@ XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) * of NEON and scalar. */ XXH_FORCE_INLINE void -XXH3_scalarRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT input, - void const* XXH_RESTRICT secret, - size_t lane) -{ - xxh_u64* xacc = (xxh_u64*) acc; - xxh_u8 const* xinput = (xxh_u8 const*) input; - xxh_u8 const* xsecret = (xxh_u8 const*) secret; - XXH_ASSERT(lane < XXH_ACC_NB); - XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); - { - xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); - xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); - xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ - xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); - } +XXH3_scalarRound(void *XXH_RESTRICT acc, void const *XXH_RESTRICT input, void const *XXH_RESTRICT secret, size_t lane) { + xxh_u64 *xacc = (xxh_u64 *)acc; + xxh_u8 const *xinput = (xxh_u8 const *)input; + xxh_u8 const *xsecret = (xxh_u8 const *)secret; + XXH_ASSERT(lane < XXH_ACC_NB); + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN - 1)) == 0); + { + xxh_u64 const data_val = XXH_readLE64(xinput 
+ lane * 8); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); + xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ + xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); + } } /*! @@ -5798,286 +5667,279 @@ XXH3_scalarRound(void* XXH_RESTRICT acc, * @brief Processes a 64 byte block of data using the scalar path. */ XXH_FORCE_INLINE void -XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret) -{ - size_t i; - /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ -#if defined(__GNUC__) && !defined(__clang__) \ - && (defined(__arm__) || defined(__thumb2__)) \ - && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ +XXH3_accumulate_512_scalar(void *XXH_RESTRICT acc, const void *XXH_RESTRICT input, const void *XXH_RESTRICT secret) { + size_t i; + /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ +#if defined(__GNUC__) && !defined(__clang__) && (defined(__arm__) || defined(__thumb2__)) && \ + defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ && XXH_SIZE_OPT <= 0 -# pragma GCC unroll 8 +#pragma GCC unroll 8 #endif - for (i=0; i < XXH_ACC_NB; i++) { - XXH3_scalarRound(acc, input, secret, i); - } + for (i = 0; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } } XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) -/*! - * @internal - * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). - * - * This is extracted to its own function because the NEON path uses a combination - * of NEON and scalar. 
- */ -XXH_FORCE_INLINE void -XXH3_scalarScrambleRound(void* XXH_RESTRICT acc, - void const* XXH_RESTRICT secret, - size_t lane) -{ - xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ - const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */ - XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); - XXH_ASSERT(lane < XXH_ACC_NB); - { - xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8); - xxh_u64 acc64 = xacc[lane]; - acc64 = XXH_xorshift64(acc64, 47); - acc64 ^= key64; - acc64 *= XXH_PRIME32_1; - xacc[lane] = acc64; - } + /*! + * @internal + * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. + */ + XXH_FORCE_INLINE void XXH3_scalarScrambleRound(void *XXH_RESTRICT acc, void const *XXH_RESTRICT secret, size_t lane) { + xxh_u64 *const xacc = (xxh_u64 *)acc; /* presumed aligned */ + const xxh_u8 *const xsecret = (const xxh_u8 *)secret; /* no alignment restriction */ + XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN - 1)) == 0); + XXH_ASSERT(lane < XXH_ACC_NB); + { + xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8); + xxh_u64 acc64 = xacc[lane]; + acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; + xacc[lane] = acc64; + } } /*! 
* @internal * @brief Scrambles the accumulators after a large chunk has been read */ -XXH_FORCE_INLINE void -XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) -{ - size_t i; - for (i=0; i < XXH_ACC_NB; i++) { - XXH3_scalarScrambleRound(acc, secret, i); - } +XXH_FORCE_INLINE void XXH3_scrambleAcc_scalar(void *XXH_RESTRICT acc, const void *XXH_RESTRICT secret) { + size_t i; + for (i = 0; i < XXH_ACC_NB; i++) { + XXH3_scalarScrambleRound(acc, secret, i); + } } -XXH_FORCE_INLINE void -XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64) -{ - /* - * We need a separate pointer for the hack below, - * which requires a non-const pointer. - * Any decent compiler will optimize this out otherwise. - */ - const xxh_u8* kSecretPtr = XXH3_kSecret; - XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); +XXH_FORCE_INLINE void XXH3_initCustomSecret_scalar(void *XXH_RESTRICT customSecret, xxh_u64 seed64) { + /* + * We need a separate pointer for the hack below, + * which requires a non-const pointer. + * Any decent compiler will optimize this out otherwise. + */ + const xxh_u8 *kSecretPtr = XXH3_kSecret; + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0); #if defined(__GNUC__) && defined(__aarch64__) - /* - * UGLY HACK: - * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are - * placed sequentially, in order, at the top of the unrolled loop. - * - * While MOVK is great for generating constants (2 cycles for a 64-bit - * constant compared to 4 cycles for LDR), it fights for bandwidth with - * the arithmetic instructions. - * - * I L S - * MOVK - * MOVK - * MOVK - * MOVK - * ADD - * SUB STR - * STR - * By forcing loads from memory (as the asm line causes the compiler to assume - * that XXH3_kSecretPtr has been changed), the pipelines are used more - * efficiently: - * I L S - * LDR - * ADD LDR - * SUB STR - * STR - * - * See XXH3_NEON_LANES for details on the pipsline. 
- * - * XXH3_64bits_withSeed, len == 256, Snapdragon 835 - * without hack: 2654.4 MB/s - * with hack: 3202.9 MB/s - */ - XXH_COMPILER_GUARD(kSecretPtr); + /* + * UGLY HACK: + * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are + * placed sequentially, in order, at the top of the unrolled loop. + * + * While MOVK is great for generating constants (2 cycles for a 64-bit + * constant compared to 4 cycles for LDR), it fights for bandwidth with + * the arithmetic instructions. + * + * I L S + * MOVK + * MOVK + * MOVK + * MOVK + * ADD + * SUB STR + * STR + * By forcing loads from memory (as the asm line causes the compiler to assume + * that XXH3_kSecretPtr has been changed), the pipelines are used more + * efficiently: + * I L S + * LDR + * ADD LDR + * SUB STR + * STR + * + * See XXH3_NEON_LANES for details on the pipsline. + * + * XXH3_64bits_withSeed, len == 256, Snapdragon 835 + * without hack: 2654.4 MB/s + * with hack: 3202.9 MB/s + */ + XXH_COMPILER_GUARD(kSecretPtr); #endif - { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; - int i; - for (i=0; i < nbRounds; i++) { - /* - * The asm hack causes the compiler to assume that kSecretPtr aliases with - * customSecret, and on aarch64, this prevented LDP from merging two - * loads together for free. Putting the loads together before the stores - * properly generates LDP. - */ - xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; - xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; - XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); - XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); - } } + { + int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; + int i; + for (i = 0; i < nbRounds; i++) { + /* + * The asm hack causes the compiler to assume that kSecretPtr aliases with + * customSecret, and on aarch64, this prevented LDP from merging two + * loads together for free. Putting the loads together before the stores + * properly generates LDP. 
+ */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16 * i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16 * i + 8) - seed64; + XXH_writeLE64((xxh_u8 *)customSecret + 16 * i, lo); + XXH_writeLE64((xxh_u8 *)customSecret + 16 * i + 8, hi); + } + } } - -typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t); -typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); -typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); - +typedef void (*XXH3_f_accumulate)(xxh_u64 *XXH_RESTRICT, + const xxh_u8 *XXH_RESTRICT, + const xxh_u8 *XXH_RESTRICT, + size_t); +typedef void (*XXH3_f_scrambleAcc)(void *XXH_RESTRICT, const void *); +typedef void (*XXH3_f_initCustomSecret)(void *XXH_RESTRICT, xxh_u64); #if (XXH_VECTOR == XXH_AVX512) #define XXH3_accumulate_512 XXH3_accumulate_512_avx512 -#define XXH3_accumulate XXH3_accumulate_avx512 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_accumulate XXH3_accumulate_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 #elif (XXH_VECTOR == XXH_AVX2) #define XXH3_accumulate_512 XXH3_accumulate_512_avx2 -#define XXH3_accumulate XXH3_accumulate_avx2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_accumulate XXH3_accumulate_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 #elif (XXH_VECTOR == XXH_SSE2) #define XXH3_accumulate_512 XXH3_accumulate_512_sse2 -#define XXH3_accumulate XXH3_accumulate_sse2 -#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_accumulate XXH3_accumulate_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 #elif (XXH_VECTOR == XXH_NEON) #define XXH3_accumulate_512 XXH3_accumulate_512_neon -#define XXH3_accumulate XXH3_accumulate_neon -#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_accumulate 
XXH3_accumulate_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #elif (XXH_VECTOR == XXH_VSX) #define XXH3_accumulate_512 XXH3_accumulate_512_vsx -#define XXH3_accumulate XXH3_accumulate_vsx -#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_accumulate XXH3_accumulate_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #elif (XXH_VECTOR == XXH_SVE) #define XXH3_accumulate_512 XXH3_accumulate_512_sve -#define XXH3_accumulate XXH3_accumulate_sve -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_accumulate XXH3_accumulate_sve +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #elif (XXH_VECTOR == XXH_LSX) #define XXH3_accumulate_512 XXH3_accumulate_512_lsx -#define XXH3_accumulate XXH3_accumulate_lsx -#define XXH3_scrambleAcc XXH3_scrambleAcc_lsx +#define XXH3_accumulate XXH3_accumulate_lsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_lsx #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #else /* scalar */ #define XXH3_accumulate_512 XXH3_accumulate_512_scalar -#define XXH3_accumulate XXH3_accumulate_scalar -#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_accumulate XXH3_accumulate_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #endif #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ -# undef XXH3_initCustomSecret -# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar +#undef XXH3_initCustomSecret +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar #endif -XXH_FORCE_INLINE void -XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, - const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - size_t const nbStripesPerBlock = (secretSize - 
XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; - size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; - size_t const nb_blocks = (len - 1) / block_len; - - size_t n; - - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - - for (n = 0; n < nb_blocks; n++) { - f_acc(acc, input + n*block_len, secret, nbStripesPerBlock); - f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); +XXH_FORCE_INLINE void XXH3_hashLong_internal_loop(xxh_u64 *XXH_RESTRICT acc, + const xxh_u8 *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + f_acc(acc, input + n * block_len, secret, nbStripesPerBlock); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { + size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + f_acc(acc, input + nb_blocks * block_len, secret, nbStripes); + + /* last stripe */ + { + const xxh_u8 *const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); } - - /* last partial block */ - XXH_ASSERT(len > XXH_STRIPE_LEN); - { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; - XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); - f_acc(acc, input + nb_blocks*block_len, secret, nbStripes); - - /* last stripe */ - { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; -#define 
XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ - XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); - } } + } } -XXH_FORCE_INLINE xxh_u64 -XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret) -{ - return XXH3_mul128_fold64( - acc[0] ^ XXH_readLE64(secret), - acc[1] ^ XXH_readLE64(secret+8) ); +XXH_FORCE_INLINE xxh_u64 XXH3_mix2Accs(const xxh_u64 *XXH_RESTRICT acc, const xxh_u8 *XXH_RESTRICT secret) { + return XXH3_mul128_fold64(acc[0] ^ XXH_readLE64(secret), acc[1] ^ XXH_readLE64(secret + 8)); } -static XXH_PUREF XXH64_hash_t -XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start) -{ - xxh_u64 result64 = start; - size_t i = 0; - - for (i = 0; i < 4; i++) { - result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); -#if defined(__clang__) /* Clang */ \ - && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ - && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ - && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ - /* - * UGLY HACK: - * Prevent autovectorization on Clang ARMv7-a. Exact same problem as - * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. - * XXH3_64bits, len == 256, Snapdragon 835: - * without hack: 2063.7 MB/s - * with hack: 2560.7 MB/s - */ - XXH_COMPILER_GUARD(result64); +static XXH_PUREF XXH64_hash_t XXH3_mergeAccs(const xxh_u64 *XXH_RESTRICT acc, + const xxh_u8 *XXH_RESTRICT secret, + xxh_u64 start) { + xxh_u64 result64 = start; + size_t i = 0; + + for (i = 0; i < 4; i++) { + result64 += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i); +#if defined(__clang__) /* Clang */ \ + && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ + && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \ + && !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */ + /* + * UGLY HACK: + * Prevent autovectorization on Clang ARMv7-a. 
Exact same problem as + * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b. + * XXH3_64bits, len == 256, Snapdragon 835: + * without hack: 2063.7 MB/s + * with hack: 2560.7 MB/s + */ + XXH_COMPILER_GUARD(result64); #endif - } + } - return XXH3_avalanche(result64); + return XXH3_avalanche(result64); } /* do not align on 8, so that the secret is different from the accumulator */ #define XXH_SECRET_MERGEACCS_START 11 -static XXH_PUREF XXH64_hash_t -XXH3_finalizeLong_64b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 len) -{ - return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, len * XXH_PRIME64_1); +static XXH_PUREF XXH64_hash_t XXH3_finalizeLong_64b(const xxh_u64 *XXH_RESTRICT acc, + const xxh_u8 *XXH_RESTRICT secret, + xxh_u64 len) { + return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, len * XXH_PRIME64_1); } -#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \ - XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 } +#define XXH3_INIT_ACC \ + { \ + XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, \ + XXH_PRIME32_1 \ + } -XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, - const void* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; +XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_internal(const void *XXH_RESTRICT input, + size_t len, + const void *XXH_RESTRICT secret, + size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; - XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble); + XXH3_hashLong_internal_loop(acc, (const xxh_u8 *)input, len, (const xxh_u8 *)secret, secretSize, 
f_acc, f_scramble); - /* converge into final hash */ - XXH_STATIC_ASSERT(sizeof(acc) == 64); - XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - return XXH3_finalizeLong_64b(acc, (const xxh_u8*)secret, (xxh_u64)len); + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_finalizeLong_64b(acc, (const xxh_u8 *)secret, (xxh_u64)len); } /* @@ -6087,12 +5949,13 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE * breaks -Og, this is XXH_NO_INLINE. */ -XXH3_WITH_SECRET_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; - return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc); +XXH3_WITH_SECRET_INLINE XXH64_hash_t XXH3_hashLong_64b_withSecret(const void *XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretLen) { + (void)seed64; + return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc); } /* @@ -6101,12 +5964,15 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, * Note that inside this no_inline function, we do inline the internal loop, * and provide a statically defined secret size to allow optimization of vector loop. 
*/ -XXH_NO_INLINE XXH_PUREF XXH64_hash_t -XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); +XXH_NO_INLINE XXH_PUREF XXH64_hash_t XXH3_hashLong_64b_default(const void *XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretLen) { + (void)seed64; + (void)secret; + (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); } /* @@ -6120,96 +5986,90 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, * It's important for performance that XXH3_hashLong is not inlined. Not sure * why (uop cache maybe?), but the difference is large and easily measurable. */ -XXH_FORCE_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, - XXH64_hash_t seed, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ +XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_64b_withSeed_internal(const void *input, + size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) { #if XXH_SIZE_OPT <= 0 - if (seed == 0) - return XXH3_hashLong_64b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); -#endif - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed); - return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), - f_acc, f_scramble); - } + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), f_acc, f_scramble); +#endif + { + XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + 
return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), f_acc, f_scramble); + } } /* * It's important for performance that XXH3_hashLong is not inlined. */ -XXH_NO_INLINE XXH64_hash_t -XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_64b_withSeed_internal(input, len, seed, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - - -typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH64_hash_t -XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong64_f f_hashLong) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secretLen` condition is not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - * Also, note that function signature doesn't offer room to return an error. 
- */ - if (len <= 16) - return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +XXH_NO_INLINE XXH64_hash_t XXH3_hashLong_64b_withSeed( + const void *XXH_RESTRICT input, size_t len, XXH64_hash_t seed, const xxh_u8 *XXH_RESTRICT secret, size_t secretLen) { + (void)secret; + (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal( + input, len, seed, XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH64_hash_t (*XXH3_hashLong64_f)( + const void *XXH_RESTRICT, size_t, XXH64_hash_t, const xxh_u8 *XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t XXH3_64bits_internal(const void *XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + const void *XXH_RESTRICT secret, + size_t secretLen, + XXH3_hashLong64_f f_hashLong) { + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. + */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8 *)secret, secretLen); } - /* === Public entry point === */ /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) -{ - return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void *input, size_t length) { + return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void *input, + size_t length, + XXH_NOESCAPE const void *secret, + size_t secretSize) { + return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed) -{ - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void *input, size_t length, XXH64_hash_t seed) { + return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed); } -XXH_PUBLIC_API XXH64_hash_t -XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (length <= XXH3_MIDSIZE_MAX) - return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void *input, + size_t length, + XXH_NOESCAPE 
const void *secret, + size_t secretSize, + XXH64_hash_t seed) { + if (length <= XXH3_MIDSIZE_MAX) + return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); + return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8 *)secret, secretSize); } - /* === XXH3 streaming === */ #ifndef XXH_NO_STREAM /* @@ -6235,47 +6095,45 @@ XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH * * Align must be a power of 2 and 8 <= align <= 128. */ -static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align) -{ - XXH_ASSERT(align <= 128 && align >= 8); /* range check */ - XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ - XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ - { /* Overallocate to make room for manual realignment and an offset byte */ - xxh_u8* base = (xxh_u8*)XXH_malloc(s + align); - if (base != NULL) { - /* - * Get the offset needed to align this pointer. - * - * Even if the returned pointer is aligned, there will always be - * at least one byte to store the offset to the original pointer. - */ - size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ - /* Add the offset for the now-aligned pointer */ - xxh_u8* ptr = base + offset; - - XXH_ASSERT((size_t)ptr % align == 0); - - /* Store the offset immediately before the returned pointer. */ - ptr[-1] = (xxh_u8)offset; - return ptr; - } - return NULL; +static XXH_MALLOCF void *XXH_alignedMalloc(size_t s, size_t align) { + XXH_ASSERT(align <= 128 && align >= 8); /* range check */ + XXH_ASSERT((align & (align - 1)) == 0); /* power of 2 */ + XXH_ASSERT(s != 0 && s < (s + align)); /* empty/overflow */ + { /* Overallocate to make room for manual realignment and an offset byte */ + xxh_u8 *base = (xxh_u8 *)XXH_malloc(s + align); + if (base != NULL) { + /* + * Get the offset needed to align this pointer. 
+ * + * Even if the returned pointer is aligned, there will always be + * at least one byte to store the offset to the original pointer. + */ + size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ + /* Add the offset for the now-aligned pointer */ + xxh_u8 *ptr = base + offset; + + XXH_ASSERT((size_t)ptr % align == 0); + + /* Store the offset immediately before the returned pointer. */ + ptr[-1] = (xxh_u8)offset; + return ptr; } + return NULL; + } } /* * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout. */ -static void XXH_alignedFree(void* p) -{ - if (p != NULL) { - xxh_u8* ptr = (xxh_u8*)p; - /* Get the offset byte we added in XXH_malloc. */ - xxh_u8 offset = ptr[-1]; - /* Free the original malloc'd pointer */ - xxh_u8* base = ptr - offset; - XXH_free(base); - } +static void XXH_alignedFree(void *p) { + if (p != NULL) { + xxh_u8 *ptr = (xxh_u8 *)p; + /* Get the offset byte we added in XXH_malloc. */ + xxh_u8 offset = ptr[-1]; + /* Free the original malloc'd pointer */ + xxh_u8 *base = ptr - offset; + XXH_free(base); + } } /*! @ingroup XXH3_family */ /*! @@ -6288,12 +6146,12 @@ static void XXH_alignedFree(void* p) * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) -{ - XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); - if (state==NULL) return NULL; - XXH3_INITSTATE(state); - return state; +XXH_PUBLIC_API XXH3_state_t *XXH3_createState(void) { + XXH3_state_t *const state = (XXH3_state_t *)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state == NULL) + return NULL; + XXH3_INITSTATE(state); + return state; } /*! 
@ingroup XXH3_family */ @@ -6308,88 +6166,87 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) * * @see @ref streaming_example "Streaming Example" */ -XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) -{ - XXH_alignedFree(statePtr); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t *statePtr) { + XXH_alignedFree(statePtr); + return XXH_OK; } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API void -XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state) -{ - XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); -} - -static void -XXH3_reset_internal(XXH3_state_t* statePtr, - XXH64_hash_t seed, - const void* secret, size_t secretSize) -{ - size_t const initStart = offsetof(XXH3_state_t, bufferedSize); - size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; - XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); - XXH_ASSERT(statePtr != NULL); - /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ - memset((char*)statePtr + initStart, 0, initLength); - statePtr->acc[0] = XXH_PRIME32_3; - statePtr->acc[1] = XXH_PRIME64_1; - statePtr->acc[2] = XXH_PRIME64_2; - statePtr->acc[3] = XXH_PRIME64_3; - statePtr->acc[4] = XXH_PRIME64_4; - statePtr->acc[5] = XXH_PRIME32_2; - statePtr->acc[6] = XXH_PRIME64_5; - statePtr->acc[7] = XXH_PRIME32_1; - statePtr->seed = seed; - statePtr->useSeed = (seed != 0); - statePtr->extSecret = (const unsigned char*)secret; - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); - statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; - statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t *dst_state, XXH_NOESCAPE const XXH3_state_t *src_state) { + XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void XXH3_reset_internal(XXH3_state_t *statePtr, XXH64_hash_t seed, const void *secret, size_t secretSize) { + size_t 
const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char *)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->useSeed = (seed != 0); + statePtr->extSecret = (const unsigned char *)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr) { + if (statePtr == NULL) + return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - if (statePtr == NULL) return XXH_ERROR; - XXH3_reset_internal(statePtr, 0, secret, secretSize); - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize) { + if (statePtr == NULL) + return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) + return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) + return XXH_ERROR; + return XXH_OK; } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - if (statePtr == NULL) return XXH_ERROR; - if (seed==0) return XXH3_64bits_reset(statePtr); - if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) - XXH3_initCustomSecret(statePtr->customSecret, seed); - XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed) { + if (statePtr == NULL) + return XXH_ERROR; + if (seed == 0) + return XXH3_64bits_reset(statePtr); + if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) + XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64) -{ - if (statePtr == NULL) return XXH_ERROR; - if (secret == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; - XXH3_reset_internal(statePtr, seed64, secret, secretSize); - statePtr->useSeed = 1; /* always, even if seed64==0 */ - return XXH_OK; +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize, + XXH64_hash_t seed64) { + if (statePtr == NULL) + return XXH_ERROR; + if (secret == NULL) + return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) + return XXH_ERROR; + XXH3_reset_internal(statePtr, seed64, secret, secretSize); + statePtr->useSeed = 1; /* always, even if seed64==0 */ + return XXH_OK; } /*! @@ -6409,193 +6266,194 @@ XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOE * @param f_scramble Pointer to an XXH3_scrambleAcc implementation * @return Pointer past the end of @p input after processing */ -XXH_FORCE_INLINE const xxh_u8 * -XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, - size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, - const xxh_u8* XXH_RESTRICT input, size_t nbStripes, - const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE; - /* Process full blocks */ - if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) { - /* Process the initial partial block... 
*/ - size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr; - - do { - /* Accumulate and scramble */ - f_acc(acc, input, initialSecret, nbStripesThisIter); - f_scramble(acc, secret + secretLimit); - input += nbStripesThisIter * XXH_STRIPE_LEN; - nbStripes -= nbStripesThisIter; - /* Then continue the loop with the full block size */ - nbStripesThisIter = nbStripesPerBlock; - initialSecret = secret; - } while (nbStripes >= nbStripesPerBlock); - *nbStripesSoFarPtr = 0; - } - /* Process a partial block */ - if (nbStripes > 0) { - f_acc(acc, input, initialSecret, nbStripes); - input += nbStripes * XXH_STRIPE_LEN; - *nbStripesSoFarPtr += nbStripes; - } - /* Return end pointer */ - return input; +XXH_FORCE_INLINE const xxh_u8 *XXH3_consumeStripes(xxh_u64 *XXH_RESTRICT acc, + size_t *XXH_RESTRICT nbStripesSoFarPtr, + size_t nbStripesPerBlock, + const xxh_u8 *XXH_RESTRICT input, + size_t nbStripes, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretLimit, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + const xxh_u8 *initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE; + /* Process full blocks */ + if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) { + /* Process the initial partial block... 
*/ + size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr; + + do { + /* Accumulate and scramble */ + f_acc(acc, input, initialSecret, nbStripesThisIter); + f_scramble(acc, secret + secretLimit); + input += nbStripesThisIter * XXH_STRIPE_LEN; + nbStripes -= nbStripesThisIter; + /* Then continue the loop with the full block size */ + nbStripesThisIter = nbStripesPerBlock; + initialSecret = secret; + } while (nbStripes >= nbStripesPerBlock); + *nbStripesSoFarPtr = 0; + } + /* Process a partial block */ + if (nbStripes > 0) { + f_acc(acc, input, initialSecret, nbStripes); + input += nbStripes * XXH_STRIPE_LEN; + *nbStripesSoFarPtr += nbStripes; + } + /* Return end pointer */ + return input; } #ifndef XXH3_STREAM_USE_STACK -# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */ -# define XXH3_STREAM_USE_STACK 1 -# endif +#if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */ +#define XXH3_STREAM_USE_STACK 1 +#endif #endif /* * Both XXH3_64bits_update and XXH3_128bits_update use this routine. */ -XXH_FORCE_INLINE XXH_errorcode -XXH3_update(XXH3_state_t* XXH_RESTRICT const state, - const xxh_u8* XXH_RESTRICT input, size_t len, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - if (input==NULL) { - XXH_ASSERT(len == 0); - return XXH_OK; - } +XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t *XXH_RESTRICT const state, + const xxh_u8 *XXH_RESTRICT input, + size_t len, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + if (input == NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } - XXH_ASSERT(state != NULL); - { const xxh_u8* const bEnd = input + len; - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + XXH_ASSERT(state != NULL); + { + const xxh_u8 *const bEnd = input + len; + const unsigned char *const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret; #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* For some reason, gcc and MSVC seem to suffer greatly - * when operating accumulators directly into state. - * Operating into stack space seems to enable proper optimization. - * clang, on the other hand, doesn't seem to need this trick */ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; - XXH_memcpy(acc, state->acc, sizeof(acc)); + /* For some reason, gcc and MSVC seem to suffer greatly + * when operating accumulators directly into state. + * Operating into stack space seems to enable proper optimization. + * clang, on the other hand, doesn't seem to need this trick */ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; + XXH_memcpy(acc, state->acc, sizeof(acc)); #else - xxh_u64* XXH_RESTRICT const acc = state->acc; + xxh_u64 *XXH_RESTRICT const acc = state->acc; #endif - state->totalLen += len; - XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + + /* small input : just fill in tmp buffer */ + if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) { + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } - /* small input : just fill in tmp buffer */ - if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) { - XXH_memcpy(state->buffer + state->bufferedSize, input, len); - state->bufferedSize += (XXH32_hash_t)len; - return XXH_OK; - } +/* total input is now > XXH3_INTERNALBUFFER_SIZE */ +#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ - /* total input is now > XXH3_INTERNALBUFFER_SIZE */ - #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) - XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ - - /* - * Internal 
buffer is partially filled (always, except at beginning) - * Complete it, then consume it. - */ - if (state->bufferedSize) { - size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; - XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); - input += loadSize; - XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, XXH3_INTERNALBUFFER_STRIPES, - secret, state->secretLimit, - f_acc, f_scramble); - state->bufferedSize = 0; - } - XXH_ASSERT(input < bEnd); - if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { - size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; - input = XXH3_consumeStripes(acc, - &state->nbStripesSoFar, state->nbStripesPerBlock, - input, nbStripes, - secret, state->secretLimit, - f_acc, f_scramble); - XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); - - } - /* Some remaining input (always) : buffer it */ - XXH_ASSERT(input < bEnd); - XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); - XXH_ASSERT(state->bufferedSize == 0); - XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); - state->bufferedSize = (XXH32_hash_t)(bEnd-input); + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. 
+ */ + if (state->bufferedSize) { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(acc, + &state->nbStripesSoFar, + state->nbStripesPerBlock, + state->buffer, + XXH3_INTERNALBUFFER_STRIPES, + secret, + state->secretLimit, + f_acc, + f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { + size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; + input = XXH3_consumeStripes(acc, + &state->nbStripesSoFar, + state->nbStripesPerBlock, + input, + nbStripes, + secret, + state->secretLimit, + f_acc, + f_scramble); + XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + } + /* Some remaining input (always) : buffer it */ + XXH_ASSERT(input < bEnd); + XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); + XXH_ASSERT(state->bufferedSize == 0); + XXH_memcpy(state->buffer, input, (size_t)(bEnd - input)); + state->bufferedSize = (XXH32_hash_t)(bEnd - input); #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 - /* save stack accumulators into state */ - XXH_memcpy(state->acc, acc, sizeof(acc)); + /* save stack accumulators into state */ + XXH_memcpy(state->acc, acc, sizeof(acc)); #endif - } + } - return XXH_OK; + return XXH_OK; } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_update(state, (const xxh_u8*)input, len, - XXH3_accumulate, XXH3_scrambleAcc); -} - - -XXH_FORCE_INLINE void -XXH3_digest_long (XXH64_hash_t* acc, - const XXH3_state_t* state, - const unsigned char* secret) -{ - xxh_u8 lastStripe[XXH_STRIPE_LEN]; - const xxh_u8* lastStripePtr; - - /* - * Digest on a local copy. This way, the state remains unaltered, and it can - * continue ingesting more input afterwards. 
- */ - XXH_memcpy(acc, state->acc, sizeof(state->acc)); - if (state->bufferedSize >= XXH_STRIPE_LEN) { - /* Consume remaining stripes then point to remaining data in buffer */ - size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; - size_t nbStripesSoFar = state->nbStripesSoFar; - XXH3_consumeStripes(acc, - &nbStripesSoFar, state->nbStripesPerBlock, - state->buffer, nbStripes, - secret, state->secretLimit, - XXH3_accumulate, XXH3_scrambleAcc); - lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; - } else { /* bufferedSize < XXH_STRIPE_LEN */ - /* Copy to temp buffer */ - size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; - XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ - XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); - XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); - lastStripePtr = lastStripe; - } - /* Last stripe */ - XXH3_accumulate_512(acc, - lastStripePtr, - secret + state->secretLimit - XXH_SECRET_LASTACC_START); +XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t *state, + XXH_NOESCAPE const void *input, + size_t len) { + return XXH3_update(state, (const xxh_u8 *)input, len, XXH3_accumulate, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE void XXH3_digest_long(XXH64_hash_t *acc, const XXH3_state_t *state, const unsigned char *secret) { + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + const xxh_u8 *lastStripePtr; + + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. 
+ */ + XXH_memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) { + /* Consume remaining stripes then point to remaining data in buffer */ + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, + &nbStripesSoFar, + state->nbStripesPerBlock, + state->buffer, + nbStripes, + secret, + state->secretLimit, + XXH3_accumulate, + XXH3_scrambleAcc); + lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; + } else { /* bufferedSize < XXH_STRIPE_LEN */ + /* Copy to temp buffer */ + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + lastStripePtr = lastStripe; + } + /* Last stripe */ + XXH3_accumulate_512(acc, lastStripePtr, secret + state->secretLimit - XXH_SECRET_LASTACC_START); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; - if (state->totalLen > XXH3_MIDSIZE_MAX) { - XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; - XXH3_digest_long(acc, state, secret); - return XXH3_finalizeLong_64b(acc, secret, (xxh_u64)state->totalLen); - } - /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ - if (state->useSeed) - return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); - return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), - secret, state->secretLimit + XXH_STRIPE_LEN); +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(XXH_NOESCAPE const XXH3_state_t *state) { + const unsigned char *const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_finalizeLong_64b(acc, secret, (xxh_u64)state->totalLen); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->useSeed) + return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), secret, state->secretLimit + XXH_STRIPE_LEN); } #endif /* !XXH_NO_STREAM */ - /* ========================================== * XXH3 128 bits (a.k.a XXH128) * ========================================== @@ -6613,308 +6471,310 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). */ -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - /* A doubled version of 1to3_64b with different constants. */ - XXH_ASSERT(input != NULL); - XXH_ASSERT(1 <= len && len <= 3); - XXH_ASSERT(secret != NULL); +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_1to3_128b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + /* A doubled version of 1to3_64b with different constants. 
*/ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { + xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret + 4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret + 8) ^ XXH_readLE32(secret + 12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_4to8_128b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { + xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret + 16) ^ XXH_readLE64(secret + 24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. 
*/ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= PRIME_MX2; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_9to16_128b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { + xxh_u64 const bitflipl = (XXH_readLE64(secret + 32) ^ XXH_readLE64(secret + 40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret + 48) ^ XXH_readLE64(secret + 56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); /* - * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } - * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } - * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. 
*/ - { xxh_u8 const c1 = input[0]; - xxh_u8 const c2 = input[len >> 1]; - xxh_u8 const c3 = input[len - 1]; - xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) - | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); - xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); - xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; - xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; - xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; - xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; - XXH128_hash_t h128; - h128.low64 = XXH64_avalanche(keyed_lo); - h128.high64 = XXH64_avalanche(keyed_hi); - return h128; - } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(4 <= len && len <= 8); - seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; - { xxh_u32 const input_lo = XXH_readLE32(input); - xxh_u32 const input_hi = XXH_readLE32(input + len - 4); - xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); - xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; - xxh_u64 const keyed = input_64 ^ bitflip; - - /* Shift len to the left to ensure it is even, this avoids even multiplies. */ - XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); - - m128.high64 += (m128.low64 << 1); - m128.low64 ^= (m128.high64 >> 3); - - m128.low64 = XXH_xorshift64(m128.low64, 35); - m128.low64 *= PRIME_MX2; - m128.low64 = XXH_xorshift64(m128.low64, 28); - m128.high64 = XXH3_avalanche(m128.high64); - return m128; + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. 
+ * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. + * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); } -} - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(input != NULL); - XXH_ASSERT(secret != NULL); - XXH_ASSERT(9 <= len && len <= 16); - { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; - xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; - xxh_u64 const input_lo = XXH_readLE64(input); - xxh_u64 input_hi = XXH_readLE64(input + len - 8); - XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); - /* - * Put len in the middle of m128 to ensure that the length gets mixed to - * both 
the low and high bits in the 128x64 multiply below. - */ - m128.low64 += (xxh_u64)(len - 1) << 54; - input_hi ^= bitfliph; - /* - * Add the high 32 bits of input_hi to the high 32 bits of m128, then - * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to - * the high 64 bits of m128. - * - * The best approach to this operation is different on 32-bit and 64-bit. - */ - if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ - /* - * 32-bit optimized version, which is more readable. - * - * On 32-bit, it removes an ADC and delays a dependency between the two - * halves of m128.high64, but it generates an extra mask on 64-bit. - */ - m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); - } else { - /* - * 64-bit optimized (albeit more confusing) version. - * - * Uses some properties of addition and multiplication to remove the mask: - * - * Let: - * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) - * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) - * c = XXH_PRIME32_2 - * - * a + (b * c) - * Inverse Property: x + y - x == y - * a + (b * (1 + c - 1)) - * Distributive Property: x * (y + z) == (x * y) + (x * z) - * a + (b * 1) + (b * (c - 1)) - * Identity Property: x * 1 == x - * a + b + (b * (c - 1)) - * - * Substitute a, b, and c: - * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - * - * Since input_hi.hi + input_hi.lo == input_hi, we get this: - * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) - */ - m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); - } - /* m128 ^= XXH_swap64(m128 >> 64); */ - m128.low64 ^= XXH_swap64(m128.high64); + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); - { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ - XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); - h128.high64 += m128.high64 * XXH_PRIME64_2; + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ 
+ XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = XXH3_avalanche(h128.high64); - return h128; - } } + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } + } } /* * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN */ -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) -{ - XXH_ASSERT(len <= 16); - { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); - if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); - if (len) return XXH3_len_1to3_128b(input, len, secret, seed); - { XXH128_hash_t h128; - xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); - xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); - h128.low64 = XXH64_avalanche(seed ^ bitflipl); - h128.high64 = XXH64_avalanche( seed ^ bitfliph); - return h128; - } } +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_0to16_128b(const xxh_u8 *input, + size_t len, + const xxh_u8 *secret, + XXH64_hash_t seed) { + XXH_ASSERT(len <= 16); + { + if (len > 8) + return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) + return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) + return XXH3_len_1to3_128b(input, len, secret, seed); + { + XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret + 64) ^ XXH_readLE64(secret + 72); + xxh_u64 const bitfliph = XXH_readLE64(secret + 80) ^ XXH_readLE64(secret + 88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche(seed ^ bitfliph); + return h128; + } + } } /* * A bit slower than XXH3_mix16B, but handles multiply by zero better. 
*/ -XXH_FORCE_INLINE XXH128_hash_t -XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, - const xxh_u8* secret, XXH64_hash_t seed) -{ - acc.low64 += XXH3_mix16B (input_1, secret+0, seed); - acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); - acc.high64 += XXH3_mix16B (input_2, secret+16, seed); - acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); - return acc; -} - - -XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(16 < len && len <= 128); - - { XXH128_hash_t acc; - acc.low64 = len * XXH_PRIME64_1; - acc.high64 = 0; +XXH_FORCE_INLINE XXH128_hash_t XXH128_mix32B( + XXH128_hash_t acc, const xxh_u8 *input_1, const xxh_u8 *input_2, const xxh_u8 *secret, XXH64_hash_t seed) { + acc.low64 += XXH3_mix16B(input_1, secret + 0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B(input_2, secret + 16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t XXH3_len_17to128_128b(const xxh_u8 *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { + XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; #if XXH_SIZE_OPT >= 1 - { - /* Smaller, but slightly slower. */ - unsigned int i = (unsigned int)(len - 1) / 32; - do { - acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed); - } while (i-- != 0); - } + { + /* Smaller, but slightly slower. 
*/ + unsigned int i = (unsigned int)(len - 1) / 32; + do { + acc = XXH128_mix32B(acc, input + 16 * i, input + len - 16 * (i + 1), secret + 32 * i, seed); + } while (i-- != 0); + } #else - if (len > 32) { - if (len > 64) { - if (len > 96) { - acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); - } - acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); - } - acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); - } - acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); -#endif - { XXH128_hash_t h128; - h128.low64 = acc.low64 + acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) - + (acc.high64 * XXH_PRIME64_4) - + ((len - seed) * XXH_PRIME64_2); - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); - return h128; + if (len > 32) { + if (len > 64) { + if (len > 96) { + acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed); } + acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed); + } + acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed); } -} - -XXH_NO_INLINE XXH_PUREF XXH128_hash_t -XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH64_hash_t seed) -{ - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; - XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed); +#endif + { + XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + (acc.high64 * XXH_PRIME64_4) + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; + } + } +} + +XXH_NO_INLINE XXH_PUREF XXH128_hash_t XXH3_len_129to240_128b(const xxh_u8 *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH64_hash_t 
seed) { + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + (void)secretSize; + XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX); + + { + XXH128_hash_t acc; + unsigned i; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + /* + * We set as `i` as offset + 32. We do this so that unchanged + * `len` can be used as upper bound. This reaches a sweet spot + * where both x86 and aarch64 get simple agen and good codegen + * for the loop. + */ + for (i = 32; i < 160; i += 32) { + acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32, seed); + } + acc.low64 = XXH3_avalanche(acc.low64); + acc.high64 = XXH3_avalanche(acc.high64); + /* + * NB: `i <= len` will duplicate the last 32-bytes if + * len % 32 was zero. This is an unfortunate necessity to keep + * the hash result stable. + */ + for (i = 160; i <= len; i += 32) { + acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed); + } + /* last bytes */ + acc = XXH128_mix32B(acc, + input + len - 16, + input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, + (XXH64_hash_t)0 - seed); - { XXH128_hash_t acc; - unsigned i; - acc.low64 = len * XXH_PRIME64_1; - acc.high64 = 0; - /* - * We set as `i` as offset + 32. We do this so that unchanged - * `len` can be used as upper bound. This reaches a sweet spot - * where both x86 and aarch64 get simple agen and good codegen - * for the loop. - */ - for (i = 32; i < 160; i += 32) { - acc = XXH128_mix32B(acc, - input + i - 32, - input + i - 16, - secret + i - 32, - seed); - } - acc.low64 = XXH3_avalanche(acc.low64); - acc.high64 = XXH3_avalanche(acc.high64); - /* - * NB: `i <= len` will duplicate the last 32-bytes if - * len % 32 was zero. This is an unfortunate necessity to keep - * the hash result stable. 
- */ - for (i=160; i <= len; i += 32) { - acc = XXH128_mix32B(acc, - input + i - 32, - input + i - 16, - secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, - seed); - } - /* last bytes */ - acc = XXH128_mix32B(acc, - input + len - 16, - input + len - 32, - secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, - (XXH64_hash_t)0 - seed); - - { XXH128_hash_t h128; - h128.low64 = acc.low64 + acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) - + (acc.high64 * XXH_PRIME64_4) - + ((len - seed) * XXH_PRIME64_2); - h128.low64 = XXH3_avalanche(h128.low64); - h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); - return h128; - } + { + XXH128_hash_t h128; + h128.low64 = acc.low64 + acc.high64; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + (acc.high64 * XXH_PRIME64_4) + ((len - seed) * XXH_PRIME64_2); + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); + return h128; } + } } -static XXH_PUREF XXH128_hash_t -XXH3_finalizeLong_128b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, size_t secretSize, xxh_u64 len) -{ - XXH128_hash_t h128; - h128.low64 = XXH3_finalizeLong_64b(acc, secret, len); - h128.high64 = XXH3_mergeAccs(acc, secret + secretSize - - XXH_STRIPE_LEN - XXH_SECRET_MERGEACCS_START, - ~(len * XXH_PRIME64_2)); - return h128; +static XXH_PUREF XXH128_hash_t XXH3_finalizeLong_128b(const xxh_u64 *XXH_RESTRICT acc, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + xxh_u64 len) { + XXH128_hash_t h128; + h128.low64 = XXH3_finalizeLong_64b(acc, secret, len); + h128.high64 = + XXH3_mergeAccs(acc, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_MERGEACCS_START, ~(len * XXH_PRIME64_2)); + return h128; } -XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, - const xxh_u8* XXH_RESTRICT secret, size_t secretSize, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble) -{ - XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = 
XXH3_INIT_ACC; +XXH_FORCE_INLINE XXH128_hash_t XXH3_hashLong_128b_internal(const void *XXH_RESTRICT input, + size_t len, + const xxh_u8 *XXH_RESTRICT secret, + size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) { + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC; - XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble); + XXH3_hashLong_internal_loop(acc, (const xxh_u8 *)input, len, secret, secretSize, f_acc, f_scramble); - /* converge into final hash */ - XXH_STATIC_ASSERT(sizeof(acc) == 64); - XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - return XXH3_finalizeLong_128b(acc, secret, secretSize, (xxh_u64)len); + /* converge into final hash */ + XXH_STATIC_ASSERT(sizeof(acc) == 64); + XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_finalizeLong_128b(acc, secret, secretSize, (xxh_u64)len); } /* * It's important for performance that XXH3_hashLong() is not inlined. */ -XXH_NO_INLINE XXH_PUREF XXH128_hash_t -XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; (void)secret; (void)secretLen; - return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_accumulate, XXH3_scrambleAcc); +XXH_NO_INLINE XXH_PUREF XXH128_hash_t XXH3_hashLong_128b_default( + const void *XXH_RESTRICT input, size_t len, XXH64_hash_t seed64, const void *XXH_RESTRICT secret, size_t secretLen) { + (void)seed64; + (void)secret; + (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); } /* @@ -6924,116 +6784,96 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE * breaks -Og, this is XXH_NO_INLINE. 
*/ -XXH3_WITH_SECRET_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)seed64; - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, - XXH3_accumulate, XXH3_scrambleAcc); -} - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, - XXH64_hash_t seed64, - XXH3_f_accumulate f_acc, - XXH3_f_scrambleAcc f_scramble, - XXH3_f_initCustomSecret f_initSec) -{ - if (seed64 == 0) - return XXH3_hashLong_128b_internal(input, len, - XXH3_kSecret, sizeof(XXH3_kSecret), - f_acc, f_scramble); - { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; - f_initSec(secret, seed64); - return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), - f_acc, f_scramble); - } +XXH3_WITH_SECRET_INLINE XXH128_hash_t XXH3_hashLong_128b_withSecret( + const void *XXH_RESTRICT input, size_t len, XXH64_hash_t seed64, const void *XXH_RESTRICT secret, size_t secretLen) { + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8 *)secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t XXH3_hashLong_128b_withSeed_internal(const void *XXH_RESTRICT input, + size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) { + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), f_acc, f_scramble); + { + XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8 *)secret, sizeof(secret), f_acc, f_scramble); + } } /* * It's important for performance that XXH3_hashLong is not inlined. 
*/ -XXH_NO_INLINE XXH128_hash_t -XXH3_hashLong_128b_withSeed(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) -{ - (void)secret; (void)secretLen; - return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, - XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); -} - -typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, - XXH64_hash_t, const void* XXH_RESTRICT, size_t); - -XXH_FORCE_INLINE XXH128_hash_t -XXH3_128bits_internal(const void* input, size_t len, - XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, - XXH3_hashLong128_f f_hl128) -{ - XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); - /* - * If an action is to be taken if `secret` conditions are not respected, - * it should be done here. - * For now, it's a contract pre-condition. - * Adding a check and a branch here would cost performance at every hash. - */ - if (len <= 16) - return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); - if (len <= 128) - return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); - return f_hl128(input, len, seed64, secret, secretLen); +XXH_NO_INLINE XXH128_hash_t XXH3_hashLong_128b_withSeed( + const void *input, size_t len, XXH64_hash_t seed64, const void *XXH_RESTRICT secret, size_t secretLen) { + (void)secret; + (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal( + input, len, seed64, XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)( + const void *XXH_RESTRICT, size_t, XXH64_hash_t, const void *XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t XXH3_128bits_internal(const void *input, + size_t len, + XXH64_hash_t seed64, + const void *XXH_RESTRICT secret, + size_t secretLen, + 
XXH3_hashLong128_f f_hl128) { + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8 *)input, len, (const xxh_u8 *)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); } - /* === Public XXH128 API === */ /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_128bits_internal(input, len, 0, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_default); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void *input, size_t len) { + return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_default); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_128bits_internal(input, len, 0, - (const xxh_u8*)secret, secretSize, - XXH3_hashLong_128b_withSecret); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void *input, + size_t len, + XXH_NOESCAPE const void *secret, + size_t secretSize) { + return XXH3_128bits_internal(input, len, 0, (const xxh_u8 *)secret, secretSize, XXH3_hashLong_128b_withSecret); } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_internal(input, len, seed, - XXH3_kSecret, sizeof(XXH3_kSecret), - XXH3_hashLong_128b_withSeed); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void *input, size_t len, XXH64_hash_t seed) { + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); - return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecretandSeed( + XXH_NOESCAPE const void *input, size_t len, XXH_NOESCAPE const void *secret, size_t secretSize, XXH64_hash_t seed) { + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); + return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) -{ - return XXH3_128bits_withSeed(input, len, seed); +XXH_PUBLIC_API XXH128_hash_t XXH128(XXH_NOESCAPE const void *input, size_t len, XXH64_hash_t seed) { + return XXH3_128bits_withSeed(input, len, seed); } - /* === XXH3 128-bit streaming === */ #ifndef XXH_NO_STREAM /* @@ -7042,67 +6882,61 @@ XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed) */ /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) -{ - return XXH3_64bits_reset(statePtr); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t *statePtr) { + return XXH3_64bits_reset(statePtr); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) -{ - return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize) { + return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSeed(statePtr, seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t *statePtr, XXH64_hash_t seed) { + return XXH3_64bits_reset_withSeed(statePtr, seed); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed) -{ - return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t *statePtr, + XXH_NOESCAPE const void *secret, + size_t secretSize, + XXH64_hash_t seed) { + return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed); } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) -{ - return XXH3_64bits_update(state, input, len); +XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t *state, + XXH_NOESCAPE const void *input, + size_t len) { + return XXH3_64bits_update(state, input, len); } /*! @ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state) -{ - const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; - if (state->totalLen > XXH3_MIDSIZE_MAX) { - XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; - XXH3_digest_long(acc, state, secret); - XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); - return XXH3_finalizeLong_128b(acc, secret, state->secretLimit + XXH_STRIPE_LEN, (xxh_u64)state->totalLen); - } - /* len <= XXH3_MIDSIZE_MAX : short code */ - if (state->useSeed) - return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); - return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), - secret, state->secretLimit + XXH_STRIPE_LEN); +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(XXH_NOESCAPE const XXH3_state_t *state) { + const unsigned char *const secret = (state->extSecret == NULL) ? 
state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); + return XXH3_finalizeLong_128b(acc, secret, state->secretLimit + XXH_STRIPE_LEN, (xxh_u64)state->totalLen); + } + /* len <= XXH3_MIDSIZE_MAX : short code */ + if (state->useSeed) + return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), secret, state->secretLimit + XXH_STRIPE_LEN); } #endif /* !XXH_NO_STREAM */ /* 128-bit utility functions */ -#include /* memcmp, memcpy */ +#include /* memcmp, memcpy */ /* return : 1 is equal, 0 if different */ /*! @ingroup XXH3_family */ -XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) -{ - /* note : XXH128_hash_t is compact, it has no padding byte */ - return !(memcmp(&h1, &h2, sizeof(h1))); +XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) { + /* note : XXH128_hash_t is compact, it has no padding byte */ + return !(memcmp(&h1, &h2, sizeof(h1))); } /* This prototype is compatible with stdlib's qsort(). @@ -7110,129 +6944,123 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) * <0 if *h128_1 < *h128_2 * =0 if *h128_1 == *h128_2 */ /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2) -{ - XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1; - XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2; - int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); - /* note : bets that, in most cases, hash values are different */ - if (hcmp) return hcmp; - return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); +XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void *h128_1, XXH_NOESCAPE const void *h128_2) { + XXH128_hash_t const h1 = *(const XXH128_hash_t *)h128_1; + XXH128_hash_t const h2 = *(const XXH128_hash_t *)h128_2; + int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); + /* note : bets that, in most cases, hash values are different */ + if (hcmp) + return hcmp; + return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); } - /*====== Canonical representation ======*/ /*! @ingroup XXH3_family */ -XXH_PUBLIC_API void -XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) { - hash.high64 = XXH_swap64(hash.high64); - hash.low64 = XXH_swap64(hash.low64); - } - XXH_memcpy(dst, &hash.high64, sizeof(hash.high64)); - XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); +XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t *dst, XXH128_hash_t hash) { + XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) { + hash.high64 = XXH_swap64(hash.high64); + hash.low64 = XXH_swap64(hash.low64); + } + XXH_memcpy(dst, &hash.high64, sizeof(hash.high64)); + XXH_memcpy((char *)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH128_hash_t -XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src) -{ - XXH128_hash_t h; - h.high64 = XXH_readBE64(src); - h.low64 = XXH_readBE64(src->digest + 8); - return h; +XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t *src) { + XXH128_hash_t h; + h.high64 = XXH_readBE64(src); + h.low64 = XXH_readBE64(src->digest + 8); + return h; } - - /* ========================================== * Secret generators * ========================================== */ #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x)) -XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128) -{ - XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 ); - XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 ); +XXH_FORCE_INLINE void XXH3_combine16(void *dst, XXH128_hash_t h128) { + XXH_writeLE64(dst, XXH_readLE64(dst) ^ h128.low64); + XXH_writeLE64((char *)dst + 8, XXH_readLE64((char *)dst + 8) ^ h128.high64); } /*! 
@ingroup XXH3_family */ -XXH_PUBLIC_API XXH_errorcode -XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize) -{ +XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void *secretBuffer, + size_t secretSize, + XXH_NOESCAPE const void *customSeed, + size_t customSeedSize) { #if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(secretBuffer != NULL); - XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + XXH_ASSERT(secretBuffer != NULL); + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); #else - /* production mode, assert() are disabled */ - if (secretBuffer == NULL) return XXH_ERROR; - if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + /* production mode, assert() are disabled */ + if (secretBuffer == NULL) + return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) + return XXH_ERROR; #endif - if (customSeedSize == 0) { - customSeed = XXH3_kSecret; - customSeedSize = XXH_SECRET_DEFAULT_SIZE; - } + if (customSeedSize == 0) { + customSeed = XXH3_kSecret; + customSeedSize = XXH_SECRET_DEFAULT_SIZE; + } #if (XXH_DEBUGLEVEL >= 1) - XXH_ASSERT(customSeed != NULL); + XXH_ASSERT(customSeed != NULL); #else - if (customSeed == NULL) return XXH_ERROR; -#endif - - /* Fill secretBuffer with a copy of customSeed - repeat as needed */ - { size_t pos = 0; - while (pos < secretSize) { - size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize); - memcpy((char*)secretBuffer + pos, customSeed, toCopy); - pos += toCopy; - } } - - { size_t const nbSeg16 = secretSize / 16; - size_t n; - XXH128_canonical_t scrambler; - XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0)); - for (n=0; n + #include "fastfs_ops.h" #include "spdk/file.h" #include "spdk/rpc.h" -#include extern int inflights; static void *g_json_data; static size_t g_config_file_size; struct { - const char* spdk_conf = NULL; - const char* spdk_bdev = NULL; - const char* mountpoint = NULL; + const char *spdk_conf = NULL; + 
const char *spdk_bdev = NULL; + const char *mountpoint = NULL; int format = false; int debug = false; } options; static void print_usage() { printf("fastfs_fuse -c bdev.json -b Malloc0 -f\n" - " -c, --spdk_conf SPDK configuration file\n" - " -b, --spdk_bdev SPDK bdev name\n" - " -m, --mountpoint fastfs mount point\n" - " -f, --format format fastfs before mount\n" - " -d, --debug enable debug\n"); + " -c, --spdk_conf SPDK configuration file\n" + " -b, --spdk_bdev SPDK bdev name\n" + " -m, --mountpoint fastfs mount point\n" + " -f, --format format fastfs before mount\n" + " -d, --debug enable debug\n"); } static void parse_options(int argc, char *argv[]) { - static struct option options_config[] = { - {"spdk_conf", required_argument, 0, 'c'}, - {"spdk_bdev", required_argument, 0, 'b'}, - {"mountpoint", required_argument, 0, 'm'}, - {"format", no_argument, &options.format, 'f'}, - {"debug", no_argument, &options.debug, 'd'}, - {0, 0, 0, 0} - }; + static struct option options_config[] = {{"spdk_conf", required_argument, 0, 'c'}, + {"spdk_bdev", required_argument, 0, 'b'}, + {"mountpoint", required_argument, 0, 'm'}, + {"format", no_argument, &options.format, 'f'}, + {"debug", no_argument, &options.debug, 'd'}, + {0, 0, 0, 0}}; int c = 0; while (c >= 0) { int option_index; c = getopt_long(argc, argv, "c:b:m:fd", options_config, &option_index); switch (c) { - case 'c': - options.spdk_conf = optarg; - break; - case 'b': - options.spdk_bdev = optarg; - break; - case 'm': - options.mountpoint = optarg; - break; - case 'f': - options.format = true; - break; - case 'd': - options.debug = true; - break; - default: - break; + case 'c': + options.spdk_conf = optarg; + break; + case 'b': + options.spdk_bdev = optarg; + break; + case 'm': + options.mountpoint = optarg; + break; + case 'f': + options.format = true; + break; + case 'd': + options.debug = true; + break; + default: + break; } } @@ -70,7 +69,7 @@ static void parse_options(int argc, char *argv[]) { } } -static void 
mount_complete(FastFS* fastfs, int code) { +static void mount_complete(FastFS *fastfs, int code) { if (code != 0) { printf("mount fastfs failed: %d\n", code); exit(code); @@ -78,7 +77,7 @@ static void mount_complete(FastFS* fastfs, int code) { fastfs->ready = true; } -static void format_complete(FastFS* fastfs, int code) { +static void format_complete(FastFS *fastfs, int code) { if (code != 0) { printf("format fastfs failed: %d\n", code); exit(code); @@ -86,18 +85,16 @@ static void format_complete(FastFS* fastfs, int code) { fastfs->mount(mount_complete); } -static void fuse_event_cb( - enum spdk_bdev_event_type type, struct spdk_bdev* bdev, void* ctx) { +static void fuse_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx) { printf("Unsupported bdev event: type %d\n", type); } static void bdev_init_done(int rc, void *cb_arg) { - FastFS* fastfs = reinterpret_cast(cb_arg); - fs_context_t& fs_context = FastFS::fs_context; + FastFS *fastfs = reinterpret_cast(cb_arg); + fs_context_t &fs_context = FastFS::fs_context; fs_context.bdev = NULL; fs_context.bdev_desc = NULL; - rc = spdk_bdev_open_ext(fs_context.bdev_name, true, fuse_event_cb, NULL, - &fs_context.bdev_desc); + rc = spdk_bdev_open_ext(fs_context.bdev_name, true, fuse_event_cb, NULL, &fs_context.bdev_desc); if (rc) { printf("Could not open bdev: %s\n", fs_context.bdev_name); exit(-1); @@ -122,8 +119,7 @@ static void bdev_subsystem_init_done(int rc, void *cb_arg) { exit(-1); } spdk_rpc_set_state(SPDK_RPC_RUNTIME); - spdk_subsystem_load_config( - g_json_data, g_config_file_size, bdev_init_done, cb_arg, true); + spdk_subsystem_load_config(g_json_data, g_config_file_size, bdev_init_done, cb_arg, true); } static void bdev_startup_done(int rc, void *cb_arg) { @@ -135,13 +131,11 @@ static void bdev_startup_done(int rc, void *cb_arg) { } static void bdev_init_start(void *arg) { - g_json_data = spdk_posix_file_load_from_name( - options.spdk_conf, &g_config_file_size); - spdk_subsystem_load_config( 
- g_json_data, g_config_file_size, bdev_startup_done, arg, true); + g_json_data = spdk_posix_file_load_from_name(options.spdk_conf, &g_config_file_size); + spdk_subsystem_load_config(g_json_data, g_config_file_size, bdev_startup_done, arg, true); } -static FastFS* mount_fastfs() { +static FastFS *mount_fastfs() { struct spdk_env_opts opts; spdk_env_opts_init(&opts); opts.name = "fastfs-fuse"; @@ -153,17 +147,16 @@ static FastFS* mount_fastfs() { return nullptr; } spdk_thread_lib_init(NULL, sizeof(struct fastfs_fuse_context)); - struct spdk_thread* thread = spdk_thread_create("fuse_thread", NULL); + struct spdk_thread *thread = spdk_thread_create("fuse_thread", NULL); if (!thread) { printf("failed to allocate thread\n"); return nullptr; } - fuseCtx = reinterpret_cast( - spdk_thread_get_ctx(thread)); + fuseCtx = reinterpret_cast(spdk_thread_get_ctx(thread)); fuseCtx->thread = thread; spdk_set_thread(thread); - FastFS* fastfs = new FastFS(options.spdk_bdev); + FastFS *fastfs = new FastFS(options.spdk_bdev); fuseCtx->fastfs = fastfs; spdk_thread_send_msg(thread, bdev_init_start, fastfs); @@ -176,11 +169,11 @@ static FastFS* mount_fastfs() { } static void bdev_fini_done(void *cb_arg) { - FastFS* fastfs = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(cb_arg); fastfs->ready = false; } -static void umount_fastfs(FastFS* fastfs) { +static void umount_fastfs(FastFS *fastfs) { if (FastFS::fs_context.bdev_io_channel) { spdk_put_io_channel(FastFS::fs_context.bdev_io_channel); } @@ -188,7 +181,7 @@ static void umount_fastfs(FastFS* fastfs) { spdk_bdev_close(FastFS::fs_context.bdev_desc); } spdk_subsystem_fini(bdev_fini_done, fastfs); - struct spdk_thread* thread = spdk_get_thread(); + struct spdk_thread *thread = spdk_get_thread(); do { spdk_thread_poll(thread, 0, 0); } while (fastfs->ready); @@ -218,12 +211,11 @@ int main(int argc, char **argv) { for (auto &arg : fuseArgs) { fuseArgsPtr.push_back(const_cast(arg.c_str())); } - struct fuse_args args = - 
FUSE_ARGS_INIT((int)fuseArgsPtr.size(), fuseArgsPtr.data()); + struct fuse_args args = FUSE_ARGS_INIT((int)fuseArgsPtr.size(), fuseArgsPtr.data()); - struct fuse_session* se = nullptr; - struct spdk_thread* thread = nullptr; - FastFS* fastfs = nullptr; + struct fuse_session *se = nullptr; + struct spdk_thread *thread = nullptr; + FastFS *fastfs = nullptr; struct fuse_cmdline_opts opts; struct pollfd fds[1]; int ret = -1; diff --git a/fuse/fastfs_ops.h b/fuse/fastfs_ops.h index 4e91643..17d4fd9 100644 --- a/fuse/fastfs_ops.h +++ b/fuse/fastfs_ops.h @@ -6,9 +6,10 @@ #ifndef FASTFS_OPS_H_ #define FASTFS_OPS_H_ +#include + #include "core/FastFS.h" #include "fastfs_fuse.h" -#include #define FUSE_USE_VERSION 30 #include "fuse3/fuse.h" #include "fuse3/fuse_lowlevel.h" @@ -19,32 +20,32 @@ static bool DIRECT_IO = true; static int inflights = 0; struct fastfs_fuse_context { - FastFS* fastfs = nullptr; - ByteBuffer* buffer = nullptr; - struct spdk_thread* thread; + FastFS *fastfs = nullptr; + ByteBuffer *buffer = nullptr; + struct spdk_thread *thread; }; -static struct fastfs_fuse_context* fuseCtx; +static struct fastfs_fuse_context *fuseCtx; struct FuseOp { - FuseOp* next = nullptr; + FuseOp *next = nullptr; fuse_req_t req = nullptr; - struct fuse_file_info* file = nullptr; - fs_op_context* opCtx = nullptr; - ByteBuffer* buffer = nullptr; + struct fuse_file_info *file = nullptr; + fs_op_context *opCtx = nullptr; + ByteBuffer *buffer = nullptr; fuse_ino_t ino; }; -static FuseOp* op_head = nullptr; +static FuseOp *op_head = nullptr; static std::vector fuseOps; -static FuseOp* allocFuseOp() { - FuseOp* res = op_head; +static FuseOp *allocFuseOp() { + FuseOp *res = op_head; if (op_head) { op_head = op_head->next; } return res; } -static void freeFuseOp(FuseOp* fuseOp) { +static void freeFuseOp(FuseOp *fuseOp) { if (fuseOp->buffer) { fuseCtx->fastfs->freeBuffer(fuseOp->buffer); fuseOp->buffer = nullptr; @@ -60,14 +61,14 @@ static void freeFuseOp(FuseOp* fuseOp) { op_head = 
fuseOp; } -static void fastfs_init(void* userdata, struct fuse_conn_info* conn) { +static void fastfs_init(void *userdata, struct fuse_conn_info *conn) { conn->max_read = 0; // no limit // limit max_write to extentSize // in order to use FastFS::allocBuffer to allocate fuse_buf conn->max_write = FastFS::fs_context.extentSize - WRITE_HEADER_SIZE; conn->max_background = 128; conn->want &= ~FUSE_CAP_SPLICE_READ; - conn->want &= ~FUSE_CAP_READDIRPLUS; // not support yet + conn->want &= ~FUSE_CAP_READDIRPLUS; // not support yet conn->want &= ~FUSE_CAP_AUTO_INVAL_DATA; // TTL disable conn->want |= FUSE_CAP_ASYNC_READ; conn->want |= FUSE_CAP_ASYNC_DIO; @@ -85,9 +86,8 @@ static void fastfs_init(void* userdata, struct fuse_conn_info* conn) { op_head = &fuseOps[0]; } -static void fastfs_getattr( - fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { - FastInode& targetInode = (*fuseCtx->fastfs->inodes)[ino - 1]; +static void fastfs_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { + FastInode &targetInode = (*fuseCtx->fastfs->inodes)[ino - 1]; struct stat stbuf; memset(&stbuf, 0, sizeof(stbuf)); stbuf.st_ino = targetInode.ino_; @@ -104,15 +104,13 @@ static void fastfs_getattr( fuse_reply_attr(req, &stbuf, TTL_PERIOD); } -static void fastfs_setattr(fuse_req_t req, fuse_ino_t ino, - struct stat *attr, int to_set, struct fuse_file_info *fi) { +static void fastfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, int to_set, struct fuse_file_info *fi) { // TODO chenxu14 support chmod, time and truncate fastfs_getattr(req, ino, fi); } -static void fastfs_lookup( - fuse_req_t req, fuse_ino_t parentId, const char *name) { - FastInode* targetInode = fuseCtx->fastfs->lookup(parentId - 1, name); +static void fastfs_lookup(fuse_req_t req, fuse_ino_t parentId, const char *name) { + FastInode *targetInode = fuseCtx->fastfs->lookup(parentId - 1, name); if (!targetInode) { fuse_reply_err(req, ENOENT); } else { @@ -136,10 +134,9 @@ static void 
fastfs_lookup( } } -static void create_dir_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); - CreateContext* createCtx = - reinterpret_cast(fuseOp->opCtx->private_data); +static void create_dir_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); + CreateContext *createCtx = reinterpret_cast(fuseOp->opCtx->private_data); if (code == 0) { struct fuse_entry_param e; memset(&e, 0, sizeof(e)); @@ -157,13 +154,12 @@ static void create_dir_complete(void* cb_args, int code) { inflights--; } -static void fastfs_mkdir( - fuse_req_t req, fuse_ino_t pid, const char *name, mode_t mode) { +static void fastfs_mkdir(fuse_req_t req, fuse_ino_t pid, const char *name, mode_t mode) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - CreateContext* createCtx = new (fuseOp->opCtx->private_data) CreateContext(); + CreateContext *createCtx = new (fuseOp->opCtx->private_data) CreateContext(); createCtx->parentId = pid - 1; createCtx->name = name; createCtx->mode = mode; @@ -173,10 +169,9 @@ static void fastfs_mkdir( fuseCtx->fastfs->create(*fuseOp->opCtx); } -static void create_file_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); - CreateContext* createCtx = - reinterpret_cast(fuseOp->opCtx->private_data); +static void create_file_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); + CreateContext *createCtx = reinterpret_cast(fuseOp->opCtx->private_data); if (code == 0) { struct fuse_entry_param e; memset(&e, 0, sizeof(e)); @@ -186,12 +181,11 @@ static void create_file_complete(void* cb_args, int code) { e.attr.st_ino = createCtx->ino; e.attr.st_mode = S_IFREG | 0644; e.attr.st_nlink = 1; - FastInode& targetInode = (*fuseCtx->fastfs->inodes)[createCtx->ino]; + FastInode &targetInode = (*fuseCtx->fastfs->inodes)[createCtx->ino]; e.attr.st_size = targetInode.size_; 
e.attr.st_blksize = FastFS::fs_context.extentSize; e.attr.st_blocks = targetInode.extents_->size(); - fuseOp->file->fh = fuseCtx->fastfs->open( - createCtx->ino, fuseOp->file->flags | O_SYNC); + fuseOp->file->fh = fuseCtx->fastfs->open(createCtx->ino, fuseOp->file->flags | O_SYNC); fuse_reply_create(fuseOp->req, &e, fuseOp->file); } else { fuse_reply_err(fuseOp->req, EEXIST); @@ -200,14 +194,13 @@ static void create_file_complete(void* cb_args, int code) { inflights--; } -static void fastfs_create(fuse_req_t req, fuse_ino_t pid, - const char *name, mode_t mode, struct fuse_file_info *fi) { +static void fastfs_create(fuse_req_t req, fuse_ino_t pid, const char *name, mode_t mode, struct fuse_file_info *fi) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->file = fi; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - CreateContext* createCtx = new (fuseOp->opCtx->private_data) CreateContext(); + CreateContext *createCtx = new (fuseOp->opCtx->private_data) CreateContext(); createCtx->parentId = pid - 1; createCtx->name = name; createCtx->mode = mode; @@ -217,8 +210,8 @@ static void fastfs_create(fuse_req_t req, fuse_ino_t pid, fuseCtx->fastfs->create(*fuseOp->opCtx); } -static void delete_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); +static void delete_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); fuse_reply_err(fuseOp->req, code == 0 ? 
code : ENOENT); freeFuseOp(fuseOp); inflights--; @@ -226,10 +219,10 @@ static void delete_complete(void* cb_args, int code) { static void fastfs_unlink(fuse_req_t req, fuse_ino_t pid, const char *name) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - DeleteContext* delCtx = new (fuseOp->opCtx->private_data) DeleteContext(); + DeleteContext *delCtx = new (fuseOp->opCtx->private_data) DeleteContext(); delCtx->parentId = pid - 1; delCtx->name = name; delCtx->recursive = true; @@ -238,25 +231,22 @@ static void fastfs_unlink(fuse_req_t req, fuse_ino_t pid, const char *name) { fuseCtx->fastfs->remove(*fuseOp->opCtx); } -static void fastfs_rmdir(fuse_req_t req, fuse_ino_t pid, const char *name) { - fastfs_unlink(req, pid, name); -} +static void fastfs_rmdir(fuse_req_t req, fuse_ino_t pid, const char *name) { fastfs_unlink(req, pid, name); } -static void rename_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); +static void rename_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); fuse_reply_err(fuseOp->req, code == 0 ? 
code : ENOENT); freeFuseOp(fuseOp); inflights--; } -static void fastfs_rename(fuse_req_t req, fuse_ino_t olddir, - const char *oldname, fuse_ino_t newdir, - const char *newname, unsigned int flags) { +static void fastfs_rename( + fuse_req_t req, fuse_ino_t olddir, const char *oldname, fuse_ino_t newdir, const char *newname, unsigned int flags) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - RenameContext* renameCtx = new (fuseOp->opCtx->private_data) RenameContext(); + RenameContext *renameCtx = new (fuseOp->opCtx->private_data) RenameContext(); renameCtx->olddir = olddir - 1; renameCtx->oldname = oldname; renameCtx->newdir = newdir - 1; @@ -266,22 +256,14 @@ static void fastfs_rename(fuse_req_t req, fuse_ino_t olddir, fuseCtx->fastfs->rename(*fuseOp->opCtx); } -static void fastfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { - fuse_reply_none(req); -} +static void fastfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { fuse_reply_none(req); } -static void fastfs_forgetmulti(fuse_req_t req, size_t count, - struct fuse_forget_data *forgets) { - fuse_reply_none(req); -} +static void fastfs_forgetmulti(fuse_req_t req, size_t count, struct fuse_forget_data *forgets) { fuse_reply_none(req); } -static void fastfs_flush( - fuse_req_t req, fuse_ino_t fino, struct fuse_file_info *fi) { - fuse_reply_err(req, 0); -} +static void fastfs_flush(fuse_req_t req, fuse_ino_t fino, struct fuse_file_info *fi) { fuse_reply_err(req, 0); } -static void fsync_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); +static void fsync_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); if (code != 0) { fuse_reply_err(fuseOp->req, ENOENT); } else { @@ -291,21 +273,20 @@ static void fsync_complete(void* cb_args, int code) { inflights--; } -static void fastfs_fsync( - fuse_req_t req, fuse_ino_t fino, int datasync, 
struct fuse_file_info *fi) { +static void fastfs_fsync(fuse_req_t req, fuse_ino_t fino, int datasync, struct fuse_file_info *fi) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - FSyncContext* fsyncCtx = new (fuseOp->opCtx->private_data) FSyncContext(); + FSyncContext *fsyncCtx = new (fuseOp->opCtx->private_data) FSyncContext(); fsyncCtx->fd = fi->fh; fuseOp->opCtx->callback = fsync_complete; fuseOp->opCtx->cb_args = fuseOp; fuseCtx->fastfs->fsync(*fuseOp->opCtx); } -static void truncate_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); +static void truncate_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); if (code != 0) { fuse_reply_err(fuseOp->req, ENOENT); } else { @@ -324,17 +305,15 @@ static void truncate_complete(void* cb_args, int code) { inflights--; } -static void fastfs_open( - fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { +static void fastfs_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { if (fi->flags & O_TRUNC) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->file = fi; fuseOp->ino = ino; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - TruncateContext* truncateCtx = - new (fuseOp->opCtx->private_data) TruncateContext(); + TruncateContext *truncateCtx = new (fuseOp->opCtx->private_data) TruncateContext(); truncateCtx->ino = ino - 1; truncateCtx->size = 0; fuseOp->opCtx->callback = truncate_complete; @@ -354,40 +333,35 @@ static void fastfs_open( } } -static void fastfs_release( - fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { +static void fastfs_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { uint32_t fd = static_cast(fi->fh); int rc = fuseCtx->fastfs->close(fd); fuse_reply_err(req, rc < 0 ? 
ENOENT : 0); } -static void fastfs_add_dentry( - fuse_req_t req, ByteBuffer* buffer, const char *name, fuse_ino_t ino) { +static void fastfs_add_dentry(fuse_req_t req, ByteBuffer *buffer, const char *name, fuse_ino_t ino) { size_t len = fuse_add_direntry(req, NULL, 0, name, NULL, 0); struct stat stbuf; memset(&stbuf, 0, sizeof(stbuf)); stbuf.st_ino = ino; - fuse_add_direntry( - req, buffer->getBuffer(), buffer->remaining(), - name, &stbuf, buffer->position_ + len); + fuse_add_direntry(req, buffer->getBuffer(), buffer->remaining(), name, &stbuf, buffer->position_ + len); buffer->skip(len); } // TODO chenxu14 consider offset -static void fastfs_readdir(fuse_req_t req, fuse_ino_t ino, - size_t size, off_t off, struct fuse_file_info *fi) { - FastInode& targetInode = (*fuseCtx->fastfs->inodes)[ino - 1]; +static void fastfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) { + FastInode &targetInode = (*fuseCtx->fastfs->inodes)[ino - 1]; if (targetInode.type_ != FASTFS_DIR) { fuse_reply_err(req, ENOTDIR); } else { - ByteBuffer* buffer = fuseCtx->fastfs->allocBuffer(); + ByteBuffer *buffer = fuseCtx->fastfs->allocBuffer(); fastfs_add_dentry(req, buffer, ".", 1); fastfs_add_dentry(req, buffer, "..", 1); - for (auto& ino : *(targetInode.children_)) { - FastInode& inode = (*fuseCtx->fastfs->inodes)[ino]; + for (auto &ino : *(targetInode.children_)) { + FastInode &inode = (*fuseCtx->fastfs->inodes)[ino]; fastfs_add_dentry(req, buffer, inode.name_.c_str(), inode.ino_ + 1); } - if ((uint32_t) off < buffer->position_) { + if ((uint32_t)off < buffer->position_) { size_t len = buffer->position_ - off; fuse_reply_buf(req, buffer->p_buffer_ + off, len < size ? 
len : size); } else { @@ -398,20 +372,13 @@ static void fastfs_readdir(fuse_req_t req, fuse_ino_t ino, } } -static void fastfs_releasedir( - fuse_req_t req, fuse_ino_t fino, struct fuse_file_info *fi) { - fuse_reply_err(req, 0); -} +static void fastfs_releasedir(fuse_req_t req, fuse_ino_t fino, struct fuse_file_info *fi) { fuse_reply_err(req, 0); } -static void fastfs_opendir( - fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { - fuse_reply_open(req, fi); -} +static void fastfs_opendir(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) { fuse_reply_open(req, fi); } -static void read_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); - ReadContext* readCtx = - reinterpret_cast(fuseOp->opCtx->private_data); +static void read_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); + ReadContext *readCtx = reinterpret_cast(fuseOp->opCtx->private_data); if (code == -2) { // EOF fuse_reply_buf(fuseOp->req, NULL, 0); } else if (code != 0) { @@ -429,14 +396,13 @@ static void read_complete(void* cb_args, int code) { inflights--; } -static void fastfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, - off_t off, struct fuse_file_info *fi) { +static void fastfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, struct fuse_file_info *fi) { inflights++; - FuseOp* fuseOp = allocFuseOp(); + FuseOp *fuseOp = allocFuseOp(); fuseOp->buffer = fuseCtx->fastfs->allocReadBuffer(off, size); fuseOp->req = req; fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - ReadContext* readCtx = new (fuseOp->opCtx->private_data) ReadContext(); + ReadContext *readCtx = new (fuseOp->opCtx->private_data) ReadContext(); readCtx->fd = fi->fh; readCtx->pread = true; readCtx->direct = true; @@ -449,10 +415,9 @@ static void fastfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, fuseCtx->fastfs->read(*fuseOp->opCtx); } -static void write_complete(void* cb_args, int code) { - FuseOp* fuseOp = reinterpret_cast(cb_args); - 
WriteContext* writeCtx = - reinterpret_cast(fuseOp->opCtx->private_data); +static void write_complete(void *cb_args, int code) { + FuseOp *fuseOp = reinterpret_cast(cb_args); + WriteContext *writeCtx = reinterpret_cast(fuseOp->opCtx->private_data); if (code != 0) { fuse_reply_err(fuseOp->req, EIO); } else { @@ -462,51 +427,46 @@ static void write_complete(void* cb_args, int code) { inflights--; } -static void fastfs_write(fuse_req_t req, fuse_ino_t ino, - struct fuse_bufvec *buf, off_t off, struct fuse_file_info *fi) { +static void +fastfs_write(fuse_req_t req, fuse_ino_t ino, struct fuse_bufvec *buf, off_t off, struct fuse_file_info *fi) { inflights++; - struct fuse_buf* flatbuf = &buf->buf[0]; - FuseOp* fuseOp = allocFuseOp(); + struct fuse_buf *flatbuf = &buf->buf[0]; + FuseOp *fuseOp = allocFuseOp(); fuseOp->req = req; fuseOp->buffer = fuseCtx->buffer; fuseCtx->buffer = nullptr; // avoid release in main loop fuseOp->opCtx = fuseCtx->fastfs->allocFsOp(); - WriteContext* writeCtx = new (fuseOp->opCtx->private_data) WriteContext(); + WriteContext *writeCtx = new (fuseOp->opCtx->private_data) WriteContext(); writeCtx->fd = fi->fh; writeCtx->pwrite = true; writeCtx->offset = off; writeCtx->count = flatbuf->size; writeCtx->direct = true; writeCtx->direct_buff = &fuseOp->buffer->skip(WRITE_HEADER_SIZE); - writeCtx->write_buff = (char*) flatbuf->mem; + writeCtx->write_buff = (char *)flatbuf->mem; fuseOp->opCtx->callback = write_complete; fuseOp->opCtx->cb_args = fuseOp; fuseCtx->fastfs->write(*fuseOp->opCtx); } -static void fastfs_getxattr( - fuse_req_t req, fuse_ino_t ino, const char *name, size_t size) { +static void fastfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, size_t size) { fuse_reply_err(req, ENOTSUP); } -static void fastfs_setxattr( - fuse_req_t req, fuse_ino_t ino, const char *name, - const char *value, size_t size, int flags) { +static void +fastfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, const char *value, size_t size, 
int flags) { fuse_reply_err(req, ENOTSUP); } -static void fastfs_removexattr( - fuse_req_t req, fuse_ino_t ino, const char *name) { - fuse_reply_err(req, ENOTSUP); -} +static void fastfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) { fuse_reply_err(req, ENOTSUP); } static void fastfs_statfs(fuse_req_t req, fuse_ino_t fino) { - auto& ctx = FastFS::fs_context; + auto &ctx = FastFS::fs_context; struct statvfs buf; memset(&buf, 0, sizeof(buf)); buf.f_bsize = ctx.blockSize; buf.f_blocks = ctx.blocks; - buf.f_bfree = ctx.allocator->getFree() * (uint64_t) ctx.allocator->getExtentBlocks(); + buf.f_bfree = ctx.allocator->getFree() * (uint64_t)ctx.allocator->getExtentBlocks(); buf.f_bavail = buf.f_bfree; buf.f_namemax = NAME_MAX; buf.f_files = ctx.maxInodes; @@ -515,26 +475,32 @@ static void fastfs_statfs(fuse_req_t req, fuse_ino_t fino) { fuse_reply_statfs(req, &buf); } -static void fastfs_ioctl(fuse_req_t req, fuse_ino_t fino, int cmd, - void *arg, struct fuse_file_info *fi, unsigned flags, const void *in_buf, - size_t in_bufsz, size_t out_bufsz) { +static void fastfs_ioctl(fuse_req_t req, + fuse_ino_t fino, + int cmd, + void *arg, + struct fuse_file_info *fi, + unsigned flags, + const void *in_buf, + size_t in_bufsz, + size_t out_bufsz) { if (flags & FUSE_IOCTL_COMPAT) { fuse_reply_err(req, ENOSYS); return; } switch (cmd) { - case FASTFS_IOCTL_GET_FD : { - int32_t realFd = fi->fh; - fuse_reply_ioctl(req, 0, &realFd, sizeof(int32_t)); - break; - } - default : { - fuse_reply_err(req, ENOTTY); - } + case FASTFS_IOCTL_GET_FD: { + int32_t realFd = fi->fh; + fuse_reply_ioctl(req, 0, &realFd, sizeof(int32_t)); + break; + } + default: { + fuse_reply_err(req, ENOTTY); + } } } -static void init_fuse_ops(struct fuse_lowlevel_ops& ops) { +static void init_fuse_ops(struct fuse_lowlevel_ops &ops) { ops.init = fastfs_init; ops.lookup = fastfs_lookup; ops.forget = fastfs_forget; @@ -546,8 +512,7 @@ static void init_fuse_ops(struct fuse_lowlevel_ops& ops) { ops.readdir = 
fastfs_readdir; ops.mkdir = fastfs_mkdir; ops.rmdir = fastfs_rmdir; - ops.rename = fastfs_rename, - ops.create = fastfs_create; + ops.rename = fastfs_rename, ops.create = fastfs_create; ops.unlink = fastfs_unlink; ops.flush = fastfs_flush; ops.fsync = fastfs_fsync; diff --git a/test/bytebuffer/bytebuffer_ut.cpp b/test/bytebuffer/bytebuffer_ut.cpp index 367c569..7e48886 100644 --- a/test/bytebuffer/bytebuffer_ut.cpp +++ b/test/bytebuffer/bytebuffer_ut.cpp @@ -4,19 +4,14 @@ */ extern "C" { -#include #include +#include } #include "core/ByteBuffer.h" -void* spdk_dma_zmalloc_socket( - size_t size, size_t align, uint64_t *unused, int numa_id) { - return malloc(size); -} +void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *unused, int numa_id) { return malloc(size); } -void spdk_dma_free(void *buf) { - free(buf); -} +void spdk_dma_free(void *buf) { free(buf); } static void test_base_operation(void) { ByteBuffer buffer(1024); @@ -27,12 +22,12 @@ static void test_base_operation(void) { buffer.mark(); CU_ASSERT(buffer.write(long_src)); - ByteBuffer* dupBuf = buffer.duplicate(); - CU_ASSERT(dupBuf->position() == size + 8/*int64_t*/); + ByteBuffer *dupBuf = buffer.duplicate(); + CU_ASSERT(dupBuf->position() == size + 8 /*int64_t*/); CU_ASSERT(dupBuf->limit() == 1024); delete dupBuf; - ByteBuffer* sliceBuf = buffer.slice(); + ByteBuffer *sliceBuf = buffer.slice(); CU_ASSERT(dupBuf->position() == 0); CU_ASSERT(dupBuf->limit() == buffer.remaining()); delete sliceBuf; @@ -147,12 +142,10 @@ int main() { return CU_get_error(); } - if ( - CU_add_test(suite, "base operation", test_base_operation) == NULL || + if (CU_add_test(suite, "base operation", test_base_operation) == NULL || CU_add_test(suite, "buffer overflow", test_buffer_overflow) == NULL || CU_add_test(suite, "random read write", test_random_rw) == NULL || - CU_add_test(suite, "sequence read write", test_sequence_rw) == NULL - ) { + CU_add_test(suite, "sequence read write", test_sequence_rw) == NULL) { 
CU_cleanup_registry(); return CU_get_error(); } diff --git a/test/ckpt/checkpoint_ut.cpp b/test/ckpt/checkpoint_ut.cpp index 37002ee..5d53e66 100644 --- a/test/ckpt/checkpoint_ut.cpp +++ b/test/ckpt/checkpoint_ut.cpp @@ -4,10 +4,11 @@ */ extern "C" { -#include #include -#include "spdk_internal/mock.h" +#include + #include "rte_mempool.h" +#include "spdk_internal/mock.h" } #include "core/FastFS.h" @@ -27,82 +28,65 @@ static int READ_STAGE = 0; // format(0) -> ckptINodes(1) -> ckptDentry(2) -> writeSuperBlock(3) static int WRITE_STAGE = 0; -size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { - return 1; -} -struct rte_mempool * rte_mempool_create( - const char *name, unsigned n, unsigned elt_size, unsigned cache_size, - unsigned private_data_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags) { +size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { return 1; } +struct rte_mempool *rte_mempool_create(const char *name, + unsigned n, + unsigned elt_size, + unsigned cache_size, + unsigned private_data_size, + rte_mempool_ctor_t *mp_init, + void *mp_init_arg, + rte_mempool_obj_cb_t *obj_init, + void *obj_init_arg, + int socket_id, + unsigned flags) { return nullptr; } -void* spdk_mempool_get(struct spdk_mempool *mp) { - return malloc(sizeof(FastInode)); -} -void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { - free(ele); -} -void *spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { - return malloc(size); -} -void spdk_free(void *buf) { - free(buf); -} -void* spdk_realloc(void *buf, size_t size, size_t align) { - return realloc(buf, size); -} -void* spdk_dma_zmalloc_socket( - size_t size, size_t align, uint64_t *unused, int numa_id) { - return malloc(size); -} -void spdk_dma_free(void *buf) { - free(buf); -} -struct spdk_poller* spdk_poller_register_named( - spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { 
+void *spdk_mempool_get(struct spdk_mempool *mp) { return malloc(sizeof(FastInode)); } +void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { free(ele); } +void *spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { return malloc(size); } +void spdk_free(void *buf) { free(buf); } +void *spdk_realloc(void *buf, size_t size, size_t align) { return realloc(buf, size); } +void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *unused, int numa_id) { return malloc(size); } +void spdk_dma_free(void *buf) { free(buf); } +struct spdk_poller *spdk_poller_register_named(spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { return nullptr; } -uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev) { - return 1; -} -uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev) { - return BLOCK_SIZE; -} -uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) { - return 5000; -} +uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev) { return 1; } +uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev) { return BLOCK_SIZE; } +uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) { return 5000; } static INodeCache mocked(0, MemAllocator(0, 1)); -static void mockFile(FastInode* file) { +static void mockFile(FastInode *file) { file->parentId_ = 0; file->size_ = 0; file->mode_ = 493; } -static void mockFsync(ByteBuffer* buffer, int ino, int extentId) { - buffer->putByte(4); // opType +static void mockFsync(ByteBuffer *buffer, int ino, int extentId) { + buffer->putByte(4); // opType buffer->write(22); // opSize buffer->write(ino); buffer->write(BLOCK_SIZE); // file size - buffer->write(1); // dirty count - buffer->write(0); // extent index + buffer->write(1); // dirty count + buffer->write(0); // extent index buffer->write(extentId); } -static void mockDeletes(ByteBuffer* buffer, int start, int end) { +static void mockDeletes(ByteBuffer *buffer, int start, int end) 
{ DeleteContext deleteCtx; deleteCtx.recursive = true; for (int i = start; i < end; i += 2) { deleteCtx.parentId = 0; deleteCtx.name = "dir_" + std::to_string(i); - buffer->putByte(2/*type*/); + buffer->putByte(2 /*type*/); buffer->write(6 + deleteCtx.name.size()); deleteCtx.serialize(buffer); } } -static void mockCreates(ByteBuffer* buffer, int start, int end) { +static void mockCreates(ByteBuffer *buffer, int start, int end) { CreateContext createCtx; createCtx.mode = 493; for (int i = start; i < end; i++) { @@ -118,120 +102,123 @@ static void mockCreates(ByteBuffer* buffer, int start, int end) { createCtx.name = "file"; createCtx.type = FASTFS_REGULAR_FILE; } - buffer->putByte(0/*type*/); + buffer->putByte(0 /*type*/); buffer->write(14 + createCtx.name.size()); createCtx.serialize(buffer); } } -int spdk_bdev_read( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) { +int spdk_bdev_read(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + void *buf, + uint64_t offset, + uint64_t nbytes, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { if (READ_STAGE == 0) { // format READ_STAGE++; cb(nullptr, false, cb_arg); return 0; } else { - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_context_t &ctx = FastFS::fs_context; switch (READ_STAGE) { - case 1 : { // mount - // mock super block - ctx.superBlock.journalLoc = 1; - ctx.superBlock.journalSkipBlocks = 1; - ctx.superBlock.journalSkipOps = 4; - ctx.superBlock.ckptInodesLoc = 2; - ctx.superBlock.ckptDentryLoc = 3; - ctx.superBlock.lastTxid = 100; - ctx.superBlock.serialize(buffer); - buffer->position(0); - break; - } - case 2 : { // loadInodes - // mock INodes - buffer->write(0); // nextExtent - buffer->write(10); // numOps - INodeFile inodeProto; - inodeProto.size = 0; - inodeProto.mode = 0; - for (int i = 10; i < 20; 
i++) { - inodeProto.ino = i; - if (i % 2 == 0) { - inodeProto.parent_id = 0; - inodeProto.type = FASTFS_DIR; - inodeProto.name = "dir_" + std::to_string(i); - } else { - inodeProto.parent_id = i - 1; - inodeProto.type = FASTFS_REGULAR_FILE; - inodeProto.name = "file"; - } - int size = INodeFile::kFixSize + inodeProto.name.size(); - buffer->write(size); - inodeProto.serialize(buffer); - } - buffer->position(0); - break; - } - case 3 : { // loadDentry - // mock dentry - buffer->write(0); // nextExtent - buffer->write(11); // numOps - for (int i = 10; i < 20; i++) { - if (i % 2 == 0) { - buffer->putByte(FASTFS_DIR); // type - buffer->write(i); // inodeId - buffer->write(1); // child count - buffer->write(i + 1); // child inodeId - } else { - buffer->putByte(FASTFS_REGULAR_FILE); // type - buffer->write(i); // inodeId - buffer->write(1); // extents count - buffer->write(i); // extentId - } - } - // record root - buffer->putByte(FASTFS_DIR); // type - buffer->write(0); // inodeId - buffer->write(5); // child count - for (int i = 10; i < 20; i += 2) { - buffer->write(i); // child inodeId + case 1: { // mount + // mock super block + ctx.superBlock.journalLoc = 1; + ctx.superBlock.journalSkipBlocks = 1; + ctx.superBlock.journalSkipOps = 4; + ctx.superBlock.ckptInodesLoc = 2; + ctx.superBlock.ckptDentryLoc = 3; + ctx.superBlock.lastTxid = 100; + ctx.superBlock.serialize(buffer); + buffer->position(0); + break; + } + case 2: { // loadInodes + // mock INodes + buffer->write(0); // nextExtent + buffer->write(10); // numOps + INodeFile inodeProto; + inodeProto.size = 0; + inodeProto.mode = 0; + for (int i = 10; i < 20; i++) { + inodeProto.ino = i; + if (i % 2 == 0) { + inodeProto.parent_id = 0; + inodeProto.type = FASTFS_DIR; + inodeProto.name = "dir_" + std::to_string(i); + } else { + inodeProto.parent_id = i - 1; + inodeProto.type = FASTFS_REGULAR_FILE; + inodeProto.name = "file"; } - buffer->position(0); - break; + int size = INodeFile::kFixSize + inodeProto.name.size(); + 
buffer->write(size); + inodeProto.serialize(buffer); } - case 4 : { // replayJournal - // mock first block (will skip during mount) - buffer->putByte(1/*flag*/); - buffer->write(0/*epoch*/); - buffer->write(94/*txid*/); - buffer->write(6/*num_ops*/); - mockCreates(buffer, 10, 16); - // mock second block (will skip 4 OPs during mount) - buffer->position(BLOCK_SIZE); - buffer->putByte(0/*flag*/); - buffer->write(0/*epoch*/); - buffer->write(100/*txid*/); - if (NO_INODES) { - buffer->write(4/*create*/ + 5/*delete*/); - mockCreates(buffer, 16, 20); - mockDeletes(buffer, 10, 20); - } else if (FSYNC) { - buffer->write(6/*create*/ + 2/*fsync*/); - mockCreates(buffer, 16, 22); - mockFsync(buffer, 21/*ino*/, 21/*extentId*/); - mockFsync(buffer, 21/*ino*/, 22/*extentId*/); + buffer->position(0); + break; + } + case 3: { // loadDentry + // mock dentry + buffer->write(0); // nextExtent + buffer->write(11); // numOps + for (int i = 10; i < 20; i++) { + if (i % 2 == 0) { + buffer->putByte(FASTFS_DIR); // type + buffer->write(i); // inodeId + buffer->write(1); // child count + buffer->write(i + 1); // child inodeId } else { - buffer->write(8/*num_ops*/); - mockCreates(buffer, 16, 24); + buffer->putByte(FASTFS_REGULAR_FILE); // type + buffer->write(i); // inodeId + buffer->write(1); // extents count + buffer->write(i); // extentId } - buffer->position(0); - break; } - default : { - CU_FAIL("has wrong read stage!"); - break; + // record root + buffer->putByte(FASTFS_DIR); // type + buffer->write(0); // inodeId + buffer->write(5); // child count + for (int i = 10; i < 20; i += 2) { + buffer->write(i); // child inodeId + } + buffer->position(0); + break; + } + case 4: { // replayJournal + // mock first block (will skip during mount) + buffer->putByte(1 /*flag*/); + buffer->write(0 /*epoch*/); + buffer->write(94 /*txid*/); + buffer->write(6 /*num_ops*/); + mockCreates(buffer, 10, 16); + // mock second block (will skip 4 OPs during mount) + buffer->position(BLOCK_SIZE); + 
buffer->putByte(0 /*flag*/); + buffer->write(0 /*epoch*/); + buffer->write(100 /*txid*/); + if (NO_INODES) { + buffer->write(4 /*create*/ + 5 /*delete*/); + mockCreates(buffer, 16, 20); + mockDeletes(buffer, 10, 20); + } else if (FSYNC) { + buffer->write(6 /*create*/ + 2 /*fsync*/); + mockCreates(buffer, 16, 22); + mockFsync(buffer, 21 /*ino*/, 21 /*extentId*/); + mockFsync(buffer, 21 /*ino*/, 22 /*extentId*/); + } else { + buffer->write(8 /*num_ops*/); + mockCreates(buffer, 16, 24); } + buffer->position(0); + break; + } + default: { + CU_FAIL("has wrong read stage!"); + break; + } } READ_STAGE++; } @@ -239,90 +226,93 @@ int spdk_bdev_read( return 0; } -int spdk_bdev_write( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) { - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(buffer->private_data); - fs_context_t& ctx = FastFS::fs_context; +int spdk_bdev_write(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + void *buf, + uint64_t offset, + uint64_t nbytes, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(buffer->private_data); + fs_context_t &ctx = FastFS::fs_context; switch (WRITE_STAGE) { - case 0 : { - if (NO_INODES) { - WRITE_STAGE = 3; // if only root, no need to do checkpoint + case 0: { + if (NO_INODES) { + WRITE_STAGE = 3; // if only root, no need to do checkpoint + } else { + WRITE_STAGE++; + } + break; + } + case 1: { // ckptINodes + uint32_t extentId = 0; + uint32_t numOps = 0; + buffer->pread(0, extentId); + buffer->pread(4, numOps); + if (LARGE_DIR || LARGE_FILE) { + if (extentId > 0) { + WRITE_STAGE = 1; // has more data to write } else { WRITE_STAGE++; } - break; + } else { + // normal case + CU_ASSERT_EQUAL(extentId, 0); + // 1 root, 10 in checkpoint, 4 in journal + CU_ASSERT_EQUAL(numOps, 15); + WRITE_STAGE++; } - 
case 1 : { // ckptINodes - uint32_t extentId = 0; - uint32_t numOps = 0; - buffer->pread(0, extentId); - buffer->pread(4, numOps); - if (LARGE_DIR || LARGE_FILE) { - if (extentId > 0) { - WRITE_STAGE = 1; // has more data to write - } else { - WRITE_STAGE++; - } + break; + } + case 2: { // ckptDentry + uint32_t extentId = 0; + uint32_t numOps = 0; + if (LARGE_DIR || LARGE_FILE) { + buffer->position(0); + CU_ASSERT(fastfs->checkpoint->parseExtent(buffer, extentId, mocked)); + if (extentId > 0) { + WRITE_STAGE = 2; // has more data to write } else { - // normal case - CU_ASSERT_EQUAL(extentId, 0); - // 1 root, 10 in checkpoint, 4 in journal - CU_ASSERT_EQUAL(numOps, 15); WRITE_STAGE++; - } - break; - } - case 2 : { // ckptDentry - uint32_t extentId = 0; - uint32_t numOps = 0; - if (LARGE_DIR || LARGE_FILE) { - buffer->position(0); - CU_ASSERT(fastfs->checkpoint->parseExtent(buffer, extentId, mocked)); - if (extentId > 0) { - WRITE_STAGE = 2; // has more data to write + if (LARGE_DIR) { + int size = mocked[LARGE_DIR_ID].children_->size(); + CU_ASSERT_EQUAL(size, LARGE_DIR_SIZE); } else { - WRITE_STAGE++; - if (LARGE_DIR) { - int size = mocked[LARGE_DIR_ID].children_->size(); - CU_ASSERT_EQUAL(size, LARGE_DIR_SIZE); - } else { - int size = mocked[LARGE_FILE_ID].extents_->size(); - CU_ASSERT_EQUAL(size, LARGE_FILE_SIZE); - } + int size = mocked[LARGE_FILE_ID].extents_->size(); + CU_ASSERT_EQUAL(size, LARGE_FILE_SIZE); } - } else { - // normal case - buffer->pread(0, extentId); - buffer->pread(4, numOps); - CU_ASSERT_EQUAL(extentId, 0); - CU_ASSERT_EQUAL(numOps, 13); // 2 file has no extents - WRITE_STAGE++; } - break; - } - case 3 : { // writeSuperBlock - // no new OP add in journal - CU_ASSERT_EQUAL(ctx.superBlock.lastTxid, 100); - CU_ASSERT_EQUAL(ctx.superBlock.journalSkipBlocks, 1); + } else { + // normal case + buffer->pread(0, extentId); + buffer->pread(4, numOps); + CU_ASSERT_EQUAL(extentId, 0); + CU_ASSERT_EQUAL(numOps, 13); // 2 file has no extents 
WRITE_STAGE++; - break; - } - default : { - CU_FAIL("has wrong write stage!"); - break; } + break; + } + case 3: { // writeSuperBlock + // no new OP add in journal + CU_ASSERT_EQUAL(ctx.superBlock.lastTxid, 100); + CU_ASSERT_EQUAL(ctx.superBlock.journalSkipBlocks, 1); + WRITE_STAGE++; + break; + } + default: { + CU_FAIL("has wrong write stage!"); + break; + } } cb(nullptr, true, cb_arg); return 0; } -static void ckpt_complete(FastFS* fastfs, int code) { +static void ckpt_complete(FastFS *fastfs, int code) { CU_ASSERT_EQUAL(code, 0); - auto& ckpt = *fastfs->checkpoint; - fs_context_t& ctx = FastFS::fs_context; + auto &ckpt = *fastfs->checkpoint; + fs_context_t &ctx = FastFS::fs_context; if (NO_INODES) { CU_ASSERT_EQUAL(ckpt.inodeExtents, 0); CU_ASSERT_EQUAL(ckpt.dentryExtents, 0); @@ -342,12 +332,9 @@ static void ckpt_complete(FastFS* fastfs, int code) { } } -static void mount_complete(FastFS* fastfs, int code) { - CU_ASSERT_FATAL(code == 0 && fastfs != nullptr); -} +static void mount_complete(FastFS *fastfs, int code) { CU_ASSERT_FATAL(code == 0 && fastfs != nullptr); } -static void do_mount(FastFS& fastfs, uint32_t extentSize, - uint32_t inodes = 128, uint32_t files = 128) { +static void do_mount(FastFS &fastfs, uint32_t extentSize, uint32_t inodes = 128, uint32_t files = 128) { fastfs.format(extentSize, mount_complete); fastfs.mount(mount_complete, inodes, files); CU_ASSERT(fastfs.checkpoint != nullptr); @@ -358,25 +345,25 @@ static void test_large_dir(void) { WRITE_STAGE = 0; LARGE_DIR = true; FastFS fastfs("Malloc0"); - fs_context_t& ctx = FastFS::fs_context; + fs_context_t &ctx = FastFS::fs_context; do_mount(fastfs, BLOCK_SIZE * 2, 2048); // mock 1000 file under large dir LARGE_DIR_ID = ctx.inodeAllocator->allocate(); - FastInode& largeDir = (*fastfs.inodes)[LARGE_DIR_ID]; + FastInode &largeDir = (*fastfs.inodes)[LARGE_DIR_ID]; largeDir.create(LARGE_DIR_ID, 0, "largeDir", FASTFS_DIR); mockFile(&largeDir); for (int i = 0; i < LARGE_DIR_SIZE; i++) { uint32_t ino 
= ctx.inodeAllocator->allocate(); - FastInode& file = (*fastfs.inodes)[ino]; + FastInode &file = (*fastfs.inodes)[ino]; file.create(ino, 0, "file_" + std::to_string(i), FASTFS_REGULAR_FILE); mockFile(&file); largeDir.children_->insert(ino); } mocked = *fastfs.inodes; // clear children to verify FastCkpt::parseExtent worked - mocked[LARGE_DIR_ID].children_= new std::set(); + mocked[LARGE_DIR_ID].children_ = new std::set(); // do checkpoint fastfs.checkpoint->checkpoint(ckpt_complete); @@ -388,12 +375,12 @@ static void test_large_file(void) { WRITE_STAGE = 0; LARGE_FILE = true; FastFS fastfs("Malloc0"); - fs_context_t& ctx = FastFS::fs_context; + fs_context_t &ctx = FastFS::fs_context; do_mount(fastfs, BLOCK_SIZE * 2); // mock one file with 2000 extents LARGE_FILE_ID = ctx.inodeAllocator->allocate(); - FastInode& file = (*fastfs.inodes)[LARGE_FILE_ID]; + FastInode &file = (*fastfs.inodes)[LARGE_FILE_ID]; file.create(LARGE_FILE_ID, 0, "largeFile", FASTFS_REGULAR_FILE); mockFile(&file); for (int i = 0; i < LARGE_FILE_SIZE; i++) { @@ -414,7 +401,7 @@ static void test_normal(void) { WRITE_STAGE = 0; FastFS fastfs("Malloc0"); do_mount(fastfs, BLOCK_SIZE * 2); - BitsAllocator* allocator = FastFS::fs_context.inodeAllocator; + BitsAllocator *allocator = FastFS::fs_context.inodeAllocator; CU_ASSERT_EQUAL(fastfs.journal->extents_.front(), 1); CU_ASSERT_EQUAL(fastfs.journal->txid, 100); @@ -441,7 +428,7 @@ static void test_no_inodes(void) { NO_INODES = true; FastFS fastfs("Malloc0"); do_mount(fastfs, BLOCK_SIZE * 2); - BitsAllocator* allocator = FastFS::fs_context.inodeAllocator; + BitsAllocator *allocator = FastFS::fs_context.inodeAllocator; // only root left CU_ASSERT(allocator->getAllocated() == 1); CU_ASSERT_EQUAL(fastfs.root->children_->size(), 0); @@ -480,13 +467,11 @@ int main() { return CU_get_error(); } - if ( - CU_add_test(suite, "large dir", test_large_dir) == NULL || + if (CU_add_test(suite, "large dir", test_large_dir) == NULL || CU_add_test(suite, "large file", 
test_large_file) == NULL || CU_add_test(suite, "normal case", test_normal) == NULL || CU_add_test(suite, "fsync record", test_fsync_record) == NULL || - CU_add_test(suite, "only root inode", test_no_inodes) == NULL - ) { + CU_add_test(suite, "only root inode", test_no_inodes) == NULL) { CU_cleanup_registry(); return CU_get_error(); } diff --git a/test/fs/fastfs_ut.cpp b/test/fs/fastfs_ut.cpp index ccbbae1..181e35a 100644 --- a/test/fs/fastfs_ut.cpp +++ b/test/fs/fastfs_ut.cpp @@ -4,10 +4,11 @@ */ extern "C" { -#include #include -#include "spdk_internal/mock.h" +#include + #include "rte_mempool.h" +#include "spdk_internal/mock.h" } #include "core/FastFS.h" @@ -22,7 +23,7 @@ static bool MOUNT_FS = false; struct read_write_task { int offset; int count; - char* data; + char *data; read_write_task(int off, int len) : offset(off), count(len) { data = new char[count]; for (int i = 0; i < count; i++) { @@ -38,63 +39,49 @@ static read_write_task largeTask(44960, 57344); static read_write_task alignTask(4096, 12192); static read_write_task alignSmallTask(4096, 4000); -size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { - return 1; -} -struct rte_mempool * rte_mempool_create( - const char *name, unsigned n, unsigned elt_size, unsigned cache_size, - unsigned private_data_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags) { +size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { return 1; } +struct rte_mempool *rte_mempool_create(const char *name, + unsigned n, + unsigned elt_size, + unsigned cache_size, + unsigned private_data_size, + rte_mempool_ctor_t *mp_init, + void *mp_init_arg, + rte_mempool_obj_cb_t *obj_init, + void *obj_init_arg, + int socket_id, + unsigned flags) { return nullptr; } -void* spdk_mempool_get(struct spdk_mempool *mp) { - return malloc(sizeof(FastInode)); -} -void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { - free(ele); -} -void 
*spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { - return malloc(size); -} -void spdk_free(void *buf) { - free(buf); -} -void* spdk_realloc(void *buf, size_t size, size_t align) { - return realloc(buf, size); -} -void* spdk_dma_zmalloc_socket( - size_t size, size_t align, uint64_t *unused, int numa_id) { - return malloc(size); -} -void spdk_dma_free(void *buf) { - free(buf); -} -struct spdk_poller* spdk_poller_register_named( - spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { +void *spdk_mempool_get(struct spdk_mempool *mp) { return malloc(sizeof(FastInode)); } +void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { free(ele); } +void *spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { return malloc(size); } +void spdk_free(void *buf) { free(buf); } +void *spdk_realloc(void *buf, size_t size, size_t align) { return realloc(buf, size); } +void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *unused, int numa_id) { return malloc(size); } +void spdk_dma_free(void *buf) { free(buf); } +struct spdk_poller *spdk_poller_register_named(spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { return nullptr; } -uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev) { - return 1; -} -uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev) { - return BLOCK_SIZE; -} -uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) { - return 100; -} -int spdk_bdev_read( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) { +uint32_t spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev) { return 1; } +uint32_t spdk_bdev_get_block_size(const struct spdk_bdev *bdev) { return BLOCK_SIZE; } +uint64_t spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) { return 100; } +int spdk_bdev_read(struct spdk_bdev_desc *desc, + struct 
spdk_io_channel *ch, + void *buf, + uint64_t offset, + uint64_t nbytes, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { if (FILE_READ) { - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_op_context* ctx = reinterpret_cast(buffer->private_data); - ReadContext* readCtx = reinterpret_cast(ctx->private_data); - FastInode* inode = readCtx->file->inode_; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_op_context *ctx = reinterpret_cast(buffer->private_data); + ReadContext *readCtx = reinterpret_cast(ctx->private_data); + FastInode *inode = readCtx->file->inode_; // check if extentId exists uint32_t targetId = offset >> FastFS::fs_context.extentBits; bool exist = false; - for (auto& extentId : *inode->extents_) { + for (auto &extentId : *inode->extents_) { if (extentId == targetId) { exist = true; } @@ -112,30 +99,33 @@ int spdk_bdev_read( return 0; } else if (MOUNT_FS) { MOUNT_FS = false; // avoid conflicts with reading journal - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_context_t &ctx = FastFS::fs_context; ctx.superBlock.serialize(buffer); buffer->position(0); } cb(nullptr, true, cb_arg); return 0; } -int spdk_bdev_write( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) { +int spdk_bdev_write(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + void *buf, + uint64_t offset, + uint64_t nbytes, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { if (SMALL_WRITE) { SMALL_WRITE = false; // avoid conflicts with writing journal - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_op_context* ctx = reinterpret_cast(buffer->private_data); - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_op_context *ctx = reinterpret_cast(buffer->private_data); + WriteContext *writeCtx = 
reinterpret_cast(ctx->private_data); CU_ASSERT(writeCtx->file != nullptr); // verify tail block - ByteBuffer* tailBlock = writeCtx->file->tail_block; + ByteBuffer *tailBlock = writeCtx->file->tail_block; CU_ASSERT(tailBlock->position() > writeCtx->count); uint32_t start = tailBlock->position() - writeCtx->count; - char* buf = tailBlock->p_buffer_ + start; + char *buf = tailBlock->p_buffer_ + start; char val = 0; uint32_t index = 0; for (; index < writeCtx->count; index++) { @@ -152,22 +142,26 @@ int spdk_bdev_write( } else if (FORMAT_FS || MOUNT_FS || RANDOM_WRITE) { cb(nullptr, true, cb_arg); } else { - FastJournal* journal = reinterpret_cast(cb_arg); + FastJournal *journal = reinterpret_cast(cb_arg); journal->writeComplete(0); } return 0; } -int spdk_bdev_writev( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct iovec *iov, int iovcnt, uint64_t offset, uint64_t len, - spdk_bdev_io_completion_cb cb, void *cb_arg) { - ByteBuffer* buffer = reinterpret_cast(cb_arg); - fs_op_context* ctx = reinterpret_cast(buffer->private_data); - WriteContext* writeCtx = reinterpret_cast(ctx->private_data); +int spdk_bdev_writev(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + struct iovec *iov, + int iovcnt, + uint64_t offset, + uint64_t len, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { + ByteBuffer *buffer = reinterpret_cast(cb_arg); + fs_op_context *ctx = reinterpret_cast(buffer->private_data); + WriteContext *writeCtx = reinterpret_cast(ctx->private_data); CU_ASSERT((offset & (BLOCK_SIZE - 1)) == 0); CU_ASSERT((len & (BLOCK_SIZE - 1)) == 0); CU_ASSERT((iov[0].iov_len & (BLOCK_SIZE - 1)) == 0); - for (WriteExtent& extent : writeCtx->writeExtents) { + for (WriteExtent &extent : writeCtx->writeExtents) { uint32_t targetId = extent.offset >> FastFS::fs_context.extentBits; CU_ASSERT(targetId == extent.extentId); } @@ -181,7 +175,7 @@ int spdk_bdev_writev( if (iovcnt == 2) { // first extent // verify tail block CU_ASSERT(iov[0].iov_len == 
BLOCK_SIZE); - char* tailBlock = (char*) iov[0].iov_base; + char *tailBlock = (char *)iov[0].iov_base; CU_ASSERT(tailBlock != nullptr); uint32_t blockOffset = writeCtx->offset & (BLOCK_SIZE - 1); int tailBlockWrite = BLOCK_SIZE - blockOffset; @@ -195,9 +189,8 @@ int spdk_bdev_writev( CU_ASSERT(index == BLOCK_SIZE); // verify remaining CU_ASSERT((iov[1].iov_len & (BLOCK_SIZE - 1)) == 0); - uint32_t toWrite = std::min( - (uint32_t) iov[1].iov_len, (writeCtx->count - tailBlockWrite)); - char* extentBuf = (char*) iov[1].iov_base; + uint32_t toWrite = std::min((uint32_t)iov[1].iov_len, (writeCtx->count - tailBlockWrite)); + char *extentBuf = (char *)iov[1].iov_base; CU_ASSERT(extentBuf != nullptr); bool correct = true; for (uint32_t i = 0; i < toWrite; i++) { @@ -215,9 +208,9 @@ int spdk_bdev_writev( return 0; } -static void create_complete(void* cb_args, int code) { +static void create_complete(void *cb_args, int code) { if (cb_args) { - CreateContext* ctx = reinterpret_cast(cb_args); + CreateContext *ctx = reinterpret_cast(cb_args); if (code == 0) { CU_ASSERT(ctx->ino != UINT32_MAX); } @@ -229,12 +222,9 @@ static void create_complete(void* cb_args, int code) { } } -static void mount_complete(FastFS* fastfs, int code) { - CU_ASSERT_FATAL(code == 0 && fastfs != nullptr); -} +static void mount_complete(FastFS *fastfs, int code) { CU_ASSERT_FATAL(code == 0 && fastfs != nullptr); } -static void do_mount(FastFS& fastfs, uint32_t extentSize, - uint32_t inodes = 128, uint32_t files = 128) { +static void do_mount(FastFS &fastfs, uint32_t extentSize, uint32_t inodes = 128, uint32_t files = 128) { FORMAT_FS = true; fastfs.format(extentSize, mount_complete); FORMAT_FS = false; @@ -244,20 +234,17 @@ static void do_mount(FastFS& fastfs, uint32_t extentSize, MOUNT_FS = false; } -static void delete_complete(void* cb_args, int code) { - CU_ASSERT(code == 0); -} +static void delete_complete(void *cb_args, int code) { CU_ASSERT(code == 0); } -static void delete_failed(void* cb_args, int 
code) { +static void delete_failed(void *cb_args, int code) { CU_ASSERT(code == -3); // dir not empty } -static bool mockCreate(FastFS& fs, const std::string& name, - FileType type = FASTFS_REGULAR_FILE, - uint32_t parentId = 0) { +static bool +mockCreate(FastFS &fs, const std::string &name, FileType type = FASTFS_REGULAR_FILE, uint32_t parentId = 0) { bool res = true; - fs_op_context* ctx = fs.allocFsOp(); - CreateContext* createCtx = new (ctx->private_data) CreateContext(); + fs_op_context *ctx = fs.allocFsOp(); + CreateContext *createCtx = new (ctx->private_data) CreateContext(); createCtx->parentId = parentId; createCtx->name = name.c_str(); createCtx->mode = 493; @@ -273,10 +260,10 @@ static bool mockCreate(FastFS& fs, const std::string& name, return res; } -static void mockDelete(FastFS& fs, uint32_t pid, const std::string& name, - bool recursive = false, bool shouldFail = false) { - fs_op_context* ctx = fs.allocFsOp(); - DeleteContext* delCtx = new (ctx->private_data) DeleteContext(); +static void +mockDelete(FastFS &fs, uint32_t pid, const std::string &name, bool recursive = false, bool shouldFail = false) { + fs_op_context *ctx = fs.allocFsOp(); + DeleteContext *delCtx = new (ctx->private_data) DeleteContext(); delCtx->parentId = pid; delCtx->name = name.c_str(); delCtx->recursive = recursive; @@ -293,7 +280,7 @@ static void mockDelete(FastFS& fs, uint32_t pid, const std::string& name, static void test_alloc_buffer(void) { FastFS fs("Malloc0"); do_mount(fs, BLOCK_SIZE * 8); - ByteBuffer* buffer = fs.allocReadBuffer(task.offset, task.count); + ByteBuffer *buffer = fs.allocReadBuffer(task.offset, task.count); CU_ASSERT(!buffer->alloc_); CU_ASSERT(buffer->position_ == 4000); CU_ASSERT(buffer->capacity_ == FastFS::fs_context.extentSize); @@ -310,7 +297,7 @@ static void test_alloc_buffer(void) { static void test_mkdir(void) { FastFS fs("Malloc0"); do_mount(fs, BLOCK_SIZE * 2); - BitsAllocator* allocator = FastFS::fs_context.inodeAllocator; + BitsAllocator 
*allocator = FastFS::fs_context.inodeAllocator; // ino 0 is reserved CU_ASSERT(allocator->getAllocated() == 1); std::string name = "dir"; @@ -323,7 +310,7 @@ static void test_mkdir(void) { // parent dir not exist name = "dir-not-exist"; - mockCreate(fs, name, FASTFS_DIR, 100/*parentId*/); + mockCreate(fs, name, FASTFS_DIR, 100 /*parentId*/); CU_ASSERT(allocator->getAllocated() == 2); // create recursive @@ -343,11 +330,11 @@ static void test_mkdir(void) { static void test_delete(void) { FastFS fs("Malloc0"); do_mount(fs, BLOCK_SIZE * 2); - BitsAllocator* allocator = FastFS::fs_context.inodeAllocator; + BitsAllocator *allocator = FastFS::fs_context.inodeAllocator; std::string dirName = "dir"; mockCreate(fs, dirName, FASTFS_DIR); - FastInode* dirInode = fs.lookup(0, dirName); + FastInode *dirInode = fs.lookup(0, dirName); CU_ASSERT_FATAL(dirInode != nullptr); std::string fileName = "file"; @@ -366,15 +353,15 @@ static void test_delete(void) { delPath = "dir"; mockDelete(fs, 0, delPath, false, true); CU_ASSERT(allocator->getAllocated() == 3); - mockDelete(fs, 0, delPath, true/*recursive*/); + mockDelete(fs, 0, delPath, true /*recursive*/); CU_ASSERT(allocator->getAllocated() == 1); } static void test_open_close(void) { uint32_t count = 16; FastFS fs("Malloc0"); - do_mount(fs, BLOCK_SIZE * 2, count/*inodes*/, count/*files*/); - BitsAllocator* allocator = FastFS::fs_context.inodeAllocator; + do_mount(fs, BLOCK_SIZE * 2, count /*inodes*/, count /*files*/); + BitsAllocator *allocator = FastFS::fs_context.inodeAllocator; std::string fileName = "file"; std::string path = "/" + fileName; @@ -383,7 +370,7 @@ static void test_open_close(void) { uint32_t free = FastFS::fs_context.fdAllocator->getFree(); CU_ASSERT(free == count - 3); // 3 reserved - int fd = fs.open(path, 1/*flag*/); + int fd = fs.open(path, 1 /*flag*/); CU_ASSERT(fd == 3); // start from 3 free = FastFS::fs_context.fdAllocator->getFree(); CU_ASSERT(free == count - 4); @@ -403,7 +390,7 @@ static void 
test_open_close(void) { static void test_fd_overflow(void) { uint32_t count = 16; FastFS fs("Malloc0"); - do_mount(fs, BLOCK_SIZE * 2, count/*inodes*/, count/*files*/); + do_mount(fs, BLOCK_SIZE * 2, count /*inodes*/, count /*files*/); bool flag = true; int val = 3; // 0, 1 and 2 reserved for (uint32_t i = 0; i < count - 3; i++) { @@ -426,8 +413,8 @@ static void test_inode_overflow(void) { uint32_t count = 16; FastFS fs("Malloc0"); bool flag = true; - do_mount(fs, BLOCK_SIZE * 2, count/*inodes*/, count/*files*/); - for (uint32_t i = 0; i < count - 1/*root*/; i++) { + do_mount(fs, BLOCK_SIZE * 2, count /*inodes*/, count /*files*/); + for (uint32_t i = 0; i < count - 1 /*root*/; i++) { if (!mockCreate(fs, "file_" + std::to_string(i))) { flag = false; break; @@ -441,17 +428,17 @@ static void test_inode_overflow(void) { CU_ASSERT(flag); } -static void write_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - WriteContext* ctx = reinterpret_cast(opCtx->private_data); +static void write_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + WriteContext *ctx = reinterpret_cast(opCtx->private_data); if (code == 0) { CU_ASSERT(ctx->file->inode_->size_ == ctx->offset + ctx->count); - auto* extents = ctx->file->inode_->extents_; - for (auto& extent : ctx->writeExtents) { + auto *extents = ctx->file->inode_->extents_; + for (auto &extent : ctx->writeExtents) { CU_ASSERT(extent.index < extents->size()); } // verify tail block's data - ByteBuffer* tailBlock = ctx->file->tail_block; + ByteBuffer *tailBlock = ctx->file->tail_block; char val = ctx->count - 1; bool correct = true; int start = 0; @@ -471,16 +458,16 @@ static void write_complete(void* cb_args, int code) { } } -static void read_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - ReadContext* ctx = reinterpret_cast(opCtx->private_data); - CU_ASSERT(ctx->file!= nullptr); +static void read_complete(void 
*cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + ReadContext *ctx = reinterpret_cast(opCtx->private_data); + CU_ASSERT(ctx->file != nullptr); } -static int createTestFile(FastFS& fs, const std::string& name, int size) { +static int createTestFile(FastFS &fs, const std::string &name, int size) { mockCreate(fs, name); // mock file - FastInode* inode = fs.lookup(0, name); + FastInode *inode = fs.lookup(0, name); CU_ASSERT_FATAL(inode != nullptr); inode->size_ = size; int len = 0; @@ -496,8 +483,8 @@ static int createTestFile(FastFS& fs, const std::string& name, int size) { return fd; } -static void truncate_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); +static void truncate_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); opCtx->fastfs->freeFsOp(opCtx); CU_ASSERT(code == 0); } @@ -508,8 +495,8 @@ static void test_truncate(void) { std::string fileName = "test_file"; int fd = createTestFile(fs, fileName, BLOCK_SIZE * 8); // 4 extents uint32_t ino = (*fs.files)[fd].inode_->ino_; - fs_op_context* ctx = fs.allocFsOp(); - TruncateContext* truncateCtx = new (ctx->private_data) TruncateContext(); + fs_op_context *ctx = fs.allocFsOp(); + TruncateContext *truncateCtx = new (ctx->private_data) TruncateContext(); fileName = "/" + fileName; truncateCtx->ino = ino; truncateCtx->size = BLOCK_SIZE * 4; // 2 extents @@ -517,7 +504,7 @@ static void test_truncate(void) { ctx->cb_args = ctx; fs.truncate(*ctx); fs.journal->pollEditOp(); - FastInode* inode = fs.status(fileName); + FastInode *inode = fs.status(fileName); CU_ASSERT(inode != nullptr); CU_ASSERT(inode->extents_->size() == 2); CU_ASSERT(inode->size_ == truncateCtx->size); @@ -555,9 +542,9 @@ static void test_truncate(void) { CU_ASSERT(inode->size_ == 0); } -static void writeFile(FastFS& fs, int fd, read_write_task& t) { - fs_op_context* ctx = fs.allocFsOp(); - WriteContext* writeCtx = new (ctx->private_data) 
WriteContext(); +static void writeFile(FastFS &fs, int fd, read_write_task &t) { + fs_op_context *ctx = fs.allocFsOp(); + WriteContext *writeCtx = new (ctx->private_data) WriteContext(); writeCtx->fd = fd; writeCtx->pwrite = true; writeCtx->offset = t.offset; @@ -569,9 +556,9 @@ static void writeFile(FastFS& fs, int fd, read_write_task& t) { fs.freeFsOp(ctx); } -static void writeDirect(FastFS& fs, int fd, read_write_task& t) { - fs_op_context* ctx = fs.allocFsOp(); - WriteContext* writeCtx = new (ctx->private_data) WriteContext(); +static void writeDirect(FastFS &fs, int fd, read_write_task &t) { + fs_op_context *ctx = fs.allocFsOp(); + WriteContext *writeCtx = new (ctx->private_data) WriteContext(); writeCtx->dirctWrite(&fs, fd, t.offset, t.count, t.data); ctx->callback = write_complete; ctx->cb_args = ctx; @@ -580,9 +567,9 @@ static void writeDirect(FastFS& fs, int fd, read_write_task& t) { fs.freeBuffer(writeCtx->direct_buff); } -static void readFile(FastFS& fs, int fd, read_write_task& t) { - fs_op_context* ctx = fs.allocFsOp(); - ReadContext* readCtx = new (ctx->private_data) ReadContext(); +static void readFile(FastFS &fs, int fd, read_write_task &t) { + fs_op_context *ctx = fs.allocFsOp(); + ReadContext *readCtx = new (ctx->private_data) ReadContext(); readCtx->fd = fd; readCtx->pread = true; readCtx->offset = t.offset; @@ -594,7 +581,7 @@ static void readFile(FastFS& fs, int fd, read_write_task& t) { fs.read(*ctx); bool correct = true; for (uint32_t i = 0; i < readCtx->count; i++) { - if (readCtx->read_buff[i] != (char) i) { + if (readCtx->read_buff[i] != (char)i) { correct = false; break; } @@ -604,9 +591,9 @@ static void readFile(FastFS& fs, int fd, read_write_task& t) { FILE_READ = false; } -static void readDirect(FastFS& fs, int fd, read_write_task& t) { - fs_op_context* ctx = fs.allocFsOp(); - ReadContext* readCtx = new (ctx->private_data) ReadContext(); +static void readDirect(FastFS &fs, int fd, read_write_task &t) { + fs_op_context *ctx = 
fs.allocFsOp(); + ReadContext *readCtx = new (ctx->private_data) ReadContext(); readCtx->dirctRead(&fs, fd, t.offset, t.count); ctx->callback = read_complete; ctx->cb_args = ctx; @@ -617,7 +604,7 @@ static void readDirect(FastFS& fs, int fd, read_write_task& t) { char val; int count = 0; for (uint32_t i = 0; i < readCtx->count; i++) { - if (!readCtx->direct_buff->getByte(val) || val != (char) i) { + if (!readCtx->direct_buff->getByte(val) || val != (char)i) { correct = false; break; } @@ -636,7 +623,7 @@ static void test_large_offset(void) { // mock file std::string name = "file"; mockCreate(fs, name); - FastInode* inode = fs.lookup(0, name); + FastInode *inode = fs.lookup(0, name); CU_ASSERT_FATAL(inode != nullptr); inode->size_ = task.offset; uint32_t extentId = UINT32_MAX >> FastFS::fs_context.extentBits; @@ -781,8 +768,8 @@ static void test_write_error(void) { static void test_fs_op_pool(void) { FastFS fs("Malloc0"); do_mount(fs, BLOCK_SIZE * 2); - fs_op_context* opCtx = nullptr; - ByteBuffer* buffer = nullptr; + fs_op_context *opCtx = nullptr; + ByteBuffer *buffer = nullptr; for (int i = 0; i < DEFAULT_POOL_SIZE; i++) { opCtx = fs.allocFsOp(); buffer = fs.allocBuffer(); @@ -793,8 +780,8 @@ static void test_fs_op_pool(void) { CU_ASSERT(opCtx != nullptr); CU_ASSERT(buffer != nullptr); - fs_op_context* opCtx2 = fs.allocFsOp(); - ByteBuffer* buffer2 = fs.allocBuffer(); + fs_op_context *opCtx2 = fs.allocFsOp(); + ByteBuffer *buffer2 = fs.allocBuffer(); CU_ASSERT(!opCtx2); CU_ASSERT(!buffer2); @@ -806,27 +793,27 @@ static void test_fs_op_pool(void) { CU_ASSERT(buffer2 != nullptr); } -static void random_write_complete(void* cb_args, int code) { - fs_op_context* opCtx = reinterpret_cast(cb_args); - WriteContext* ctx = reinterpret_cast(opCtx->private_data); +static void random_write_complete(void *cb_args, int code) { + fs_op_context *opCtx = reinterpret_cast(cb_args); + WriteContext *ctx = reinterpret_cast(opCtx->private_data); CU_ASSERT(code == 0); 
CU_ASSERT(ctx->file->inode_->size_ > ctx->offset + ctx->count); - auto* extents = ctx->file->inode_->extents_; - for (auto& extent : ctx->writeExtents) { + auto *extents = ctx->file->inode_->extents_; + for (auto &extent : ctx->writeExtents) { CU_ASSERT(extent.index < extents->size()); } - ExtentMap* dirtyExtents = ctx->file->inode_->dirtyExtents; + ExtentMap *dirtyExtents = ctx->file->inode_->dirtyExtents; CU_ASSERT(dirtyExtents != nullptr && dirtyExtents->size() == 3); - for (auto& [index, extentInfo] : *dirtyExtents) { + for (auto &[index, extentInfo] : *dirtyExtents) { CU_ASSERT(extentInfo.first != extentInfo.second); } } -static void fsync_complete(void* cb_args, int code) { +static void fsync_complete(void *cb_args, int code) { CU_ASSERT(code == 0); - fs_op_context* opCtx = reinterpret_cast(cb_args); - FSyncContext* ctx = reinterpret_cast(opCtx->private_data); - ExtentMap* dirtyExtents = ctx->file->inode_->dirtyExtents; + fs_op_context *opCtx = reinterpret_cast(cb_args); + FSyncContext *ctx = reinterpret_cast(opCtx->private_data); + ExtentMap *dirtyExtents = ctx->file->inode_->dirtyExtents; CU_ASSERT(dirtyExtents != nullptr && dirtyExtents->size() == 0); } @@ -836,8 +823,8 @@ static void test_random_write(void) { int fileSize = largeTask.offset + largeTask.count + BLOCK_SIZE; int fd = createTestFile(fs, "file", fileSize); - fs_op_context* ctx = fs.allocFsOp(); - WriteContext* writeCtx = new (ctx->private_data) WriteContext(); + fs_op_context *ctx = fs.allocFsOp(); + WriteContext *writeCtx = new (ctx->private_data) WriteContext(); writeCtx->dirctWrite(&fs, fd, largeTask.offset, largeTask.count, largeTask.data); ctx->callback = random_write_complete; ctx->cb_args = ctx; @@ -846,7 +833,7 @@ static void test_random_write(void) { RANDOM_WRITE = false; fs.freeBuffer(writeCtx->direct_buff); - FSyncContext* fsyncCtx = new (ctx->private_data) FSyncContext(); + FSyncContext *fsyncCtx = new (ctx->private_data) FSyncContext(); fsyncCtx->fd = fd; ctx->callback = 
fsync_complete; fs.fsync(*ctx); @@ -854,19 +841,19 @@ static void test_random_write(void) { fs.close(fd); } -static void sparse_write_complete(void* cb_args, int code) { +static void sparse_write_complete(void *cb_args, int code) { CU_ASSERT(code == 0); - fs_op_context* opCtx = reinterpret_cast(cb_args); - WriteContext* ctx = reinterpret_cast(opCtx->private_data); + fs_op_context *opCtx = reinterpret_cast(cb_args); + WriteContext *ctx = reinterpret_cast(opCtx->private_data); CU_ASSERT(ctx->file->inode_->size_ == ctx->offset + ctx->count); - auto* extents = ctx->file->inode_->extents_; + auto *extents = ctx->file->inode_->extents_; CU_ASSERT(extents->size() == 2); CU_ASSERT((*extents)[0] == UINT32_MAX); - ExtentMap* dirtyExtents = ctx->file->inode_->dirtyExtents; + ExtentMap *dirtyExtents = ctx->file->inode_->dirtyExtents; CU_ASSERT(dirtyExtents != nullptr && dirtyExtents->size() == 1); - auto it = dirtyExtents->find(1/*indxe*/); + auto it = dirtyExtents->find(1 /*indxe*/); CU_ASSERT(it != dirtyExtents->end()); - auto& extentInfo = it->second; + auto &extentInfo = it->second; CU_ASSERT(extentInfo.second == (*extents)[1]); } @@ -876,8 +863,8 @@ static void test_sparse_read_write(void) { int fd = createTestFile(fs, "file", 0); // empty file fs.seek(fd, smallTask.offset, SEEK_SET); - fs_op_context* ctx = fs.allocFsOp(); - WriteContext* writeCtx = new (ctx->private_data) WriteContext(); + fs_op_context *ctx = fs.allocFsOp(); + WriteContext *writeCtx = new (ctx->private_data) WriteContext(); writeCtx->fd = fd; writeCtx->direct = true; writeCtx->count = smallTask.count; @@ -906,9 +893,7 @@ int main() { return CU_get_error(); } - if ( - CU_add_test(suite, "mkdir", test_mkdir) == NULL || - CU_add_test(suite, "delete", test_delete) == NULL || + if (CU_add_test(suite, "mkdir", test_mkdir) == NULL || CU_add_test(suite, "delete", test_delete) == NULL || CU_add_test(suite, "truncate", test_truncate) == NULL || CU_add_test(suite, "objs poll", test_fs_op_pool) == NULL || 
CU_add_test(suite, "open close", test_open_close) == NULL || @@ -930,8 +915,7 @@ int main() { CU_add_test(suite, "direct small read write", test_direct_read_write_small) == NULL || CU_add_test(suite, "direct large read write", test_direct_read_write_large) == NULL || CU_add_test(suite, "direct align read write", test_direct_align_read_write) == NULL || - CU_add_test(suite, "direct align small write", test_direct_align_small_write) == NULL - ) { + CU_add_test(suite, "direct align small write", test_direct_align_small_write) == NULL) { CU_cleanup_registry(); return CU_get_error(); } diff --git a/test/journal/journal_ut.cpp b/test/journal/journal_ut.cpp index 6979229..0904394 100644 --- a/test/journal/journal_ut.cpp +++ b/test/journal/journal_ut.cpp @@ -4,59 +4,51 @@ */ extern "C" { -#include #include -#include "spdk_internal/mock.h" +#include + #include "rte_mempool.h" +#include "spdk_internal/mock.h" } #include "core/FastFS.h" -size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { - return 1; -} -struct rte_mempool * rte_mempool_create( - const char *name, unsigned n, unsigned elt_size, unsigned cache_size, - unsigned private_data_size, rte_mempool_ctor_t *mp_init, void *mp_init_arg, - rte_mempool_obj_cb_t *obj_init, void *obj_init_arg, int socket_id, unsigned flags) { +size_t spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) { return 1; } +struct rte_mempool *rte_mempool_create(const char *name, + unsigned n, + unsigned elt_size, + unsigned cache_size, + unsigned private_data_size, + rte_mempool_ctor_t *mp_init, + void *mp_init_arg, + rte_mempool_obj_cb_t *obj_init, + void *obj_init_arg, + int socket_id, + unsigned flags) { return nullptr; } -void* spdk_mempool_get(struct spdk_mempool *mp) { - return malloc(sizeof(FastInode)); -} -void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { - free(ele); -} -void *spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { - return malloc(size); -} -void spdk_free(void *buf) { 
- free(buf); -} -void* spdk_realloc(void *buf, size_t size, size_t align) { - return realloc(buf, size); -} -void* spdk_dma_zmalloc_socket( - size_t size, size_t align, uint64_t *unused, int numa_id) { - return malloc(size); -} -void spdk_dma_free(void *buf) { - free(buf); -} -struct spdk_poller* spdk_poller_register_named( - spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { +void *spdk_mempool_get(struct spdk_mempool *mp) { return malloc(sizeof(FastInode)); } +void spdk_mempool_put(struct spdk_mempool *mp, void *ele) { free(ele); } +void *spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t flags) { return malloc(size); } +void spdk_free(void *buf) { free(buf); } +void *spdk_realloc(void *buf, size_t size, size_t align) { return realloc(buf, size); } +void *spdk_dma_zmalloc_socket(size_t size, size_t align, uint64_t *unused, int numa_id) { return malloc(size); } +void spdk_dma_free(void *buf) { free(buf); } +struct spdk_poller *spdk_poller_register_named(spdk_poller_fn fn, void *arg, uint64_t period, const char *name) { return nullptr; } -int spdk_bdev_write( - struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - void *buf, uint64_t offset, uint64_t nbytes, - spdk_bdev_io_completion_cb cb, void *cb_arg) { - FastJournal* journal = reinterpret_cast(cb_arg); +int spdk_bdev_write(struct spdk_bdev_desc *desc, + struct spdk_io_channel *ch, + void *buf, + uint64_t offset, + uint64_t nbytes, + spdk_bdev_io_completion_cb cb, + void *cb_arg) { + FastJournal *journal = reinterpret_cast(cb_arg); journal->writeComplete(0); return 0; } -static void init_fs_context( - fs_context_t& ctx, uint32_t blockSize, uint32_t extentSize, uint32_t blocks) { +static void init_fs_context(fs_context_t &ctx, uint32_t blockSize, uint32_t extentSize, uint32_t blocks) { ctx.blockSize = blockSize; ctx.extentSize = extentSize; ctx.bufAlign = 1; @@ -64,8 +56,8 @@ static void init_fs_context( ctx.superBlock.epoch = 0; } -static void 
recordCreate(ByteBuffer* buffer, EditOp& editOp, uint32_t& num_ops, - FileType type, uint32_t pid, const std::string& name) { +static void recordCreate( + ByteBuffer *buffer, EditOp &editOp, uint32_t &num_ops, FileType type, uint32_t pid, const std::string &name) { CreateContext createCtx; createCtx.parentId = pid; createCtx.name = name; @@ -77,8 +69,7 @@ static void recordCreate(ByteBuffer* buffer, EditOp& editOp, uint32_t& num_ops, createCtx.serialize(buffer); } -static void recordTruncate(ByteBuffer* buffer, EditOp& editOp, - uint32_t& num_ops, uint32_t ino, uint64_t file_size) { +static void recordTruncate(ByteBuffer *buffer, EditOp &editOp, uint32_t &num_ops, uint32_t ino, uint64_t file_size) { TruncateContext truncateCtx; truncateCtx.ino = ino; truncateCtx.size = file_size; @@ -88,8 +79,7 @@ static void recordTruncate(ByteBuffer* buffer, EditOp& editOp, truncateCtx.serialize(buffer); } -static void recordDelete(ByteBuffer* buffer, EditOp& editOp, - uint32_t& num_ops, uint32_t pid, const std::string& name) { +static void recordDelete(ByteBuffer *buffer, EditOp &editOp, uint32_t &num_ops, uint32_t pid, const std::string &name) { DeleteContext deleteCtx; deleteCtx.parentId = pid; deleteCtx.name = name; @@ -100,9 +90,13 @@ static void recordDelete(ByteBuffer* buffer, EditOp& editOp, deleteCtx.serialize(buffer); } -static void recordRename(ByteBuffer* buffer, EditOp& editOp, uint32_t& num_ops, - uint32_t olddir, uint32_t newdir, - const std::string& oldname, const std::string& newname) { +static void recordRename(ByteBuffer *buffer, + EditOp &editOp, + uint32_t &num_ops, + uint32_t olddir, + uint32_t newdir, + const std::string &oldname, + const std::string &newname) { RenameContext renameCtx; renameCtx.olddir = olddir; renameCtx.newdir = newdir; @@ -185,7 +179,7 @@ static void test_block_has_padding(void) { init_fs_context(fs_context, 4096, 8192, 10); FastJournal journal(fs_context); auto buffer = journal.tail_block; - EditOp* editOp = journal.allocEditOp(); + 
EditOp *editOp = journal.allocEditOp(); CU_ASSERT_PTR_NOT_NULL(editOp); uint32_t num_ops = 0; recordCreate(buffer, *editOp, num_ops, FASTFS_DIR, 0, "dir"); @@ -222,7 +216,7 @@ static void test_editop_alloc(void) { fs_context_t fs_context; init_fs_context(fs_context, 4096, 8192, 10); FastJournal journal(fs_context); - EditOp* editOp = nullptr; + EditOp *editOp = nullptr; uint32_t num = DEFAULT_POOL_SIZE - 1; for (uint32_t i = 0; i < num; i++) { editOp = journal.allocEditOp(); @@ -249,8 +243,8 @@ static void test_editop_alloc(void) { CU_ASSERT_PTR_NOT_NULL(editOp); } -static void write_complete(void* cb_args, int code) { - FastJournal* journal = reinterpret_cast(cb_args); +static void write_complete(void *cb_args, int code) { + FastJournal *journal = reinterpret_cast(cb_args); journal->freeEditOp(); } @@ -263,12 +257,12 @@ static void test_op_poller(void) { bool allocated = true; uint32_t num = DEFAULT_POOL_SIZE - 1; for (uint32_t i = 0; i < num; i++) { - EditOp* editOp = journal.allocEditOp(); + EditOp *editOp = journal.allocEditOp(); if (!editOp) { allocated = false; break; } - DeleteContext* deleteCtx = new DeleteContext(); + DeleteContext *deleteCtx = new DeleteContext(); deleteCtx->parentId = 0; deleteCtx->name = "dir"; deleteCtx->recursive = false; @@ -287,12 +281,12 @@ static void test_op_poller(void) { } // phrase bit revert case for (uint32_t i = 0; i < num; i++) { - EditOp* editOp = journal.allocEditOp(); + EditOp *editOp = journal.allocEditOp(); if (!editOp) { allocated = false; break; } - TruncateContext* truncateCtx = new TruncateContext(); + TruncateContext *truncateCtx = new TruncateContext(); truncateCtx->ino = i; truncateCtx->size = 1024; editOp->opctx = truncateCtx; @@ -311,7 +305,7 @@ static void test_op_poller(void) { static void test_no_space(void) { uint32_t blockSize = 4096; fs_context_t fs_context; - init_fs_context(fs_context, blockSize, 8192, 4/*two extents*/); + init_fs_context(fs_context, blockSize, 8192, 4 /*two extents*/); FastJournal 
journal(fs_context); auto buffer = journal.tail_block; @@ -320,8 +314,8 @@ static void test_no_space(void) { // first extent is reserved CU_ASSERT(fs_context.allocator->getFree() == 0); - EditOp* editOp = journal.allocEditOp(); - DeleteContext* deleteCtx = new DeleteContext(); + EditOp *editOp = journal.allocEditOp(); + DeleteContext *deleteCtx = new DeleteContext(); deleteCtx->parentId = 0; deleteCtx->name = "file"; deleteCtx->recursive = false; @@ -364,7 +358,7 @@ static void test_release_extents(void) { extents.erase_after(cusor, extents.end()); uint32_t target = 19; bool correct = true; - for (auto& extentId : extents) { + for (auto &extentId : extents) { if (extentId != target--) { correct = false; } @@ -383,14 +377,12 @@ int main() { return CU_get_error(); } - if ( - CU_add_test(suite, "Poller", test_op_poller) == NULL || + if (CU_add_test(suite, "Poller", test_op_poller) == NULL || CU_add_test(suite, "log replay", test_log_replay) == NULL || CU_add_test(suite, "EditOp alloc", test_editop_alloc) == NULL || CU_add_test(suite, "no space left", test_no_space) == NULL || CU_add_test(suite, "release extents", test_release_extents) == NULL || - CU_add_test(suite, "block has padding", test_block_has_padding) == NULL - ) { + CU_add_test(suite, "block has padding", test_block_has_padding) == NULL) { CU_cleanup_registry(); return CU_get_error(); } diff --git a/tools/fastfs_tools.cpp b/tools/fastfs_tools.cpp index c763a00..3af6d23 100644 --- a/tools/fastfs_tools.cpp +++ b/tools/fastfs_tools.cpp @@ -4,16 +4,16 @@ */ #include "core/FastFS.h" -#include "spdk/stdinc.h" -#include "spdk/thread.h" #include "spdk/bdev.h" +#include "spdk/bdev_zone.h" #include "spdk/env.h" #include "spdk/event.h" #include "spdk/log.h" +#include "spdk/stdinc.h" #include "spdk/string.h" -#include "spdk/bdev_zone.h" +#include "spdk/thread.h" -static const char* bdevName = NULL; +static const char *bdevName = NULL; static bool format = false; static bool dump = false; static bool checkpoint = false; 
@@ -36,7 +36,7 @@ static int tools_parse_arg(int ch, char *arg) { format = true; break; case 'S': - extentSize = (int) std::stoi(arg); + extentSize = (int)std::stoi(arg); break; case 'D': dump = true; @@ -50,13 +50,13 @@ static int tools_parse_arg(int ch, char *arg) { return 0; } -static void operate_complete(FastFS* fastfs, int code) { +static void operate_complete(FastFS *fastfs, int code) { if (code != 0) { SPDK_ERRLOG("operate failed: %d\n", code); } else { SPDK_NOTICELOG("operate successfuly.\n"); } - fs_context_t& fs_context = FastFS::fs_context; + fs_context_t &fs_context = FastFS::fs_context; if (fs_context.bdev_io_channel) { spdk_put_io_channel(fs_context.bdev_io_channel); } @@ -66,27 +66,26 @@ static void operate_complete(FastFS* fastfs, int code) { spdk_app_stop(code); } -static void tools_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, - void *event_ctx) { +static void tools_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx) { SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); } -static void ckpt_complete(FastFS* fastfs, int code) { -// if (code == 0) { -// for (auto& inode : *fastfs->inodes) { -// if (inode.type_ == FASTFS_DIR) { -// printf("dentrys : %ld\n", inode.children_->size()); -// for (auto& childIno : *inode.children_) { -// printf(" name : %s\n", (*fastfs->inodes)[childIno].name_.c_str()); -// } -// } -// } -// } +static void ckpt_complete(FastFS *fastfs, int code) { + // if (code == 0) { + // for (auto& inode : *fastfs->inodes) { + // if (inode.type_ == FASTFS_DIR) { + // printf("dentrys : %ld\n", inode.children_->size()); + // for (auto& childIno : *inode.children_) { + // printf(" name : %s\n", (*fastfs->inodes)[childIno].name_.c_str()); + // } + // } + // } + // } fastfs->unmount(); spdk_app_stop(code); } -static void mount_complete(FastFS* fastfs, int code) { +static void mount_complete(FastFS *fastfs, int code) { if (code != 0) { SPDK_ERRLOG("mount fastfs failed: %d\n", code); 
return operate_complete(fastfs, code); @@ -94,12 +93,11 @@ static void mount_complete(FastFS* fastfs, int code) { fastfs->checkpoint->checkpoint(ckpt_complete); } -static void parseJournal( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void parseJournal(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { spdk_bdev_free_io(bdev_io); - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(buffer->private_data); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(buffer->private_data); + fs_context_t &ctx = FastFS::fs_context; uint32_t nextExtentId = 0; char flag = 1; uint32_t offset = 0; @@ -125,8 +123,7 @@ static void parseJournal( break; } uint32_t epochNum = 0; - if (!buffer->read(epochNum) || - epochNum != ctx.superBlock.epoch) { + if (!buffer->read(epochNum) || epochNum != ctx.superBlock.epoch) { break; } uint32_t num_ops = 0; @@ -140,34 +137,34 @@ static void parseJournal( buffer->getByte(opType); buffer->read(opSize); switch (opType) { - case 0 : { // createOp - create_ops++; - break; - } - case 1 : { // truncateOp - truncate_ops++; - break; - } - case 2 : { // deleteOp - delete_ops++; - break; - } - case 3 : { // allocOp - buffer->read(nextExtentId); - break; - } - case 4 : { // fsyncOp - fsync_ops++; - break; - } - case 5 : { // renameOp - rename_ops++; - break; - } - default: { - unkown_ops++; - break; - } + case 0: { // createOp + create_ops++; + break; + } + case 1: { // truncateOp + truncate_ops++; + break; + } + case 2: { // deleteOp + delete_ops++; + break; + } + case 3: { // allocOp + buffer->read(nextExtentId); + break; + } + case 4: { // fsyncOp + fsync_ops++; + break; + } + case 5: { // renameOp + rename_ops++; + break; + } + default: { + unkown_ops++; + break; + } } if (opType != 3) { // allocOp buffer->skip(opSize); @@ -178,24 +175,29 @@ static void parseJournal( } } printf("creates %d, truncates %d, fsyncs %d, 
deletes %d, renames %d, unkown %d, total %d]\n", - create_ops, truncate_ops, fsync_ops, delete_ops, rename_ops, unkown_ops, total_ops); + create_ops, + truncate_ops, + fsync_ops, + delete_ops, + rename_ops, + unkown_ops, + total_ops); if (flag == 1 && nextExtentId > 0) { buffer->clear(); SPDK_NOTICELOG("JOURNAL %d [", nextExtentId); uint64_t nextOffset = static_cast(nextExtentId) << ctx.extentBits; - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, buffer->p_buffer_, - nextOffset, ctx.extentSize, parseJournal, buffer); + spdk_bdev_read( + ctx.bdev_desc, ctx.bdev_io_channel, buffer->p_buffer_, nextOffset, ctx.extentSize, parseJournal, buffer); return; } return operate_complete(fastfs, 0); } -static void parseDentry( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void parseDentry(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { spdk_bdev_free_io(bdev_io); - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + fs_context_t &ctx = FastFS::fs_context; uint32_t nextExtent = 0; uint32_t numOps = 0; uint32_t dentryOps = 0; @@ -207,52 +209,55 @@ static void parseDentry( char opType = -1; extentBuf->getByte(opType); switch (opType) { - case FASTFS_REGULAR_FILE : { - extentOps++; - extentBuf->skip(4); // inodeId - uint32_t extentCount = 0; - extentBuf->read(extentCount); - extentBuf->skip(extentCount * 4); - break; - } - case FASTFS_DIR : { - dentryOps++; - extentBuf->skip(4); // inodeId - uint32_t childCount = 0; - extentBuf->read(childCount); - extentBuf->skip(childCount * 4/*childInode*/); - break; - } - default: { - unkownOps++; - break; - } + case FASTFS_REGULAR_FILE: { + extentOps++; + extentBuf->skip(4); // inodeId + uint32_t extentCount = 0; + extentBuf->read(extentCount); + extentBuf->skip(extentCount * 4); + break; + } + case FASTFS_DIR: { + dentryOps++; + extentBuf->skip(4); // inodeId + uint32_t childCount = 0; + 
extentBuf->read(childCount); + extentBuf->skip(childCount * 4 /*childInode*/); + break; + } + default: { + unkownOps++; + break; + } } } - printf("numOps %d, dentryOps %d, extentOps %d, unkownOps %d]\n", - numOps, dentryOps, extentOps, unkownOps); + printf("numOps %d, dentryOps %d, extentOps %d, unkownOps %d]\n", numOps, dentryOps, extentOps, unkownOps); if (nextExtent > 0) { SPDK_NOTICELOG("DENTRY %d [", nextExtent); extentBuf->clear(); uint64_t offset = static_cast(nextExtent) << ctx.extentBits; - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - offset, ctx.extentSize, parseDentry, extentBuf); + spdk_bdev_read( + ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, offset, ctx.extentSize, parseDentry, extentBuf); return; } uint64_t journalStart = ctx.superBlock.journalLoc; SPDK_NOTICELOG("JOURNAL %ld [", journalStart); extentBuf->clear(); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - journalStart << ctx.extentBits, ctx.extentSize, parseJournal, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + journalStart << ctx.extentBits, + ctx.extentSize, + parseJournal, + extentBuf); } -static void parseInodes( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void parseInodes(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { spdk_bdev_free_io(bdev_io); - ByteBuffer* extentBuf = reinterpret_cast(cb_arg); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *extentBuf = reinterpret_cast(cb_arg); + fs_context_t &ctx = FastFS::fs_context; uint32_t nextExtent = 0; uint32_t numOps = 0; extentBuf->read(nextExtent); @@ -262,24 +267,28 @@ static void parseInodes( SPDK_NOTICELOG("INODES %d [", nextExtent); extentBuf->clear(); uint64_t offset = static_cast(nextExtent) << ctx.extentBits; - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - offset, ctx.extentSize, parseInodes, extentBuf); + spdk_bdev_read( + ctx.bdev_desc, 
ctx.bdev_io_channel, extentBuf->p_buffer_, offset, ctx.extentSize, parseInodes, extentBuf); return; } uint64_t ckptDentryLoc = ctx.superBlock.ckptDentryLoc; SPDK_NOTICELOG("DENTRY %ld [", ckptDentryLoc); extentBuf->clear(); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - ckptDentryLoc << ctx.extentBits, ctx.extentSize, parseDentry, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + ckptDentryLoc << ctx.extentBits, + ctx.extentSize, + parseDentry, + extentBuf); } -static void readSuperBlockComplete( - struct spdk_bdev_io* bdev_io, bool success, void *cb_arg) { +static void readSuperBlockComplete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) { spdk_bdev_free_io(bdev_io); - ByteBuffer* buffer = reinterpret_cast(cb_arg); - FastFS* fastfs = reinterpret_cast(buffer->private_data); - fs_context_t& ctx = FastFS::fs_context; + ByteBuffer *buffer = reinterpret_cast(cb_arg); + FastFS *fastfs = reinterpret_cast(buffer->private_data); + fs_context_t &ctx = FastFS::fs_context; success = success && ctx.superBlock.deserialize(buffer); spdk_dma_free(buffer->p_buffer_); delete buffer; @@ -294,24 +303,40 @@ static void readSuperBlockComplete( uint64_t ckptDentryLoc = ctx.superBlock.ckptDentryLoc; uint64_t lastTxid = ctx.superBlock.lastTxid; SPDK_NOTICELOG("SuperBlock [epoch %d, extentSize %d, journal_loc %ld, " - "skip_blocks %d, skip_ops %d, ckpt_inodes_loc %ld, ckpt_dentry_loc %ld, " - "last_txid %ld]\n", - ctx.superBlock.epoch, ctx.extentSize, journalStart, skipBlocks, skipOps, - ckptInodesLoc, ckptDentryLoc, lastTxid); + "skip_blocks %d, skip_ops %d, ckpt_inodes_loc %ld, ckpt_dentry_loc %ld, " + "last_txid %ld]\n", + ctx.superBlock.epoch, + ctx.extentSize, + journalStart, + skipBlocks, + skipOps, + ckptInodesLoc, + ckptDentryLoc, + lastTxid); uint32_t buf_align = spdk_bdev_get_buf_align(ctx.bdev); - char* addr = (char*) spdk_dma_zmalloc(ctx.extentSize, buf_align, NULL); - ByteBuffer* extentBuf = 
new ByteBuffer(addr, ctx.extentSize); + char *addr = (char *)spdk_dma_zmalloc(ctx.extentSize, buf_align, NULL); + ByteBuffer *extentBuf = new ByteBuffer(addr, ctx.extentSize); extentBuf->private_data = fastfs; if (ckptInodesLoc > 0) { SPDK_NOTICELOG("INODES %ld [", ckptInodesLoc); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - ckptInodesLoc << ctx.extentBits, ctx.extentSize, parseInodes, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + ckptInodesLoc << ctx.extentBits, + ctx.extentSize, + parseInodes, + extentBuf); } else { SPDK_NOTICELOG("JOURNAL %ld [", journalStart); - spdk_bdev_read(ctx.bdev_desc, ctx.bdev_io_channel, extentBuf->p_buffer_, - journalStart << ctx.extentBits, ctx.extentSize, parseJournal, extentBuf); + spdk_bdev_read(ctx.bdev_desc, + ctx.bdev_io_channel, + extentBuf->p_buffer_, + journalStart << ctx.extentBits, + ctx.extentSize, + parseJournal, + extentBuf); } } else { SPDK_WARNLOG("can't read super block, FastFS not format?\n"); @@ -319,26 +344,30 @@ static void readSuperBlockComplete( } } -static void dumpFS(FastFS* fastfs) { - auto& fs_context = FastFS::fs_context; +static void dumpFS(FastFS *fastfs) { + auto &fs_context = FastFS::fs_context; uint32_t ioUnitSize = spdk_bdev_get_write_unit_size(fs_context.bdev); uint32_t buf_align = spdk_bdev_get_buf_align(fs_context.bdev); fs_context.blocks = spdk_bdev_get_num_blocks(fs_context.bdev) / ioUnitSize; fs_context.blockSize = spdk_bdev_get_block_size(fs_context.bdev) * ioUnitSize; fs_context.blockBits = spdk_u32log2(fs_context.blockSize); fs_context.blockMask = (1 << fs_context.blockBits) - 1; - char* addr = (char*) spdk_dma_zmalloc(fs_context.blockSize, buf_align, NULL); - ByteBuffer* buffer = new ByteBuffer(addr, fs_context.blockSize); + char *addr = (char *)spdk_dma_zmalloc(fs_context.blockSize, buf_align, NULL); + ByteBuffer *buffer = new ByteBuffer(addr, fs_context.blockSize); buffer->private_data = fastfs; // read super 
block - spdk_bdev_read(fs_context.bdev_desc, fs_context.bdev_io_channel, - buffer->p_buffer_, 0, fs_context.blockSize, readSuperBlockComplete, buffer); - + spdk_bdev_read(fs_context.bdev_desc, + fs_context.bdev_io_channel, + buffer->p_buffer_, + 0, + fs_context.blockSize, + readSuperBlockComplete, + buffer); } static void tools_start(void *arg) { - FastFS* fastfs = (FastFS*) arg; - fs_context_t* fs_context = &FastFS::fs_context; + FastFS *fastfs = (FastFS *)arg; + fs_context_t *fs_context = &FastFS::fs_context; fs_context->fastfs = fastfs; int rc = 0; @@ -346,8 +375,7 @@ static void tools_start(void *arg) { fs_context->bdev_desc = NULL; SPDK_NOTICELOG("Opening the bdev %s\n", fs_context->bdev_name); - rc = spdk_bdev_open_ext(fs_context->bdev_name, true, tools_event_cb, NULL, - &fs_context->bdev_desc); + rc = spdk_bdev_open_ext(fs_context->bdev_name, true, tools_event_cb, NULL, &fs_context->bdev_desc); if (rc) { SPDK_ERRLOG("Could not open bdev: %s\n", fs_context->bdev_name); spdk_app_stop(-1); @@ -380,8 +408,7 @@ int main(int argc, char **argv) { opts.name = "fastfs_tools"; opts.rpc_addr = NULL; - int rc = spdk_app_parse_args( - argc, argv, &opts, "b:fS:DC", NULL, tools_parse_arg, tools_usage); + int rc = spdk_app_parse_args(argc, argv, &opts, "b:fS:DC", NULL, tools_parse_arg, tools_usage); if (rc != SPDK_APP_PARSE_ARGS_SUCCESS) { exit(rc); }