From 1072cdd3d64008e2fd15ea8e53af798fdc13344c Mon Sep 17 00:00:00 2001 From: Lourdle Duan Date: Sun, 3 Nov 2024 22:32:11 +0800 Subject: [PATCH 1/2] Add Windows and Visual Studio initial support and replace Makefile with CMake. --- .gitignore | 4 +++ CMakeLists.txt | 4 +++ Makefile | 9 ------ gguf-tools.c | 34 +++++++++++++++---- gguflib.c | 88 +++++++++++++++++++++++++++++++++++++++----------- gguflib.h | 15 +++++++++ sds.c | 8 ++--- sds.h | 14 ++++++++ 8 files changed, 138 insertions(+), 38 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 Makefile diff --git a/.gitignore b/.gitignore index 8eb55dd..a7a2a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ gguf-tools +out +build +.vs +.vscode diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1ea72e6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,4 @@ +cmake_minimum_required(VERSION 3.5) +project(gguf-tools) +set(SRC_LIST gguf-tools.c gguflib.c sds.c fp16.c) +add_executable(gguf-tools ${SRC_LIST}) diff --git a/Makefile b/Makefile deleted file mode 100644 index b1f9127..0000000 --- a/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -all: gguf-tools - -gguf-tools: gguf-tools.c gguflib.c gguflib.h sds.c sds.h sdsalloc.h fp16.h bf16.h - $(CC) gguf-tools.c gguflib.c sds.c fp16.c \ - -march=native -ffast-math \ - -g -ggdb -Wall -W -pedantic -O3 -o gguf-tools - -clean: - rm -rf gguf-tools diff --git a/gguf-tools.c b/gguf-tools.c index c847c17..4e1aecc 100644 --- a/gguf-tools.c +++ b/gguf-tools.c @@ -11,6 +11,26 @@ #include "sds.h" #include "fp16.h" +#ifdef _WIN32 +#include + +static void win_perror(const char* s) { /* perror() replacement: uses errno when set, otherwise the text for GetLastError() */ + if (errno != 0) { + perror(s); + return; + } + + char *msg = NULL; /* stays NULL if FormatMessageA cannot allocate/format the text */ + DWORD len = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPSTR)&msg, 0, NULL); + fprintf(stderr, "%s: %s", s, (len != 0 && msg != NULL) ? msg : "unknown error\n"); + LocalFree(msg); +} + +#define perror win_perror +#endif + /* Global 
options that can could be used for all the subcommands. */ struct { int verbose; // --verbose option @@ -19,7 +39,7 @@ struct { /* ========================== Utility functions ============================ */ /* Glob-style pattern matching. Return 1 on match, 0 otherwise. */ -int strmatch(const char *pattern, int patternLen, +static int strmatch(const char *pattern, int patternLen, const char *string, int stringLen, int nocase) { while(patternLen && stringLen) { @@ -141,7 +161,7 @@ int strmatch(const char *pattern, int patternLen, /* ========================== 'show' subcommand ============================= */ -void gguf_tools_show(const char *filename) { +static void gguf_tools_show(const char *filename) { gguf_ctx *ctx = gguf_open(filename); if (ctx == NULL) { perror(filename); @@ -190,7 +210,7 @@ void gguf_tools_show(const char *filename) { /* Read a Mixtral MoE model and creates a new non-MoE GGUF file based * on the weights of the experts with IDs in the array of 'experts_id'. * The array must contain 32 integers, one for each layer. 
*/ -void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) { +static void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, const char *output_filename) { gguf_ctx *mixtral = gguf_open(mixtral_filename); if (mixtral == NULL) { perror(mixtral_filename); @@ -331,7 +351,7 @@ void gguf_tools_split_mixtral(int *experts_id, const char *mixtral_filename, con /* ====================== 'inspect-weights' subcommand ====================== */ -void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) { +static void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_t count) { gguf_ctx *ctx = gguf_open(filename); if (ctx == NULL) { perror(filename); @@ -421,7 +441,7 @@ void gguf_tools_inspect_weights(const char *filename, const char *tname, uint64_ * * Returns 1 on success, 0 if one or both the provided tensors can't be * dequantized. */ -int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) { +static int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) { float *weights1 = gguf_tensor_to_float(t1); float *weights2 = gguf_tensor_to_float(t2); if (weights1 == NULL || weights2 == NULL) { @@ -453,7 +473,7 @@ int tensors_avg_diff(gguf_tensor *t1, gguf_tensor *t2, double *diff) { return 1; } -void gguf_tools_compare(const char *file1, const char *file2) { +static void gguf_tools_compare(const char *file1, const char *file2) { gguf_ctx *ctx1 = gguf_open(file1); if (ctx1 == NULL) { perror(file1); @@ -498,7 +518,7 @@ void gguf_tools_compare(const char *file1, const char *file2) { /* ======================= Main and CLI options parsing ===================== */ -void gguf_tools_usage(const char *progname) { +static void gguf_tools_usage(const char *progname) { printf("Usage: %s [arguments...] 
[options...]\n" "Subcommands:\n" " show -- show GGUF model keys and tensors.\n" diff --git a/gguflib.c b/gguflib.c index aa68527..0155732 100644 --- a/gguflib.c +++ b/gguflib.c @@ -1,15 +1,23 @@ #include #include #include +#ifndef _WIN32 #include -#include +#include #include +#endif +#include #include -#include #include #include #include +#ifdef _WIN32 +#include + +typedef SSIZE_T ssize_t; /* ssize_t must be signed; UINT_PTR would defeat negative/error checks */ +#endif + #include "gguflib.h" #include "fp16.h" #include "bf16.h" @@ -108,18 +116,34 @@ uint64_t gguf_value_len(uint32_t type, union gguf_value *val) { } /* =============================== GGUF file API ============================ */ - /* Open a GGUF file and return a parsing context. */ -gguf_ctx *gguf_open(const char *filename) { - int fd = open(filename,O_RDWR|O_APPEND); +gguf_ctx*gguf_open(const char *filename) { +#ifdef _WIN32 + HANDLE fd = CreateFileA(filename, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (fd == INVALID_HANDLE_VALUE) return NULL; +#else + int fd = open(filename, O_RDWR | O_APPEND); if (fd == -1) return NULL; +#endif /* Mapping successful. We can create our context object. */ - gguf_ctx *ctx = calloc(1, sizeof(*ctx)); + gguf_ctx*ctx = calloc(1, sizeof(*ctx)); if (!ctx) return NULL; ctx->fd = fd; ctx->alignment = 32; // Default alignment of GGUF files. ctx->data_off = 0; // Set later. + +#ifdef _WIN32 + /* We must create file mapping object under Windows. */ + HANDLE mapping = CreateFileMappingA(fd, NULL, PAGE_READWRITE, 0, 0, NULL); + if (mapping == NULL) { + CloseHandle(fd); + free(ctx); + return NULL; + } + ctx->mapping = mapping; +#endif + if (gguf_remap(ctx) == 0) { gguf_close(ctx); return NULL; @@ -146,6 +170,7 @@ void gguf_rewind(gguf_ctx *ctx) { * * Return 1 on success, 0 on error. */ int gguf_remap(gguf_ctx *ctx) { +#ifndef _WIN32 struct stat sb; /* Unmap if the file was already memory mapped. 
*/ @@ -159,14 +184,34 @@ int gguf_remap(gguf_ctx *ctx) { /* Minimal sanity check... */ if (sb.st_size < (signed)sizeof(struct gguf_header) || - memcmp(mapped,"GGUF",4) != 0) + memcmp(mapped, "GGUF", 4) != 0) { errno = EINVAL; return 0; } + ctx->size = sb.st_size; +#else + if (ctx->data) { UnmapViewOfFile(ctx->data); ctx->data = NULL; } /* keep no stale view pointer if remapping fails below */ + + /* Get the size of the file. */ + LARGE_INTEGER size; + if (!GetFileSizeEx(ctx->fd, &size)) return 0; + + /* Map the file by the handle to the file mapping object. */ + LPVOID mapped = MapViewOfFile(ctx->mapping, FILE_MAP_ALL_ACCESS, 0, 0, size.QuadPart); + if (mapped == NULL) return 0; + + if (size.QuadPart < (signed)sizeof(struct gguf_header) || + memcmp(mapped, "GGUF", 4) != 0) + { + errno = EINVAL; + return 0; + } + ctx->size = size.QuadPart; +#endif + ctx->data = mapped; ctx->header = mapped; - ctx->size = sb.st_size; return 1; } @@ -174,8 +219,15 @@ int gguf_remap(gguf_ctx *ctx) { * and cleanup resources. */ void gguf_close(gguf_ctx *ctx) { if (ctx == NULL) return; +#ifndef _WIN32 if (ctx->data) munmap(ctx->data,ctx->size); close(ctx->fd); +#else + if (ctx->data) UnmapViewOfFile(ctx->data); + /* Don't forget to close the handle to the file mapping object to destroy this kernel object. */ + CloseHandle(ctx->mapping); + CloseHandle(ctx->fd); +#endif free(ctx); } @@ -222,7 +274,7 @@ uint64_t gguf_get_alignment_padding(uint64_t alignment, uint64_t offset) { * all the key-values are consumed, in the context of the first call of * gguf_get_tensor(): this way we will be able to return tensor offsets * as absolute positions and pointers to the mmapped file. */ -void gguf_set_data_offset(gguf_ctx *ctx) { +static void gguf_set_data_offset(gguf_ctx *ctx) { assert(ctx->left_kv == 0 && ctx->left_tensors == ctx->header->tensor_count); uint64_t offset = ctx->off; @@ -373,7 +425,7 @@ struct gguf_print_options { * may be NULL if no options are provided. * * The function is designed to be used as a callback of gguf_do_with_value(). 
*/ -void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) { +static void gguf_print_value_callback(void *privdata, uint32_t type, union gguf_value *val, uint64_t in_array, uint64_t array_len) { struct gguf_print_options *po = privdata; if (po && po->max_array_items && in_array > po->max_array_items) { if (in_array-1 == po->max_array_items) @@ -525,20 +577,20 @@ int gguf_append_tensor_data(gguf_ctx *ctx, void *tensor, uint64_t tensor_size) { typedef void (*store_float_callback)(void *dst, uint64_t idx, float f); /* Callback used to store F16 when dequantizing. */ -void gguf_store_f16_callback(void *dst, uint64_t idx, float f) { +static void gguf_store_f16_callback(void *dst, uint64_t idx, float f) { uint16_t *f16 = dst; f16[idx] = to_half(f); } /* Callback used to store BF16 when dequantizing. */ -void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) { +static void gguf_store_bf16_callback(void *dst, uint64_t idx, float f) { uint16_t *f16 = dst; f16[idx] = to_brain(f); } /* Q8_0 blocks dequantization to floats. * 'dst' is supposed to have enough space for 'count' weights. */ -void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; struct gguf_tensor_type_features *tf = gguf_get_tensor_type_features(GGUF_TYPE_Q8_0); @@ -565,7 +617,7 @@ void gguf_q8_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* Q4_K blocks dequantization to floats. * 'y' is supposed to have enough space for 'count' weights. 
*/ -void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; uint8_t *block = weights_data; uint64_t i = 0; // i-th weight to dequantize. @@ -655,7 +707,7 @@ void gguf_q4_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* Q6_K blocks dequantization to floats. * 'y' is supposed to have enough space for 'count' weights. */ -void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; uint8_t *block = weights_data; uint64_t i = 0; // i-th weight to dequantize. @@ -735,7 +787,7 @@ void gguf_q6_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* Q2_K blocks dequantization to floats. * 'y' is supposed to have enough space for 'count' weights. */ -void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; uint8_t *block = weights_data; uint64_t i = 0; // i-th weight to dequantize. @@ -800,7 +852,7 @@ void gguf_q2_k_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* Q4_0 blocks dequantization to floats. * 'dst' is supposed to have enough space for 'count' weights. 
*/ -void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; struct gguf_tensor_type_features *tf = gguf_get_tensor_type_features(GGUF_TYPE_Q4_0); @@ -841,7 +893,7 @@ void gguf_q4_0_to_float(void *weights_data, void *dst, uint64_t count, store_flo /* Q4_1 blocks dequantization to floats. * 'dst' is supposed to have enough space for 'count' weights. */ -void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { +static void gguf_q4_1_to_float(void *weights_data, void *dst, uint64_t count, store_float_callback store_callback) { float *f = dst; struct gguf_tensor_type_features *tf = gguf_get_tensor_type_features(GGUF_TYPE_Q4_1); diff --git a/gguflib.h b/gguflib.h index 4cb973a..17ea4a6 100644 --- a/gguflib.h +++ b/gguflib.h @@ -10,6 +10,7 @@ #include + /* ============================ Enums and structures ======================== */ /* Flags that can be used in different functions with the same meaning. */ @@ -110,6 +111,11 @@ union gguf_value { double float64; uint8_t boolval; struct gguf_string string; + +#ifdef _MSC_VER +#pragma pack(push, 1) +#define __attribute__(x) +#endif struct { // Any value type is valid, including arrays. uint32_t type; @@ -117,6 +123,10 @@ union gguf_value { uint64_t len; // The array of values follow... } __attribute__((packed)) array; +#ifdef _MSC_VER +#pragma pack(pop) +#undef __attribute__ +#endif }; // Header @@ -159,7 +169,12 @@ typedef struct { /* The context you get after opening a GGUF file with gguf_init(). */ typedef struct { +#ifdef _WIN32 + void* fd; + void* mapping; +#else int fd; +#endif uint8_t *data; // Memory mapped data. uint64_t size; // Total file size. struct gguf_header *header; // GUFF file header info. 
diff --git a/sds.c b/sds.c index 1189716..8fdb7e8 100644 --- a/sds.c +++ b/sds.c @@ -447,7 +447,7 @@ sds sdscpy(sds s, const char *t) { * The function returns the length of the null-terminated string * representation stored at 's'. */ #define SDS_LLSTR_SIZE 21 -int sdsll2str(char *s, long long value) { +static int sdsll2str(char *s, long long value) { char *p, aux; unsigned long long v; size_t l; @@ -479,7 +479,7 @@ int sdsll2str(char *s, long long value) { } /* Identical sdsll2str(), but for unsigned long long type. */ -int sdsull2str(char *s, unsigned long long v) { +static int sdsull2str(char *s, unsigned long long v) { char *p, aux; size_t l; @@ -908,14 +908,14 @@ sds sdscatrepr(sds s, const char *p, size_t len) { /* Helper function for sdssplitargs() that returns non zero if 'c' * is a valid hex digit. */ -int is_hex_digit(char c) { +static int is_hex_digit(char c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } /* Helper function for sdssplitargs() that converts a hex digit into an * integer from 0 to 15 */ -int hex_digit_to_int(char c) { +static int hex_digit_to_int(char c) { switch(c) { case '0': return 0; case '1': return 1; diff --git a/sds.h b/sds.h index adcc12c..80f8901 100644 --- a/sds.h +++ b/sds.h @@ -40,10 +40,20 @@ extern const char *SDS_NOINIT; #include #include +#ifdef _WIN32 +#include +typedef SSIZE_T ssize_t; /* signed, like POSIX ssize_t: sds APIs accept negative indexes */ +#endif + typedef char *sds; /* Note: sdshdr5 is never used, we just access the flags byte directly. * However is here to document the layout of type 5 SDS strings. 
*/ + +#ifdef _MSC_VER +#pragma pack(push, 1) +#define __attribute__(x) +#endif struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */ char buf[]; @@ -72,6 +82,10 @@ struct __attribute__ ((__packed__)) sdshdr64 { unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; +#ifdef _MSC_VER +#pragma pack(pop) +#undef __attribute__ +#endif #define SDS_TYPE_5 0 #define SDS_TYPE_8 1 From bac796ada809ac293e685db59b075971181cb008 Mon Sep 17 00:00:00 2001 From: Lourdle Duan Date: Mon, 4 Nov 2024 23:02:51 +0800 Subject: [PATCH 2/2] --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a7a2a3c..9808ebb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ out build .vs .vscode +CMakeSettings.json