From acf1e232366c24676d25baead723d334766b5fb4 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Mon, 16 Apr 2012 18:37:25 +0400 Subject: [PATCH 01/15] Renamed files --- Makefile | 4 +- export.cpp => export_cpp.cpp | 2 +- export.h => export_cpp.h | 0 seekgzip.py | 91 ++++++++++++++++++++++++++++++++++++ setup.py | 2 +- export.i => swig.i | 4 +- 6 files changed, 97 insertions(+), 6 deletions(-) rename export.cpp => export_cpp.cpp (98%) rename export.h => export_cpp.h (100%) create mode 100644 seekgzip.py rename export.i => swig.i (91%) diff --git a/Makefile b/Makefile index ed66115..51d4658 100644 --- a/Makefile +++ b/Makefile @@ -16,5 +16,5 @@ clean-python: seekgzip: seekgzip.c $(CC) $(CFLAGS) -o $@ -DBUILD_UTILITY $< -$(PYTHON_TARGETS): export.h export.i - swig -c++ -python -o export_python.cpp export.i +$(PYTHON_TARGETS): export_cpp.h swig.i + swig -c++ -python -o export_python.cpp swig.i diff --git a/export.cpp b/export_cpp.cpp similarity index 98% rename from export.cpp rename to export_cpp.cpp index dea72da..dc52a5d 100644 --- a/export.cpp +++ b/export_cpp.cpp @@ -1,7 +1,7 @@ #include #include #include "seekgzip.h" -#include "export.h" +#include "export_cpp.h" static std::string error_string(int errorcode) { diff --git a/export.h b/export_cpp.h similarity index 100% rename from export.h rename to export_cpp.h diff --git a/seekgzip.py b/seekgzip.py new file mode 100644 index 0000000..13d8279 --- /dev/null +++ b/seekgzip.py @@ -0,0 +1,91 @@ +# This file was automatically generated by SWIG (http://www.swig.org). +# Version 2.0.4 +# +# Do not make changes to this file unless you know what you are doing--modify +# the SWIG interface file instead. 
+ + + +from sys import version_info +if version_info >= (2,6,0): + def swig_import_helper(): + from os.path import dirname + import imp + fp = None + try: + fp, pathname, description = imp.find_module('_seekgzip', [dirname(__file__)]) + except ImportError: + import _seekgzip + return _seekgzip + if fp is not None: + try: + _mod = imp.load_module('_seekgzip', fp, pathname, description) + finally: + fp.close() + return _mod + _seekgzip = swig_import_helper() + del swig_import_helper +else: + import _seekgzip +del version_info +try: + _swig_property = property +except NameError: + pass # Python < 2.2 doesn't have 'property'. +def _swig_setattr_nondynamic(self,class_type,name,value,static=1): + if (name == "thisown"): return self.this.own(value) + if (name == "this"): + if type(value).__name__ == 'SwigPyObject': + self.__dict__[name] = value + return + method = class_type.__swig_setmethods__.get(name,None) + if method: return method(self,value) + if (not static): + self.__dict__[name] = value + else: + raise AttributeError("You cannot add attributes to %s" % self) + +def _swig_setattr(self,class_type,name,value): + return _swig_setattr_nondynamic(self,class_type,name,value,0) + +def _swig_getattr(self,class_type,name): + if (name == "thisown"): return self.this.own() + method = class_type.__swig_getmethods__.get(name,None) + if method: return method(self) + raise AttributeError(name) + +def _swig_repr(self): + try: strthis = "proxy of " + self.this.__repr__() + except: strthis = "" + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) + +try: + _object = object + _newclass = 1 +except AttributeError: + class _object : pass + _newclass = 0 + + +class reader(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, reader, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, reader, name) + __repr__ = _swig_repr + def __init__(self, *args): + this = 
_seekgzip.new_reader(*args) + try: self.this.append(this) + except: self.this = this + __swig_destroy__ = _seekgzip.delete_reader + __del__ = lambda self : None; + def close(self): return _seekgzip.reader_close(self) + def seek(self, *args): return _seekgzip.reader_seek(self, *args) + def tell(self): return _seekgzip.reader_tell(self) + def read(self, *args): return _seekgzip.reader_read(self, *args) +reader_swigregister = _seekgzip.reader_swigregister +reader_swigregister(reader) + +# This file is compatible with both classic and new-style classes. + + diff --git a/setup.py b/setup.py index d9dc0f8..dacd6a4 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ '_seekgzip', sources = [ 'seekgzip.c', - 'export.cpp', + 'export_cpp.cpp', 'export_python.cpp', ], libraries=['z'], diff --git a/export.i b/swig.i similarity index 91% rename from export.i rename to swig.i index 294d293..13a9abb 100644 --- a/export.i +++ b/swig.i @@ -1,7 +1,7 @@ %module seekgzip %{ -#include "export.h" +#include "export_cpp.h" %} %include "std_string.i" @@ -21,4 +21,4 @@ } } -%include "export.h" +%include "export_cpp.h" From d07cc5c71184e4ff86befab596c32e4fda8d0964 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Mon, 16 Apr 2012 19:01:04 +0400 Subject: [PATCH 02/15] Better makefile --- Makefile | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 51d4658..1703609 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,34 @@ CC=gcc -CFLAGS=-O3 -lz -TARGETS=seekgzip -PYTHON_TARGETS=export_python.cpp seekgzip.py +SWIG=swig +PYTHON=python -all: $(TARGETS) +LDFLAGS=-lz -clean: - rm $(TARGETS) +USR_BIN_TARGETS=seekgzip +USR_LIB_TARGETS=libseekgzip.so +PHONY_TARGETS=.python -python: $(PYTHON_TARGETS) +TARGETS=$(USR_BIN_TARGETS) $(USR_LIB_TARGETS) $(PHONY_TARGETS) -clean-python: - rm $(PYTHON_TARGETS) +all: $(TARGETS) +clean: + rm -rf $(TARGETS) + rm -rf export_python.cpp + +install: + mkdir -p $(DESTDIR)/usr/bin/ 
$(DESTDIR)/usr/lib/ + cp $(USR_BIN_TARGETS) $(DESTDIR)/usr/bin/ + cp $(USR_LIB_TARGETS) $(DESTDIR)/usr/lib/ + $(PYTHON) setup.py install seekgzip: seekgzip.c - $(CC) $(CFLAGS) -o $@ -DBUILD_UTILITY $< + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DBUILD_UTILITY $< + +libseekgzip.so: seekgzip.c + $(CC) $(CFLAGS) $(LDFLAGS) -fPIC -shared -o $@ $< + +.python: swig.i export_cpp.h export_cpp.cpp setup.py + $(SWIG) -c++ -python -o export_python.cpp swig.i + $(PYTHON) setup.py build + touch $@ -$(PYTHON_TARGETS): export_cpp.h swig.i - swig -c++ -python -o export_python.cpp swig.i From d19680f761170621e18bb135b8288830d4b92897 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Mon, 16 Apr 2012 19:26:04 +0400 Subject: [PATCH 03/15] Fix python install --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1703609..81e98c3 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ install: mkdir -p $(DESTDIR)/usr/bin/ $(DESTDIR)/usr/lib/ cp $(USR_BIN_TARGETS) $(DESTDIR)/usr/bin/ cp $(USR_LIB_TARGETS) $(DESTDIR)/usr/lib/ - $(PYTHON) setup.py install + test -f .python && $(PYTHON) setup.py install || exit 0 seekgzip: seekgzip.c $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DBUILD_UTILITY $< From 231e76eb0dd3f1cb475594e112d78aa10dbf7f18 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Mon, 16 Apr 2012 19:28:23 +0400 Subject: [PATCH 04/15] Install headers --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 81e98c3..c1b7234 100644 --- a/Makefile +++ b/Makefile @@ -6,6 +6,7 @@ LDFLAGS=-lz USR_BIN_TARGETS=seekgzip USR_LIB_TARGETS=libseekgzip.so +USR_INC_TARGETS=seekgzip.h PHONY_TARGETS=.python TARGETS=$(USR_BIN_TARGETS) $(USR_LIB_TARGETS) $(PHONY_TARGETS) @@ -16,9 +17,10 @@ clean: rm -rf export_python.cpp install: - mkdir -p $(DESTDIR)/usr/bin/ $(DESTDIR)/usr/lib/ + mkdir -p $(DESTDIR)/usr/bin/ $(DESTDIR)/usr/lib/ $(DESTDIR)/usr/include/seekgzip/ cp $(USR_BIN_TARGETS) $(DESTDIR)/usr/bin/ cp $(USR_LIB_TARGETS) 
$(DESTDIR)/usr/lib/ + cp $(USR_INC_TARGETS) $(DESTDIR)/usr/include/seekgzip/ test -f .python && $(PYTHON) setup.py install || exit 0 seekgzip: seekgzip.c From a6f532958cc599c0e7c6058c788279e3606cf6be Mon Sep 17 00:00:00 2001 From: x86_64 Date: Tue, 17 Apr 2012 15:49:18 +0400 Subject: [PATCH 05/15] Split library source and utility source --- Makefile | 4 +- main.c | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++ seekgzip.c | 131 ++++------------------------------------------------- 3 files changed, 141 insertions(+), 125 deletions(-) create mode 100644 main.c diff --git a/Makefile b/Makefile index c1b7234..a7f8c6e 100644 --- a/Makefile +++ b/Makefile @@ -23,8 +23,8 @@ install: cp $(USR_INC_TARGETS) $(DESTDIR)/usr/include/seekgzip/ test -f .python && $(PYTHON) setup.py install || exit 0 -seekgzip: seekgzip.c - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DBUILD_UTILITY $< +seekgzip: seekgzip.c main.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ seekgzip.c main.c libseekgzip.so: seekgzip.c $(CC) $(CFLAGS) $(LDFLAGS) -fPIC -shared -o $@ $< diff --git a/main.c b/main.c new file mode 100644 index 0000000..0dfb758 --- /dev/null +++ b/main.c @@ -0,0 +1,131 @@ +/* + * SeekGzip utility/library. + * + * Copyright (c) 2010-2011, Naoaki Okazaki + * All rights reserved. + * + * For conditions of distribution and use, see copyright notice in README + * or zlib.h. + * + * The core algorithm for random access originates from zran.c in zlib/gzip + * distribution. This code simply implements a data structure and algorithm + * for indices, wraps the functionality of random access as a library, and + * provides a command-line utility. 
+ */ + +#include +#include +#include +#include +#include "seekgzip.h" + +#define CHUNK 16384 /* file input buffer size */ + +static void seekgzip_perror(int ret) +{ + switch (ret) { + case SEEKGZIP_ERROR: + fprintf(stderr, "ERROR: An unknown error occurred.\n"); + break; + case SEEKGZIP_OPENERROR: + fprintf(stderr, "ERROR: Failed to open a file.\n"); + break; + case SEEKGZIP_READERROR: + fprintf(stderr, "ERROR: Failed to read a file.\n"); + break; + case SEEKGZIP_WRITEERROR: + fprintf(stderr, "ERROR: Failed to write a file.\n"); + break; + case SEEKGZIP_DATAERROR: + fprintf(stderr, "ERROR: The file is corrupted.\n"); + break; + case SEEKGZIP_OUTOFMEMORY: + fprintf(stderr, "ERROR: Out of memory.\n"); + break; + case SEEKGZIP_IMCOMPATIBLE: + fprintf(stderr, "ERROR: The imcompatible file.\n"); + break; + case SEEKGZIP_ZLIBERROR: + fprintf(stderr, "ERROR: An error occurred in zlib.\n"); + break; + } +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + + if (argc != 3) { + printf("This utility manages an index for random (seekable) access to a gzip file.\n"); + printf("USAGE:\n"); + printf(" %s -b \n", argv[0]); + printf(" Build an index file \"$FILE.idx\" for the gzip file $FILE.\n"); + printf(" %s [BEGIN-END]\n", argv[0]); + printf(" Output the content of the gzip file $FILE of offset range [BEGIN:END).\n"); + return 0; + + } else if (strcmp(argv[1], "-b") == 0) { + const char *target = argv[2]; + + printf("Building an index: %s.idx\n", target); + printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); + + ret = seekgzip_build(target); + if (ret != 0) { + seekgzip_perror(ret); + return 1; + } + return 0; + + } else { + char *arg = argv[2], *p = NULL; + off_t begin = 0, end = (off_t)-1; + seekgzip_t* zs = seekgzip_open(argv[1], NULL); + if (zs == NULL) { + fprintf(stderr, "ERROR: Failed to open the index file.\n"); + return 1; + } + + p = strchr(arg, '-'); + if (p == NULL) { + begin =(off_t)strtoull(arg, NULL, 10); + end = begin+1; + } else if (p == arg) { + 
begin = 0; + end = (off_t)strtoull(p+1, NULL, 10); + } else if (p == arg + strlen(arg) - 1) { + *p = 0; + begin = (off_t)strtoull(arg, NULL, 10); + } else { + *p++ = 0; + begin =(off_t)strtoull(arg, NULL, 10); + end =(off_t)strtoull(p, NULL, 10); + } + + seekgzip_seek(zs, begin); + + while (begin < end) { + int read; + char buffer[CHUNK]; + off_t size = (end - begin); + if (CHUNK < size) { + size = CHUNK; + } + read = seekgzip_read(zs, buffer, (int)size); + if (0 < read) { + fwrite(buffer, read, sizeof(char), stdout); + begin += read; + } else if (read == 0) { + break; + } else { + fprintf(stderr, "ERROR: An error occurred while reading the gzip file.\n"); + ret = 1; + break; + } + } + + seekgzip_close(zs); + return ret; + } +} + diff --git a/seekgzip.c b/seekgzip.c index cc542ee..35d4235 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -22,7 +22,7 @@ #define SEEKGZIP_OPTIMIZATION -/*===== Begin of the portion of zran.c =====*/ +/*===== Begin of the portion of zran.c ===== {{{*/ /* zran.c -- example of zlib/gzip stream indexing and random access * Copyright (C) 2005 Mark Adler @@ -393,9 +393,14 @@ static int extract(FILE *in, struct access *index, off_t offset, return ret; } -/*===== End of the portion of zran.c =====*/ - +/*===== End of the portion of zran.c ===== }}}*/ +struct tag_seekgzip { + FILE *fp; + struct access index; + off_t offset; + int errorcode; +}; static char *get_index_file(const char *target) { @@ -420,14 +425,6 @@ static uint32_t read_uint32(gzFile gz) return v; } -struct tag_seekgzip -{ - FILE *fp; - struct access index; - off_t offset; - int errorcode; -}; - int seekgzip_build(const char *target) { int i, len, ret = SEEKGZIP_SUCCESS; @@ -652,115 +649,3 @@ int seekgzip_error(seekgzip_t* sgz) { return sgz->errorcode; } - -#ifdef BUILD_UTILITY - -static void seekgzip_perror(int ret) -{ - switch (ret) { - case SEEKGZIP_ERROR: - fprintf(stderr, "ERROR: An unknown error occurred.\n"); - break; - case SEEKGZIP_OPENERROR: - fprintf(stderr, "ERROR: Failed 
to open a file.\n"); - break; - case SEEKGZIP_READERROR: - fprintf(stderr, "ERROR: Failed to read a file.\n"); - break; - case SEEKGZIP_WRITEERROR: - fprintf(stderr, "ERROR: Failed to write a file.\n"); - break; - case SEEKGZIP_DATAERROR: - fprintf(stderr, "ERROR: The file is corrupted.\n"); - break; - case SEEKGZIP_OUTOFMEMORY: - fprintf(stderr, "ERROR: Out of memory.\n"); - break; - case SEEKGZIP_IMCOMPATIBLE: - fprintf(stderr, "ERROR: The imcompatible file.\n"); - break; - case SEEKGZIP_ZLIBERROR: - fprintf(stderr, "ERROR: An error occurred in zlib.\n"); - break; - } -} - -int main(int argc, char *argv[]) -{ - int ret = 0; - - if (argc != 3) { - printf("This utility manages an index for random (seekable) access to a gzip file.\n"); - printf("USAGE:\n"); - printf(" %s -b \n", argv[0]); - printf(" Build an index file \"$FILE.idx\" for the gzip file $FILE.\n"); - printf(" %s [BEGIN-END]\n", argv[0]); - printf(" Output the content of the gzip file $FILE of offset range [BEGIN:END).\n"); - return 0; - - } else if (strcmp(argv[1], "-b") == 0) { - const char *target = argv[2]; - - printf("Building an index: %s.idx\n", target); - printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); - - ret = seekgzip_build(target); - if (ret != 0) { - seekgzip_perror(ret); - return 1; - } - return 0; - - } else { - char *arg = argv[2], *p = NULL; - off_t begin = 0, end = (off_t)-1; - seekgzip_t* zs = seekgzip_open(argv[1], NULL); - if (zs == NULL) { - fprintf(stderr, "ERROR: Failed to open the index file.\n"); - return 1; - } - - p = strchr(arg, '-'); - if (p == NULL) { - begin =(off_t)strtoull(arg, NULL, 10); - end = begin+1; - } else if (p == arg) { - begin = 0; - end = (off_t)strtoull(p+1, NULL, 10); - } else if (p == arg + strlen(arg) - 1) { - *p = 0; - begin = (off_t)strtoull(arg, NULL, 10); - } else { - *p++ = 0; - begin =(off_t)strtoull(arg, NULL, 10); - end =(off_t)strtoull(p, NULL, 10); - } - - seekgzip_seek(zs, begin); - - while (begin < end) { - int read; - char 
buffer[CHUNK]; - off_t size = (end - begin); - if (CHUNK < size) { - size = CHUNK; - } - read = seekgzip_read(zs, buffer, (int)size); - if (0 < read) { - fwrite(buffer, read, sizeof(char), stdout); - begin += read; - } else if (read == 0) { - break; - } else { - fprintf(stderr, "ERROR: An error occurred while reading the gzip file.\n"); - ret = 1; - break; - } - } - - seekgzip_close(zs); - return ret; - } -} - -#endif From 84859a05c59231e098243f2d9068316a3381ab62 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 13:51:12 +0400 Subject: [PATCH 06/15] Refactor code. New set of functions to work with index --- main.c | 14 ++- seekgzip.c | 350 +++++++++++++++++++++++++---------------------------- seekgzip.h | 2 +- 3 files changed, 176 insertions(+), 190 deletions(-) diff --git a/main.c b/main.c index 0dfb758..282c11f 100644 --- a/main.c +++ b/main.c @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) printf(" %s -b \n", argv[0]); printf(" Build an index file \"$FILE.idx\" for the gzip file $FILE.\n"); printf(" %s [BEGIN-END]\n", argv[0]); - printf(" Output the content of the gzip file $FILE of offset range [BEGIN:END).\n"); + printf(" Output the content of the gzip file $FILE of offset range [BEGIN-END].\n"); return 0; } else if (strcmp(argv[1], "-b") == 0) { @@ -69,13 +69,15 @@ int main(int argc, char *argv[]) printf("Building an index: %s.idx\n", target); printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); + printf("WARNING: if program fail to write index to file, it would silently ignore that\n"); - ret = seekgzip_build(target); - if (ret != 0) { + seekgzip_t* zs = seekgzip_open(target, &ret); + if (zs == NULL) { seekgzip_perror(ret); return 1; } - return 0; + seekgzip_close(zs); + return 0; } else { char *arg = argv[2], *p = NULL; @@ -113,8 +115,10 @@ int main(int argc, char *argv[]) } read = seekgzip_read(zs, buffer, (int)size); if (0 < read) { - fwrite(buffer, read, sizeof(char), stdout); begin += read; + + if(fwrite(buffer, read, sizeof(char), 
stdout) == 0) + continue; } else if (read == 0) { break; } else { diff --git a/seekgzip.c b/seekgzip.c index 35d4235..3240ba7 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -22,6 +22,9 @@ #define SEEKGZIP_OPTIMIZATION +int seekgzip_index_alloc(seekgzip_t *sz); +void seekgzip_index_free(seekgzip_t *sz); + /*===== Begin of the portion of zran.c ===== {{{*/ /* zran.c -- example of zlib/gzip stream indexing and random access @@ -87,20 +90,11 @@ struct point { /* access point list */ struct access { - int have; /* number of list entries filled in */ - int size; /* number of list entries allocated */ + unsigned int nelements; /* number of list entries filled in */ + unsigned int allocated; /* number of list entries allocated */ struct point *list; /* allocated list */ }; -/* Deallocate an index built by build_index() */ -static void free_index(struct access *index) -{ - if (index != NULL) { - free(index->list); - free(index); - } -} - /* Add an entry to the access point list. If out of memory, deallocate the existing list and return NULL. */ static struct access *addpoint(struct access *index, int bits, @@ -108,32 +102,17 @@ static struct access *addpoint(struct access *index, int bits, { struct point *next; - /* if list is empty, create it (start with eight points) */ - if (index == NULL) { - index = (struct access*)malloc(sizeof(struct access)); - if (index == NULL) return NULL; - index->list = (struct point*)malloc(sizeof(struct point) << 3); - if (index->list == NULL) { - free(index); - return NULL; - } - index->size = 8; - index->have = 0; - } - /* if list is full, make it bigger */ - else if (index->have == index->size) { - index->size <<= 1; - next = (struct point*)realloc(index->list, sizeof(struct point) * index->size); - if (next == NULL) { - free_index(index); - return NULL; - } - index->list = next; + if (index->nelements == index->allocated) { + index->allocated = index->allocated != 0 ? 
index->allocated : 1; + index->allocated <<= 1; + index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->allocated); + if (index->list == NULL) + return NULL; } /* fill in entry and increment how many we have */ - next = index->list + index->have; + next = index->list + index->nelements; next->bits = bits; next->in = in; next->out = out; @@ -141,7 +120,7 @@ static struct access *addpoint(struct access *index, int bits, memcpy(next->window, window + WINSIZE - left, left); if (left < WINSIZE) memcpy(next->window + left, window, WINSIZE - left); - index->have++; + index->nelements++; /* return list, possibly reallocated */ return index; @@ -150,7 +129,7 @@ static struct access *addpoint(struct access *index, int bits, #ifdef SEEKGZIP_OPTIMIZATION struct point *findpoint(struct access *index, off_t offset) { - int half, len = index->have; + int half, len = index->nelements; struct point *first = &index->list[0], *middle; /* equivalent to std::upper_bound() */ @@ -183,7 +162,7 @@ static int build_index(FILE *in, off_t span, struct access **built) int ret; off_t totin, totout; /* our own total counters to avoid 4GB limit */ off_t last; /* totout value of last access point */ - struct access *index; /* access points being generated */ + struct access *index = *built; /* access points being generated */ z_stream strm; unsigned char input[CHUNK]; unsigned char window[WINSIZE]; @@ -197,12 +176,15 @@ static int build_index(FILE *in, off_t span, struct access **built) ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ if (ret != Z_OK) return ret; + + // Rewind to beginning of file + if( (ret = fseeko(in, 0, SEEK_SET)) == -1) + goto build_index_error; /* inflate the input, maintain a sliding window, and build an index -- this also validates the integrity of the compressed data using the check information at the end of the gzip or zlib stream */ totin = totout = last = 0; - index = NULL; /* will be allocated by first addpoint() */ 
strm.avail_out = 0; do { /* get some compressed data from input file */ @@ -263,16 +245,14 @@ static int build_index(FILE *in, off_t span, struct access **built) /* clean up and return index (release unused entries in list) */ (void)inflateEnd(&strm); - index = (struct access*)realloc(index, sizeof(struct point) * index->have); - index->size = index->have; + index = (struct access*)realloc(index, sizeof(struct point) * index->nelements); + index->allocated = index->nelements; *built = index; - return index->size; + return index->allocated; /* return error */ build_index_error: (void)inflateEnd(&strm); - if (index != NULL) - free_index(index); return ret; } @@ -305,7 +285,7 @@ static int extract(FILE *in, struct access *index, off_t offset, } #else here = index->list; - ret = index->have; + ret = index->nelements; while (--ret && here[1].out <= offset) here++; #endif/*SEEKGZIP_OPTIMIZATION*/ @@ -396,8 +376,9 @@ static int extract(FILE *in, struct access *index, off_t offset, /*===== End of the portion of zran.c ===== }}}*/ struct tag_seekgzip { + char *path_index; FILE *fp; - struct access index; + struct access *index; off_t offset; int errorcode; }; @@ -425,23 +406,36 @@ static uint32_t read_uint32(gzFile gz) return v; } -int seekgzip_build(const char *target) -{ - int i, len, ret = SEEKGZIP_SUCCESS; - FILE *fp = NULL; - struct access *index = NULL; - char *target_idx = NULL; - gzFile gz = NULL; +void seekgzip_index_free(seekgzip_t *sz){ + if(sz->index == NULL) + return; + + if(sz->index->list != NULL) + free(sz->index->list); + + free(sz->index); + sz->index = NULL; +} - // Open the target gzip file. 
- fp = fopen(target, "rb"); - if (fp == NULL) { - ret = SEEKGZIP_OPENERROR; - goto force_exit; - } +int seekgzip_index_alloc(seekgzip_t *sz){ + if(sz->index != NULL) + seekgzip_index_free(sz); + + if( (sz->index = (struct access *)malloc(sizeof(struct access))) == NULL) + return SEEKGZIP_OUTOFMEMORY; + + sz->index->nelements = 0; + sz->index->allocated = 0; + sz->index->list = NULL; + return SEEKGZIP_SUCCESS; +} + +int seekgzip_index_build(seekgzip_t *sz) +{ + int len, ret = SEEKGZIP_SUCCESS; // Build an index for the file. - len = build_index(fp, SPAN, &index); + len = build_index(sz->fp, SPAN, &sz->index); if (len < 0) { switch (len) { case Z_MEM_ERROR: @@ -453,93 +447,56 @@ int seekgzip_build(const char *target) default: ret = SEEKGZIP_ERROR; } - goto force_exit; + + // invalid index, so - free it + seekgzip_index_free(sz); } + return ret; +} - // Close the target file. - fclose(fp); - fp = NULL; - - // Prepare the name for the index file. - target_idx = get_index_file(target); - if (target_idx == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; - goto force_exit; - } +int seekgzip_index_save(seekgzip_t *sz){ + int i, ret = SEEKGZIP_SUCCESS; + gzFile gz; // Open the index file for writing. - gz = gzopen(target_idx, "wb"); - if (gz == NULL) { - ret = SEEKGZIP_OPENERROR; - goto force_exit; - } + gz = gzopen(sz->path_index, "wb"); + if (gz == NULL) + return SEEKGZIP_OPENERROR; // Write a header. gzwrite(gz, "ZSEK", 4); write_uint32(gz, (uint32_t)sizeof(off_t)); - write_uint32(gz, (uint32_t)index->have); + write_uint32(gz, (uint32_t)sz->index->nelements); // Write out entry points. 
- for (i = 0;i < index->have;++i) { - gzwrite(gz, &index->list[i].out, sizeof(off_t)); - gzwrite(gz, &index->list[i].in, sizeof(off_t)); - gzwrite(gz, &index->list[i].bits, sizeof(int)); - gzwrite(gz, index->list[i].window, WINSIZE); - } - -force_exit: - if (gz != NULL) { - gzclose(gz); - } - if (target_idx != NULL) { - free(target_idx); - } - if (index != NULL) { - free_index(index); - } - if (fp != NULL) { - fclose(fp); + for (i = 0;i < sz->index->nelements;++i) { + gzwrite(gz, &sz->index->list[i].out, sizeof(off_t)); + gzwrite(gz, &sz->index->list[i].in, sizeof(off_t)); + gzwrite(gz, &sz->index->list[i].bits, sizeof(int)); + gzwrite(gz, sz->index->list[i].window, WINSIZE); } + gzclose(gz); return ret; } -seekgzip_t* seekgzip_open(const char *target, int *errorcode) -{ +int seekgzip_index_load(seekgzip_t *sz){ int i, ret = SEEKGZIP_SUCCESS; - FILE *fp = NULL; - gzFile gz = NULL; - char *target_idx = NULL; - seekgzip_t *zs = NULL; - - // Open the target gzip file for reading. - fp = fopen(target, "rb"); - if (fp == NULL) { - ret = SEEKGZIP_OPENERROR; - goto error_exit; - } - - // Prepare the name for the index file. - target_idx = get_index_file(target); - if (target_idx == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } + gzFile gz; + + if( (ret = seekgzip_index_alloc(sz)) != SEEKGZIP_SUCCESS) + return ret; // Open the index file for reading. - gz = gzopen(target_idx, "rb"); - if (gz == NULL) { - ret = SEEKGZIP_OPENERROR; - goto error_exit; - } + gz = gzopen(sz->path_index, "rb"); + if (gz == NULL) + return SEEKGZIP_OPENERROR; // Read the magic string. - ret = SEEKGZIP_IMCOMPATIBLE; - if (gzgetc(gz) != 'Z') goto error_exit; - if (gzgetc(gz) != 'S') goto error_exit; - if (gzgetc(gz) != 'E') goto error_exit; - if (gzgetc(gz) != 'K') goto error_exit; - ret = SEEKGZIP_SUCCESS; + if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != 'K'){ + ret = SEEKGZIP_IMCOMPATIBLE; + goto error_exit; + } // Check the size of off_t. 
if (read_uint32(gz) != sizeof(off_t)) { @@ -547,105 +504,130 @@ seekgzip_t* seekgzip_open(const char *target, int *errorcode) goto error_exit; } - // Allocate a seekgzip_t instance. - zs = (seekgzip_t*)malloc(sizeof(seekgzip_t)); - if (zs == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } - memset(zs, 0, sizeof(*zs)); - // Read the number of entry points. - zs->index.have = zs->index.size = read_uint32(gz); + sz->index->nelements = sz->index->allocated = read_uint32(gz); // Allocate an array for entry points. - zs->index.list = (struct point*)malloc(sizeof(struct point) * zs->index.have); - if (zs->index.list == NULL) { + sz->index->list = (struct point*)malloc(sizeof(struct point) * sz->index->nelements); + if (sz->index->list == NULL) { ret = SEEKGZIP_OUTOFMEMORY; goto error_exit; } // Read entry points. - for (i = 0;i < zs->index.have;++i) { - gzread(gz, &zs->index.list[i].out, sizeof(off_t)); - gzread(gz, &zs->index.list[i].in, sizeof(off_t)); - gzread(gz, &zs->index.list[i].bits, sizeof(int)); - gzread(gz, zs->index.list[i].window, WINSIZE); + for (i = 0; i < sz->index->nelements; ++i) { + gzread(gz, &sz->index->list[i].out, sizeof(off_t)); + gzread(gz, &sz->index->list[i].in, sizeof(off_t)); + gzread(gz, &sz->index->list[i].bits, sizeof(int)); + gzread(gz, sz->index->list[i].window, WINSIZE); } +error_exit: // Close the index filiiiie. 
if (gzclose(gz) != 0) { ret = SEEKGZIP_ZLIBERROR; goto error_exit; } + return ret; +} - free(target_idx); - - zs->fp = fp; - zs->offset = 0; - zs->errorcode = 0; - - if (errorcode != NULL) { - *errorcode = 0; +seekgzip_t* seekgzip_open(const char *target, int *errorcode) +{ + int i, ret = SEEKGZIP_SUCCESS; + gzFile gz = NULL; + seekgzip_t *sz; + + if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL){ + ret = SEEKGZIP_OUTOFMEMORY; + goto error_exit; } - return zs; + + sz->offset = 0; + sz->errorcode = 0; + sz->index = NULL; -error_exit: - if (zs != NULL) { - if (zs->index.list != NULL) { - free(zs->index.list); - } - free(zs); - } - if (gz != NULL) { - gzclose(gz); + // Open the target gzip file for reading. + sz->fp = fopen(target, "rb"); + if (sz->fp == NULL) { + ret = SEEKGZIP_OPENERROR; + goto error_exit; } - if (target_idx != NULL) { - free(target_idx); + + // Prepare the name for the index file. + sz->path_index = get_index_file(target); + if (sz->path_index == NULL) { + ret = SEEKGZIP_OUTOFMEMORY; + goto error_exit; } - if (fp != NULL) { - fclose(fp); + + // Load index + ret = seekgzip_index_load(sz); + switch(ret){ + case SEEKGZIP_OPENERROR: + case SEEKGZIP_IMCOMPATIBLE: + // build index and save it + + ret = seekgzip_index_build(sz); + if( ret == SEEKGZIP_SUCCESS ){ + seekgzip_index_save(sz); // return value is not important, maybe we cannot write to file, so + // we rebuild index on every program start. 
(should be warning somehow shown) + } + break; + case SEEKGZIP_SUCCESS: + break; + + default: + goto error_exit; } - if (errorcode != NULL) { + if (errorcode != NULL) + *errorcode = ret; + return sz; + +error_exit: + seekgzip_close(sz); + if (errorcode != NULL) *errorcode = ret; - } return NULL; } -void seekgzip_close(seekgzip_t* zs) +void seekgzip_close(seekgzip_t* sz) { - if (zs != NULL) { - if (zs->fp != NULL) { - fclose(zs->fp); - } - if (zs->index.list != NULL) { - free(zs->index.list); - } - free(zs); - } + if (sz == NULL) + return; + + seekgzip_index_free(sz); + if (sz->fp != NULL){ + fclose(sz->fp); + sz->fp = NULL; + } + if (sz->path_index != NULL){ + free(sz->path_index); + sz->path_index == NULL; + } + free(sz); } -void seekgzip_seek(seekgzip_t *zs, off_t offset) +void seekgzip_seek(seekgzip_t *sz, off_t offset) { - zs->offset = offset; + sz->offset = offset; } -off_t seekgzip_tell(seekgzip_t *zs) +off_t seekgzip_tell(seekgzip_t *sz) { - return zs->offset; + return sz->offset; } -int seekgzip_read(seekgzip_t* zs, void *buffer, int size) +int seekgzip_read(seekgzip_t* sz, void *buffer, int size) { - int len = extract(zs->fp, &zs->index, zs->offset, (unsigned char*)buffer, size); + int len = extract(sz->fp, sz->index, sz->offset, (unsigned char*)buffer, size); if (0 < len) { - zs->offset += len; + sz->offset += len; } return len; } -int seekgzip_error(seekgzip_t* sgz) +int seekgzip_error(seekgzip_t* sz) { - return sgz->errorcode; + return sz->errorcode; } diff --git a/seekgzip.h b/seekgzip.h index e65ef02..a71c880 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -1,7 +1,7 @@ #ifndef __SEEKGZIP_H__ #define __SEEKGZIP_H__ -struct tag_seekgzip_t; typedef struct tag_seekgzip seekgzip_t; +struct tag_seekgzip; typedef struct tag_seekgzip seekgzip_t; enum { SEEKGZIP_SUCCESS=0, From 9f1165dcfaa2d25ce6cd9b0b8a81fa06a38f2179 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 13:54:18 +0400 Subject: [PATCH 07/15] Fix relloc on wrong object. 
Memory usage reduced --- seekgzip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seekgzip.c b/seekgzip.c index 3240ba7..7a00723 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -245,7 +245,7 @@ static int build_index(FILE *in, off_t span, struct access **built) /* clean up and return index (release unused entries in list) */ (void)inflateEnd(&strm); - index = (struct access*)realloc(index, sizeof(struct point) * index->nelements); + index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->nelements); index->allocated = index->nelements; *built = index; return index->allocated; From b5c09e89762a9c1e6081c3dff6d7b1e1c26b305d Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 13:57:25 +0400 Subject: [PATCH 08/15] Spaces replaced with tabs --- main.c | 190 ++++++------- seekgzip.c | 788 ++++++++++++++++++++++++++--------------------------- seekgzip.h | 54 ++-- 3 files changed, 516 insertions(+), 516 deletions(-) diff --git a/main.c b/main.c index 282c11f..0be3625 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* - * SeekGzip utility/library. + * SeekGzip utility/library. * * Copyright (c) 2010-2011, Naoaki Okazaki * All rights reserved. 
@@ -19,117 +19,117 @@ #include #include "seekgzip.h" -#define CHUNK 16384 /* file input buffer size */ +#define CHUNK 16384 /* file input buffer size */ static void seekgzip_perror(int ret) { - switch (ret) { - case SEEKGZIP_ERROR: - fprintf(stderr, "ERROR: An unknown error occurred.\n"); - break; - case SEEKGZIP_OPENERROR: - fprintf(stderr, "ERROR: Failed to open a file.\n"); - break; - case SEEKGZIP_READERROR: - fprintf(stderr, "ERROR: Failed to read a file.\n"); - break; - case SEEKGZIP_WRITEERROR: - fprintf(stderr, "ERROR: Failed to write a file.\n"); - break; - case SEEKGZIP_DATAERROR: - fprintf(stderr, "ERROR: The file is corrupted.\n"); - break; - case SEEKGZIP_OUTOFMEMORY: - fprintf(stderr, "ERROR: Out of memory.\n"); - break; - case SEEKGZIP_IMCOMPATIBLE: - fprintf(stderr, "ERROR: The imcompatible file.\n"); - break; - case SEEKGZIP_ZLIBERROR: - fprintf(stderr, "ERROR: An error occurred in zlib.\n"); - break; - } + switch (ret) { + case SEEKGZIP_ERROR: + fprintf(stderr, "ERROR: An unknown error occurred.\n"); + break; + case SEEKGZIP_OPENERROR: + fprintf(stderr, "ERROR: Failed to open a file.\n"); + break; + case SEEKGZIP_READERROR: + fprintf(stderr, "ERROR: Failed to read a file.\n"); + break; + case SEEKGZIP_WRITEERROR: + fprintf(stderr, "ERROR: Failed to write a file.\n"); + break; + case SEEKGZIP_DATAERROR: + fprintf(stderr, "ERROR: The file is corrupted.\n"); + break; + case SEEKGZIP_OUTOFMEMORY: + fprintf(stderr, "ERROR: Out of memory.\n"); + break; + case SEEKGZIP_IMCOMPATIBLE: + fprintf(stderr, "ERROR: The imcompatible file.\n"); + break; + case SEEKGZIP_ZLIBERROR: + fprintf(stderr, "ERROR: An error occurred in zlib.\n"); + break; + } } int main(int argc, char *argv[]) { - int ret = 0; + int ret = 0; - if (argc != 3) { - printf("This utility manages an index for random (seekable) access to a gzip file.\n"); - printf("USAGE:\n"); - printf(" %s -b \n", argv[0]); - printf(" Build an index file \"$FILE.idx\" for the gzip file $FILE.\n"); - printf(" %s 
[BEGIN-END]\n", argv[0]); - printf(" Output the content of the gzip file $FILE of offset range [BEGIN-END].\n"); - return 0; + if (argc != 3) { + printf("This utility manages an index for random (seekable) access to a gzip file.\n"); + printf("USAGE:\n"); + printf(" %s -b \n", argv[0]); + printf(" Build an index file \"$FILE.idx\" for the gzip file $FILE.\n"); + printf(" %s [BEGIN-END]\n", argv[0]); + printf(" Output the content of the gzip file $FILE of offset range [BEGIN-END].\n"); + return 0; - } else if (strcmp(argv[1], "-b") == 0) { - const char *target = argv[2]; + } else if (strcmp(argv[1], "-b") == 0) { + const char *target = argv[2]; - printf("Building an index: %s.idx\n", target); - printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); - printf("WARNING: if program fail to write index to file, it would silently ignore that\n"); + printf("Building an index: %s.idx\n", target); + printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); + printf("WARNING: if program fail to write index to file, it would silently ignore that\n"); - seekgzip_t* zs = seekgzip_open(target, &ret); - if (zs == NULL) { - seekgzip_perror(ret); - return 1; - } - seekgzip_close(zs); + seekgzip_t* zs = seekgzip_open(target, &ret); + if (zs == NULL) { + seekgzip_perror(ret); + return 1; + } + seekgzip_close(zs); return 0; - } else { - char *arg = argv[2], *p = NULL; - off_t begin = 0, end = (off_t)-1; - seekgzip_t* zs = seekgzip_open(argv[1], NULL); - if (zs == NULL) { - fprintf(stderr, "ERROR: Failed to open the index file.\n"); - return 1; - } + } else { + char *arg = argv[2], *p = NULL; + off_t begin = 0, end = (off_t)-1; + seekgzip_t* zs = seekgzip_open(argv[1], NULL); + if (zs == NULL) { + fprintf(stderr, "ERROR: Failed to open the index file.\n"); + return 1; + } - p = strchr(arg, '-'); - if (p == NULL) { - begin =(off_t)strtoull(arg, NULL, 10); - end = begin+1; - } else if (p == arg) { - begin = 0; - end = (off_t)strtoull(p+1, NULL, 10); - } else if (p == arg + 
strlen(arg) - 1) { - *p = 0; - begin = (off_t)strtoull(arg, NULL, 10); - } else { - *p++ = 0; - begin =(off_t)strtoull(arg, NULL, 10); - end =(off_t)strtoull(p, NULL, 10); - } + p = strchr(arg, '-'); + if (p == NULL) { + begin =(off_t)strtoull(arg, NULL, 10); + end = begin+1; + } else if (p == arg) { + begin = 0; + end = (off_t)strtoull(p+1, NULL, 10); + } else if (p == arg + strlen(arg) - 1) { + *p = 0; + begin = (off_t)strtoull(arg, NULL, 10); + } else { + *p++ = 0; + begin =(off_t)strtoull(arg, NULL, 10); + end =(off_t)strtoull(p, NULL, 10); + } - seekgzip_seek(zs, begin); + seekgzip_seek(zs, begin); - while (begin < end) { - int read; - char buffer[CHUNK]; - off_t size = (end - begin); - if (CHUNK < size) { - size = CHUNK; - } - read = seekgzip_read(zs, buffer, (int)size); - if (0 < read) { - begin += read; - + while (begin < end) { + int read; + char buffer[CHUNK]; + off_t size = (end - begin); + if (CHUNK < size) { + size = CHUNK; + } + read = seekgzip_read(zs, buffer, (int)size); + if (0 < read) { + begin += read; + if(fwrite(buffer, read, sizeof(char), stdout) == 0) continue; - } else if (read == 0) { - break; - } else { - fprintf(stderr, "ERROR: An error occurred while reading the gzip file.\n"); - ret = 1; - break; - } - } - - seekgzip_close(zs); - return ret; - } + } else if (read == 0) { + break; + } else { + fprintf(stderr, "ERROR: An error occurred while reading the gzip file.\n"); + ret = 1; + break; + } + } + + seekgzip_close(zs); + return ret; + } } diff --git a/seekgzip.c b/seekgzip.c index 7a00723..4aac5c4 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -1,5 +1,5 @@ /* - * SeekGzip utility/library. + * SeekGzip utility/library. * * Copyright (c) 2010-2011, Naoaki Okazaki * All rights reserved. @@ -76,76 +76,76 @@ void seekgzip_index_free(seekgzip_t *sz); index in a file. 
*/ -#define SPAN 1048576L /* desired distance between access points */ -#define WINSIZE 32768U /* sliding window size */ -#define CHUNK 16384 /* file input buffer size */ +#define SPAN 1048576L /* desired distance between access points */ +#define WINSIZE 32768U /* sliding window size */ +#define CHUNK 16384 /* file input buffer size */ /* access point entry */ struct point { - off_t out; /* corresponding offset in uncompressed data */ - off_t in; /* offset in input file of first full byte */ - int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ - unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ + off_t out; /* corresponding offset in uncompressed data */ + off_t in; /* offset in input file of first full byte */ + int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ + unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ }; /* access point list */ struct access { - unsigned int nelements; /* number of list entries filled in */ - unsigned int allocated; /* number of list entries allocated */ - struct point *list; /* allocated list */ + unsigned int nelements; /* number of list entries filled in */ + unsigned int allocated; /* number of list entries allocated */ + struct point *list; /* allocated list */ }; /* Add an entry to the access point list. If out of memory, deallocate the existing list and return NULL. */ static struct access *addpoint(struct access *index, int bits, - off_t in, off_t out, unsigned left, unsigned char *window) + off_t in, off_t out, unsigned left, unsigned char *window) { - struct point *next; - - /* if list is full, make it bigger */ - if (index->nelements == index->allocated) { - index->allocated = index->allocated != 0 ? 
index->allocated : 1; - index->allocated <<= 1; - index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->allocated); - if (index->list == NULL) + struct point *next; + + /* if list is full, make it bigger */ + if (index->nelements == index->allocated) { + index->allocated = index->allocated != 0 ? index->allocated : 1; + index->allocated <<= 1; + index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->allocated); + if (index->list == NULL) return NULL; - } - - /* fill in entry and increment how many we have */ - next = index->list + index->nelements; - next->bits = bits; - next->in = in; - next->out = out; - if (left) - memcpy(next->window, window + WINSIZE - left, left); - if (left < WINSIZE) - memcpy(next->window + left, window, WINSIZE - left); - index->nelements++; - - /* return list, possibly reallocated */ - return index; + } + + /* fill in entry and increment how many we have */ + next = index->list + index->nelements; + next->bits = bits; + next->in = in; + next->out = out; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + index->nelements++; + + /* return list, possibly reallocated */ + return index; } #ifdef SEEKGZIP_OPTIMIZATION struct point *findpoint(struct access *index, off_t offset) { - int half, len = index->nelements; - struct point *first = &index->list[0], *middle; - - /* equivalent to std::upper_bound() */ - while (0 < len) { - half = (len >> 1); - middle = first + half; - if (offset < middle->out) { - len = half; - } else { - first = middle + 1; - len = len - half - 1; - } - } - - /* decrement the point */ - return (first == &index->list[0] ? 
NULL : first-1); + int half, len = index->nelements; + struct point *first = &index->list[0], *middle; + + /* equivalent to std::upper_bound() */ + while (0 < len) { + half = (len >> 1); + middle = first + half; + if (offset < middle->out) { + len = half; + } else { + first = middle + 1; + len = len - half - 1; + } + } + + /* decrement the point */ + return (first == &index->list[0] ? NULL : first-1); } #endif/*SEEKGZIP_OPTIMIZATION*/ @@ -159,101 +159,101 @@ struct point *findpoint(struct access *index, off_t offset) file read error. On success, *built points to the resulting index. */ static int build_index(FILE *in, off_t span, struct access **built) { - int ret; - off_t totin, totout; /* our own total counters to avoid 4GB limit */ - off_t last; /* totout value of last access point */ - struct access *index = *built; /* access points being generated */ - z_stream strm; - unsigned char input[CHUNK]; - unsigned char window[WINSIZE]; - - /* initialize inflate */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ - if (ret != Z_OK) - return ret; - - // Rewind to beginning of file - if( (ret = fseeko(in, 0, SEEK_SET)) == -1) - goto build_index_error; - - /* inflate the input, maintain a sliding window, and build an index -- this - also validates the integrity of the compressed data using the check - information at the end of the gzip or zlib stream */ - totin = totout = last = 0; - strm.avail_out = 0; - do { - /* get some compressed data from input file */ - strm.avail_in = fread(input, 1, CHUNK, in); - if (ferror(in)) { - ret = Z_ERRNO; - goto build_index_error; - } - if (strm.avail_in == 0) { - ret = Z_DATA_ERROR; - goto build_index_error; - } - strm.next_in = input; - - /* process all of that, or until end of stream */ - do { - /* reset sliding window if necessary */ - if (strm.avail_out == 0) { - strm.avail_out = WINSIZE; - 
strm.next_out = window; - } - - /* inflate until out of input, output, or at end of block -- - update the total input and output counters */ - totin += strm.avail_in; - totout += strm.avail_out; - ret = inflate(&strm, Z_BLOCK); /* return at end of block */ - totin -= strm.avail_in; - totout -= strm.avail_out; - if (ret == Z_NEED_DICT) - ret = Z_DATA_ERROR; - if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) - goto build_index_error; - if (ret == Z_STREAM_END) - break; - - /* if at end of block, consider adding an index entry (note that if - data_type indicates an end-of-block, then all of the - uncompressed data from that block has been delivered, and none - of the compressed data after that block has been consumed, - except for up to seven bits) -- the totout == 0 provides an - entry point after the zlib or gzip header, and assures that the - index always has at least one access point; we avoid creating an - access point after the last block by checking bit 6 of data_type - */ - if ((strm.data_type & 128) && !(strm.data_type & 64) && - (totout == 0 || totout - last > span)) { - index = addpoint(index, strm.data_type & 7, totin, - totout, strm.avail_out, window); - if (index == NULL) { - ret = Z_MEM_ERROR; - goto build_index_error; - } - last = totout; - } - } while (strm.avail_in != 0); - } while (ret != Z_STREAM_END); - - /* clean up and return index (release unused entries in list) */ - (void)inflateEnd(&strm); - index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->nelements); - index->allocated = index->nelements; - *built = index; - return index->allocated; - - /* return error */ + int ret; + off_t totin, totout; /* our own total counters to avoid 4GB limit */ + off_t last; /* totout value of last access point */ + struct access *index = *built; /* access points being generated */ + z_stream strm; + unsigned char input[CHUNK]; + unsigned char window[WINSIZE]; + + /* initialize inflate */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + 
strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ + if (ret != Z_OK) + return ret; + + // Rewind to beginning of file + if( (ret = fseeko(in, 0, SEEK_SET)) == -1) + goto build_index_error; + + /* inflate the input, maintain a sliding window, and build an index -- this + also validates the integrity of the compressed data using the check + information at the end of the gzip or zlib stream */ + totin = totout = last = 0; + strm.avail_out = 0; + do { + /* get some compressed data from input file */ + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto build_index_error; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto build_index_error; + } + strm.next_in = input; + + /* process all of that, or until end of stream */ + do { + /* reset sliding window if necessary */ + if (strm.avail_out == 0) { + strm.avail_out = WINSIZE; + strm.next_out = window; + } + + /* inflate until out of input, output, or at end of block -- + update the total input and output counters */ + totin += strm.avail_in; + totout += strm.avail_out; + ret = inflate(&strm, Z_BLOCK); /* return at end of block */ + totin -= strm.avail_in; + totout -= strm.avail_out; + if (ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto build_index_error; + if (ret == Z_STREAM_END) + break; + + /* if at end of block, consider adding an index entry (note that if + data_type indicates an end-of-block, then all of the + uncompressed data from that block has been delivered, and none + of the compressed data after that block has been consumed, + except for up to seven bits) -- the totout == 0 provides an + entry point after the zlib or gzip header, and assures that the + index always has at least one access point; we avoid creating an + access point after the last block by checking bit 6 of data_type + */ + if ((strm.data_type & 128) && 
!(strm.data_type & 64) && + (totout == 0 || totout - last > span)) { + index = addpoint(index, strm.data_type & 7, totin, + totout, strm.avail_out, window); + if (index == NULL) { + ret = Z_MEM_ERROR; + goto build_index_error; + } + last = totout; + } + } while (strm.avail_in != 0); + } while (ret != Z_STREAM_END); + + /* clean up and return index (release unused entries in list) */ + (void)inflateEnd(&strm); + index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->nelements); + index->allocated = index->nelements; + *built = index; + return index->allocated; + + /* return error */ build_index_error: - (void)inflateEnd(&strm); - return ret; + (void)inflateEnd(&strm); + return ret; } /* Use the index to read len bytes from offset into buf, return bytes read or @@ -264,146 +264,146 @@ static int build_index(FILE *in, off_t span, struct access **built) was generated. extract() may also return Z_ERRNO if there is an error on reading or seeking the input file. */ static int extract(FILE *in, struct access *index, off_t offset, - unsigned char *buf, int len) + unsigned char *buf, int len) { - int ret, skip; - z_stream strm; - struct point *here; - unsigned char input[CHUNK]; - unsigned char discard[WINSIZE]; + int ret, skip; + z_stream strm; + struct point *here; + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; - /* proceed only if something reasonable to do */ - if (len < 0) - return 0; + /* proceed only if something reasonable to do */ + if (len < 0) + return 0; - /* find where in stream to start */ + /* find where in stream to start */ #ifdef SEEKGZIP_OPTIMIZATION - here = findpoint(index, offset); - if (here == NULL) { - /* possibly out of range. */ - return 0; - } + here = findpoint(index, offset); + if (here == NULL) { + /* possibly out of range. 
*/ + return 0; + } #else - here = index->list; - ret = index->nelements; - while (--ret && here[1].out <= offset) - here++; + here = index->list; + ret = index->nelements; + while (--ret && here[1].out <= offset) + here++; #endif/*SEEKGZIP_OPTIMIZATION*/ - /* initialize file and inflate state to start there */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit2(&strm, -15); /* raw inflate */ - if (ret != Z_OK) - return ret; - ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); - if (ret == -1) - goto extract_ret; - if (here->bits) { - ret = getc(in); - if (ret == -1) { - ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; - goto extract_ret; - } - (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); - } - (void)inflateSetDictionary(&strm, here->window, WINSIZE); - - /* skip uncompressed bytes until offset reached, then satisfy request */ - offset -= here->out; - strm.avail_in = 0; - skip = 1; /* while skipping to offset */ - do { - /* define where to put uncompressed data, and how much */ - if (offset == 0 && skip) { /* at offset now */ - strm.avail_out = len; - strm.next_out = buf; - skip = 0; /* only do this once */ - } - if (offset > WINSIZE) { /* skip WINSIZE bytes */ - strm.avail_out = WINSIZE; - strm.next_out = discard; - offset -= WINSIZE; - } - else if (offset != 0) { /* last skip */ - strm.avail_out = (unsigned)offset; - strm.next_out = discard; - offset = 0; - } - - /* uncompress until avail_out filled, or end of stream */ - do { - if (strm.avail_in == 0) { - strm.avail_in = fread(input, 1, CHUNK, in); - if (ferror(in)) { - ret = Z_ERRNO; - goto extract_ret; - } - if (strm.avail_in == 0) { - ret = Z_DATA_ERROR; - goto extract_ret; - } - strm.next_in = input; - } - ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */ - if (ret == Z_NEED_DICT) - ret = Z_DATA_ERROR; - if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) - goto extract_ret; - if (ret == Z_STREAM_END) 
- break; - } while (strm.avail_out != 0); - - /* if reach end of stream, then don't keep trying to get more */ - if (ret == Z_STREAM_END) - break; - - /* do until offset reached and requested data read, or stream ends */ - } while (skip); - - /* compute number of uncompressed bytes read after offset */ - ret = skip ? 0 : len - strm.avail_out; - - /* clean up and return bytes read or error */ + /* initialize file and inflate state to start there */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); /* raw inflate */ + if (ret != Z_OK) + return ret; + ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + goto extract_ret; + if (here->bits) { + ret = getc(in); + if (ret == -1) { + ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; + goto extract_ret; + } + (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); + } + (void)inflateSetDictionary(&strm, here->window, WINSIZE); + + /* skip uncompressed bytes until offset reached, then satisfy request */ + offset -= here->out; + strm.avail_in = 0; + skip = 1; /* while skipping to offset */ + do { + /* define where to put uncompressed data, and how much */ + if (offset == 0 && skip) { /* at offset now */ + strm.avail_out = len; + strm.next_out = buf; + skip = 0; /* only do this once */ + } + if (offset > WINSIZE) { /* skip WINSIZE bytes */ + strm.avail_out = WINSIZE; + strm.next_out = discard; + offset -= WINSIZE; + } + else if (offset != 0) { /* last skip */ + strm.avail_out = (unsigned)offset; + strm.next_out = discard; + offset = 0; + } + + /* uncompress until avail_out filled, or end of stream */ + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (ferror(in)) { + ret = Z_ERRNO; + goto extract_ret; + } + if (strm.avail_in == 0) { + ret = Z_DATA_ERROR; + goto extract_ret; + } + strm.next_in = input; + } + ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */ + if 
(ret == Z_NEED_DICT) + ret = Z_DATA_ERROR; + if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) + goto extract_ret; + if (ret == Z_STREAM_END) + break; + } while (strm.avail_out != 0); + + /* if reach end of stream, then don't keep trying to get more */ + if (ret == Z_STREAM_END) + break; + + /* do until offset reached and requested data read, or stream ends */ + } while (skip); + + /* compute number of uncompressed bytes read after offset */ + ret = skip ? 0 : len - strm.avail_out; + + /* clean up and return bytes read or error */ extract_ret: - (void)inflateEnd(&strm); - return ret; + (void)inflateEnd(&strm); + return ret; } /*===== End of the portion of zran.c ===== }}}*/ struct tag_seekgzip { - char *path_index; - FILE *fp; - struct access *index; - off_t offset; - int errorcode; + char *path_index; + FILE *fp; + struct access *index; + off_t offset; + int errorcode; }; static char *get_index_file(const char *target) { - char *idx = (char*)malloc(strlen(target) + 4 + 1); - if (idx == NULL) { - return NULL; - } - strcpy(idx, target); - strcat(idx, ".idx"); - return idx; + char *idx = (char*)malloc(strlen(target) + 4 + 1); + if (idx == NULL) { + return NULL; + } + strcpy(idx, target); + strcat(idx, ".idx"); + return idx; } static int write_uint32(gzFile gz, uint32_t v) { - return gzwrite(gz, &v, sizeof(v)); + return gzwrite(gz, &v, sizeof(v)); } static uint32_t read_uint32(gzFile gz) { - uint32_t v; - gzread(gz, &v, sizeof(v)); - return v; + uint32_t v; + gzread(gz, &v, sizeof(v)); + return v; } void seekgzip_index_free(seekgzip_t *sz){ @@ -426,143 +426,143 @@ int seekgzip_index_alloc(seekgzip_t *sz){ sz->index->nelements = 0; sz->index->allocated = 0; - sz->index->list = NULL; + sz->index->list = NULL; return SEEKGZIP_SUCCESS; } int seekgzip_index_build(seekgzip_t *sz) { - int len, ret = SEEKGZIP_SUCCESS; - - // Build an index for the file. 
- len = build_index(sz->fp, SPAN, &sz->index); - if (len < 0) { - switch (len) { - case Z_MEM_ERROR: - ret = SEEKGZIP_OUTOFMEMORY; - case Z_DATA_ERROR: - ret = SEEKGZIP_DATAERROR; - case Z_ERRNO: - ret = SEEKGZIP_READERROR; - default: - ret = SEEKGZIP_ERROR; - } + int len, ret = SEEKGZIP_SUCCESS; + + // Build an index for the file. + len = build_index(sz->fp, SPAN, &sz->index); + if (len < 0) { + switch (len) { + case Z_MEM_ERROR: + ret = SEEKGZIP_OUTOFMEMORY; + case Z_DATA_ERROR: + ret = SEEKGZIP_DATAERROR; + case Z_ERRNO: + ret = SEEKGZIP_READERROR; + default: + ret = SEEKGZIP_ERROR; + } - // invalid index, so - free it - seekgzip_index_free(sz); - } - return ret; + // invalid index, so - free it + seekgzip_index_free(sz); + } + return ret; } int seekgzip_index_save(seekgzip_t *sz){ - int i, ret = SEEKGZIP_SUCCESS; - gzFile gz; - - // Open the index file for writing. - gz = gzopen(sz->path_index, "wb"); - if (gz == NULL) - return SEEKGZIP_OPENERROR; - - // Write a header. - gzwrite(gz, "ZSEK", 4); - write_uint32(gz, (uint32_t)sizeof(off_t)); - write_uint32(gz, (uint32_t)sz->index->nelements); - - // Write out entry points. - for (i = 0;i < sz->index->nelements;++i) { - gzwrite(gz, &sz->index->list[i].out, sizeof(off_t)); - gzwrite(gz, &sz->index->list[i].in, sizeof(off_t)); - gzwrite(gz, &sz->index->list[i].bits, sizeof(int)); - gzwrite(gz, sz->index->list[i].window, WINSIZE); - } - - gzclose(gz); - return ret; + int i, ret = SEEKGZIP_SUCCESS; + gzFile gz; + + // Open the index file for writing. + gz = gzopen(sz->path_index, "wb"); + if (gz == NULL) + return SEEKGZIP_OPENERROR; + + // Write a header. + gzwrite(gz, "ZSEK", 4); + write_uint32(gz, (uint32_t)sizeof(off_t)); + write_uint32(gz, (uint32_t)sz->index->nelements); + + // Write out entry points. 
+ for (i = 0;i < sz->index->nelements;++i) { + gzwrite(gz, &sz->index->list[i].out, sizeof(off_t)); + gzwrite(gz, &sz->index->list[i].in, sizeof(off_t)); + gzwrite(gz, &sz->index->list[i].bits, sizeof(int)); + gzwrite(gz, sz->index->list[i].window, WINSIZE); + } + + gzclose(gz); + return ret; } int seekgzip_index_load(seekgzip_t *sz){ - int i, ret = SEEKGZIP_SUCCESS; - gzFile gz; - - if( (ret = seekgzip_index_alloc(sz)) != SEEKGZIP_SUCCESS) - return ret; - - // Open the index file for reading. - gz = gzopen(sz->path_index, "rb"); - if (gz == NULL) - return SEEKGZIP_OPENERROR; - - // Read the magic string. - if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != 'K'){ - ret = SEEKGZIP_IMCOMPATIBLE; + int i, ret = SEEKGZIP_SUCCESS; + gzFile gz; + + if( (ret = seekgzip_index_alloc(sz)) != SEEKGZIP_SUCCESS) + return ret; + + // Open the index file for reading. + gz = gzopen(sz->path_index, "rb"); + if (gz == NULL) + return SEEKGZIP_OPENERROR; + + // Read the magic string. + if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != 'K'){ + ret = SEEKGZIP_IMCOMPATIBLE; goto error_exit; - } - - // Check the size of off_t. - if (read_uint32(gz) != sizeof(off_t)) { - ret = SEEKGZIP_IMCOMPATIBLE; - goto error_exit; - } - - // Read the number of entry points. - sz->index->nelements = sz->index->allocated = read_uint32(gz); - - // Allocate an array for entry points. - sz->index->list = (struct point*)malloc(sizeof(struct point) * sz->index->nelements); - if (sz->index->list == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } - - // Read entry points. - for (i = 0; i < sz->index->nelements; ++i) { - gzread(gz, &sz->index->list[i].out, sizeof(off_t)); - gzread(gz, &sz->index->list[i].in, sizeof(off_t)); - gzread(gz, &sz->index->list[i].bits, sizeof(int)); - gzread(gz, sz->index->list[i].window, WINSIZE); - } + } + + // Check the size of off_t. 
+ if (read_uint32(gz) != sizeof(off_t)) { + ret = SEEKGZIP_IMCOMPATIBLE; + goto error_exit; + } + + // Read the number of entry points. + sz->index->nelements = sz->index->allocated = read_uint32(gz); + + // Allocate an array for entry points. + sz->index->list = (struct point*)malloc(sizeof(struct point) * sz->index->nelements); + if (sz->index->list == NULL) { + ret = SEEKGZIP_OUTOFMEMORY; + goto error_exit; + } + + // Read entry points. + for (i = 0; i < sz->index->nelements; ++i) { + gzread(gz, &sz->index->list[i].out, sizeof(off_t)); + gzread(gz, &sz->index->list[i].in, sizeof(off_t)); + gzread(gz, &sz->index->list[i].bits, sizeof(int)); + gzread(gz, sz->index->list[i].window, WINSIZE); + } error_exit: - // Close the index filiiiie. - if (gzclose(gz) != 0) { - ret = SEEKGZIP_ZLIBERROR; - goto error_exit; - } - return ret; + // Close the index filiiiie. + if (gzclose(gz) != 0) { + ret = SEEKGZIP_ZLIBERROR; + goto error_exit; + } + return ret; } seekgzip_t* seekgzip_open(const char *target, int *errorcode) { - int i, ret = SEEKGZIP_SUCCESS; - gzFile gz = NULL; - seekgzip_t *sz; - - if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL){ + int i, ret = SEEKGZIP_SUCCESS; + gzFile gz = NULL; + seekgzip_t *sz; + + if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL){ ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } - - sz->offset = 0; - sz->errorcode = 0; - sz->index = NULL; - - // Open the target gzip file for reading. - sz->fp = fopen(target, "rb"); - if (sz->fp == NULL) { - ret = SEEKGZIP_OPENERROR; - goto error_exit; - } - - // Prepare the name for the index file. - sz->path_index = get_index_file(target); - if (sz->path_index == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } - - // Load index - ret = seekgzip_index_load(sz); - switch(ret){ + goto error_exit; + } + + sz->offset = 0; + sz->errorcode = 0; + sz->index = NULL; + + // Open the target gzip file for reading. 
+ sz->fp = fopen(target, "rb"); + if (sz->fp == NULL) { + ret = SEEKGZIP_OPENERROR; + goto error_exit; + } + + // Prepare the name for the index file. + sz->path_index = get_index_file(target); + if (sz->path_index == NULL) { + ret = SEEKGZIP_OUTOFMEMORY; + goto error_exit; + } + + // Load index + ret = seekgzip_index_load(sz); + switch(ret){ case SEEKGZIP_OPENERROR: case SEEKGZIP_IMCOMPATIBLE: // build index and save it @@ -578,17 +578,17 @@ seekgzip_t* seekgzip_open(const char *target, int *errorcode) default: goto error_exit; - } + } - if (errorcode != NULL) - *errorcode = ret; - return sz; + if (errorcode != NULL) + *errorcode = ret; + return sz; error_exit: - seekgzip_close(sz); - if (errorcode != NULL) - *errorcode = ret; - return NULL; + seekgzip_close(sz); + if (errorcode != NULL) + *errorcode = ret; + return NULL; } void seekgzip_close(seekgzip_t* sz) @@ -610,24 +610,24 @@ void seekgzip_close(seekgzip_t* sz) void seekgzip_seek(seekgzip_t *sz, off_t offset) { - sz->offset = offset; + sz->offset = offset; } off_t seekgzip_tell(seekgzip_t *sz) { - return sz->offset; + return sz->offset; } int seekgzip_read(seekgzip_t* sz, void *buffer, int size) { - int len = extract(sz->fp, sz->index, sz->offset, (unsigned char*)buffer, size); - if (0 < len) { - sz->offset += len; - } - return len; + int len = extract(sz->fp, sz->index, sz->offset, (unsigned char*)buffer, size); + if (0 < len) { + sz->offset += len; + } + return len; } int seekgzip_error(seekgzip_t* sz) { - return sz->errorcode; + return sz->errorcode; } diff --git a/seekgzip.h b/seekgzip.h index a71c880..cd65696 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -4,55 +4,55 @@ struct tag_seekgzip; typedef struct tag_seekgzip seekgzip_t; enum { - SEEKGZIP_SUCCESS=0, - SEEKGZIP_ERROR=-1024, - SEEKGZIP_OPENERROR, - SEEKGZIP_READERROR, - SEEKGZIP_WRITEERROR, - SEEKGZIP_DATAERROR, - SEEKGZIP_OUTOFMEMORY, - SEEKGZIP_IMCOMPATIBLE, - SEEKGZIP_ZLIBERROR, + SEEKGZIP_SUCCESS=0, + SEEKGZIP_ERROR=-1024, + SEEKGZIP_OPENERROR, + 
SEEKGZIP_READERROR, + SEEKGZIP_WRITEERROR, + SEEKGZIP_DATAERROR, + SEEKGZIP_OUTOFMEMORY, + SEEKGZIP_IMCOMPATIBLE, + SEEKGZIP_ZLIBERROR, }; int seekgzip_build( - const char *filename - ); + const char *filename + ); seekgzip_t* seekgzip_open( - const char *filename, - int *errorcode - ); + const char *filename, + int *errorcode + ); void seekgzip_close( - seekgzip_t* zs - ); + seekgzip_t* zs + ); void seekgzip_seek( - seekgzip_t *zs, - off_t offset - ); + seekgzip_t *zs, + off_t offset + ); off_t seekgzip_tell( - seekgzip_t *zs - ); + seekgzip_t *zs + ); int seekgzip_read( - seekgzip_t* zs, - void *buffer, - int size - ); + seekgzip_t* zs, + void *buffer, + int size + ); int seekgzip_error( - seekgzip_t* sgz - ); + seekgzip_t* sgz + ); #endif/*__SEEKGZIP_H__*/ From 2508b0dc9cde307fbed14faea67dc7d56acb39f5 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 13:59:17 +0400 Subject: [PATCH 09/15] Fix headers --- seekgzip.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/seekgzip.h b/seekgzip.h index cd65696..17af096 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -15,11 +15,6 @@ enum { SEEKGZIP_ZLIBERROR, }; -int -seekgzip_build( - const char *filename - ); - seekgzip_t* seekgzip_open( const char *filename, From f8b59f29c61a4ca947d057c3b8d04db3dcba3732 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 14:57:02 +0400 Subject: [PATCH 10/15] seekgzip_open() api similar to open() --- export_cpp.cpp | 4 ++-- main.c | 10 +++++----- seekgzip.c | 54 ++++++++++++++++++++++---------------------------- seekgzip.h | 2 +- 4 files changed, 32 insertions(+), 38 deletions(-) diff --git a/export_cpp.cpp b/export_cpp.cpp index dc52a5d..2664ad1 100644 --- a/export_cpp.cpp +++ b/export_cpp.cpp @@ -31,9 +31,9 @@ static std::string error_string(int errorcode) reader::reader(const char *filename) { int err = 0; - seekgzip_t* sgz = seekgzip_open(filename, &err); + seekgzip_t* sgz = seekgzip_open(filename, 0); m_obj = sgz; - if (sgz == NULL) { + if ( (err = 
seekgzip_error(sgz)) != SEEKGZIP_SUCCESS){ throw std::invalid_argument(error_string(err)); } } diff --git a/main.c b/main.c index 0be3625..2e01f6e 100644 --- a/main.c +++ b/main.c @@ -66,13 +66,13 @@ int main(int argc, char *argv[]) } else if (strcmp(argv[1], "-b") == 0) { const char *target = argv[2]; - + printf("Building an index: %s.idx\n", target); printf("Filesize up to: %d bit\n", (int)sizeof(off_t) * 8); printf("WARNING: if program fail to write index to file, it would silently ignore that\n"); - seekgzip_t* zs = seekgzip_open(target, &ret); - if (zs == NULL) { + seekgzip_t* zs = seekgzip_open(target, 0); + if ((ret = seekgzip_error(zs)) != SEEKGZIP_SUCCESS) { seekgzip_perror(ret); return 1; } @@ -82,8 +82,8 @@ int main(int argc, char *argv[]) } else { char *arg = argv[2], *p = NULL; off_t begin = 0, end = (off_t)-1; - seekgzip_t* zs = seekgzip_open(argv[1], NULL); - if (zs == NULL) { + seekgzip_t* zs = seekgzip_open(argv[1], 0); + if (zs == NULL || seekgzip_error(zs) != SEEKGZIP_SUCCESS) { fprintf(stderr, "ERROR: Failed to open the index file.\n"); return 1; } diff --git a/seekgzip.c b/seekgzip.c index 4aac5c4..7709268 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -531,16 +531,14 @@ int seekgzip_index_load(seekgzip_t *sz){ return ret; } -seekgzip_t* seekgzip_open(const char *target, int *errorcode) +seekgzip_t* seekgzip_open(const char *target, int flags) { int i, ret = SEEKGZIP_SUCCESS; gzFile gz = NULL; seekgzip_t *sz; - if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL){ - ret = SEEKGZIP_OUTOFMEMORY; - goto error_exit; - } + if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL) + return NULL; sz->offset = 0; sz->errorcode = 0; @@ -549,46 +547,39 @@ seekgzip_t* seekgzip_open(const char *target, int *errorcode) // Open the target gzip file for reading. sz->fp = fopen(target, "rb"); if (sz->fp == NULL) { - ret = SEEKGZIP_OPENERROR; + sz->errorcode = SEEKGZIP_OPENERROR; goto error_exit; } // Prepare the name for the index file. 
sz->path_index = get_index_file(target); if (sz->path_index == NULL) { - ret = SEEKGZIP_OUTOFMEMORY; + sz->errorcode = SEEKGZIP_OUTOFMEMORY; goto error_exit; } // Load index - ret = seekgzip_index_load(sz); + sz->errorcode = seekgzip_index_load(sz); switch(ret){ - case SEEKGZIP_OPENERROR: - case SEEKGZIP_IMCOMPATIBLE: - // build index and save it - - ret = seekgzip_index_build(sz); - if( ret == SEEKGZIP_SUCCESS ){ - seekgzip_index_save(sz); // return value is not important, maybe we cannot write to file, so - // we rebuild index on every program start. (should be warning somehow shown) - } - break; - case SEEKGZIP_SUCCESS: - break; + case SEEKGZIP_SUCCESS: + break; + case SEEKGZIP_OPENERROR: + case SEEKGZIP_IMCOMPATIBLE: + // build index and save it + + sz->errorcode = seekgzip_index_build(sz); + if( sz->errorcode == SEEKGZIP_SUCCESS ){ + seekgzip_index_save(sz); // return value is not important, maybe we cannot write to file, so + // we rebuild index on every program start. (should be warning somehow shown) + } + break; - default: - goto error_exit; + default: + goto error_exit; } - if (errorcode != NULL) - *errorcode = ret; - return sz; - error_exit: - seekgzip_close(sz); - if (errorcode != NULL) - *errorcode = ret; - return NULL; + return sz; } void seekgzip_close(seekgzip_t* sz) @@ -629,5 +620,8 @@ int seekgzip_read(seekgzip_t* sz, void *buffer, int size) int seekgzip_error(seekgzip_t* sz) { + if(sz == NULL) + return SEEKGZIP_OUTOFMEMORY; + return sz->errorcode; } diff --git a/seekgzip.h b/seekgzip.h index 17af096..3efbc0b 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -18,7 +18,7 @@ enum { seekgzip_t* seekgzip_open( const char *filename, - int *errorcode + int flags ); void From 96b98b3247f87a4d4a546e05b44553f1e0bca0d6 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 15:21:14 +0400 Subject: [PATCH 11/15] Auto-rebuild index if datafile modified --- seekgzip.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++------ seekgzip.h | 1 + 2 files changed, 
64 insertions(+), 7 deletions(-) diff --git a/seekgzip.c b/seekgzip.c index 7709268..a131a3b 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "seekgzip.h" #define SEEKGZIP_OPTIMIZATION @@ -376,6 +378,7 @@ static int extract(FILE *in, struct access *index, off_t offset, /*===== End of the portion of zran.c ===== }}}*/ struct tag_seekgzip { + char *path_data; char *path_index; FILE *fp; struct access *index; @@ -430,6 +433,37 @@ int seekgzip_index_alloc(seekgzip_t *sz){ return SEEKGZIP_SUCCESS; } +int seekgzip_index_checkutime(seekgzip_t *sz){ + int ret; + struct stat stats_data; + struct stat stats_index; + + if( (ret = stat(sz->path_data, &stats_data)) != 0) + return ret; + if( (ret = stat(sz->path_index, &stats_index)) != 0) + return ret; + + return (stats_data.st_mtime == stats_index.st_mtime) ? + 0 : 1; +} + +int seekgzip_index_setutime(seekgzip_t *sz){ + int ret; + struct stat stats; + struct utimbuf times; + + if( (ret = stat(sz->path_data, &stats)) != 0) + return ret; + + times.actime = stats.st_atime; + times.modtime = stats.st_mtime; + + if( (ret = utime(sz->path_index, &times)) != 0) + return ret; + + return 0; +} + int seekgzip_index_build(seekgzip_t *sz) { int len, ret = SEEKGZIP_SUCCESS; @@ -455,7 +489,8 @@ int seekgzip_index_build(seekgzip_t *sz) } int seekgzip_index_save(seekgzip_t *sz){ - int i, ret = SEEKGZIP_SUCCESS; + int ret = SEEKGZIP_SUCCESS; + unsigned int i; gzFile gz; // Open the index file for writing. 
@@ -477,15 +512,28 @@ int seekgzip_index_save(seekgzip_t *sz){ } gzclose(gz); + + seekgzip_index_setutime(sz); return ret; } int seekgzip_index_load(seekgzip_t *sz){ - int i, ret = SEEKGZIP_SUCCESS; + int ret = SEEKGZIP_SUCCESS; + unsigned int i; gzFile gz; if( (ret = seekgzip_index_alloc(sz)) != SEEKGZIP_SUCCESS) return ret; + + // Check index mod time + switch( (ret = seekgzip_index_checkutime(sz)) ){ + case 0: // match + break; + case 1: // not match + return SEEKGZIP_EXPIREDINDEX; + default: + return SEEKGZIP_OPENERROR; + } // Open the index file for reading. gz = gzopen(sz->path_index, "rb"); @@ -495,7 +543,7 @@ int seekgzip_index_load(seekgzip_t *sz){ // Read the magic string. if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != 'K'){ ret = SEEKGZIP_IMCOMPATIBLE; - goto error_exit; + goto error_exit; } // Check the size of off_t. @@ -533,8 +581,6 @@ int seekgzip_index_load(seekgzip_t *sz){ seekgzip_t* seekgzip_open(const char *target, int flags) { - int i, ret = SEEKGZIP_SUCCESS; - gzFile gz = NULL; seekgzip_t *sz; if( (sz = (seekgzip_t *)malloc(sizeof(seekgzip_t))) == NULL) @@ -550,6 +596,11 @@ seekgzip_t* seekgzip_open(const char *target, int flags) sz->errorcode = SEEKGZIP_OPENERROR; goto error_exit; } + + if( (sz->path_data = strdup(target)) == NULL){ + sz->errorcode = SEEKGZIP_OUTOFMEMORY; + goto error_exit; + } // Prepare the name for the index file. 
sz->path_index = get_index_file(target); @@ -560,11 +611,12 @@ seekgzip_t* seekgzip_open(const char *target, int flags) // Load index sz->errorcode = seekgzip_index_load(sz); - switch(ret){ + switch(sz->errorcode){ case SEEKGZIP_SUCCESS: break; case SEEKGZIP_OPENERROR: case SEEKGZIP_IMCOMPATIBLE: + case SEEKGZIP_EXPIREDINDEX: // build index and save it sz->errorcode = seekgzip_index_build(sz); @@ -594,7 +646,11 @@ void seekgzip_close(seekgzip_t* sz) } if (sz->path_index != NULL){ free(sz->path_index); - sz->path_index == NULL; + sz->path_index = NULL; + } + if (sz->path_data != NULL){ + free(sz->path_data); + sz->path_data = NULL; } free(sz); } diff --git a/seekgzip.h b/seekgzip.h index 3efbc0b..b81d7dd 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -7,6 +7,7 @@ enum { SEEKGZIP_SUCCESS=0, SEEKGZIP_ERROR=-1024, SEEKGZIP_OPENERROR, + SEEKGZIP_EXPIREDINDEX, SEEKGZIP_READERROR, SEEKGZIP_WRITEERROR, SEEKGZIP_DATAERROR, From bbaa528ae36951d5c35429be0210aaf25b52968d Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 15:29:00 +0400 Subject: [PATCH 12/15] nelements of type uintmax_t. 
Security safety checks --- seekgzip.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/seekgzip.c b/seekgzip.c index a131a3b..25b6083 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -24,6 +24,12 @@ #define SEEKGZIP_OPTIMIZATION +#define __HALF_MAX_SIGNED(type) ((type)1 << (sizeof(type)*8-2)) +#define __MAX_SIGNED(type) (__HALF_MAX_SIGNED(type) - 1 + __HALF_MAX_SIGNED(type)) +#define __MIN_SIGNED(type) (-1 - __MAX_SIGNED(type)) +#define __MIN(type) ((type)-1 < 1?__MIN_SIGNED(type):(type)0) +#define __MAX(type) ((type)~__MIN(type)) + int seekgzip_index_alloc(seekgzip_t *sz); void seekgzip_index_free(seekgzip_t *sz); @@ -92,8 +98,8 @@ struct point { /* access point list */ struct access { - unsigned int nelements; /* number of list entries filled in */ - unsigned int allocated; /* number of list entries allocated */ + uintmax_t nelements; /* number of list entries filled in */ + uintmax_t allocated; /* number of list entries allocated */ struct point *list; /* allocated list */ }; @@ -490,7 +496,7 @@ int seekgzip_index_build(seekgzip_t *sz) int seekgzip_index_save(seekgzip_t *sz){ int ret = SEEKGZIP_SUCCESS; - unsigned int i; + uintmax_t i; gzFile gz; // Open the index file for writing. @@ -519,7 +525,7 @@ int seekgzip_index_save(seekgzip_t *sz){ int seekgzip_index_load(seekgzip_t *sz){ int ret = SEEKGZIP_SUCCESS; - unsigned int i; + uintmax_t i; gzFile gz; if( (ret = seekgzip_index_alloc(sz)) != SEEKGZIP_SUCCESS) @@ -554,6 +560,10 @@ int seekgzip_index_load(seekgzip_t *sz){ // Read the number of entry points. sz->index->nelements = sz->index->allocated = read_uint32(gz); + if(__MAX(uintmax_t) / sizeof(struct point) <= sz->index->nelements){ + ret = SEEKGZIP_IMCOMPATIBLE; + goto error_exit; + } // Allocate an array for entry points. 
sz->index->list = (struct point*)malloc(sizeof(struct point) * sz->index->nelements); From 4fe7c8d76206cecea81544a49da49689b09deeef Mon Sep 17 00:00:00 2001 From: x86_64 Date: Wed, 18 Apr 2012 15:31:33 +0400 Subject: [PATCH 13/15] Readme changes --- README | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/README b/README index c6e03fe..937e763 100644 --- a/README +++ b/README @@ -18,8 +18,10 @@ streams. * HOW TO BUILD THE UTILITY -$ make +$ make all +* HOW TO INSTALL THE UTILITY +$ make install * HOW TO USE THE UTILITY @@ -29,17 +31,11 @@ This builds an index file for the specified gzip file ${FILE}. This utility creates an index file ${FILE}.idx (2) Reading the data in the specified range -$ seekgzip [BEGIN:END] +$ seekgzip [BEGIN-END] This reads the data in the gzip file ${FILE} from the offset ${BEGIN} to ${END}, and outputs the data to STDOUT. -* HOW TO BUILD PYTHON MODULE -$ make python -$ python setup.py --build_ext -$ python setup.py install - - * COPYRIGHT AND LICENSING INFORMATION This program is distributed under the zlib license. 
From 258b0d33a4d18b0df8a0ebf80b42c7dbd27d9559 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Thu, 28 Jun 2012 16:57:39 +0000 Subject: [PATCH 14/15] Add seekgzip_packed_length and seekgzip_unpacked_length functions --- seekgzip.c | 50 +++++++++++++++++++++++++++++++++++--------------- seekgzip.h | 3 +++ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/seekgzip.c b/seekgzip.c index 25b6083..aaec1e6 100644 --- a/seekgzip.c +++ b/seekgzip.c @@ -33,6 +33,17 @@ int seekgzip_index_alloc(seekgzip_t *sz); void seekgzip_index_free(seekgzip_t *sz); +struct tag_seekgzip { + char *path_data; + char *path_index; + FILE *fp; + struct access *index; + off_t offset; + off_t totin; + off_t totout; + int errorcode; +}; + /*===== Begin of the portion of zran.c ===== {{{*/ /* zran.c -- example of zlib/gzip stream indexing and random access @@ -165,7 +176,7 @@ struct point *findpoint(struct access *index, off_t offset) returns the number of access points on success (>= 1), Z_MEM_ERROR for out of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a file read error. On success, *built points to the resulting index. 
*/ -static int build_index(FILE *in, off_t span, struct access **built) +static int build_index(FILE *in, off_t span, struct access **built, seekgzip_t *sz) { int ret; off_t totin, totout; /* our own total counters to avoid 4GB limit */ @@ -256,6 +267,8 @@ static int build_index(FILE *in, off_t span, struct access **built) index->list = (struct point*)realloc(index->list, sizeof(struct point) * index->nelements); index->allocated = index->nelements; *built = index; + sz->totin = totin; + sz->totout = totout; return index->allocated; /* return error */ @@ -383,15 +396,6 @@ static int extract(FILE *in, struct access *index, off_t offset, /*===== End of the portion of zran.c ===== }}}*/ -struct tag_seekgzip { - char *path_data; - char *path_index; - FILE *fp; - struct access *index; - off_t offset; - int errorcode; -}; - static char *get_index_file(const char *target) { char *idx = (char*)malloc(strlen(target) + 4 + 1); @@ -475,7 +479,7 @@ int seekgzip_index_build(seekgzip_t *sz) int len, ret = SEEKGZIP_SUCCESS; // Build an index for the file. - len = build_index(sz->fp, SPAN, &sz->index); + len = build_index(sz->fp, SPAN, &sz->index, sz); if (len < 0) { switch (len) { case Z_MEM_ERROR: @@ -505,10 +509,12 @@ int seekgzip_index_save(seekgzip_t *sz){ return SEEKGZIP_OPENERROR; // Write a header. - gzwrite(gz, "ZSEK", 4); + gzwrite(gz, "ZSE2", 4); write_uint32(gz, (uint32_t)sizeof(off_t)); write_uint32(gz, (uint32_t)sz->index->nelements); - + gzwrite(gz, &sz->totin, sizeof(off_t)); + gzwrite(gz, &sz->totout, sizeof(off_t)); + // Write out entry points. for (i = 0;i < sz->index->nelements;++i) { gzwrite(gz, &sz->index->list[i].out, sizeof(off_t)); @@ -547,7 +553,7 @@ int seekgzip_index_load(seekgzip_t *sz){ return SEEKGZIP_OPENERROR; // Read the magic string. 
- if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != 'K'){ + if (gzgetc(gz) != 'Z' || gzgetc(gz) != 'S' || gzgetc(gz) != 'E' || gzgetc(gz) != '2'){ ret = SEEKGZIP_IMCOMPATIBLE; goto error_exit; } @@ -571,7 +577,11 @@ int seekgzip_index_load(seekgzip_t *sz){ ret = SEEKGZIP_OUTOFMEMORY; goto error_exit; } - + + // Read size of unpacked file + gzread(gz, &sz->totin, sizeof(off_t)); + gzread(gz, &sz->totout, sizeof(off_t)); + // Read entry points. for (i = 0; i < sz->index->nelements; ++i) { gzread(gz, &sz->index->list[i].out, sizeof(off_t)); @@ -675,6 +685,16 @@ off_t seekgzip_tell(seekgzip_t *sz) return sz->offset; } +off_t seekgzip_unpacked_length(seekgzip_t *sz) +{ + return sz->totout; +} + +off_t seekgzip_packed_length(seekgzip_t *sz) +{ + return sz->totin; +} + int seekgzip_read(seekgzip_t* sz, void *buffer, int size) { int len = extract(sz->fp, sz->index, sz->offset, (unsigned char*)buffer, size); diff --git a/seekgzip.h b/seekgzip.h index b81d7dd..28b16f9 100644 --- a/seekgzip.h +++ b/seekgzip.h @@ -50,5 +50,8 @@ seekgzip_error( seekgzip_t* sgz ); +off_t seekgzip_unpacked_length(seekgzip_t *sz); +off_t seekgzip_packed_length(seekgzip_t *sz); + #endif/*__SEEKGZIP_H__*/ From 739a18ed366400e1439804fd84bb5ecdef98dc93 Mon Sep 17 00:00:00 2001 From: x86_64 Date: Sun, 21 Feb 2021 10:27:04 +0300 Subject: [PATCH 15/15] EPREFIX --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index a7f8c6e..ad63675 100644 --- a/Makefile +++ b/Makefile @@ -17,10 +17,10 @@ clean: rm -rf export_python.cpp install: - mkdir -p $(DESTDIR)/usr/bin/ $(DESTDIR)/usr/lib/ $(DESTDIR)/usr/include/seekgzip/ - cp $(USR_BIN_TARGETS) $(DESTDIR)/usr/bin/ - cp $(USR_LIB_TARGETS) $(DESTDIR)/usr/lib/ - cp $(USR_INC_TARGETS) $(DESTDIR)/usr/include/seekgzip/ + mkdir -p $(DESTDIR)/$(EPREFIX)/usr/bin/ $(DESTDIR)/$(EPREFIX)/usr/lib/ $(DESTDIR)/$(EPREFIX)/usr/include/seekgzip/ + cp $(USR_BIN_TARGETS) 
$(DESTDIR)/$(EPREFIX)/usr/bin/ + cp $(USR_LIB_TARGETS) $(DESTDIR)/$(EPREFIX)/usr/lib/ + cp $(USR_INC_TARGETS) $(DESTDIR)/$(EPREFIX)/usr/include/seekgzip/ test -f .python && $(PYTHON) setup.py install || exit 0 seekgzip: seekgzip.c main.c