Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 33 additions & 23 deletions Checksum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,31 +20,13 @@
Checksum::Checksum(checksumtypes type)
: m_checksumtype(type)
{
switch (m_checksumtype) {
case checksumtypes::SHA1: {
sha1_init(&m_state.sha1);
} break;
case checksumtypes::SHA256: {
sha256_init(&m_state.sha256);
} break;
case checksumtypes::SHA512: {
sha512_init(&m_state.sha512);
} break;
case checksumtypes::MD5: {
md5_init(&m_state.md5);
} break;
#ifdef HAVE_LIBXXHASH
case checksumtypes::XXH128: {
m_state.xxh128 = XXH3_createState();
assert(m_state.xxh128 != NULL && "Out of memory!");
[[maybe_unused]] const auto ret = XXH3_128bits_reset(m_state.xxh128);
assert(ret == XXH_OK);
} break;
#endif
default:
// not allowed to have something that is not recognized.
throw std::runtime_error("wrong checksum type - programming error");
if (m_checksumtype == checksumtypes::XXH128) {
m_state.xxh128 = XXH3_createState();
assert(m_state.xxh128 != NULL && "Out of memory!");
}
#endif
reset();
}

Checksum::Checksum(Checksum&& other)
Expand Down Expand Up @@ -120,6 +102,34 @@ Checksum::update(std::size_t length, const char* buffer)
static_cast<const unsigned char*>(static_cast<const void*>(buffer)));
}

/// Re-initializes the hash state that matches m_checksumtype, so the object
/// can be used for a fresh checksum calculation.
///
/// For XXH128 (only compiled in when HAVE_LIBXXHASH is defined) the state
/// handle held in m_state.xxh128 is reset in place - nothing is allocated
/// here - and a failing reset is only detected through assert().
///
/// @throws std::runtime_error if m_checksumtype has no case below.
void
Checksum::reset()
{
  switch (m_checksumtype) {
    case checksumtypes::MD5:
      md5_init(&m_state.md5);
      return;
    case checksumtypes::SHA1:
      sha1_init(&m_state.sha1);
      return;
    case checksumtypes::SHA256:
      sha256_init(&m_state.sha256);
      return;
    case checksumtypes::SHA512:
      sha512_init(&m_state.sha512);
      return;
#ifdef HAVE_LIBXXHASH
    case checksumtypes::XXH128: {
      // the return value is only inspected by the assert, hence maybe_unused
      [[maybe_unused]] const auto status =
        XXH3_128bits_reset(m_state.xxh128);
      assert(status == XXH_OK);
      return;
    }
#endif
    default:
      // handled after the switch
      break;
  }

  // not allowed to have something that is not recognized.
  throw std::runtime_error("wrong checksum type - programming error");
}

#if 0
// prints checksum to stdout
static void
Expand Down
18 changes: 7 additions & 11 deletions Checksum.hh
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,14 @@
#include <xxhash.h>
#endif

#include "ChecksumTypes.hh"

/**
* class for checksum calculation
*/
class Checksum
{
public:
// these are the checksums that can be calculated
enum class checksumtypes
{
NOTSET = 0,
MD5,
SHA1,
SHA256,
SHA512,
XXH128
};

explicit Checksum(checksumtypes type);
Checksum(const Checksum& other);
Checksum(Checksum&& other);
Expand All @@ -45,6 +36,9 @@ public:
int update(std::size_t length, const unsigned char* buffer);
int update(std::size_t length, const char* buffer);

/// makes the object behave as if it was newly constructed
void reset();

#if 0
/// prints the checksum on stdout
int print();
Expand All @@ -57,6 +51,8 @@ public:
// returns negative if something is wrong.
[[gnu::pure]] int getDigestLength() const;

checksumtypes getType() const noexcept { return m_checksumtype; }

private:
// to know what type of checksum we are doing
const checksumtypes m_checksumtype = checksumtypes::NOTSET;
Expand Down
12 changes: 12 additions & 0 deletions ChecksumTypes.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

/// The checksum algorithms that class Checksum can be asked to calculate.
/// The numeric values are spelled out; they are the same ones the compiler
/// would assign implicitly, counting up from NOTSET = 0.
enum class checksumtypes
{
  NOTSET = 0, ///< no algorithm selected
  MD5 = 1,
  SHA1 = 2,
  SHA256 = 3,
  SHA512 = 4,
  XXH128 = 5
};
100 changes: 50 additions & 50 deletions Fileinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,33 @@
// project
#include "Checksum.hh" //checksum calculation
#include "Fileinfo.hh"
#include "Options.hh"
#include "UndoableUnlink.hh"

int
Fileinfo::fillwithbytes(enum readtobuffermode filltype,
enum readtobuffermode lasttype,
std::vector<char>& buffer)
std::vector<char>& buffer,
Checksum& chk,
const Options& options)
{

// Decide if we are going to read from file or not.
// If file is short, first bytes might be ALL bytes!
if (lasttype != readtobuffermode::NOT_DEFINED) {
if (this->size() <= static_cast<filesizetype>(m_somebytes.size())) {
// pointless to read - all bytes in the file are in the field
// m_somebytes, or checksum is calculated!
const auto filesize = this->size();
const auto ufilesize = static_cast<std::uint64_t>(filesize);
// we might already have checksummed the entire file in the previous step, if
// it was smaller than the buffer.
if (chk.getType() == options.checksum_for_firstlast_bytes) {
if (lasttype == readtobuffermode::READ_FIRST_BYTES &&
options.first_bytes_size >= ufilesize) {
// already checksummed!
return 0;
}
if (lasttype == readtobuffermode::READ_LAST_BYTES &&
options.last_bytes_size >= ufilesize) {
// already checksummed!
return 0;
}
}

// set memory to zero
m_somebytes.fill('\0');

std::fstream f1;
f1.open(m_filename, std::ios_base::in);
if (!f1.is_open()) {
Expand All @@ -49,56 +55,50 @@ Fileinfo::fillwithbytes(enum readtobuffermode filltype,
return -1;
}

auto checksumtype = Checksum::checksumtypes::NOTSET;
// read some bytes
switch (filltype) {
case readtobuffermode::READ_FIRST_BYTES:
// read at start of file
f1.read(m_somebytes.data(), SomeByteSize);
break;
case readtobuffermode::READ_LAST_BYTES:
// read at end of file
f1.seekg(-SomeByteSize, std::ios_base::end);
f1.read(m_somebytes.data(), SomeByteSize);
break;
case readtobuffermode::CREATE_MD5_CHECKSUM:
checksumtype = Checksum::checksumtypes::MD5;
break;
case readtobuffermode::CREATE_SHA1_CHECKSUM:
checksumtype = Checksum::checksumtypes::SHA1;
break;
case readtobuffermode::CREATE_SHA256_CHECKSUM:
checksumtype = Checksum::checksumtypes::SHA256;
break;
case readtobuffermode::CREATE_SHA512_CHECKSUM:
checksumtype = Checksum::checksumtypes::SHA512;
break;
case readtobuffermode::CREATE_XXH128_CHECKSUM:
checksumtype = Checksum::checksumtypes::XXH128;
break;
default:
std::cerr << "does not know how to do that filltype:"
<< static_cast<long>(filltype) << std::endl;
bool read_entire_file = true;
std::streamsize bytes_to_read{};
if (filltype == readtobuffermode::READ_FIRST_BYTES) {
bytes_to_read = static_cast<std::streamsize>(options.first_bytes_size);
if (filesize > bytes_to_read) {
read_entire_file = false;
}
} else if (filltype == readtobuffermode::READ_LAST_BYTES) {
bytes_to_read = static_cast<std::streamsize>(options.last_bytes_size);
if (filesize > bytes_to_read) {
read_entire_file = false;
f1.seekg(-options.last_bytes_size, std::ios_base::end);
}
}

if (checksumtype != Checksum::checksumtypes::NOTSET) {
Checksum chk(checksumtype);
// set memory to zero
m_somebytes.fill('\0');

// ensure the checksum object is in a good state
chk.reset();

if (read_entire_file) {
while (f1) {
f1.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
// gcount is never negative, the cast is safe.
chk.update(static_cast<std::size_t>(f1.gcount()), buffer.data());
}

// store the result of the checksum calculation in somebytes
assert(chk.getDigestLength() > 0);
assert(static_cast<std::size_t>(chk.getDigestLength()) <=
m_somebytes.size());
if (chk.printToBuffer(m_somebytes.data(), m_somebytes.size())) {
std::cerr << "failed writing digest to buffer!!" << std::endl;
} else {
const auto bufsize = static_cast<std::streamsize>(buffer.size());
while (f1 && bytes_to_read > 0) {
f1.read(buffer.data(), std::min(bufsize, bytes_to_read));
// gcount is never negative, the cast is safe.
bytes_to_read -= f1.gcount();
chk.update(static_cast<std::size_t>(f1.gcount()), buffer.data());
}
}

// store the result of the checksum calculation in somebytes
assert(chk.getDigestLength() > 0);
assert(static_cast<std::size_t>(chk.getDigestLength()) <= m_somebytes.size());
if (chk.printToBuffer(m_somebytes.data(), m_somebytes.size())) {
std::cerr << "failed writing digest to buffer!!" << std::endl;
}

return 0;
}

Expand Down
7 changes: 6 additions & 1 deletion Fileinfo.hh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
// os specific headers
#include <sys/types.h> //for off_t and others.

class Checksum;
struct Options;

/**
Holds information about a file.
Keeping this small is probably beneficial for performance, because the
Expand Down Expand Up @@ -143,7 +146,9 @@ public:
*/
int fillwithbytes(enum readtobuffermode filltype,
enum readtobuffermode lasttype,
std::vector<char>& buffer);
std::vector<char>& buffer,
Checksum& cksum,
const Options& options);

/// get a pointer to the bytes read from the file
const char* getbyteptr() const { return m_somebytes.data(); }
Expand Down
22 changes: 12 additions & 10 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,26 @@
AUTOMAKE_OPTIONS = gnu # I would like dist-bzip2 here, but automake complains
bin_PROGRAMS = rdfind
rdfind_SOURCES = rdfind.cc Checksum.cc Dirlist.cc Fileinfo.cc Rdutil.cc \
EasyRandom.cc UndoableUnlink.cc CmdlineParser.cc
EasyRandom.cc UndoableUnlink.cc CmdlineParser.cc Options.cc

LDADD = @LIBXXHASH@
#these are the test scripts to execute - I do not know how to glob here,
#feedback welcome.
TESTS=testcases/largefilesupport.sh \
TESTS=testcases/checksum_buffersize.sh \
testcases/checksum_options.sh \
testcases/hardlink_fails.sh \
testcases/largefilesupport.sh \
testcases/md5collisions.sh \
testcases/sha1collisions.sh \
testcases/symlinking_action.sh \
testcases/verify_deterministic_operation.sh \
testcases/verify_dryrun_option.sh \
testcases/verify_filesize_option.sh \
testcases/verify_maxfilesize_option.sh \
testcases/verify_dryrun_option.sh \
testcases/verify_nochecksum.sh \
testcases/verify_ranking.sh \
testcases/verify_deterministic_operation.sh \
testcases/checksum_options.sh \
testcases/md5collisions.sh \
testcases/sha1collisions.sh \
testcases/checksum_buffersize.sh \
testcases/verify_nochecksum.sh
testcases/verify_size_savings.sh \
testcases/verify_skipfirstbytes.sh


AUXFILES=testcases/common_funcs.sh \
Expand All @@ -41,7 +43,7 @@ AUXFILES=testcases/common_funcs.sh \
EXTRA_DIST = \
Dirlist.hh Checksum.hh Fileinfo.hh \
Rdutil.hh bootstrap.sh RdfindDebug.hh EasyRandom.hh UndoableUnlink.hh \
CmdlineParser.hh \
CmdlineParser.hh Options.hh ChecksumTypes.hh \
$(TESTS) \
$(AUXFILES) \
rdfind.1 LICENSE \
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
next
optionally disable the checksum step by giving -checksum none
optionally show progress
optionally adjust the size of first/last bytes, or disable it completely.
1.7.0
requires a C++17 capable compiler.
new fast non-cryptographic hash xxh
Expand Down
Loading
Loading